huge_file.cpp

Go to the documentation of this file.
00001 #ifndef HUGE_FILE_IMPLEMENTATION_FILE
00002 #define HUGE_FILE_IMPLEMENTATION_FILE
00003 
00004 /*****************************************************************************\
00005 *                                                                             *
00006 *  Name   : huge_file                                                         *
00007 *  Author : Chris Koeritz                                                     *
00008 *                                                                             *
00009 *******************************************************************************
00010 * Copyright (c) 2007-$now By Author.  This program is free software; you can  *
00011 * redistribute it and/or modify it under the terms of the GNU General Public  *
00012 * License as published by the Free Software Foundation; either version 2 of   *
00013 * the License or (at your option) any later version.  This is online at:      *
00014 *     http://www.fsf.org/copyleft/gpl.html                                    *
00015 * Please send any updates to: fred@gruntose.com                               *
00016 \*****************************************************************************/
00017 
00018 #include "byte_filer.h"
00019 #include "huge_file.h"
00020 
00021 #include <basis/byte_array.h>
00022 #include <basis/function.h>
00023 #include <basis/guards.h>
00024 #include <basis/log_base.h>
00025 
00026 #undef LOG
00027 #define LOG(s) CLASS_EMERGENCY_LOG(program_wide_logger(), s)
00028 
00029 //#define DEBUG_HUGE_FILE
00030   // uncomment for noisy version.
00031 
00032 huge_file::huge_file(const istring &filename, const istring &permissions)
00033 : _real_file(new byte_filer(filename, permissions)),
00034   _file_pointer(0)
00035 {
00036 }
00037 
00038 huge_file::~huge_file()
00039 {
00040   WHACK(_real_file);
00041 }
00042 
00043 void huge_file::flush() { _real_file->flush(); }
00044 
00045 bool huge_file::truncate() { return _real_file->truncate(); }
00046 
00047 double huge_file::length()
00048 {
00049   FUNCDEF("length");
00050 
00051 //trying to read to see if we're past endpoint.
00052 //  if this approach works, length may want to close and reopen file for
00053 //  reading, since we can't add any bytes to it for writing just to find
00054 //  the length out.
00055 
00056 
00057   double save_posn = _file_pointer;
00058   // skip to the beginning of the file so we can try to find the end.
00059   _file_pointer = 0;
00060   _real_file->seek(0, byte_filer::FROM_START);
00061   size_t naive_size = _real_file->length();
00062   if (naive_size < _real_file->file_size_limit()) {
00063     // lucked out; we are within normal file size limitations.
00064     seek(save_posn, byte_filer::FROM_START);
00065     return double(naive_size);
00066   }
00067   
00068   double best_highest = 0.0;  // the maximum we've safely seeked to.
00069 
00070   size_t big_jump = byte_filer::file_size_limit();
00071     // try with the largest possible seek at first.
00072 
00073   while (true) {
00074 #ifdef DEBUG_HUGE_FILE
00075     LOG(isprintf("best highest=%.0f", best_highest));
00076 #endif
00077     // iterate until we reach our exit condition, which seems like it must
00078     // always occur eventually unless the file is being monkeyed with.
00079     bool seek_ret = _real_file->seek(int(big_jump), byte_filer::FROM_CURRENT);
00080 #ifdef DEBUG_HUGE_FILE
00081     LOG(isprintf("  seek ret=%d", int(seek_ret)));
00082 #endif
00083     byte_array temp_bytes;
00084     int bytes_read = _real_file->read(temp_bytes, 1);
00085     if (bytes_read < 1)
00086       seek_ret = false;
00087 #ifdef DEBUG_HUGE_FILE
00088     LOG(isprintf("  read %d bytes", bytes_read));
00089 #endif
00090     bool at_eof = _real_file->eof();
00091 #ifdef DEBUG_HUGE_FILE
00092     LOG(isprintf("  at_eof=%d", int(at_eof)));
00093 #endif
00094     if (seek_ret && !at_eof) {
00095 #ifdef DEBUG_HUGE_FILE
00096       LOG("seek worked, incrementing best highest and trying same jump again");
00097 #endif
00098       // the seek worked, so we'll just jump forward again.
00099       best_highest += double(big_jump);
00100       _file_pointer += double(big_jump);
00101       continue;
00102     } else if (seek_ret && at_eof) {
00103 #ifdef DEBUG_HUGE_FILE
00104       LOG("seek worked but found eof exactly.");
00105 #endif
00106       // the seek did worked, but apparently we've also found the end point.
00107       best_highest += double(big_jump);
00108       _file_pointer += double(big_jump);
00109       break;
00110     } else {
00111       // that seek was too large, so we need to back down and try a smaller
00112       // seek size.
00113 #ifdef DEBUG_HUGE_FILE
00114       LOG("seek failed, going back to best highest and trying same jump again");
00115 #endif
00116       _file_pointer = 0;
00117       _real_file->seek(0, byte_filer::FROM_START); 
00118       outcome worked = seek(best_highest, byte_filer::FROM_START);
00119         // this uses our version to position at large sizes.
00120       if (worked != OKAY) {
00121         // this is a bad failure; it says that the file size changed or
00122         // something malfunctioned.  we should always be able to get back to
00123         // the last good size we found if the file is static.
00124         LOG(isprintf("failed to seek back to best highest %.0f on ",
00125             best_highest) + _real_file->filename());
00126         // try to repair our ideas about the file by starting the process
00127         // over.
00128 //hmmm: count the number of times restarted and bail after N.
00129         seek_ret = _real_file->seek(0, byte_filer::FROM_START);
00130         _file_pointer = 0;
00131         if (!seek_ret) {
00132           // the heck with this.  we can't even go back to the start.  this
00133           // file seems to be screwed up now.
00134           LOG(istring("failed to seek back to start of file!  on ")
00135               + _real_file->filename());
00136           return 0;
00137         }
00138         // reset the rest of the positions for our failed attempt to return
00139         // to what we already thought was good.
00140         _file_pointer = 0;
00141         big_jump = byte_filer::file_size_limit();
00142         best_highest = 0;
00143         continue;
00144       }
00145       // okay, nothing bad happened when we went back to our last good point.
00146       if (big_jump <= 0) {
00147         // success in finding the smallest place that we can't seek between.
00148 #ifdef DEBUG_HUGE_FILE
00149         LOG("got down to smallest big jump, 0!");
00150 #endif
00151         break;
00152       }
00153       // formula expects that the maximum file size is a power of 2.
00154       big_jump /= 2;
00155 #ifdef DEBUG_HUGE_FILE
00156       LOG(isprintf("restraining big jump down to %u.", big_jump));
00157 #endif
00158       continue;
00159     }
00160   }
00161 
00162   // go back to where we started out.
00163   seek(0, byte_filer::FROM_START);
00164   seek(save_posn, byte_filer::FROM_CURRENT);
00165 #ifdef DEBUG_HUGE_FILE
00166   LOG(isprintf("saying file len is %.0f.", best_highest + 1.0));
00167 #endif
00168   return best_highest + 1.0;
00169 }
00170 
00171 bool huge_file::good() const { return _real_file->good(); }
00172 
00173 bool huge_file::eof() const { return _real_file->eof(); }
00174 
00175 outcome huge_file::move_to(double absolute_posn)
00176 {
00177   FUNCDEF("move_to");
00178   double difference = absolute_posn - _file_pointer;
00179     // calculate the size we want to offset.
00180 #ifdef DEBUG_HUGE_FILE
00181   LOG(isprintf("abs_pos=%.0f difference=%.0f old_filepoint=%.0f",
00182       absolute_posn, difference, _file_pointer));
00183 #endif
00184   // if we're at the same place, we don't have to do anything.
00185   if (difference < 0.000001) {
00186 #ifdef DEBUG_HUGE_FILE
00187     LOG("difference was minimal, saying we're done.");
00188 #endif
00189     return OKAY;
00190   }
00191   while (absolute_value(difference) > 0.000001) {
00192     double seek_size = minimum(double(byte_filer::file_size_limit() - 1),
00193         absolute_value(difference));
00194     if (difference < 0)
00195       seek_size *= -1.0;  // flip sign of seek.
00196 #ifdef DEBUG_HUGE_FILE
00197     LOG(isprintf("  seeksize=%d", int(seek_size)));
00198 #endif
00199     bool seek_ret = _real_file->seek(int(seek_size),
00200         byte_filer::FROM_CURRENT);
00201     if (!seek_ret) {
00202 #ifdef DEBUG_HUGE_FILE
00203       LOG(isprintf("failed to seek %d from current", int(seek_size)));
00204 #endif
00205       return FAILURE;  // seek failed somehow.
00206     }
00207     _file_pointer += seek_size;
00208 #ifdef DEBUG_HUGE_FILE
00209     LOG(isprintf("  now_filepoint=%.0f", _file_pointer));
00210 #endif
00211     difference = absolute_posn - _file_pointer;
00212 #ifdef DEBUG_HUGE_FILE
00213     LOG(isprintf("  now_difference=%.0f", difference));
00214 #endif
00215   }
00216   return OKAY;
00217 }
00218 
00219 outcome huge_file::seek(double new_position, byte_filer::origins origin)
00220 {
00221   FUNCDEF("seek");
00222   if (origin == byte_filer::FROM_CURRENT) {
00223     return move_to(_file_pointer + new_position);
00224   } else if (origin == byte_filer::FROM_START) {
00225     _file_pointer = 0;
00226     if (!_real_file->seek(0, byte_filer::FROM_START))
00227       return FAILURE;
00228     return move_to(new_position);
00229   } else if (origin == byte_filer::FROM_END) {
00230 #ifdef DEBUG_HUGE_FILE
00231     LOG("into precarious FROM_END case.");
00232 #endif
00233     double file_len = length();  // could take a scary long time possibly.
00234 #ifdef DEBUG_HUGE_FILE
00235     LOG(isprintf("  FROM_END got len %.0f.", file_len));
00236 #endif
00237     _file_pointer = file_len;
00238       // it's safe, although not efficient, for us to call the length()
00239       // method here.  our current version of length() uses the byte_filer's
00240       // seek method directly and only FROM_CURRENT and FROM_START from this
00241       // class's seek method.
00242     _real_file->seek(0, byte_filer::FROM_END);
00243     return move_to(_file_pointer - new_position);
00244   }
00245   // unknown origin.
00246   return BAD_INPUT;
00247 }
00248 
00249 outcome huge_file::read(byte_array &to_fill, int desired_size, int &size_read)
00250 {
00251   FUNCDEF("read");
00252   size_read = 0;
00253   int ret = _real_file->read(to_fill, desired_size);
00254   if (ret < 0)
00255     return FAILURE;  // couldn't read the bytes.
00256   _file_pointer += double(size_read);
00257   size_read = ret;
00258   return OKAY; 
00259 }
00260 
00261 outcome huge_file::write(const byte_array &to_write, int &size_written)
00262 {
00263   FUNCDEF("write");
00264   size_written = 0;
00265   int ret = _real_file->write(to_write);
00266   if (ret < 0)
00267     return FAILURE;  // couldn't write the bytes.
00268   _file_pointer += double(size_written);
00269   size_written = ret;
00270   return OKAY;
00271 }
00272 
00273 
00274 #endif //HUGE_FILE_IMPLEMENTATION_FILE
00275 

Generated on Fri Nov 21 04:29:53 2008 for HOOPLE Libraries by  doxygen 1.5.1