huge_file.cpp

Go to the documentation of this file.
00001 /*****************************************************************************\
00002 *                                                                             *
00003 *  Name   : huge_file                                                         *
00004 *  Author : Chris Koeritz                                                     *
00005 *                                                                             *
00006 *******************************************************************************
00007 * Copyright (c) 2007-$now By Author.  This program is free software; you can  *
00008 * redistribute it and/or modify it under the terms of the GNU General Public  *
00009 * License as published by the Free Software Foundation; either version 2 of   *
00010 * the License or (at your option) any later version.  This is online at:      *
00011 *     http://www.fsf.org/copyleft/gpl.html                                    *
00012 * Please send any updates to: fred@gruntose.com                               *
00013 \*****************************************************************************/
00014 
00015 #include "byte_filer.h"
00016 #include "huge_file.h"
00017 
00018 #include <basis/byte_array.h>
00019 #include <basis/functions.h>
00020 #include <basis/guards.h>
00021 
00022 #include <stdio.h>
00023 
00024 #undef LOG
00025 #define LOG(to_print) printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s())
00026 
00027 //#define DEBUG_HUGE_FILE
00028   // uncomment for noisy version.
00029 
00030 using namespace basis;
00031 
00032 namespace filesystem {
00033 
00034 huge_file::huge_file(const astring &filename, const astring &permissions)
00035 : _real_file(new byte_filer(filename, permissions)),
00036   _file_pointer(0)
00037 {
00038 }
00039 
00040 huge_file::~huge_file()
00041 {
00042   WHACK(_real_file);
00043 }
00044 
00045 void huge_file::flush() { _real_file->flush(); }
00046 
00047 bool huge_file::truncate() { return _real_file->truncate(); }
00048 
00049 double huge_file::length()
00050 {
00051   FUNCDEF("length");
00052 
00053 //trying to read to see if we're past endpoint.
00054 //  if this approach works, length may want to close and reopen file for
00055 //  reading, since we can't add any bytes to it for writing just to find
00056 //  the length out.
00057 
00058 
00059   double save_posn = _file_pointer;
00060   // skip to the beginning of the file so we can try to find the end.
00061   _file_pointer = 0;
00062   _real_file->seek(0, byte_filer::FROM_START);
00063   size_t naive_size = _real_file->length();
00064   if (naive_size < _real_file->file_size_limit()) {
00065     // lucked out; we are within normal file size limitations.
00066     seek(save_posn, byte_filer::FROM_START);
00067     return double(naive_size);
00068   }
00069   
00070   double best_highest = 0.0;  // the maximum we've safely seeked to.
00071 
00072   size_t big_jump = byte_filer::file_size_limit();
00073     // try with the largest possible seek at first.
00074 
00075   while (true) {
00076 #ifdef DEBUG_HUGE_FILE
00077     LOG(a_sprintf("best highest=%.0f", best_highest));
00078 #endif
00079     // iterate until we reach our exit condition, which seems like it must
00080     // always occur eventually unless the file is being monkeyed with.
00081     bool seek_ret = _real_file->seek(int(big_jump), byte_filer::FROM_CURRENT);
00082 #ifdef DEBUG_HUGE_FILE
00083     LOG(a_sprintf("  seek ret=%d", int(seek_ret)));
00084 #endif
00085     byte_array temp_bytes;
00086     int bytes_read = _real_file->read(temp_bytes, 1);
00087     if (bytes_read < 1)
00088       seek_ret = false;
00089 #ifdef DEBUG_HUGE_FILE
00090     LOG(a_sprintf("  read %d bytes", bytes_read));
00091 #endif
00092     bool at_eof = _real_file->eof();
00093 #ifdef DEBUG_HUGE_FILE
00094     LOG(a_sprintf("  at_eof=%d", int(at_eof)));
00095 #endif
00096     if (seek_ret && !at_eof) {
00097 #ifdef DEBUG_HUGE_FILE
00098       LOG("seek worked, incrementing best highest and trying same jump again");
00099 #endif
00100       // the seek worked, so we'll just jump forward again.
00101       best_highest += double(big_jump);
00102       _file_pointer += double(big_jump);
00103       continue;
00104     } else if (seek_ret && at_eof) {
00105 #ifdef DEBUG_HUGE_FILE
00106       LOG("seek worked but found eof exactly.");
00107 #endif
00108       // the seek did worked, but apparently we've also found the end point.
00109       best_highest += double(big_jump);
00110       _file_pointer += double(big_jump);
00111       break;
00112     } else {
00113       // that seek was too large, so we need to back down and try a smaller
00114       // seek size.
00115 #ifdef DEBUG_HUGE_FILE
00116       LOG("seek failed, going back to best highest and trying same jump again");
00117 #endif
00118       _file_pointer = 0;
00119       _real_file->seek(0, byte_filer::FROM_START); 
00120       outcome worked = seek(best_highest, byte_filer::FROM_START);
00121         // this uses our version to position at large sizes.
00122       if (worked != OKAY) {
00123         // this is a bad failure; it says that the file size changed or
00124         // something malfunctioned.  we should always be able to get back to
00125         // the last good size we found if the file is static.
00126         LOG(a_sprintf("failed to seek back to best highest %.0f on ",
00127             best_highest) + _real_file->filename());
00128         // try to repair our ideas about the file by starting the process
00129         // over.
00130 //hmmm: count the number of times restarted and bail after N.
00131         seek_ret = _real_file->seek(0, byte_filer::FROM_START);
00132         _file_pointer = 0;
00133         if (!seek_ret) {
00134           // the heck with this.  we can't even go back to the start.  this
00135           // file seems to be screwed up now.
00136           LOG(astring("failed to seek back to start of file!  on ")
00137               + _real_file->filename());
00138           return 0;
00139         }
00140         // reset the rest of the positions for our failed attempt to return
00141         // to what we already thought was good.
00142         _file_pointer = 0;
00143         big_jump = byte_filer::file_size_limit();
00144         best_highest = 0;
00145         continue;
00146       }
00147       // okay, nothing bad happened when we went back to our last good point.
00148       if (big_jump <= 0) {
00149         // success in finding the smallest place that we can't seek between.
00150 #ifdef DEBUG_HUGE_FILE
00151         LOG("got down to smallest big jump, 0!");
00152 #endif
00153         break;
00154       }
00155       // formula expects that the maximum file size is a power of 2.
00156       big_jump /= 2;
00157 #ifdef DEBUG_HUGE_FILE
00158       LOG(a_sprintf("restraining big jump down to %u.", big_jump));
00159 #endif
00160       continue;
00161     }
00162   }
00163 
00164   // go back to where we started out.
00165   seek(0, byte_filer::FROM_START);
00166   seek(save_posn, byte_filer::FROM_CURRENT);
00167 #ifdef DEBUG_HUGE_FILE
00168   LOG(a_sprintf("saying file len is %.0f.", best_highest + 1.0));
00169 #endif
00170   return best_highest + 1.0;
00171 }
00172 
00173 bool huge_file::good() const { return _real_file->good(); }
00174 
00175 bool huge_file::eof() const { return _real_file->eof(); }
00176 
00177 outcome huge_file::move_to(double absolute_posn)
00178 {
00179 #ifdef DEBUG_HUGE_FILE
00180   FUNCDEF("move_to");
00181 #endif
00182   double difference = absolute_posn - _file_pointer;
00183     // calculate the size we want to offset.
00184 #ifdef DEBUG_HUGE_FILE
00185   LOG(a_sprintf("abs_pos=%.0f difference=%.0f old_filepoint=%.0f",
00186       absolute_posn, difference, _file_pointer));
00187 #endif
00188   // if we're at the same place, we don't have to do anything.
00189   if (difference < 0.000001) {
00190 #ifdef DEBUG_HUGE_FILE
00191     LOG("difference was minimal, saying we're done.");
00192 #endif
00193     return OKAY;
00194   }
00195   while (absolute_value(difference) > 0.000001) {
00196     double seek_size = minimum(double(byte_filer::file_size_limit() - 1),
00197         absolute_value(difference));
00198     if (difference < 0)
00199       seek_size *= -1.0;  // flip sign of seek.
00200 #ifdef DEBUG_HUGE_FILE
00201     LOG(a_sprintf("  seeksize=%d", int(seek_size)));
00202 #endif
00203     bool seek_ret = _real_file->seek(int(seek_size),
00204         byte_filer::FROM_CURRENT);
00205     if (!seek_ret) {
00206 #ifdef DEBUG_HUGE_FILE
00207       LOG(a_sprintf("failed to seek %d from current", int(seek_size)));
00208 #endif
00209       return FAILURE;  // seek failed somehow.
00210     }
00211     _file_pointer += seek_size;
00212 #ifdef DEBUG_HUGE_FILE
00213     LOG(a_sprintf("  now_filepoint=%.0f", _file_pointer));
00214 #endif
00215     difference = absolute_posn - _file_pointer;
00216 #ifdef DEBUG_HUGE_FILE
00217     LOG(a_sprintf("  now_difference=%.0f", difference));
00218 #endif
00219   }
00220   return OKAY;
00221 }
00222 
00223 outcome huge_file::seek(double new_position, byte_filer::origins origin)
00224 {
00225 #ifdef DEBUG_HUGE_FILE
00226   FUNCDEF("seek");
00227 #endif
00228   if (origin == byte_filer::FROM_CURRENT) {
00229     return move_to(_file_pointer + new_position);
00230   } else if (origin == byte_filer::FROM_START) {
00231     _file_pointer = 0;
00232     if (!_real_file->seek(0, byte_filer::FROM_START))
00233       return FAILURE;
00234     return move_to(new_position);
00235   } else if (origin == byte_filer::FROM_END) {
00236 #ifdef DEBUG_HUGE_FILE
00237     LOG("into precarious FROM_END case.");
00238 #endif
00239     double file_len = length();  // could take a scary long time possibly.
00240 #ifdef DEBUG_HUGE_FILE
00241     LOG(a_sprintf("  FROM_END got len %.0f.", file_len));
00242 #endif
00243     _file_pointer = file_len;
00244       // it's safe, although not efficient, for us to call the length()
00245       // method here.  our current version of length() uses the byte_filer's
00246       // seek method directly and only FROM_CURRENT and FROM_START from this
00247       // class's seek method.
00248     _real_file->seek(0, byte_filer::FROM_END);
00249     return move_to(_file_pointer - new_position);
00250   }
00251   // unknown origin.
00252   return BAD_INPUT;
00253 }
00254 
00255 outcome huge_file::read(byte_array &to_fill, int desired_size, int &size_read)
00256 {
00257 //  FUNCDEF("read");
00258   size_read = 0;
00259   int ret = _real_file->read(to_fill, desired_size);
00260   if (ret < 0)
00261     return FAILURE;  // couldn't read the bytes.
00262   _file_pointer += double(size_read);
00263   size_read = ret;
00264   return OKAY; 
00265 }
00266 
00267 outcome huge_file::write(const byte_array &to_write, int &size_written)
00268 {
00269 //  FUNCDEF("write");
00270   size_written = 0;
00271   int ret = _real_file->write(to_write);
00272   if (ret < 0)
00273     return FAILURE;  // couldn't write the bytes.
00274   _file_pointer += double(size_written);
00275   size_written = ret;
00276   return OKAY;
00277 }
00278 
00279 } //namespace.
00280 
Generated on Sat Jan 28 04:22:22 2012 for hoople2 project by  doxygen 1.6.3