heavy_file_ops.cpp

Go to the documentation of this file.
00001 /*****************************************************************************\
00002 *                                                                             *
00003 *  Name   : heavy file operations                                             *
00004 *  Author : Chris Koeritz                                                     *
00005 *                                                                             *
00006 *******************************************************************************
00007 * Copyright (c) 2005-$now By Author.  This program is free software; you can  *
00008 * redistribute it and/or modify it under the terms of the GNU General Public  *
00009 * License as published by the Free Software Foundation; either version 2 of   *
00010 * the License or (at your option) any later version.  This is online at:      *
00011 *     http://www.fsf.org/copyleft/gpl.html                                    *
00012 * Please send any updates to: fred@gruntose.com                               *
00013 \*****************************************************************************/
00014 
#include "directory.h"
#include "filename.h"
#include "filename_list.h"
#include "heavy_file_ops.h"
#include "huge_file.h"

#include <basis/functions.h>
#include <basis/guards.h>
#include <structures/object_packers.h>

#include <stdio.h>
00024 
00025 using namespace basis;
00026 using namespace structures;
00027 
00028 namespace filesystem {
00029 
//#define DEBUG_HEAVY_FILE_OPS
  // uncomment for noisier debugging.

// LOG(x): trace helper used by the functions in this file.
//   - with DEBUG_HEAVY_FILE_OPS defined, it prints "<class>::<func>: <text>";
//     the enclosing function must already have "func" in scope (the debug
//     builds of the functions below set that up with FUNCDEF).
//   - otherwise it only evaluates its argument for truthiness and prints
//     nothing.
// both forms expand to exactly one statement, so "if (x) LOG(y); else ..."
// parses the way it reads.  printf comes from <stdio.h>, which is included at
// the top of the file rather than here, because this spot is already inside
// namespace filesystem.
#undef LOG
#ifdef DEBUG_HEAVY_FILE_OPS
  #define LOG(to_print) \
    do { \
      printf("%s::%s: %s\n", static_class_name(), func, astring(to_print).s()); \
    } while (false)
#else
  #define LOG(s) do { if (!!(s)) {} } while (false)
#endif
00040 
00042 
00043 file_transfer_header::file_transfer_header(const file_time &time_stamp)
00044 : _filename(),
00045   _byte_start(0),
00046   _length(0),
00047   _time(time_stamp)
00048 {
00049 }
00050 
00051 astring file_transfer_header::text_form() const
00052 {
00053   astring time_text;
00054   _time.text_form(time_text);
00055   return astring("file=") + _filename
00056       + a_sprintf(" start=%d len=%d stamp=", _byte_start, _length)
00057       + time_text;
00058 }
00059 
00060 astring file_transfer_header::readable_text_form() const
00061 {
00062   astring time_text;
00063   _time.readable_text_form(time_text);
00064   return _filename
00065       + a_sprintf(" [%d bytes, mod ", _length)
00066       + time_text + "]";
00067 }
00068 
00069 void file_transfer_header::pack(byte_array &packed_form) const
00070 {
00071   _filename.pack(packed_form);
00072   attach(packed_form, _byte_start);
00073   attach(packed_form, _length);
00074   _time.pack(packed_form);
00075 }
00076 
00077 bool file_transfer_header::unpack(byte_array &packed_form)
00078 {
00079   if (!_filename.unpack(packed_form)) return false;
00080   if (!detach(packed_form, _byte_start)) return false;
00081   if (!detach(packed_form, _length)) return false;
00082   if (!_time.unpack(packed_form)) return false;
00083   return true;
00084 }
00085 
00086 int file_transfer_header::packed_size() const
00087 {
00088 byte_array temp;
00089 attach(temp, _byte_start);
00090 //hmmm: really ugly above; we should get a more exact way to know the size of
00091 //      packed doubles.
00092   return _filename.length() + 1
00093       + temp.length()
00094       + sizeof(int)
00095       + _time.packed_size();
00096 }
00097 
00099 
00100 const size_t heavy_file_operations::COPY_CHUNK_FACTOR = 1 * MEGABYTE;
00101 
00102 size_t heavy_file_operations::copy_chunk_factor()
00103 { return COPY_CHUNK_FACTOR; }
00104 
00105 heavy_file_operations::~heavy_file_operations() {}
00106   // we only need this due to our use of the root_object class_name support.
00107 
00108 const char *heavy_file_operations::outcome_name(const outcome &to_name)
00109 {
00110   switch (to_name.value()) {
00111     case SOURCE_MISSING: return "SOURCE_MISSING";
00112     case TARGET_ACCESS_ERROR: return "TARGET_ACCESS_ERROR";
00113     case TARGET_DIR_ERROR: return "TARGET_DIR_ERROR";
00114     default: return common::outcome_name(to_name);
00115   }
00116 }
00117 
00118 outcome heavy_file_operations::copy_file(const astring &source,
00119     const astring &destination, int copy_chunk_factor)
00120 {
00121 #ifdef DEBUG_HEAVY_FILE_OPS
00122   FUNCDEF("copy_file");
00123 #endif
00124   // check that the source exists...
00125   filename source_path(source);
00126   if (!source_path.exists()) return SOURCE_MISSING;
00127   file_time source_time(source_path);  // get the time on the source.
00128 
00129   // make sure the target directory exists...
00130   filename target_path(destination);
00131   filename targ_dir = target_path.dirname();
00132   if (!directory::recursive_create(targ_dir.raw())) return TARGET_DIR_ERROR;
00133 
00134   // open the source for reading.
00135   huge_file source_file(source, "rb");
00136   if (!source_file.good()) return SOURCE_MISSING;
00137 //hmmm: could be source is not accessible instead.
00138 
00139   // open target file for writing.
00140   huge_file target_file(destination, "wb");
00141   if (!target_file.good()) return TARGET_ACCESS_ERROR;
00142 
00143   byte_array chunk;
00144   int bytes_read = 0;
00145   outcome ret;
00146   while ( (ret = source_file.read(chunk, copy_chunk_factor, bytes_read))
00147       == huge_file::OKAY) {
00148     int bytes_stored;
00149     ret = target_file.write(chunk, bytes_stored);
00150     if (bytes_stored != bytes_read) return TARGET_ACCESS_ERROR;
00151     if (source_file.eof()) break;  // time to escape.
00152   }
00153 
00154   // set the time on the target file from the source's time.
00155   source_time.set_time(target_path);
00156 
00157 #ifdef DEBUG_HEAVY_FILE_OPS
00158   astring time;
00159   source_time.text_form(time);
00160   LOG(astring("setting file time for ") + source + " to " + time);
00161 #endif
00162 
00163   return OKAY;
00164 }
00165 
00166 outcome heavy_file_operations::write_file_chunk(const astring &target,
00167     double byte_start, const byte_array &chunk, bool truncate,
00168     int copy_chunk_factor)
00169 {
00170 #ifdef DEBUG_HEAVY_FILE_OPS
00171 //  FUNCDEF("write_file_chunk");
00172 #endif
00173   if (byte_start < 0) return BAD_INPUT;
00174 
00175   filename targ_name(target);
00176   if (!directory::recursive_create(targ_name.dirname().raw()))
00177     return TARGET_DIR_ERROR;
00178 
00179   if (!targ_name.exists()) {
00180     huge_file target_file(target, "w");
00181   }
00182 
00183   huge_file target_file(target, "r+b");
00184     // open the file for updating (either read or write).
00185   if (!target_file.good()) return TARGET_ACCESS_ERROR;
00186   double curr_len = target_file.length();
00187 
00188   if (curr_len < byte_start) {
00189     byte_array new_chunk;
00190     while (curr_len < byte_start) {
00191       target_file.seek(0, byte_filer::FROM_END);  // go to the end of the file.
00192       new_chunk.reset(minimum(copy_chunk_factor,
00193           int(curr_len - byte_start + 1)));
00194       int written;
00195       outcome ret = target_file.write(new_chunk, written);
00196       if (written < new_chunk.length()) return TARGET_ACCESS_ERROR;
00197       curr_len = target_file.length();
00198     }
00199   }
00200   target_file.seek(byte_start, byte_filer::FROM_START);
00201     // jump to the proper location in the file.
00202   int wrote;
00203   outcome ret = target_file.write(chunk, wrote);
00204   if (wrote != chunk.length()) return TARGET_ACCESS_ERROR;
00205   if (truncate) {
00206     target_file.truncate();
00207   }
00208   return OKAY;
00209 }
00210 
00211 bool heavy_file_operations::advance(const filename_list &to_transfer,
00212     file_transfer_header &last_action)
00213 {
00214 #ifdef DEBUG_HEAVY_FILE_OPS
00215   FUNCDEF("advance");
00216 #endif
00217   int indy = to_transfer.locate(last_action._filename);
00218   if (negative(indy)) return false;  // error.
00219   if (indy == to_transfer.elements() - 1) return false;  // done.
00220   const file_info *currfile = to_transfer.get(indy + 1);
00221   last_action._filename = currfile->raw();
00222   last_action._time = currfile->_time;
00223 
00224 #ifdef DEBUG_HEAVY_FILE_OPS
00225   if (currfile->_time == file_time(time_t(0)))
00226     LOG(astring("failed for ") + currfile->raw() + " -- has zero file time");
00227 #endif
00228 
00229   last_action._byte_start = 0;
00230   last_action._length = 0;
00231   return true;
00232 }
00233 
00234 outcome heavy_file_operations::buffer_files(const astring &source_root,
00235     const filename_list &to_transfer, file_transfer_header &last_action,
00236     byte_array &storage, int maximum_bytes)
00237 {
00238 #ifdef DEBUG_HEAVY_FILE_OPS
00239 //  FUNCDEF("buffer_files");
00240 #endif
00241   storage.reset();  // clear out the current contents.
00242 
00243   if (!to_transfer.elements()) {
00244     // we seem to be done.
00245     return OKAY;
00246   }
00247 
00248   outcome to_return = OKAY;
00249 
00250   // start filling the array with bytes from the files.
00251   while (storage.length() < maximum_bytes) {
00252     double remaining_in_array = maximum_bytes - storage.length()
00253         - last_action.packed_size();
00254     if (remaining_in_array < 128) {
00255       // ensure that we at least have a reasonable amount of space left
00256       // for storing into the array.
00257       break;
00258     }
00259 
00260     // find the current file we're at, as provided in record.
00261     if (!last_action._filename) {
00262       // no filename yet.  assume this is the first thing we've done.
00263       const file_info *currfile = to_transfer.get(0);
00264       last_action._filename = currfile->raw();
00265       last_action._time = currfile->_time;
00266       last_action._byte_start = 0;
00267       last_action._length = 0;
00268     }
00269 
00270     const file_info *found = to_transfer.find(last_action._filename);
00271     if (!found) {
00272       // they have referenced a file that we don't have.  that's bad news.
00273       return BAD_INPUT;
00274     }
00275 
00276     astring full_file = source_root + "/" + last_action._filename;
00277     huge_file current(full_file, "rb");
00278     if (!current.good()) {
00279       // we need to skip this file.
00280       if (!advance(to_transfer, last_action)) break;
00281       continue;
00282     }
00283 
00284     if ((last_action._byte_start + last_action._length >= current.length())
00285         && current.length()) {
00286       // this file is done now.  go to the next one.
00287       if (!advance(to_transfer, last_action)) break;
00288       continue;
00289     }
00290 
00291     // calculate the largest piece remaining of that file that will fit in the
00292     // allotted space.
00293     double new_start = last_action._byte_start + last_action._length;
00294     double remaining_in_file = current.length() - new_start;
00295     if (remaining_in_file < 0) remaining_in_file = 0;
00296     double new_len = minimum(remaining_in_file, remaining_in_array);
00297     
00298     // pack this new piece of the file.
00299     current.seek(new_start, byte_filer::FROM_START);
00300     byte_array new_chunk;
00301     int bytes_read = 0;
00302     outcome ret = current.read(new_chunk, int(new_len), bytes_read);
00303     if (bytes_read != new_len) {
00304       if (!bytes_read) {
00305         // some kind of problem reading the file.
00306         if (!advance(to_transfer, last_action)) break;
00307         continue;
00308       }
00309 //why would this happen?  just complain, i guess.
00310     }
00311 
00312     // update the record since it seems we're successful here.
00313     last_action._byte_start = new_start;
00314     last_action._length = int(new_len);
00315 
00316     // add in this next new chunk of file.
00317     last_action.pack(storage);  // add the header.
00318     storage += new_chunk;  // add the new stuff.
00319 
00320     if (!current.length()) {
00321       // ensure we don't get stuck redoing zero length files, which we allowed
00322       // to go past their end above (since otherwise we'd never see them).
00323       if (!advance(to_transfer, last_action)) break;
00324       continue;
00325     }
00326     
00327     // just keep going, if there's space...
00328   }
00329 
00330   return to_return;
00331 }
00332 
00333 } //namespace.
00334 
Generated on Sat Jan 28 04:22:22 2012 for hoople2 project by  doxygen 1.6.3