file_info.cpp

Go to the documentation of this file.
00001 #ifndef FILE_INFO_IMPLEMENTATION_FILE
00002 #define FILE_INFO_IMPLEMENTATION_FILE
00003 
00004 /*****************************************************************************\
00005 *                                                                             *
00006 *  Name   : file_info                                                         *
00007 *  Author : Chris Koeritz                                                     *
00008 *                                                                             *
00009 *******************************************************************************
00010 * Copyright (c) 1993-$now By Author.  This program is free software; you can  *
00011 * redistribute it and/or modify it under the terms of the GNU General Public  *
00012 * License as published by the Free Software Foundation; either version 2 of   *
00013 * the License or (at your option) any later version.  This is online at:      *
00014 *     http://www.fsf.org/copyleft/gpl.html                                    *
00015 * Please send any updates to: fred@gruntose.com                               *
00016 \*****************************************************************************/
00017 
00018 #include "file_info.h"
00019 
00020 #include <basis/byte_array.h>
00021 #include <basis/function.h>
00022 #include <basis/istring.h>
00023 #include <basis/log_base.h>
00024 #include <opsystem/huge_file.h>
00025 
00026 #define DEBUG_FILE_INFO
00027   // currently enabled (noisy version); comment out the define above for quiet operation.
00028 
00029 #undef LOG
00030 #define LOG(s) CLASS_EMERGENCY_LOG(program_wide_logger(), s)
00031 
00032 file_info::file_info(const filename &to_copy, double file_size, int checksum)
00033 : filename(to_copy),
00034   _file_size(file_size),
00035   _checksum(checksum),
00036   _secondary(new istring),
00037   _attachment(new byte_array)
00038 {}
00039 
00040 file_info::file_info(const file_info &to_copy)
00041 : filename(to_copy),
00042   _file_size(to_copy._file_size),
00043   _checksum(to_copy._checksum),
00044   _secondary(new istring(*to_copy._secondary)),
00045   _attachment(new byte_array(*to_copy._attachment))
00046 {
00047 }
00048 
00049 file_info::~file_info()
00050 {
00051   WHACK(_secondary);
00052   WHACK(_attachment);
00053 }
00054 
00055 const byte_array &file_info::attachment() const { return *_attachment; }
00056 
00057 void file_info::attachment(const byte_array &new_attachment)
00058 { *_attachment = new_attachment; }
00059 
00060 const istring &file_info::secondary() const { return *_secondary; }
00061 
00062 void file_info::secondary(const istring &new_sec) const
00063 { *_secondary = new_sec; }
00064 
00065 istring file_info::text_form() const
00066 {
00067   istring to_return = raw()
00068       + isprintf(", size=%0.f, chksum=%d", _file_size, _checksum);
00069   if (_secondary->t())
00070     to_return += istring(", 2ndary=") + *_secondary;
00071   return to_return;
00072 }
00073 
00074 bool file_info::calculate(const istring &prefix, bool just_size,
00075     int checksum_edge)
00076 {
00077   FUNCDEF("calculate");
00078   filename full;
00079   if (prefix.t()) full = prefix + "/" + *this;
00080   else full = *this;
00081   if (!full.exists()) {
00082 #ifdef DEBUG_FILE_INFO
00083     LOG(istring("failed to find file: ") + full.raw());
00084 #endif
00085     return false;
00086   }
00087   // open the file for reading.
00088   huge_file to_read(full.raw(), "rb");
00089   if (!to_read.good()) {
00090 #ifdef DEBUG_FILE_INFO
00091     LOG(istring("file has non-good status: ") + full.raw());
00092 #endif
00093     return false;  // why did that happen?
00094   }
00095   // set the size appropriately.
00096   _file_size = to_read.length();
00097   if (just_size)
00098     return true;  // done for that case.
00099 
00100   // now read the file and compute a checksum.
00101   uint16 curr_sum = 0;  // the current checksum being computed.
00102   byte_array chunk;  // temporary chunk of data from file.
00103 
00104 //hmmm: make this optimization (hack) optional!
00105 
00106   // this algorithm takes a chunk on each end of the file for checksums.
00107   // this saves us from reading a huge amount of data, although it will be
00108   // fooled if a huge binary file is changed only in the middle and has the
00109   // same size as before.  for most purposes, this is not a problem, although
00110   // databases that are fixed size might fool us.  if records are written in
00111   // the middle without updating the head or tail sections, then we're hosed.
00112 
00113   bool skip_tail = false;  // true if we don't need the tail piece.
00114   double head_start = 0, head_end = 0, tail_start = 0,
00115       tail_end = _file_size - 1;
00116   if (_file_size <= double(2 * checksum_edge)) {
00117     // we're applying a rule for when the file is too small compared to
00118     // the chunk factor doubled; we'll just read the whole file.
00119     head_end = _file_size - 1;
00120     skip_tail = true;
00121   } else {
00122     // here we compute the ending of the head piece and the beginning of
00123     // the tail piece.  each will be about checksum_edge in size.
00124     head_end = minimum(_file_size / 2, double(checksum_edge)) - 1;
00125     tail_start = _file_size - minimum(_file_size / 2, double(checksum_edge));
00126   }
00127 
00128   // read the head end of the file.
00129   int size_read = 0;
00130   outcome ret = to_read.read(chunk, int(head_end - head_start + 1), size_read);
00131   if (ret != huge_file::OKAY) {
00132 #ifdef DEBUG_FILE_INFO
00133     LOG(istring("reading file failed: ") + full.raw());
00134 #endif
00135     return false;  // failed to read.
00136   }
00137   curr_sum = utility::rolling_fletcher_checksum(curr_sum, chunk.observe(),
00138       chunk.length());
00139 
00140   // read the tail end of the file.
00141   if (!skip_tail) {
00142     to_read.seek(tail_start, byte_filer::FROM_START);
00143     ret = to_read.read(chunk, int(tail_end - tail_start + 1), size_read);
00144     if (ret != huge_file::OKAY) {
00145 #ifdef DEBUG_FILE_INFO
00146       LOG(istring("reading tail of file failed: ") + full.raw());
00147 #endif
00148       return false;  // failed to read.
00149     }
00150     curr_sum = utility::rolling_fletcher_checksum(curr_sum, chunk.observe(),
00151         chunk.length());
00152   }
00153 
00154   _checksum = curr_sum;
00155   return true;
00156 }
00157 
00158 void file_info::pack(byte_array &packed_form) const
00159 {
00160   filename::pack(packed_form);
00161   basis::attach(packed_form, _file_size);
00162   basis::attach(packed_form, _checksum);
00163   _secondary->pack(packed_form);
00164   basis::attach(packed_form, *_attachment);
00165 }
00166 
00167 bool file_info::unpack(byte_array &packed_form)
00168 {
00169   if (!filename::unpack(packed_form))
00170     return false;
00171   if (!basis::detach(packed_form, _file_size))
00172     return false;
00173   if (!basis::detach(packed_form, _checksum))
00174     return false;
00175   if (!_secondary->unpack(packed_form))
00176     return false;
00177   if (!basis::detach(packed_form, *_attachment))
00178     return false;
00179   return true;
00180 }
00181 
00182 file_info &file_info::operator = (const file_info &to_copy)
00183 {
00184   if (this == &to_copy)
00185     return *this;
00186   (filename &)(*this) = (filename &)to_copy;
00187   *_attachment = *to_copy._attachment;
00188   _file_size = to_copy._file_size;
00189   *_secondary = *to_copy._secondary;
00190   _checksum = to_copy._checksum;
00191   return *this;
00192 }
00193 
00194 
00195 #endif //FILE_INFO_IMPLEMENTATION_FILE
00196 

Generated on Thu Nov 20 04:29:03 2008 for HOOPLE Libraries by  doxygen 1.5.1