unix2dos.cpp

Go to the documentation of this file.
00001 /*****************************************************************************\
00002 *                                                                             *
00003 *  Name   : unix2dos                                                          *
00004 *  Author : Chris Koeritz                                                     *
00005 *                                                                             *
00006 *  Purpose:                                                                   *
00007 *                                                                             *
00008 *    Takes text as input and replaces any line endings with the DOS EOL.      *
00009 *                                                                             *
00010 *******************************************************************************
00011 * Copyright (c) 2007-$now By Author.  This program is free software; you can  *
00012 * redistribute it and/or modify it under the terms of the GNU General Public  *
00013 * License as published by the Free Software Foundation; either version 2 of   *
00014 * the License or (at your option) any later version.  This is online at:      *
00015 *     http://www.fsf.org/copyleft/gpl.html                                    *
00016 * Please send any updates to: fred@gruntose.com                               *
00017 \*****************************************************************************/
00018 
00019 #include <basis/istring.h>
00020 #include <basis/set.cpp>
00021 #include <opsystem/application_shell.h>
00022 #include <opsystem/byte_filer.h>
00023 #include <opsystem/command_line.h>
00024 #include <opsystem/filename.h>
00025 #include <data_struct/static_memory_gremlin.h>
00026 #include <textual/parser_bits.h>
00027 
00028 #include <stdio.h>
00029 
// the largest chunk of text we handle at once.  read_chunk() passes this to
// byte_filer::read() as the size to request for each read.
const int MAX_BUFFER = 4096;
00031 
00033 
00034 class unix2dos_app : public application_shell
00035 {
00036 public:
00037   unix2dos_app() : application_shell(static_class_name()) {}
00038 
00039   IMPLEMENT_CLASS_NAME("unix2dos");
00040 
00041   virtual int execute();
00042 
00043   int print_instructions();
00044 
00045   void read_chunk(byte_filer &f, istring &buffer, istring &pushed_out);
00046     // reads from the file "f" and stores in the primary "buffer".  if there
00047     // are unacceptable contents (meaning that the buffer ended in \r), then
00048     // these get added to "pushed_out" rather than "buffer".
00049 
00050   void chew_input(istring &to_chew);
00051     // consumes the string specified and replaces line feed characters with
00052     // CRLF.  this takes into account when the combo is already present.  it's
00053     // an error for this string to end with \r, since that would be the
00054     // beginning of a multiple character sequence.
00055 
00056   void write_chunk(istring &buffer, istring &pushed_out);
00057     // throws the current "buffer" to standard output and replaces it with
00058     // the "pushed_out" contents.  "pushed_out" is cleared.
00059 };
00060 
00062 
00063 int unix2dos_app::print_instructions()
00064 {
00065   istring name = filename(__argv[0]).basename().raw();
00066   log(isprintf("%s usage:", name.s()));
00067   log("");
00068   log("\
00069 This program consumes the Unix line ending (a single Line Feed) and replaces\n\
00070 it with the DOS CRLF (Carriage Return / Line Feed) ending.  In hex, this\n\
00071 means that characters with value 0a will be replaced with 0d0a.  As a\n\
00072 convenience and to make this operation idempotent, single line feeds will\n\
00073 still be replaced with single line feeds.  Malformed line feed sequences\n\
00074 (such as 0d0d0a) will be replaced with a single line feed also.\n\
00075 Any filenames on the command line are processed and sent to standard output.\n\
00076 The following options are available:\n\
00077    --help or -?\tShow this help information.\n\
00078 ");
00079   return -3;
00080 }
00081 
00082 void unix2dos_app::chew_input(istring &to_chew)
00083 {
00084   if (to_chew[to_chew.end()] == '\r') {
00085     log("error-- a string ending in \\r has been passed for consumption.");
00086   }
00087 
00088   // strategy...
00089   //   collapse R*N into RN
00090   //   translate N into RN
00091   // key: R = \r and N = \n.
00092 
00093   bool saw_lf = false;
00094   // iterate backwards through the chunk of text we were given.
00095   for (int i = to_chew.end(); i >= 0; i--) {
00096     // if this is not an eol character, then it is deemed boring.
00097     if (!parser_bits::is_eol(to_chew[i])) {
00098       if (saw_lf) {
00099         // insert our \r now, since we had already seen an LF.
00100         to_chew.insert(i + 1, "\r");
00101       }
00102       saw_lf = false;
00103       continue;
00104     }
00105     // here, we know we have either a line feed or a carriage return.
00106     if (to_chew[i] == '\n') {
00107       if (saw_lf) {
00108         // insert our \r now, since we had already seen an LF.
00109         to_chew.insert(i + 1, "\r");
00110       }
00111       saw_lf = true;  // we saw a line feed--clean out CRs we see before it.
00112     } else {
00113       // this has to be a CR, unless the definition of eol changed.  remove it.
00114       to_chew.zap(i, i);
00115       if (!saw_lf) saw_lf = true;  // we were given bad data; missing LF.
00116     }
00117   }
00118 }
00119 
00120 void unix2dos_app::read_chunk(byte_filer &f, istring &buffer,
00121     istring &pushed_out)
00122 {
00123   f.read(buffer, MAX_BUFFER);
00124   // we do not allow our consume method to see a lonely carriage return;
00125   // we will make sure that's not how the buffer ends, if we can.
00126   while (buffer[buffer.end()] == '\r') {
00127     pushed_out += '\r';
00128     buffer.zap(buffer.end(), buffer.end());
00129   }
00130   if (!buffer.length()) {
00131     // the crazy thing was empty, or it was all CRs!
00132     buffer = pushed_out;
00133       // at this point, we'll just go with the backup buffer, which might
00134       // also be empty.  but there's not much to lose even if so.
00135     pushed_out.reset();
00136   }
00137 }
00138 
00139 void unix2dos_app::write_chunk(istring &buffer, istring &pushed_out)
00140 {
00141   if (buffer.length())
00142     printf("%s", buffer.s());
00143   buffer = pushed_out;
00144   pushed_out.reset();
00145 }
00146 
00147 int unix2dos_app::execute()
00148 {
00149   command_line cmds(__argc, __argv);  // parse the command line up.
00150 
00151   // look for help commands.
00152   int junk_index = 0;
00153   if (cmds.find("help", junk_index, false)
00154       || cmds.find('h', junk_index, false)
00155       || cmds.find("?", junk_index, false)
00156       || cmds.find('?', junk_index, false) ) {
00157     print_instructions();
00158     return 0;
00159   }
00160 
00161   // gather extra input files.
00162   string_set input_files;
00163   for (int i = 0; i < cmds.entries(); i++) {
00164     const command_parameter &curr = cmds.get(i);
00165     if (curr.type() == command_parameter::VALUE) {
00166 //log(istring("adding input file:") + curr.text());
00167       input_files += curr.text();
00168     }
00169   }
00170 
00171   istring accumulator;  // we will fill this up with data from the file.
00172   istring pushed;  // any stuff we decided to postpone will be dropped here.
00173 
00174   // iterate across the files and process each of them chunkwise.
00175   for (int q = 0; q < input_files.length(); q++) {
00176     byte_filer current(input_files[q], "rb");
00177     if (!current.good()) continue;
00178     while (!current.eof()) {
00179       read_chunk(current, accumulator, pushed);
00180       chew_input(accumulator);
00181       write_chunk(accumulator, pushed);
00182     }
00183   }
00184 
00185   // now get from standard input if there weren't any files specified.
00186   if (!input_files.length()) {
00187     byte_filer s_in(false, stdin);
00188     while (!s_in.eof()) {
00189       read_chunk(s_in, accumulator, pushed);
00190       chew_input(accumulator);
00191       write_chunk(accumulator, pushed);
00192     }
00193   }
00194 
00195   return 0;
00196 }
00197 
00199 
// NOTE(review): presumably HOOPLE_MAIN expands to the program's entry point,
// creating a unix2dos_app and running it; the macro is defined outside this
// file, so confirm against its definition.
HOOPLE_MAIN(unix2dos_app, )
00201 
00202 #ifdef __BUILD_STATIC_APPLICATION__
00203   // static dependencies found by buildor_gen_deps.sh:
00204   #include <basis/array.cpp>
00205   #include <basis/byte_array.cpp>
00206   #include <basis/callstack_tracker.cpp>
00207   #include <basis/chaos.cpp>
00208   #include <basis/convert_utf.cpp>
00209   #include <basis/definitions.cpp>
00210   #include <basis/earth_time.cpp>
00211   #include <basis/guards.cpp>
00212   #include <basis/istring.cpp>
00213   #include <basis/log_base.cpp>
00214   #include <basis/memory_checker.cpp>
00215   #include <basis/mutex.cpp>
00216   #include <basis/object_base.cpp>
00217   #include <basis/outcome.cpp>
00218   #include <basis/packable.cpp>
00219   #include <basis/portable.cpp>
00220   #include <basis/sequence.cpp>
00221   #include <basis/set.cpp>
00222   #include <basis/utility.cpp>
00223   #include <basis/version_record.cpp>
00224   #include <data_struct/amorph.cpp>
00225   #include <data_struct/bit_vector.cpp>
00226   #include <data_struct/byte_hasher.cpp>
00227   #include <data_struct/configurator.cpp>
00228   #include <data_struct/hash_table.cpp>
00229   #include <data_struct/pointer_hash.cpp>
00230   #include <data_struct/stack.cpp>
00231   #include <data_struct/static_memory_gremlin.cpp>
00232   #include <data_struct/string_hash.cpp>
00233   #include <data_struct/string_hasher.cpp>
00234   #include <data_struct/string_table.cpp>
00235   #include <data_struct/symbol_table.cpp>
00236   #include <data_struct/table_configurator.cpp>
00237   #include <loggers/console_logger.cpp>
00238   #include <loggers/file_logger.cpp>
00239   #include <loggers/locked_logger.cpp>
00240   #include <loggers/null_logger.cpp>
00241   #include <loggers/program_wide_logger.cpp>
00242   #include <opsystem/application_base.cpp>
00243   #include <opsystem/application_shell.cpp>
00244   #include <opsystem/byte_filer.cpp>
00245   #include <opsystem/command_line.cpp>
00246   #include <opsystem/critical_events.cpp>
00247   #include <opsystem/directory.cpp>
00248   #include <opsystem/filename.cpp>
00249   #include <opsystem/ini_config.cpp>
00250   #include <opsystem/ini_parser.cpp>
00251   #include <opsystem/path_configuration.cpp>
00252   #include <opsystem/rendezvous.cpp>
00253   #include <textual/byte_format.cpp>
00254   #include <textual/parser_bits.cpp>
00255   #include <textual/string_manipulation.cpp>
00256   #include <textual/tokenizer.cpp>
00257 #endif // __BUILD_STATIC_APPLICATION__
00258 

Generated on Fri Nov 28 04:28:52 2008 for HOOPLE Libraries by  doxygen 1.5.1