/*****************************************************************************\
*                                                                             *
*  Name   : unix2dos                                                          *
*  Author : Chris Koeritz                                                     *
*                                                                             *
*  Purpose:                                                                   *
*                                                                             *
*    Takes text as input and replaces any line endings with the DOS EOL.      *
*                                                                             *
*******************************************************************************
* Copyright (c) 2007-$now By Author.  This program is free software; you can  *
* redistribute it and/or modify it under the terms of the GNU General Public  *
* License as published by the Free Software Foundation; either version 2 of   *
* the License or (at your option) any later version.  This is online at:      *
*     http://www.fsf.org/copyleft/gpl.html                                    *
* Please send any updates to: fred@gruntose.com                               *
\*****************************************************************************/

#include <basis/istring.h>
#include <basis/set.cpp>
#include <opsystem/application_shell.h>
#include <opsystem/byte_filer.h>
#include <opsystem/command_line.h>
#include <opsystem/filename.h>
#include <data_struct/static_memory_gremlin.h>
#include <textual/parser_bits.h>

#include <stdio.h>

// this is the size that read_chunk hands to byte_filer::read for each read.
const int MAX_BUFFER = 4096;  // the largest chunk of text we handle at once.

////////////////////////////////////////////////////////////////////////////

// the application object for the unix2dos tool.  it reads text from the
// files named on the command line (or from standard input when no files are
// named), rewrites the line endings and prints the result on standard output.
class unix2dos_app : public application_shell
{
public:
  unix2dos_app() : application_shell(static_class_name()) {}
    // hands the result of static_class_name() to the application_shell base.

  IMPLEMENT_CLASS_NAME("unix2dos");

  virtual int execute();
    // parses the command line, shows the instructions if a help flag was
    // given, and otherwise converts each input in chunks.  returns zero.

  int print_instructions();
    // logs the usage text for the program.  always returns -3.

  void read_chunk(byte_filer &f, istring &buffer, istring &pushed_out);
    // reads from the file "f" and stores in the primary "buffer".  if there
    // are unacceptable contents (meaning that the buffer ended in \r), then
    // these get added to "pushed_out" rather than "buffer".  if "buffer" is
    // left with nothing in it, then the "pushed_out" contents are moved into
    // "buffer" instead and "pushed_out" is cleared.

  void chew_input(istring &to_chew);
    // consumes the string specified and replaces line feed characters with
    // CRLF.  this takes into account when the combo is already present.  it's
    // an error for this string to end with \r, since that would be the
    // beginning of a multiple character sequence; that error is only logged.

  void write_chunk(istring &buffer, istring &pushed_out);
    // throws the current "buffer" to standard output and replaces it with
    // the "pushed_out" contents.  "pushed_out" is cleared.
};

////////////////////////////////////////////////////////////////////////////

// shows the usage text for the program by way of log().  the return value is
// always -3; execute() does not pass that value along when it calls this for
// one of the help flags.
int unix2dos_app::print_instructions()
{
  // the program's name is taken from however it was invoked.
  istring name = filename(__argv[0]).basename().raw();
  log(isprintf("%s usage:", name.s()));
  log("");
  // the description below has to match what chew_input really produces: every
  // line ending becomes CRLF, including ones that already were CRLF and ones
  // that had extra carriage returns in front of the line feed.
  log("\
This program consumes the Unix line ending (a single Line Feed) and replaces\n\
it with the DOS CRLF (Carriage Return / Line Feed) ending.  In hex, this\n\
means that characters with value 0a will be replaced with 0d0a.  As a\n\
convenience and to make this operation idempotent, existing CRLF endings will\n\
still be replaced with a single CRLF.  Malformed line ending sequences\n\
(such as 0d0d0a) will be replaced with a single CRLF also.\n\
Any filenames on the command line are processed and sent to standard output.\n\
If no filenames are given, standard input is processed instead.\n\
The following options are available:\n\
   --help, -h or -?\tShow this help information.\n\
");
  return -3;
}

// rewrites "to_chew" in place so that its line endings use CRLF.
//
// strategy (key: R = \r and N = \n)...
//   collapse R*N into RN
//   translate N into RN
//
// the text is walked from back to front.  carriage returns are removed as
// they are met, and one \r is put back immediately in front of the character
// that followed them once the walk reaches something that is not a carriage
// return.  a run of carriage returns with no line feed after it therefore
// ends up as a single \r.
//
// a string ending in \r is reported through log() but is still processed.
void unix2dos_app::chew_input(istring &to_chew)
{
  // an empty string has nothing to convert, and there is no last character
  // to inspect below.
  if (!to_chew.length()) return;

  if (to_chew[to_chew.end()] == '\r') {
    log("error-- a string ending in \\r has been passed for consumption.");
  }

  // true while a line ending to our right is still waiting for its \r.
  bool owes_cr = false;
  for (int i = to_chew.end(); i >= 0; i--) {
    const bool at_eol = parser_bits::is_eol(to_chew[i]);
    if (at_eol && (to_chew[i] != '\n')) {
      // this has to be a CR, unless the definition of eol changed.  drop it;
      // exactly one will be re-added further along.  if no LF had been seen
      // yet, then we were given bad data (a CR missing its LF), but the CR is
      // still owed.
      to_chew.zap(i, i);
      owes_cr = true;
      continue;
    }
    // here we hold either an ordinary character or a line feed.  in both
    // cases a line ending waiting to our right receives its \r now.
    if (owes_cr) to_chew.insert(i + 1, "\r");
    // only a line feed starts a new debt; an ordinary character clears it.
    owes_cr = at_eol;
  }

  // the walk can run off the front of the text while a \r is still owed,
  // which happens whenever the text begins with a line ending.  without this
  // the first line ending of such a chunk would be emitted as a bare LF.
  if (owes_cr) to_chew.insert(0, "\r");
}

// asks "f" for MAX_BUFFER worth of data in "buffer", then moves any carriage
// returns found at the tail of "buffer" over to "pushed_out", so that
// chew_input is not handed text that ends in \r.  if "buffer" has nothing
// left after that, the "pushed_out" text is given back in "buffer" and
// "pushed_out" is emptied.
void unix2dos_app::read_chunk(byte_filer &f, istring &buffer,
    istring &pushed_out)
{
  f.read(buffer, MAX_BUFFER);
    // NOTE(review): the result of read() is not examined, and whether read()
    // replaces or extends what was already in "buffer" is not visible from
    // this file.  write_chunk leaves postponed CRs in "buffer" before the
    // next call here, so confirm against byte_filer that they survive.

  // we do not allow our consume method to see a lonely carriage return;
  // we will make sure that's not how the buffer ends, if we can.  the length
  // test stops us from indexing the last position of an empty buffer, which
  // is what remains after a read that produced nothing or only CRs.
  while (buffer.length() && (buffer[buffer.end()] == '\r')) {
    pushed_out += '\r';
    buffer.zap(buffer.end(), buffer.end());
  }

  if (!buffer.length()) {
    // the crazy thing was empty, or it was all CRs!  at this point, we'll
    // just go with the backup buffer, which might also be empty.  but
    // there's not much to lose even if so.
    buffer = pushed_out;
    pushed_out.reset();
  }
}

// prints whatever text "buffer" holds on standard output, then makes the
// "pushed_out" text the new "buffer" contents and empties "pushed_out".
void unix2dos_app::write_chunk(istring &buffer, istring &pushed_out)
{
  const bool has_text = (buffer.length() != 0);
  if (has_text) {
    // nothing is printed at all for an empty buffer.
    printf("%s", buffer.s());
  }

  // the postponed text becomes the start of the next round's buffer.
  buffer = pushed_out;
  pushed_out.reset();
}

int unix2dos_app::execute()
{
  command_line cmds(__argc, __argv);  // parse the command line up.

  // look for help commands.
  int junk_index = 0;
  if (cmds.find("help", junk_index, false)
      || cmds.find('h', junk_index, false)
      || cmds.find("?", junk_index, false)
      || cmds.find('?', junk_index, false) ) {
    print_instructions();
    return 0;
  }

  // gather extra input files.
  string_set input_files;
  for (int i = 0; i < cmds.entries(); i++) {
    const command_parameter &curr = cmds.get(i);
    if (curr.type() == command_parameter::VALUE) {
//log(istring("adding input file:") + curr.text());
      input_files += curr.text();
    }
  }

  istring accumulator;  // we will fill this up with data from the file.
  istring pushed;  // any stuff we decided to postpone will be dropped here.

  // iterate across the files and process each of them chunkwise.
  for (int q = 0; q < input_files.length(); q++) {
    byte_filer current(input_files[q], "rb");
    if (!current.good()) continue;
    while (!current.eof()) {
      read_chunk(current, accumulator, pushed);
      chew_input(accumulator);
      write_chunk(accumulator, pushed);
    }
  }

  // now get from standard input if there weren't any files specified.
  if (!input_files.length()) {
    byte_filer s_in(false, stdin);
    while (!s_in.eof()) {
      read_chunk(s_in, accumulator, pushed);
      chew_input(accumulator);
      write_chunk(accumulator, pushed);
    }
  }

  return 0;
}

////////////////////////////////////////////////////////////////////////////

// NOTE(review): HOOPLE_MAIN is defined outside of this file; presumably it
// generates the program's entry point, which creates a unix2dos_app and runs
// it--confirm against the macro's definition.
HOOPLE_MAIN(unix2dos_app, )

#ifdef __BUILD_STATIC_APPLICATION__
  // static dependencies found by buildor_gen_deps.sh:
  #include <basis/array.cpp>
  #include <basis/byte_array.cpp>
  #include <basis/callstack_tracker.cpp>
  #include <basis/chaos.cpp>
  #include <basis/convert_utf.cpp>
  #include <basis/definitions.cpp>
  #include <basis/earth_time.cpp>
  #include <basis/guards.cpp>
  #include <basis/istring.cpp>
  #include <basis/log_base.cpp>
  #include <basis/memory_checker.cpp>
  #include <basis/mutex.cpp>
  #include <basis/object_base.cpp>
  #include <basis/outcome.cpp>
  #include <basis/packable.cpp>
  #include <basis/portable.cpp>
  #include <basis/sequence.cpp>
  #include <basis/set.cpp>
  #include <basis/utility.cpp>
  #include <basis/version_record.cpp>
  #include <data_struct/amorph.cpp>
  #include <data_struct/bit_vector.cpp>
  #include <data_struct/byte_hasher.cpp>
  #include <data_struct/configurator.cpp>
  #include <data_struct/hash_table.cpp>
  #include <data_struct/pointer_hash.cpp>
  #include <data_struct/stack.cpp>
  #include <data_struct/static_memory_gremlin.cpp>
  #include <data_struct/string_hash.cpp>
  #include <data_struct/string_hasher.cpp>
  #include <data_struct/string_table.cpp>
  #include <data_struct/symbol_table.cpp>
  #include <data_struct/table_configurator.cpp>
  #include <loggers/console_logger.cpp>
  #include <loggers/file_logger.cpp>
  #include <loggers/locked_logger.cpp>
  #include <loggers/null_logger.cpp>
  #include <loggers/program_wide_logger.cpp>
  #include <opsystem/application_base.cpp>
  #include <opsystem/application_shell.cpp>
  #include <opsystem/byte_filer.cpp>
  #include <opsystem/command_line.cpp>
  #include <opsystem/critical_events.cpp>
  #include <opsystem/directory.cpp>
  #include <opsystem/filename.cpp>
  #include <opsystem/ini_config.cpp>
  #include <opsystem/ini_parser.cpp>
  #include <opsystem/path_configuration.cpp>
  #include <opsystem/rendezvous.cpp>
  #include <textual/byte_format.cpp>
  #include <textual/parser_bits.cpp>
  #include <textual/string_manipulation.cpp>
  #include <textual/tokenizer.cpp>
#endif // __BUILD_STATIC_APPLICATION__

