marks_sorter.cpp

Go to the documentation of this file.
00001 /*****************************************************************************\
00002 *                                                                             *
00003 *  Name   : marks_sorter                                                      *
00004 *  Author : Chris Koeritz                                                     *
00005 *                                                                             *
00006 *  Purpose:                                                                   *
00007 *                                                                             *
00008 *    Processes a link database in HOOPLE format and generates a new database  *
00009 *  that is sorted and always uses category nicknames where defined.           *
00010 *                                                                             *
00011 *******************************************************************************
00012 * Copyright (c) 2006-$now By Author.  This program is free software; you can  *
00013 * redistribute it and/or modify it under the terms of the GNU General Public  *
00014 * License as published by the Free Software Foundation; either version 2 of   *
00015 * the License or (at your option) any later version.  This is online at:      *
00016 *     http://www.fsf.org/copyleft/gpl.html                                    *
00017 * Please send any updates to: fred@gruntose.com                               *
00018 \*****************************************************************************/
00019 
00020 #include "bookmark_tree.cpp"
00021 
00022 #include <basis/function.h>
00023 #include <basis/guards.h>
00024 #include <basis/istring.h>
00025 #include <opsystem/application_shell.h>
00026 #include <opsystem/byte_filer.h>
00027 #include <opsystem/command_line.h>
00028 #include <loggers/file_logger.h>
00029 #include <opsystem/filename.h>
00030 #include <data_struct/static_memory_gremlin.h>
00031 #include <textual/list_parsing.h>
00032 
00033 using namespace nodes;
00034 
00035 //#define DEBUG_MARKS
00036   // uncomment the define above to have more debugging noise.
00037 // BASE_LOG hands "s" directly to the program-wide logger.
00038 #undef BASE_LOG
00039 #define BASE_LOG(s) program_wide_logger().log(s)
00040 #undef LOG  // LOG (below) prefixes "s" with "line N: " from _categories._line_number.
00041 #define LOG(s) CLASS_EMERGENCY_LOG(program_wide_logger(), \
00042    isprintf("line %d: ", _categories._line_number) + s)
00043 // NOTE: LOG expands to a use of "_categories", so it needs that name in scope.
00044 const int MAX_FILE_SIZE = 4 * MEGABYTE;
00045   // the largest file we'll read.  NOTE(review): nothing in the visible part of
00045   // this file refers to MAX_FILE_SIZE or DEBUG_MARKS -- confirm they are needed.
00046 
00048 
00049 // application object that rewrites a HOOPLE link database (see execute()).
00050 class marks_sorter : public application_shell
00051 {
00052 public:
00053   marks_sorter()
00054   : application_shell(static_class_name()), _loader_count(0), _link_spool(0)
00055   {}
00056   IMPLEMENT_CLASS_NAME("marks_sorter");
00057   virtual int execute();
00058     // the program body; hands back zero when everything worked.
00059   int print_instructions(const filename &program_name);
00060     // logs the usage text for "program_name" and returns a non-zero value.
00061   int write_new_marks(const istring &output_filename);
00062     // writes the tree of links out as a new sorted file, "output_filename".
00063 private:
00064   bookmark_tree _categories;  // the tree of categories and their links.
00065   int _loader_count;  // count of the loader functions.
00066   int _link_spool;  // count of which link we're writing.
00067 };
00068 
00070 // logs the usage text, headed by the program's base name, and returns 12.
00071 int marks_sorter::print_instructions(const filename &program_name)
00072 {
00073   isprintf to_show("%s:\n"
00074       "This program needs two filenames as command-line parameters.  The -i flag\n"
00075       "is used to specify the input filename, which is expected to be in the HOOPLE\n"
00076       "link database format.  The -o flag specifies the new bookmarks file to be\n"
00077       "created, which will also be in the HOOPLE link format.\n"
00078       "The HOOPLE link format is documented here:\n"
00079       "    http://hoople.org/guides/link_database/format_manifesto.txt\n",
00080       program_name.basename().raw().s());  // one argument for the single %s.
00081   program_wide_logger().log(to_show.s());
00082   return 12;  // the value execute() returns when -i or -o is missing.
00083 }
00084 
00085 int marks_sorter::execute()
00086 {
00087   // program body: loads the database named by -i and writes the sorted copy
00088   // named by -o.  returns zero on success, print_instructions()'s value when a
00089   // flag is missing, or the non-zero result of the failing load/write step.
00090   FUNCDEF("execute");
00091   SET_DEFAULT_COMBO_LOGGER;
00092 
00093   command_line cmds(__argc, __argv);  // process the command line parameters.
00094   istring source_name;  // the link database we read.
00095   istring target_name;  // the sorted link database we create.
00096   // both flags are required; -i is still looked up before -o.
00097   if (!cmds.get_value('i', source_name, false)
00098       || !cmds.get_value('o', target_name, false))
00099     return print_instructions(cmds.program_name());
00100 
00101   BASE_LOG(istring("input file: ") + source_name);
00102   BASE_LOG(istring("output file: ") + target_name);
00103 
00104   // an existing output file is reported through non_continuable_error.
00105   filename target_file(target_name);
00106   if (target_file.exists()) {
00107     non_continuable_error(class_name(), func, istring("the output file ")
00108         + target_name + " already exists.  It would be over-written if "
00109         "we continued.");
00110   }
00111 
00112   const int load_result = _categories.read_csv_file(source_name);
00113   if (load_result) return load_result;  // pass the loader's failure along.
00114   return write_new_marks(target_name);  // zero when the write succeeded.
00115 }
00116 
00117 int marks_sorter::write_new_marks(const istring &output_filename)
00118 {
00119   // streams the tree held in _categories out to "output_filename": each node
00120   // yields a "C" (category) line, then its comment and "L" (link) lines.
00121   // returns zero; a file that cannot be opened goes to non_continuable_error.
00122   FUNCDEF("write_new_marks");
00123   byte_filer output_file(output_filename, "w");
00124   if (!output_file.good())
00125     non_continuable_error(class_name(), func, "the output file could not be opened");
00126 
00127   bool just_had_return = false;  // was a blank line the last thing written?
00128   bool first_line = true;  // is this the first line to be emitted?
00129 
00130   // traverse the tree in prefix order.
00131   tree::iterator itty = _categories.access_root().start(tree::prefix);
00132   tree *curr = NIL;  // the current node.
00133 
00134   while ( (curr = itty.next()) ) {
00135     inner_mark_tree *nod = (inner_mark_tree *)curr;
00136     // set up a category printout for this node: "C", the node's name, then the
00137     // parent's nickname (or its split name when there is no nickname).
00138     string_array cat_list;
00139     cat_list += "C";
00140     cat_list += nod->name();
00141     inner_mark_tree *pare = (inner_mark_tree *)nod->parent();
00142     if (pare) {
00143       istring name_split, nick_split;
00144       _categories.break_name(pare->name(), name_split, nick_split);
00145       if (!nick_split) cat_list += name_split;
00146       else cat_list += nick_split;
00147     } else {
00148       cat_list += "";  // no parent, so the parent field stays empty.
00149     }
00150 
00151     // create a text line to send to the output file.
00152     istring tmp;
00153     list_parsing::create_csv_line(cat_list, tmp);
00154     tmp += "\n";
00155     if (!just_had_return && !first_line) {
00156       // generate a blank line before the category name.
00157       output_file.write(log_base::platform_ending());
00158     }
00159     // reset the flags after we've checked them.
00160     just_had_return = false;
00161     first_line = false;
00162     output_file.write(tmp);
00163       // write the actual category definition.
00164 
00165     // print the link for all of the ones stored at this node.
00166     for (int i = 0; i < nod->_links.elements(); i++) {
00167       link_record *lin = nod->_links.borrow(i);
00168       if (!lin->_url) {
00169         // just a comment.
00170         istring descrip = lin->_description;
00171         if (descrip.contains("http:")) {
00172           // we'll clean the html formatting out that we added earlier.
00173           int indy = descrip.find('"');
00174           if (non_negative(indy)) {
00175             descrip.zap(0, indy);
00176             indy = descrip.find('"');
00177             if (non_negative(indy)) descrip.zap(indy, descrip.end());
00178           }
00179           descrip = istring("    ") + descrip;
00180              // add a little spacing.
00181         }
00182         if (descrip.t()) {
00183           output_file.write(istring("#") + descrip + "\n");
00184           just_had_return = false;
00185         } else {
00186           // this line's totally blank, so we'll generate a blank line.
00187           // we don't want to put in more than one blank though, so we check
00188           // whether we did this recently.
00189           if (!just_had_return) {
00190             output_file.write(log_base::platform_ending());
00191             just_had_return = true;  // set our flag for a carriage return.
00192           }
00193         }
00194       } else {
00195         // should be a real link: "L", description, category nick/name, url.
00196         string_array lnks;
00197         lnks += "L";
00198         lnks += lin->_description;
00199         // use just the nickname for the parent, if there is a nick.
00200         istring name_split;
00201         istring nick_split;
00202         _categories.break_name(nod->name(), name_split, nick_split);
00203         if (!nick_split) lnks += nod->name();
00204         else lnks += nick_split;
00205         lnks += lin->_url;
00206         list_parsing::create_csv_line(lnks, tmp);
00207         tmp += "\n";
00208         output_file.write(tmp);
00209         just_had_return = false;
00210       }
00211     }
00212   }
00213 
00214   output_file.close();
00215   BASE_LOG(isprintf("wrote %d links in %d categories.",
00216       _categories.link_count(), _categories.category_count()));
00217   BASE_LOG("");
00218 
00219   return 0;
00220 }
00222 
00224 
00225 HOOPLE_MAIN(marks_sorter, )  // NOTE(review): macro defined outside this file; presumably supplies main() for marks_sorter -- confirm.
00226 
00227 #ifdef __BUILD_STATIC_APPLICATION__
00228   // static dependencies found by buildor_gen_deps.sh:
00229   #include <basis/array.cpp>
00230   #include <basis/byte_array.cpp>
00231   #include <basis/callstack_tracker.cpp>
00232   #include <basis/chaos.cpp>
00233   #include <basis/convert_utf.cpp>
00234   #include <basis/definitions.cpp>
00235   #include <basis/earth_time.cpp>
00236   #include <basis/guards.cpp>
00237   #include <basis/istring.cpp>
00238   #include <basis/log_base.cpp>
00239   #include <basis/memory_checker.cpp>
00240   #include <basis/mutex.cpp>
00241   #include <basis/object_base.cpp>
00242   #include <basis/outcome.cpp>
00243   #include <basis/packable.cpp>
00244   #include <basis/portable.cpp>
00245   #include <basis/sequence.cpp>
00246   #include <basis/set.cpp>
00247   #include <basis/utility.cpp>
00248   #include <basis/version_record.cpp>
00249   #include <data_struct/amorph.cpp>
00250   #include <data_struct/bit_vector.cpp>
00251   #include <data_struct/byte_hasher.cpp>
00252   #include <data_struct/configurator.cpp>
00253   #include <data_struct/hash_table.cpp>
00254   #include <data_struct/pointer_hash.cpp>
00255   #include <data_struct/stack.cpp>
00256   #include <data_struct/static_memory_gremlin.cpp>
00257   #include <data_struct/string_hash.cpp>
00258   #include <data_struct/string_hasher.cpp>
00259   #include <data_struct/string_table.cpp>
00260   #include <data_struct/symbol_table.cpp>
00261   #include <data_struct/table_configurator.cpp>
00262   #include <loggers/console_logger.cpp>
00263   #include <loggers/file_logger.cpp>
00264   #include <loggers/locked_logger.cpp>
00265   #include <loggers/null_logger.cpp>
00266   #include <loggers/program_wide_logger.cpp>
00267   #include <nodes/node.cpp>
00268   #include <nodes/path.cpp>
00269   #include <nodes/symbol_tree.cpp>
00270   #include <nodes/tree.cpp>
00271   #include <opsystem/application_base.cpp>
00272   #include <opsystem/application_shell.cpp>
00273   #include <opsystem/byte_filer.cpp>
00274   #include <opsystem/command_line.cpp>
00275   #include <opsystem/critical_events.cpp>
00276   #include <opsystem/directory.cpp>
00277   #include <opsystem/filename.cpp>
00278   #include <opsystem/ini_config.cpp>
00279   #include <opsystem/ini_parser.cpp>
00280   #include <opsystem/path_configuration.cpp>
00281   #include <opsystem/rendezvous.cpp>
00282   #include <textual/byte_format.cpp>
00283   #include <textual/list_parsing.cpp>
00284   #include <textual/parser_bits.cpp>
00285   #include <textual/string_manipulation.cpp>
00286   #include <textual/tokenizer.cpp>
00287 #endif // __BUILD_STATIC_APPLICATION__
00288 

Generated on Fri Nov 28 04:28:49 2008 for HOOPLE Libraries by  doxygen 1.5.1