value_tagger.cpp

Go to the documentation of this file.
00001 /*****************************************************************************\
00002 *                                                                             *
00003 *  Name   : value_tagger                                                      *
00004 *  Author : Chris Koeritz                                                     *
00005 *                                                                             *
00006 *  Purpose:                                                                   *
00007 *                                                                             *
00008 *    Scoots through the entire known code base and builds a list of all the   *
00009 *  outcome (and filter) values for that tree.  A manifest of the names is     *
00010 *  produced.  Most of the behavior is driven by the ini file whose name is    *
00011 *  passed on the command line.                                                *
00012 *    Note that the set of items that can be searched for can be specified     *
00013 *  in the ini file, although they must follow the format of:                  *
00014 *      pattern(name, value, description)                                      *
00015 *  where the "pattern" is the search term and the other three items specify   *
00016 *  the enumerated value to be marked.                                         *
00017 *                                                                             *
00018 *******************************************************************************
00019 * Copyright (c) 2005-$now By Author.  This program is free software; you can  *
00020 * redistribute it and/or modify it under the terms of the GNU General Public  *
00021 * License as published by the Free Software Foundation; either version 2 of   *
00022 * the License or (at your option) any later version.  This is online at:      *
00023 *     http://www.fsf.org/copyleft/gpl.html                                    *
00024 * Please send any updates to: fred@gruntose.com                               *
00025 \*****************************************************************************/
00026 
00027 #include <basis/convert_utf.h>
00028 #include <basis/function.h>
00029 #include <basis/portable.h>
00030 #include <basis/set.cpp>
00031 #include <basis/shell_sort.h>
00032 #include <basis/string_array.h>
00033 #include <data_struct/string_table.h>
00034 #include <opsystem/application_shell.h>
00035 #include <opsystem/byte_filer.h>
00036 #include <opsystem/directory_tree.h>
00037 #include <loggers/file_logger.h>
00038 #include <opsystem/filename.h>
00039 #include <opsystem/ini_config.h>
00040 #include <data_struct/static_memory_gremlin.h>
00041 #include <textual/parser_bits.h>
00042 
00043 #include <sys/stat.h>
00044 #ifdef __WIN32__
00045   #include <io.h>
00046 #endif
00047 
00048 #undef LOG
00049 #define LOG(s) EMERGENCY_LOG(program_wide_logger(), s)
00050 
00051 using namespace basis;
00052 
00053 const int LONGEST_SEPARATION = 128;
00054   // the longest we expect a single line of text to be in definition blocks.
00055   // if the definition of an outcome or whatever is farther away than this
00056   // many characters from a comment start, we will no longer consider the
00057   // line to be commented out.  this pretty much will never happen unless it's
00058   // intentionally done to break this case.
00059 
00060 const char *SKIP_VALUE_PHRASE = "SKIP_TO_VALUE";
00061   // the special phrase we use to indicate that values should jump to
00062   // a specific number.
00063 
00065 
00066 // this object records all the data that we gather for the defined items.
00067 class item_record
00068 {
00069 public:
00070   istring _name;
00071   int _value;
00072   istring _description;
00073   istring _path;
00074   istring _extra_tag;  
00075 
00076   item_record(const istring &name = istring::empty_string(), int value = 999,
00077       const istring &description = istring::empty_string(),
00078       const istring &path = istring::empty_string(),
00079       const istring &extra_tag = istring::empty_string())
00080   : _name(name), _value(value), _description(description), _path(path),
00081     _extra_tag(extra_tag) {}
00082 };
00083 
00085 
00086 class search_record
00087 {
00088 public:
00089   search_record(const istring &search = istring::empty_string(),
00090       bool is_link = false, search_record *link = NIL)
00091   : _search(search), _no_modify(false), _is_link(is_link), _our_link(link),
00092     _current_value(0), _value_increment(1) {}
00093 
00094   // these properties are available for both real or linked records.
00095   istring _search;  // our term to search for in the files.
00096   bool _no_modify;  // true if values should not be automatically incremented.
00097   istring _tag;  // extra information attached to this type.
00098   
00099   inline bool is_link() const { return _is_link; }
00100     // returns true if this object is leeching off another object for data.
00101 
00102   search_record *our_link() const { return _our_link; }
00103     // returns the object that this object is a mere shadow of.
00104 
00105   symbol_table<item_record> &definitions() {
00106     if (is_link()) return _our_link->_definitions;
00107     else return _definitions;
00108   }
00109   
00110   int &current_value() {
00111     if (is_link()) return _our_link->_current_value;
00112     else return _current_value;
00113   }
00114 
00115   int &value_increment() {
00116     if (is_link()) return _our_link->_value_increment;
00117     else return _value_increment;
00118   }
00119 
00120   int_set &out_of_band() {
00121     if (is_link()) return _our_link->_out_of_band;
00122     else return _out_of_band;
00123   }
00124 
00125 private:
00126   bool _is_link;  // true if this object links to another.
00127   search_record *_our_link;  // the search we share for our values.
00128   symbol_table<item_record> _definitions;
00129     // the definitions that we found in the code.
00130   int _current_value;  // the next value to use for our term.
00131   int _value_increment;
00132     // how much to add for each new value, if this is an incrementing search.
00133   int_set _out_of_band;
00134     // values we've seen that were premature.  we always want to honor this
00135     // set, if it exists, but there will be nothing in it if the search has
00136     // completely standard non-incrementing type.  this could be varied by
00137     // a non-incrementer linking to a standard incrementer.
00138 };
00139 
00141 class active_searches : public symbol_table<search_record>
00142 {};
00143 
00145 
// a small helper for ordering items by value: each object remembers an index
// alongside a value, and all comparisons look only at the value.

class simple_sorter {
public:
  int _index;
  int _value;

  simple_sorter(int index = 0, int value = 0)
  : _index(index),
    _value(value)
  {}

  // ordering is decided by the value alone; the index takes no part.
  bool operator < (const simple_sorter &to_compare) const
  {
    return to_compare._value > _value;
  }

  // equality is likewise decided by the value alone.
  bool operator == (const simple_sorter &to_compare) const
  {
    return to_compare._value == _value;
  }
};
00158 
00159 class sorting_array : public array<simple_sorter> {};
00160 
00162 
00163 class value_tagger : public application_shell
00164 {
00165 public:
00166   value_tagger();
00167   virtual ~value_tagger();
00168   IMPLEMENT_CLASS_NAME("value_tagger");
00169   int execute();
00170   int print_instructions_and_exit();
00171 
00172   bool process_tree(const istring &path);
00173     // called on each directory hierarchy that we need to process.
00174 
00175   bool process_file(const istring &path);
00176     // examines the file specified to see if it matches our needs.
00177 
00178   bool parse_define(const istring &scanning, int indy, istring &name,
00179           int &value, istring &description, int &num_start, int &num_end);
00180     // processes the string in "scanning" to find parentheses surrounding
00181     // the "name", "value" and "description".  the "description" field may
00182     // occupy multiple lines, so all are gathered together to form one
00183     // unbroken string.  the "num_start" and "num_end" specify where the
00184     // numeric value was found, in case it needs to be patched.
00185 
00186 private:
00187   ini_configurator *_ini;  // the configuration for what we'll scan.
00188   string_table _dirs;  // the list of directories.
00189   string_table _dirs_seen;  // full list of already processed directories.
00190   istring _manifest_filename;  // the name of the manifest we'll create.
00191   byte_filer _manifest;  // the actual file we're building.
00192   active_searches _search_list;  // tracks our progress in scanning files.
00193   int_array _search_ordering;
00194     // lists the terms in the order they should be applied.  initially this
00195     // carries the first pass items, but later will be reset for second pass.
00196   int_array _postponed_searches;
00197     // lists the searches that must wait until the main search is done.
00198   string_table _modified_files;  // the list of files that we touched.
00199 };
00200 
00202 
00203 value_tagger::value_tagger()
00204 : application_shell(static_class_name()),
00205   _ini(NIL),
00206   _dirs_seen(10)
00207 {
00208   SET_DEFAULT_COMBO_LOGGER;
00209 }
00210 
00211 value_tagger::~value_tagger()
00212 {
00213   WHACK(_ini);
00214 }
00215 
00216 int value_tagger::print_instructions_and_exit()
00217 {
00218   LOG(isprintf("%s usage:", filename(__argv[0]).basename().raw().s()));
00219   LOG("");
00220   LOG("\
00221 This utility scans a code base for outcome and filter definitions.  It will\n\
00222 only scan the header files (*.h) found in the directories specified.  The\n\
00223 single parameter is expected to be an INI filename that contains the scanning\n\
00224 configuration.  The INI file should be formatted like this (where the $HOME\n\
00225 can be any variable substitution from the environment):");
00226   LOG("");
00227   LOG("\
00228 [manifest]\n\
00229 output=$HOME/manifest.txt\n\
00230 \n\
00231 [searches]\n\
00232 DEFINE_OUTCOME=1\n\
00233 DEFINE_FILTER=1\n\
00234 \n\
00235 [directories]\n\
00236 $HOME/source/lib_src/library/basis\n\
00237 $HOME/source/lib_src/library\n\
00238 $HOME/source/lib_src/communication/sockets\n\
00239 $HOME/source/lib_src/communication\n\
00240 $HOME/source/lib_src\n\
00241 $HOME/source/app_src\n\
00242 $HOME/source/test_src\n\
00243 \n\
00244 [DEFINE_OUTCOME]\n\
00245 first=0\n\
00246 increment=-1\n\
00247 \n\
00248 [DEFINE_FILTER]\n\
00249 first=-1\n\
00250 increment=1\n\
00251 no_modify=1\n\
00252 \n\
00253 [DEFINE_API_OUTCOME]\n\
00254 no_modify=1\n\
00255 link=DEFINE_OUTCOME\n\
00256 tag=API\n\
00257 \n\
00258   The \"first\" field defines the starting value that should be assigned to\n\
00259 items.\n\
00260   The \"increment\" field specifies what to add to a value for the next item.\n\
00261   The optional \"no_modify\" flag means that the values should not be auto-\n\
00262 incremented; their current value will be used.\n\
00263   The optional \"link\" field defines this type of item as using the current\n\
00264 values for another type of item.  In this case, API_OUTCOME will use the\n\
00265 values for OUTCOME to share its integer space, but API_OUTCOME is not auto-\n\
00266 incremented even though OUTCOME is.  This causes the values for OUTCOME and\n\
00267 API_OUTCOME to be checked for uniqueness together, but only OUTCOME will be\n\
00268 auto-incremented.  Note that only one level of linking is supported currently.\n\
00269   The optional \"tag\" can be used to distinguish the entries for a particular\n\
00270 search type if needed.  This is most helpful for links, so that they can be\n\
00271 distinguished from their base type.\n\
00272 \n\
00273 ");
00274 
00275   return 23;
00276 }
00277 
00278 istring header_string(const istring &build_number)
00279 {
00280   return isprintf("\
00281 #ifndef GENERATED_VALUES_MANIFEST\n\
00282 #define GENERATED_VALUES_MANIFEST\n\
00283 \n\
00284 // This file contains all outcomes and filters for this build.\n\
00285 \n\
00286 // Generated for build %s on %s\n\
00287 \n\
00288 ", build_number.s(), timestamp(true, true).s());
00289 }
00290 
00291 istring footer_string(const byte_array &full_config_file)
00292 {
00293   return isprintf("\n\
00294 // End of definitions.\n\
00295 \n\
00296 \n\
00297 // The following is the full configuration for this build:\n\
00298 \n\
00299 /*\n\
00300 \n\
00301 %s\n\
00302 */\n\
00303 \n\
00304 \n\
00305 #endif // outer guard.\n\
00306 ", (char *)full_config_file.observe());
00307 }
00308 
// performs the entire run of the program, in this order:
//   1) shows the usage text and stops if no ini file was named on the
//      command line, and insists that REPOSITORY_DIR is set.
//   2) loads the ini file named by the first argument: the manifest's output
//      name, the directories to scan and the configuration of each search.
//   3) reads build.ini from REPOSITORY_DIR for the build number and keeps
//      its raw contents for the manifest footer.
//   4) scans all the directories twice; the no_modify searches go in the
//      first pass and the remaining searches in the second.
//   5) writes every gathered definition into the manifest and logs the
//      files that were modified.
// returns 0 when the run completes, or the value from
// print_instructions_and_exit() when the command line was incomplete.
// configuration problems are reported via non_continuable_error(), which
// presumably never returns (the code after several calls depends on that;
// confirm against its definition).
00309 int value_tagger::execute()
00310 {
00311   FUNCDEF("execute");
00312   if (__argc < 2) {
00313     return print_instructions_and_exit();
00314   }
00315 
00316   log(timestamp(true, true) + "value_tagger started.");
00317 
00318   istring test_repository = portable::env_string("REPOSITORY_DIR");
00319   if (!test_repository) {
00320     istring msg = "\
00321 There is a problem with a required build precondition.  The following\r\n\
00322 variables must be set before the build is run:\r\n\
00323 \r\n\
00324   REPOSITORY_DIR    This should point at the root of the build tree.\r\n\
00325 \r\n\
00326 There are also a few variables only required for CLAM-based compilation:\r\n\
00327 \r\n\
00328   MAKEFLAGS         This should be set to \"-I $REPOSITORY_DIR/clam\" or to\r\n\
00329                     \"-I $YETI_DIR/clam\" depending on where the CLAM files\r\n\
00330                     actually reside.\r\n\
00331 \r\n\
00332 Note that on Win32 platforms, these should be set in the System or User\r\n\
00333 variables before running a build.\r\n";
00334 #ifdef __WIN32__
00335     ::MessageBox(0, to_unicode_temp(msg),
00336         to_unicode_temp("Missing Precondition"), MB_ICONWARNING|MB_OK);
00337 #endif
00338     non_continuable_error(class_name(), func, msg);
00339   }
00340 
00341   istring ini_file = __argv[1];  // the name of our ini file.
00342   _ini = new ini_configurator(ini_file, ini_configurator::RETURN_ONLY);
00343 
00344   // read the name of the manifest file to create.
00345   _manifest_filename = _ini->load("manifest", "output", "");
00346   if (!_manifest_filename) {
00347     non_continuable_error(class_name(), ini_file, "The 'output' file entry is missing");
00348   }
00349   _manifest_filename = parser_bits::substitute_env_vars(_manifest_filename);
00350 
00351   LOG(istring("Sending Manifest to ") + _manifest_filename);
00352   LOG("");
00353 
00354   filename(_manifest_filename).unlink();
00355     // clean out the manifest ahead of time.
00356 
00357   // read the list of directories to scan for code.
00358   string_table temp_dirs;
00359   bool read_dirs = _ini->get_section("directories", temp_dirs);
00360   if (!read_dirs || !temp_dirs.symbols()) {
00361     non_continuable_error(class_name(), ini_file,
00362         "The 'directories' section is missing");
00363   }
        // only the entry names are used here; each becomes a directory after
        // environment variable substitution.
00364   for (int i = 0; i < temp_dirs.symbols(); i++) {
00365 //log(istring("curr is ") + current);
00366     istring current = parser_bits::substitute_env_vars(temp_dirs.name(i));
00367     _dirs.add(current, "");
00368   }
00369 
00370   LOG(istring("Directories to scan..."));
00371   LOG(_dirs.text_form());
00372 
        // build.ini is looked for directly under REPOSITORY_DIR; it is opened
        // further below for the build number and the manifest footer.
00373   istring rdir = portable::env_string("REPOSITORY_DIR");
00374   istring fname = rdir + "/" + "build.ini";
00375 
00376   // read the list of search patterns.
00377   string_table searches;
00378   bool read_searches = _ini->get_section("searches", searches);
00379   if (!read_searches || !searches.symbols()) {
00380     non_continuable_error(class_name(), ini_file,
00381         "The 'searches' section is missing");
00382   }
00383 
00384   LOG("Searching for...");
00385   LOG(searches.text_form());
00386 
00387   // now make sure that we get the configuration for each type of value.
00388   for (int i = 0; i < searches.symbols(); i++) {
00389     const istring &curr_name = searches.name(i);
00390 
00391     search_record *check_search = _search_list.find(curr_name);
00392     if (check_search) {
00393       non_continuable_error(class_name(), ini_file,
00394           istring("section ") + curr_name + " is being defined twice");
00395     }
00396 
00397     {
00398       // check for whether this section is linked to another or not.
00399       istring linked = _ini->load(curr_name, "link", "");
00400       search_record *our_link_found = NIL;
00401       if (linked.t()) {
00402         // we found that this should be linked to another item.
              // the target must already be in the table, so a link can only
              // refer to a search that was processed before this one.
00403         our_link_found = _search_list.find(linked);
00404         if (!our_link_found) {
00405           non_continuable_error(class_name(), ini_file,
00406               istring("linked section ") + curr_name + " is linked to missing "
00407                   "section " + linked);
00408         }
00409         search_record new_guy(curr_name, true, our_link_found);
00410         _search_list.add(curr_name, new_guy);
00411       } else {
00412         // this section is a stand-alone section.
00413         search_record new_guy(curr_name);
00414         _search_list.add(curr_name, new_guy);
00415       }
00416     }
00417 
00418     // find our new search cabinet again so we can use it.
00419     search_record *curr_search = _search_list.find(curr_name);
00420     if (!curr_search) {
00421       non_continuable_error(class_name(), ini_file,
00422           istring("section ") + curr_name + " is missing from table "
00423               "after addition; logic error");
00424     }
00425 
00426     // specify some defaults first.
00427     int start = 0;
00428     int increm = 1;
00429     if (!curr_search->is_link()) {
00430       // a linked object doesn't get to specify starting value or increment.
00431       start = _ini->load(curr_name, "first", start);
00432       curr_search->current_value() = start;
00433       increm = _ini->load(curr_name, "increment", increm);
00434       curr_search->value_increment() = increm;
00435     } else {
00436       start = curr_search->our_link()->current_value();
00437       increm = curr_search->our_link()->value_increment();
00438     }
00439 
00440     int no_modify = _ini->load(curr_name, "no_modify", 0);
00441     if (no_modify) {
00442       curr_search->_no_modify = true;
00443     }
00444 
00445     istring tag = _ini->load(curr_name, "tag", "");
00446     if (tag.t()) {
00447       curr_search->_tag = tag;
00448     }
00449 
          // summarize this search's settings for the log.
00450     isprintf to_show("%s: no_modify=%s", curr_name.s(),
00451          no_modify? "true" : "false");
00452 
00453     if (curr_search->is_link()) {
00454       // links show who they're hooked to.
00455       to_show += istring(" link=") + curr_search->our_link()->_search;
00456     } else {
00457       // non-links get to show off their start value and increment.
00458       to_show += isprintf(" start=%d increment=%d", start, increm);
00459     }
00460     if (tag.t()) {
00461       to_show += istring(" tag=") + curr_search->_tag;
00462     }
00463     LOG(to_show);
00464   }
00465   LOG("");
00466 
00467   // now gather some info about the build that we can plug into the manifest.
00468 
00469   byte_filer build_file(fname, "r");
00470   if (!build_file.good()) {
00471     non_continuable_error(class_name(), build_file.filename(),
00472         "Could not find the build configuration; is REPOSITORY_DIR set?");
00473   }
        // NOTE(review): at most 100000 bytes are requested here, and
        // footer_string() later prints this buffer through %s -- confirm that
        // byte_array leaves the data zero terminated.
00474   byte_array full_config;
00475   build_file.read(full_config, 100000);  // a good chance to be big enough.
00476   build_file.close();
00477 
00478 //log("got config info:");
00479 //log((char *)full_config.observe());
00480 
        // the build number is the four "version" entries joined with dots.
00481   istring build_number;
00482   ini_configurator temp_ini(fname, configurator::RETURN_ONLY);
00483   build_number += temp_ini.load("version", "major", "");
00484   build_number += ".";
00485   build_number += temp_ini.load("version", "minor", "");
00486   build_number += ".";
00487   build_number += temp_ini.load("version", "revision", "");
00488   build_number += ".";
00489   build_number += temp_ini.load("version", "build", "");
        // only the separators remain when all four entries came back empty.
00490   if (build_number == "...") {
00491     non_continuable_error(class_name(), build_file.filename(),
00492         "Could not read the build number; is build.ini malformed?");
00493   }
00494 
00495 //log(istring("got build num: ") + build_number);
00496 
00497   // now that we know what file to create, write the header blob for it.
00498   _manifest.open(_manifest_filename, "wb");
00499   if (!_manifest.good()) {
00500     non_continuable_error(class_name(), _manifest_filename,
00501         "Could not write to the manifest file!");
00502   }
00503   _manifest.write(header_string(build_number));
00504 
00505   // make sure we have the right ordering for our terms.  items that are
00506   // non-modify types must come before the modifying types.
00507   for (int i = 0; i < _search_list.symbols(); i++) {
00508     search_record &curr_reco = _search_list[i];
00509     if (curr_reco._no_modify)
00510       _search_ordering += i;
00511     else
00512       _postponed_searches += i;
00513   }
00514 
00515   // scan across each directory specified for our first pass.
00516   LOG("First pass...");
00517   for (int i = 0; i < _dirs.symbols(); i++) {
00518     LOG(istring("  Processing: ") + _dirs.name(i));
00519     bool ret = process_tree(_dirs.name(i));
00520     if (!ret) {
            // a failed tree is only logged; the remaining trees still run.
00521       LOG(istring("Problem encountered in directory ") + _dirs.name(i));
00522     }
00523   }
00524   LOG("");
00525 
00526   // second pass now.
00527   LOG("Second pass...");
00528   _search_ordering = _postponed_searches;  // recharge the list for 2nd pass.
00529   _dirs_seen.reset();  // drop any directories we saw before.
00530   for (int i = 0; i < _dirs.symbols(); i++) {
00531     LOG(istring("  Processing: ") + _dirs.name(i));
00532     bool ret = process_tree(_dirs.name(i));
00533     if (!ret) {
00534       LOG(istring("Problem encountered in directory ") + _dirs.name(i));
00535     }
00536   }
00537   LOG("");
00538 
00539   const istring quote = "\"";
00540   const istring comma = ",";
00541 
00542   // scoot across all the completed searches and dump results.
00543   for (int i = 0; i < _search_list.symbols(); i++) {
00544     search_record &curr_reco = _search_list[i];
00545     const istring &pattern = curr_reco._search;
00546 
          // each search's results are wrapped in one comment block, opened by
          // the START line here and closed by the END line below.
00547     _manifest.write(istring("/* START ") + pattern + "\n");
00548     _manifest.write(istring("[") + pattern + "]\n");
00549 
00550     if (!curr_reco.is_link()) {
00551       // scoot across all definitions and print them out.
00552 
00553       // do the print out in order, as dictated by the sign of the increment.
00554       sorting_array sortie;
00555       for (int j = 0; j < curr_reco.definitions().symbols(); j++) {
00556         const item_record &rec = curr_reco.definitions().get(j);
00557         sortie += simple_sorter(j, rec._value);
00558       }
            // the third argument is true for a negative increment; presumably
            // that asks for a reversed sort (confirm against shell_sort).
00559       shell_sort(sortie.access(), sortie.length(),
00560           negative(curr_reco.value_increment()));
00561 
00562       for (int j = 0; j < sortie.length(); j++) {
00563         int indy = sortie[j]._index;
00564         const item_record &rec = curr_reco.definitions().get(indy);
              // one line per item: optional "(tag) ", then the quoted name,
              // value, description and path separated by commas.
00565         istring to_write = "  ";
00566         if (rec._extra_tag.t()) {
00567           to_write += istring("(") + rec._extra_tag + ") ";
00568         }
00569         to_write += quote + rec._name + quote + comma + " ";
00570         to_write += quote + isprintf("%d", rec._value) + quote + comma + " ";
00571         to_write += quote + rec._description + quote + comma + " ";
00572         to_write += quote + rec._path + quote;
00573         to_write += "\n";
00574         _manifest.write(to_write);
00575       }
00576     } else {
00577       // this is just a link.
00578       istring to_write = "  Linked to search item ";
00579       to_write += curr_reco.our_link()->_search;
00580       to_write += "\n";
00581       _manifest.write(to_write);
00582     }
00583 
00584     _manifest.write(istring("END ") + pattern + " */\n\n");
00585   }
00586 
00587   _manifest.write(footer_string(full_config));
00588 
00589   // show all the modified files.
00590   if (_modified_files.symbols()) {
00591     const int syms = _modified_files.symbols();
00592     LOG("Modified Files:");
00593     LOG("===============");
00594     for (int i = 0; i < syms; i++) {
00595       LOG(_modified_files.name(i));
00596     }
00597   } else {
00598     LOG("No files needed modification for generated values.");
00599   }
00600   LOG("");
00601 
00602   log(timestamp(true, true) + "value_tagger finished.");
00603 
00604   return 0;
00605 }
00606 
00607 #define INBO (indy < scanning.length())
00608   // a macro that makes length checking less verbose.
00609 
00610 // make sure we drop any spaces in between important bits.
00611 #define SKIP_SPACES \
00612   while (INBO && parser_bits::white_space(scanning[indy])) indy++;
00613 
00614 // return with a failure but say why it happened.
00615 #define FAIL_PARSE(why) { \
00616   log(istring("failed to parse the string because ") + why + "."); \
00617   return false; \
00618 }
00619 
00620 bool value_tagger::parse_define(const istring &scanning, int indy,
00621     istring &name, int &value, istring &description, int &num_start,
00622     int &num_end)
00623 {
00624   // prepare our result objects.
00625   name = ""; value = -1; description = ""; num_start = -1; num_end = -1;
00626 
00627   SKIP_SPACES;
00628 
00629   // look for starting parenthesis.
00630   if (!INBO || (scanning[indy] != '(') )
00631     FAIL_PARSE("the first parenthesis is missing");
00632 
00633   indy++;  // skip paren.
00634   SKIP_SPACES;
00635 
00636   // find the name of the item being defined.
00637   while (INBO && (scanning[indy] != ',') ) {
00638     name += scanning[indy];
00639     indy++;
00640   }
00641 
00642   indy++;  // skip the comma.
00643   SKIP_SPACES;
00644 
00645   istring num_string;
00646   num_start = indy;
00647   while (INBO && parser_bits::is_numeric(scanning[indy])) {
00648     num_string += scanning[indy];
00649     indy++;
00650   }
00651   num_end = indy - 1;
00652   value = num_string.convert(0);
00653 
00654   SKIP_SPACES;
00655 
00656   if (!INBO || (scanning[indy] != ',') )
00657     FAIL_PARSE("the post-value comma is missing");
00658 
00659   indy++;
00660   SKIP_SPACES;
00661 
00662   if (!INBO || (scanning[indy] != '"') )
00663     FAIL_PARSE("the opening quote for the description is missing");
00664 
00665   indy++;  // now we should be at raw text.
00666 
00667   // scan through the full description, taking into account that it might
00668   // be broken across multiple lines as several quoted bits.
00669   bool in_quote = true;  // we're inside a quote now.
00670   while (INBO && (scanning[indy] != ')') ) {
00671     const char curr = scanning[indy];
00672 //hmmm: escaped quotes are not currently handled.
00673     if (curr == '"') in_quote = !in_quote;  // switch quoting state.
00674     else if (in_quote) description += curr;
00675     indy++;
00676   }
00677 
00678   return scanning[indy] == ')';
00679 }
00680 
00681 bool value_tagger::process_file(const istring &path)
00682 {
00683   byte_filer examining(path, "rb");
00684   if (!examining.good()) {
00685     log(istring("Error reading file: ") + path);
00686     return false;
00687   }
00688   examining.seek(0, byte_filer::FROM_END);
00689   int fsize = int(examining.tell());
00690   examining.seek(0, byte_filer::FROM_START);
00691 
00692   istring contents('\0', fsize + 20);
00693   int bytes_read = examining.read((byte *)contents.access(), fsize);
00694     // read the file directly into a big istring.
00695   examining.close();
00696   contents[bytes_read] = '\0';
00697   contents.shrink();  // drop any extra stuff at end.
00698 
00699   bool modified = false;  // set to true if we need to write the file back.
00700 
00701   // check if the file matches our phrases of interest.
00702   bool matched = false;
00703   for (int q = 0; q < _search_list.symbols(); q++) {
00704     search_record &curr_reco = _search_list[q];
00705     if (contents.contains(curr_reco._search)) {
00706 //_manifest.write(istring("MATCH-") + curr_pattern + ": " + path + "\n" ); //temp
00707       matched = true;
00708       break;
00709     }
00710   }
00711 
00712   if (!matched) return true;
00713 
00714   // now we have verified that there's something interesting in this file.
00715   // go through to find the interesting bits.
00716 
00717   // we do this in the search ordering that we established earlier, so we
00718   // will tag the values in the proper order.
00719   for (int x = 0; x < _search_ordering.length(); x++) {
00720     int q = _search_ordering[x];  // get our real index.
00721     search_record &curr_reco = _search_list[q];
00722     const istring &curr_pattern = curr_reco._search;
00724     int start_from = 0;  // where searches will start from.
00725 
00726     while (true) {
00727       // search forward for next match.
00728       int indy = contents.find(curr_pattern, start_from);
00729       if (negative(indy)) break;  // no more matches.
00730       start_from = indy + 5;  // ensure we'll skip past the last match.
00731 
00732       // make sure our deadly pattern isn't in front; we don't want to
00733       // process the actual definition of the macro in question.
00734 //log(isprintf("indy=%d [indy-1]=%c [indy-2]=%c", indy, contents[indy-1], contents[indy-2]));
00735       if ( (indy > 3) && (contents[indy-1] == ' ')
00736           && (contents[indy-2] == 'e') ) {
00737         int def_indy = contents.find("#define", indy, true);
00738 //log(istring("checking ") + curr_pattern + isprintf(": defindy %d, ", def_indy) + path + "\n" );
00739 
00740         if (non_negative(def_indy) && (absolute_value(indy - def_indy) < 12) ) {
00741           // they're close enough that we probably need to skip this
00742           // occurrence of our search term.
00743 //_manifest.write(istring("DEMATCH-") + curr_pattern + ": had the #define! " + path + "\n" );
00744           continue;
00745         }
00746       }
00747 
00748       // make sure we don't include commented lines in consideration.
00749       int comm_indy = contents.find("//", indy, true);
00750       if (non_negative(comm_indy)) {
00751 //log("found a comment marker");
00752         // we found a comment before the definition, but we're not sure how
00753         // far before.
00754         if (absolute_value(comm_indy - indy) < LONGEST_SEPARATION) {
00755 //log("comment is close enough...");
00756           // they could be on the same line...  unless lines are longer than
00757           // our constant.
00758           bool found_cr = false;
00759           for (int q = comm_indy; q < indy; q++) {
00760             if (parser_bits::is_eol(contents[q])) {
00761               found_cr = true;
00762               break;
00763             }
00764           }
00765           if (!found_cr) {
00766             // if there's a comment before the definition and no carriage
00767             // returns in between, then this is just a comment.
00768 //log(istring("DEMATCH-") + curr_pattern + ": had the comment! " + path + "\n" );
00769             continue;
00770           }
00771         }
00772       }
00773 
00774       // now we are pretty sure this is a righteous definition of an outcome,
00775       // and not the definition of the macro itself.
00776       int value, num_start, num_end;
00777       istring name, description;
00778       bool found_it = parse_define(contents, indy + curr_pattern.length(),
00779           name, value, description, num_start, num_end);
00780       if (!found_it) {
00781         log(istring("there was a problem parsing ") + curr_pattern
00782             + " in " + path);
00783         continue;
00784       }
00785 
00786       // handle the special keyword for changing the value.  this is useful
00787       // if you want a set of outcomes to start at a specific range.
00788       if (name == SKIP_VALUE_PHRASE) {
00789         LOG(istring("\tSkipping value for ") + curr_pattern
00790             + isprintf(" to %d because of request in\n\t", value) + path);
00791         curr_reco.current_value() = value;
00792       }
00793       while (true) {
00794         // make sure that the current value is not already in use.
00795         if (!curr_reco.out_of_band().member(curr_reco.current_value()))
00796           break;
00797         // if we had a match above, we need to adjust the current value.
00798         curr_reco.current_value() += curr_reco.value_increment();
00799       }
00800       if (name == SKIP_VALUE_PHRASE) {
00801         continue;  // keep going now that we vetted the current value.
00802       }
00803 
00804 //must catch some conditions here for values:
00805 //  for incrementing types, we can always just try to use the next value
00806 //  once we know it wasn't already defined out of band?
00807 //  for non-incrementing types, we need to ensure we haven't already seen
00808 //  the thing.  do we just always add a value seen to out of band?
00809 //  for mixed types, the incrementing side needs to not reuse out of band
00810 //  values.  
00811 
00812       istring other_place;  // the other place it was defined.
00813       if (curr_reco.out_of_band().member(value) && curr_reco._no_modify) {
00814         // this is bad; we have already seen this value elsewhere...
00815         for (int x = 0; x < curr_reco.definitions().symbols(); x++) {
00816           // see if we can find the previous definition in our list.
00817           if (value == curr_reco.definitions()[x]._value)
00818             other_place = curr_reco.definitions()[x]._path;
00819         }
00820         non_continuable_error(class_name(), path,
00821             isprintf("There is a duplicate value here for %s=%d !  "
00822                 "Also defined in %s.", name.s(), value, other_place.s()));
00823       }
00824 
00825       // we care sometimes that this value is different than the next
00826       // sequential one we'd assign.  if it's a non-modifying type of
00827       // search, then we can't change the assigned value anyway--we can
00828       // only report the error in re-using a value (above).
00829       if (!curr_reco._no_modify) {
00830         // check that the defined value matches the next one we'd assign.
00831         if (value != curr_reco.current_value()) {
00832           // patch the value with the appropriate one we've been tracking.
00833           modified = true;
00834           value = curr_reco.current_value();
00835           contents.zap(num_start, num_end);  // remove old fusty value.
00836           contents.insert(num_start, isprintf("%d", value));
00837           _modified_files.add(path, "");
00838         }
00839         // move the current value up (or down).
00840         curr_reco.current_value() += curr_reco.value_increment();
00841       } else {
00842         // non-modifying type of value here.
00843 //anything to do?
00844       }
00845 
00846       curr_reco.out_of_band() += value;
00847         // we've vetted the value, and now we're definitely using it.
00848 
00849       // make sure they aren't trying to reuse the name for this item.
00850       item_record rec;
00851       bool found_name = false;  // we don't want to find name already there.
00852       if (curr_reco.definitions().find(name)) {
00853         rec = *curr_reco.definitions().find(name);
00854         found_name = true;
00855       }
00856       if (found_name) {
00857         // this is bad.  this means we are not unique.  remove the manifest
00858         // file due to this error.
00859         _manifest.close();  // close the file since we want to whack it.
00860         filename(_manifest_filename).unlink();
00861         non_continuable_error(class_name(), path,
00862             isprintf("There is a duplicate name here (%s)!  "
00863                 "Also defined in %s.", name.s(), rec._path.s()));
00864       }
00865 
00866       // record the definition in the appropriate table.
00867       curr_reco.definitions().add(name, item_record(name, value,
00868           description, path, curr_reco._tag));
00869 
00870 //log(curr_pattern + isprintf(": name=%s value=%d desc=[%s]\n", name.s(), value, description.s()));
00871 
00872     }
00873   }
00874 
00875   if (modified) {
00876     // rewrite the file, since we modified its contents.
00877     bool chmod_result = filename(path).chmod(filename::ALLOW_BOTH,
00878         filename::USER_RIGHTS);
00879 /*
00880     int chmod_value;
00881 #ifdef __UNIX__
00882     chmod_value = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
00883 #elif defined(__WIN32__)
00884     chmod_value = _S_IREAD | _S_IWRITE;
00885 #else
00886     //unknown.  let's try unix...
00887     chmod_value = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
00888 #endif
00889     int chmod_result = chmod(path.s(), chmod_value);
00890 */
00891     if (!chmod_result) {
00892       log(istring("there was a problem changing permissions on ") + path
00893           + "; writing the new version might fail.");
00894     }
00895 
00896     byte_filer rewriting(path, "wb");
00897     rewriting.write(contents);
00898     rewriting.close();
00899   }
00900 
00901   return true;
00902 }
00903 
00904 bool value_tagger::process_tree(const istring &path)
00905 {
00906   directory_tree dir(path, "*.h");
00907   if (!dir.good()) return false;
00908 
00909   dir_tree_iterator *ted = dir.start(directory_tree::prefix);
00910     // create our iterator to perform a prefix traversal.
00911 
00912   filename curr_dir;  // the current path the iterator is at.
00913   string_array files;  // the filenames held at the iterator.
00914 
00915   while (directory_tree::current(*ted, curr_dir, files)) {
00916     // we have a good directory to process.
00917 
00918     // omit any subdirectories that exactly match directories we've already
00919     // scanned.  necessary to avoid redoing whole areas.
00920     if (!_dirs_seen.find(curr_dir)) {
00921       // deal with each matching header file we've found.
00922       for (int i = 0; i < files.length(); i++) {
00923         bool file_ret = process_file(filename(curr_dir.raw(), files[i]));
00924         if (!file_ret) {
00925           log(istring("There was an error while processing ") + files[i]);
00926         }
00927       }
00928 
00929       _dirs_seen.add(curr_dir, "");
00930     }
00931 
00932     // go to the next place.
00933     directory_tree::next(*ted);
00934   }
00935 
00936   directory_tree::throw_out(ted);
00937   return true;
00938 }
00939 
// NOTE(review): HOOPLE_MAIN presumably expands to the program entry point
// that runs the value_tagger application object; the second macro argument
// is intentionally left empty here -- confirm against the macro's definition.
00940 HOOPLE_MAIN(value_tagger, )
00941 
00942 #ifdef __BUILD_STATIC_APPLICATION__
00943   // static dependencies found by buildor_gen_deps.sh:
00944   #include <basis/array.cpp>
00945   #include <basis/byte_array.cpp>
00946   #include <basis/callstack_tracker.cpp>
00947   #include <basis/chaos.cpp>
00948   #include <basis/convert_utf.cpp>
00949   #include <basis/definitions.cpp>
00950   #include <basis/earth_time.cpp>
00951   #include <basis/guards.cpp>
00952   #include <basis/istring.cpp>
00953   #include <basis/log_base.cpp>
00954   #include <basis/memory_checker.cpp>
00955   #include <basis/mutex.cpp>
00956   #include <basis/object_base.cpp>
00957   #include <basis/outcome.cpp>
00958   #include <basis/packable.cpp>
00959   #include <basis/portable.cpp>
00960   #include <basis/sequence.cpp>
00961   #include <basis/set.cpp>
00962   #include <basis/utility.cpp>
00963   #include <basis/version_record.cpp>
00964   #include <data_struct/amorph.cpp>
00965   #include <data_struct/bit_vector.cpp>
00966   #include <data_struct/byte_hasher.cpp>
00967   #include <data_struct/configurator.cpp>
00968   #include <data_struct/hash_table.cpp>
00969   #include <data_struct/pointer_hash.cpp>
00970   #include <data_struct/stack.cpp>
00971   #include <data_struct/static_memory_gremlin.cpp>
00972   #include <data_struct/string_hash.cpp>
00973   #include <data_struct/string_hasher.cpp>
00974   #include <data_struct/string_table.cpp>
00975   #include <data_struct/symbol_table.cpp>
00976   #include <data_struct/table_configurator.cpp>
00977   #include <loggers/console_logger.cpp>
00978   #include <loggers/file_logger.cpp>
00979   #include <loggers/locked_logger.cpp>
00980   #include <loggers/null_logger.cpp>
00981   #include <loggers/program_wide_logger.cpp>
00982   #include <nodes/node.cpp>
00983   #include <nodes/packable_tree.cpp>
00984   #include <nodes/path.cpp>
00985   #include <nodes/tree.cpp>
00986   #include <opsystem/application_base.cpp>
00987   #include <opsystem/application_shell.cpp>
00988   #include <opsystem/byte_filer.cpp>
00989   #include <opsystem/command_line.cpp>
00990   #include <opsystem/critical_events.cpp>
00991   #include <opsystem/directory.cpp>
00992   #include <opsystem/directory_tree.cpp>
00993   #include <opsystem/file_info.cpp>
00994   #include <opsystem/filename.cpp>
00995   #include <opsystem/filename_list.cpp>
00996   #include <opsystem/filename_tree.cpp>
00997   #include <opsystem/huge_file.cpp>
00998   #include <opsystem/ini_config.cpp>
00999   #include <opsystem/ini_parser.cpp>
01000   #include <opsystem/path_configuration.cpp>
01001   #include <opsystem/rendezvous.cpp>
01002   #include <textual/byte_format.cpp>
01003   #include <textual/parser_bits.cpp>
01004   #include <textual/string_manipulation.cpp>
01005   #include <textual/tokenizer.cpp>
01006 #endif // __BUILD_STATIC_APPLICATION__
01007 

Generated on Fri Aug 29 04:28:26 2008 for HOOPLE Libraries by  doxygen 1.5.1