ini_parser.cpp

Go to the documentation of this file.
00001 #ifndef INI_PARSER_IMPLEMENTATION_FILE
00002 #define INI_PARSER_IMPLEMENTATION_FILE
00003 
00004 /*****************************************************************************\
00005 *                                                                             *
00006 *  Name   : ini_parser                                                        *
00007 *  Author : Chris Koeritz                                                     *
00008 *                                                                             *
00009 *******************************************************************************
00010 * Copyright (c) 2000-$now By Author.  This program is free software; you can  *
00011 * redistribute it and/or modify it under the terms of the GNU General Public  *
00012 * License as published by the Free Software Foundation; either version 2 of   *
00013 * the License or (at your option) any later version.  This is online at:      *
00014 *     http://www.fsf.org/copyleft/gpl.html                                    *
00015 * Please send any updates to: fred@gruntose.com                               *
00016 \*****************************************************************************/
00017 
00018 #include "ini_parser.h"
00019 
00020 #include <basis/function.h>
00021 #include <basis/istring.h>
00022 #include <basis/log_base.h>
00023 #include <basis/string_array.h>
00024 #include <data_struct/amorph.h>
00025 #include <data_struct/table_configurator.h>
00026 #include <data_struct/string_table.h>
00027 #include <textual/parser_bits.h>
00028 #include <textual/tokenizer.h>
00029 
//#define DEBUG_INI_PARSER
  // uncomment for noisy version.

// LOG(text) is this file's private tracing macro.  when DEBUG_INI_PARSER is
// defined, the argument is passed to program_wide_logger().log(); otherwise
// the argument is discarded without being evaluated.  both forms expand to a
// single "do { } while (0)" statement that needs the caller's semicolon, so
// that "if (x) LOG(y); else ..." parses the same way as a function call would.
#undef LOG
#ifdef DEBUG_INI_PARSER
  #define LOG(to_print) do { program_wide_logger().log(to_print); } while (0)
#else
  #define LOG(a) do {} while (0)
#endif
00039 
00041 
00042 //algorithm:
00043 //  gather section until next section definition or end of file.
00044 //  parse the section with tokenizer.
00045 //  eat that out of the string.
00046 //  repeat.
00047 
00048 ini_parser::ini_parser(const istring &to_parse, treatment_of_defaults behavior)
00049 : table_configurator(behavior),
00050   _well_formed(false),
00051   _preface(new istring)
00052 {
00053   reset(to_parse);
00054 }
00055 
00056 ini_parser::~ini_parser()
00057 {
00058   WHACK(_preface);
00059 }
00060 
00061 void ini_parser::chow_through_eol(istring &to_chow)
00062 {
00063   while (to_chow.length()) {
00064     if (parser_bits::is_eol(to_chow[0])) {
00065       // zap all carriage return type chars now that we found one.
00066       while (to_chow.length() && parser_bits::is_eol(to_chow[0])) {
00067         *_preface += to_chow[0];
00068         to_chow.zap(0, 0);
00069       }
00070       return;  // mission accomplished.
00071     }
00072     *_preface += to_chow[0];
00073     to_chow.zap(0, 0);
00074   }
00075 }
00076 
00077 /*
00078 //this is a super expensive operation...
00079 // it would be better to have the parser be a bit more intelligent.
00080 void strip_blank_lines(istring &to_strip)
00081 {
00082   bool last_was_ret = false;
00083   for (int i = 0; i < to_strip.length(); i++) {
00084     if (parser_bits::is_eol(to_strip[i])) {
00085       if (last_was_ret) {
00086         // two in a row; now that's bogus.
00087         to_strip.zap(i, i);
00088         i--;  // skip back.
00089         continue;
00090       }
00091       last_was_ret = true;
00092       to_strip[i] = '\n';  // make sure we know which type to look for.
00093     } else {
00094       if (last_was_ret && parser_bits::white_space(to_strip[i])) {
00095         // well, the last was a return but this is white space.  that's also
00096         // quite bogus.
00097         to_strip.zap(i, i);
00098         i--;  // skip back.
00099         continue;
00100       }
00101       last_was_ret = false;
00102     }
00103   }
00104 }
00105 */
00106 
00107 void ini_parser::reset(const istring &to_parse)
00108 {
00109   _well_formed = false;
00110   table_configurator::reset();  // clean out existing contents.
00111   _preface->reset();  // set the preface string back to nothing.
00112   add(to_parse);
00113 }
00114 
00115 void ini_parser::add(const istring &to_parse)
00116 {
00117   istring parsing = to_parse;
00118 //  strip_blank_lines(parsing);
00119   _preface->reset();  // set the preface string back to nothing.
00120   while (parsing.length()) {
00121     istring section_name;
00122     bool found_sect = parse_section(parsing, section_name);
00123     if (!found_sect) {
00124       // the line is not a section name.  toss it.
00125       chow_through_eol(parsing);
00126       continue;  // try to find another section name.
00127     }
00128     // we got a section.  yee hah.
00129     int next_sect = 0;
00130     for (next_sect = 0; next_sect < parsing.length(); next_sect++) {
00131 //      LOG(istring("[") + istring(parsing[next_sect], 1) + "]");
00132       if (parser_bits::is_eol(parsing[next_sect])) {
00133         // we found the requisite return; let's see if a section beginning
00134         // is just after it.  we know nothing else should be, since we stripped
00135         // out the blank lines and blanks after CRs.
00136         if (parsing[next_sect + 1] == '[') {
00137           // aha, found the bracket that should be a section start.
00138           break;  // done seeking next section beginning.
00139         }
00140       }
00141     }
00142     // skip back one if we hit the end of the string.
00143     if (next_sect >= parsing.length()) next_sect--;
00144     // now grab what should be all values within a section.
00145     LOG(isprintf("bounds are %d to %d, string len is %d.", 0, next_sect,
00146         parsing.length()));
00147     istring sect_parsing = parsing.substring(0, next_sect);
00148     LOG(istring("going to parse: >>") + sect_parsing + "<<");
00149     parsing.zap(0, next_sect);
00150     tokenizer section_reader("\n", "=");
00151     section_reader.set_comment_chars(";#");
00152     section_reader.parse(sect_parsing);
00153     LOG(istring("read: ") + section_reader.text_form());
00154     merge_section(section_name, section_reader.table());
00155   }
00156   _well_formed = true;
00157 }
00158 
00159 void ini_parser::merge_section(const istring &section_name,
00160     const string_table &to_merge)
00161 {
00162   if (!section_exists(section_name)) {
00163     // didn't exist yet, so just plunk it in.
00164     put_section(section_name, to_merge);
00165     return;
00166   }
00167   
00168   // since the section exists, we just write the individual entries from the
00169   // new section.  they'll stamp out any old values.
00170   for (int i = 0; i < to_merge.symbols(); i++)
00171     put(section_name, to_merge.name(i), to_merge[i]);
00172 }
00173 
00174 bool ini_parser::parse_section(istring &to_parse, istring &section_name)
00175 {
00176   section_name = "";  // reset the section.
00177 
00178   // we have a simple state machine here...
00179   enum states {
00180     SEEKING_OPENING_BRACKET,  // looking for the first bracket.
00181     EATING_SECTION_NAME       // got a bracket, now getting section name.
00182   };
00183   states state = SEEKING_OPENING_BRACKET;
00184 
00185   // zip through the string trying to find a valid section name.
00186   for (int i = 0; i < to_parse.length(); i++) {
00187     char curr = to_parse[i];
00188     LOG(istring("<") + istring(curr, 1) + ">");
00189     switch (state) {
00190       case SEEKING_OPENING_BRACKET:
00191         // we're looking for the first bracket now...
00192         if (parser_bits::white_space(curr)) continue;  // ignore white space.
00193         if (curr != '[') return false;  // argh, bad characters before bracket.
00194         state = EATING_SECTION_NAME;  // found the bracket.
00195         break;
00196       case EATING_SECTION_NAME:
00197         // we're adding to the section name now...
00198         if (curr == ']') {
00199           // that's the end of the section name.
00200           to_parse.zap(0, i);  // remove what we saw.
00201 //should we take out to end of line also?
00202 //eventually up to eol could be kept as a comment?
00203           return true;
00204         }
00205         section_name += curr;  // add a character to the name.
00206         break;
00207       default:
00208         //LOG("got to unknown case in section parser!");
00209         return false;
00210     }
00211   }
00212   // if we got to here, the section was badly formed...  the whole string was
00213   // parsed through but no conclusion was reached.
00214   return false;
00215 }
00216 
00217 bool ini_parser::restate(istring &new_ini, bool add_spaces)
00218 {
00219   new_ini = *_preface;  // give it the initial text back again.
00220   string_array sects;
00221   sections(sects);
00222   for (int i = 0; i < sects.length(); i++) {
00223     new_ini += istring("[") + sects[i] + "]" + log_base::platform_ending();
00224     string_table tab;
00225     if (!get_section(sects[i], tab)) continue;  // serious error.
00226     tab.add_spaces(add_spaces);
00227     new_ini += tab.text_form();
00228   }
00229   return true;
00230 }
00231 
00232 
00233 #endif //INI_PARSER_IMPLEMENTATION_FILE
00234 

Generated on Thu Nov 20 04:29:04 2008 for HOOPLE Libraries by  doxygen 1.5.1