ini_parser.cpp

Go to the documentation of this file.
00001 /*****************************************************************************\
00002 *                                                                             *
00003 *  Name   : ini_parser                                                        *
00004 *  Author : Chris Koeritz                                                     *
00005 *                                                                             *
00006 *******************************************************************************
00007 * Copyright (c) 2000-$now By Author.  This program is free software; you can  *
00008 * redistribute it and/or modify it under the terms of the GNU General Public  *
00009 * License as published by the Free Software Foundation; either version 2 of   *
00010 * the License or (at your option) any later version.  This is online at:      *
00011 *     http://www.fsf.org/copyleft/gpl.html                                    *
00012 * Please send any updates to: fred@gruntose.com                               *
00013 \*****************************************************************************/
00014 
00015 #include "ini_parser.h"
00016 #include "table_configurator.h"
00017 #include "variable_tokenizer.h"
00018 
00019 #include <basis/astring.h>
00020 #include <basis/functions.h>
00021 #include <structures/amorph.h>
00022 #include <structures/string_array.h>
00023 #include <structures/string_table.h>
00024 #include <textual/parser_bits.h>
00025 
00026 //#define DEBUG_INI_PARSER
00027   // uncomment for noisy version.
00028 
00029 #undef LOG
00030 #ifdef DEBUG_INI_PARSER
00031   #define LOG(to_print) printf("%s\n", astring(to_print).s())
00032 #else
00033   #define LOG(a) {}
00034 #endif
00035 
00037 
00038 using namespace basis;
00039 using namespace structures;
00040 using namespace textual;
00041 //using namespace ;
00042 
00043 namespace configuration {
00044 
00045 //algorithm:
00046 //  gather section until next section definition or end of file.
00047 //  parse the section with variable_tokenizer.
00048 //  eat that out of the string.
00049 //  repeat.
00050 
00051 ini_parser::ini_parser(const astring &to_parse, treatment_of_defaults behavior)
00052 : table_configurator(behavior),
00053   _well_formed(false),
00054   _preface(new astring)
00055 {
00056   reset(to_parse);
00057 }
00058 
00059 ini_parser::~ini_parser()
00060 {
00061   WHACK(_preface);
00062 }
00063 
00064 void ini_parser::chow_through_eol(astring &to_chow)
00065 {
00066   while (to_chow.length()) {
00067     if (parser_bits::is_eol(to_chow[0])) {
00068       // zap all carriage return type chars now that we found one.
00069       while (to_chow.length() && parser_bits::is_eol(to_chow[0])) {
00070         *_preface += to_chow[0];
00071         to_chow.zap(0, 0);
00072       }
00073       return;  // mission accomplished.
00074     }
00075     *_preface += to_chow[0];
00076     to_chow.zap(0, 0);
00077   }
00078 }
00079 
00080 /*
00081 //this is a super expensive operation...
00082 // it would be better to have the parser be a bit more intelligent.
00083 void strip_blank_lines(astring &to_strip)
00084 {
00085   bool last_was_ret = false;
00086   for (int i = 0; i < to_strip.length(); i++) {
00087     if (parser_bits::is_eol(to_strip[i])) {
00088       if (last_was_ret) {
00089         // two in a row; now that's bogus.
00090         to_strip.zap(i, i);
00091         i--;  // skip back.
00092         continue;
00093       }
00094       last_was_ret = true;
00095       to_strip[i] = '\n';  // make sure we know which type to look for.
00096     } else {
00097       if (last_was_ret && parser_bits::white_space(to_strip[i])) {
00098         // well, the last was a return but this is white space.  that's also
00099         // quite bogus.
00100         to_strip.zap(i, i);
00101         i--;  // skip back.
00102         continue;
00103       }
00104       last_was_ret = false;
00105     }
00106   }
00107 }
00108 */
00109 
00110 void ini_parser::reset(const astring &to_parse)
00111 {
00112   _well_formed = false;
00113   table_configurator::reset();  // clean out existing contents.
00114   _preface->reset();  // set the preface string back to nothing.
00115   add(to_parse);
00116 }
00117 
00118 void ini_parser::add(const astring &to_parse)
00119 {
00120   astring parsing = to_parse;
00121 //  strip_blank_lines(parsing);
00122   _preface->reset();  // set the preface string back to nothing.
00123   while (parsing.length()) {
00124     astring section_name;
00125     bool found_sect = parse_section(parsing, section_name);
00126     if (!found_sect) {
00127       // the line is not a section name.  toss it.
00128       chow_through_eol(parsing);
00129       continue;  // try to find another section name.
00130     }
00131     // we got a section.  yee hah.
00132     int next_sect = 0;
00133     for (next_sect = 0; next_sect < parsing.length(); next_sect++) {
00134 //      LOG(astring("[") + astring(parsing[next_sect], 1) + "]");
00135       if (parser_bits::is_eol(parsing[next_sect])) {
00136         // we found the requisite return; let's see if a section beginning
00137         // is just after it.  we know nothing else should be, since we stripped
00138         // out the blank lines and blanks after CRs.
00139         if (parsing[next_sect + 1] == '[') {
00140           // aha, found the bracket that should be a section start.
00141           break;  // done seeking next section beginning.
00142         }
00143       }
00144     }
00145     // skip back one if we hit the end of the string.
00146     if (next_sect >= parsing.length()) next_sect--;
00147     // now grab what should be all values within a section.
00148     LOG(a_sprintf("bounds are %d to %d, string len is %d.", 0, next_sect,
00149         parsing.length()));
00150     astring sect_parsing = parsing.substring(0, next_sect);
00151     LOG(astring("going to parse: >>") + sect_parsing + "<<");
00152     parsing.zap(0, next_sect);
00153     variable_tokenizer section_reader("\n", "=");
00154     section_reader.set_comment_chars(";#");
00155     section_reader.parse(sect_parsing);
00156     LOG(astring("read: ") + section_reader.text_form());
00157     merge_section(section_name, section_reader.table());
00158   }
00159   _well_formed = true;
00160 }
00161 
00162 void ini_parser::merge_section(const astring &section_name,
00163     const string_table &to_merge)
00164 {
00165   if (!section_exists(section_name)) {
00166     // didn't exist yet, so just plunk it in.
00167     put_section(section_name, to_merge);
00168     return;
00169   }
00170   
00171   // since the section exists, we just write the individual entries from the
00172   // new section.  they'll stamp out any old values.
00173   for (int i = 0; i < to_merge.symbols(); i++)
00174     put(section_name, to_merge.name(i), to_merge[i]);
00175 }
00176 
00177 bool ini_parser::parse_section(astring &to_parse, astring &section_name)
00178 {
00179   section_name = "";  // reset the section.
00180 
00181   // we have a simple state machine here...
00182   enum states {
00183     SEEKING_OPENING_BRACKET,  // looking for the first bracket.
00184     EATING_SECTION_NAME       // got a bracket, now getting section name.
00185   };
00186   states state = SEEKING_OPENING_BRACKET;
00187 
00188   // zip through the string trying to find a valid section name.
00189   for (int i = 0; i < to_parse.length(); i++) {
00190     char curr = to_parse[i];
00191     LOG(astring("<") + astring(curr, 1) + ">");
00192     switch (state) {
00193       case SEEKING_OPENING_BRACKET:
00194         // we're looking for the first bracket now...
00195         if (parser_bits::white_space(curr)) continue;  // ignore white space.
00196         if (curr != '[') return false;  // argh, bad characters before bracket.
00197         state = EATING_SECTION_NAME;  // found the bracket.
00198         break;
00199       case EATING_SECTION_NAME:
00200         // we're adding to the section name now...
00201         if (curr == ']') {
00202           // that's the end of the section name.
00203           to_parse.zap(0, i);  // remove what we saw.
00204 //should we take out to end of line also?
00205 //eventually up to eol could be kept as a comment?
00206           return true;
00207         }
00208         section_name += curr;  // add a character to the name.
00209         break;
00210       default:
00211         //LOG("got to unknown case in section parser!");
00212         return false;
00213     }
00214   }
00215   // if we got to here, the section was badly formed...  the whole string was
00216   // parsed through but no conclusion was reached.
00217   return false;
00218 }
00219 
00220 bool ini_parser::restate(astring &new_ini, bool add_spaces)
00221 {
00222   new_ini = *_preface;  // give it the initial text back again.
00223   string_array sects;
00224   sections(sects);
00225   for (int i = 0; i < sects.length(); i++) {
00226     new_ini += astring("[") + sects[i] + "]" + parser_bits::platform_eol_to_chars();
00227     string_table tab;
00228     if (!get_section(sects[i], tab)) continue;  // serious error.
00229     tab.add_spaces(add_spaces);
00230     new_ini += tab.text_form();
00231   }
00232   return true;
00233 }
00234 
00235 } //namespace.
00236 
00237 
Generated on Sat Jan 28 04:22:17 2012 for hoople2 project by  doxygen 1.6.3