t_tokenizer.cpp

Go to the documentation of this file.
00001 /*****************************************************************************\
00002 *                                                                             *
00003 *  Name   : test_tokenizer                                                    *
00004 *  Author : Chris Koeritz                                                     *
00005 *                                                                             *
00006 *  Purpose:                                                                   *
00007 *                                                                             *
00008 *    Puts the tokenizer through some paces.                                   *
00009 *                                                                             *
00010 *******************************************************************************
00011 * Copyright (c) 1998-$now By Author.  This program is free software; you can  *
00012 * redistribute it and/or modify it under the terms of the GNU General Public  *
00013 * License as published by the Free Software Foundation; either version 2 of   *
00014 * the License or (at your option) any later version.  This is online at:      *
00015 *     http://www.fsf.org/copyleft/gpl.html                                    *
00016 * Please send any updates to: fred@gruntose.com                               *
00017 \*****************************************************************************/
00018 
00019 #include <basis/byte_array.h>
00020 #include <basis/function.h>
00021 #include <basis/guards.h>
00022 #include <basis/istring.h>
00023 #include <opsystem/application_shell.h>
00024 #include <opsystem/byte_filer.h>
00025 #include <data_struct/static_memory_gremlin.h>
00026 #include <data_struct/string_table.h>
00027 #include <textual/parser_bits.h>
00028 #include <textual/tokenizer.h>
00029 
00030 HOOPLE_STARTUP_CODE;
00031 
00032 #define TEST_OCM_STOCK_LIST
00033   // while this is defined, execute() also runs the OCM stock list test; comment it out to skip that test.
00034 
00035 const int MAX_LINE_SIZE = 1000;
00036   // the largest line we will read from a file in a single getline call.
00037 
00038 class test_tokenizer : public application_shell
00039 {
00040 public:
00041   test_tokenizer() : application_shell("t_tokenizer") {}
00042 
00043   IMPLEMENT_CLASS_NAME("test_tokenizer");
00044 
00045   virtual int execute();
00046 };
00047 
00049 
00050 int test_tokenizer::execute()
00051 {
00052   {
00053     istring test_set_1 = "\n\
00054 [frederick]\n\
00055 samba=dance\n\
00056 tantalus rex=gumby\n\
00057 57 chevy heap=\"16 anagrams of misty immediately\"\n\
00058 lingus distractus\n\
00059 shouldus havus assignmentum=\n\
00060 above better be parsed = 1\n\
00061 ;and this comment too yo\n\
00062 ted=agent 12\n";
00063 
00064     const char *TEST = "First Test";
00065     istring testing = test_set_1;
00066     log(istring("file before parsing:\n") + testing);
00067     tokenizer jed("\n\r", "=");
00068     jed.parse(testing);
00069     istring out;
00070     jed.text_form(out);
00071     tokenizer gorp("\n\r", "=");
00072     gorp.parse(out);
00073     log(istring("file after parsing:\n") + out);
00074     log("and in tabular form:");
00075     log(jed.table().text_form());
00076 
00077 //for (int i = 0; i < gorp.table().symbols(); i++) {
00078 //istring name, value;
00079 //gorp.table().retrieve(i, name, value);
00080 //log(isprintf("item %d: name=\"%s\" value=\"%s\"", i, name.s(), value.s()));
00081 //}
00082 
00083     if (!jed.exists("[frederick]"))
00084       deadly_error(class_name(), TEST, "jed section header was omitted!");
00085     if (jed.find("[frederick]") != "")
00086       deadly_error(class_name(), TEST, "jed section header had unexpected contents!");
00087     if (jed.find("ted") != "agent 12")
00088       deadly_error(class_name(), TEST, "jed's ted is missing or invalid!");
00089     if (jed.find("shouldus havus assignmentum").t())
00090       deadly_error(class_name(), TEST, "jed's shouldus had contents but shouldn't!");
00091     istring value = *jed.table().find("shouldus havus assignmentum");
00092     if (value != " ")
00093       deadly_error(class_name(), TEST, "shouldus had wrong contents, not special!");
00094     if (!gorp.exists("[frederick]"))
00095       deadly_error(class_name(), TEST, "gorp section header was omitted!");
00096     if (gorp.find("[frederick]") != "")
00097       deadly_error(class_name(), TEST, "gorp section header had unexpected contents!");
00098     if (gorp.find("ted") != "agent 12")
00099       deadly_error(class_name(), TEST, "gorp's ted is missing or invalid!");
00100     if (gorp.find("shouldus havus assignmentum").t())
00101       deadly_error(class_name(), TEST, "gorp's shouldus had contents but shouldn't!");
00102     value = *gorp.table().find("shouldus havus assignmentum");
00103     if (value != " ")
00104       deadly_error(class_name(), TEST, "shouldus had wrong contents, not special!");
00105   }    
00106   {
00107     istring test_set_2 = "Name=SRV,   Parent=,        Persist=Y,  Entry=Y, Required=Y, Desc=Server,               Tbl=Server";
00108 
00109     const char *TEST = "Second Test";
00110     istring testing = test_set_2;
00111     log(istring("file before parsing:\n") + testing);
00112     tokenizer jed(",", "=");
00113     jed.parse(testing);
00114     istring out;
00115     jed.text_form(out);
00116     log(istring("file after parsing:\n") + out);
00117     log("and in tabular form:");
00118     log(jed.table().text_form());
00119     if (jed.find("Name") != "SRV")
00120       deadly_error(class_name(), TEST, "Name is missing or invalid!");
00121     if (jed.find("Parent").t())
00122       deadly_error(class_name(), TEST, "Parent had contents but shouldn't!");
00123     istring value = *jed.table().find("Parent");
00124     if (value != " ")
00125       deadly_error(class_name(), TEST, "Parent had wrong contents, not special!");
00126     if (jed.find("Persist") != "Y")
00127       deadly_error(class_name(), TEST, "Persist is missing or invalid!");
00128   }    
00129 
00130 //hmmm: this should be turned off when we don't have access to the file.
00131 //      or...  why not just put a copy of the file in the test folder?
00132 #ifdef TEST_OCM_STOCK_LIST
00133   {
00134     // test the tokenizer on the stock ocm list.  this is very specialized
00135     // and so is not part of the normal testing.
00136     byte_filer list_file("l:/packages/srvcfg/odms/ocm_list.ini", "r");
00137     istring ocms;
00138     byte_array chunks;
00139     istring the_line;
00140     int curr_line = 0;
00141     while (true) {
00142       chunks.reset();
00143       if (list_file.getline(chunks, MAX_LINE_SIZE)) {
00144         curr_line++;
00145         the_line = (char *)chunks.observe();
00146         // add the line before we strip anything, since the main tokenizer
00147         // needs to find CR on the end.
00148         ocms += the_line;
00149         // chop any extra spaces or returns off the end.
00150         while (the_line.length()
00151             && parser_bits::white_space(the_line[the_line.end()]))
00152           the_line.zap(the_line.end(), the_line.end());
00153         // eat each line and test tokenizer for a different set of parms.
00154         tokenizer liner(",", "=");
00155         liner.parse(the_line);
00156         log(istring(istring::SPRINTF, "%02d: ", curr_line) + liner.text_form());
00157       } else break;
00158     }
00159 
00160     tokenizer stocky("\n\r", "=");
00161     stocky.parse(ocms);
00162     log("");
00163     log("got these for the tokenized list:");
00164     log(stocky.text_form());
00165   }
00166 #endif
00167 
00168   {
00169     istring test_set_3 = "\n\
00170 [frederick]\n\
00171 samba=dance\n\
00172 tantalus rex=gumby \"don#t\n\n'play'\nthat\" homey '\n\ndog\n\n yo \"\ncreen\" arf'\n\
00173 57 chevy heap=\"16 anagrams of misty immediately\"\n\
00174 lingus distractus\n\
00175 shouldus havus assignmentum=\n\
00176 above better be parsed = 1\n\
00177 ;and this comment too yo\n\
00178 ted=agent 12\n";
00179 
00180     const char *TEST = "Third Test";
00181     istring testing = test_set_3;
00182     log(istring("file before parsing:\n") + testing);
00183     tokenizer jed("\n\r", "=", "\'\"");
00184     jed.parse(testing);
00185     istring out;
00186     jed.text_form(out);
00187     tokenizer gorp("\n\r", "=", "\'\"");
00188     gorp.parse(out);
00189     log(istring("file after parsing:\n") + out);
00190     log("and in tabular form:");
00191     log(jed.table().text_form());
00192     if (!jed.exists("[frederick]"))
00193       deadly_error(class_name(), TEST, "section header was omitted!");
00194     if (jed.find("[frederick]") != "")
00195       deadly_error(class_name(), TEST, "section header had unexpected contents!");
00196     if (jed.find("ted") != "agent 12")
00197       deadly_error(class_name(), TEST, "ted is missing or invalid!");
00198     if (jed.find("shouldus havus assignmentum").t())
00199       deadly_error(class_name(), TEST, "shouldus had contents but shouldn't!");
00200     istring value = *jed.table().find("shouldus havus assignmentum");
00201     if (value != " ")
00202       deadly_error(class_name(), TEST, "shouldus had wrong contents, not special!");
00203     if (!gorp.exists("[frederick]"))
00204       deadly_error(class_name(), TEST, "section header was omitted!");
00205     if (gorp.find("[frederick]") != "")
00206       deadly_error(class_name(), TEST, "section header had unexpected contents!");
00207     if (gorp.find("ted") != "agent 12")
00208       deadly_error(class_name(), TEST, "ted is missing or invalid!");
00209     if (gorp.find("shouldus havus assignmentum").t())
00210       deadly_error(class_name(), TEST, "shouldus had contents but shouldn't!");
00211     value = *gorp.table().find("shouldus havus assignmentum");
00212     if (value != " ")
00213       deadly_error(class_name(), TEST, "shouldus had wrong contents, not special!");
00214     if (!gorp.exists("tantalus rex"))
00215       deadly_error(class_name(), TEST, "tantalus rex is missing!");
00216     if (gorp.find("tantalus rex")
00217         != "gumby \"don#t\n\n'play'\nthat\" homey '\n\ndog\n\n yo "
00218            "\"\ncreen\" arf'")
00219       deadly_error(class_name(), TEST, "tantalus rex has incorrect contents!");
00220   }
00221   {
00222     istring test_set_4 = "\n\
00223 [garfola]\n\
00224 treadmill=\"this ain't the place\nwhere'n we been done\nseein' no quotes\"\n\
00225 borfulate='similarly \"we\" do not like\nthe \" quote \" type thing here'\n\
00226 ";
00227 
00228     const char *TEST = "Fourth Test";
00229     istring testing = test_set_4;
00230     log(istring("file before parsing:\n") + testing);
00231     tokenizer jed("\n\r", "=", "\'\"", false);
00232     jed.parse(testing);
00233     istring out;
00234     jed.text_form(out);
00235     tokenizer gorp("\n\r", "=", "\'\"", false);
00236     gorp.parse(out);
00237     log(istring("file after parsing:\n") + out);
00238     log("and in tabular form:");
00239     log(jed.table().text_form());
00240     if (!gorp.exists("[garfola]"))
00241       deadly_error(class_name(), TEST, "section header was omitted!");
00242     if (gorp.find("[garfola]") != "")
00243       deadly_error(class_name(), TEST, "section header had unexpected contents!");
00244     if (!gorp.exists("treadmill"))
00245       deadly_error(class_name(), TEST, "treadmill is missing!");
00246     if (gorp.find("treadmill")
00247         != "\"this ain't the place\nwhere'n we been done\nseein' no quotes\"")
00248       deadly_error(class_name(), TEST, "treadmill has incorrect contents!");
00249     if (!gorp.exists("borfulate"))
00250       deadly_error(class_name(), TEST, "borfulate is missing!");
00251     if (gorp.find("borfulate")
00252         != "'similarly \"we\" do not like\nthe \" quote \" type thing here'")
00253       deadly_error(class_name(), TEST, "borfulate has incorrect contents!");
00254   }
00255   {
00256     istring test_set_5 = "\n\
00257  x~35; y~92   ;#comment   ; d   ~83 ;  e~   54   ; ? new comment  ;sud   ~  xj23-8 ; nigh ~2";
00258 
00259     const char *TEST = "Fifth Test";
00260     istring testing = test_set_5;
00261     log(istring("file before parsing:\n") + testing);
00262     tokenizer jed(";", "~");
00263     jed.set_comment_chars("#?");
00264     jed.parse(testing);
00265     istring out;
00266     jed.text_form(out);
00267     log(istring("file after parsing:\n") + out);
00268     log("and in tabular form:");
00269     log(jed.table().text_form());
00270 
00271     tokenizer gorp(";", "~");
00272     gorp.set_comment_chars("#?");
00273     gorp.parse(out);
00274     log("gorp in tabular form:");
00275     log(gorp.table().text_form());
00276     if (gorp.table() != jed.table()) 
00277       deadly_error(class_name(), TEST, "gorp text not same as jed!");
00278 
00279     if (jed.find("x") != "35")
00280       deadly_error(class_name(), TEST, "value for x missing or invalid");
00281     if (jed.find("y") != "92")
00282       deadly_error(class_name(), TEST, "value for y missing or invalid");
00283     if (jed.find("d") != "83")
00284       deadly_error(class_name(), TEST, "value for d missing or invalid");
00285     if (jed.find("e") != "54")
00286       deadly_error(class_name(), TEST, "value for e missing or invalid");
00287     if (jed.find("sud") != "xj23-8")
00288       deadly_error(class_name(), TEST, "value for sud missing or invalid");
00289     if (jed.find("nigh") != "2")
00290       deadly_error(class_name(), TEST, "value for nigh missing or invalid");
00291   }    
00292   {
00293     istring test_set_6 = "\r\n\r\n\r\
00294 # this is yet another test with comments.\r\n\
00295 ; we want to be sure stuff works right.\r\n\
00296 crumpet=tempest\r\n\
00297   moomar=18\r\n\
00298 shagbot  =once upon a time there was a man  \r\n\
00299 \t\t\tpunzola megamum  =brandle the handle  \r\n\
00300 trapzoot=  uhhh\r\n\
00301 mensch   = racer X\r\n\
00302 \r\n\r\n\r\n";
00303 
00304     const char *TEST = "Sixth Test";
00305     istring testing = test_set_6;
00306     log(istring("file before parsing:\n") + testing);
00307     tokenizer jed("\n\r", "=");
00308     jed.set_comment_chars("#;");
00309     jed.parse(testing);
00310     istring out;
00311     jed.text_form(out);
00312     log(istring("file after parsing:\n") + out);
00313     log("and in tabular form:");
00314     log(jed.table().text_form());
00315 
00316     tokenizer gorp("\n\r", "=");
00317     gorp.set_comment_chars("#;");
00318     gorp.parse(out);
00319     log("gorp in tabular form:");
00320     log(gorp.table().text_form());
00321 log(isprintf("gorp has %d fields, jed has %d fields", gorp.symbols(), jed.symbols()));
00322     if (gorp.table() != jed.table()) 
00323       deadly_error(class_name(), TEST, "gorp text not same as jed!");
00324 
00325     if (jed.find("crumpet") != "tempest")
00326       deadly_error(class_name(), TEST, "value for crumpet missing or invalid");
00327     if (jed.find("moomar") != "18")
00328       deadly_error(class_name(), TEST, "value for moomar missing or invalid");
00329     if (jed.find("shagbot") != "once upon a time there was a man")
00330       deadly_error(class_name(), TEST, "value for shagbot missing or invalid");
00331     if (jed.find("trapzoot") != "uhhh")
00332       deadly_error(class_name(), TEST, "value for trapzoot missing or invalid");
00333     if (jed.find("punzola megamum") != "brandle the handle")
00334       deadly_error(class_name(), TEST, "value for punzola missing or invalid");
00335     if (jed.find("mensch") != "racer X")
00336       deadly_error(class_name(), TEST, "value for mensch missing or invalid");
00337   }    
00338 
00339   guards::alert_message("tokenizer:: works for those functions tested.");
00340   return 0;
00341 }
00342 
00344 
00345 int main(int formal(argc), char *formal(argv)[])
00346 {
00347   test_tokenizer to_test;
00348   return to_test.execute();
00349 }
00350 

Generated on Fri Nov 28 04:29:40 2008 for HOOPLE Libraries by  doxygen 1.5.1