parser_bits.cpp

Go to the documentation of this file.
00001 /*****************************************************************************\
00002 *                                                                             *
00003 *  Name   : parser_bits                                                       *
00004 *  Author : Chris Koeritz                                                     *
00005 *                                                                             *
00006 *******************************************************************************
00007 * Copyright (c) 2000-$now By Author.  This program is free software; you can  *
00008 * redistribute it and/or modify it under the terms of the GNU General Public  *
00009 * License as published by the Free Software Foundation; either version 2 of   *
00010 * the License or (at your option) any later version.  This is online at:      *
00011 *     http://www.fsf.org/copyleft/gpl.html                                    *
00012 * Please send any updates to: fred@gruntose.com                               *
00013 \*****************************************************************************/
00014 
00015 #include "parser_bits.h"
00016 
00017 #include <basis/astring.h>
00018 #include <basis/environment.h>
00019 #include <basis/functions.h>
00020 
00021 #include <ctype.h>
00022 #include <stdio.h>
00023 
00024 using namespace basis;
00025 
00026 #undef LOG
00027 #define LOG(prf) printf("%s\n", basis::astring(prf).s())
00028 
00029 namespace textual {
00030 
00031 parser_bits::line_ending parser_bits::platform_eol()
00032 {
00033 #ifdef __UNIX__
00034   // obviously a unix OS, unless someone's playing games with us.
00035   return LF_AT_END;
00036 #elif defined(__WIN32__)
00037   // smells like DOS.
00038   return CRLF_AT_END;
00039 #else
00040   // pick the unix default if we can't tell.
00041   return LF_AT_END;
00042 #endif
00043 }
00044 
00045 const char *parser_bits::eol_to_chars(line_ending end)
00046 {
00047   static const char *CRLF_AT_END_STRING = "\r\n";
00048   static const char *LF_AT_END_STRING = "\n";
00049   static const char *NO_ENDING_STRING = "";
00050 
00051   switch (end) {
00052     case CRLF_AT_END: return CRLF_AT_END_STRING;
00053     case NO_ENDING: return NO_ENDING_STRING;
00054     case LF_AT_END:  // fall-through to default.
00055     default: return LF_AT_END_STRING;
00056   }
00057 }
00058 
00059 const char *parser_bits::platform_eol_to_chars()
00060 { return eol_to_chars(platform_eol()); }
00061 
00062 bool parser_bits::is_printable_ascii(char to_check)
00063 { return (to_check >= 32) && (to_check <= 126); }
00064 
00065 bool parser_bits::white_space_no_cr(char to_check)
00066 { return (to_check == ' ') || (to_check == '\t'); }
00067 
00068 bool parser_bits::is_eol(char to_check)
00069 { return (to_check == '\n') || (to_check == '\r'); }
00070 
00071 bool parser_bits::white_space(char to_check)
00072 { return white_space_no_cr(to_check) || is_eol(to_check); }
00073 
00074 void parser_bits::translate_CR_for_platform(astring &to_translate)
00075 {
00076   line_ending plat_eol = platform_eol();
00077   bool last_was_lf = false;
00078   for (int i = 0; i <= to_translate.end(); i++) {
00079     if (to_translate[i] == '\r') {
00080       if (last_was_lf) continue;  // ignore two in a row.
00081       last_was_lf = true;
00082     } else if (to_translate[i] == '\n') {
00083       if (last_was_lf) {
00084         if (plat_eol != CRLF_AT_END) {
00085           // fix it, since there was not supposed to be an LF.
00086           to_translate.zap(i - 1, i - 1);
00087           i--;
00088         }
00089       } else {
00090         if (plat_eol == CRLF_AT_END) {
00091           // fix it, since we're missing an LF that we want.
00092           to_translate.insert(i, "\r");
00093           i++;
00094         }
00095       }
00096       last_was_lf = false;
00097     } else {
00098       // not the two power characters.
00099       last_was_lf = false;
00100     }
00101   }
00102 }
00103 
00104 bool parser_bits::is_hexadecimal(char look_at)
00105 {
00106   return range_check(look_at, 'a', 'f')
00107       || range_check(look_at, 'A', 'F')
00108       || range_check(look_at, '0', '9');
00109 }
00110 
00111 bool parser_bits::is_hexadecimal(const char *look_at, int len)
00112 {
00113   for (int i = 0; i < len; i++)
00114     if (!is_hexadecimal(look_at[i])) return false;
00115   return true;
00116 }
00117 
00118 bool parser_bits::is_hexadecimal(const astring &look_at, int len)
00119 { return is_hexadecimal(look_at.observe(), len); }
00120 
00121 bool parser_bits::is_alphanumeric(char look_at)
00122 {
00123   return range_check(look_at, 'a', 'z')
00124       || range_check(look_at, 'A', 'Z')
00125       || range_check(look_at, '0', '9');
00126 }
00127 
00128 bool parser_bits::is_alphanumeric(const char *look_at, int len)
00129 {
00130   for (int i = 0; i < len; i++)
00131     if (!is_alphanumeric(look_at[i])) return false;
00132   return true;
00133 }
00134 
00135 bool parser_bits::is_alphanumeric(const astring &look_at, int len)
00136 { return is_alphanumeric(look_at.observe(), len); }
00137 
00138 bool parser_bits::is_identifier(char look_at)
00139 {
00140   return range_check(look_at, 'a', 'z')
00141       || range_check(look_at, 'A', 'Z')
00142       || range_check(look_at, '0', '9')
00143       || (look_at == '_');
00144 }
00145 
00146 bool parser_bits::is_identifier(const char *look_at, int len)
00147 {
00148   if (is_numeric(look_at[0])) return false;
00149   for (int i = 0; i < len; i++)
00150     if (!is_identifier(look_at[i])) return false;
00151   return true;
00152 }
00153 
00154 bool parser_bits::is_identifier(const astring &look_at, int len)
00155 { return is_identifier(look_at.observe(), len); }
00156 
00157 bool parser_bits::is_numeric(char look_at)
00158 {
00159   return range_check(look_at, '0', '9') || (look_at == '-');
00160 }
00161 
00162 bool parser_bits::is_numeric(const char *look_at, int len)
00163 {
00164   for (int i = 0; i < len; i++) {
00165     if (!is_numeric(look_at[i])) return false;
00166     if ( (i > 0) && (look_at[i] == '-') ) return false;
00167   }
00168   return true;
00169 }
00170 
00171 bool parser_bits::is_numeric(const astring &look_at, int len)
00172 { return is_numeric(look_at.observe(), len); }
00173 
00174 astring parser_bits::substitute_env_vars(const astring &to_process,
00175     bool leave_unknown)
00176 {
00177   astring editing = to_process;
00178 
00179 //LOG(astring("input to subst env: ") + to_process);
00180 
00181   int indy;  // index of the dollar sign in the string.
00182   while (true) {
00183     indy = editing.find('$');
00184     if (negative(indy)) break;  // all done.
00185     int q;
00186     for (q = indy + 1; q < editing.length(); q++) {
00187       if (!parser_bits::is_identifier(editing[q]))
00188         break;  // done getting variable name.
00189     }
00190     if (q != indy + 1) {
00191       // we caught something in our environment variable trap...
00192       astring var_name = editing.substring(indy + 1, q - 1);
00193 //LOG(astring("var name ") + var_name);
00194       astring value_found = environment::get(var_name);
00195 //LOG(astring("val found ") + value_found);
00196       if (value_found.t()) {
00197         editing.zap(indy, q - 1);
00198         editing.insert(indy, value_found);
00199       } else {
00200         if (leave_unknown) {
00201           // that lookup failed.  let's mark it.
00202           editing[indy] = '?';
00203             // simple replacement, shows variables that failed.
00204         } else {
00205           // replace it with blankness.
00206           editing.zap(indy, q - 1);
00207         }
00208       }
00209     } else {
00210       // well, we didn't see a valid variable name, but we don't want to leave
00211       // the dollar sign in there.
00212       editing[indy] = '!';  // simple replacement, marks where syntax is bad.
00213     }
00214 
00215   }
00216 
00217 //LOG(astring("output from subst env: ") + editing);
00218 
00219   return editing;
00220 }
00221 
00222 } //namespace.
00223 
Generated on Sat Jan 28 04:22:33 2012 for hoople2 project by  doxygen 1.6.3