00001 #ifndef LIST_PARSING_IMPLEMENTATION_FILE
00002 #define LIST_PARSING_IMPLEMENTATION_FILE
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "list_parsing.h"
00022 #include "parser_bits.h"
00023
00024 #include <basis/istring.h>
00025 #include <basis/log_base.h>
00026 #include <basis/set.cpp>
00027 #include <data_struct/string_table.h>
00028
00029 #include <ctype.h>
00030
00031 #undef LOG
00032 #define LOG(to_print) CLASS_EMERGENCY_LOG(program_wide_logger(), to_print)
00033
00034 list_parsing::~list_parsing() {}
00035
00036
00037 bool list_parsing::get_ids_from_string(const istring &to_parse,
00038 int_set &identifiers)
00039 {
00040 identifiers.clear();
00041 int_array found;
00042 bool ret = get_ids_from_string(to_parse, found);
00043 if (!ret) return false;
00044 for (int i = 0; i < found.length(); i++) identifiers.add(found[i]);
00045 return true;
00046 }
00047
00048
00049 bool list_parsing::get_ids_from_string(const istring &to_parse,
00050 int_array &identifiers)
00051 {
00052 identifiers.reset();
00053 if (!to_parse) return false;
00054
00055
00056 int last_id = -1;
00057 int tmp_id;
00058 bool done = false;
00059 char last_separator = ' ';
00060
00061 int index = 0;
00062 while (!done && (index < to_parse.length())) {
00063 tmp_id = 0;
00064 bool got_digit = false;
00065 while ( (to_parse[index] != ',') && (to_parse[index] != '-')
00066 && (to_parse[index] != ' ') && (index < to_parse.length()) ) {
00067 if (!isdigit(to_parse[index])) return false;
00068 tmp_id *= 10;
00069 tmp_id += int(to_parse[index++]) - 0x30;
00070 got_digit = true;
00071 }
00072
00073 if (got_digit) {
00074 if (tmp_id > MAXINT) return false;
00075
00076 if (last_id == -1) {
00077 last_id = tmp_id;
00078 identifiers += last_id;
00079 } else {
00080
00081 if (last_separator == '-') {
00082 if (tmp_id >= last_id) {
00083 for (int i = last_id + 1; i <= tmp_id; i++)
00084 identifiers += i;
00085 }
00086 else {
00087 for (int i = tmp_id; i < last_id; i++)
00088 identifiers += i;
00089 }
00090 last_id = 0;
00091 last_separator = ' ';
00092 } else {
00093 last_id = tmp_id;
00094 identifiers += last_id;
00095 }
00096 }
00097 } else {
00098
00099 if ( (to_parse[index] != ' ') && (to_parse[index] != '-')
00100 && (to_parse[index] != ',') ) return false;
00101 last_separator = to_parse[index++];
00102 }
00103 }
00104 return true;
00105 }
00106
00107
00108 istring list_parsing::put_ids_in_string(const int_set &ids, char separator)
00109 {
00110 istring to_return;
00111 for (int i = 0; i < ids.length(); i++) {
00112 to_return += isprintf("%d", ids[i]);
00113 if (i < ids.length() - 1) {
00114 to_return += separator;
00115 to_return += " ";
00116 }
00117 }
00118 return to_return;
00119 }
00120
00121
00122 istring list_parsing::put_ids_in_string(const int_array &ids, char separator)
00123 {
00124 istring to_return;
00125 for (int i = 0; i < ids.length(); i++) {
00126 to_return += isprintf("%d", ids[i]);
00127 if (i < ids.length() - 1) {
00128 to_return += separator;
00129 to_return += " ";
00130 }
00131 }
00132 return to_return;
00133 }
00134
00135 #define ADD_TO_VALUES() \
00136 string.substring(value, start, end - 1); \
00137 if( true == strip_spaces ) value.strip_spaces(); \
00138 if( value.length() > 0 ) values += value;
00139
00140 bool list_parsing::get_values_from_string(const istring &string,
00141 string_array &values, const char separator, const bool strip_spaces)
00142 {
00143 values.reset();
00144 const int strlen = string.length();
00145 int start = 0;
00146 int end = -1;
00147 istring value;
00148 while( (end = string.find(separator, start)) >= 0 )
00149 {
00150 ADD_TO_VALUES();
00151 start = end + 1;
00152 }
00153 if( start < strlen )
00154 {
00155 end = strlen;
00156 ADD_TO_VALUES();
00157 }
00158 return values.length() > 0;
00159 }
00160
00161 #define LOC_VALID(loc) if( (loc) < 0 ) return false;
00162
00163 bool list_parsing::get_separated_value(const istring &string,
00164 const istring &name,
00165 istring &value, const byte assign,
00166 const byte separator)
00167 {
00168
00169 const int start_pos = string.ifind(name);
00170 LOC_VALID(start_pos);
00171
00172 const int assign_pos = string.find(assign, start_pos + name.length());
00173 LOC_VALID(assign_pos);
00174
00175 const int sep_pos = string.find(separator, assign_pos + 1);
00176 LOC_VALID(sep_pos);
00177
00178 value = string.substring(assign_pos + 1, sep_pos - 1);
00179 return true;
00180 }
00181
00182 bool list_parsing::get_rest_of_line(const istring &string,
00183 const istring &name, istring &value,
00184 const byte assign)
00185 {
00186 const int len = string.length();
00187
00188 const int start_pos = string.ifind(name);
00189 LOC_VALID(start_pos);
00190
00191 const int assign_pos = string.find(assign, start_pos + name.length());
00192 LOC_VALID(assign_pos);
00193
00194 value = string.substring(assign_pos + 1, len - 1);
00195 return true;
00196 }
00197
00198 int list_parsing::get_positions(const istring &string, const char separator,
00199 int_array &positions)
00200 {
00201 positions.reset();
00202 int pos = -1;
00203 while( (pos = string.find(separator, pos + 1)) >= 0 )
00204 positions += pos;
00205 return positions.length();
00206 }
00207
00208
00209
00210
00211 istring list_parsing::emit_quoted_chunk(const istring &to_emit)
00212 {
00213 istring to_return('\0', 256);
00214 to_return = "";
00215 for (int i = 0; i < to_emit.length(); i++) {
00216 char next_char = to_emit[i];
00217 if (next_char == '"') to_return += "\\";
00218 to_return += istring(next_char, 1);
00219 }
00220 return to_return;
00221 }
00222
00223 void list_parsing::create_csv_line(const string_table &to_csv, istring &target)
00224 {
00225 target = istring::empty_string();
00226 for (int i = 0; i < to_csv.symbols(); i++) {
00227 target += istring("\"") + emit_quoted_chunk(to_csv.name(i))
00228 + "=" + emit_quoted_chunk(to_csv[i]) + "\"";
00229 if (i < to_csv.symbols() - 1) target += ",";
00230 }
00231 }
00232
00233 void list_parsing::create_csv_line(const string_array &to_csv, istring &target)
00234 {
00235 target = istring::empty_string();
00236 for (int i = 0; i < to_csv.length(); i++) {
00237 target += istring("\"") + emit_quoted_chunk(to_csv[i]) + "\"";
00238 if (i < to_csv.length() - 1) target += ",";
00239 }
00240 }
00241
00242
00243 #define handle_escapes \
00244 if (to_parse[i] == '\\') { \
00245 if (to_parse[i + 1] == '"') { \
00246 i++; \
00247 accumulator += to_parse[i]; \
00248 continue; \
00249 } \
00250 }
00251
00252 const int ARRAY_PREFILL_AMOUNT = 7;
00253
00254
00255 #define ADD_LINE_TO_FIELDS(new_line) { \
00256 storage_slot++; \
00257 \
00258 \
00259 if (fields.length() < storage_slot + 2) \
00260 fields.insert(fields.length(), ARRAY_PREFILL_AMOUNT); \
00261 \
00262 fields[storage_slot] = new_line; \
00263 }
00264
00265
00266
00267 bool list_parsing::parse_csv_line(const istring &to_parse, string_array &fields)
00268 {
00269 FUNCDEF("parse_csv_line");
00270
00271
00272 istring accumulator(' ', 256);
00273 accumulator = istring::empty_string();
00274
00275
00276
00277 enum states { seeking_quote, eating_string, seeking_comma };
00278 states state = seeking_quote;
00279
00280 bool no_second_quote = false;
00281 bool just_saw_comma = false;
00282
00283 int storage_slot = -1;
00284
00285 for (int i = 0; i < to_parse.length(); i++) {
00286 switch (state) {
00287 case seeking_quote:
00288 if (parser_bits::white_space(to_parse[i])) continue;
00289 if (to_parse[i] == ',') {
00290
00291 ADD_LINE_TO_FIELDS(istring::empty_string());
00292 just_saw_comma = true;
00293 continue;
00294 }
00295 just_saw_comma = false;
00296 if (to_parse[i] != '"') {
00297
00298 accumulator += to_parse[i];
00299 no_second_quote = true;
00300 }
00301 state = eating_string;
00302 break;
00303 case eating_string:
00304 just_saw_comma = false;
00305 if (no_second_quote && (to_parse[i] != ',') ) {
00306 handle_escapes;
00307 accumulator += to_parse[i];
00308 } else if (!no_second_quote && (to_parse[i] != '"') ) {
00309 handle_escapes;
00310 accumulator += to_parse[i];
00311 } else {
00312
00313 if (no_second_quote) {
00314 state = seeking_quote;
00315 just_saw_comma = true;
00316 } else state = seeking_comma;
00317 ADD_LINE_TO_FIELDS(accumulator)
00318 accumulator = istring::empty_string();
00319 no_second_quote = false;
00320 }
00321 break;
00322 case seeking_comma:
00323 if (parser_bits::white_space(to_parse[i])) continue;
00324 if (to_parse[i] == ',') {
00325
00326 state = seeking_quote;
00327 just_saw_comma = true;
00328 continue;
00329 }
00330
00331 return false;
00332 break;
00333 default:
00334 LOG("erroneous state reached during csv parsing");
00335 break;
00336 }
00337 }
00338 if ( (state == eating_string) && (accumulator.length()) )
00339 ADD_LINE_TO_FIELDS(accumulator)
00340 else if (just_saw_comma)
00341 ADD_LINE_TO_FIELDS(istring::empty_string())
00342 if (fields.length() > storage_slot + 1)
00343 fields.zap(storage_slot + 1, fields.last());
00344 return true;
00345 }
00346
00349
00350
00351 #endif //LIST_PARSING_IMPLEMENTATION_FILE
00352