00001 #ifndef STRING_MANIPULATION_IMPLEMENTATION_FILE
00002 #define STRING_MANIPULATION_IMPLEMENTATION_FILE
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "parser_bits.h"
00021 #include "string_convert.h"
00022 #include "string_manipulation.h"
00023
00024 #include <basis/byte_array.h>
00025 #include <basis/chaos.h>
00026 #include <basis/function.h>
00027 #include <basis/istring.h>
00028 #include <basis/log_base.h>
00029 #include <basis/mutex.h>
00030 #include <data_struct/static_memory_gremlin.h>
00031
// The set of characters that split_lines() passes to find_any() when looking
// for the next candidate break position: tab, carriage return, line feed,
// space, dash, comma, semicolon, question mark, exclamation point, period and
// colon.
// NOTE(review): SAFE_STATIC_CONST is supplied by static_memory_gremlin.h;
// presumably it defines the splitter_finding_set() accessor around a single
// shared istring_object built from the literal below -- confirm there.
SAFE_STATIC_CONST(istring_object, string_manipulation::splitter_finding_set,
  ("\t\r\n -,;?!.:"))
00034
00035
00036
00037 istring string_manipulation::make_random_name(int min, int max)
00038 {
00039 chaos rando;
00040 int length = rando.inclusive(min, max);
00041
00042 istring to_return;
00043 for (int i = 0; i < length; i++) {
00044 int chah = rando.inclusive(0, 26);
00045
00046 char to_add = 'a' + chah;
00047 if (chah == 26) to_add = '_';
00048
00049 to_return += to_add;
00050 }
00051 return to_return;
00052 }
00053
00054
00055 bool string_manipulation::quote_string(const istring &to_quote,
00056 istring "ed_string)
00057 {
00058 bool bRet = true;
00059
00060 const char *dquote = "\"";
00061 const char *squote = "\'";
00062
00063 bool bHasDQuote = to_quote.contains(dquote);
00064 bool bHasSQuote = to_quote.contains(squote);
00065
00066 if (bHasDQuote && bHasSQuote) {
00067
00068 quoted_string = to_quote;
00069 bRet = false;
00070 } else if (bHasDQuote)
00071 {
00072 quoted_string = squote;
00073 quoted_string += to_quote;
00074 quoted_string += squote;
00075 }
00076 else
00077 {
00078 quoted_string = dquote;
00079 quoted_string += to_quote;
00080 quoted_string += dquote;
00081 }
00082
00083 return bRet;
00084 }
00085
00086
00087 istring string_manipulation::long_line(char line_item, int repeat)
00088 { return istring(line_item, repeat); }
00089
00090
00091 istring string_manipulation::indentation(int spaces)
00092 {
00093 istring s;
00094 for (int i = 0; i < spaces; i++) s += ' ';
00095 return s;
00096 }
00097
00098
00099 istring &string_manipulation::escape_chars(istring &input_string)
00100 {
00101
00102
00103
00104 const char *backslash = "\\";
00105 for( int i = 0; i < input_string.length(); ++i )
00106 {
00107 const char cur = input_string[i];
00108 if( ('\\' == cur) || ('\"' == cur) )
00109 {
00110 input_string.insert(i, backslash);
00111 ++i;
00112 }
00113 }
00114 return input_string;
00115 }
00116
00117
00118 istring &string_manipulation::unescape_chars(istring &input_string)
00119 {
00120 for( int i = 1; i < input_string.length(); ++i )
00121 {
00122 if( ('\\' == input_string[i-1]) &&
00123 (('\\' == input_string[i]) || ('\"' == input_string[i])) )
00124 {
00125 input_string.zap(i-1, i-1);
00126
00127
00128
00129 }
00130 }
00131 return input_string;
00132 }
00133
00134
00135 bool string_manipulation::substring(const istring &look_in,
00136 const istring &after, const istring &before, istring &found)
00137 {
00138 int start = look_in.find(after);
00139 if( start < 0 )
00140 return false;
00141
00142 start += after.length();
00143 const int end = look_in.find(before, start);
00144 if( end < 0 )
00145 return false;
00146
00147 found = look_in.substring(start, end - 1);
00148 return true;
00149 }
00150
00151 void string_manipulation::carriage_returns_to_spaces(istring &to_strip)
00152 {
00153 for (int j = 0; j < to_strip.length(); j++) {
00154 int original_j = j;
00155 if (!parser_bits::is_eol(to_strip[j])) continue;
00156
00157 if ( (to_strip[j] == '\r') && (to_strip[j + 1] == '\n') ) {
00158
00159 j++;
00160 }
00161 j++;
00162 if (parser_bits::is_eol(to_strip[j])) {
00163
00164
00165 j++;
00166 while (parser_bits::is_eol(to_strip[j]) && (j < to_strip.length()))
00167 j++;
00168
00169
00170 to_strip[original_j] = '\n';
00171 for (int k = original_j + 1; k < j; k++) to_strip[k] = ' ';
00172
00173 j--;
00174 continue;
00175 } else {
00176
00177
00178
00179 for (int k = original_j; k < j; k++) to_strip[k] = ' ';
00180 }
00181 }
00182
00183 }
00184
00185
00186 void string_manipulation::split_lines(const istring &input_in, istring &output,
00187 int min_column, int max_column)
00188 {
00189 output = "";
00190 if (max_column - min_column + 1 < 2) return;
00191
00192 istring input = input_in;
00193 carriage_returns_to_spaces(input);
00194
00195 int col = min_column;
00196 istring indent_add = indentation(min_column);
00197 output = indent_add;
00198
00199 bool just_had_break = false;
00200
00201 bool put_accum_before_break = false;
00202 istring accumulated;
00203
00204
00205
00206
00207 for (int j = 0; j < input.length(); j++) {
00208
00209
00210
00211
00212
00213
00214
00215 if (just_had_break) {
00216 if (put_accum_before_break) {
00217 output += accumulated;
00218
00219 output.strip_spaces(istring::FROM_END);
00220 output += log_base::platform_ending();
00221 accumulated = "";
00222 j++;
00223 }
00224
00225 output.strip_spaces(istring::FROM_END);
00226 output += log_base::platform_ending();
00227 col = min_column;
00228 output += indent_add;
00229 just_had_break = false;
00230 if (accumulated.length()) {
00231 output += accumulated;
00232 col += accumulated.length();
00233 accumulated = "";
00234 }
00235 j--;
00236 continue;
00237 }
00238
00239 put_accum_before_break = false;
00240
00241
00242 while ( (input[j] == ' ') || (input[j] == '\t') ) {
00243 j++;
00244 if (j >= input.length()) break;
00245 }
00246
00247 if (j >= input.length()) break;
00248
00249
00250 char current_char = input[j];
00251 if (parser_bits::is_eol(current_char)) {
00252 just_had_break = true;
00253 put_accum_before_break = true;
00254 continue;
00255 }
00256
00257
00258
00259 bool add_dash = false;
00260 bool break_line = false;
00261 bool invisible = false;
00262 bool end_sentence = false;
00263 bool punctuate = false;
00264 bool keep_on_line = false;
00265 char prior_break = '\0';
00266 char prior_break_plus_1 = '\0';
00267
00268
00269 int next_break = input.find_any(splitter_finding_set(), j);
00270
00271 if (negative(next_break))
00272 next_break = input.length() - 1;
00273
00274
00275
00276 prior_break = input[next_break];
00277
00278 prior_break_plus_1 = input[next_break + 1];
00279
00280 switch (prior_break) {
00281 case '\r': case '\n':
00282 break_line = true;
00283 just_had_break = true;
00284 put_accum_before_break = true;
00285
00286 case '\t': case ' ':
00287 invisible = true;
00288 next_break--;
00289 break;
00290 case '?': case '!': case '.':
00291 end_sentence = true;
00292
00293 while ( (input[next_break + 1] == '?')
00294 || (input[next_break + 1] == '!')
00295 || (input[next_break + 1] == '.') ) {
00296 next_break++;
00297 }
00298
00299 if (!parser_bits::white_space(input[next_break + 1]))
00300 end_sentence = false;
00301 break;
00302 case ',': case ';': case ':':
00303 punctuate = true;
00304
00305 if (!parser_bits::white_space(input[next_break + 1]))
00306 punctuate = false;
00307 break;
00308 }
00309
00310
00311 int punct_adder = 0;
00312 if (punctuate || invisible) punct_adder = 1;
00313 if (end_sentence) punct_adder = 2;
00314
00315
00316 int chars_added = next_break - j + 1;
00317 if (col + chars_added + punct_adder > max_column + 1) {
00318
00319 break_line = true;
00320 just_had_break = true;
00321 if (col + chars_added <= max_column + 1) {
00322
00323
00324 invisible = false;
00325 punctuate = false;
00326 end_sentence = false;
00327 punct_adder = 0;
00328 keep_on_line = true;
00329 } else if (min_column + chars_added > max_column + 1) {
00330
00331 int chars_left = max_column - col + 1;
00332
00333 if (chars_left < 2) {
00334 j--;
00335 continue;
00336 } else {
00337 next_break = j + chars_left - 2;
00338 chars_added = next_break - j + 1;
00339 if (next_break >= input.length())
00340 next_break = input.length() - 1;
00341 else if (next_break < j)
00342 next_break = j;
00343 add_dash = true;
00344 }
00345 }
00346 }
00347
00348 istring adding_chunk = input.substring(j, next_break);
00349
00350
00351
00352 if (break_line) {
00353 col = min_column;
00354 if (add_dash || keep_on_line) {
00355
00356 output += adding_chunk;
00357 if (add_dash) output += "-";
00358 j = next_break;
00359 continue;
00360 }
00361
00362
00363 accumulated = adding_chunk;
00364 if (punctuate || invisible) {
00365 accumulated += " ";
00366 } else if (end_sentence) {
00367 accumulated += " ";
00368 }
00369 j = next_break;
00370 continue;
00371 }
00372
00373
00374 output += adding_chunk;
00375 col += chars_added + punct_adder;
00376 j = next_break;
00377 just_had_break = false;
00378
00379
00380 if (punctuate || invisible) {
00381 output += " ";
00382 } else if (end_sentence) {
00383 output += " ";
00384 }
00385 }
00386
00387 if (accumulated.length()) {
00388 output.strip_spaces(istring::FROM_END);
00389 output += log_base::platform_ending();
00390 output += indent_add;
00391 output += accumulated;
00392 }
00393 output.strip_spaces(istring::FROM_END);
00394 output += log_base::platform_ending();
00395 }
00396
00397 char string_manipulation::hex_to_char(byte to_convert)
00398 {
00399 if (to_convert <= 9) return char('0' + to_convert);
00400 else if ( (to_convert >= 10) && (to_convert <= 15) )
00401 return char('A' - 10 + to_convert);
00402 else return '?';
00403 }
00404
00405 byte string_manipulation::char_to_hex(char to_convert)
00406 {
00407 if ( (to_convert >= '0') && (to_convert <= '9') )
00408 return char(to_convert - '0');
00409 else if ( (to_convert >= 'a') && (to_convert <= 'f') )
00410 return char(to_convert - 'a' + 10);
00411 else if ( (to_convert >= 'A') && (to_convert <= 'F') )
00412 return char(to_convert - 'A' + 10);
00413 else return 0;
00414 }
00415
00416 byte_array string_manipulation::string_to_hex(const istring &to_convert)
00417 {
00418 byte_array to_return(0, NIL);
00419 for (int i = 0; i < to_convert.length() / 2; i++) {
00420 int str_index = i * 2;
00421 byte first_byte = char_to_hex(to_convert.get(str_index));
00422 byte second_byte = char_to_hex(to_convert.get(str_index + 1));
00423 byte to_stuff = byte(first_byte * 16 + second_byte);
00424 to_return.concatenate(to_stuff);
00425 }
00426 return to_return;
00427 }
00428
00429 istring string_manipulation::hex_to_string(const byte_array &to_convert)
00430 {
00431 istring to_return;
00432 for (int i = 0; i < to_convert.length() * 2; i += 2) {
00433 int str_index = i / 2;
00434 char first_char = hex_to_char(char(to_convert.get(str_index) / 16));
00435 char second_char = hex_to_char(char(to_convert.get(str_index) % 16));
00436 to_return += istring(first_char, 1);
00437 to_return += istring(second_char, 1);
00438 }
00439 return to_return;
00440 }
00441
00442
00443 #endif //STRING_MANIPULATION_IMPLEMENTATION_FILE
00444