00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "bookmark_tree.h"
00016
00017 #include <basis/astring.h>
00018 #include <basis/functions.h>
00019 #include <basis/guards.h>
00020 #include <filesystem/byte_filer.h>
00021 #include <filesystem/filename.h>
00022 #include <loggers/critical_events.h>
00023 #include <loggers/file_logger.h>
00024 #include <loggers/program_wide_logger.h>
00025 #include <nodes/symbol_tree.h>
00026 #include <structures/amorph.h>
00027 #include <structures/string_table.cpp>
00028 #include <structures/symbol_table.h>
00029 #include <textual/list_parsing.h>
00030 #include <textual/parser_bits.h>
00031
00033
00034 using namespace basis;
00035 using namespace filesystem;
00036 using namespace loggers;
00037 using namespace nodes;
00038 using namespace structures;
00039 using namespace textual;
00040
00041 #define DEBUG_MARKS
00042
00043
00044
00045
00046 #undef BASE_LOG
00047 #define BASE_LOG(s) program_wide_logger::get().log(s)
00048 #define SHOW_LINE a_sprintf("line %d: ", _line_number)
00049 #undef LOG
00050 #define LOG(s) CLASS_EMERGENCY_LOG(program_wide_logger::get(), SHOW_LINE + s)
00051 #define DEADLY_LINE (astring(func) + a_sprintf(", line %d: ", _line_number))
00052
00053 const int ESTIMATED_ELEMENTS = 100;
00054
00055
00056 const int MAX_LINE_SIZE = 256 * KILOBYTE;
00057
00058
00059
00060
00061 bool case_insense_compare(const astring &a, const astring &b)
00062 { return a.iequals(b); }
00063
00065
00066 listo_links::listo_links() : amorph<link_record>(), _next_index(0) {}
00067
00068 void listo_links::add(link_record *new_rec, bool sort)
00069 {
00070
00071
00072 if (sort && new_rec->_description.t()) {
00073 for (int i = _next_index; i < elements(); i++) {
00074 const astring &desc_cur = borrow(i)->_description;
00075
00076
00077 if (desc_cur.iequals(new_rec->_description)
00078
00079 || !desc_cur) {
00080 insert(i + 1, 1);
00081 put(i + 1, new_rec);
00082 return;
00083 }
00084 }
00085 }
00086 append(new_rec);
00087 if (!sort)
00088 _next_index = elements();
00089 }
00090
00092
00093 class symbol_int : public symbol_table<int>
00094 {
00095 public:
00096 symbol_int() : symbol_table<int>(10) {}
00097 };
00098
00100
00101 bookmark_tree::bookmark_tree()
00102 : _line_number(0),
00103 _mark_tree(new inner_mark_tree("Root", 0, ESTIMATED_ELEMENTS)),
00104 _link_count(0),
00105 _category_count(0),
00106 _last_parent(_mark_tree),
00107 _last_node(_mark_tree),
00108 _links_seen(new symbol_int),
00109 _category_names(new string_table)
00110 {}
00111
00112 bookmark_tree::~bookmark_tree()
00113 {
00114 WHACK(_links_seen);
00115 WHACK(_mark_tree);
00116 WHACK(_category_names);
00117 }
00118
00119 void bookmark_tree::break_name(const astring &to_break, astring &name,
00120 astring &nick)
00121 {
00122 nick = astring::empty_string();
00123 name = to_break;
00124 int indy = name.find('[');
00125 if (negative(indy)) return;
00126 nick = name.substring(indy + 1, name.end());
00127 while ( (nick[nick.end()] == ' ') || (nick[nick.end()] == ']') )
00128 nick.zap(nick.end(), nick.end());
00129 name.zap(indy, name.end());
00130 name.strip_spaces();
00131 nick.strip_spaces();
00132 }
00133
00134 inner_mark_tree &bookmark_tree::access_root() { return *_mark_tree; }
00135
00136 bool bookmark_tree::magic_category_comparison(const astring &a, const astring &b)
00137 {
00138
00139
00140 if (a.iequals(b)) return true;
00141 astring a_name, a_nick;
00142 break_name(a, a_name, a_nick);
00143 astring b_name, b_nick;
00144 break_name(b, b_name, b_nick);
00145 if (a_name.iequals(b_name)) return true;
00146 if (a_nick.t() && a_nick.iequals(b_name)) return true;
00147 if (b_nick.t() && a_name.iequals(b_nick)) return true;
00148 if (a_nick.t() && b_nick.t() && a_nick.iequals(b_nick)) return true;
00149 return false;
00150 }
00151
00152 const astring &HTTP_HEAD = "http://";
00153 const astring &FTP_HEAD = "ftp://";
00154 const astring &WWW_SITE = "www.";
00155 const astring &FTP_SITE = "ftp.";
00156
00157 bool bookmark_tree::advance(int &index, const astring &check, const astring &finding)
00158 {
00159 if (check.compare(finding, index, 0, finding.length(), false)) {
00160 index += finding.length();
00161 return true;
00162 }
00163 return false;
00164 }
00165
00166 int bookmark_tree::find_prune_point(const astring &to_prune)
00167 {
00168 int to_return = 0;
00169 advance(to_return, to_prune, HTTP_HEAD);
00170 advance(to_return, to_prune, FTP_HEAD);
00171 advance(to_return, to_prune, WWW_SITE);
00172 advance(to_return, to_prune, FTP_SITE);
00173 return to_return;
00174 }
00175
00176 astring bookmark_tree::prune_link_down(const astring &to_prune)
00177 {
00178
00179 return to_prune.substring(find_prune_point(to_prune), to_prune.end()); }
00180
00181 bool bookmark_tree::excellent_link_comparator(const astring &a, const astring &b)
00182 {
00183 int prune_a = find_prune_point(a);
00184 int prune_b = find_prune_point(b);
00185 int bigger_len = maximum(a.length() - prune_a, b.length() - prune_b);
00186 bool to_return = a.compare(b, prune_a, prune_b, bigger_len, false);
00187
00188 return to_return;
00189 }
00190
00191 inner_mark_tree *bookmark_tree::find_parent(const astring &parent_name)
00192 {
00193 FUNCDEF("find_parent");
00194
00195 inner_mark_tree *parent = dynamic_cast<inner_mark_tree *>
00196 (_last_parent->find(parent_name, inner_mark_tree::recurse_upward,
00197 magic_category_comparison));
00198
00199 #ifdef DEBUG_MARKS_TREE
00200 if (parent)
00201 LOG(astring("trying upwards find for parent node ") + parent_name);
00202 #endif
00203
00204 if (!parent) {
00205 #ifdef DEBUG_MARKS_TREE
00206 LOG(astring("upwards find failed seeking on last_parent node ")
00207 + parent_name);
00208 #endif
00209
00210
00211 parent = dynamic_cast<inner_mark_tree *>(_last_node->find(parent_name,
00212 inner_mark_tree::recurse_upward, magic_category_comparison));
00213 }
00214
00215 if (!parent) {
00216 #ifdef DEBUG_MARKS_TREE
00217 LOG(astring("upwards find failed seeking on last_node ") + parent_name);
00218 #endif
00219
00220
00221 parent = dynamic_cast<inner_mark_tree *>(_mark_tree->find(parent_name,
00222 inner_mark_tree::recurse_downward, magic_category_comparison));
00223 }
00224 if (!parent) {
00225
00226 LOG(astring("failed to find parent node ") + parent_name);
00227
00228
00229 inner_mark_tree *new_node = new inner_mark_tree(parent_name,
00230 _line_number, ESTIMATED_ELEMENTS);
00231 _mark_tree->attach(new_node);
00232 _mark_tree->sort();
00233 _category_count++;
00234
00235 parent = new_node;
00236 } else {
00237 #ifdef DEBUG_MARKS_TREE
00238 LOG(astring("found parent node ") + parent_name);
00239 #endif
00240 }
00241
00242 return parent;
00243 }
00244
00245 inner_mark_tree *bookmark_tree::process_category(const string_array &items)
00246 {
00247 FUNCDEF("process_category");
00248 const astring &category_name = items[1];
00249 const astring &parent_name = items[2];
00250
00251 if (items.length() > 3) {
00252
00253 LOG(astring("category ") + category_name + " under " + parent_name
00254 + " has extra fields!");
00255 }
00256
00257
00258
00259
00260
00261 if (!parent_name || magic_category_comparison("Root", category_name)) {
00262 #ifdef DEBUG_MARKS_TREE
00263 LOG(astring("skipping parent node for ") + category_name);
00264 #endif
00265 return _mark_tree;
00266 }
00267
00268
00269 astring main_name, nickname;
00270 break_name(category_name, main_name, nickname);
00271 astring *found1 = _category_names->find(main_name, case_insense_compare);
00272 astring *found2 = _category_names->find(nickname, case_insense_compare);
00273 if (found1 || found2) {
00274 astring catnames;
00275 if (found1) catnames = *found1;
00276 if (found2) {
00277 if (!!catnames) catnames += " and ";
00278 catnames += *found2;
00279 }
00280 LOG(astring("category name \"") + category_name
00281 + "\" in conflict with existing: " + catnames);
00282 inner_mark_tree *fake_it = NIL;
00283
00284
00285
00286
00287 if (found1) {
00288 #ifdef DEBUG_MARKS
00289 LOG(astring("found existing category for main name: ") + main_name);
00290 #endif
00291
00292
00293 fake_it = dynamic_cast<inner_mark_tree *>(_mark_tree->find
00294 (*found1, inner_mark_tree::recurse_downward,
00295 magic_category_comparison));
00296 }
00297 if (fake_it) {
00298 #ifdef DEBUG_MARKS
00299 LOG(astring("returning existing category for main name: ") + main_name
00300 + " as: " + fake_it->name());
00301 #endif
00302 return fake_it;
00303 }
00304 if (found2) {
00305 #ifdef DEBUG_MARKS
00306 LOG(astring("found existing category for nickname: ") + nickname);
00307 #endif
00308
00309
00310 fake_it = dynamic_cast<inner_mark_tree *>(_mark_tree->find
00311 (*found2, inner_mark_tree::recurse_downward,
00312 magic_category_comparison));
00313 }
00314 if (fake_it) {
00315 #ifdef DEBUG_MARKS
00316 LOG(astring("returning existing category for nickname: ") + nickname
00317 + " as: " + fake_it->name());
00318 #endif
00319 return fake_it;
00320 }
00321 LOG("==> failure to find a match for either category!");
00322 deadly_error(class_name(), func, "collision resolution code failed; "
00323 "please fix category error");
00324 return NIL;
00325 }
00326
00327
00328 _category_names->add(main_name, main_name);
00329 if (!!nickname) _category_names->add(nickname, nickname);
00330
00331 inner_mark_tree *parent = find_parent(parent_name);
00332 _last_parent = parent;
00333
00334
00335 for (int i = 0; i < parent->branches(); i++) {
00336 inner_mark_tree *curr = dynamic_cast<inner_mark_tree *>(parent->branch(i));
00337 if (!curr)
00338 non_continuable_error(class_name(), DEADLY_LINE, "missing branch in tree");
00339 #ifdef DEBUG_MARKS_TREE
00340 LOG(astring("looking at branch ") + curr->name());
00341 #endif
00342 if (magic_category_comparison(curr->name(), category_name)) {
00343
00344 LOG(astring("category ") + category_name + " already exists under "
00345 + parent_name + ".");
00346 _last_node = curr;
00347 return curr;
00348 }
00349 }
00350
00351 inner_mark_tree *new_node = new inner_mark_tree(category_name,
00352 _line_number, ESTIMATED_ELEMENTS);
00353 parent->attach(new_node);
00354 parent->sort();
00355 _last_node = new_node;
00356
00357 _category_count++;
00358
00359 #ifdef DEBUG_MARKS_TREE
00360 LOG(astring("attaching node ") + category_name + " to parent "
00361 + parent->name());
00362 #endif
00363 return new_node;
00364 }
00365
00366 void bookmark_tree::process_link(const string_array &items)
00367 {
00368 FUNCDEF("process_link");
00369 astring description = items[1];
00370 astring parent_name = items[2];
00371 astring url = "UNKNOWN";
00372 if (items.length() >= 4) url = items[3];
00373
00374
00375
00376 while (url[url.end()] == '/') {
00377 url.zap(url.end(), url.end());
00378 }
00379
00380
00381 if (items.length() < 4) {
00382 LOG(astring("link ") + description + " under " + parent_name
00383 + " has no URL!");
00384 } else if (items.length() > 4) {
00385 LOG(astring("link ") + description + " under " + parent_name
00386 + " has extra fields!");
00387 }
00388
00389
00390 inner_mark_tree *parent = find_parent(parent_name);
00391 _last_parent = parent;
00392
00393
00394 int *found = _links_seen->find(url, excellent_link_comparator);
00395 if (found) {
00396
00397 LOG(a_sprintf("Duplicate Link: line %d already has ", *found) + url);
00398 return;
00399 } else {
00400 _links_seen->add(prune_link_down(url), _line_number);
00401 }
00402
00403
00404 link_record *new_rec = new link_record(description, url, _line_number);
00405 parent->_links.add(new_rec);
00406
00407 _link_count++;
00408 }
00409
00410 void bookmark_tree::process_comment(const astring ¤t_line_in)
00411 {
00413 astring current_line = current_line_in;
00414
00415
00416
00417 if (current_line.contains("http:")) {
00418 astring hold_curr = current_line;
00419 int indy = current_line.find("http:");
00420 hold_curr.zap(0, indy - 1);
00421 current_line = astring(" "
00422 "<a href=\"") + hold_curr + "\">" + hold_curr + "</a>";
00423 } else if (current_line.t()) {
00424
00425 current_line.zap(0, 0);
00426 }
00427
00428 link_record *new_rec = new link_record(current_line,
00429 astring::empty_string(), _line_number);
00430 _last_node->_links.add(new_rec, false);
00431 }
00432
00433 int bookmark_tree::read_csv_file(const astring &input_filename)
00434 {
00435 FUNCDEF("read_csv_file");
00436 byte_filer input_file(input_filename, "r");
00437 if (!input_file.good())
00438 non_continuable_error(class_name(), DEADLY_LINE,
00439 "the input file could not be opened");
00440
00441 string_array items;
00442 astring current_line;
00443
00444
00445 while (input_file.getline(current_line, MAX_LINE_SIZE) > 0) {
00446 _line_number++;
00447
00448 while (parser_bits::is_eol(current_line[current_line.end()])) {
00449 current_line.zap(current_line.end(), current_line.end());
00450 }
00451 current_line.strip_spaces();
00452 if (!current_line.length()) {
00453
00454
00455
00456 continue;
00457 } else if (current_line[0] == '#') {
00458
00459 process_comment(current_line);
00460 } else {
00461
00462 bool parsed = list_parsing::parse_csv_line(current_line, items);
00463 if (!parsed)
00464 non_continuable_error(class_name(), DEADLY_LINE,
00465 astring("the line could not be parsed: ") + current_line);
00466 if (!items.length()) {
00467 LOG("bad formatting on this line.");
00468 continue;
00469 }
00470 if (items[0].iequals("C")) {
00471 inner_mark_tree *node = process_category(items);
00472 if (!node) {
00473 LOG(astring("failed to get a node for ") + items[1]);
00474 }
00475 } else if (items[0].iequals("L")) {
00476 process_link(items);
00477 } else {
00478 non_continuable_error(class_name(), DEADLY_LINE,
00479 astring("unknown format in line: ") + current_line);
00480 }
00481 }
00482 }
00483 return 0;
00484 }
00485