00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015 #include "bookmark_tree.h"
00016
00017 #include <basis/function.h>
00018 #include <basis/guards.h>
00019 #include <basis/istring.h>
00020 #include <data_struct/amorph.cpp>
00021 #include <data_struct/string_table.cpp>
00022 #include <data_struct/symbol_table.cpp>
00023 #include <nodes/symbol_tree.h>
00024 #include <opsystem/byte_filer.h>
00025 #include <loggers/file_logger.h>
00026 #include <opsystem/filename.h>
00027 #include <textual/list_parsing.h>
00028 #include <textual/parser_bits.h>
00029
00030 using namespace nodes;
00031
00032
00033
00034
00035 #undef BASE_LOG
00036 #define BASE_LOG(s) program_wide_logger().log(s)
00037 #define SHOW_LINE \
00038 isprintf("line %d: ", _line_number)
00039 #undef LOG
00040 #define LOG(s) CLASS_EMERGENCY_LOG(program_wide_logger(), SHOW_LINE + s)
00041 #define DEADLY_LINE \
00042 (istring(func) + isprintf(", line %d: ", _line_number))
00043
00044 const int HASHING_PARAMETER = 6;
00045
00046
00047
00048
00049
00050
00051 const int MAX_LINE_SIZE = 256 * KILOBYTE;
00052
00053
00054
00055
00056 bool case_insense_compare(const istring &a, const istring &b)
00057 { return a.iequals(b); }
00058
00060
00061 listo_links::listo_links() : amorph<link_record>(), _next_index(0) {}
00062
00063 void listo_links::add(link_record *new_rec, bool sort)
00064 {
00065
00066
00067 if (sort && new_rec->_description.t()) {
00068 for (int i = _next_index; i < elements(); i++) {
00069 const istring &desc_cur = borrow(i)->_description;
00070 if ( (desc_cur.icompare(new_rec->_description) > 0) || !desc_cur) {
00071 insert(i, 1);
00072 put(i, new_rec);
00073 return;
00074 }
00075 }
00076 }
00077 append(new_rec);
00078 if (!sort)
00079 _next_index = elements();
00080 }
00081
00083
00084 class symbol_int : public symbol_table<int>
00085 {
00086 public:
00087 symbol_int() : symbol_table<int>(10) {}
00088 };
00089
00091
00092 bookmark_tree::bookmark_tree()
00093 : _line_number(0),
00094 _mark_tree(new inner_mark_tree("Root", 0, HASHING_PARAMETER)),
00095 _link_count(0),
00096 _category_count(0),
00097 _last_parent(_mark_tree),
00098 _last_node(_mark_tree),
00099 _links_seen(new symbol_int),
00100 _category_names(new string_table)
00101 {}
00102
00103 bookmark_tree::~bookmark_tree()
00104 {
00105 WHACK(_links_seen);
00106 WHACK(_mark_tree);
00107 WHACK(_category_names);
00108 }
00109
00110 void bookmark_tree::break_name(const istring &to_break, istring &name,
00111 istring &nick)
00112 {
00113 nick = istring::empty_string();
00114 name = to_break;
00115 int indy = name.find('[');
00116 if (negative(indy)) return;
00117 nick = name.substring(indy + 1, name.end());
00118 while ( (nick[nick.end()] == ' ') || (nick[nick.end()] == ']') )
00119 nick.zap(nick.end(), nick.end());
00120 name.zap(indy, name.end());
00121 name.strip_spaces();
00122 nick.strip_spaces();
00123 }
00124
00125 inner_mark_tree &bookmark_tree::access_root() { return *_mark_tree; }
00126
00127 bool bookmark_tree::magic_category_comparison(const istring &a,
00128 const istring &b)
00129 {
00130 FUNCDEF("magic_category_comparison");
00131
00132 if (a.iequals(b)) return true;
00133 istring a_name, a_nick;
00134 break_name(a, a_name, a_nick);
00135 istring b_name, b_nick;
00136 break_name(b, b_name, b_nick);
00137 if (a_name.iequals(b_name)) return true;
00138 if (a_nick.t() && a_nick.iequals(b_name)) return true;
00139 if (b_nick.t() && a_name.iequals(b_nick)) return true;
00140 if (a_nick.t() && b_nick.t() && a_nick.iequals(b_nick)) return true;
00141 return false;
00142 }
00143
00144 inner_mark_tree *bookmark_tree::find_parent(const istring &parent_name)
00145 {
00146 FUNCDEF("find_parent");
00147
00148 inner_mark_tree *parent = dynamic_cast<inner_mark_tree *>
00149 (_last_parent->find(parent_name, inner_mark_tree::recurse_upward,
00150 magic_category_comparison));
00151
00152 #ifdef DEBUG_MARKS
00153 if (parent)
00154 LOG(istring("trying upwards find for parent node ") + parent_name);
00155 #endif
00156
00157 if (!parent) {
00158 #ifdef DEBUG_MARKS
00159 LOG(istring("upwards find failed seeking on last_parent node ")
00160 + parent_name);
00161 #endif
00162
00163
00164 parent = dynamic_cast<inner_mark_tree *>(_last_node->find(parent_name,
00165 inner_mark_tree::recurse_upward, magic_category_comparison));
00166 }
00167
00168 if (!parent) {
00169 #ifdef DEBUG_MARKS
00170 LOG(istring("upwards find failed seeking on last_node ") + parent_name);
00171 #endif
00172
00173
00174 parent = dynamic_cast<inner_mark_tree *>(_mark_tree->find(parent_name,
00175 inner_mark_tree::recurse_downward, magic_category_comparison));
00176 }
00177 if (!parent) {
00178
00179 LOG(istring("failed to find parent node ") + parent_name);
00180
00181
00182 inner_mark_tree *new_node = new inner_mark_tree(parent_name,
00183 _line_number, HASHING_PARAMETER);
00184 _mark_tree->attach(new_node);
00185 _mark_tree->sort();
00186 _category_count++;
00187
00188 parent = new_node;
00189 } else {
00190 #ifdef DEBUG_MARKS
00191 LOG(istring("found parent node ") + parent_name);
00192 #endif
00193 }
00194
00195 return parent;
00196 }
00197
00198 inner_mark_tree *bookmark_tree::process_category(const string_array &items)
00199 {
00200 FUNCDEF("process_category");
00201 const istring &category_name = items[1];
00202 const istring &parent_name = items[2];
00203
00204 if (items.length() > 3) {
00205
00206 LOG(istring("category ") + category_name + " under " + parent_name
00207 + " has extra fields!");
00208 }
00209
00210
00211
00212
00213
00214 if (!parent_name || magic_category_comparison("Root", category_name)) {
00215 #ifdef DEBUG_MARKS
00216 LOG(istring("skipping parent node for ") + category_name);
00217 #endif
00218 return _mark_tree;
00219 }
00220
00221
00222 istring main_name, nickname;
00223 break_name(category_name, main_name, nickname);
00224 istring *found1 = _category_names->find(main_name, case_insense_compare);
00225 istring *found2 = _category_names->find(nickname, case_insense_compare);
00226 if (found1 || found2) {
00227 istring catnames;
00228 if (found1) catnames = *found1;
00229 if (found2) {
00230 if (!!catnames) catnames += " and ";
00231 catnames += *found2;
00232 }
00233 LOG(istring("category name \"") + category_name
00234 + "\" in conflict with existing: " + catnames);
00235 inner_mark_tree *fake_it = NIL;
00236
00237
00238 deadly_error(class_name(), func, "collision resolution code not present yet; please fix category error");
00239 return fake_it;
00240 if (found1)
00241 fake_it = (inner_mark_tree *)_mark_tree->find
00242 (*found1, symbol_tree::recurse_downward);
00243 if (fake_it) return fake_it;
00244 if (found2)
00245 fake_it = (inner_mark_tree *)_mark_tree->find
00246 (*found2, symbol_tree::recurse_downward);
00247 if (fake_it) return fake_it;
00248 LOG(" failure to find a match for either category!");
00249 return NIL;
00250 }
00251
00252
00253 _category_names->add(main_name, main_name);
00254 if (!!nickname) _category_names->add(nickname, nickname);
00255
00256 inner_mark_tree *parent = find_parent(parent_name);
00257 _last_parent = parent;
00258
00259
00260 for (int i = 0; i < parent->branches(); i++) {
00261 inner_mark_tree *curr = dynamic_cast<inner_mark_tree *>(parent->branch(i));
00262 if (!curr)
00263 non_continuable_error(class_name(), DEADLY_LINE, "missing branch in tree");
00264 #ifdef DEBUG_MARKS
00265 LOG(istring("looking at branch ") + curr->name());
00266 #endif
00267 if (magic_category_comparison(curr->name(), category_name)) {
00268
00269 LOG(istring("category ") + category_name + " already exists under "
00270 + parent_name + ".");
00271 _last_node = curr;
00272 return curr;
00273 }
00274 }
00275
00276 inner_mark_tree *new_node = new inner_mark_tree(category_name,
00277 _line_number, HASHING_PARAMETER);
00278 parent->attach(new_node);
00279 parent->sort();
00280 _last_node = new_node;
00281
00282 _category_count++;
00283
00284 #ifdef DEBUG_MARKS
00285 LOG(istring("attaching node ") + category_name + " to parent "
00286 + parent->name());
00287 #endif
00288 return new_node;
00289 }
00290
00291 void bookmark_tree::process_link(const string_array &items)
00292 {
00293 FUNCDEF("process_link");
00294 istring description = items[1];
00295 istring parent_name = items[2];
00296 istring url = "UNKNOWN";
00297 if (items.length() >= 4) url = items[3];
00298
00299
00300
00301 while (url[url.end()] == '/') {
00302 url.zap(url.end(), url.end());
00303 }
00304
00305
00306 if (items.length() < 4) {
00307 LOG(istring("link ") + description + " under " + parent_name
00308 + " has no URL!");
00309 } else if (items.length() > 4) {
00310 LOG(istring("link ") + description + " under " + parent_name
00311 + " has extra fields!");
00312 }
00313
00314
00315 inner_mark_tree *parent = find_parent(parent_name);
00316 _last_parent = parent;
00317
00318
00319 istring url_copy = url.lower();
00320
00321 int *found = _links_seen->find(url_copy);
00322 if (found) {
00323
00324 LOG(isprintf("Duplicate Link: line %d already has ", *found) + url);
00325 return;
00326 } else {
00327 _links_seen->add(url_copy, _line_number);
00328 }
00329
00330
00331 link_record *new_rec = new link_record(description, url, _line_number);
00332 parent->_links.add(new_rec);
00333
00334 _link_count++;
00335 }
00336
00337 void bookmark_tree::process_comment(const istring ¤t_line_in)
00338 {
00339 FUNCDEF("process_comment");
00340 istring current_line = current_line_in;
00341
00342
00343
00344 if (current_line.contains("http:")) {
00345 istring hold_curr = current_line;
00346 int indy = current_line.find("http:");
00347 hold_curr.zap(0, indy - 1);
00348 current_line = istring(" "
00349 "<a href=\"") + hold_curr + "\">" + hold_curr + "</a>";
00350 } else if (current_line.t()) {
00351
00352 current_line.zap(0, 0);
00353 }
00354
00355 link_record *new_rec = new link_record(current_line,
00356 istring::empty_string(), _line_number);
00357 _last_node->_links.add(new_rec, false);
00358 }
00359
00360 int bookmark_tree::read_csv_file(const istring &input_filename)
00361 {
00362 FUNCDEF("read_csv_file");
00363 byte_filer input_file(input_filename, "r");
00364 if (!input_file.good())
00365 non_continuable_error(class_name(), DEADLY_LINE,
00366 "the input file could not be opened");
00367
00368 string_array items;
00369 istring current_line;
00370
00371
00372 while (input_file.getline(current_line, MAX_LINE_SIZE) > 0) {
00373 _line_number++;
00374
00375 while (parser_bits::is_eol(current_line[current_line.end()])) {
00376 current_line.zap(current_line.end(), current_line.end());
00377 }
00378 current_line.strip_spaces();
00379 if (!current_line.length()) {
00380
00381
00382
00383 continue;
00384 } else if (current_line[0] == '#') {
00385
00386 process_comment(current_line);
00387 } else {
00388
00389 bool parsed = list_parsing::parse_csv_line(current_line, items);
00390 if (!parsed)
00391 non_continuable_error(class_name(), DEADLY_LINE,
00392 istring("the line could not be parsed: ") + current_line);
00393 if (!items.length()) {
00394 LOG("bad formatting on this line.");
00395 continue;
00396 }
00397 if (items[0].iequals("C")) {
00398 inner_mark_tree *node = process_category(items);
00399 if (!node) {
00400 LOG(istring("failed to get a node for ") + items[1]);
00401 }
00402 } else if (items[0].iequals("L")) {
00403 process_link(items);
00404 } else {
00405 non_continuable_error(class_name(), DEADLY_LINE,
00406 istring("unknown format in line: ") + current_line);
00407 }
00408 }
00409 }
00410 return 0;
00411 }
00412