/*****************************************************************************\
*                                                                             *
*  Name   : bookmark_tree                                                     *
*  Author : Chris Koeritz                                                     *
*                                                                             *
*******************************************************************************
* Copyright (c) 2005-$now By Author.  This program is free software; you can  *
* redistribute it and/or modify it under the terms of the GNU General Public  *
* License as published by the Free Software Foundation; either version 2 of   *
* the License or (at your option) any later version.  This is online at:      *
*     http://www.fsf.org/copyleft/gpl.html                                    *
* Please send any updates to: fred@gruntose.com                               *
\*****************************************************************************/

#include "bookmark_tree.h"

#include <basis/function.h>
#include <basis/guards.h>
#include <basis/istring.h>
#include <data_struct/amorph.cpp>
#include <data_struct/string_table.cpp>
#include <data_struct/symbol_table.cpp>
#include <nodes/symbol_tree.h>
#include <opsystem/byte_filer.h>
#include <loggers/file_logger.h>
#include <opsystem/filename.h>
#include <textual/list_parsing.h>
#include <textual/parser_bits.h>

using namespace nodes;

//#define DEBUG_MARKS
  // uncomment to have more debugging noise.

#undef BASE_LOG
#define BASE_LOG(s) program_wide_logger().log(s)
#define SHOW_LINE \
   isprintf("line %d: ", _line_number)
#undef LOG
#define LOG(s) CLASS_EMERGENCY_LOG(program_wide_logger(), SHOW_LINE + s)
#define DEADLY_LINE \
   (istring(func) + isprintf(", line %d: ", _line_number))

const int HASHING_PARAMETER = 6;
  // we will have 2^n hashing slots, i.e. locations into which things will
  // be stored.  for example, if n is 4, then there will be 16 slots, and on
  // average a search for an item would take only a 16th of the number of
  // comparisons as it would in a simple linear search (assuming a good
  // hashing function is used).

const int MAX_LINE_SIZE = 256 * KILOBYTE;
  // the largest line we'll process in the links database.

// used to compare two strings while ignoring case; we use this to find
// our categories in the symbol table.
bool case_insense_compare(const istring &a, const istring &b)
{ return a.iequals(b); }

////////////////////////////////////////////////////////////////////////////

listo_links::listo_links() : amorph<link_record>(), _next_index(0) {}

void listo_links::add(link_record *new_rec, bool sort)
{
  // we don't sort blank lines--they just get dropped in the end of
  // the section.
  if (sort && new_rec->_description.t()) {
    for (int i = _next_index; i < elements(); i++) {
      const istring &desc_cur = borrow(i)->_description;
      if ( (desc_cur.icompare(new_rec->_description) > 0) || !desc_cur) {
        insert(i, 1);
        put(i, new_rec);
        return;
      }
    }
  }
  append(new_rec);
  if (!sort)
    _next_index = elements();
}

////////////////////////////////////////////////////////////////////////////

class symbol_int : public symbol_table<int>
{
public:
  symbol_int() : symbol_table<int>(10) {}
};

////////////////////////////////////////////////////////////////////////////

bookmark_tree::bookmark_tree()
: _line_number(0),
  _mark_tree(new inner_mark_tree("Root", 0, HASHING_PARAMETER)),
  _link_count(0),
  _category_count(0),
  _last_parent(_mark_tree),
  _last_node(_mark_tree),
  _links_seen(new symbol_int),
  _category_names(new string_table)
{}

bookmark_tree::~bookmark_tree()
{
  WHACK(_links_seen);
  WHACK(_mark_tree);
  WHACK(_category_names);
}

void bookmark_tree::break_name(const istring &to_break, istring &name,
    istring &nick)
{
  nick = istring::empty_string();
  name = to_break;
  int indy = name.find('[');
  if (negative(indy)) return;
  nick = name.substring(indy + 1, name.end());
  while ( (nick[nick.end()] == ' ') || (nick[nick.end()] == ']') )
    nick.zap(nick.end(), nick.end());
  name.zap(indy, name.end());
  name.strip_spaces();
  nick.strip_spaces();
}

inner_mark_tree &bookmark_tree::access_root() { return *_mark_tree; }

bool bookmark_tree::magic_category_comparison(const istring &a,
    const istring &b)
{
  FUNCDEF("magic_category_comparison");
//LOG(istring("compare: a=") + a + " b=" + b);
  if (a.iequals(b)) return true;
  istring a_name, a_nick;
  break_name(a, a_name, a_nick);
  istring b_name, b_nick;
  break_name(b, b_name, b_nick);
  if (a_name.iequals(b_name)) return true;
  if (a_nick.t() && a_nick.iequals(b_name)) return true;
  if (b_nick.t() && a_name.iequals(b_nick)) return true;
  if (a_nick.t() && b_nick.t() && a_nick.iequals(b_nick)) return true;
  return false;
}

inner_mark_tree *bookmark_tree::find_parent(const istring &parent_name)
{
  FUNCDEF("find_parent");
  // first, look for the node above the last parent.
  inner_mark_tree *parent = dynamic_cast<inner_mark_tree *>
      (_last_parent->find(parent_name, inner_mark_tree::recurse_upward,
       magic_category_comparison));

#ifdef DEBUG_MARKS
  if (parent)
    LOG(istring("trying upwards find for parent node ") + parent_name);
#endif

  if (!parent) {
#ifdef DEBUG_MARKS
    LOG(istring("upwards find failed seeking on last_parent node ")
        + parent_name);
#endif

    // we didn't find the parent above the last category...
    parent = dynamic_cast<inner_mark_tree *>(_last_node->find(parent_name,
        inner_mark_tree::recurse_upward, magic_category_comparison));
  }

  if (!parent) {
#ifdef DEBUG_MARKS
    LOG(istring("upwards find failed seeking on last_node ") + parent_name);
#endif

    // last node didn't help either.
    parent = dynamic_cast<inner_mark_tree *>(_mark_tree->find(parent_name,
        inner_mark_tree::recurse_downward, magic_category_comparison));
  }
  if (!parent) {
    // failed to find the parent node, so hook it to the root node.
    LOG(istring("failed to find parent node ") + parent_name);

    // create a parent node and use it for this guy.
    inner_mark_tree *new_node = new inner_mark_tree(parent_name,
        _line_number, HASHING_PARAMETER);
    _mark_tree->attach(new_node);
    _mark_tree->sort();
    _category_count++;

    parent = new_node;
  } else {
#ifdef DEBUG_MARKS
    LOG(istring("found parent node ") + parent_name);
#endif
  }

  return parent;
}

inner_mark_tree *bookmark_tree::process_category(const string_array &items)
{
  FUNCDEF("process_category");
  const istring &category_name = items[1];
  const istring &parent_name = items[2];

  if (items.length() > 3) {
    // complain about a possibly malformed category.
    LOG(istring("category ") + category_name + " under " + parent_name
        + " has extra fields!");
  }

//BASE_LOG("CURRENT:");
//BASE_LOG(_mark_tree->text_form());

  // make sure we don't add anything to the tree if this is the root.
  if (!parent_name || magic_category_comparison("Root", category_name)) {
#ifdef DEBUG_MARKS
    LOG(istring("skipping parent node for ") + category_name);
#endif
    return _mark_tree;
  }

  // ensure that the categories aren't competing with other names.
  istring main_name, nickname;
  break_name(category_name, main_name, nickname);
  istring *found1 = _category_names->find(main_name, case_insense_compare);
  istring *found2 = _category_names->find(nickname, case_insense_compare);
  if (found1 || found2) {
    istring catnames;
    if (found1) catnames = *found1;  // add the first match, if it exists.
    if (found2) {
      if (!!catnames) catnames += " and ";
      catnames += *found2;
    }
    LOG(istring("category name \"") + category_name
        + "\" in conflict with existing: " + catnames);
    inner_mark_tree *fake_it = NIL;
//hmmm: neither of these are right; they need to use a comparator that
//      uses our magic comparison function.
deadly_error(class_name(), func, "collision resolution code not present yet; please fix category error");
return fake_it;
    if (found1)
      fake_it = (inner_mark_tree *)_mark_tree->find
          (*found1, symbol_tree::recurse_downward);
    if (fake_it) return fake_it;
    if (found2)
      fake_it = (inner_mark_tree *)_mark_tree->find
          (*found2, symbol_tree::recurse_downward);
    if (fake_it) return fake_it;
    LOG("  failure to find a match for either category!");
    return NIL;
  }
  // now that we know these names are unique, we'll add them into the list
  // so future categories can't reuse these.
  _category_names->add(main_name, main_name);
  if (!!nickname) _category_names->add(nickname, nickname);

  inner_mark_tree *parent = find_parent(parent_name);
  _last_parent = parent;  // set the parent for the next time.

  // see if the category is already present under the parent.
  for (int i = 0; i < parent->branches(); i++) {
    inner_mark_tree *curr = dynamic_cast<inner_mark_tree *>(parent->branch(i));
    if (!curr)
      non_continuable_error(class_name(), DEADLY_LINE, "missing branch in tree");
#ifdef DEBUG_MARKS
    LOG(istring("looking at branch ") + curr->name());
#endif
    if (magic_category_comparison(curr->name(), category_name)) {
      // it already exists?  argh.
      LOG(istring("category ") + category_name + " already exists under "
          + parent_name + ".");
      _last_node = curr;
      return curr;
    }
  }

  inner_mark_tree *new_node = new inner_mark_tree(category_name,
      _line_number, HASHING_PARAMETER);
  parent->attach(new_node);
  parent->sort();
  _last_node = new_node;

  _category_count++;

#ifdef DEBUG_MARKS
  LOG(istring("attaching node ") + category_name + " to parent "
      + parent->name());
#endif
  return new_node;
}

void bookmark_tree::process_link(const string_array &items)
{
  FUNCDEF("process_link");
  istring description = items[1];
  istring parent_name = items[2];
  istring url = "UNKNOWN";
  if (items.length() >= 4) url = items[3];

  // strip any directory slashes that are provided as a suffix.  we don't need
  // them and they tend to confuse the issue when we look for duplicates.
  while (url[url.end()] == '/') {
    url.zap(url.end(), url.end());
  }

  // make some noise if they seem to have a badly formed link.
  if (items.length() < 4) {
    LOG(istring("link ") + description + " under " + parent_name
        + " has no URL!");
  } else if (items.length() > 4) {
    LOG(istring("link ") + description + " under " + parent_name
        + " has extra fields!");
  }

  // find the parent for this link.
  inner_mark_tree *parent = find_parent(parent_name);
  _last_parent = parent;  // set the parent for the next time.

  // see if the link already exists.
  istring url_copy = url.lower();
    // make a copy in lower-case so we ignore case differences.
  int *found = _links_seen->find(url_copy);
  if (found) {
    // this is not so great; a duplicate link has been found.
    LOG(isprintf("Duplicate Link: line %d already has ", *found) + url);
    return;
  } else {
    _links_seen->add(url_copy, _line_number);
  }

  // add the link to the parent.
  link_record *new_rec = new link_record(description, url, _line_number);
  parent->_links.add(new_rec);

  _link_count++;
}

void bookmark_tree::process_comment(const istring &current_line_in)
{
  FUNCDEF("process_comment");
  istring current_line = current_line_in;

  // output the comment as simple text.
//BASE_LOG("comment case");
  if (current_line.contains("http:")) {
    istring hold_curr = current_line;
    int indy = current_line.find("http:");
    hold_curr.zap(0, indy - 1);
    current_line = istring("&nbsp; &nbsp; &nbsp; &nbsp; "
        "<a href=\"") + hold_curr + "\">" + hold_curr + "</a>";
  } else if (current_line.t()) {
    // snap the comment character off of the front.
    current_line.zap(0, 0);
  }

  link_record *new_rec = new link_record(current_line,
      istring::empty_string(), _line_number);
  _last_node->_links.add(new_rec, false);
}

int bookmark_tree::read_csv_file(const istring &input_filename)
{
  FUNCDEF("read_csv_file");
  byte_filer input_file(input_filename, "r");
  if (!input_file.good())
    non_continuable_error(class_name(), DEADLY_LINE,
        "the input file could not be opened");

  string_array items;  // parsed in csv line.
  istring current_line;  // read from input file.

  // read the lines in the file, one at a time.
  while (input_file.getline(current_line, MAX_LINE_SIZE) > 0) {
    _line_number++;  // go to the next line.
    // remove the carriage returns first.
    while (parser_bits::is_eol(current_line[current_line.end()])) {
      current_line.zap(current_line.end(), current_line.end());
    }
    current_line.strip_spaces();
    if (!current_line.length()) {
//      // blank lines get treated as a special case.  they are always added
//      // at the end of the list.
//      process_comment(current_line);
      continue;
    } else if (current_line[0] == '#') {
      // handle a comment in the database.
      process_comment(current_line);
    } else {
      // csv parse the line, since we don't support much else.
      bool parsed = list_parsing::parse_csv_line(current_line, items);
      if (!parsed)
        non_continuable_error(class_name(), DEADLY_LINE,
            istring("the line could not be parsed: ") + current_line);
      if (!items.length()) {
        LOG("bad formatting on this line.");
        continue;
      }
      if (items[0].iequals("C")) {
        inner_mark_tree *node = process_category(items);
        if (!node) {
          LOG(istring("failed to get a node for ") + items[1]);
        }
      } else if (items[0].iequals("L")) {
        process_link(items);
      } else {
        non_continuable_error(class_name(), DEADLY_LINE,
            istring("unknown format in line: ") + current_line);
      }
    }
  }
  return 0;
}

