symbol_table.cpp

Go to the documentation of this file.
00001 #ifndef SYMBOL_TABLE_IMPLEMENTATION
00002 #define SYMBOL_TABLE_IMPLEMENTATION
00003 
00004 /*****************************************************************************\
00005 *                                                                             *
00006 *  Name   : symbol_table                                                      *
00007 *  Author : Chris Koeritz                                                     *
00008 *                                                                             *
00009 *******************************************************************************
00010 * Copyright (c) 1991-$now By Author.  This program is free software; you can  *
00011 * redistribute it and/or modify it under the terms of the GNU General Public  *
00012 * License as published by the Free Software Foundation; either version 2 of   *
00013 * the License or (at your option) any later version.  This is online at:      *
00014 *     http://www.fsf.org/copyleft/gpl.html                                    *
00015 * Please send any updates to: fred@gruntose.com                               *
00016 \*****************************************************************************/
00017 
00018 #include "pointer_hash.cpp"
00019 #include "string_hash.cpp"
00020 #include "symbol_table.h"
00021 
00022 #include <basis/function.h>
00023 #include <basis/guards.h>
00024 #include <basis/istring.h>
00025 #include <basis/log_base.h>
00026 
00027 //#define DEBUG_SYMBOL_TABLE
00028   // uncomment for noisier debugging.
00029 
00030 #ifdef DEBUG_SYMBOL_TABLE
00031   #undef LOG
00032   #define LOG(s) CLASS_EMERGENCY_LOG(program_wide_logger(), s)
00033 #endif
00034 
00036 
00037 // this structure keeps track of our symbol table elements.
00038 
00039 template <class contents>
00040 class internal_symbol_info
00041 {
00042 public:
00043   contents _content;  // the object provided by the user.
00044   istring _name;  // symbol's name.
00045 
00046   internal_symbol_info(const contents &content, const istring &name)
00047   : _content(content), _name(name) {}
00048 };
00049 
00051 
00052 // this is our tracking system that allows us to offer array like services.
00053 // each symbol held has an address as one of our wrappers.  thus we can
00054 // list those addresses in a hash table along with listing the contents
00055 // in the main hash table.  the pointer_hash gives us a list of ids set, so
00056 // we can have some ordering for the items in the table.
00057 
00058 template <class contents>
00059 class internal_pointer_hider
00060 {
00061 public:
00062   internal_symbol_info<contents> *_held;
00063 
00064   internal_pointer_hider(internal_symbol_info<contents> *held) : _held(held) {}
00065 };
00066 
00067 template <class contents>
00068 class internal_symbol_indexer
00069 : public pointer_hash<internal_pointer_hider<contents> >
00070 {
00071 public:
00072   internal_symbol_indexer(int max_bits)
00073       : pointer_hash<internal_pointer_hider<contents> >(max_bits) {}
00074 };
00075 //hmmm: danger, danger.  when OS uses a 64 bit pointer length, this will
00076 //      be terribly awfully busted.  we then need a void * hash.  what about
00077 //      that?  would serve as handy way to ensure we know a pointer is valid
00078 //      too; store all in the main hash.
00079 
00081 
00082 // this object maintains the symbol table's contents.
00083 
00084 template <class contents>
00085 class internal_symbol_list
00086 : public string_hash<internal_symbol_info<contents> >
00087 {
00088 public:
00089   internal_symbol_list(int max_bits)
00090       : string_hash<internal_symbol_info<contents> >(max_bits) {}
00091 };
00092 
00094 
00095 // the main algorithms class implementing the external interface.
00096 
00097 #undef static_class_name
00098 #define static_class_name() "symbol_table"
00099 
00100 template <class contents>
00101 symbol_table<contents>::symbol_table(int max_bits)
00102 : _symbol_list(new internal_symbol_list<contents>(max_bits)),
00103   _tracker(new internal_symbol_indexer<contents>(max_bits))
00104 {}
00105 
00106 template <class contents>
00107 symbol_table<contents>::symbol_table(const symbol_table<contents> &to_copy)
00108 : _symbol_list(new internal_symbol_list<contents>
00109       (to_copy._symbol_list->max_bits())),
00110   _tracker(new internal_symbol_indexer<contents>(to_copy.max_bits()))
00111 { *this = to_copy; }
00112 
00113 template <class contents>
00114 symbol_table<contents>::~symbol_table()
00115 {
00116   WHACK(_symbol_list);
00117   WHACK(_tracker);
00118 }
00119 
00120 template <class contents>
00121 int symbol_table<contents>::max_bits() const
00122 { return _symbol_list->max_bits(); }
00123 
00124 template <class contents>
00125 void symbol_table<contents>::rehash(int max_bits)
00126 {
00127   _symbol_list->rehash(max_bits);
00128   _tracker->rehash(max_bits);
00129 }
00130 
00131 //hmmm: provide shrink capability too.
00132 template <class contents>
00133 void symbol_table<contents>::hash_appropriately(int max_per_bucket)
00134 {
00135   FUNCDEF("hash_appropriately");
00136   if (max_per_bucket < 1) max_per_bucket = 1;
00137   int new_bits = max_bits();  // start with the current number of bits.
00138   int current_average_size = symbols() / int(pow(2.0, new_bits));
00139     // this is what the average travel size is currently.
00140 #ifdef DEBUG_SYMBOL_TABLE
00141   int curr_bits = max_bits();
00142   int orig_elems = symbols();
00143 #endif
00144   while (current_average_size > max_per_bucket) {
00145     // loop until we find a nice size.
00146     new_bits++;
00147 //hmmm: better would be to calculate size needed with logarithmic formula.
00148     current_average_size = symbols() / int(pow(2.0, new_bits));
00149   }
00150   if (new_bits == max_bits()) return;  // no changes.
00151 #ifdef DEBUG_SYMBOL_TABLE
00152   LOG(isprintf("size desired=%d, current bits=%d, elems=%d, "
00153       "new bits=%d, new avg size=%d", max_per_bucket, curr_bits, orig_elems,
00154       new_bits, current_average_size));
00155 #endif
00156   if (new_bits > 30) return;
00157     // too big for us to help you; we don't want to blow away too much memory.
00158   rehash(new_bits);
00159 }
00160 
00161 template <class contents>
00162 int symbol_table<contents>::symbols() const
00163 { return _tracker->ids().elements(); }
00164 
00165 template <class contents>
00166 symbol_table<contents> &symbol_table<contents>::
00167     operator =(const symbol_table &to_copy)
00168 {
00169   if (this == &to_copy) return *this;
00170   reset();
00171   for (int i = 0; i < to_copy.symbols(); i++) {
00172     internal_symbol_info<contents> *info = to_copy.get_index(i);
00173     if (info) {
00174       internal_symbol_info<contents> *new_info
00175           = new internal_symbol_info<contents>(*info);
00176       _symbol_list->add(info->_name, new_info);
00177       internal_pointer_hider<contents> *new_track = 
00178           new internal_pointer_hider<contents>(new_info);
00179       _tracker->add(new_info, new_track);
00180     }
00181   }
00182   return *this;
00183 }
00184 
00185 template <class contents>
00186 void symbol_table<contents>::reset()
00187 {
00188   _symbol_list->reset();
00189   _tracker->reset();
00190 }
00191 
00192 template <class contents>
00193 const istring &symbol_table<contents>::name(int index) const
00194 {
00195   bounds_return(index, 0, symbols() - 1, bogonic<istring>());
00196   return get_index(index)->_name;
00197 }
00198 
00199 template <class contents>
00200 void symbol_table<contents>::names(string_set &to_fill) const
00201 {
00202   to_fill.reset();
00203   for (int i = 0; i < symbols(); i++)
00204     to_fill += get_index(i)->_name;
00205 }
00206 
00207 template <class contents>
00208 internal_symbol_info<contents> *symbol_table<contents>::
00209     get_index(int index) const
00210 {
00211   bounds_return(index, 0, symbols() - 1, NIL);
00212   return _tracker->find(_tracker->ids()[index])->_held;
00213 }
00214 
00215 template <class contents>
00216 const contents &symbol_table<contents>::operator [] (int index) const
00217 {
00218   bounds_return(index, 0, symbols() - 1, bogonic<contents>());
00219   internal_symbol_info<contents> *found = get_index(index);
00220   if (!found) return bogonic<contents>();
00221   return found->_content;
00222 }
00223 
00224 template <class contents>
00225 contents &symbol_table<contents>::operator [] (int index)
00226 {
00227   bounds_return(index, 0, symbols() - 1, bogonic<contents>());
00228   internal_symbol_info<contents> *found = get_index(index);
00229   if (!found) return bogonic<contents>();
00230   return found->_content;
00231 }
00232 
00233 template <class contents>
00234 contents *symbol_table<contents>::find(const istring &name) const
00235 { 
00236   FUNCDEF("find [name]");
00237   internal_symbol_info<contents> *found = _symbol_list->find(name);
00238   if (!found) return NIL;
00239   return &found->_content;
00240 }
00241 
00242 template <class contents>
00243 int symbol_table<contents>::dep_find(const istring &name) const
00244 {
00245   internal_symbol_info<contents> *entry = _symbol_list->find(name);
00246   if (!entry) return common::NOT_FOUND;
00247 
00248   for (int i = 0; i < symbols(); i++) {
00249     if (_tracker->ids()[i] == entry) return i;
00250   }
00251   return common::NOT_FOUND;  // this is bad; it should have been found.
00252 }
00253 
00254 template <class contents>
00255 struct sym_tab_apply_data {
00256   string_comparator_function *_scf;
00257   contents *_found;
00258   istring _to_find;
00259 
00260   sym_tab_apply_data() : _scf(NIL), _found(NIL) {}
00261 };
00262 
00263 template <class contents>
00264 bool sym_tab_finder_apply(const istring &key, contents &current,
00265     void *data_link)
00266 {
00267   FUNCDEF("sym_tab_finder_apply");
00268   sym_tab_apply_data<contents> *package
00269       = (sym_tab_apply_data<contents> *)data_link;
00270 #ifdef DEBUG_SYMBOL_TABLE
00271   LOG(istring("  checking ") + key);
00272 #endif
00273   bool equals = package->_scf(key, package->_to_find);
00274   if (equals) {
00275     package->_found = &current;
00276     return false;  // done.
00277   }
00278   return true;  // keep going.
00279 }
00280 
00281 template <class contents>
00282 contents *symbol_table<contents>::find(const istring &name,
00283     string_comparator_function *comparator) const
00284 {
00285   FUNCDEF("find [comparator]");
00286   if (!comparator) return find(name);  // devolve to simplified call.
00287 #ifdef DEBUG_SYMBOL_TABLE
00288   LOG(istring("looking for ") + name);
00289 #endif
00290   sym_tab_apply_data<contents> pack;
00291   pack._to_find = name;
00292   pack._scf = comparator;
00293   // iterate across all the items in the hash.
00294   _symbol_list->apply(sym_tab_finder_apply, &pack);
00295   return pack._found;
00296 }
00297 
00298 template <class contents>
00299 outcome symbol_table<contents>::add(const istring &name, const contents &to_add)
00300 {
00301   FUNCDEF("add");
00302   internal_symbol_info<contents> *found = _symbol_list->find(name);
00303   if (!found) {
00304     // not there already.
00305     internal_symbol_info<contents> *new_item
00306         = new internal_symbol_info<contents>(to_add, name);
00307     _symbol_list->add(name, new_item);
00308     internal_pointer_hider<contents> *new_track = 
00309         new internal_pointer_hider<contents>(new_item);
00310     _tracker->add(new_item, new_track);
00311     return common::IS_NEW;
00312   }
00313   // overwrite the existing contents.
00314   found->_content = to_add;
00315   return common::EXISTING;
00316 }
00317 
00318 template <class contents>
00319 outcome symbol_table<contents>::zap_index(int index)
00320 {
00321   istring dead_name = name(index);
00322   return whack(dead_name);
00323 }
00324 
00325 template <class contents>
00326 outcome symbol_table<contents>::whack(const istring &name)
00327 {
00328   FUNCDEF("whack");
00329   internal_symbol_info<contents> *sep_ind = _symbol_list->find(name);
00330     // we need this pointer so we can find the tracker entry easily.
00331   bool found_it = _symbol_list->zap(name);
00332   if (found_it) {
00333     _tracker->zap(sep_ind);
00334   }
00335   return found_it? common::OKAY : common::NOT_FOUND;
00336 }
00337 
00338 template <class contents>
00339 outcome symbol_table<contents>::retrieve(int index, istring &name,
00340     contents &got) const
00341 {
00342   bounds_return(index, 0, symbols() - 1, common::NOT_FOUND);
00343   internal_symbol_info<contents> *found = get_index(index);
00344   name = found->_name;
00345   got = found->_content;
00346   return common::OKAY;
00347 }
00348 
00350 
00351 template <class contents>
00352 bool symbol_table_compare(const symbol_table<contents> &a,
00353     const symbol_table<contents> &b)
00354 {
00355   FUNCDEF("symbol_table_compare");
00356 
00357   string_set names_a;
00358   a.names(names_a);
00359   string_set names_b;
00360   b.names(names_b);
00361   if (names_a != names_b) return false;
00362 
00363   for (int i = 0; i < names_a.elements(); i++) {
00364     const istring &current_key = names_a[i];
00365     const contents *a_value = a.find(current_key);
00366     const contents *b_value = b.find(current_key);
00367     if (!a_value || !b_value) continue;  // not good.
00368     if (*a_value != *b_value) return false;
00369   }
00370   return true;
00371 }
00372 
00373 #undef static_class_name
00374 
00375 #endif
00376 

Generated on Fri Sep 5 04:28:42 2008 for HOOPLE Libraries by  doxygen 1.5.1