hash_table.h

Go to the documentation of this file.
00001 #ifndef HASH_TABLE_CLASS
00002 #define HASH_TABLE_CLASS
00003 
00004 /*****************************************************************************\
00005 *                                                                             *
00006 *  Name   : hash_table                                                        *
00007 *  Author : Chris Koeritz                                                     *
00008 *                                                                             *
00009 *******************************************************************************
00010 * Copyright (c) 2001-$now By Author.  This program is free software; you can  *
00011 * redistribute it and/or modify it under the terms of the GNU General Public  *
00012 * License as published by the Free Software Foundation; either version 2 of   *
00013 * the License or (at your option) any later version.  This is online at:      *
00014 *     http://www.fsf.org/copyleft/gpl.html                                    *
00015 * Please send any updates to: fred@gruntose.com                               *
00016 \*****************************************************************************/
00017 
00018 #include "amorph.h"
00019 
00020 #include <basis/array.h>
00021 #include <basis/byte_array.h>
00022 #include <basis/contracts.h>
00023 #include <basis/enhance_cpp.h>
00024 #include <basis/functions.h>
00025 
00026 #include <math.h>
00027 
00028 namespace structures {
00029 
00030 // forward.
00031 template <class key, class contents> class internal_hash_array;
00032 
00034 
00040 class hashing_algorithm : public virtual basis::clonable
00041 {
00042 public:
00043   virtual basis::un_int hash(const void *key_data, int key_length) const = 0;
00045 
00051   virtual hashing_algorithm *clone() const = 0;
00053 };
00054 
00056 
00058 
00065 template <class key_type, class contents>
00066   // the key_type must support valid copy constructor, assignment operator and
00067   // equality operators.  the contents need not support any special operations;
00068   // it is always considered as just a pointer.
00069 class hash_table : public virtual basis::nameable
00070 {
00071 public:
00072   hash_table(const hashing_algorithm &hasher, int estimated_elements);
00074 
00080   virtual ~hash_table();
00082 
00083   DEFINE_CLASS_NAME("hash_table");
00084 
00085   void rehash(int estimated_elements);
00087 
00089   enum outcomes {
00090     IS_NEW = basis::common::IS_NEW,
00091     EXISTING = basis::common::EXISTING
00092   };
00093 
00094   static int calculate_num_slots(int estimated_elements);
00096 
00097   int elements() const;
00099 
00101   int estimated_elements() const { return c_estim_elements; }
00103 
00104   basis::outcome add(const key_type &key, contents *to_store);
00106 
00114   basis::outcome fast_dangerous_add(const key_type &key, contents *to_store);
00116 
00119   bool find(const key_type &key, contents * &item_found) const;
00121 
00125   contents *find(const key_type &key) const
00126           { contents *c = NIL; return find(key, c)? c : NIL; }
00128 
00129   contents *acquire(const key_type &key);
00131 
00135   bool zap(const key_type &key);
00137 
00139   void reset();
00141 
00142   typedef bool apply_function(const key_type &key, contents &current,
00143           void *data_link);
00145 
00150   void apply(apply_function *to_apply, void *data_link);
00152 
00158   basis::outcome add(key_type *key, contents *to_store, bool check_dupes = true);
00160 
00167   bool verify() const;
00169 
00171 private:
00172   int c_estim_elements;  
00173   hashing_algorithm *_hasher;  
00174   internal_hash_array<key_type, contents> *_table;  
00175   int _last_iter;
00177 
00178   hash_table(const hash_table &to_copy);
00180   hash_table &operator =(const hash_table &to_copy);
00182 
00183 public:
00184   internal_hash_array<key_type, contents> &table_access() const;
00186 };
00187 
00189 
00191 
00197 template <class key_type, class contents>
00198 void copy_hash_table(hash_table<key_type, contents> &target,
00199     const hash_table<key_type, contents> &source);
00200 
00202 
00203 // implementations for longer methods below....
00204 
00205 // this is a very special micro-managed class.  it holds two pointers which
00206 // it neither creates nor destroys.  thus all interaction with it must be
00207 // careful about removing these objects at the appropriate time.  we don't
00208 // want automatic memory management since we want a cheap copy on the items
00209 // in the buckets.
00210 
00211 template <class key_type, class contents>
00212 class hash_wrapper : public virtual basis::root_object
00213 {
00214 public:
00215   key_type *_id;
00216   contents *_data;
00217 
00218   hash_wrapper(key_type *id = NIL, contents *data = NIL)
00219       : _id(id), _data(data) {}
00220 };
00221 
00223 
00224 template <class key_type, class contents>
00225 class bucket
00226     : public basis::array<hash_wrapper<key_type, contents> >,
00227       public virtual basis::root_object
00228 {
00229 public:
00230   bucket() : basis::array<hash_wrapper<key_type, contents> >(0, NIL,
00231       basis::byte_array::SIMPLE_COPY | basis::byte_array::EXPONE
00232       | basis::byte_array::FLUSH_INVISIBLE) {}
00233 
00234   int find(const key_type &to_find) {
00235     for (int i = 0; i < this->length(); i++) {
00236       key_type *curr_key = this->get(i)._id;
00237 //hmmm: if curr key is not set, is that a logic error?  it seems like we
00238 //      are seeing the potential for this.
00239       if (curr_key && (to_find == *curr_key))
00240         return i;
00241     }
00242     return basis::common::NOT_FOUND;
00243   }
00244 };
00245 
00247 
00248 template <class key_type, class contents>
00249 class internal_hash_array : public amorph<bucket<key_type, contents> >
00250 {
00251 public:
00252   internal_hash_array(int estimated_elements)
00253       : amorph<bucket<key_type, contents> >
00254            (hash_table<key_type, contents>::calculate_num_slots(estimated_elements)) {}
00255 };
00256 
00258 
00259 template <class key_type, class contents>
00260 hash_table<key_type, contents>::hash_table(const hashing_algorithm &hasher, int estimated_elements)
00261 : c_estim_elements(estimated_elements),
00262   _hasher(hasher.clone()),
00263   _table(new internal_hash_array<key_type, contents>(estimated_elements)),
00264   _last_iter(0)
00265 {}
00266 
00267 template <class key_type, class contents>
00268 hash_table<key_type, contents>::~hash_table()
00269 {
00270 #ifdef EXTREME_CHECKING
00271   FUNCDEF("destructor");
00272   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00273 #endif
00274   reset();
00275   basis::WHACK(_table);
00276   basis::WHACK(_hasher);
00277 }
00278 
00279 template <class key_type, class contents>
00280 int hash_table<key_type, contents>::calculate_num_slots(int estimated_elements)
00281 {
00282 //printf("elems wanted = %d\n", estimated_elements);
00283   int log_2_truncated = int(log(float(estimated_elements)) / log(2.0));
00284 //printf("log 2 of elems, truncated = %d\n", log_2_truncated);
00285   int slots_needed_for_elems = (int)pow(2.0, double(log_2_truncated + 1));
00286 //printf("slots needed = %d\n", slots_needed_for_elems );
00287   return slots_needed_for_elems;
00288 }
00289 
00290 // the specialized copy operation.
00291 template <class key_type, class contents>
00292 void copy_hash_table(hash_table<key_type, contents> &target,
00293     const hash_table<key_type, contents> &source)
00294 {
00295 #ifdef EXTREME_CHECKING
00296   FUNCDEF("copy_hash_table");
00297   if (!source.verify())
00298     deadly_error(class_name(), func, "source state did not verify.");
00299 #endif
00300   target.reset();
00301   for (int i = 0; i < source.table_access().elements(); i++) {
00302     bucket<key_type, contents> *buck = source.table_access().borrow(i);
00303     if (!buck) continue;
00304     for (int j = 0; j < buck->length(); j++) {
00305       target.add(*((*buck)[j]._id), new contents(*((*buck)[j]._data)));
00306     }
00307   }
00308 #ifdef EXTREME_CHECKING
00309   if (!target.verify())
00310     deadly_error(class_name(), func, "target state did not verify.");
00311 #endif
00312   #undef class_name
00313 }
00314 
00315 template <class key_type, class contents>
00316 void hash_table<key_type, contents>::reset()
00317 {
00318 #ifdef EXTREME_CHECKING
00319   FUNCDEF("reset");
00320   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00321 #endif
00322   // iterate over the list whacking the content items in the buckets.
00323   for (int i = 0; i < _table->elements(); i++) {
00324     bucket<key_type, contents> *buck = _table->acquire(i);
00325     if (!buck) continue;
00326     for (int j = 0; j < buck->length(); j++) {
00327       basis::WHACK((*buck)[j]._data);  // eliminate the stored data.
00328       basis::WHACK((*buck)[j]._id);  // eliminate the stored key.
00329       buck->zap(j, j);  // remove the element.
00330       j--;  // skip back before whack happened.
00331     }
00332     basis::WHACK(buck);
00333   }
00334 #ifdef EXTREME_CHECKING
00335   if (!verify())
00336     deadly_error(class_name(), func, "state did not verify afterwards.");
00337 #endif
00338 }
00339 
00340 template <class key_type, class contents>
00341 bool hash_table<key_type, contents>::verify() const
00342 {
00343   for (int i = 0; i < _table->elements(); i++) {
00344     const bucket<key_type, contents> *buck = _table->borrow(i);
00345     if (!buck) continue;  // that's acceptable.
00346     for (int j = 0; j < buck->length(); j++) {
00347       const hash_wrapper<key_type, contents> &wrap = (*buck)[j];
00348       if (!wrap._data) {
00349 //        program_wide_logger::get().log(a_sprintf("hash table: no data segment at position %d.", j));
00350         return false;
00351       }
00352       if (!wrap._id) {
00353 //        program_wide_logger::get().log(a_sprintf("hash table: no identifier at position %d.", j));
00354         return false;
00355       }
00356     }
00357   }
00358   return true;
00359 }
00360 
00361 template <class key_type, class contents>
00362 internal_hash_array<key_type, contents> &hash_table<key_type, contents>
00363     ::table_access() const
00364 { return *_table; }
00365 
00366 template <class key_type, class contents>
00367 void hash_table<key_type, contents>::rehash(int estimated_elements)
00368 {
00369 #ifdef EXTREME_CHECKING
00370   FUNCDEF("rehash");
00371   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00372 #endif
00373   typedef bucket<key_type, contents> buckie;
00374   hash_table<key_type, contents> new_hash(*_hasher, estimated_elements);
00375     // this is the new table that will be used.
00376 
00377   // scoot through the existing table so we can move items into the new one.
00378   for (int i = 0; i < _table->elements(); i++) {
00379     buckie *b = _table->borrow(i);  // look at the current element.
00380     if (!b) continue;  // nothing here, keep going.
00381     for (int j = b->length() - 1; j >= 0; j--) {
00382       key_type *k = b->use(j)._id;
00383       contents *c = b->use(j)._data;
00384       new_hash.add(k, c);
00385     }
00386     b->reset();
00387       // clean out any items in the buckets here now that they've moved.
00388   }
00389 
00390   // now flip the contents of the new guy into "this".
00391   _table->reset();  // toss previous stuff.
00392   _table->adjust(new_hash._table->elements());
00393   for (int q = 0; q < new_hash._table->elements(); q++)
00394     _table->put(q, new_hash._table->acquire(q));
00395   // reset other data members.
00396   c_estim_elements = new_hash.c_estim_elements;
00397   _last_iter = 0;
00398 #ifdef EXTREME_CHECKING
00399   if (!verify())
00400     deadly_error(class_name(), func, "state did not verify afterwards.");
00401 #endif
00402 }
00403 
00404 template <class key_type, class contents>
00405 basis::outcome hash_table<key_type, contents>::add(const key_type &key,
00406     contents *to_store)
00407 { return add(new key_type(key), to_store); }
00408 
00409 template <class key_type, class contents>
00410 basis::outcome hash_table<key_type, contents>::add(key_type *key,
00411     contents *to_store, bool check_dupes)
00412 {
00413 #ifdef EXTREME_CHECKING
00414   FUNCDEF("add");
00415   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00416 #endif
00417   // get a hash value.
00418   basis::un_int hashed = _hasher->hash((const void *)key, sizeof(key_type));
00419   // make the value appropriate for our table.
00420   hashed = hashed % _table->elements();
00421   // see if the key already exists there.
00422   bucket<key_type, contents> *buck = _table->borrow(hashed);
00423   if (!buck) {
00424     // this slot doesn't exist yet.
00425     buck = new bucket<key_type, contents>;
00426     _table->put(hashed, buck);
00427   }
00428   if (!check_dupes) {
00429     // we aren't even going to check for its existence.
00430     *buck += hash_wrapper<key_type, contents>(key, to_store);
00431     return IS_NEW;
00432   }
00433   int indy = buck->find(*key);
00434   if (basis::negative(indy)) {
00435     // that value was not seen yet, so we're adding a new entry.
00436     *buck += hash_wrapper<key_type, contents>(key, to_store);
00437 #ifdef EXTREME_CHECKING
00438     if (!verify())
00439       deadly_error(class_name(), func, "state did not verify afterwards.");
00440 #endif
00441     return IS_NEW;
00442   }
00443   // that key already existed, so we'll re-use its slot with the new data.
00444   basis::WHACK((*buck)[indy]._data);
00445   basis::WHACK(key);  // toss since we're not using.
00446   (*buck)[indy]._data = to_store;
00447 #ifdef EXTREME_CHECKING
00448   if (!verify())
00449     deadly_error(class_name(), func, "state did not verify afterwards.");
00450 #endif
00451   return EXISTING;
00452 }
00453 
00454 template <class key_type, class contents>
00455 basis::outcome hash_table<key_type, contents>::fast_dangerous_add
00456     (const key_type &key, contents *to_store)
00457 { return add(new key_type(key), to_store, false); }
00458 
00459 template <class key_type, class contents>
00460 bool hash_table<key_type, contents>::find(const key_type &key,
00461     contents * &item_found) const
00462 {
00463 #ifdef EXTREME_CHECKING
00464   FUNCDEF("find");
00465   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00466 #endif
00467   item_found = NIL;
00468   // get a hash value.
00469   basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
00470   // make the value appropriate for our table.
00471   hashed = hashed % _table->elements();
00472   // see if the key exists in the bucket.
00473   bucket<key_type, contents> *buck = _table->borrow(hashed);
00474   if (!buck) return false;
00475   int indy = buck->find(key);
00476   if (basis::negative(indy)) return false;  // not there.
00477   // was there, so retrieve the data.
00478   item_found = (*buck)[indy]._data;
00479   return true;
00480 }
00481 
00482 template <class key_type, class contents>
00483 contents *hash_table<key_type, contents>::acquire(const key_type &key)
00484 {
00485 #ifdef EXTREME_CHECKING
00486   FUNCDEF("acquire");
00487   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00488 #endif
00489   // get a hash value.
00490   basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
00491   // make the value appropriate for our table.
00492   hashed = hashed % _table->elements();
00493   // see if the key exists in the bucket.
00494   bucket<key_type, contents> *buck = _table->borrow(hashed);
00495   if (!buck) return NIL;
00496   int indy = buck->find(key);
00497   if (basis::negative(indy)) return NIL;  // nope, not there.
00498   contents *to_return = (*buck)[indy]._data;
00499   basis::WHACK((*buck)[indy]._id);  // clean the id.
00500   buck->zap(indy, indy);  // toss the storage blob for the item.
00501 #ifdef EXTREME_CHECKING
00502   if (!verify())
00503     deadly_error(class_name(), func, "state did not verify afterwards.");
00504 #endif
00505   return to_return;
00506 }
00507 
00508 template <class key_type, class contents>
00509 bool hash_table<key_type, contents>::zap(const key_type &key)
00510 {
00511 #ifdef EXTREME_CHECKING
00512   FUNCDEF("zap");
00513   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00514 #endif
00515   // get a hash value.
00516   basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
00517   // make the value appropriate for our table.
00518   hashed = hashed % _table->elements();
00519   // see if the key exists in the bucket.
00520   bucket<key_type, contents> *buck = _table->borrow(hashed);
00521   if (!buck) return false;
00522   int indy = buck->find(key);
00523   if (basis::negative(indy)) {
00524     // nope, not there.
00525     return false;
00526   }
00527   basis::WHACK((*buck)[indy]._data);  // delete the data held.
00528   basis::WHACK((*buck)[indy]._id);  // delete the data held.
00529   buck->zap(indy, indy);  // toss the storage blob for the item.
00530   if (!buck->length()) {
00531     // clean up this bucket now.
00532     buck = _table->acquire(hashed);
00533     basis::WHACK(buck);
00534   }
00535 #ifdef EXTREME_CHECKING
00536   if (!verify())
00537     deadly_error(class_name(), func, "state did not verify afterwards.");
00538 #endif
00539   return true;
00540 }
00541 
00542 template <class key_type, class contents>
00543 void hash_table<key_type, contents>::apply(apply_function *to_apply,
00544     void *data_link)
00545 {
00546 #ifdef EXTREME_CHECKING
00547   FUNCDEF("apply");
00548   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00549 #endif
00550   typedef bucket<key_type, contents> buckie;
00551   int bucks_seen = 0;
00552   int posn = _last_iter;  // start at the same place we left.
00553   while (bucks_seen++ < _table->elements()) {
00554     if ( (posn < 0) || (posn >= _table->elements()) )
00555       posn = 0;
00556     buckie *b = _table->borrow(posn);
00557     _last_iter = posn++;
00558       // record where the iteration last touched and increment next position.
00559       // we must do this before we check if the bucket exists or we'll rotate
00560       // on this same place forever.
00561     if (!b) continue;  // nothing here, keep going.
00562     for (int j = 0; j < b->length(); j++) {
00563       contents *c = b->use(j)._data;
00564       if (!c) {
00565 #ifdef EXTREME_CHECKING
00566         deadly_error("hash_table", "apply", "logic error--missing pointer");
00567 #endif
00568         continue;
00569       }
00570       if (!to_apply(*b->use(j)._id, *c, data_link)) return;
00571     }
00572   }
00573 }
00574 
00575 template <class key_type, class contents>
00576 int hash_table<key_type, contents>::elements() const
00577 {
00578 #ifdef EXTREME_CHECKING
00579   FUNCDEF("elements");
00580   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00581 #endif
00582   typedef bucket<key_type, contents> buckie;
00583   int to_return = 0;
00584   for (int i = 0; i < _table->elements(); i++) {
00585     bucket<key_type, contents> *buck = _table->borrow(i);
00586     if (!buck) continue;  // nothing to count.
00587     to_return += buck->length();
00588   }
00589   return to_return;
00590 }
00591 
00592 #undef static_class_name
00593 
00594 } //namespace.
00595 
00596 #endif // outer guard.
00597 
Generated on Sat Jan 28 04:22:25 2012 for hoople2 project by  doxygen 1.6.3