hash_table.cpp

Go to the documentation of this file.
00001 #ifndef HASH_TABLE_IMPLEMENTATION
00002 #define HASH_TABLE_IMPLEMENTATION
00003 
00004 /*****************************************************************************\
00005 *                                                                             *
00006 *  Name   : hash_table                                                        *
00007 *  Author : Chris Koeritz                                                     *
00008 *                                                                             *
00009 *******************************************************************************
00010 * Copyright (c) 2001-$now By Author.  This program is free software; you can  *
00011 * redistribute it and/or modify it under the terms of the GNU General Public  *
00012 * License as published by the Free Software Foundation; either version 2 of   *
00013 * the License or (at your option) any later version.  This is online at:      *
00014 *     http://www.fsf.org/copyleft/gpl.html                                    *
00015 * Please send any updates to: fred@gruntose.com                               *
00016 \*****************************************************************************/
00017 
00018 #include "amorph.cpp"
00019 #include "hash_table.h"
00020 
00021 #include <basis/byte_array.h>
00022 #include <basis/array.cpp>
00023 #include <basis/function.h>
00024 #include <basis/log_base.h>
00025 #include <mathematics/math_ops.h>
00026 
00027 #include <math.h>
00028 
00030 
00031 // this is a very special micro-managed class.  it holds two pointers which
00032 // it neither creates nor destroys.  thus all interaction with it must be
00033 // careful about removing these objects at the appropriate time.  we don't
00034 // want automatic memory management since we want a cheap copy on the items
00035 // in the buckets.
00036 
00037 template <class key_type, class contents>
00038 class hash_wrapper
00039 {
00040 public:
00041   key_type *_id;
00042   contents *_data;
00043 
00044   hash_wrapper(key_type *id = NIL, contents *data = NIL)
00045       : _id(id), _data(data) {}
00046 };
00047 
00049 
00050 template <class key_type, class contents>
00051 class bucket : public array<hash_wrapper<key_type, contents> >
00052 {
00053 public:
00054   bucket() : array<hash_wrapper<key_type, contents> >(0, NIL,
00055       byte_array::SIMPLE_COPY | byte_array::EXPONE
00056       | byte_array::FLUSH_INVISIBLE) {}
00057 
00058   int find(const key_type &to_find) {
00059     for (int i = 0; i < this->length(); i++) {
00060       key_type *curr_key = this->get(i)._id;
00061 //hmmm: if curr key is not set, is that a logic error?  it seems like we
00062 //      are seeing the potential for this.
00063       if (curr_key && (to_find == *curr_key))
00064         return i;
00065     }
00066     return common::NOT_FOUND;
00067   }
00068 };
00069 
00071 
00072 template <class key_type, class contents>
00073 class internal_hash_array : public amorph<bucket<key_type, contents> >
00074 {
00075 public:
00076   internal_hash_array(u_int max_bits)
00077       : amorph<bucket<key_type, contents> >(math_ops::pow_2(max_bits)) {}
00078 };
00079 
00081 
00082 #define static_class_name() "hash_table"
00083 
00084 template <class key_type, class contents>
00085 hash_table<key_type, contents>::hash_table(const hashing_algorithm &hasher,
00086     int max_table_bits)
00087 : _max_bits((max_table_bits > 0)? max_table_bits : 1),
00088   _hasher(hasher.clone()),
00089   _table(new internal_hash_array<key_type, contents>(max_table_bits)),
00090   _last_iter(0)
00091 {}
00092 
00093 template <class key_type, class contents>
00094 hash_table<key_type, contents>::~hash_table()
00095 {
00096   FUNCDEF("destructor");
00097 #ifdef EXTREME_CHECKING
00098   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00099 #endif
00100   reset();
00101   WHACK(_table);
00102   WHACK(_hasher);
00103 }
00104 
00105 // the specialized copy operation.
00106 template <class key_type, class contents>
00107 void copy_hash_table(hash_table<key_type, contents> &target,
00108     const hash_table<key_type, contents> &source)
00109 {
00110   #define class_name() "hash_table"
00111   FUNCDEF("copy_hash_table");
00112 #ifdef EXTREME_CHECKING
00113   if (!source.verify())
00114     deadly_error(class_name(), func, "source state did not verify.");
00115 #endif
00116   target.reset();
00117   for (int i = 0; i < source.table_access().elements(); i++) {
00118     bucket<key_type, contents> *buck = source.table_access().borrow(i);
00119     if (!buck) continue;
00120     for (int j = 0; j < buck->length(); j++) {
00121       target.add(*((*buck)[j]._id), new contents(*((*buck)[j]._data)));
00122     }
00123   }
00124 #ifdef EXTREME_CHECKING
00125   if (!target.verify())
00126     deadly_error(class_name(), func, "target state did not verify.");
00127 #endif
00128   #undef class_name
00129 }
00130 
00131 template <class key_type, class contents>
00132 void hash_table<key_type, contents>::reset()
00133 {
00134   FUNCDEF("reset");
00135 #ifdef EXTREME_CHECKING
00136   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00137 #endif
00138   // iterate over the list whacking the content items in the buckets.
00139   for (int i = 0; i < _table->elements(); i++) {
00140     bucket<key_type, contents> *buck = _table->acquire(i);
00141     if (!buck) continue;
00142     for (int j = 0; j < buck->length(); j++) {
00143       WHACK((*buck)[j]._data);  // eliminate the stored data.
00144       WHACK((*buck)[j]._id);  // eliminate the stored key.
00145       buck->zap(j, j);  // remove the element.
00146       j--;  // skip back before whack happened.
00147     }
00148     WHACK(buck);
00149   }
00150 #ifdef EXTREME_CHECKING
00151   if (!verify())
00152     deadly_error(class_name(), func, "state did not verify afterwards.");
00153 #endif
00154 }
00155 
00156 template <class key_type, class contents>
00157 bool hash_table<key_type, contents>::verify() const
00158 {
00159   for (int i = 0; i < _table->elements(); i++) {
00160     const bucket<key_type, contents> *buck = _table->borrow(i);
00161     if (!buck) continue;  // that's acceptable.
00162     for (int j = 0; j < buck->length(); j++) {
00163       const hash_wrapper<key_type, contents> &wrap = (*buck)[j];
00164       if (!wrap._data) {
00165 //        program_wide_logger().log(isprintf("hash table: no data segment at position %d.", j));
00166         return false;
00167       }
00168       if (!wrap._id) {
00169 //        program_wide_logger().log(isprintf("hash table: no identifier at position %d.", j));
00170         return false;
00171       }
00172     }
00173   }
00174   return true;
00175 }
00176 
00177 template <class key_type, class contents>
00178 internal_hash_array<key_type, contents> &hash_table<key_type, contents>
00179     ::table_access() const
00180 { return *_table; }
00181 
00182 template <class key_type, class contents>
00183 void hash_table<key_type, contents>::rehash(int max_bits)
00184 {
00185   FUNCDEF("rehash");
00186 #ifdef EXTREME_CHECKING
00187   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00188 #endif
00189   typedef bucket<key_type, contents> buckie;
00190   hash_table<key_type, contents> new_hash(*_hasher, max_bits);
00191     // this is the new table that will be used.
00192 
00193   // scoot through the existing table so we can move items into the new one.
00194   for (int i = 0; i < _table->elements(); i++) {
00195     buckie *b = _table->borrow(i);  // look at the current element.
00196     if (!b) continue;  // nothing here, keep going.
00197     for (int j = b->length() - 1; j >= 0; j--) {
00198       key_type *k = b->use(j)._id;
00199       contents *c = b->use(j)._data;
00200       new_hash.add(k, c);
00201     }
00202     b->reset();
00203       // clean out any items in the buckets here now that they've moved.
00204   }
00205 
00206   // now flip the contents of the new guy into "this".
00207   _table->reset();  // toss previous stuff.
00208   _table->adjust(new_hash._table->elements());
00209   for (int q = 0; q < new_hash._table->elements(); q++)
00210     _table->put(q, new_hash._table->acquire(q));
00211   // reset other data members.
00212   _max_bits = new_hash._max_bits;
00213   _last_iter = 0;
00214 #ifdef EXTREME_CHECKING
00215   if (!verify())
00216     deadly_error(class_name(), func, "state did not verify afterwards.");
00217 #endif
00218 }
00219 
00220 template <class key_type, class contents>
00221 outcome hash_table<key_type, contents>::add(const key_type &key,
00222     contents *to_store)
00223 { return add(new key_type(key), to_store); }
00224 
00225 template <class key_type, class contents>
00226 outcome hash_table<key_type, contents>::add(key_type *key,
00227     contents *to_store, bool check_dupes)
00228 {
00229   FUNCDEF("add");
00230 #ifdef EXTREME_CHECKING
00231   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00232 #endif
00233   // get a hash value.
00234   u_int hashed = _hasher->hash((const void *)key, sizeof(key_type));
00235   // make the value appropriate for our table.
00236   hashed = hashed % _table->elements();
00237   // see if the key already exists there.
00238   bucket<key_type, contents> *buck = _table->borrow(hashed);
00239   if (!buck) {
00240     // this slot doesn't exist yet.
00241     buck = new bucket<key_type, contents>;
00242     _table->put(hashed, buck);
00243   }
00244   if (!check_dupes) {
00245     // we aren't even going to check for its existence.
00246     *buck += hash_wrapper<key_type, contents>(key, to_store);
00247     return IS_NEW;
00248   }
00249   int indy = buck->find(*key);
00250   if (negative(indy)) {
00251     // that value was not seen yet, so we're adding a new entry.
00252     *buck += hash_wrapper<key_type, contents>(key, to_store);
00253 #ifdef EXTREME_CHECKING
00254     if (!verify())
00255       deadly_error(class_name(), func, "state did not verify afterwards.");
00256 #endif
00257     return IS_NEW;
00258   }
00259   // that key already existed, so we'll re-use its slot with the new data.
00260   WHACK((*buck)[indy]._data);
00261   WHACK(key);  // toss since we're not using.
00262   (*buck)[indy]._data = to_store;
00263 #ifdef EXTREME_CHECKING
00264   if (!verify())
00265     deadly_error(class_name(), func, "state did not verify afterwards.");
00266 #endif
00267   return EXISTING;
00268 }
00269 
00270 template <class key_type, class contents>
00271 outcome hash_table<key_type, contents>::fast_dangerous_add
00272     (const key_type &key, contents *to_store)
00273 { return add(new key_type(key), to_store, false); }
00274 
00275 template <class key_type, class contents>
00276 bool hash_table<key_type, contents>::find(const key_type &key,
00277     contents * &item_found) const
00278 {
00279   FUNCDEF("find");
00280 #ifdef EXTREME_CHECKING
00281   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00282 #endif
00283   item_found = NIL;
00284   // get a hash value.
00285   u_int hashed = _hasher->hash((const void *)&key, sizeof(key));
00286   // make the value appropriate for our table.
00287   hashed = hashed % _table->elements();
00288   // see if the key exists in the bucket.
00289   bucket<key_type, contents> *buck = _table->borrow(hashed);
00290   if (!buck) return false;
00291   int indy = buck->find(key);
00292   if (negative(indy)) return false;  // not there.
00293   // was there, so retrieve the data.
00294   item_found = (*buck)[indy]._data;
00295   return true;
00296 }
00297 
00298 template <class key_type, class contents>
00299 contents *hash_table<key_type, contents>::acquire(const key_type &key)
00300 {
00301   FUNCDEF("acquire");
00302 #ifdef EXTREME_CHECKING
00303   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00304 #endif
00305   // get a hash value.
00306   u_int hashed = _hasher->hash((const void *)&key, sizeof(key));
00307   // make the value appropriate for our table.
00308   hashed = hashed % _table->elements();
00309   // see if the key exists in the bucket.
00310   bucket<key_type, contents> *buck = _table->borrow(hashed);
00311   if (!buck) return NIL;
00312   int indy = buck->find(key);
00313   if (negative(indy)) return NIL;  // nope, not there.
00314   contents *to_return = (*buck)[indy]._data;
00315   WHACK((*buck)[indy]._id);  // clean the id.
00316   buck->zap(indy, indy);  // toss the storage blob for the item.
00317 #ifdef EXTREME_CHECKING
00318   if (!verify())
00319     deadly_error(class_name(), func, "state did not verify afterwards.");
00320 #endif
00321   return to_return;
00322 }
00323 
00324 template <class key_type, class contents>
00325 bool hash_table<key_type, contents>::zap(const key_type &key)
00326 {
00327   FUNCDEF("zap");
00328 #ifdef EXTREME_CHECKING
00329   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00330 #endif
00331   // get a hash value.
00332   u_int hashed = _hasher->hash((const void *)&key, sizeof(key));
00333   // make the value appropriate for our table.
00334   hashed = hashed % _table->elements();
00335   // see if the key exists in the bucket.
00336   bucket<key_type, contents> *buck = _table->borrow(hashed);
00337   if (!buck) return false;
00338   int indy = buck->find(key);
00339   if (negative(indy)) {
00340     // nope, not there.
00341     return false;
00342   }
00343   WHACK((*buck)[indy]._data);  // delete the data held.
00344   WHACK((*buck)[indy]._id);  // delete the data held.
00345   buck->zap(indy, indy);  // toss the storage blob for the item.
00346   if (!buck->length()) {
00347     // clean up this bucket now.
00348     buck = _table->acquire(hashed);
00349     WHACK(buck);
00350   }
00351 #ifdef EXTREME_CHECKING
00352   if (!verify())
00353     deadly_error(class_name(), func, "state did not verify afterwards.");
00354 #endif
00355   return true;
00356 }
00357 
00358 template <class key_type, class contents>
00359 void hash_table<key_type, contents>::apply(apply_function *to_apply,
00360     void *data_link)
00361 {
00362   FUNCDEF("apply");
00363 #ifdef EXTREME_CHECKING
00364   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00365 #endif
00366   typedef bucket<key_type, contents> buckie;
00367   int bucks_seen = 0;
00368   int posn = _last_iter;  // start at the same place we left.
00369   while (bucks_seen++ < _table->elements()) {
00370     if ( (posn < 0) || (posn >= _table->elements()) )
00371       posn = 0;
00372     buckie *b = _table->borrow(posn);
00373     _last_iter = posn++;
00374       // record where the iteration last touched and increment next position.
00375       // we must do this before we check if the bucket exists or we'll rotate
00376       // on this same place forever.
00377     if (!b) continue;  // nothing here, keep going.
00378     for (int j = 0; j < b->length(); j++) {
00379       contents *c = b->use(j)._data;
00380       if (!c) {
00381 #ifdef EXTREME_CHECKING
00382         deadly_error("hash_table", "apply", "logic error--missing pointer");
00383 #endif
00384         continue;
00385       }
00386       if (!to_apply(*b->use(j)._id, *c, data_link)) return;
00387     }
00388   }
00389 }
00390 
00391 template <class key_type, class contents>
00392 int hash_table<key_type, contents>::elements() const
00393 {
00394   FUNCDEF("elements");
00395 #ifdef EXTREME_CHECKING
00396   if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00397 #endif
00398   typedef bucket<key_type, contents> buckie;
00399   int to_return = 0;
00400   for (int i = 0; i < _table->elements(); i++) {
00401     bucket<key_type, contents> *buck = _table->borrow(i);
00402     if (!buck) continue;  // nothing to count.
00403     to_return += buck->length();
00404   }
00405   return to_return;
00406 }
00407 
00408 #undef static_class_name
00409 
00410 #endif // outer guard.
00411 

Generated on Fri Nov 28 04:29:12 2008 for HOOPLE Libraries by  doxygen 1.5.1