00001 #ifndef HASH_TABLE_CLASS
00002 #define HASH_TABLE_CLASS
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include "amorph.h"
00019
00020 #include <basis/array.h>
00021 #include <basis/byte_array.h>
00022 #include <basis/contracts.h>
00023 #include <basis/enhance_cpp.h>
00024 #include <basis/functions.h>
00025
00026 #include <math.h>
00027
00028 namespace structures {
00029
00030
00031 template <class key, class contents> class internal_hash_array;
00032
00034
00040 class hashing_algorithm : public virtual basis::clonable
00041 {
00042 public:
00043 virtual basis::un_int hash(const void *key_data, int key_length) const = 0;
00045
00051 virtual hashing_algorithm *clone() const = 0;
00053 };
00054
00056
00058
00065 template <class key_type, class contents>
00066
00067
00068
00069 class hash_table : public virtual basis::nameable
00070 {
00071 public:
00072 hash_table(const hashing_algorithm &hasher, int estimated_elements);
00074
00080 virtual ~hash_table();
00082
00083 DEFINE_CLASS_NAME("hash_table");
00084
00085 void rehash(int estimated_elements);
00087
00089 enum outcomes {
00090 IS_NEW = basis::common::IS_NEW,
00091 EXISTING = basis::common::EXISTING
00092 };
00093
00094 static int calculate_num_slots(int estimated_elements);
00096
00097 int elements() const;
00099
00101 int estimated_elements() const { return c_estim_elements; }
00103
00104 basis::outcome add(const key_type &key, contents *to_store);
00106
00114 basis::outcome fast_dangerous_add(const key_type &key, contents *to_store);
00116
00119 bool find(const key_type &key, contents * &item_found) const;
00121
00125 contents *find(const key_type &key) const
00126 { contents *c = NIL; return find(key, c)? c : NIL; }
00128
00129 contents *acquire(const key_type &key);
00131
00135 bool zap(const key_type &key);
00137
00139 void reset();
00141
00142 typedef bool apply_function(const key_type &key, contents ¤t,
00143 void *data_link);
00145
00150 void apply(apply_function *to_apply, void *data_link);
00152
00158 basis::outcome add(key_type *key, contents *to_store, bool check_dupes = true);
00160
00167 bool verify() const;
00169
00171 private:
00172 int c_estim_elements;
00173 hashing_algorithm *_hasher;
00174 internal_hash_array<key_type, contents> *_table;
00175 int _last_iter;
00177
00178 hash_table(const hash_table &to_copy);
00180 hash_table &operator =(const hash_table &to_copy);
00182
00183 public:
00184 internal_hash_array<key_type, contents> &table_access() const;
00186 };
00187
00189
00191
00197 template <class key_type, class contents>
00198 void copy_hash_table(hash_table<key_type, contents> &target,
00199 const hash_table<key_type, contents> &source);
00200
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211 template <class key_type, class contents>
00212 class hash_wrapper : public virtual basis::root_object
00213 {
00214 public:
00215 key_type *_id;
00216 contents *_data;
00217
00218 hash_wrapper(key_type *id = NIL, contents *data = NIL)
00219 : _id(id), _data(data) {}
00220 };
00221
00223
00224 template <class key_type, class contents>
00225 class bucket
00226 : public basis::array<hash_wrapper<key_type, contents> >,
00227 public virtual basis::root_object
00228 {
00229 public:
00230 bucket() : basis::array<hash_wrapper<key_type, contents> >(0, NIL,
00231 basis::byte_array::SIMPLE_COPY | basis::byte_array::EXPONE
00232 | basis::byte_array::FLUSH_INVISIBLE) {}
00233
00234 int find(const key_type &to_find) {
00235 for (int i = 0; i < this->length(); i++) {
00236 key_type *curr_key = this->get(i)._id;
00237
00238
00239 if (curr_key && (to_find == *curr_key))
00240 return i;
00241 }
00242 return basis::common::NOT_FOUND;
00243 }
00244 };
00245
00247
00248 template <class key_type, class contents>
00249 class internal_hash_array : public amorph<bucket<key_type, contents> >
00250 {
00251 public:
00252 internal_hash_array(int estimated_elements)
00253 : amorph<bucket<key_type, contents> >
00254 (hash_table<key_type, contents>::calculate_num_slots(estimated_elements)) {}
00255 };
00256
00258
00259 template <class key_type, class contents>
00260 hash_table<key_type, contents>::hash_table(const hashing_algorithm &hasher, int estimated_elements)
00261 : c_estim_elements(estimated_elements),
00262 _hasher(hasher.clone()),
00263 _table(new internal_hash_array<key_type, contents>(estimated_elements)),
00264 _last_iter(0)
00265 {}
00266
00267 template <class key_type, class contents>
00268 hash_table<key_type, contents>::~hash_table()
00269 {
00270 #ifdef EXTREME_CHECKING
00271 FUNCDEF("destructor");
00272 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00273 #endif
00274 reset();
00275 basis::WHACK(_table);
00276 basis::WHACK(_hasher);
00277 }
00278
00279 template <class key_type, class contents>
00280 int hash_table<key_type, contents>::calculate_num_slots(int estimated_elements)
00281 {
00282
00283 int log_2_truncated = int(log(float(estimated_elements)) / log(2.0));
00284
00285 int slots_needed_for_elems = (int)pow(2.0, double(log_2_truncated + 1));
00286
00287 return slots_needed_for_elems;
00288 }
00289
00290
00291 template <class key_type, class contents>
00292 void copy_hash_table(hash_table<key_type, contents> &target,
00293 const hash_table<key_type, contents> &source)
00294 {
00295 #ifdef EXTREME_CHECKING
00296 FUNCDEF("copy_hash_table");
00297 if (!source.verify())
00298 deadly_error(class_name(), func, "source state did not verify.");
00299 #endif
00300 target.reset();
00301 for (int i = 0; i < source.table_access().elements(); i++) {
00302 bucket<key_type, contents> *buck = source.table_access().borrow(i);
00303 if (!buck) continue;
00304 for (int j = 0; j < buck->length(); j++) {
00305 target.add(*((*buck)[j]._id), new contents(*((*buck)[j]._data)));
00306 }
00307 }
00308 #ifdef EXTREME_CHECKING
00309 if (!target.verify())
00310 deadly_error(class_name(), func, "target state did not verify.");
00311 #endif
00312 #undef class_name
00313 }
00314
00315 template <class key_type, class contents>
00316 void hash_table<key_type, contents>::reset()
00317 {
00318 #ifdef EXTREME_CHECKING
00319 FUNCDEF("reset");
00320 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00321 #endif
00322
00323 for (int i = 0; i < _table->elements(); i++) {
00324 bucket<key_type, contents> *buck = _table->acquire(i);
00325 if (!buck) continue;
00326 for (int j = 0; j < buck->length(); j++) {
00327 basis::WHACK((*buck)[j]._data);
00328 basis::WHACK((*buck)[j]._id);
00329 buck->zap(j, j);
00330 j--;
00331 }
00332 basis::WHACK(buck);
00333 }
00334 #ifdef EXTREME_CHECKING
00335 if (!verify())
00336 deadly_error(class_name(), func, "state did not verify afterwards.");
00337 #endif
00338 }
00339
00340 template <class key_type, class contents>
00341 bool hash_table<key_type, contents>::verify() const
00342 {
00343 for (int i = 0; i < _table->elements(); i++) {
00344 const bucket<key_type, contents> *buck = _table->borrow(i);
00345 if (!buck) continue;
00346 for (int j = 0; j < buck->length(); j++) {
00347 const hash_wrapper<key_type, contents> &wrap = (*buck)[j];
00348 if (!wrap._data) {
00349
00350 return false;
00351 }
00352 if (!wrap._id) {
00353
00354 return false;
00355 }
00356 }
00357 }
00358 return true;
00359 }
00360
00361 template <class key_type, class contents>
00362 internal_hash_array<key_type, contents> &hash_table<key_type, contents>
00363 ::table_access() const
00364 { return *_table; }
00365
00366 template <class key_type, class contents>
00367 void hash_table<key_type, contents>::rehash(int estimated_elements)
00368 {
00369 #ifdef EXTREME_CHECKING
00370 FUNCDEF("rehash");
00371 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00372 #endif
00373 typedef bucket<key_type, contents> buckie;
00374 hash_table<key_type, contents> new_hash(*_hasher, estimated_elements);
00375
00376
00377
00378 for (int i = 0; i < _table->elements(); i++) {
00379 buckie *b = _table->borrow(i);
00380 if (!b) continue;
00381 for (int j = b->length() - 1; j >= 0; j--) {
00382 key_type *k = b->use(j)._id;
00383 contents *c = b->use(j)._data;
00384 new_hash.add(k, c);
00385 }
00386 b->reset();
00387
00388 }
00389
00390
00391 _table->reset();
00392 _table->adjust(new_hash._table->elements());
00393 for (int q = 0; q < new_hash._table->elements(); q++)
00394 _table->put(q, new_hash._table->acquire(q));
00395
00396 c_estim_elements = new_hash.c_estim_elements;
00397 _last_iter = 0;
00398 #ifdef EXTREME_CHECKING
00399 if (!verify())
00400 deadly_error(class_name(), func, "state did not verify afterwards.");
00401 #endif
00402 }
00403
00404 template <class key_type, class contents>
00405 basis::outcome hash_table<key_type, contents>::add(const key_type &key,
00406 contents *to_store)
00407 { return add(new key_type(key), to_store); }
00408
00409 template <class key_type, class contents>
00410 basis::outcome hash_table<key_type, contents>::add(key_type *key,
00411 contents *to_store, bool check_dupes)
00412 {
00413 #ifdef EXTREME_CHECKING
00414 FUNCDEF("add");
00415 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00416 #endif
00417
00418 basis::un_int hashed = _hasher->hash((const void *)key, sizeof(key_type));
00419
00420 hashed = hashed % _table->elements();
00421
00422 bucket<key_type, contents> *buck = _table->borrow(hashed);
00423 if (!buck) {
00424
00425 buck = new bucket<key_type, contents>;
00426 _table->put(hashed, buck);
00427 }
00428 if (!check_dupes) {
00429
00430 *buck += hash_wrapper<key_type, contents>(key, to_store);
00431 return IS_NEW;
00432 }
00433 int indy = buck->find(*key);
00434 if (basis::negative(indy)) {
00435
00436 *buck += hash_wrapper<key_type, contents>(key, to_store);
00437 #ifdef EXTREME_CHECKING
00438 if (!verify())
00439 deadly_error(class_name(), func, "state did not verify afterwards.");
00440 #endif
00441 return IS_NEW;
00442 }
00443
00444 basis::WHACK((*buck)[indy]._data);
00445 basis::WHACK(key);
00446 (*buck)[indy]._data = to_store;
00447 #ifdef EXTREME_CHECKING
00448 if (!verify())
00449 deadly_error(class_name(), func, "state did not verify afterwards.");
00450 #endif
00451 return EXISTING;
00452 }
00453
00454 template <class key_type, class contents>
00455 basis::outcome hash_table<key_type, contents>::fast_dangerous_add
00456 (const key_type &key, contents *to_store)
00457 { return add(new key_type(key), to_store, false); }
00458
00459 template <class key_type, class contents>
00460 bool hash_table<key_type, contents>::find(const key_type &key,
00461 contents * &item_found) const
00462 {
00463 #ifdef EXTREME_CHECKING
00464 FUNCDEF("find");
00465 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00466 #endif
00467 item_found = NIL;
00468
00469 basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
00470
00471 hashed = hashed % _table->elements();
00472
00473 bucket<key_type, contents> *buck = _table->borrow(hashed);
00474 if (!buck) return false;
00475 int indy = buck->find(key);
00476 if (basis::negative(indy)) return false;
00477
00478 item_found = (*buck)[indy]._data;
00479 return true;
00480 }
00481
00482 template <class key_type, class contents>
00483 contents *hash_table<key_type, contents>::acquire(const key_type &key)
00484 {
00485 #ifdef EXTREME_CHECKING
00486 FUNCDEF("acquire");
00487 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00488 #endif
00489
00490 basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
00491
00492 hashed = hashed % _table->elements();
00493
00494 bucket<key_type, contents> *buck = _table->borrow(hashed);
00495 if (!buck) return NIL;
00496 int indy = buck->find(key);
00497 if (basis::negative(indy)) return NIL;
00498 contents *to_return = (*buck)[indy]._data;
00499 basis::WHACK((*buck)[indy]._id);
00500 buck->zap(indy, indy);
00501 #ifdef EXTREME_CHECKING
00502 if (!verify())
00503 deadly_error(class_name(), func, "state did not verify afterwards.");
00504 #endif
00505 return to_return;
00506 }
00507
00508 template <class key_type, class contents>
00509 bool hash_table<key_type, contents>::zap(const key_type &key)
00510 {
00511 #ifdef EXTREME_CHECKING
00512 FUNCDEF("zap");
00513 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00514 #endif
00515
00516 basis::un_int hashed = _hasher->hash((const void *)&key, sizeof(key));
00517
00518 hashed = hashed % _table->elements();
00519
00520 bucket<key_type, contents> *buck = _table->borrow(hashed);
00521 if (!buck) return false;
00522 int indy = buck->find(key);
00523 if (basis::negative(indy)) {
00524
00525 return false;
00526 }
00527 basis::WHACK((*buck)[indy]._data);
00528 basis::WHACK((*buck)[indy]._id);
00529 buck->zap(indy, indy);
00530 if (!buck->length()) {
00531
00532 buck = _table->acquire(hashed);
00533 basis::WHACK(buck);
00534 }
00535 #ifdef EXTREME_CHECKING
00536 if (!verify())
00537 deadly_error(class_name(), func, "state did not verify afterwards.");
00538 #endif
00539 return true;
00540 }
00541
00542 template <class key_type, class contents>
00543 void hash_table<key_type, contents>::apply(apply_function *to_apply,
00544 void *data_link)
00545 {
00546 #ifdef EXTREME_CHECKING
00547 FUNCDEF("apply");
00548 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00549 #endif
00550 typedef bucket<key_type, contents> buckie;
00551 int bucks_seen = 0;
00552 int posn = _last_iter;
00553 while (bucks_seen++ < _table->elements()) {
00554 if ( (posn < 0) || (posn >= _table->elements()) )
00555 posn = 0;
00556 buckie *b = _table->borrow(posn);
00557 _last_iter = posn++;
00558
00559
00560
00561 if (!b) continue;
00562 for (int j = 0; j < b->length(); j++) {
00563 contents *c = b->use(j)._data;
00564 if (!c) {
00565 #ifdef EXTREME_CHECKING
00566 deadly_error("hash_table", "apply", "logic error--missing pointer");
00567 #endif
00568 continue;
00569 }
00570 if (!to_apply(*b->use(j)._id, *c, data_link)) return;
00571 }
00572 }
00573 }
00574
00575 template <class key_type, class contents>
00576 int hash_table<key_type, contents>::elements() const
00577 {
00578 #ifdef EXTREME_CHECKING
00579 FUNCDEF("elements");
00580 if (!verify()) deadly_error(class_name(), func, "state did not verify.");
00581 #endif
00582 typedef bucket<key_type, contents> buckie;
00583 int to_return = 0;
00584 for (int i = 0; i < _table->elements(); i++) {
00585 bucket<key_type, contents> *buck = _table->borrow(i);
00586 if (!buck) continue;
00587 to_return += buck->length();
00588 }
00589 return to_return;
00590 }
00591
00592 #undef static_class_name
00593
00594 }
00595
00596 #endif // outer guard.
00597