Main Page | Class Hierarchy | Class List | File List | Class Members | File Members

hash.h

Go to the documentation of this file.
00001 // See the end of this file for license information.
00002 
00003 #ifndef TORSION_HASH_H
00004 #define TORSION_HASH_H
00005 
00006 #include "asm.h"
00007 #include "list.h"
00008 #include "array.h"
00009 #include "types.h"
00010 #include "screen.h"
00011 
00013 template <class Key>
00014 struct hash { };
00015 
00017 inline unsigned
00018 hash_string(const char* str) {
00019   unsigned long hash_value = 0;
00020   for ( ; *str; ++str)
00021     hash_value = 5 * hash_value + *str;
00022                                                                                 
00023   return unsigned(hash_value);
00024 }
00025 
00027 inline unsigned
00028 hash_data(const char* data, Size length) {
00029   unsigned long hash_value = 0;
00030   for (unsigned i = 0; i < length; ++i, ++data)
00031     hash_value = 5 * hash_value + *data;
00032                                                                                 
00033   return unsigned(hash_value);
00034 }
00035 
00037 template<>
00038 struct hash<char*> {
00039   unsigned
00040   operator()(const char* str) const {
00041     return hash_string(str);
00042   }
00043 };
00044 
00046 template<>
00047 struct hash<const char*> {
00048   unsigned
00049   operator()(const char* str) const {
00050     return hash_string(str);
00051   }
00052 };
00053 
00055 template<>
00056 struct hash<void*> {
00057   unsigned
00058   operator()(const void* data) const {
00059     return hash_data((char*)&data, sizeof(void*));
00060   }
00061 };
00062 
00064 template <class Key>
00065 struct compare { };
00066 
00071 template<>
00072 struct compare<char*> {
00073   int
00074   operator()(const char* str1, const char* str2, Size size) const {
00075     return strncmp(str1, str2, size);
00076   }
00077 };
00078 
00083 template<>
00084 struct compare<const char*> {
00085   int
00086   operator()(const char* str1, const char* str2, Size size) const {
00087     return strncmp(str1, str2, size);
00088   }
00089 };
00090 
00095 template<>
00096 struct compare<void*> {
00097   int
00098   operator()(void* ptr1, void* ptr2, Size size) const {
00099     if (ptr1 > ptr2)
00100       return 1;
00101     if (ptr1 < ptr2)
00102       return -1;
00103     return 0;
00104   }
00105 };
00106 
00108 template <class Key, class Value>
00109 class Hash_node : public List_node {
00110 public:
00111   Key key;
00112   Value value;
00113 };
00114 
00115 
00116 // forward declaration of death
00117 template <class Key, class Value, class Hash_func = hash<Key>,
00118           class Compare_func = compare<Key> >
00119 class Hash_table;
00120 
00122 template <class Key, class Value>
00123 class Hash_iterator {
00124 public:
00125   Hash_table<Key, Value>* hash_table; 
00126   unsigned bucket_index;    
00127 
00128   List_iterator< Hash_node<Key, Value> > position;
00129 
00130   Hash_iterator&
00131   operator++();       // prefix ++
00132 
00133   Hash_iterator&
00134   operator--();       // prefix --
00135 
00136   Hash_iterator&
00137   operator++(int);      // postfix ++
00138 
00139   Hash_iterator&
00140   operator--(int);      // postfix --
00141 
00142   Hash_node<Key, Value>&
00143   operator*() const;
00144 
00145   Hash_node<Key, Value>*
00146   operator->() const;
00147 
00148   bool
00149   operator==(const Hash_iterator& iterator) const;
00150 
00151   bool
00152   operator!=(const Hash_iterator& iterator) const;
00153 };
00154 
00155 template <class Key, class Value>
00156 inline Hash_iterator<Key, Value>&
00157 Hash_iterator<Key, Value>::operator++() {
00158   // iterate to the next element in the current bucket
00159   ++position;
00160 
00161   // if we've reached the end of the current bucket, move to the start of the
00162   // next non-empty bucket
00163   while (position == hash_table->buckets[bucket_index].end()) {
00164     ++bucket_index;
00165     position = hash_table->buckets[bucket_index].begin();
00166     
00167     // at the very last bucket, stop looking for non-empty buckets
00168     if (bucket_index == hash_table->buckets.size() - 1)
00169       break;
00170   }
00171 
00172   return *this;
00173 }
00174 
00175 template <class Key, class Value>
00176 inline Hash_iterator<Key, Value>&
00177 Hash_iterator<Key, Value>::operator--() {
00178   // iterate to the previous element in the current bucket
00179   --position;
00180 
00181   // if we're at the start of the current bucket, move to end of the previous
00182   // non-empty bucket
00183   while (position == hash_table->buckets[bucket_index].begin()) {
00184     --bucket_index;
00185     position = --(hash_table->buckets[bucket_index].end());
00186     
00187     // at the very first bucket, stop looking for non-empty buckets
00188     if (bucket_index == 0)
00189       break;
00190   }
00191 
00192   return *this;
00193 }
00194 
00195 template <class Key, class Value>
00196 inline Hash_iterator<Key, Value>&
00197 Hash_iterator<Key, Value>::operator++(int) {
00198   return ++this;
00199 }
00200 
00201 template <class Key, class Value>
00202 inline Hash_iterator<Key, Value>&
00203 Hash_iterator<Key, Value>::operator--(int) {
00204   return --this;
00205 }
00206 
00207 template <class Key, class Value>
00208 inline Hash_node<Key, Value>&
00209 Hash_iterator<Key, Value>::operator*() const {
00210   return *(Hash_node<Key, Value>*)position.node;
00211 }
00212 
00213 template <class Key, class Value>
00214 inline Hash_node<Key, Value>*
00215 Hash_iterator<Key, Value>::operator->() const {
00216   return (Hash_node<Key, Value>*)position.node;
00217 }
00218 
00219 template <class Key, class Value>
00220 inline bool
00221 Hash_iterator<Key, Value>::operator==(const Hash_iterator& iterator) const {
00222   return (position == position.position);
00223 }
00224 
00225 template <class Key, class Value>
00226 inline bool
00227 Hash_iterator<Key, Value>::operator!=(const Hash_iterator& iterator) const {
00228   return (position != iterator.position);
00229 }
00230 
00232 template <class Key, class Value, class Hash_func, class Compare_func >
00233 class Hash_table {
00234 protected:
00235   Hash_func hash_func;      
00236   Compare_func compare_func;    
00237   static const Size MAX_KEY_SIZE = 1024;
00238   bool physical_memory;     
00239   bool alloc_nodes;     
00240   bool physical;
00241 
00243   inline unsigned
00244   get_bucket_index(Key key) {
00245     return hash_func(key) % buckets.get_capacity();
00246   }
00247 
00250   List_iterator< Hash_node<Key, Value> >
00251   find(unsigned bucket_index, Key key);
00252 
00253 public:
00255   Array< List< Hash_node<Key, Value> > > buckets;
00256   typedef Hash_iterator<Key, Value> Iterator;
00257 
00259   Hash_table(bool physical = false, Size init_buckets = 0,
00260              bool alloc_nodes = false) {
00261     init(physical, init_buckets);
00262   }
00263 
00272   void
00273   init(bool physical = false, Size init_buckets = 0,
00274        bool alloc_nodes = false) {
00275     this->physical = physical;
00276     this->alloc_nodes = alloc_nodes;
00277     buckets.init(physical, init_buckets);
00278     buckets.grow(true);
00279 
00280     for (unsigned i = 0; i < buckets.get_capacity(); ++i)
00281       buckets[i].clear();
00282   }
00283 
00285   inline void
00286   clear() {
00287     for (unsigned i = 0; i < buckets.get_capacity(); ++i)
00288       buckets[i].clear();
00289     
00290     buckets.clear();
00291   }
00292 
00296   void
00297   free_and_clear();
00298 
00300   bool
00301   has_key(Key key);
00302 
00304   Value*
00305   get(Key key);
00306 
00309   void
00310   insert(Hash_node<Key, Value>& node);
00311 
00314   bool
00315   remove(Key key);
00316 
00317   // TODO
00318   bool
00319   grow() { return false; }
00320 
00322   Iterator
00323   begin();
00324 
00327   Iterator
00328   end();
00329 
00331   void
00332   print();
00333 
00335   void
00336   unit_test();
00337 };
00338 
00339 template <class Key, class Value, class Hash_func, class Compare_func>
00340 List_iterator< Hash_node<Key, Value> >
00341 Hash_table<Key, Value, Hash_func, Compare_func>::find(unsigned bucket_index, Key key) {
00342   // search for the key within the contents of the bucket
00343   for (List_iterator< Hash_node<Key, Value> > position = buckets[bucket_index].begin();
00344        position != buckets[bucket_index].end(); ++position) {
00345     // if the key is found, return its position in the bucket contents
00346     if (compare_func(position->key, key, MAX_KEY_SIZE) == 0)
00347       return position;
00348   }
00349   
00350   return buckets[bucket_index].end();
00351 }
00352 
00353 template <class Key, class Value, class Hash_func, class Compare_func>
00354 void
00355 Hash_table<Key, Value, Hash_func, Compare_func>::free_and_clear() {
00356   // free the hash nodes within each bucket's list
00357   for (unsigned i = 0; i < buckets.size(); ++i)
00358     buckets[i].free_and_clear();
00359 
00360   // clear all the buckets out of this hash table
00361   buckets.clear();
00362 }
00363 
00364 template <class Key, class Value, class Hash_func, class Compare_func>
00365 bool
00366 Hash_table<Key, Value, Hash_func, Compare_func>::has_key(Key key) {
00367   // if the key exists in the corresponding bucket, return true
00368   unsigned bucket_index = get_bucket_index(key);
00369   List_iterator< Hash_node<Key, Value> > position = find(bucket_index, key);
00370 
00371   if (position == buckets[bucket_index].end())
00372     return false;
00373 
00374   return true;
00375 }
00376 
00377 template <class Key, class Value, class Hash_func, class Compare_func>
00378 Value*
00379 Hash_table<Key, Value, Hash_func, Compare_func>::get(Key key) {
00380   // if the key exists in the corresponding bucket, return true
00381   unsigned bucket_index = get_bucket_index(key);
00382   List_iterator< Hash_node<Key, Value> > position = find(bucket_index, key);
00383 
00384   if (position == buckets[bucket_index].end())
00385     return NULL;
00386 
00387   return &position->value;
00388 }
00389 
00390 
00391 template <class Key, class Value, class Hash_func, class Compare_func>
00392 void
00393 Hash_table<Key, Value, Hash_func, Compare_func>::insert(Hash_node<Key, Value>& node) {
00394   // if the key exists in the corresponding bucket, replace it with the new
00395   // value. otherwise, insert the key/value pair into the hash table
00396   unsigned bucket_index = get_bucket_index(node.key);
00397   List_iterator< Hash_node<Key, Value> > position = find(bucket_index, node.key);
00398 
00399   if (position == buckets[bucket_index].end()) {
00400     buckets[bucket_index].insert_back(&node);
00401   } else {
00402     buckets[bucket_index].remove(position);
00403     buckets[bucket_index].insert_back(&node);
00404   }
00405 }
00406 
00407 template <class Key, class Value, class Hash_func, class Compare_func>
00408 bool
00409 Hash_table<Key, Value, Hash_func, Compare_func>::remove(Key key) {
00410   // find the given key and them remove its node. if the key could not be
00411   // found, return false
00412   unsigned bucket_index = get_bucket_index(key);
00413   List_iterator< Hash_node<Key, Value> > position = find(bucket_index, key);
00414 
00415   if (position == buckets[bucket_index].end())
00416     return false;
00417 
00418   buckets[bucket_index].remove(position);
00419   return true;
00420 }
00421 
00422 template <class Key, class Value, class Hash_func, class Compare_func>
00423 inline Hash_iterator<Key, Value>
00424 Hash_table<Key, Value, Hash_func, Compare_func>::begin() {
00425   // return an iterator for the first element in the first bucket
00426   Iterator iterator;
00427   iterator.hash_table = this;
00428   iterator.bucket_index = 0;
00429   iterator.position = buckets[0].begin();
00430 
00431   // if the first bucket is empty, start the iterator off at the first actual
00432   // element
00433   if (buckets[0].size() == 0)
00434     ++iterator;
00435   return iterator;
00436 }
00437 
00438 template <class Key, class Value, class Hash_func, class Compare_func>
00439 inline Hash_iterator<Key, Value>
00440 Hash_table<Key, Value, Hash_func, Compare_func>::end() {
00441   // return an iterator for the end of the hash table, after the last
00442   // element in the last bucket
00443   Iterator iterator;
00444   iterator.hash_table = this;
00445   iterator.bucket_index = buckets.size() - 1;
00446   iterator.position = buckets[buckets.size() - 1].end();
00447   return iterator;
00448 }
00449 
00450 template <class Key, class Value, class Hash_func, class Compare_func>
00451 void
00452 Hash_table<Key, Value, Hash_func, Compare_func>::print() {
00453   screen.print("hash table ", false);
00454   screen.print(this, false);
00455   screen.print(": ");
00456   
00457   unsigned count = 0;
00458   for (Hash_iterator<Key, Value> pos = begin(); pos != end(); ++pos) {
00459     screen.print("  node ", false);
00460     screen.print(&*pos, false);
00461     screen.print(" index ", false);
00462     screen.print(pos.bucket_index, false);
00463     screen.print(": ", false);
00464     screen.print(pos->key, false);
00465     screen.print(" -> ", false);
00466     screen.print(pos->value);
00467     for (unsigned i=0; i<999999; i++){}
00468     count++;
00469   }
00470 
00471   screen.print(count, false);
00472   screen.print(" elements");
00473 
00474   screen.print(buckets.size(), false);
00475   screen.print(" buckets");
00476 }
00477 
00478 template <class Key, class Value, class Hash_func, class Compare_func>
00479 void
00480 Hash_table<Key, Value, Hash_func, Compare_func>::unit_test() {
00481   const unsigned NODE_COUNT = 1;
00482   Hash_node<Key, Value> nodes[NODE_COUNT];
00483 
00484   // insert a ton of nodes into the hash table
00485   screen.print("inserting test nodes into hash table");
00486   for (unsigned i = 0; i < NODE_COUNT; ++i) {
00487     nodes[i].key = (Key)i;
00488     nodes[i].value = (Key)(i+1);
00489     insert(nodes[i]);
00490   }
00491   
00492   screen.print("checking values of all test nodes");
00493   for (unsigned i = 0; i < NODE_COUNT; ++i) {
00494     Value* value = get(nodes[i].key);
00495     if (*value != nodes[i].value) {
00496       screen.print("error: expected test value not found");
00497       while (1) { }
00498     }
00499   }
00500 
00501   screen.print("removing all test nodes");
00502   for (unsigned i = 0; i < NODE_COUNT; ++i) {
00503     if (!remove(nodes[i].key)) {
00504       screen.print("error: node removal failed");
00505       while (1) { }
00506     }
00507   }
00508 
00509   screen.print("unit test complete");
00510 }
00511 
00512 #endif    
00513 
00514 /* Torsion Operating System, Copyright (C) 2000-2004 Dan Helfman
00515  *
00516  * This program is free software; you can redistribute it and/or modify it
00517  * under the terms of the GNU General Public License as published by the
00518  * Free Software Foundation; either version 2 of the License, or (at your
00519  * option) any later version.
00520  * 
00521  * This program is distributed in the hope that it will be useful, but
00522  * WITHOUT ANY WARRANTY; without even the implied warranty of
00523  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00524  * General Public License for more details (in the COPYING file).
00525  * 
00526  * You should have received a copy of the GNU General Public License along
00527  * with this program; if not, write to the Free Software Foundation, Inc.,
00528  * 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00529  */

Torsion Operating System, Copyright (C) 2000-2004 Dan Helfman