Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

Hash.h

Go to the documentation of this file.
00001 // -*- C++ -*- 00002 00003 // PLearn (A C++ Machine Learning Library) 00004 // Copyright (C) 1998 Pascal Vincent 00005 // Copyright (C) 1999-2002 Pascal Vincent, Yoshua Bengio and University of Montreal 00006 // 00007 00008 // Redistribution and use in source and binary forms, with or without 00009 // modification, are permitted provided that the following conditions are met: 00010 // 00011 // 1. Redistributions of source code must retain the above copyright 00012 // notice, this list of conditions and the following disclaimer. 00013 // 00014 // 2. Redistributions in binary form must reproduce the above copyright 00015 // notice, this list of conditions and the following disclaimer in the 00016 // documentation and/or other materials provided with the distribution. 00017 // 00018 // 3. The name of the authors may not be used to endorse or promote 00019 // products derived from this software without specific prior written 00020 // permission. 00021 // 00022 // THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR 00023 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00024 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN 00025 // NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00026 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 00027 // TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 00028 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 00029 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 00030 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00031 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00032 // 00033 // This file is part of the PLearn library. For more information on the PLearn 00034 // library, go to the PLearn Web site at www.plearn.org 00035 00036 00037 /* ******************************************************* 00038 * $Id: Hash.h,v 1.5 2004/07/21 16:30:53 chrish42 Exp $ 00039 * This file is part of the PLearn library. 00040 ******************************************************* */ 00041 00046 #ifndef MODULE_HASH 00047 #define MODULE_HASH 00048 00049 #include <cstdlib> 00050 #include <plearn/base/general.h> 00051 #include <plearn/base/PP.h> 00052 00053 namespace PLearn { 00054 using namespace std; 00055 00056 00066 00067 00069 00070 extern const unsigned int Hash_UNUSED_TAG; 00071 extern const void * Hash_DELETED_SLOT; 00072 00073 template <class KeyType, class DataType> 00074 class HashKeyDataPair 00075 { 00076 public: 00077 KeyType *key; 00078 DataType *data; 00079 00080 HashKeyDataPair(); 00081 HashKeyDataPair(const KeyType &the_key, const DataType &the_data); 00082 ~HashKeyDataPair(); 00083 }; 00084 00085 00087 template <class KeyType, class DataType> 00088 class Hash : public PPointable 00089 { 00090 private: 00091 HashKeyDataPair<KeyType,DataType> **table; 00092 unsigned int tableSize; 00093 unsigned int elements; 00094 unsigned int pace; 00095 unsigned int jumps; 00096 00097 unsigned int hashKey(const KeyType &) const; 00098 void computePace(); 00099 unsigned int pgcd(unsigned int, unsigned int) const; 00100 00101 unsigned int find(const KeyType &); 00102 unsigned int findGap(const KeyType &); 00103 00105 bool static_tables_initialized; 00106 KeyType *key_table; 00107 DataType *data_table; 00108 HashKeyDataPair<KeyType,DataType> *hash_table; 00109 unsigned int next_free_slot; 00110 00111 public: 00112 bool isStatic; 00113 00114 bool add(const KeyType &, const DataType &); 00115 void addAndResize(const KeyType &, const DataType &); 00116 bool element(const KeyType &); 00117 bool del(const KeyType &); 00118 00119 unsigned int hashAddress(const KeyType &); 00120 00121 DataType * operator[](unsigned int addr) const; 00122 KeyType * operator()(unsigned int addr) const; 00123 HashKeyDataPair<KeyType,DataType>** getTable() const { return table; } 00124 00125 unsigned int size() const; 00126 unsigned int cardinal() const; 00127 00128 void flush(); 00129 00130 void initializeTable(unsigned int size); 00131 00133 00134 bool full() const; 00135 bool resizeTime() const; 00136 void resize(int newsize = -1); 00137 00138 void cleanup(); 00139 unsigned int probes() const; 00140 void diagnostics(unsigned int & clusters, 00141 unsigned int & maxCluster) const; 00142 00143 00144 Hash(unsigned int size=10000, bool static_allocation=false); 00145 ~Hash(); 00146 private: 00147 Hash(const Hash& obj) {} 00148 void operator=(const Hash& obj) {} 00149 }; 00150 00152 00153 template <class KeyType, class DataType> 00154 HashKeyDataPair<KeyType, DataType>::HashKeyDataPair() 00155 : key(0), data(0) 00156 { 00157 } 00158 00159 template <class KeyType, class DataType> 00160 HashKeyDataPair<KeyType, DataType>::HashKeyDataPair(const KeyType &the_key, const DataType &the_data) 00161 { 00162 key = new KeyType(the_key); 00163 data = new DataType(the_data); 00164 } 00165 00166 template <class KeyType, class DataType> 00167 HashKeyDataPair<KeyType, DataType>::~HashKeyDataPair() 00168 { 00169 delete key; 00170 delete data; 00171 } 00172 00173 00175 00177 00178 extern const unsigned int Hash_NOMBRES_MAGIQUES[256]; 00179 00181 00185 template <class KeyType, class DataType> 00186 unsigned int Hash<KeyType,DataType>::hashKey(const KeyType & key) const 00187 { 00188 int l = key.byteLength(); 00189 unsigned int hashKey=0u; 00190 unsigned char *pKey = l>0?((unsigned char *)(char *)key):(unsigned char*)0; 00191 00192 for (int i=0;i<l;i++) 00193 { 00194 unsigned char t = (hashKey >> 24); 00195 hashKey = (hashKey << 8) + pKey[i]; 00196 hashKey ^= Hash_NOMBRES_MAGIQUES[t]; 00197 } 00198 00199 return hashKey % tableSize; 00200 } 00201 00203 template <class KeyType, class DataType> 00204 unsigned int Hash<KeyType,DataType>::pgcd(unsigned int a, unsigned int b) const 00205 { 00206 while (b) 00207 { 00208 unsigned int r=(a % b); 00209 a=b; 00210 b=r; 00211 } 00212 return a; 00213 } 00214 00216 00221 template <class KeyType, class DataType> 00222 void Hash<KeyType,DataType>::computePace() 00223 { 00224 unsigned int t = (tableSize * 6) / 10; 00225 while (pgcd(tableSize,t)!=1) t--; 00226 pace = t; 00227 } 00228 00230 00235 template <class KeyType, class DataType> 00236 unsigned int Hash<KeyType,DataType>::find(const KeyType & key) 00237 { 00238 unsigned int h = hashKey(key); 00245 jumps=0; 00247 while ( (table[h]) && (jumps<tableSize) && 00248 ((table[h]==Hash_DELETED_SLOT) || (*table[h]->key!=key) ) ) 00249 { 00250 h = (h+pace) % tableSize; 00251 jumps++; 00252 } 00253 00254 return h; 00255 } 00256 00258 00263 template <class KeyType, class DataType> 00264 unsigned int Hash<KeyType,DataType>::findGap(const KeyType & key) 00265 { 00266 unsigned int h = hashKey(key); 00267 00268 jumps=0; 00269 while ((table[h]>Hash_DELETED_SLOT) && (jumps<tableSize)) 00270 { 00271 h = (h+pace) % tableSize; 00272 jumps++; 00273 } 00274 00275 return h; 00276 } 00277 00278 00280 00285 template <class KeyType, class DataType> 00286 bool Hash<KeyType,DataType>::add(const KeyType & key, const DataType & data) 00287 { 00288 unsigned int h = findGap(key); 00290 if ((table[h]>Hash_DELETED_SLOT) && (*table[h]->key==key)) 00291 { 00292 if (!isStatic) 00293 delete table[h]; 00294 table[h]=0; 00295 elements--; 00296 } 00297 if ((table[h]==0) || (table[h]==Hash_DELETED_SLOT)) 00298 { 00299 if (!isStatic) 00300 table[h] = new HashKeyDataPair<KeyType,DataType>(key, data); 00301 else { 00302 if (static_tables_initialized==false) 00303 PLERROR("You must initialized the static table before using the add method"); 00304 if (next_free_slot == tableSize) 00305 return false; 00306 table[h] = &hash_table[next_free_slot]; 00307 table[h]->key = &key_table[next_free_slot]; 00308 table[h]->data = &data_table[next_free_slot]; 00309 key_table[next_free_slot] = key; 00310 data_table[next_free_slot] = data; 00311 next_free_slot++; 00312 } 00313 elements++; 00314 return true; 00315 } 00316 else return false; 00317 } 00318 00320 00324 template <class KeyType, class DataType> 00325 void Hash<KeyType,DataType>::addAndResize(const KeyType & key, const DataType & data) 00326 { 00327 if (resizeTime()) resize(); 00328 add(key,data); 00329 } 00330 00332 00336 template <class KeyType, class DataType> 00337 bool Hash<KeyType,DataType>::del(const KeyType & key) 00338 { 00339 unsigned int h = find(key); 00340 00341 if (table[h]>Hash_DELETED_SLOT) 00342 { 00343 if (!isStatic) 00344 delete table[h]; 00345 table[h] = (HashKeyDataPair<KeyType,DataType> *)Hash_DELETED_SLOT; 00346 elements--; 00347 return true; 00348 } 00349 else return false; 00350 } 00351 00353 00357 template <class KeyType, class DataType> 00358 bool Hash<KeyType,DataType>::element(const KeyType & key) 00359 { 00360 unsigned int h = find(key); 00361 return (table[h]>Hash_DELETED_SLOT) && (*table[h]->key==key); 00362 } 00363 00365 00370 template <class KeyType, class DataType> 00371 unsigned int Hash<KeyType,DataType>::hashAddress(const KeyType & key) 00372 { 00373 unsigned int h = find(key); 00374 00375 if ( (table[h]>Hash_DELETED_SLOT) && (*table[h]->key==key) ) 00376 return h; 00377 else return Hash_UNUSED_TAG; 00378 } 00379 00380 00384 template <class KeyType, class DataType> 00385 unsigned int Hash<KeyType,DataType>::size() const 00386 { 00387 return tableSize; 00388 } 00389 00393 template <class KeyType, class DataType> 00394 unsigned int Hash<KeyType,DataType>::cardinal() const 00395 { 00396 return elements; 00397 } 00398 00399 00401 00405 template <class KeyType, class DataType> 00406 bool Hash<KeyType,DataType>::resizeTime() const 00407 { 00408 return (elements > (tableSize * 7) / 10); 00409 } 00410 00412 00416 template <class KeyType, class DataType> 00417 bool Hash<KeyType,DataType>::full() const 00418 { 00419 return elements == tableSize; 00420 } 00421 00422 00424 00428 template <class KeyType, class DataType> 00429 void Hash<KeyType,DataType>::resize(int newsize) 00430 { 00431 //PLWARNING("You are resizing the Hash table in static mode"); 00432 00433 if (newsize<0) newsize=tableSize*4; 00434 if (newsize<=(int)tableSize) return; 00435 Hash t(newsize); 00436 cout << "Resize Hash table to " << newsize << endl; 00437 for (unsigned int i=0;i<tableSize;i++) 00438 if (table[i] > Hash_DELETED_SLOT) 00439 t.add(*table[i]->key,*table[i]->data); 00440 00441 flush(); 00442 delete[] table; 00443 00444 table = t.table; 00445 t.table = 0; 00446 tableSize = t.tableSize; 00447 elements = t.elements; 00448 pace = t.pace; 00449 t.table= 0; 00450 } 00451 00453 00457 template <class KeyType, class DataType> 00458 void Hash<KeyType,DataType>::cleanup() 00459 { 00460 Hash t(tableSize); 00461 00462 for (unsigned int i=0;i<tableSize;i++) 00463 if (table[i] > Hash_DELETED_SLOT) 00464 t.add(*table[i]->key,*table[i]->data); 00465 00466 flush(); 00467 delete[] table; 00468 00469 table = t.table; 00470 t.table = 0; 00471 tableSize = t.tableSize; 00472 elements = t.elements; 00473 pace = t.pace; 00474 t.table=0; 00475 } 00476 00477 00481 template <class KeyType, class DataType> 00482 DataType * Hash<KeyType,DataType>::operator[](unsigned int addr) const 00483 { 00484 if ((addr<tableSize) && (table[addr]>Hash_DELETED_SLOT)) 00485 return table[addr]->data; 00486 else return 0; 00487 } 00488 00489 00493 template <class KeyType, class DataType> 00494 KeyType * Hash<KeyType,DataType>::operator()(unsigned int addr) const 00495 { 00496 if ((addr<tableSize) && (table[addr]>Hash_DELETED_SLOT)) 00497 return table[addr]->key; 00498 else return 0; 00499 } 00500 00501 00502 00504 00508 template <class KeyType, class DataType> 00509 void Hash<KeyType,DataType>::flush() 00510 { 00511 if (table) 00512 { 00513 for (unsigned int i=0;i<tableSize;i++) 00514 if (table[i]>Hash_DELETED_SLOT) 00515 { 00516 if (!isStatic) 00517 delete table[i]; 00518 table[i]=0; 00519 } 00520 elements=0; 00521 } 00522 } 00523 00524 00528 template <class KeyType, class DataType> 00529 void Hash<KeyType, DataType>::initializeTable(unsigned int size) 00530 { 00531 if (!isStatic) 00532 PLERROR("The static tables can be initialized only in static mode"); 00533 00534 if (static_tables_initialized==false) { 00535 key_table = new KeyType[size]; 00536 data_table = new DataType[size]; 00537 hash_table = new HashKeyDataPair<KeyType,DataType>[size]; 00538 } 00539 else 00540 PLWARNING("The static tables are already initialized"); 00541 00542 static_tables_initialized = true; 00543 } 00544 00545 00549 template <class KeyType, class DataType> 00550 Hash<KeyType, DataType>::Hash(unsigned int size, bool static_allocation) 00551 : tableSize(size), elements(0), static_tables_initialized(false), 00552 next_free_slot(0), isStatic(static_allocation) 00553 { 00555 //table = (HashKeyDataPair<KeyType,DataType> **)calloc(size, sizeof(HashKeyDataPair<KeyType,DataType> *)); 00556 assert (size > 0); 00557 typedef HashKeyDataPair<KeyType,DataType>* hash_key_data_table; 00558 table = new hash_key_data_table[size]; 00559 for (unsigned int i=0; i<size; i++) table[i]=0; 00560 00561 computePace(); 00562 } 00563 00564 00566 template <class KeyType, class DataType> 00567 Hash<KeyType,DataType>::~Hash() 00568 { 00569 if (table) 00570 { 00571 flush(); 00572 delete[] table; 00573 } 00574 if (isStatic) { 00581 long i, maxi=tableSize; 00582 for (i=0; i<maxi; ++i) { 00583 hash_table[i].key = 0; 00584 hash_table[i].data = 0; 00585 } 00586 00587 delete[] key_table; 00588 delete[] data_table; 00589 delete[] hash_table; 00590 } 00591 } 00592 00594 template <class KeyType, class DataType> 00595 unsigned int Hash<KeyType,DataType>::probes() const 00596 { 00597 return jumps+1; 00598 } 00599 00601 #ifndef __max 00602 #define __max(a,b) ((a)>(b)?(a):(b)) 00603 #define __maxredefined 00604 #endif 00605 00606 template <class KeyType, class DataType> 00607 void Hash<KeyType,DataType>::diagnostics(unsigned int & clusters, unsigned int & maxCluster) const 00608 { 00609 clusters=0; 00610 maxCluster=0; 00611 00612 unsigned int d=0; 00613 while (d<tableSize) 00614 { 00615 while ((d<tableSize) && (table[d]==0)) d++; 00616 if (d<tableSize) 00617 { 00618 clusters++; 00619 unsigned int c=0; 00620 while ((d<tableSize) && (table[d])) c++,d++; 00621 maxCluster=__max(maxCluster,c); 00622 } 00623 } 00624 } 00625 00630 struct Symbol 00631 { 00632 int s; 00633 Symbol(int symbol=0) : s(symbol) {} 00634 Symbol(const Symbol& b) : s(b.s) { } 00635 operator char*() const { return (char*)(&s); } 00636 // norman: removed useless const 00637 size_t byteLength() const { return sizeof(int); } 00638 operator int() { return s; } 00639 bool operator==(const Symbol& o) const { return o.s==s; } 00640 bool operator!=(const Symbol& o) const { return o.s!=s; } 00641 Symbol operator++(int) { return ++s; } 00642 Symbol operator++() { return ++s; } 00643 }; 00644 00645 #ifdef __maxredefined 00646 #undef __maxredefined 00647 #endif 00648 00650 00651 class IntPair { 00652 public: 00653 int i0; 00654 int i1; 00655 00656 IntPair() { i0=i1=0; } 00657 IntPair(int j0, int j1) : i0(j0), i1(j1) {} 00658 IntPair(const IntPair& b) : i0(b.i0), i1(b.i1) { } 00659 operator char*() const { return (char*)(&i0); } 00660 int size() const { return 2; } 00661 // norman: removed useless const 00662 size_t byteLength() const { return 2*sizeof(int); } 00663 bool operator!=(const IntPair& b) const { return (i0!=b.i0 || i1!=b.i1); } 00664 bool operator==(const IntPair& b) const { return (i0==b.i0 && i1==b.i1); } 00665 int& operator[](int i) { if (i==0) return i0; else return i1; } 00666 }; 00667 00668 00669 } // end of namespace PLearn 00670 00671 #endif 00672

Generated on Tue Aug 17 15:54:55 2004 for PLearn by doxygen 1.3.7