00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00046
#ifndef MODULE_HASH
00047
#define MODULE_HASH
00048
00049
#include <cstdlib>
00050
#include <plearn/base/general.h>
00051
#include <plearn/base/PP.h>
00052
00053
namespace PLearn {
00054
using namespace std;
00055
00056
00066
00067
00069
00070
extern const unsigned int Hash_UNUSED_TAG;
00071
extern const void *
Hash_DELETED_SLOT;
00072
00073
template <
class KeyType,
class DataType>
00074 class HashKeyDataPair
00075 {
00076
public:
00077 KeyType *
key;
00078 DataType *
data;
00079
00080
HashKeyDataPair();
00081
HashKeyDataPair(
const KeyType &the_key,
const DataType &the_data);
00082
~HashKeyDataPair();
00083 };
00084
00085
00087
template <
class KeyType,
class DataType>
00088 class Hash :
public PPointable
00089 {
00090
private:
00091 HashKeyDataPair<KeyType,DataType> **
table;
00092 unsigned int tableSize;
00093 unsigned int elements;
00094 unsigned int pace;
00095 unsigned int jumps;
00096
00097
unsigned int hashKey(
const KeyType &) const;
00098
void computePace();
00099
unsigned int pgcd(
unsigned int,
unsigned int) const;
00100
00101
unsigned int find(const KeyType &);
00102
unsigned int findGap(const KeyType &);
00103
00105 bool static_tables_initialized;
00106 KeyType *key_table;
00107 DataType *data_table;
00108 HashKeyDataPair<KeyType,DataType> *hash_table;
00109 unsigned int next_free_slot;
00110
00111 public:
00112 bool isStatic;
00113
00114
bool add(const KeyType &, const DataType &);
00115
void addAndResize(const KeyType &, const DataType &);
00116
bool element(const KeyType &);
00117
bool del(const KeyType &);
00118
00119
unsigned int hashAddress(const KeyType &);
00120
00121 DataType * operator[](
unsigned int addr) const;
00122 KeyType * operator()(
unsigned int addr) const;
00123 HashKeyDataPair<KeyType,DataType>** getTable()
const {
return table; }
00124
00125
unsigned int size() const;
00126
unsigned int cardinal() const;
00127
00128
void flush();
00129
00130
void initializeTable(
unsigned int size);
00131
00133
00134
bool full() const;
00135
bool resizeTime() const;
00136
void resize(
int newsize = -1);
00137
00138
void cleanup();
00139
unsigned int probes() const;
00140
void diagnostics(
unsigned int & clusters,
00141
unsigned int & maxCluster) const;
00142
00143
00144
Hash(
unsigned int size=10000,
bool static_allocation=false);
00145 ~
Hash();
00146 private:
00147 Hash(const
Hash& obj) {}
00148 void operator=(
const Hash& obj) {}
00149 };
00150
00152
00153
template <
class KeyType,
class DataType>
00154 HashKeyDataPair<KeyType, DataType>::HashKeyDataPair()
00155 : key(0), data(0)
00156 {
00157 }
00158
00159
template <
class KeyType,
class DataType>
00160 HashKeyDataPair<KeyType, DataType>::HashKeyDataPair(
const KeyType &the_key,
const DataType &the_data)
00161 {
00162
key =
new KeyType(the_key);
00163
data =
new DataType(the_data);
00164 }
00165
00166
template <
class KeyType,
class DataType>
00167 HashKeyDataPair<KeyType, DataType>::~HashKeyDataPair()
00168 {
00169
delete key;
00170
delete data;
00171 }
00172
00173
00175
00177
00178
extern const unsigned int Hash_NOMBRES_MAGIQUES[256];
00179
00181
00185
template <
class KeyType,
class DataType>
00186 unsigned int Hash<KeyType,DataType>::hashKey(
const KeyType & key)
const
00187
{
00188
int l = key.byteLength();
00189
unsigned int hashKey=0u;
00190
unsigned char *pKey = l>0?((
unsigned char *)(
char *)key):(
unsigned char*)0;
00191
00192
for (
int i=0;i<l;i++)
00193 {
00194
unsigned char t = (hashKey >> 24);
00195 hashKey = (hashKey << 8) + pKey[i];
00196 hashKey ^=
Hash_NOMBRES_MAGIQUES[t];
00197 }
00198
00199
return hashKey %
tableSize;
00200 }
00201
00203
template <
class KeyType,
class DataType>
00204 unsigned int Hash<KeyType,DataType>::pgcd(
unsigned int a,
unsigned int b)
const
00205
{
00206
while (b)
00207 {
00208
unsigned int r=(a % b);
00209 a=b;
00210 b=r;
00211 }
00212
return a;
00213 }
00214
00216
00221
template <
class KeyType,
class DataType>
00222 void Hash<KeyType,DataType>::computePace()
00223 {
00224
unsigned int t = (
tableSize * 6) / 10;
00225
while (
pgcd(
tableSize,t)!=1) t--;
00226
pace = t;
00227 }
00228
00230
00235
template <
class KeyType,
class DataType>
00236 unsigned int Hash<KeyType,DataType>::find(
const KeyType & key)
00237 {
00238
unsigned int h =
hashKey(key);
00245
jumps=0;
00247
while ( (
table[h]) && (
jumps<
tableSize) &&
00248 ((
table[h]==
Hash_DELETED_SLOT) || (*
table[h]->key!=key) ) )
00249 {
00250 h = (h+
pace) %
tableSize;
00251
jumps++;
00252 }
00253
00254
return h;
00255 }
00256
00258
00263
template <
class KeyType,
class DataType>
00264 unsigned int Hash<KeyType,DataType>::findGap(
const KeyType & key)
00265 {
00266
unsigned int h =
hashKey(key);
00267
00268
jumps=0;
00269
while ((
table[h]>
Hash_DELETED_SLOT) && (
jumps<
tableSize))
00270 {
00271 h = (h+
pace) %
tableSize;
00272
jumps++;
00273 }
00274
00275
return h;
00276 }
00277
00278
00280
00285
template <
class KeyType,
class DataType>
00286 bool Hash<KeyType,DataType>::add(
const KeyType & key,
const DataType & data)
00287 {
00288
unsigned int h =
findGap(key);
00290
if ((
table[h]>
Hash_DELETED_SLOT) && (*
table[h]->key==key))
00291 {
00292
if (!
isStatic)
00293
delete table[h];
00294
table[h]=0;
00295
elements--;
00296 }
00297
if ((
table[h]==0) || (
table[h]==
Hash_DELETED_SLOT))
00298 {
00299
if (!
isStatic)
00300
table[h] =
new HashKeyDataPair<KeyType,DataType>(key, data);
00301
else {
00302
if (
static_tables_initialized==
false)
00303
PLERROR(
"You must initialized the static table before using the add method");
00304
if (
next_free_slot ==
tableSize)
00305
return false;
00306
table[h] = &
hash_table[
next_free_slot];
00307
table[h]->key = &
key_table[
next_free_slot];
00308
table[h]->data = &
data_table[
next_free_slot];
00309
key_table[
next_free_slot] = key;
00310
data_table[
next_free_slot] = data;
00311
next_free_slot++;
00312 }
00313
elements++;
00314
return true;
00315 }
00316
else return false;
00317 }
00318
00320
00324
template <
class KeyType,
class DataType>
00325 void Hash<KeyType,DataType>::addAndResize(
const KeyType & key,
const DataType & data)
00326 {
00327
if (
resizeTime())
resize();
00328
add(key,data);
00329 }
00330
00332
00336
template <
class KeyType,
class DataType>
00337 bool Hash<KeyType,DataType>::del(
const KeyType & key)
00338 {
00339
unsigned int h =
find(key);
00340
00341
if (
table[h]>
Hash_DELETED_SLOT)
00342 {
00343
if (!
isStatic)
00344
delete table[h];
00345
table[h] = (
HashKeyDataPair<KeyType,DataType> *)
Hash_DELETED_SLOT;
00346
elements--;
00347
return true;
00348 }
00349
else return false;
00350 }
00351
00353
00357
template <
class KeyType,
class DataType>
00358 bool Hash<KeyType,DataType>::element(
const KeyType & key)
00359 {
00360
unsigned int h =
find(key);
00361
return (
table[h]>
Hash_DELETED_SLOT) && (*
table[h]->key==key);
00362 }
00363
00365
00370
template <
class KeyType,
class DataType>
00371 unsigned int Hash<KeyType,DataType>::hashAddress(
const KeyType & key)
00372 {
00373
unsigned int h =
find(key);
00374
00375
if ( (
table[h]>
Hash_DELETED_SLOT) && (*
table[h]->key==key) )
00376
return h;
00377
else return Hash_UNUSED_TAG;
00378 }
00379
00380
00384
template <
class KeyType,
class DataType>
00385 unsigned int Hash<KeyType,DataType>::size()
const
00386
{
00387
return tableSize;
00388 }
00389
00393
template <
class KeyType,
class DataType>
00394 unsigned int Hash<KeyType,DataType>::cardinal()
const
00395
{
00396
return elements;
00397 }
00398
00399
00401
00405
template <
class KeyType,
class DataType>
00406 bool Hash<KeyType,DataType>::resizeTime()
const
00407
{
00408
return (
elements > (
tableSize * 7) / 10);
00409 }
00410
00412
00416
template <
class KeyType,
class DataType>
00417 bool Hash<KeyType,DataType>::full()
const
00418
{
00419
return elements ==
tableSize;
00420 }
00421
00422
00424
00428
template <
class KeyType,
class DataType>
00429 void Hash<KeyType,DataType>::resize(
int newsize)
00430 {
00431
00432
00433
if (newsize<0) newsize=
tableSize*4;
00434
if (newsize<=(
int)tableSize)
return;
00435
Hash t(newsize);
00436 cout <<
"Resize Hash table to " << newsize <<
endl;
00437
for (
unsigned int i=0;i<tableSize;i++)
00438
if (
table[i] >
Hash_DELETED_SLOT)
00439 t.
add(*
table[i]->key,*
table[i]->data);
00440
00441
flush();
00442
delete[]
table;
00443
00444
table = t.
table;
00445 t.
table = 0;
00446 tableSize = t.
tableSize;
00447
elements = t.
elements;
00448
pace = t.
pace;
00449 t.
table= 0;
00450 }
00451
00453
00457
template <
class KeyType,
class DataType>
00458 void Hash<KeyType,DataType>::cleanup()
00459 {
00460
Hash t(
tableSize);
00461
00462
for (
unsigned int i=0;i<
tableSize;i++)
00463
if (
table[i] >
Hash_DELETED_SLOT)
00464 t.
add(*
table[i]->key,*
table[i]->data);
00465
00466
flush();
00467
delete[]
table;
00468
00469
table = t.
table;
00470 t.
table = 0;
00471 tableSize = t.
tableSize;
00472
elements = t.
elements;
00473
pace = t.
pace;
00474 t.
table=0;
00475 }
00476
00477
00481
template <
class KeyType,
class DataType>
00482 DataType *
Hash<KeyType,DataType>::operator[](
unsigned int addr)
const
00483
{
00484
if ((addr<tableSize) && (table[addr]>
Hash_DELETED_SLOT))
00485
return table[addr]->data;
00486
else return 0;
00487 }
00488
00489
00493
template <
class KeyType,
class DataType>
00494 KeyType *
Hash<KeyType,DataType>::operator()(
unsigned int addr)
const
00495
{
00496
if ((addr<tableSize) && (table[addr]>
Hash_DELETED_SLOT))
00497
return table[addr]->key;
00498
else return 0;
00499 }
00500
00501
00502
00504
00508
template <
class KeyType,
class DataType>
00509 void Hash<KeyType,DataType>::flush()
00510 {
00511
if (
table)
00512 {
00513
for (
unsigned int i=0;i<
tableSize;i++)
00514
if (
table[i]>
Hash_DELETED_SLOT)
00515 {
00516
if (!
isStatic)
00517
delete table[i];
00518
table[i]=0;
00519 }
00520
elements=0;
00521 }
00522 }
00523
00524
00528
template <
class KeyType,
class DataType>
00529 void Hash<KeyType, DataType>::initializeTable(
unsigned int size)
00530 {
00531
if (!
isStatic)
00532
PLERROR(
"The static tables can be initialized only in static mode");
00533
00534
if (
static_tables_initialized==
false) {
00535
key_table =
new KeyType[size];
00536
data_table =
new DataType[size];
00537
hash_table =
new HashKeyDataPair<KeyType,DataType>[size];
00538 }
00539
else
00540
PLWARNING(
"The static tables are already initialized");
00541
00542
static_tables_initialized =
true;
00543 }
00544
00545
00549
template <
class KeyType,
class DataType>
00550 Hash<KeyType, DataType>::Hash(
unsigned int size,
bool static_allocation)
00551 : tableSize(size), elements(0), static_tables_initialized(false),
00552 next_free_slot(0), isStatic(static_allocation)
00553 {
00555
00556 assert (size > 0);
00557
typedef HashKeyDataPair<KeyType,DataType>* hash_key_data_table;
00558
table =
new hash_key_data_table[size];
00559
for (
unsigned int i=0; i<size; i++)
table[i]=0;
00560
00561
computePace();
00562 }
00563
00564
00566
template <
class KeyType,
class DataType>
00567 Hash<KeyType,DataType>::~Hash()
00568 {
00569
if (
table)
00570 {
00571
flush();
00572
delete[]
table;
00573 }
00574
if (
isStatic) {
00581
long i, maxi=
tableSize;
00582
for (i=0; i<maxi; ++i) {
00583
hash_table[i].key = 0;
00584
hash_table[i].data = 0;
00585 }
00586
00587
delete[]
key_table;
00588
delete[]
data_table;
00589
delete[]
hash_table;
00590 }
00591 }
00592
00594
template <
class KeyType,
class DataType>
00595 unsigned int Hash<KeyType,DataType>::probes()
const
00596
{
00597
return jumps+1;
00598 }
00599
00601
#ifndef __max
00602 #define __max(a,b) ((a)>(b)?(a):(b))
00603 #define __maxredefined
00604
#endif
00605
00606
template <
class KeyType,
class DataType>
00607 void Hash<KeyType,DataType>::diagnostics(
unsigned int & clusters,
unsigned int & maxCluster)
const
00608
{
00609 clusters=0;
00610 maxCluster=0;
00611
00612
unsigned int d=0;
00613
while (d<
tableSize)
00614 {
00615
while ((d<
tableSize) && (
table[d]==0)) d++;
00616
if (d<
tableSize)
00617 {
00618 clusters++;
00619
unsigned int c=0;
00620
while ((d<
tableSize) && (
table[d])) c++,d++;
00621 maxCluster=
__max(maxCluster,c);
00622 }
00623 }
00624 }
00625
00630 struct Symbol
00631 {
00632 int s;
00633 Symbol(
int symbol=0) :
s(symbol) {}
00634 Symbol(
const Symbol& b) :
s(b.
s) { }
00635 operator char*()
const {
return (
char*)(&
s); }
00636
00637 size_t
byteLength()
const {
return sizeof(
int); }
00638 operator int() {
return s; }
00639 bool operator==(
const Symbol& o)
const {
return o.
s==
s; }
00640 bool operator!=(
const Symbol& o)
const {
return o.
s!=
s; }
00641 Symbol operator++(
int) {
return ++
s; }
00642 Symbol operator++() {
return ++
s; }
00643 };
00644
00645
#ifdef __maxredefined
00646
#undef __maxredefined
00647
#endif
00648
00650
00651 class IntPair {
00652
public:
00653 int i0;
00654 int i1;
00655
00656 IntPair() {
i0=
i1=0; }
00657 IntPair(
int j0,
int j1) :
i0(j0),
i1(j1) {}
00658 IntPair(
const IntPair& b) :
i0(b.
i0),
i1(b.
i1) { }
00659 operator char*()
const {
return (
char*)(&
i0); }
00660 int size()
const {
return 2; }
00661
00662 size_t
byteLength()
const {
return 2*
sizeof(
int); }
00663 bool operator!=(
const IntPair& b)
const {
return (
i0!=b.
i0 ||
i1!=b.
i1); }
00664 bool operator==(
const IntPair& b)
const {
return (
i0==b.
i0 &&
i1==b.
i1); }
00665 int&
operator[](
int i) {
if (i==0)
return i0;
else return i1; }
00666 };
00667
00668
00669 }
00670
00671
#endif
00672