version 1.2, 2001/04/27 01:05:03
|
version 1.3, 2001/04/27 18:46:20
|
|
|
| |
PEGASUS_NAMESPACE_BEGIN | PEGASUS_NAMESPACE_BEGIN |
| |
PEGASUS_COMMON_LINKAGE Uint32 Hash(const String& str); |
/* This is the default hash function object used by the HashTable template. |
|
Specializations are provided for common types. |
|
*/ |
|
template<class K> |
|
struct HashFunc |
|
{ |
|
}; |
| |
inline Uint32 Hash(Uint32 x) { return x + 13; } |
template<> struct PEGASUS_COMMON_LINKAGE HashFunc<String> |
|
{ |
|
static Uint32 hash(const String& str); |
|
}; |
| |
/** Representation for a bucket. The HashTable class derives from this |
template<> struct HashFunc<Uint32> |
|
{ |
|
static Uint32 hash(Uint32 x) { return x + 13; } |
|
}; |
|
|
|
/* This is a function object used by the HashTable to compare keys. This is |
|
the default implementation. Others may be defined and passed in the |
|
template argument list to perform other kinds of comparisons. |
|
*/ |
|
template<class K> |
|
struct EqualFunc |
|
{ |
|
static Boolean equal(const K& x, const K& y) |
|
{ |
|
return x == y; |
|
} |
|
}; |
|
|
|
/* Representation for a bucket. The _Bucket class derives from this |
bucket to append a key and value. This base class just defines | bucket to append a key and value. This base class just defines |
the pointer to the next bucket in the chain. | the pointer to the next bucket in the chain. |
*/ | */ |
|
|
{ | { |
public: | public: |
| |
/** Default constructor. */ |
/* Default constructor. */ |
_BucketBase() : next(0) { } | _BucketBase() : next(0) { } |
| |
/** Virtual destructor to ensure destruction of derived class |
/* Virtual destructor to ensure destruction of derived class |
elements. | elements. |
*/ | */ |
virtual ~_BucketBase(); | virtual ~_BucketBase(); |
| |
/** returns true if the key pointed to by the key argument is equal |
/* returns true if the key pointed to by the key argument is equal |
to the internal key of this bucket. This method must be overridden | to the internal key of this bucket. This method must be overridden |
by the derived class. | by the derived class. |
*/ | */ |
virtual Boolean equal(const void* key) const = 0; | virtual Boolean equal(const void* key) const = 0; |
| |
/** Clone this bucket. */ |
/* Clone this bucket. */ |
virtual _BucketBase* clone() const = 0; | virtual _BucketBase* clone() const = 0; |
| |
_BucketBase* next; | _BucketBase* next; |
}; | }; |
| |
class _HashTableBase; |
class _HashTableRep; |
| |
/** This class implements a simple hash table forward iterator. */ |
/* This class implements a simple hash table forward iterator. */ |
class PEGASUS_COMMON_LINKAGE _HashTableIteratorBase | class PEGASUS_COMMON_LINKAGE _HashTableIteratorBase |
{ | { |
public: | public: |
|
|
_BucketBase** _first; | _BucketBase** _first; |
_BucketBase** _last; | _BucketBase** _last; |
_BucketBase* _bucket; | _BucketBase* _bucket; |
friend _HashTableBase; |
friend _HashTableRep; |
}; | }; |
| |
/** The _HashTableBase class is the base class which HashTable derives from. |
// ATTN: reorganization not supported yet. |
|
|
|
/*- The _HashTableRep class is the representation class used by HashTable. |
| |
This code is primarily an internal class used to implement the HashTable. | This code is primarily an internal class used to implement the HashTable. |
But there may be occasions to use it directly. | But there may be occasions to use it directly. |
| |
_HashTableBase parcels out much of the large code so that that code is not |
_HashTableRep parcels out much of the large code so that that code is not |
instantiated by the HashTable template class many times. This scheme helps | instantiated by the HashTable template class many times. This scheme helps |
reduce code bloat caused by templates. The HashTable template class below | reduce code bloat caused by templates. The HashTable template class below |
acts as kind of a wrapper around this class. | acts as kind of a wrapper around this class. |
| |
_HashTableBase is implemented as an array of pointers to chains of hash |
_HashTableRep is implemented as an array of pointers to chains of hash |
buckets. The table initially allocates some number of chains (which can | buckets. The table initially allocates some number of chains (which can |
be controlled by the constructor) and then may increase the number of | be controlled by the constructor) and then may increase the number of |
chains later (resulting in a reorganization of the hash table). | chains later (resulting in a reorganization of the hash table). |
|
|
ATTN: reorganization not supported yet. |
|
*/ | */ |
class PEGASUS_COMMON_LINKAGE _HashTableBase |
class PEGASUS_COMMON_LINKAGE _HashTableRep |
{ | { |
public: | public: |
| |
/** This constructor allocates an array of pointers to chains of buckets, |
/*- This constructor allocates an array of pointers to chains of buckets, |
which of course are all empty at this time. The numChains argument specifies the initial number of chains. | which of course are all empty at this time. The numChains argument specifies the initial number of chains. |
If the numChains argument is less than eight, then eight chains will | If the numChains argument is less than eight, then eight chains will |
be created. | be created. |
@param numChains - specifies the initial number of chains. |
@param numChains specifies the initial number of chains. |
*/ | */ |
_HashTableBase(Uint32 numChains); |
_HashTableRep(Uint32 numChains); |
| |
/** Copy constructor. */ |
/*- Copy constructor. */ |
_HashTableBase(const _HashTableBase& x); |
_HashTableRep(const _HashTableRep& x); |
| |
/** Destructor. */ |
/*- Destructor. */ |
~_HashTableBase(); |
~_HashTableRep(); |
| |
/** Assignment operator. */ |
/*- Assignment operator. */ |
_HashTableBase& operator=(const _HashTableBase& x); |
_HashTableRep& operator=(const _HashTableRep& x); |
| |
/** Returns the size of this hash table (the number of entries). */ |
/*- Returns the size of this hash table (the number of entries). */ |
Uint32 getSize() const { return _size; } | Uint32 getSize() const { return _size; } |
| |
/** Clears the contents of this hash table. After this is called, the |
/*- Clears the contents of this hash table. After this is called, the |
getSize() method returns zero. | getSize() method returns zero. |
*/ | */ |
void clear(); | void clear(); |
| |
/** Inserts new key-value pair into hash table. Deletes the bucket on |
/*- Inserts new key-value pair into hash table. Deletes the bucket on |
failure so caller need not. | failure so caller need not. |
@param hashCode - hash code generated by caller's hash function. |
@param hashCode hash code generated by caller's hash function. |
@param bucket - bucket to be inserted. |
@param bucket bucket to be inserted. |
@param key - pointer to key. |
@param key pointer to key. |
@return true if insertion successful; false if duplicate key. | @return true if insertion successful; false if duplicate key. |
*/ | */ |
Boolean insert(Uint32 hashCode, _BucketBase* bucket, const void* key); | Boolean insert(Uint32 hashCode, _BucketBase* bucket, const void* key); |
| |
/** Finds the bucket with the given key. This method uses the |
/*- Finds the bucket with the given key. This method uses the |
_BucketBase::equal() method to compare keys. | _BucketBase::equal() method to compare keys. |
@param hashCode - hash code generated by caller's hash function. |
@param hashCode hash code generated by caller's hash function. |
@param key - void pointer to key. |
@param key void pointer to key. |
@return pointer to bucket with that key or zero otherwise. | @return pointer to bucket with that key or zero otherwise. |
*/ | */ |
const _BucketBase* lookup(Uint32 hashCode, const void* key); | const _BucketBase* lookup(Uint32 hashCode, const void* key); |
| |
/** Removes the bucket with the given key. This method uses the |
/*- Removes the bucket with the given key. This method uses the |
_BucketBase::equal() method to compare keys. | _BucketBase::equal() method to compare keys. |
@param hashCode - hash code generated by caller's hash function. |
@param hashCode hash code generated by caller's hash function. |
@param key - void pointer to key. |
@param key void pointer to key. |
@return true if entry found and removed and false otherwise. | @return true if entry found and removed and false otherwise. |
*/ | */ |
Boolean remove(Uint32 hashCode, const void* key); | Boolean remove(Uint32 hashCode, const void* key); |
| |
|
_BucketBase** getChains() const { return _chains; } |
|
|
|
Uint32 getNumChains() const { return _numChains; } |
|
|
protected: | protected: |
| |
Uint32 _size; | Uint32 _size; |
|
|
_BucketBase** _chains; | _BucketBase** _chains; |
}; | }; |
| |
/** The _Bucket class is used to implement the HashTable class. |
/* The _Bucket class is used to implement the HashTable class. |
*/ | */ |
template<class K, class V> |
template<class K, class V, class E> |
class _Bucket : public _BucketBase | class _Bucket : public _BucketBase |
{ | { |
public: | public: |
|
|
V _value; | V _value; |
}; | }; |
| |
template<class K, class V> |
template<class K, class V, class E> |
Boolean _Bucket<K,V>::equal(const void* key) const |
Boolean _Bucket<K, V, E>::equal(const void* key) const |
{ | { |
return *((K*)key) == _key; |
return E::equal(*((K*)key), _key); |
} | } |
| |
template<class K, class V> |
template<class K, class V, class E> |
_Bucket<K,V>::~_Bucket() |
_Bucket<K, V, E>::~_Bucket() |
{ | { |
| |
} | } |
| |
template<class K, class V> |
template<class K, class V, class E> |
_BucketBase* _Bucket<K,V>::clone() const |
_BucketBase* _Bucket<K, V, E>::clone() const |
{ | { |
return new _Bucket<K,V>(_key, _value); |
return new _Bucket<K, V, E>(_key, _value); |
} | } |
| |
/** Iterator for HashTable class. */ |
/* Iterator for HashTable class. */ |
template<class K, class V> |
template<class K, class V, class E> |
class _HashTableIterator : public _HashTableIteratorBase | class _HashTableIterator : public _HashTableIteratorBase |
{ | { |
public: | public: |
|
|
_HashTableIterator(_BucketBase** first, _BucketBase** last) | _HashTableIterator(_BucketBase** first, _BucketBase** last) |
: _HashTableIteratorBase(first, last) { } | : _HashTableIteratorBase(first, last) { } |
| |
const K& key() const { return ((_Bucket<K,V>*)_bucket)->getKey(); } |
const K& key() const { return ((_Bucket<K, V, E>*)_bucket)->getKey(); } |
| |
const V& value() const { return ((_Bucket<K,V>*)_bucket)->getValue(); } |
const V& value() const { return ((_Bucket<K, V, E>*)_bucket)->getValue(); } |
}; | }; |
| |
/** HashTable provides a simple hash table implementation which associates |
/** The HashTable class provides a simple hash table implementation which |
key-value pairs. |
associates key-value pairs. |
|
|
|
This implementation minimizes template bloat considerably by factoring out |
|
most of the code into a common non-template class (see _HashTableRep). |
|
The HashTable class is mostly a wrapper to add proper type semantics to the |
|
use of its representation class. |
|
|
|
Hashing as always is O(1). |
|
|
|
HashTable uses the most popular hash table implementation which utilizes |
|
an array of pointers to bucket chains. This is organized as follows: |
|
|
|
<pre> |
|
+---+ |
|
| | +-----+-------+ |
|
0 | ----->| key | value | |
|
| | +-----+-------+ |
|
+---+ |
|
| | +-----+-------+ +-----+-------+ +-----+-------+ |
|
1 | ----->| key | value |-->| key | value |-->| key | value | |
|
| | +-----+-------+ +-----+-------+ +-----+-------+ |
|
+---+ |
|
. |
|
. |
|
. |
|
+---+ |
|
| | +-----+-------+ +-----+-------+ |
|
N-1| ----->| key | value |-->| key | value | |
|
| | +-----+-------+ +-----+-------+ |
|
+---+ |
|
</pre> |
|
|
|
To locate an item a hash function is applied to the key to produce an |
|
integer value. Then the modulo of that integer is taken with N to select |
|
a chain (as shown above). Then the chain is searched for a bucket whose |
|
key value is the same as the target key. |
|
|
|
The number of chains defaults to DEFAULT_NUM_CHAINS but should be about |
|
one-third the number of expected entries (so that the average chain |
|
will be three long). Making the number of chains too large will waste |
|
space causing the hash table to be very sparse. But for optimal efficiency, |
|
one might set the number of chains to be the same as the expected number |
|
of entries. |
|
|
|
This implementation does NOT have an adaptive growth algorithm yet which |
|
would allow it to increase the number of chains periodically based on some |
|
statistic (e.g., when the number of entries is more than three times the |
|
number of chains; this would keep the average chain length below three). |
|
|
|
The following example shows how to instantiate a HashTable which associates |
|
String keys with Uint32 values. |
|
|
|
<pre> |
|
typedef HashTable<String, Uint32> HT; |
|
HT ht; |
|
</pre> |
|
|
|
Some of the template arguments are defaulted in the above example (the |
|
third and fourth). The instantiation is explicitly qualified like this |
|
(which by the way has exactly the same effect). |
|
|
|
<pre> |
|
typedef HashTable<String, Uint32, EqualFunc<String>, HashFunc<String> > HT; |
|
</pre> |
|
|
|
The third and fourth arguments are described in more detail later. |
|
|
|
Then, entries may be inserted like this: |
|
|
|
<pre> |
|
ht.insert("Red", 111); |
|
ht.insert("Green", 222); |
|
ht.insert("Blue", 222); |
|
</pre> |
|
|
|
And entries may be looked up as follows: |
|
|
|
<pre> |
|
Uint32 value; |
|
ht.lookup("Red", value); |
|
</pre> |
|
|
|
And entries may be removed like this: |
|
|
|
<pre> |
|
ht.remove("Red"); |
|
</pre> |
|
|
|
Iteration is done like this: |
|
|
|
<pre> |
|
for (HT::Iterator i = ht.start(); i; i++) |
|
{ |
|
// To access the key call i.key()! |
|
// To access the value call i.value()! |
|
} |
|
</pre> |
|
|
|
Note that only forward iteration is supported (no backwards iteration). |
|
|
|
Equality of keys is determined using the EqualFunc class which is |
|
the default third argument of the template argument list. A new function |
|
object may be defined and passed to modify the behavior (for example, one |
|
might define equality of strings to ignore whitespace). Here is how to |
|
define and use a new equality function object: |
|
|
|
<pre> |
|
struct MyEqualFunc |
|
{ |
|
static Boolean equal(const String& x, const String& y) |
|
{ |
|
// do something here to test for equality! |
|
} |
|
}; |
|
|
|
... |
|
|
|
HashTable<String, Uint32, MyEqualFunc> ht; |
|
</pre> |
|
|
|
When the lookup(), insert(), and remove() methods are called, the |
|
MyEqualFunc::equal() method will be used to determine equality. |
|
|
|
Hash functions are provided for common types (as part of the default |
|
HashFunc class). For other types it is possible to define a custom function |
|
object as follows: |
|
|
|
<pre> |
|
struct MyHashFunc |
|
{ |
|
static Uint32 hash(const String& x) |
|
{ |
|
// Do some hashing here! |
|
} |
|
}; |
|
|
|
... |
|
|
|
HashTable<String, Uint32, MyEqualFunc, MyHashFunc> ht; |
|
</pre> |
|
|
|
As always, the hash function should provide a reasonably uniform |
|
distribution so that all of the entries don't get crowded into a few |
|
chains. Note that a hash function which returns zero, would force |
|
the pathological case in which all entries are placed in the first |
|
chain. |
*/ | */ |
template<class K, class V> |
template<class K, class V, class E = EqualFunc<K>, class H = HashFunc<K> > |
class HashTable : public _HashTableBase |
class HashTable |
{ | { |
public: | public: |
| |
typedef _HashTableIterator<K,V> Iterator; |
typedef _HashTableIterator<K, V, E> Iterator; |
| |
/** By default, we create this many chains initially */ |
/* By default, we create this many chains initially */ |
enum { DEFAULT_NUM_CHAINS = 32 }; | enum { DEFAULT_NUM_CHAINS = 32 }; |
| |
/** Constructor. | /** Constructor. |
@param numChains - number of chains to create. |
@param numChains number of chains to create. |
*/ | */ |
HashTable(Uint32 numChains = DEFAULT_NUM_CHAINS) |
HashTable(Uint32 numChains = DEFAULT_NUM_CHAINS) : _rep(numChains) |
: _HashTableBase(numChains) |
{ |
|
|
|
} |
|
|
|
/** Copy constructor. */ |
|
HashTable(const HashTable& x) : _rep(x._rep) |
|
{ |
|
|
|
} |
|
|
|
/** Assignment operator. */ |
|
HashTable& operator=(const HashTable& x) |
{ | { |
|
if (this != &x) |
|
_rep = x._rep; |
|
return *this; |
} | } |
| |
|
/** Returns the size of this hash table (the number of entries). */ |
|
Uint32 getSize() const { return _rep.getSize(); } |
|
|
|
/** Clears the contents of this hash table. After this is called, the |
|
getSize() method returns zero. |
|
*/ |
|
void clear() { _rep.clear(); } |
|
|
/** Inserts new key-value pair into hash table. | /** Inserts new key-value pair into hash table. |
@param key - key component. |
@param key key component. |
@param value - value component. |
@param value value component. |
@return true on success; false if duplicate key. | @return true on success; false if duplicate key. |
*/ | */ |
Boolean insert(const K& key, const V& value) | Boolean insert(const K& key, const V& value) |
{ | { |
return _HashTableBase::insert( |
return _rep.insert( |
Hash(key), new _Bucket<K,V>(key, value), &key); |
H::hash(key), new _Bucket<K, V, E>(key, value), &key); |
} | } |
| |
/** Looks up the entry with the given key. | /** Looks up the entry with the given key. |
@param key - key of entry to be located. |
@param key key of entry to be located. |
@param value - output value. |
@param value output value. |
@return true if found; false otherwise. | @return true if found; false otherwise. |
*/ | */ |
Boolean lookup(const K& key, V& value); | Boolean lookup(const K& key, V& value); |
| |
/** Removes the entry with the given key. | /** Removes the entry with the given key. |
@param key - key of entry to be removed. |
@param key key of entry to be removed. |
@return true on success; false otherwise. | @return true on success; false otherwise. |
*/ | */ |
Boolean remove(const K& key) | Boolean remove(const K& key) |
{ | { |
return _HashTableBase::remove(Hash(key), &key); |
return _rep.remove(H::hash(key), &key); |
} | } |
| |
/** Obtains an iterator for this object. */ | /** Obtains an iterator for this object. */ |
Iterator start() const | Iterator start() const |
{ | { |
return Iterator(_chains, _chains + _numChains); |
return Iterator( |
|
_rep.getChains(), _rep.getChains() + _rep.getNumChains()); |
} | } |
|
|
|
private: |
|
|
|
_HashTableRep _rep; |
}; | }; |
| |
template<class K, class V> |
template<class K, class V, class E, class H> |
inline Boolean HashTable<K,V>::lookup(const K& key, V& value) |
inline Boolean HashTable<K, V, E, H>::lookup(const K& key, V& value) |
{ | { |
_Bucket<K,V>* bucket |
_Bucket<K, V, E>* bucket |
= (_Bucket<K,V>*)_HashTableBase::lookup(Hash(key), &key); |
= (_Bucket<K, V, E>*)_rep.lookup(H::hash(key), &key); |
| |
if (bucket) | if (bucket) |
{ | { |