(file) Return to HashTable.h CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 mike  1.1 //%/////////////////////////////////////////////////////////////////////////////
  2           //
  3           // Copyright (c) 2000 The Open Group, BMC Software, Tivoli Systems, IBM
  4           //
  5           // Permission is hereby granted, free of charge, to any person obtaining a
  6           // copy of this software and associated documentation files (the "Software"),
  7           // to deal in the Software without restriction, including without limitation
  8           // the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9           // and/or sell copies of the Software, and to permit persons to whom the
 10           // Software is furnished to do so, subject to the following conditions:
 11           //
 12           // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 13           // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 14           // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 15           // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 16           // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 17           // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 18           // DEALINGS IN THE SOFTWARE.
 19           //
 20           //==============================================================================
 21           //
 22 mike  1.1 // Author: Mike Brasher (mbrasher@bmc.com)
 23           //
 24           // Modified By:
 25           //
 26           //%/////////////////////////////////////////////////////////////////////////////
 27           
 28           #ifndef Pegasus_HashTable_h
 29           #define Pegasus_HashTable_h
 30           
 31           #include <Pegasus/Common/Config.h>
 32           #include <Pegasus/Common/String.h>
 33           
 34           PEGASUS_NAMESPACE_BEGIN
 35           
 36 mike  1.3 /*  This is the default hash function object used by the HashTable template.
 37               Specializations are provided for common types.
 38           */
 39           template<class K>
 40           struct HashFunc
 41           {
 42           };
 43 mike  1.2 
 44 mike  1.3 template<> struct PEGASUS_COMMON_LINKAGE HashFunc<String>
 45           {
 46               static Uint32 hash(const String& str);
 47           };
 48 mike  1.1 
 49 mike  1.3 template<> struct HashFunc<Uint32>
 50           {
 51               static Uint32 hash(Uint32 x) { return x + 13; }
 52           };
 53           
 54           /*  This is a function object used by the HashTable to compare keys. This is
 55               the default implementation. Others may be defined and passed in the 
 56               template argument list to perform other kinds of comparisons.
 57           */
 58           template<class K>
 59           struct EqualFunc
 60           {
 61               static Boolean equal(const K& x, const K& y)
 62               {
 63           	return x == y;
 64               }
 65           };
 66           
 67           /*  Representation for a bucket. The HashTable class derives from this
 68 mike  1.1     bucket to append a key and value. This base class just defines
 69               the pointer to the next bucket in the chain.
 70           */
 71 mike  1.2 class PEGASUS_COMMON_LINKAGE _BucketBase
 72 mike  1.1 {
 73           public:
 74           
 75 mike  1.3     /* Default constructor. */
 76 mike  1.2     _BucketBase() : next(0) { }
 77           
 78 mike  1.3     /* Virtual destructor to ensure destruction of derived class 
 79 mike  1.1 	elements.
 80               */
 81 mike  1.2     virtual ~_BucketBase();
 82 mike  1.1 
 83 mike  1.3     /* returns true if the key pointed to by the key argument is equal
 84 mike  1.1 	to the internal key of this bucket. This method must be overridden
 85           	by the derived class.
 86               */
 87               virtual Boolean equal(const void* key) const = 0;
 88           
 89 mike  1.3     /* Clone this bucket. */
 90 mike  1.2     virtual _BucketBase* clone() const = 0;
 91           
 92               _BucketBase* next;
 93 mike  1.1 };
 94           
 95 mike  1.3 class _HashTableRep;
 96 mike  1.1 
 97 mike  1.3 /* This class implements a simple hash table forward iterator. */
 98 mike  1.2 class PEGASUS_COMMON_LINKAGE _HashTableIteratorBase
 99 mike  1.1 {
100           public:
101               
102 mike  1.2     _HashTableIteratorBase() : _first(0), _last(0), _bucket(0) { }
103 mike  1.1 
104               operator Boolean() const { return _bucket != 0; }
105           
106 mike  1.2     _HashTableIteratorBase operator++(int);
107 mike  1.1 
108 mike  1.2     _HashTableIteratorBase& operator++();
109 mike  1.1 
110 mike  1.2     _HashTableIteratorBase(_BucketBase** first, _BucketBase** last);
111 mike  1.1 
112           protected:
113           
114 mike  1.2     _BucketBase** _first;
115               _BucketBase** _last;
116               _BucketBase* _bucket;
117 mike  1.3     friend _HashTableRep;
118 mike  1.1 };
119           
120 mike  1.3 // ATTN: reorganization not supported yet.
121           
122           /*- The _HashTableRep class is the representation class used by HashTable.
123 mike  1.1 
124               This code is primarily an internal class used to implement the HashTable.
125               But there may be occasions to use it directly.
126           
127 mike  1.3     _HashTableRep parcels out much of the large code so that that code is not 
128 mike  1.1     instantiated by the HashTable template class many times. This scheme helps 
129               reduce code bloat caused by templates. The HashTable template class below
130               acts as kind of a wrapper around this class.
131           
132 mike  1.3     _HashTableRep is implemented as an array of pointers to chains of hash
133 mike  1.1     buckets. The table initially allocates some number of chains (which can
134               be controlled by the constructor) and then may increase the number of
135               chains later (resulting in a reorganization of the hash table).
136           */
137 mike  1.3 class PEGASUS_COMMON_LINKAGE _HashTableRep
138 mike  1.1 {
139           public:
140           
141 mike  1.3     /*- This constructor allocates an array of pointers to chains of buckets,
142 mike  1.1 	which of course are all empty at this time. The numChains argument
143           	If the numChains argument is less than eight, then eight chains will
144           	be created.
145 mike  1.3 	@param numChains specifies the initial number of chains.
146 mike  1.1     */
147 mike  1.3     _HashTableRep(Uint32 numChains);
148 mike  1.2 
149 mike  1.3     /*- Copy constructor. */
150               _HashTableRep(const _HashTableRep& x);
151 mike  1.1 
152 mike  1.3     /*- Destructor. */
153               ~_HashTableRep();
154 mike  1.2 
155 mike  1.3     /*- Assignment operator. */
156               _HashTableRep& operator=(const _HashTableRep& x);
157 mike  1.1 
158 mike  1.3     /*- Returns the size of this hash table (the number of entries). */
159 mike  1.5     Uint32 size() const { return _size; }
160 mike  1.1 
161 mike  1.3     /*- Clears the contents of this hash table. After this is called, the
162 mike  1.5 	size() method returns zero.
163 mike  1.1     */
164               void clear();
165           
166 mike  1.3     /*- Inserts new key-value pair into hash table. Deletes the bucket on
167 mike  1.1 	failure so caller need not.
168 mike  1.3 	@param hashCode hash code generated by caller's hash function.
169           	@param bucket bucket to be inserted.
170           	@param key pointer to key.
171 mike  1.1 	@return true if insertion successful; false if duplicate key.
172               */
173 mike  1.2     Boolean insert(Uint32 hashCode, _BucketBase* bucket, const void* key);
174 mike  1.1 
175 mike  1.3     /*- Finds the bucket with the given key. This method uses the
176 mike  1.2 	_BucketBase::equal() method to compare keys.
177 mike  1.3 	@param hashCode hash code generated by caller's hash function.
178           	@param key void pointer to key.
179 mike  1.1 	@return pointer to bucket with that key or zero otherwise.
180               */
181 mike  1.2     const _BucketBase* lookup(Uint32 hashCode, const void* key);
182 mike  1.1 
183 mike  1.3     /*- Removes the bucket with the given key. This method uses the
184 mike  1.2 	_BucketBase::equal() method to compare keys.
185 mike  1.3 	@param hashCode hash code generated by caller's hash function.
186           	@param key void pointer to key.
187 mike  1.1 	@return true if entry found and removed and false otherwise.
188               */
189               Boolean remove(Uint32 hashCode, const void* key);
190           
191 mike  1.3     _BucketBase** getChains() const { return _chains; }
192           
193               Uint32 getNumChains() const { return _numChains; }
194           
195 mike  1.1 protected:
196           
197               Uint32 _size;
198               Uint32 _numChains;
199 mike  1.2     _BucketBase** _chains;
200 mike  1.1 };
201           
202 mike  1.3 /* The _Bucket class is used to implement the HashTable class.
203 mike  1.1 */
204 mike  1.3 template<class K, class V, class E>
205 mike  1.2 class _Bucket : public _BucketBase
206 mike  1.1 {
207           public:
208           
209 mike  1.2     _Bucket(const K& key, const V& value) : _key(key), _value(value) { }
210 mike  1.1 
211 mike  1.2     virtual ~_Bucket();
212 mike  1.1 
213               virtual Boolean equal(const void* key) const;
214           
215 mike  1.2     virtual _BucketBase* clone() const;
216           
217 mike  1.1     K& getKey() { return _key; }
218           
219               V& getValue() { return _value; }
220           
221           private:
222           
223               K _key;
224               V _value;
225           };
226           
227 mike  1.3 template<class K, class V, class E>
228           Boolean _Bucket<K, V, E>::equal(const void* key) const
229 mike  1.1 {
230 mike  1.3     return E::equal(*((K*)key), _key);
231 mike  1.1 }
232           
233 mike  1.3 template<class K, class V, class E>
234           _Bucket<K, V, E>::~_Bucket()
235 mike  1.1 {
236           
237           }
238           
239 mike  1.3 template<class K, class V, class E>
240           _BucketBase* _Bucket<K, V, E>::clone() const
241 mike  1.2 {
242 mike  1.3     return new _Bucket<K, V, E>(_key, _value);
243 mike  1.2 }
244           
245 mike  1.3 /* Iterator for HashTable class. */
246           template<class K, class V, class E>
247 mike  1.2 class _HashTableIterator : public _HashTableIteratorBase
248 mike  1.1 {
249           public:
250           
251 mike  1.2     _HashTableIterator() 
252           	: _HashTableIteratorBase() { }
253 mike  1.1 
254 mike  1.2     _HashTableIterator(_BucketBase** first, _BucketBase** last)
255           	: _HashTableIteratorBase(first, last) { }
256 mike  1.1 
257 mike  1.3     const K& key() const { return ((_Bucket<K, V, E>*)_bucket)->getKey(); }
258 mike  1.1 
259 mike  1.3     const V& value() const { return ((_Bucket<K, V, E>*)_bucket)->getValue(); }
260 mike  1.1 };
261           
262 mike  1.3 /** The HashTable class provides a simple hash table implementation which 
263               associates key-value pairs.
264           
265               This implementation minimizes template bloat considerably by factoring out
266               most of the code into a common non-template class (see _HashTableRep).
267               The HashTable class is mostly a wrapper to add proper type semantics to the
268               use of its representation class.
269           
270               Hashing as always is O(1).
271           
272               HashTable uses the most popular hash table implementation which utilizes 
273               an array of pointers to bucket chains. This is organized as follows:
274           
275                   <pre>
276                      +---+ 
277                      |   |   +-----+-------+
278                    0 | ----->| key | value |
279                      |   |   +-----+-------+
280                      +---+
281                      |   |   +-----+-------+   +-----+-------+   +-----+-------+
282                    1 | ----->| key | value |-->| key | value |-->| key | value |
283 mike  1.3            |   |   +-----+-------+   +-----+-------+   +-----+-------+
284                      +---+
285                        .
286                        .
287                        .
288                      +---+
289                      |   |   +-----+-------+   +-----+-------+
290                   N-1| ----->| key | value |-->| key | value |
291                      |   |   +-----+-------+   +-----+-------+
292                      +---+
293                   </pre>
294           
295               To locate an item a hash function is applied to the key to produce an 
296               integer value. Then the modulo of that integer is taken with N to select 
297               a chain (as shown above). Then the chain is searched for a bucket whose
298               key value is the same as the target key.
299           
300               The number of chains default to DEFAULT_NUM_CHAINS but should be about
301               one-third the number of expected entries (so that the average chain
302               will be three long). Making the number of chains too large will waste
303               space causing the hash table to be very sparse. But for optimal efficiency,
304 mike  1.3     one might set the number of chains to be the same as the expected number
305               of entries.
306           
307               This implementation does have NOT an adaptive growth algorithm yet which 
308               would allow it to increase the number of chains periodically based on some
309               statistic (e.g., when the number of entries is more than three times the
310               number of chains; this would keep the average chain length below three).
311           
312               The following example shows how to instantiate a HashTable which associates
313               String keys with Uint32 values.
314           
315           	<pre>
316           	typedef HashTable&lt;String, Uint32&gt; HT;
317           	HT ht;
318           	</pre>
319           
320               Some of the template arguments are defaulted in the above example (the 
321               third and forth). The instantiation is explicitly qualified like this
322               (which by the way has exactly the same effect).
323           
324           	<pre>
325 mike  1.3 	typedef HashTable&lt;String, Uint32, EqualFunc&lt;String&gt;, HashFunc&lt;String&gt;&gt; HT;
326           	</pre>
327           
328               The third and forth arguments are described more in detail later.
329           
330               Then, entries may be inserted like this:
331           
332           	<pre>
333           	ht.insert("Red", 111);
334           	ht.insert("Green", 222);
335           	ht.insert("Blue", 222);
336           	</pre>
337           
338               And entries may be looked up as follows:
339           
340           	<pre>
341           	Uint32 value;
342           	ht.lookup("Red", value);
343           	</pre>
344           
345               And entries may be removed like this:
346 mike  1.3 
347           	<pre>
348           	h.remove("Red");
349           	</pre>
350           
351               Iteration is done like this:
352           
353           	<pre>
354           	for (HT::Iterator i = ht.start(); i; i++)
355           	{
356           	    // To access the key call i.key()!
357           	    // To access the value call i.value()!
358           	}
359           	</pre>
360           
361               Note that only forward iteration is supported (no backwards iteration).
362           
363               Equality of keys is determined using the EqualFunc class which is
364               the default third argument of the template argument list. A new function 
365               object may be defined and passed to modify the behavior (for example, one 
366               might define equality of strings to ignore whitespace). Here is how to 
367 mike  1.3     define and use a new equality function object:
368           
369           	<pre>
370           	struct MyEqualFunc
371           	{
372           	    static Boolean equal(const String& x, const String& y)
373           	    {
374           		// do something here to test for equality!
375           	    }
376           	};
377           
378           	...
379           
380           	EqualFunc&lt;String, Uint32, MyEqualFunc&gt; ht;
381           	</pre>
382           
383               When the lookup(), insert(), and remove() methods are called, the 
384               MyEqualFunc::equal() method will be used to determine equality.
385           
386               Hash functions are provided for common types (as part of the default
387               HashFunc class). For other types it is possible to define a custom function
388 mike  1.3     object as follows:
389           
390           	<pre>
391           	struct MyHashFunc
392           	{
393           	    static Uint32 hash(const String& x)
394           	    {
395           		// Do some hashing here!
396           	    }
397           	};
398           
399           	...
400           
401           	EqualFunc&lt;String, Uint32, MyEqualFunc, MyHashFunc&gt; ht;
402           	</pre>
403           
404               As always, the hash function should provide a reasonably uniform 
405               distrubtion so that all of the entries don't get crowded into a few
406               chains. Note that a hash function which returns zero, would force
407               the pathalogical case in which all entries are placed in the first
408               chain.
409 mike  1.1 */
410 mike  1.3 template<class K, class V, class E = EqualFunc<K>, class H = HashFunc<K> >
411           class HashTable
412 mike  1.1 {
413           public:
414           
415 mike  1.3     typedef _HashTableIterator<K, V, E> Iterator;
416 mike  1.1 
417 mike  1.3     /* By default, we create this many chains initially */
418 mike  1.1     enum { DEFAULT_NUM_CHAINS = 32 };
419           
420               /** Constructor.
421 mike  1.3 	@param numChains number of chains to create.
422 mike  1.1     */
423 mike  1.3     HashTable(Uint32 numChains = DEFAULT_NUM_CHAINS) : _rep(numChains)
424 mike  1.1     {
425 mike  1.3 
426 mike  1.1     }
427           
428 mike  1.3     /** Copy constructor. */
429               HashTable(const HashTable& x) : _rep(x._rep)
430               {
431           
432               }
433           
434               /** Assignment operator. */
435               HashTable& operator=(const HashTable& x)
436               {
437           	if (this != &x)
438           	    _rep = x._rep;
439           	return *this;
440               }
441           
442               /** Returns the size of this hash table (the number of entries). */
443 mike  1.5     Uint32 size() const { return _rep.size(); }
444 mike  1.3 
445               /** Clears the contents of this hash table. After this is called, the
446 mike  1.5 	size() method returns zero.
447 mike  1.3     */
448               void clear() { _rep.clear(); }
449           
450 mike  1.1     /** Inserts new key-value pair into hash table.
451 mike  1.3 	@param key key component.
452           	@param value value component.
453 mike  1.1 	@return true on success; false if duplicate key.
454               */
455               Boolean insert(const K& key, const V& value)
456               {
457 mike  1.3 	return _rep.insert(
458           	    H::hash(key), new _Bucket<K, V, E>(key, value), &key);
459 mike  1.1     }
460           
461 mike  1.4     /** Checks to see if hash table contains an entry with the given key.
462           	@param key key to be searched for
463           	@return true if hash table contains an entry with the given key.
464               */
465               Boolean contains(const K& key)
466               {
467           	V value;
468           	return lookup(key, value);
469               }
470           
471 mike  1.1     /** Looks up the entry with the given key.
472 mike  1.3 	@param key key of entry to be located.
473           	@param value output value.
474 mike  1.1 	@return true if found; false otherwise.
475               */
476               Boolean lookup(const K& key, V& value);
477           
478               /** Removes the entry with the given key.
479 mike  1.3 	@param key key of entry to be removed.
480 mike  1.1 	@return true on success; false otherwise.
481               */
482               Boolean remove(const K& key)
483               {
484 mike  1.3 	return _rep.remove(H::hash(key), &key);
485 mike  1.1     }
486           
487               /** Obtains an iterator for this object. */
488               Iterator start() const 
489               {
490 mike  1.3 	return Iterator(
491           	    _rep.getChains(), _rep.getChains() + _rep.getNumChains());
492 mike  1.1     }
493 mike  1.3 
494           private:
495           
496               _HashTableRep _rep;
497 mike  1.1 };
498           
499 mike  1.3 template<class K, class V, class E, class H>
500           inline Boolean HashTable<K, V, E, H>::lookup(const K& key, V& value)
501 mike  1.1 {
502 mike  1.3     _Bucket<K, V, E>* bucket 
503           	= (_Bucket<K, V, E>*)_rep.lookup(H::hash(key), &key);
504 mike  1.1 
505               if (bucket)
506               {
507           	value = bucket->getValue();
508           	return true;
509               }
510           
511               return false;
512           }
513           
514           PEGASUS_NAMESPACE_END
515           
516           #endif /* Pegasus_HashTable_h */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2