(file) Return to HashTable.h CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 mike  1.11 //%/////////////////////////////////////////////////////////////////////////////
  2            //
  3 kumpf 1.13 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
  4            // The Open Group, Tivoli Systems
  5 mike  1.11 //
  6            // Permission is hereby granted, free of charge, to any person obtaining a copy
  7 kumpf 1.13 // of this software and associated documentation files (the "Software"), to
  8            // deal in the Software without restriction, including without limitation the
  9            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 10 mike  1.11 // sell copies of the Software, and to permit persons to whom the Software is
 11            // furnished to do so, subject to the following conditions:
 12            // 
 13 kumpf 1.13 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 14 mike  1.11 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 15            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 16 kumpf 1.13 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 17            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 18            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 19 mike  1.11 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 20            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21            //
 22            //==============================================================================
 23            //
 24            // Author: Mike Brasher (mbrasher@bmc.com)
 25            //
 26            // Modified By:
 27            //
 28            //%/////////////////////////////////////////////////////////////////////////////
 29            
 30            #ifndef Pegasus_HashTable_h
 31            #define Pegasus_HashTable_h
 32            
 33            #include <Pegasus/Common/Config.h>
 34            #include <Pegasus/Common/String.h>
 35 kumpf 1.14 #include <Pegasus/Common/Linkage.h>
 36 mike  1.11 
 37            PEGASUS_NAMESPACE_BEGIN
 38            
 39            /*  This is the default hash function object used by the HashTable template.
 40                Specializations are provided for common types.
 41            */
 42            template<class K>
 43            struct HashFunc
 44            {
 45            };
 46            
 47            PEGASUS_TEMPLATE_SPECIALIZATION struct PEGASUS_COMMON_LINKAGE HashFunc<String>
 48            {
 49                static Uint32 hash(const String& str);
 50            };
 51            
 52            PEGASUS_TEMPLATE_SPECIALIZATION struct HashFunc<Uint32>
 53            {
 54                static Uint32 hash(Uint32 x) { return x + 13; }
 55            };
 56            
 57 mike  1.11 /*  This is a function object used by the HashTable to compare keys. This is
 58                the default implementation. Others may be defined and passed in the 
 59                template argument list to perform other kinds of comparisons.
 60            */
 61            template<class K>
 62            struct EqualFunc
 63            {
 64                static Boolean equal(const K& x, const K& y)
 65                {
 66            	return x == y;
 67                }
 68            };
 69            
 70            /*  Representation for a bucket. The HashTable class derives from this
 71                bucket to append a key and value. This base class just defines
 72                the pointer to the next bucket in the chain.
 73            */
 74            class PEGASUS_COMMON_LINKAGE _BucketBase
 75            {
 76            public:
 77            
 78 mike  1.11     /* Default constructor. */
 79                _BucketBase() : next(0) { }
 80            
 81                /* Virtual destructor to ensure destruction of derived class 
 82            	elements.
 83                */
 84                virtual ~_BucketBase();
 85            
 86                /* returns true if the key pointed to by the key argument is equal
 87            	to the internal key of this bucket. This method must be overridden
 88            	by the derived class.
 89                */
 90                virtual Boolean equal(const void* key) const = 0;
 91            
 92                /* Clone this bucket. */
 93                virtual _BucketBase* clone() const = 0;
 94            
 95                _BucketBase* next;
 96            };
 97            
 98            class _HashTableRep;
 99 mike  1.11 
100            /* This class implements a simple hash table forward iterator. */
101            class PEGASUS_COMMON_LINKAGE _HashTableIteratorBase
102            {
103            public:
104                
105                _HashTableIteratorBase() : _first(0), _last(0), _bucket(0) { }
106            
107                operator int() const { return _bucket != 0; }
108            
109                _HashTableIteratorBase operator++(int);
110            
111                _HashTableIteratorBase& operator++();
112            
113                _HashTableIteratorBase(_BucketBase** first, _BucketBase** last);
114            
115            protected:
116            
117                _BucketBase** _first;
118                _BucketBase** _last;
119                _BucketBase* _bucket;
120 mike  1.11     friend class _HashTableRep;
121            };
122            
123            // ATTN: reorganization not supported yet.
124            
125            /*- The _HashTableRep class is the representation class used by HashTable.
126            
127                This code is primarily an internal class used to implement the HashTable.
128                But there may be occasions to use it directly.
129            
130                _HashTableRep parcels out much of the large code so that that code is not 
131                instantiated by the HashTable template class many times. This scheme helps 
132                reduce code bloat caused by templates. The HashTable template class below
133                acts as kind of a wrapper around this class.
134            
135                _HashTableRep is implemented as an array of pointers to chains of hash
136                buckets. The table initially allocates some number of chains (which can
137                be controlled by the constructor) and then may increase the number of
138                chains later (resulting in a reorganization of the hash table).
139            */
140            class PEGASUS_COMMON_LINKAGE _HashTableRep
141 mike  1.11 {
142            public:
143            
144                /*- This constructor allocates an array of pointers to chains of buckets,
145            	which of course are all empty at this time. The numChains argument
146            	If the numChains argument is less than eight, then eight chains will
147            	be created.
148            	@param numChains specifies the initial number of chains.
149                */
150                _HashTableRep(Uint32 numChains);
151            
152                /*- Copy constructor. */
153                _HashTableRep(const _HashTableRep& x);
154            
155                /*- Destructor. */
156                ~_HashTableRep();
157            
158                /*- Assignment operator. */
159                _HashTableRep& operator=(const _HashTableRep& x);
160            
161                /*- Returns the size of this hash table (the number of entries). */
162 mike  1.11     Uint32 size() const { return _size; }
163            
164                /*- Clears the contents of this hash table. After this is called, the
165            	size() method returns zero.
166                */
167                void clear();
168            
169                /*- Inserts new key-value pair into hash table. Deletes the bucket on
170            	failure so caller need not.
171            	@param hashCode hash code generated by caller's hash function.
172            	@param bucket bucket to be inserted.
173            	@param key pointer to key.
174            	@return true if insertion successful; false if duplicate key.
175                */
176                Boolean insert(Uint32 hashCode, _BucketBase* bucket, const void* key);
177            
178                /*- Finds the bucket with the given key. This method uses the
179            	_BucketBase::equal() method to compare keys.
180            	@param hashCode hash code generated by caller's hash function.
181            	@param key void pointer to key.
182            	@return pointer to bucket with that key or zero otherwise.
183 mike  1.11     */
184 mike  1.12     const _BucketBase* lookup(Uint32 hashCode, const void* key) const;
185 mike  1.11 
186                /*- Removes the bucket with the given key. This method uses the
187            	_BucketBase::equal() method to compare keys.
188            	@param hashCode hash code generated by caller's hash function.
189            	@param key void pointer to key.
190            	@return true if entry found and removed and false otherwise.
191                */
192                Boolean remove(Uint32 hashCode, const void* key);
193            
194                _BucketBase** getChains() const { return _chains; }
195            
196                Uint32 getNumChains() const { return _numChains; }
197            
198            protected:
199            
200                Uint32 _size;
201                Uint32 _numChains;
202                _BucketBase** _chains;
203            };
204            
205            /* The _Bucket class is used to implement the HashTable class.
206 mike  1.11 */
207            template<class K, class V, class E>
208            class _Bucket : public _BucketBase
209            {
210            public:
211            
212                _Bucket(const K& key, const V& value) : _key(key), _value(value) { }
213            
214                virtual ~_Bucket();
215            
216                virtual Boolean equal(const void* key) const;
217            
218                virtual _BucketBase* clone() const;
219            
220                K& getKey() { return _key; }
221            
222                V& getValue() { return _value; }
223            
224            private:
225            
226                K _key;
227 mike  1.11     V _value;
228            };
229            
230            template<class K, class V, class E>
231            Boolean _Bucket<K, V, E>::equal(const void* key) const
232            {
233                return E::equal(*((K*)key), _key);
234            }
235            
236            template<class K, class V, class E>
237            _Bucket<K, V, E>::~_Bucket()
238            {
239            
240            }
241            
242            template<class K, class V, class E>
243            _BucketBase* _Bucket<K, V, E>::clone() const
244            {
245                return new _Bucket<K, V, E>(_key, _value);
246            }
247            
248 mike  1.11 /* Iterator for HashTable class. */
249            template<class K, class V, class E>
250            class _HashTableIterator : public _HashTableIteratorBase
251            {
252            public:
253            
254                _HashTableIterator() 
255            	: _HashTableIteratorBase() { }
256            
257                _HashTableIterator(_BucketBase** first, _BucketBase** last)
258            	: _HashTableIteratorBase(first, last) { }
259            
260                const K& key() const { return ((_Bucket<K, V, E>*)_bucket)->getKey(); }
261            
262                const V& value() const { return ((_Bucket<K, V, E>*)_bucket)->getValue(); }
263            };
264            
265            /** The HashTable class provides a simple hash table implementation which 
266                associates key-value pairs.
267            
268                This implementation minimizes template bloat considerably by factoring out
269 mike  1.11     most of the code into a common non-template class (see _HashTableRep).
270                The HashTable class is mostly a wrapper to add proper type semantics to the
271                use of its representation class.
272            
273                Hashing as always is O(1).
274            
275                HashTable uses the most popular hash table implementation which utilizes 
276                an array of pointers to bucket chains. This is organized as follows:
277            
278                    <pre>
279                       +---+ 
280                       |   |   +-----+-------+
281                     0 | ----->| key | value |
282                       |   |   +-----+-------+
283                       +---+
284                       |   |   +-----+-------+   +-----+-------+   +-----+-------+
285                     1 | ----->| key | value |-->| key | value |-->| key | value |
286                       |   |   +-----+-------+   +-----+-------+   +-----+-------+
287                       +---+
288                         .
289                         .
290 mike  1.11              .
291                       +---+
292                       |   |   +-----+-------+   +-----+-------+
293                    N-1| ----->| key | value |-->| key | value |
294                       |   |   +-----+-------+   +-----+-------+
295                       +---+
296                    </pre>
297            
298                To locate an item a hash function is applied to the key to produce an 
299                integer value. Then the modulo of that integer is taken with N to select 
300                a chain (as shown above). Then the chain is searched for a bucket whose
301                key value is the same as the target key.
302            
303                The number of chains default to DEFAULT_NUM_CHAINS but should be about
304                one-third the number of expected entries (so that the average chain
305                will be three long). Making the number of chains too large will waste
306                space causing the hash table to be very sparse. But for optimal efficiency,
307                one might set the number of chains to be the same as the expected number
308                of entries.
309            
310                This implementation does have NOT an adaptive growth algorithm yet which 
311 mike  1.11     would allow it to increase the number of chains periodically based on some
312                statistic (e.g., when the number of entries is more than three times the
313                number of chains; this would keep the average chain length below three).
314            
315                The following example shows how to instantiate a HashTable which associates
316                String keys with Uint32 values.
317            
318            	<pre>
319            	typedef HashTable&lt;String, Uint32&gt; HT;
320            	HT ht;
321            	</pre>
322            
323                Some of the template arguments are defaulted in the above example (the 
324                third and forth). The instantiation is explicitly qualified like this
325                (which by the way has exactly the same effect).
326            
327            	<pre>
328            	typedef HashTable&lt;String, Uint32, EqualFunc&lt;String&gt;, HashFunc&lt;String&gt;&gt; HT;
329            	</pre>
330            
331                The third and forth arguments are described more in detail later.
332 mike  1.11 
333                Then, entries may be inserted like this:
334            
335            	<pre>
336            	ht.insert("Red", 111);
337            	ht.insert("Green", 222);
338            	ht.insert("Blue", 222);
339            	</pre>
340            
341                And entries may be looked up as follows:
342            
343            	<pre>
344            	Uint32 value;
345            	ht.lookup("Red", value);
346            	</pre>
347            
348                And entries may be removed like this:
349            
350            	<pre>
351            	h.remove("Red");
352            	</pre>
353 mike  1.11 
354                Iteration is done like this:
355            
356            	<pre>
357            	for (HT::Iterator i = ht.start(); i; i++)
358            	{
359            	    // To access the key call i.key()!
360            	    // To access the value call i.value()!
361            	}
362            	</pre>
363            
364                Note that only forward iteration is supported (no backwards iteration).
365            
366                Equality of keys is determined using the EqualFunc class which is
367                the default third argument of the template argument list. A new function 
368                object may be defined and passed to modify the behavior (for example, one 
369                might define equality of strings to ignore whitespace). Here is how to 
370                define and use a new equality function object:
371            
372            	<pre>
373            	struct MyEqualFunc
374 mike  1.11 	{
375            	    static Boolean equal(const String& x, const String& y)
376            	    {
377            		// do something here to test for equality!
378            	    }
379            	};
380            
381            	...
382            
383            	EqualFunc&lt;String, Uint32, MyEqualFunc&gt; ht;
384            	</pre>
385            
386                When the lookup(), insert(), and remove() methods are called, the 
387                MyEqualFunc::equal() method will be used to determine equality.
388            
389                Hash functions are provided for common types (as part of the default
390                HashFunc class). For other types it is possible to define a custom function
391                object as follows:
392            
393            	<pre>
394            	struct MyHashFunc
395 mike  1.11 	{
396            	    static Uint32 hash(const String& x)
397            	    {
398            		// Do some hashing here!
399            	    }
400            	};
401            
402            	...
403            
404            	EqualFunc&lt;String, Uint32, MyEqualFunc, MyHashFunc&gt; ht;
405            	</pre>
406            
407                As always, the hash function should provide a reasonably uniform 
408                distrubtion so that all of the entries don't get crowded into a few
409                chains. Note that a hash function which returns zero, would force
410                the pathalogical case in which all entries are placed in the first
411                chain.
412            */
413            template<class K, class V, class E , class H >
414            class HashTable
415            {
416 mike  1.11 public:
417            
418                typedef _HashTableIterator<K, V, E> Iterator;
419            
420                /* By default, we create this many chains initially */
421                enum { DEFAULT_NUM_CHAINS = 32 };
422            
423                /** Constructor.
424            	@param numChains number of chains to create.
425                */
426                HashTable(Uint32 numChains = DEFAULT_NUM_CHAINS) : _rep(numChains)
427                {
428            
429                }
430            
431                /** Copy constructor. */
432                HashTable(const HashTable<K,V,E,H>& x) : _rep(x._rep)
433                {
434            
435                }
436            
437 mike  1.11     /** Assignment operator. */
438                HashTable<K,V,E,H>& operator=(const HashTable<K,V,E,H>& x)
439                {
440            	if (this != &x)
441            	    _rep = x._rep;
442            	return *this;
443                }
444            
445                /** Returns the size of this hash table (the number of entries). */
446                Uint32 size() const { return _rep.size(); }
447            
448                /** Clears the contents of this hash table. After this is called, the
449            	size() method returns zero.
450                */
451                void clear() { _rep.clear(); }
452            
453                /** Inserts new key-value pair into hash table.
454            	@param key key component.
455            	@param value value component.
456            	@return true on success; false if duplicate key.
457                */
458 mike  1.11     Boolean insert(const K& key, const V& value)
459                {
460            	return _rep.insert(
461            	    H::hash(key), new _Bucket<K, V, E>(key, value), &key);
462                }
463            
464                /** Checks to see if hash table contains an entry with the given key.
465            	@param key key to be searched for
466            	@return true if hash table contains an entry with the given key.
467                */
468 mike  1.12     Boolean contains(const K& key) const
469 mike  1.11     {
470            	V value;
471            	return lookup(key, value);
472                }
473            
474                /** Looks up the entry with the given key.
475            	@param key key of entry to be located.
476            	@param value output value.
477            	@return true if found; false otherwise.
478                */
479 mike  1.12     Boolean lookup(const K& key, V& value) const;
480 mike  1.11 
481                /** Removes the entry with the given key.
482            	@param key key of entry to be removed.
483            	@return true on success; false otherwise.
484                */
485                Boolean remove(const K& key)
486                {
487            	return _rep.remove(H::hash(key), &key);
488                }
489            
490                /** Obtains an iterator for this object. */
491                Iterator start() const 
492                {
493            	return Iterator(
494            	    _rep.getChains(), _rep.getChains() + _rep.getNumChains());
495                }
496            
497            private:
498            
499                _HashTableRep _rep;
500            };
501 mike  1.11 
502            template<class K, class V, class E, class H>
503 mike  1.12 inline Boolean HashTable<K, V, E, H>::lookup(const K& key, V& value) const
504 mike  1.11 {
505                _Bucket<K, V, E>* bucket 
506            	= (_Bucket<K, V, E>*)_rep.lookup(H::hash(key), &key);
507            
508                if (bucket)
509                {
510            	value = bucket->getValue();
511            	return true;
512                }
513            
514                return false;
515            }
516            
517            PEGASUS_NAMESPACE_END
518            
519            #endif /* Pegasus_HashTable_h */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2