(file) Return to HashTable.h CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.15 //%2003////////////////////////////////////////////////////////////////////////
  2 mike  1.11 //
  3 karl  1.15 // Copyright (c) 2000, 2001, 2002  BMC Software, Hewlett-Packard Development
  4            // Company, L. P., IBM Corp., The Open Group, Tivoli Systems.
  5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L. P.;
  6            // IBM Corp.; EMC Corporation, The Open Group.
  7 mike  1.11 //
  8            // Permission is hereby granted, free of charge, to any person obtaining a copy
  9 kumpf 1.13 // of this software and associated documentation files (the "Software"), to
 10            // deal in the Software without restriction, including without limitation the
 11            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 12 mike  1.11 // sell copies of the Software, and to permit persons to whom the Software is
 13            // furnished to do so, subject to the following conditions:
 14            // 
 15 kumpf 1.13 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 16 mike  1.11 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 17            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 18 kumpf 1.13 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 19            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 20            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 21 mike  1.11 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 22            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 23            //
 24            //==============================================================================
 25            //
 26            // Author: Mike Brasher (mbrasher@bmc.com)
 27            //
 28            // Modified By:
 29            //
 30            //%/////////////////////////////////////////////////////////////////////////////
 31            
 32            #ifndef Pegasus_HashTable_h
 33            #define Pegasus_HashTable_h
 34            
 35            #include <Pegasus/Common/Config.h>
 36            #include <Pegasus/Common/String.h>
 37 kumpf 1.14 #include <Pegasus/Common/Linkage.h>
 38 mike  1.11 
 39            PEGASUS_NAMESPACE_BEGIN
 40            
 41            /*  This is the default hash function object used by the HashTable template.
 42                Specializations are provided for common types.
 43            */
 44            template<class K>
 45            struct HashFunc
 46            {
 47            };
 48            
 49            PEGASUS_TEMPLATE_SPECIALIZATION struct PEGASUS_COMMON_LINKAGE HashFunc<String>
 50            {
 51                static Uint32 hash(const String& str);
 52            };
 53            
 54            PEGASUS_TEMPLATE_SPECIALIZATION struct HashFunc<Uint32>
 55            {
 56                static Uint32 hash(Uint32 x) { return x + 13; }
 57            };
 58            
 59 mike  1.11 /*  This is a function object used by the HashTable to compare keys. This is
 60                the default implementation. Others may be defined and passed in the 
 61                template argument list to perform other kinds of comparisons.
 62            */
 63            template<class K>
 64            struct EqualFunc
 65            {
 66                static Boolean equal(const K& x, const K& y)
 67                {
 68            	return x == y;
 69                }
 70            };
 71            
 72            /*  Representation for a bucket. The HashTable class derives from this
 73                bucket to append a key and value. This base class just defines
 74                the pointer to the next bucket in the chain.
 75            */
 76            class PEGASUS_COMMON_LINKAGE _BucketBase
 77            {
 78            public:
 79            
 80 mike  1.11     /* Default constructor. */
 81                _BucketBase() : next(0) { }
 82            
 83                /* Virtual destructor to ensure destruction of derived class 
 84            	elements.
 85                */
 86                virtual ~_BucketBase();
 87            
 88                /* returns true if the key pointed to by the key argument is equal
 89            	to the internal key of this bucket. This method must be overridden
 90            	by the derived class.
 91                */
 92                virtual Boolean equal(const void* key) const = 0;
 93            
 94                /* Clone this bucket. */
 95                virtual _BucketBase* clone() const = 0;
 96            
 97                _BucketBase* next;
 98            };
 99            
100            class _HashTableRep;
101 mike  1.11 
102            /* This class implements a simple hash table forward iterator. */
103            class PEGASUS_COMMON_LINKAGE _HashTableIteratorBase
104            {
105            public:
106                
107                _HashTableIteratorBase() : _first(0), _last(0), _bucket(0) { }
108            
109                operator int() const { return _bucket != 0; }
110            
111                _HashTableIteratorBase operator++(int);
112            
113                _HashTableIteratorBase& operator++();
114            
115                _HashTableIteratorBase(_BucketBase** first, _BucketBase** last);
116            
117            protected:
118            
119                _BucketBase** _first;
120                _BucketBase** _last;
121                _BucketBase* _bucket;
122 mike  1.11     friend class _HashTableRep;
123            };
124            
125            // ATTN: reorganization not supported yet.
126            
127            /*- The _HashTableRep class is the representation class used by HashTable.
128            
129                This code is primarily an internal class used to implement the HashTable.
130                But there may be occasions to use it directly.
131            
132                _HashTableRep parcels out much of the large code so that that code is not 
133                instantiated by the HashTable template class many times. This scheme helps 
134                reduce code bloat caused by templates. The HashTable template class below
135                acts as kind of a wrapper around this class.
136            
137                _HashTableRep is implemented as an array of pointers to chains of hash
138                buckets. The table initially allocates some number of chains (which can
139                be controlled by the constructor) and then may increase the number of
140                chains later (resulting in a reorganization of the hash table).
141            */
142            class PEGASUS_COMMON_LINKAGE _HashTableRep
143 mike  1.11 {
144            public:
145            
146                /*- This constructor allocates an array of pointers to chains of buckets,
147            	which of course are all empty at this time. The numChains argument
148            	If the numChains argument is less than eight, then eight chains will
149            	be created.
150            	@param numChains specifies the initial number of chains.
151                */
152                _HashTableRep(Uint32 numChains);
153            
154                /*- Copy constructor. */
155                _HashTableRep(const _HashTableRep& x);
156            
157                /*- Destructor. */
158                ~_HashTableRep();
159            
160                /*- Assignment operator. */
161                _HashTableRep& operator=(const _HashTableRep& x);
162            
163                /*- Returns the size of this hash table (the number of entries). */
164 mike  1.11     Uint32 size() const { return _size; }
165            
166                /*- Clears the contents of this hash table. After this is called, the
167            	size() method returns zero.
168                */
169                void clear();
170            
171                /*- Inserts new key-value pair into hash table. Deletes the bucket on
172            	failure so caller need not.
173            	@param hashCode hash code generated by caller's hash function.
174            	@param bucket bucket to be inserted.
175            	@param key pointer to key.
176            	@return true if insertion successful; false if duplicate key.
177                */
178                Boolean insert(Uint32 hashCode, _BucketBase* bucket, const void* key);
179            
180                /*- Finds the bucket with the given key. This method uses the
181            	_BucketBase::equal() method to compare keys.
182            	@param hashCode hash code generated by caller's hash function.
183            	@param key void pointer to key.
184            	@return pointer to bucket with that key or zero otherwise.
185 mike  1.11     */
186 mike  1.12     const _BucketBase* lookup(Uint32 hashCode, const void* key) const;
187 mike  1.11 
188                /*- Removes the bucket with the given key. This method uses the
189            	_BucketBase::equal() method to compare keys.
190            	@param hashCode hash code generated by caller's hash function.
191            	@param key void pointer to key.
192            	@return true if entry found and removed and false otherwise.
193                */
194                Boolean remove(Uint32 hashCode, const void* key);
195            
196                _BucketBase** getChains() const { return _chains; }
197            
198                Uint32 getNumChains() const { return _numChains; }
199            
200            protected:
201            
202                Uint32 _size;
203                Uint32 _numChains;
204                _BucketBase** _chains;
205            };
206            
207            /* The _Bucket class is used to implement the HashTable class.
208 mike  1.11 */
209            template<class K, class V, class E>
210            class _Bucket : public _BucketBase
211            {
212            public:
213            
214                _Bucket(const K& key, const V& value) : _key(key), _value(value) { }
215            
216                virtual ~_Bucket();
217            
218                virtual Boolean equal(const void* key) const;
219            
220                virtual _BucketBase* clone() const;
221            
222                K& getKey() { return _key; }
223            
224                V& getValue() { return _value; }
225            
226            private:
227            
228                K _key;
229 mike  1.11     V _value;
230            };
231            
232            template<class K, class V, class E>
233            Boolean _Bucket<K, V, E>::equal(const void* key) const
234            {
235                return E::equal(*((K*)key), _key);
236            }
237            
238            template<class K, class V, class E>
239            _Bucket<K, V, E>::~_Bucket()
240            {
241            
242            }
243            
244            template<class K, class V, class E>
245            _BucketBase* _Bucket<K, V, E>::clone() const
246            {
247                return new _Bucket<K, V, E>(_key, _value);
248            }
249            
250 mike  1.11 /* Iterator for HashTable class. */
251            template<class K, class V, class E>
252            class _HashTableIterator : public _HashTableIteratorBase
253            {
254            public:
255            
256                _HashTableIterator() 
257            	: _HashTableIteratorBase() { }
258            
259                _HashTableIterator(_BucketBase** first, _BucketBase** last)
260            	: _HashTableIteratorBase(first, last) { }
261            
262                const K& key() const { return ((_Bucket<K, V, E>*)_bucket)->getKey(); }
263            
264                const V& value() const { return ((_Bucket<K, V, E>*)_bucket)->getValue(); }
265            };
266            
267            /** The HashTable class provides a simple hash table implementation which 
268                associates key-value pairs.
269            
270                This implementation minimizes template bloat considerably by factoring out
271 mike  1.11     most of the code into a common non-template class (see _HashTableRep).
272                The HashTable class is mostly a wrapper to add proper type semantics to the
273                use of its representation class.
274            
275                Hashing as always is O(1).
276            
277                HashTable uses the most popular hash table implementation which utilizes 
278                an array of pointers to bucket chains. This is organized as follows:
279            
280                    <pre>
281                       +---+ 
282                       |   |   +-----+-------+
283                     0 | ----->| key | value |
284                       |   |   +-----+-------+
285                       +---+
286                       |   |   +-----+-------+   +-----+-------+   +-----+-------+
287                     1 | ----->| key | value |-->| key | value |-->| key | value |
288                       |   |   +-----+-------+   +-----+-------+   +-----+-------+
289                       +---+
290                         .
291                         .
292 mike  1.11              .
293                       +---+
294                       |   |   +-----+-------+   +-----+-------+
295                    N-1| ----->| key | value |-->| key | value |
296                       |   |   +-----+-------+   +-----+-------+
297                       +---+
298                    </pre>
299            
300                To locate an item a hash function is applied to the key to produce an 
301                integer value. Then the modulo of that integer is taken with N to select 
302                a chain (as shown above). Then the chain is searched for a bucket whose
303                key value is the same as the target key.
304            
305                The number of chains default to DEFAULT_NUM_CHAINS but should be about
306                one-third the number of expected entries (so that the average chain
307                will be three long). Making the number of chains too large will waste
308                space causing the hash table to be very sparse. But for optimal efficiency,
309                one might set the number of chains to be the same as the expected number
310                of entries.
311            
312                This implementation does have NOT an adaptive growth algorithm yet which 
313 mike  1.11     would allow it to increase the number of chains periodically based on some
314                statistic (e.g., when the number of entries is more than three times the
315                number of chains; this would keep the average chain length below three).
316            
317                The following example shows how to instantiate a HashTable which associates
318                String keys with Uint32 values.
319            
320            	<pre>
321            	typedef HashTable&lt;String, Uint32&gt; HT;
322            	HT ht;
323            	</pre>
324            
325                Some of the template arguments are defaulted in the above example (the 
326                third and forth). The instantiation is explicitly qualified like this
327                (which by the way has exactly the same effect).
328            
329            	<pre>
330            	typedef HashTable&lt;String, Uint32, EqualFunc&lt;String&gt;, HashFunc&lt;String&gt;&gt; HT;
331            	</pre>
332            
333                The third and forth arguments are described more in detail later.
334 mike  1.11 
335                Then, entries may be inserted like this:
336            
337            	<pre>
338            	ht.insert("Red", 111);
339            	ht.insert("Green", 222);
340            	ht.insert("Blue", 222);
341            	</pre>
342            
343                And entries may be looked up as follows:
344            
345            	<pre>
346            	Uint32 value;
347            	ht.lookup("Red", value);
348            	</pre>
349            
350                And entries may be removed like this:
351            
352            	<pre>
353            	h.remove("Red");
354            	</pre>
355 mike  1.11 
356                Iteration is done like this:
357            
358            	<pre>
359            	for (HT::Iterator i = ht.start(); i; i++)
360            	{
361            	    // To access the key call i.key()!
362            	    // To access the value call i.value()!
363            	}
364            	</pre>
365            
366                Note that only forward iteration is supported (no backwards iteration).
367            
368                Equality of keys is determined using the EqualFunc class which is
369                the default third argument of the template argument list. A new function 
370                object may be defined and passed to modify the behavior (for example, one 
371                might define equality of strings to ignore whitespace). Here is how to 
372                define and use a new equality function object:
373            
374            	<pre>
375            	struct MyEqualFunc
376 mike  1.11 	{
377            	    static Boolean equal(const String& x, const String& y)
378            	    {
379            		// do something here to test for equality!
380            	    }
381            	};
382            
383            	...
384            
385            	EqualFunc&lt;String, Uint32, MyEqualFunc&gt; ht;
386            	</pre>
387            
388                When the lookup(), insert(), and remove() methods are called, the 
389                MyEqualFunc::equal() method will be used to determine equality.
390            
391                Hash functions are provided for common types (as part of the default
392                HashFunc class). For other types it is possible to define a custom function
393                object as follows:
394            
395            	<pre>
396            	struct MyHashFunc
397 mike  1.11 	{
398            	    static Uint32 hash(const String& x)
399            	    {
400            		// Do some hashing here!
401            	    }
402            	};
403            
404            	...
405            
406            	EqualFunc&lt;String, Uint32, MyEqualFunc, MyHashFunc&gt; ht;
407            	</pre>
408            
409                As always, the hash function should provide a reasonably uniform 
410                distrubtion so that all of the entries don't get crowded into a few
411                chains. Note that a hash function which returns zero, would force
412                the pathalogical case in which all entries are placed in the first
413                chain.
414            */
415            template<class K, class V, class E , class H >
416            class HashTable
417            {
418 mike  1.11 public:
419            
420                typedef _HashTableIterator<K, V, E> Iterator;
421            
422                /* By default, we create this many chains initially */
423                enum { DEFAULT_NUM_CHAINS = 32 };
424            
425                /** Constructor.
426            	@param numChains number of chains to create.
427                */
428                HashTable(Uint32 numChains = DEFAULT_NUM_CHAINS) : _rep(numChains)
429                {
430            
431                }
432            
433                /** Copy constructor. */
434                HashTable(const HashTable<K,V,E,H>& x) : _rep(x._rep)
435                {
436            
437                }
438            
439 mike  1.11     /** Assignment operator. */
440                HashTable<K,V,E,H>& operator=(const HashTable<K,V,E,H>& x)
441                {
442            	if (this != &x)
443            	    _rep = x._rep;
444            	return *this;
445                }
446            
447                /** Returns the size of this hash table (the number of entries). */
448                Uint32 size() const { return _rep.size(); }
449            
450                /** Clears the contents of this hash table. After this is called, the
451            	size() method returns zero.
452                */
453                void clear() { _rep.clear(); }
454            
455                /** Inserts new key-value pair into hash table.
456            	@param key key component.
457            	@param value value component.
458            	@return true on success; false if duplicate key.
459                */
460 mike  1.11     Boolean insert(const K& key, const V& value)
461                {
462            	return _rep.insert(
463            	    H::hash(key), new _Bucket<K, V, E>(key, value), &key);
464                }
465            
466                /** Checks to see if hash table contains an entry with the given key.
467            	@param key key to be searched for
468            	@return true if hash table contains an entry with the given key.
469                */
470 mike  1.12     Boolean contains(const K& key) const
471 mike  1.11     {
472            	V value;
473            	return lookup(key, value);
474                }
475            
476                /** Looks up the entry with the given key.
477            	@param key key of entry to be located.
478            	@param value output value.
479            	@return true if found; false otherwise.
480                */
481 mike  1.12     Boolean lookup(const K& key, V& value) const;
482 mike  1.11 
483                /** Removes the entry with the given key.
484            	@param key key of entry to be removed.
485            	@return true on success; false otherwise.
486                */
487                Boolean remove(const K& key)
488                {
489            	return _rep.remove(H::hash(key), &key);
490                }
491            
492                /** Obtains an iterator for this object. */
493                Iterator start() const 
494                {
495            	return Iterator(
496            	    _rep.getChains(), _rep.getChains() + _rep.getNumChains());
497                }
498            
499            private:
500            
501                _HashTableRep _rep;
502            };
503 mike  1.11 
504            template<class K, class V, class E, class H>
505 mike  1.12 inline Boolean HashTable<K, V, E, H>::lookup(const K& key, V& value) const
506 mike  1.11 {
507                _Bucket<K, V, E>* bucket 
508            	= (_Bucket<K, V, E>*)_rep.lookup(H::hash(key), &key);
509            
510                if (bucket)
511                {
512            	value = bucket->getValue();
513            	return true;
514                }
515            
516                return false;
517            }
518            
519            PEGASUS_NAMESPACE_END
520            
521            #endif /* Pegasus_HashTable_h */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2