(file) Return to HashTable.h CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 mike  1.11 //%/////////////////////////////////////////////////////////////////////////////
  2            //
  3 kumpf 1.13 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
  4            // The Open Group, Tivoli Systems
  5 mike  1.11 //
  6            // Permission is hereby granted, free of charge, to any person obtaining a copy
  7 kumpf 1.13 // of this software and associated documentation files (the "Software"), to
  8            // deal in the Software without restriction, including without limitation the
  9            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 10 mike  1.11 // sell copies of the Software, and to permit persons to whom the Software is
 11            // furnished to do so, subject to the following conditions:
 12            // 
 13 kumpf 1.13 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 14 mike  1.11 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 15            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 16 kumpf 1.13 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 17            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 18            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 19 mike  1.11 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 20            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21            //
 22            //==============================================================================
 23            //
 24            // Author: Mike Brasher (mbrasher@bmc.com)
 25            //
 26            // Modified By:
 27            //
 28            //%/////////////////////////////////////////////////////////////////////////////
 29            
 30            #ifndef Pegasus_HashTable_h
 31            #define Pegasus_HashTable_h
 32            
 33            #include <Pegasus/Common/Config.h>
 34            #include <Pegasus/Common/String.h>
 35            
 36            PEGASUS_NAMESPACE_BEGIN
 37            
 38            /*  This is the default hash function object used by the HashTable template.
 39                Specializations are provided for common types.
 40 mike  1.11 */
 41            template<class K>
 42            struct HashFunc
 43            {
 44            };
 45            
 46            PEGASUS_TEMPLATE_SPECIALIZATION struct PEGASUS_COMMON_LINKAGE HashFunc<String>
 47            {
 48                static Uint32 hash(const String& str);
 49            };
 50            
 51            PEGASUS_TEMPLATE_SPECIALIZATION struct HashFunc<Uint32>
 52            {
 53                static Uint32 hash(Uint32 x) { return x + 13; }
 54            };
 55            
 56            /*  This is a function object used by the HashTable to compare keys. This is
 57                the default implementation. Others may be defined and passed in the 
 58                template argument list to perform other kinds of comparisons.
 59            */
 60            template<class K>
 61 mike  1.11 struct EqualFunc
 62            {
 63                static Boolean equal(const K& x, const K& y)
 64                {
 65            	return x == y;
 66                }
 67            };
 68            
 69            /*  Representation for a bucket. The HashTable class derives from this
 70                bucket to append a key and value. This base class just defines
 71                the pointer to the next bucket in the chain.
 72            */
 73            class PEGASUS_COMMON_LINKAGE _BucketBase
 74            {
 75            public:
 76            
 77                /* Default constructor. */
 78                _BucketBase() : next(0) { }
 79            
 80                /* Virtual destructor to ensure destruction of derived class 
 81            	elements.
 82 mike  1.11     */
 83                virtual ~_BucketBase();
 84            
 85                /* returns true if the key pointed to by the key argument is equal
 86            	to the internal key of this bucket. This method must be overridden
 87            	by the derived class.
 88                */
 89                virtual Boolean equal(const void* key) const = 0;
 90            
 91                /* Clone this bucket. */
 92                virtual _BucketBase* clone() const = 0;
 93            
 94                _BucketBase* next;
 95            };
 96            
 97            class _HashTableRep;
 98            
 99            /* This class implements a simple hash table forward iterator. */
100            class PEGASUS_COMMON_LINKAGE _HashTableIteratorBase
101            {
102            public:
103 mike  1.11     
104                _HashTableIteratorBase() : _first(0), _last(0), _bucket(0) { }
105            
106                operator int() const { return _bucket != 0; }
107            
108                _HashTableIteratorBase operator++(int);
109            
110                _HashTableIteratorBase& operator++();
111            
112                _HashTableIteratorBase(_BucketBase** first, _BucketBase** last);
113            
114            protected:
115            
116                _BucketBase** _first;
117                _BucketBase** _last;
118                _BucketBase* _bucket;
119                friend class _HashTableRep;
120            };
121            
122            // ATTN: reorganization not supported yet.
123            
124 mike  1.11 /*- The _HashTableRep class is the representation class used by HashTable.
125            
126                This code is primarily an internal class used to implement the HashTable.
127                But there may be occasions to use it directly.
128            
129                _HashTableRep parcels out much of the large code so that that code is not 
130                instantiated by the HashTable template class many times. This scheme helps 
131                reduce code bloat caused by templates. The HashTable template class below
132                acts as kind of a wrapper around this class.
133            
134                _HashTableRep is implemented as an array of pointers to chains of hash
135                buckets. The table initially allocates some number of chains (which can
136                be controlled by the constructor) and then may increase the number of
137                chains later (resulting in a reorganization of the hash table).
138            */
139            class PEGASUS_COMMON_LINKAGE _HashTableRep
140            {
141            public:
142            
143                /*- This constructor allocates an array of pointers to chains of buckets,
144            	which of course are all empty at this time. The numChains argument
145 mike  1.11 	If the numChains argument is less than eight, then eight chains will
146            	be created.
147            	@param numChains specifies the initial number of chains.
148                */
149                _HashTableRep(Uint32 numChains);
150            
151                /*- Copy constructor. */
152                _HashTableRep(const _HashTableRep& x);
153            
154                /*- Destructor. */
155                ~_HashTableRep();
156            
157                /*- Assignment operator. */
158                _HashTableRep& operator=(const _HashTableRep& x);
159            
160                /*- Returns the size of this hash table (the number of entries). */
161                Uint32 size() const { return _size; }
162            
163                /*- Clears the contents of this hash table. After this is called, the
164            	size() method returns zero.
165                */
166 mike  1.11     void clear();
167            
168                /*- Inserts new key-value pair into hash table. Deletes the bucket on
169            	failure so caller need not.
170            	@param hashCode hash code generated by caller's hash function.
171            	@param bucket bucket to be inserted.
172            	@param key pointer to key.
173            	@return true if insertion successful; false if duplicate key.
174                */
175                Boolean insert(Uint32 hashCode, _BucketBase* bucket, const void* key);
176            
177                /*- Finds the bucket with the given key. This method uses the
178            	_BucketBase::equal() method to compare keys.
179            	@param hashCode hash code generated by caller's hash function.
180            	@param key void pointer to key.
181            	@return pointer to bucket with that key or zero otherwise.
182                */
183 mike  1.12     const _BucketBase* lookup(Uint32 hashCode, const void* key) const;
184 mike  1.11 
185                /*- Removes the bucket with the given key. This method uses the
186            	_BucketBase::equal() method to compare keys.
187            	@param hashCode hash code generated by caller's hash function.
188            	@param key void pointer to key.
189            	@return true if entry found and removed and false otherwise.
190                */
191                Boolean remove(Uint32 hashCode, const void* key);
192            
193                _BucketBase** getChains() const { return _chains; }
194            
195                Uint32 getNumChains() const { return _numChains; }
196            
197            protected:
198            
199                Uint32 _size;
200                Uint32 _numChains;
201                _BucketBase** _chains;
202            };
203            
204            /* The _Bucket class is used to implement the HashTable class.
205 mike  1.11 */
206            template<class K, class V, class E>
207            class _Bucket : public _BucketBase
208            {
209            public:
210            
211                _Bucket(const K& key, const V& value) : _key(key), _value(value) { }
212            
213                virtual ~_Bucket();
214            
215                virtual Boolean equal(const void* key) const;
216            
217                virtual _BucketBase* clone() const;
218            
219                K& getKey() { return _key; }
220            
221                V& getValue() { return _value; }
222            
223            private:
224            
225                K _key;
226 mike  1.11     V _value;
227            };
228            
229            template<class K, class V, class E>
230            Boolean _Bucket<K, V, E>::equal(const void* key) const
231            {
232                return E::equal(*((K*)key), _key);
233            }
234            
235            template<class K, class V, class E>
236            _Bucket<K, V, E>::~_Bucket()
237            {
238            
239            }
240            
241            template<class K, class V, class E>
242            _BucketBase* _Bucket<K, V, E>::clone() const
243            {
244                return new _Bucket<K, V, E>(_key, _value);
245            }
246            
247 mike  1.11 /* Iterator for HashTable class. */
248            template<class K, class V, class E>
249            class _HashTableIterator : public _HashTableIteratorBase
250            {
251            public:
252            
253                _HashTableIterator() 
254            	: _HashTableIteratorBase() { }
255            
256                _HashTableIterator(_BucketBase** first, _BucketBase** last)
257            	: _HashTableIteratorBase(first, last) { }
258            
259                const K& key() const { return ((_Bucket<K, V, E>*)_bucket)->getKey(); }
260            
261                const V& value() const { return ((_Bucket<K, V, E>*)_bucket)->getValue(); }
262            };
263            
264            /** The HashTable class provides a simple hash table implementation which 
265                associates key-value pairs.
266            
267                This implementation minimizes template bloat considerably by factoring out
268 mike  1.11     most of the code into a common non-template class (see _HashTableRep).
269                The HashTable class is mostly a wrapper to add proper type semantics to the
270                use of its representation class.
271            
272                Hashing as always is O(1).
273            
274                HashTable uses the most popular hash table implementation which utilizes 
275                an array of pointers to bucket chains. This is organized as follows:
276            
277                    <pre>
278                       +---+ 
279                       |   |   +-----+-------+
280                     0 | ----->| key | value |
281                       |   |   +-----+-------+
282                       +---+
283                       |   |   +-----+-------+   +-----+-------+   +-----+-------+
284                     1 | ----->| key | value |-->| key | value |-->| key | value |
285                       |   |   +-----+-------+   +-----+-------+   +-----+-------+
286                       +---+
287                         .
288                         .
289 mike  1.11              .
290                       +---+
291                       |   |   +-----+-------+   +-----+-------+
292                    N-1| ----->| key | value |-->| key | value |
293                       |   |   +-----+-------+   +-----+-------+
294                       +---+
295                    </pre>
296            
297                To locate an item a hash function is applied to the key to produce an 
298                integer value. Then the modulo of that integer is taken with N to select 
299                a chain (as shown above). Then the chain is searched for a bucket whose
300                key value is the same as the target key.
301            
302                The number of chains default to DEFAULT_NUM_CHAINS but should be about
303                one-third the number of expected entries (so that the average chain
304                will be three long). Making the number of chains too large will waste
305                space causing the hash table to be very sparse. But for optimal efficiency,
306                one might set the number of chains to be the same as the expected number
307                of entries.
308            
309                This implementation does have NOT an adaptive growth algorithm yet which 
310 mike  1.11     would allow it to increase the number of chains periodically based on some
311                statistic (e.g., when the number of entries is more than three times the
312                number of chains; this would keep the average chain length below three).
313            
314                The following example shows how to instantiate a HashTable which associates
315                String keys with Uint32 values.
316            
317            	<pre>
318            	typedef HashTable&lt;String, Uint32&gt; HT;
319            	HT ht;
320            	</pre>
321            
322                Some of the template arguments are defaulted in the above example (the 
323                third and forth). The instantiation is explicitly qualified like this
324                (which by the way has exactly the same effect).
325            
326            	<pre>
327            	typedef HashTable&lt;String, Uint32, EqualFunc&lt;String&gt;, HashFunc&lt;String&gt;&gt; HT;
328            	</pre>
329            
330                The third and forth arguments are described more in detail later.
331 mike  1.11 
332                Then, entries may be inserted like this:
333            
334            	<pre>
335            	ht.insert("Red", 111);
336            	ht.insert("Green", 222);
337            	ht.insert("Blue", 222);
338            	</pre>
339            
340                And entries may be looked up as follows:
341            
342            	<pre>
343            	Uint32 value;
344            	ht.lookup("Red", value);
345            	</pre>
346            
347                And entries may be removed like this:
348            
349            	<pre>
350            	h.remove("Red");
351            	</pre>
352 mike  1.11 
353                Iteration is done like this:
354            
355            	<pre>
356            	for (HT::Iterator i = ht.start(); i; i++)
357            	{
358            	    // To access the key call i.key()!
359            	    // To access the value call i.value()!
360            	}
361            	</pre>
362            
363                Note that only forward iteration is supported (no backwards iteration).
364            
365                Equality of keys is determined using the EqualFunc class which is
366                the default third argument of the template argument list. A new function 
367                object may be defined and passed to modify the behavior (for example, one 
368                might define equality of strings to ignore whitespace). Here is how to 
369                define and use a new equality function object:
370            
371            	<pre>
372            	struct MyEqualFunc
373 mike  1.11 	{
374            	    static Boolean equal(const String& x, const String& y)
375            	    {
376            		// do something here to test for equality!
377            	    }
378            	};
379            
380            	...
381            
382            	EqualFunc&lt;String, Uint32, MyEqualFunc&gt; ht;
383            	</pre>
384            
385                When the lookup(), insert(), and remove() methods are called, the 
386                MyEqualFunc::equal() method will be used to determine equality.
387            
388                Hash functions are provided for common types (as part of the default
389                HashFunc class). For other types it is possible to define a custom function
390                object as follows:
391            
392            	<pre>
393            	struct MyHashFunc
394 mike  1.11 	{
395            	    static Uint32 hash(const String& x)
396            	    {
397            		// Do some hashing here!
398            	    }
399            	};
400            
401            	...
402            
403            	EqualFunc&lt;String, Uint32, MyEqualFunc, MyHashFunc&gt; ht;
404            	</pre>
405            
406                As always, the hash function should provide a reasonably uniform 
407                distrubtion so that all of the entries don't get crowded into a few
408                chains. Note that a hash function which returns zero, would force
409                the pathalogical case in which all entries are placed in the first
410                chain.
411            */
412            template<class K, class V, class E , class H >
413            class HashTable
414            {
415 mike  1.11 public:
416            
417                typedef _HashTableIterator<K, V, E> Iterator;
418            
419                /* By default, we create this many chains initially */
420                enum { DEFAULT_NUM_CHAINS = 32 };
421            
422                /** Constructor.
423            	@param numChains number of chains to create.
424                */
425                HashTable(Uint32 numChains = DEFAULT_NUM_CHAINS) : _rep(numChains)
426                {
427            
428                }
429            
430                /** Copy constructor. */
431                HashTable(const HashTable<K,V,E,H>& x) : _rep(x._rep)
432                {
433            
434                }
435            
436 mike  1.11     /** Assignment operator. */
437                HashTable<K,V,E,H>& operator=(const HashTable<K,V,E,H>& x)
438                {
439            	if (this != &x)
440            	    _rep = x._rep;
441            	return *this;
442                }
443            
444                /** Returns the size of this hash table (the number of entries). */
445                Uint32 size() const { return _rep.size(); }
446            
447                /** Clears the contents of this hash table. After this is called, the
448            	size() method returns zero.
449                */
450                void clear() { _rep.clear(); }
451            
452                /** Inserts new key-value pair into hash table.
453            	@param key key component.
454            	@param value value component.
455            	@return true on success; false if duplicate key.
456                */
457 mike  1.11     Boolean insert(const K& key, const V& value)
458                {
459            	return _rep.insert(
460            	    H::hash(key), new _Bucket<K, V, E>(key, value), &key);
461                }
462            
463                /** Checks to see if hash table contains an entry with the given key.
464            	@param key key to be searched for
465            	@return true if hash table contains an entry with the given key.
466                */
467 mike  1.12     Boolean contains(const K& key) const
468 mike  1.11     {
469            	V value;
470            	return lookup(key, value);
471                }
472            
473                /** Looks up the entry with the given key.
474            	@param key key of entry to be located.
475            	@param value output value.
476            	@return true if found; false otherwise.
477                */
478 mike  1.12     Boolean lookup(const K& key, V& value) const;
479 mike  1.11 
480                /** Removes the entry with the given key.
481            	@param key key of entry to be removed.
482            	@return true on success; false otherwise.
483                */
484                Boolean remove(const K& key)
485                {
486            	return _rep.remove(H::hash(key), &key);
487                }
488            
489                /** Obtains an iterator for this object. */
490                Iterator start() const 
491                {
492            	return Iterator(
493            	    _rep.getChains(), _rep.getChains() + _rep.getNumChains());
494                }
495            
496            private:
497            
498                _HashTableRep _rep;
499            };
500 mike  1.11 
501            template<class K, class V, class E, class H>
502 mike  1.12 inline Boolean HashTable<K, V, E, H>::lookup(const K& key, V& value) const
503 mike  1.11 {
504                _Bucket<K, V, E>* bucket 
505            	= (_Bucket<K, V, E>*)_rep.lookup(H::hash(key), &key);
506            
507                if (bucket)
508                {
509            	value = bucket->getValue();
510            	return true;
511                }
512            
513                return false;
514            }
515            
516            PEGASUS_NAMESPACE_END
517            
518            #endif /* Pegasus_HashTable_h */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2