(file) Return to HashTable.h CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 mike  1.11 //%/////////////////////////////////////////////////////////////////////////////
  2            //
  3            // Copyright (c) 2000, 2001 The Open group, BMC Software, Tivoli Systems, IBM
  4            //
  5            // Permission is hereby granted, free of charge, to any person obtaining a copy
  6            // of this software and associated documentation files (the "Software"), to 
  7            // deal in the Software without restriction, including without limitation the 
  8            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 
  9            // sell copies of the Software, and to permit persons to whom the Software is
 10            // furnished to do so, subject to the following conditions:
 11            // 
 12            // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN 
 13            // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 14            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 15            // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
 16            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
 17            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 
 18            // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 19            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 20            //
 21            //==============================================================================
 22 mike  1.11 //
 23            // Author: Mike Brasher (mbrasher@bmc.com)
 24            //
 25            // Modified By:
 26            //
 27            //%/////////////////////////////////////////////////////////////////////////////
 28            
 29            #ifndef Pegasus_HashTable_h
 30            #define Pegasus_HashTable_h
 31            
 32            #include <Pegasus/Common/Config.h>
 33            #include <Pegasus/Common/String.h>
 34            
 35            PEGASUS_NAMESPACE_BEGIN
 36            
 37            /*  This is the default hash function object used by the HashTable template.
 38                Specializations are provided for common types.
 39            */
 40            template<class K>
 41            struct HashFunc
 42            {
 43 mike  1.11 };
 44            
 45            PEGASUS_TEMPLATE_SPECIALIZATION struct PEGASUS_COMMON_LINKAGE HashFunc<String>
 46            {
 47                static Uint32 hash(const String& str);
 48            };
 49            
 50            PEGASUS_TEMPLATE_SPECIALIZATION struct HashFunc<Uint32>
 51            {
 52                static Uint32 hash(Uint32 x) { return x + 13; }
 53            };
 54            
 55            /*  This is a function object used by the HashTable to compare keys. This is
 56                the default implementation. Others may be defined and passed in the 
 57                template argument list to perform other kinds of comparisons.
 58            */
 59            template<class K>
 60            struct EqualFunc
 61            {
 62                static Boolean equal(const K& x, const K& y)
 63                {
 64 mike  1.11 	return x == y;
 65                }
 66            };
 67            
 68            /*  Representation for a bucket. The HashTable class derives from this
 69                bucket to append a key and value. This base class just defines
 70                the pointer to the next bucket in the chain.
 71            */
 72            class PEGASUS_COMMON_LINKAGE _BucketBase
 73            {
 74            public:
 75            
 76                /* Default constructor. */
 77                _BucketBase() : next(0) { }
 78            
 79                /* Virtual destructor to ensure destruction of derived class 
 80            	elements.
 81                */
 82                virtual ~_BucketBase();
 83            
 84                /* returns true if the key pointed to by the key argument is equal
 85 mike  1.11 	to the internal key of this bucket. This method must be overridden
 86            	by the derived class.
 87                */
 88                virtual Boolean equal(const void* key) const = 0;
 89            
 90                /* Clone this bucket. */
 91                virtual _BucketBase* clone() const = 0;
 92            
 93                _BucketBase* next;
 94            };
 95            
 96            class _HashTableRep;
 97            
 98            /* This class implements a simple hash table forward iterator. */
 99            class PEGASUS_COMMON_LINKAGE _HashTableIteratorBase
100            {
101            public:
102                
103                _HashTableIteratorBase() : _first(0), _last(0), _bucket(0) { }
104            
105                operator int() const { return _bucket != 0; }
106 mike  1.11 
107                _HashTableIteratorBase operator++(int);
108            
109                _HashTableIteratorBase& operator++();
110            
111                _HashTableIteratorBase(_BucketBase** first, _BucketBase** last);
112            
113            protected:
114            
115                _BucketBase** _first;
116                _BucketBase** _last;
117                _BucketBase* _bucket;
118                friend class _HashTableRep;
119            };
120            
121            // ATTN: reorganization not supported yet.
122            
123            /*- The _HashTableRep class is the representation class used by HashTable.
124            
125                This code is primarily an internal class used to implement the HashTable.
126                But there may be occasions to use it directly.
127 mike  1.11 
128                _HashTableRep parcels out much of the large code so that that code is not 
129                instantiated by the HashTable template class many times. This scheme helps 
130                reduce code bloat caused by templates. The HashTable template class below
131                acts as kind of a wrapper around this class.
132            
133                _HashTableRep is implemented as an array of pointers to chains of hash
134                buckets. The table initially allocates some number of chains (which can
135                be controlled by the constructor) and then may increase the number of
136                chains later (resulting in a reorganization of the hash table).
137            */
138            class PEGASUS_COMMON_LINKAGE _HashTableRep
139            {
140            public:
141            
142                /*- This constructor allocates an array of pointers to chains of buckets,
143            	which of course are all empty at this time. The numChains argument
144            	If the numChains argument is less than eight, then eight chains will
145            	be created.
146            	@param numChains specifies the initial number of chains.
147                */
148 mike  1.11     _HashTableRep(Uint32 numChains);
149            
150                /*- Copy constructor. */
151                _HashTableRep(const _HashTableRep& x);
152            
153                /*- Destructor. */
154                ~_HashTableRep();
155            
156                /*- Assignment operator. */
157                _HashTableRep& operator=(const _HashTableRep& x);
158            
159                /*- Returns the size of this hash table (the number of entries). */
160                Uint32 size() const { return _size; }
161            
162                /*- Clears the contents of this hash table. After this is called, the
163            	size() method returns zero.
164                */
165                void clear();
166            
167                /*- Inserts new key-value pair into hash table. Deletes the bucket on
168            	failure so caller need not.
169 mike  1.11 	@param hashCode hash code generated by caller's hash function.
170            	@param bucket bucket to be inserted.
171            	@param key pointer to key.
172            	@return true if insertion successful; false if duplicate key.
173                */
174                Boolean insert(Uint32 hashCode, _BucketBase* bucket, const void* key);
175            
176                /*- Finds the bucket with the given key. This method uses the
177            	_BucketBase::equal() method to compare keys.
178            	@param hashCode hash code generated by caller's hash function.
179            	@param key void pointer to key.
180            	@return pointer to bucket with that key or zero otherwise.
181                */
182                const _BucketBase* lookup(Uint32 hashCode, const void* key);
183            
184                /*- Removes the bucket with the given key. This method uses the
185            	_BucketBase::equal() method to compare keys.
186            	@param hashCode hash code generated by caller's hash function.
187            	@param key void pointer to key.
188            	@return true if entry found and removed and false otherwise.
189                */
190 mike  1.11     Boolean remove(Uint32 hashCode, const void* key);
191            
192                _BucketBase** getChains() const { return _chains; }
193            
194                Uint32 getNumChains() const { return _numChains; }
195            
196            protected:
197            
198                Uint32 _size;
199                Uint32 _numChains;
200                _BucketBase** _chains;
201            };
202            
203            /* The _Bucket class is used to implement the HashTable class.
204            */
205            template<class K, class V, class E>
206            class _Bucket : public _BucketBase
207            {
208            public:
209            
210                _Bucket(const K& key, const V& value) : _key(key), _value(value) { }
211 mike  1.11 
212                virtual ~_Bucket();
213            
214                virtual Boolean equal(const void* key) const;
215            
216                virtual _BucketBase* clone() const;
217            
218                K& getKey() { return _key; }
219            
220                V& getValue() { return _value; }
221            
222            private:
223            
224                K _key;
225                V _value;
226            };
227            
228            template<class K, class V, class E>
229            Boolean _Bucket<K, V, E>::equal(const void* key) const
230            {
231                return E::equal(*((K*)key), _key);
232 mike  1.11 }
233            
234            template<class K, class V, class E>
235            _Bucket<K, V, E>::~_Bucket()
236            {
237            
238            }
239            
240            template<class K, class V, class E>
241            _BucketBase* _Bucket<K, V, E>::clone() const
242            {
243                return new _Bucket<K, V, E>(_key, _value);
244            }
245            
246            /* Iterator for HashTable class. */
247            template<class K, class V, class E>
248            class _HashTableIterator : public _HashTableIteratorBase
249            {
250            public:
251            
252                _HashTableIterator() 
253 mike  1.11 	: _HashTableIteratorBase() { }
254            
255                _HashTableIterator(_BucketBase** first, _BucketBase** last)
256            	: _HashTableIteratorBase(first, last) { }
257            
258                const K& key() const { return ((_Bucket<K, V, E>*)_bucket)->getKey(); }
259            
260                const V& value() const { return ((_Bucket<K, V, E>*)_bucket)->getValue(); }
261            };
262            
263            /** The HashTable class provides a simple hash table implementation which 
264                associates key-value pairs.
265            
266                This implementation minimizes template bloat considerably by factoring out
267                most of the code into a common non-template class (see _HashTableRep).
268                The HashTable class is mostly a wrapper to add proper type semantics to the
269                use of its representation class.
270            
271                Hashing as always is O(1).
272            
273                HashTable uses the most popular hash table implementation which utilizes 
274 mike  1.11     an array of pointers to bucket chains. This is organized as follows:
275            
276                    <pre>
277                       +---+ 
278                       |   |   +-----+-------+
279                     0 | ----->| key | value |
280                       |   |   +-----+-------+
281                       +---+
282                       |   |   +-----+-------+   +-----+-------+   +-----+-------+
283                     1 | ----->| key | value |-->| key | value |-->| key | value |
284                       |   |   +-----+-------+   +-----+-------+   +-----+-------+
285                       +---+
286                         .
287                         .
288                         .
289                       +---+
290                       |   |   +-----+-------+   +-----+-------+
291                    N-1| ----->| key | value |-->| key | value |
292                       |   |   +-----+-------+   +-----+-------+
293                       +---+
294                    </pre>
295 mike  1.11 
296                To locate an item a hash function is applied to the key to produce an 
297                integer value. Then the modulo of that integer is taken with N to select 
298                a chain (as shown above). Then the chain is searched for a bucket whose
299                key value is the same as the target key.
300            
301                The number of chains default to DEFAULT_NUM_CHAINS but should be about
302                one-third the number of expected entries (so that the average chain
303                will be three long). Making the number of chains too large will waste
304                space causing the hash table to be very sparse. But for optimal efficiency,
305                one might set the number of chains to be the same as the expected number
306                of entries.
307            
308                This implementation does have NOT an adaptive growth algorithm yet which 
309                would allow it to increase the number of chains periodically based on some
310                statistic (e.g., when the number of entries is more than three times the
311                number of chains; this would keep the average chain length below three).
312            
313                The following example shows how to instantiate a HashTable which associates
314                String keys with Uint32 values.
315            
316 mike  1.11 	<pre>
317            	typedef HashTable&lt;String, Uint32&gt; HT;
318            	HT ht;
319            	</pre>
320            
321                Some of the template arguments are defaulted in the above example (the 
322                third and forth). The instantiation is explicitly qualified like this
323                (which by the way has exactly the same effect).
324            
325            	<pre>
326            	typedef HashTable&lt;String, Uint32, EqualFunc&lt;String&gt;, HashFunc&lt;String&gt;&gt; HT;
327            	</pre>
328            
329                The third and forth arguments are described more in detail later.
330            
331                Then, entries may be inserted like this:
332            
333            	<pre>
334            	ht.insert("Red", 111);
335            	ht.insert("Green", 222);
336            	ht.insert("Blue", 222);
337 mike  1.11 	</pre>
338            
339                And entries may be looked up as follows:
340            
341            	<pre>
342            	Uint32 value;
343            	ht.lookup("Red", value);
344            	</pre>
345            
346                And entries may be removed like this:
347            
348            	<pre>
349            	h.remove("Red");
350            	</pre>
351            
352                Iteration is done like this:
353            
354            	<pre>
355            	for (HT::Iterator i = ht.start(); i; i++)
356            	{
357            	    // To access the key call i.key()!
358 mike  1.11 	    // To access the value call i.value()!
359            	}
360            	</pre>
361            
362                Note that only forward iteration is supported (no backwards iteration).
363            
364                Equality of keys is determined using the EqualFunc class which is
365                the default third argument of the template argument list. A new function 
366                object may be defined and passed to modify the behavior (for example, one 
367                might define equality of strings to ignore whitespace). Here is how to 
368                define and use a new equality function object:
369            
370            	<pre>
371            	struct MyEqualFunc
372            	{
373            	    static Boolean equal(const String& x, const String& y)
374            	    {
375            		// do something here to test for equality!
376            	    }
377            	};
378            
379 mike  1.11 	...
380            
381            	EqualFunc&lt;String, Uint32, MyEqualFunc&gt; ht;
382            	</pre>
383            
384                When the lookup(), insert(), and remove() methods are called, the 
385                MyEqualFunc::equal() method will be used to determine equality.
386            
387                Hash functions are provided for common types (as part of the default
388                HashFunc class). For other types it is possible to define a custom function
389                object as follows:
390            
391            	<pre>
392            	struct MyHashFunc
393            	{
394            	    static Uint32 hash(const String& x)
395            	    {
396            		// Do some hashing here!
397            	    }
398            	};
399            
400 mike  1.11 	...
401            
402            	EqualFunc&lt;String, Uint32, MyEqualFunc, MyHashFunc&gt; ht;
403            	</pre>
404            
405                As always, the hash function should provide a reasonably uniform 
406                distrubtion so that all of the entries don't get crowded into a few
407                chains. Note that a hash function which returns zero, would force
408                the pathalogical case in which all entries are placed in the first
409                chain.
410            */
411            template<class K, class V, class E , class H >
412            class HashTable
413            {
414            public:
415            
416                typedef _HashTableIterator<K, V, E> Iterator;
417            
418                /* By default, we create this many chains initially */
419                enum { DEFAULT_NUM_CHAINS = 32 };
420            
421 mike  1.11     /** Constructor.
422            	@param numChains number of chains to create.
423                */
424                HashTable(Uint32 numChains = DEFAULT_NUM_CHAINS) : _rep(numChains)
425                {
426            
427                }
428            
429                /** Copy constructor. */
430                HashTable(const HashTable<K,V,E,H>& x) : _rep(x._rep)
431                {
432            
433                }
434            
435                /** Assignment operator. */
436                HashTable<K,V,E,H>& operator=(const HashTable<K,V,E,H>& x)
437                {
438            	if (this != &x)
439            	    _rep = x._rep;
440            	return *this;
441                }
442 mike  1.11 
443                /** Returns the size of this hash table (the number of entries). */
444                Uint32 size() const { return _rep.size(); }
445            
446                /** Clears the contents of this hash table. After this is called, the
447            	size() method returns zero.
448                */
449                void clear() { _rep.clear(); }
450            
451                /** Inserts new key-value pair into hash table.
452            	@param key key component.
453            	@param value value component.
454            	@return true on success; false if duplicate key.
455                */
456                Boolean insert(const K& key, const V& value)
457                {
458            	return _rep.insert(
459            	    H::hash(key), new _Bucket<K, V, E>(key, value), &key);
460                }
461            
462                /** Checks to see if hash table contains an entry with the given key.
463 mike  1.11 	@param key key to be searched for
464            	@return true if hash table contains an entry with the given key.
465                */
466                Boolean contains(const K& key)
467                {
468            	V value;
469            	return lookup(key, value);
470                }
471            
472                /** Looks up the entry with the given key.
473            	@param key key of entry to be located.
474            	@param value output value.
475            	@return true if found; false otherwise.
476                */
477                Boolean lookup(const K& key, V& value);
478            
479                /** Removes the entry with the given key.
480            	@param key key of entry to be removed.
481            	@return true on success; false otherwise.
482                */
483                Boolean remove(const K& key)
484 mike  1.11     {
485            	return _rep.remove(H::hash(key), &key);
486                }
487            
488                /** Obtains an iterator for this object. */
489                Iterator start() const 
490                {
491            	return Iterator(
492            	    _rep.getChains(), _rep.getChains() + _rep.getNumChains());
493                }
494            
495            private:
496            
497                _HashTableRep _rep;
498            };
499            
500            template<class K, class V, class E, class H>
501            inline Boolean HashTable<K, V, E, H>::lookup(const K& key, V& value)
502            {
503                _Bucket<K, V, E>* bucket 
504            	= (_Bucket<K, V, E>*)_rep.lookup(H::hash(key), &key);
505 mike  1.11 
506                if (bucket)
507                {
508            	value = bucket->getValue();
509            	return true;
510                }
511            
512                return false;
513            }
514            
515            PEGASUS_NAMESPACE_END
516            
517            #endif /* Pegasus_HashTable_h */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2