(file) Return to HashTable.h CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 mike  1.11 //%/////////////////////////////////////////////////////////////////////////////
  2            //
  3            // Copyright (c) 2000, 2001 The Open group, BMC Software, Tivoli Systems, IBM
  4            //
  5            // Permission is hereby granted, free of charge, to any person obtaining a copy
  6            // of this software and associated documentation files (the "Software"), to 
  7            // deal in the Software without restriction, including without limitation the 
  8            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 
  9            // sell copies of the Software, and to permit persons to whom the Software is
 10            // furnished to do so, subject to the following conditions:
 11            // 
 12            // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN 
 13            // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 14            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 15            // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 
 16            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
 17            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 
 18            // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 19            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 20            //
 21            //==============================================================================
 22 mike  1.11 //
 23            // Author: Mike Brasher (mbrasher@bmc.com)
 24            //
 25            // Modified By:
 26            //
 27            //%/////////////////////////////////////////////////////////////////////////////
 28            
 29            #ifndef Pegasus_HashTable_h
 30            #define Pegasus_HashTable_h
 31            
 32            #include <Pegasus/Common/Config.h>
 33            #include <Pegasus/Common/String.h>
 34            
 35            PEGASUS_NAMESPACE_BEGIN
 36            
 37            /*  This is the default hash function object used by the HashTable template.
 38                Specializations are provided for common types.
 39            */
 40            template<class K>
 41            struct HashFunc
 42            {
 43 mike  1.11 };
 44            
 45            PEGASUS_TEMPLATE_SPECIALIZATION struct PEGASUS_COMMON_LINKAGE HashFunc<String>
 46            {
 47                static Uint32 hash(const String& str);
 48            };
 49            
 50            PEGASUS_TEMPLATE_SPECIALIZATION struct HashFunc<Uint32>
 51            {
 52                static Uint32 hash(Uint32 x) { return x + 13; }
 53            };
 54            
 55            /*  This is a function object used by the HashTable to compare keys. This is
 56                the default implementation. Others may be defined and passed in the 
 57                template argument list to perform other kinds of comparisons.
 58            */
 59            template<class K>
 60            struct EqualFunc
 61            {
 62                static Boolean equal(const K& x, const K& y)
 63                {
 64 mike  1.11 	return x == y;
 65                }
 66            };
 67            
 68            /*  Representation for a bucket. The HashTable class derives from this
 69                bucket to append a key and value. This base class just defines
 70                the pointer to the next bucket in the chain.
 71            */
 72            class PEGASUS_COMMON_LINKAGE _BucketBase
 73            {
 74            public:
 75            
 76                /* Default constructor. */
 77                _BucketBase() : next(0) { }
 78            
 79                /* Virtual destructor to ensure destruction of derived class 
 80            	elements.
 81                */
 82                virtual ~_BucketBase();
 83            
 84                /* returns true if the key pointed to by the key argument is equal
 85 mike  1.11 	to the internal key of this bucket. This method must be overridden
 86            	by the derived class.
 87                */
 88                virtual Boolean equal(const void* key) const = 0;
 89            
 90                /* Clone this bucket. */
 91                virtual _BucketBase* clone() const = 0;
 92            
 93                _BucketBase* next;
 94            };
 95            
 96            class _HashTableRep;
 97            
 98            /* This class implements a simple hash table forward iterator. */
 99            class PEGASUS_COMMON_LINKAGE _HashTableIteratorBase
100            {
101            public:
102                
103                _HashTableIteratorBase() : _first(0), _last(0), _bucket(0) { }
104            
105                operator int() const { return _bucket != 0; }
106 mike  1.11 
107                _HashTableIteratorBase operator++(int);
108            
109                _HashTableIteratorBase& operator++();
110            
111                _HashTableIteratorBase(_BucketBase** first, _BucketBase** last);
112            
113            protected:
114            
115                _BucketBase** _first;
116                _BucketBase** _last;
117                _BucketBase* _bucket;
118                friend class _HashTableRep;
119            };
120            
121            // ATTN: reorganization not supported yet.
122            
123            /*- The _HashTableRep class is the representation class used by HashTable.
124            
125                This code is primarily an internal class used to implement the HashTable.
126                But there may be occasions to use it directly.
127 mike  1.11 
128                _HashTableRep parcels out much of the large code so that that code is not 
129                instantiated by the HashTable template class many times. This scheme helps 
130                reduce code bloat caused by templates. The HashTable template class below
131                acts as kind of a wrapper around this class.
132            
133                _HashTableRep is implemented as an array of pointers to chains of hash
134                buckets. The table initially allocates some number of chains (which can
135                be controlled by the constructor) and then may increase the number of
136                chains later (resulting in a reorganization of the hash table).
137            */
138            class PEGASUS_COMMON_LINKAGE _HashTableRep
139            {
140            public:
141            
142                /*- This constructor allocates an array of pointers to chains of buckets,
143            	which of course are all empty at this time. The numChains argument
144            	If the numChains argument is less than eight, then eight chains will
145            	be created.
146            	@param numChains specifies the initial number of chains.
147                */
148 mike  1.11     _HashTableRep(Uint32 numChains);
149            
150                /*- Copy constructor. */
151                _HashTableRep(const _HashTableRep& x);
152            
153                /*- Destructor. */
154                ~_HashTableRep();
155            
156                /*- Assignment operator. */
157                _HashTableRep& operator=(const _HashTableRep& x);
158            
159                /*- Returns the size of this hash table (the number of entries). */
160                Uint32 size() const { return _size; }
161            
162                /*- Clears the contents of this hash table. After this is called, the
163            	size() method returns zero.
164                */
165                void clear();
166            
167                /*- Inserts new key-value pair into hash table. Deletes the bucket on
168            	failure so caller need not.
169 mike  1.11 	@param hashCode hash code generated by caller's hash function.
170            	@param bucket bucket to be inserted.
171            	@param key pointer to key.
172            	@return true if insertion successful; false if duplicate key.
173                */
174                Boolean insert(Uint32 hashCode, _BucketBase* bucket, const void* key);
175            
176                /*- Finds the bucket with the given key. This method uses the
177            	_BucketBase::equal() method to compare keys.
178            	@param hashCode hash code generated by caller's hash function.
179            	@param key void pointer to key.
180            	@return pointer to bucket with that key or zero otherwise.
181                */
182 mike  1.11.2.1     const _BucketBase* lookup(Uint32 hashCode, const void* key) const;
183 mike  1.11     
184                    /*- Removes the bucket with the given key. This method uses the
185                	_BucketBase::equal() method to compare keys.
186                	@param hashCode hash code generated by caller's hash function.
187                	@param key void pointer to key.
188                	@return true if entry found and removed and false otherwise.
189                    */
190                    Boolean remove(Uint32 hashCode, const void* key);
191                
192                    _BucketBase** getChains() const { return _chains; }
193                
194                    Uint32 getNumChains() const { return _numChains; }
195                
196                protected:
197                
198                    Uint32 _size;
199                    Uint32 _numChains;
200                    _BucketBase** _chains;
201                };
202                
203                /* The _Bucket class is used to implement the HashTable class.
204 mike  1.11     */
205                template<class K, class V, class E>
206                class _Bucket : public _BucketBase
207                {
208                public:
209                
210                    _Bucket(const K& key, const V& value) : _key(key), _value(value) { }
211                
212                    virtual ~_Bucket();
213                
214                    virtual Boolean equal(const void* key) const;
215                
216                    virtual _BucketBase* clone() const;
217                
218                    K& getKey() { return _key; }
219                
220                    V& getValue() { return _value; }
221                
222                private:
223                
224                    K _key;
225 mike  1.11         V _value;
226                };
227                
228                template<class K, class V, class E>
229                Boolean _Bucket<K, V, E>::equal(const void* key) const
230                {
231                    return E::equal(*((K*)key), _key);
232                }
233                
234                template<class K, class V, class E>
235                _Bucket<K, V, E>::~_Bucket()
236                {
237                
238                }
239                
240                template<class K, class V, class E>
241                _BucketBase* _Bucket<K, V, E>::clone() const
242                {
243                    return new _Bucket<K, V, E>(_key, _value);
244                }
245                
246 mike  1.11     /* Iterator for HashTable class. */
247                template<class K, class V, class E>
248                class _HashTableIterator : public _HashTableIteratorBase
249                {
250                public:
251                
252                    _HashTableIterator() 
253                	: _HashTableIteratorBase() { }
254                
255                    _HashTableIterator(_BucketBase** first, _BucketBase** last)
256                	: _HashTableIteratorBase(first, last) { }
257                
258                    const K& key() const { return ((_Bucket<K, V, E>*)_bucket)->getKey(); }
259                
260                    const V& value() const { return ((_Bucket<K, V, E>*)_bucket)->getValue(); }
261                };
262                
263                /** The HashTable class provides a simple hash table implementation which 
264                    associates key-value pairs.
265                
266                    This implementation minimizes template bloat considerably by factoring out
267 mike  1.11         most of the code into a common non-template class (see _HashTableRep).
268                    The HashTable class is mostly a wrapper to add proper type semantics to the
269                    use of its representation class.
270                
271                    Hashing as always is O(1).
272                
273                    HashTable uses the most popular hash table implementation which utilizes 
274                    an array of pointers to bucket chains. This is organized as follows:
275                
276                        <pre>
277                           +---+ 
278                           |   |   +-----+-------+
279                         0 | ----->| key | value |
280                           |   |   +-----+-------+
281                           +---+
282                           |   |   +-----+-------+   +-----+-------+   +-----+-------+
283                         1 | ----->| key | value |-->| key | value |-->| key | value |
284                           |   |   +-----+-------+   +-----+-------+   +-----+-------+
285                           +---+
286                             .
287                             .
288 mike  1.11                  .
289                           +---+
290                           |   |   +-----+-------+   +-----+-------+
291                        N-1| ----->| key | value |-->| key | value |
292                           |   |   +-----+-------+   +-----+-------+
293                           +---+
294                        </pre>
295                
296                    To locate an item a hash function is applied to the key to produce an 
297                    integer value. Then the modulo of that integer is taken with N to select 
298                    a chain (as shown above). Then the chain is searched for a bucket whose
299                    key value is the same as the target key.
300                
301                    The number of chains default to DEFAULT_NUM_CHAINS but should be about
302                    one-third the number of expected entries (so that the average chain
303                    will be three long). Making the number of chains too large will waste
304                    space causing the hash table to be very sparse. But for optimal efficiency,
305                    one might set the number of chains to be the same as the expected number
306                    of entries.
307                
308                    This implementation does have NOT an adaptive growth algorithm yet which 
309 mike  1.11         would allow it to increase the number of chains periodically based on some
310                    statistic (e.g., when the number of entries is more than three times the
311                    number of chains; this would keep the average chain length below three).
312                
313                    The following example shows how to instantiate a HashTable which associates
314                    String keys with Uint32 values.
315                
316                	<pre>
317                	typedef HashTable&lt;String, Uint32&gt; HT;
318                	HT ht;
319                	</pre>
320                
321                    Some of the template arguments are defaulted in the above example (the 
322                    third and forth). The instantiation is explicitly qualified like this
323                    (which by the way has exactly the same effect).
324                
325                	<pre>
326                	typedef HashTable&lt;String, Uint32, EqualFunc&lt;String&gt;, HashFunc&lt;String&gt;&gt; HT;
327                	</pre>
328                
329                    The third and forth arguments are described more in detail later.
330 mike  1.11     
331                    Then, entries may be inserted like this:
332                
333                	<pre>
334                	ht.insert("Red", 111);
335                	ht.insert("Green", 222);
336                	ht.insert("Blue", 222);
337                	</pre>
338                
339                    And entries may be looked up as follows:
340                
341                	<pre>
342                	Uint32 value;
343                	ht.lookup("Red", value);
344                	</pre>
345                
346                    And entries may be removed like this:
347                
348                	<pre>
349                	h.remove("Red");
350                	</pre>
351 mike  1.11     
352                    Iteration is done like this:
353                
354                	<pre>
355                	for (HT::Iterator i = ht.start(); i; i++)
356                	{
357                	    // To access the key call i.key()!
358                	    // To access the value call i.value()!
359                	}
360                	</pre>
361                
362                    Note that only forward iteration is supported (no backwards iteration).
363                
364                    Equality of keys is determined using the EqualFunc class which is
365                    the default third argument of the template argument list. A new function 
366                    object may be defined and passed to modify the behavior (for example, one 
367                    might define equality of strings to ignore whitespace). Here is how to 
368                    define and use a new equality function object:
369                
370                	<pre>
371                	struct MyEqualFunc
372 mike  1.11     	{
373                	    static Boolean equal(const String& x, const String& y)
374                	    {
375                		// do something here to test for equality!
376                	    }
377                	};
378                
379                	...
380                
381                	EqualFunc&lt;String, Uint32, MyEqualFunc&gt; ht;
382                	</pre>
383                
384                    When the lookup(), insert(), and remove() methods are called, the 
385                    MyEqualFunc::equal() method will be used to determine equality.
386                
387                    Hash functions are provided for common types (as part of the default
388                    HashFunc class). For other types it is possible to define a custom function
389                    object as follows:
390                
391                	<pre>
392                	struct MyHashFunc
393 mike  1.11     	{
394                	    static Uint32 hash(const String& x)
395                	    {
396                		// Do some hashing here!
397                	    }
398                	};
399                
400                	...
401                
402                	EqualFunc&lt;String, Uint32, MyEqualFunc, MyHashFunc&gt; ht;
403                	</pre>
404                
405                    As always, the hash function should provide a reasonably uniform 
406                    distrubtion so that all of the entries don't get crowded into a few
407                    chains. Note that a hash function which returns zero, would force
408                    the pathalogical case in which all entries are placed in the first
409                    chain.
410                */
411                template<class K, class V, class E , class H >
412                class HashTable
413                {
414 mike  1.11     public:
415                
416                    typedef _HashTableIterator<K, V, E> Iterator;
417                
418                    /* By default, we create this many chains initially */
419                    enum { DEFAULT_NUM_CHAINS = 32 };
420                
421                    /** Constructor.
422                	@param numChains number of chains to create.
423                    */
424                    HashTable(Uint32 numChains = DEFAULT_NUM_CHAINS) : _rep(numChains)
425                    {
426                
427                    }
428                
429                    /** Copy constructor. */
430                    HashTable(const HashTable<K,V,E,H>& x) : _rep(x._rep)
431                    {
432                
433                    }
434                
435 mike  1.11         /** Assignment operator. */
436                    HashTable<K,V,E,H>& operator=(const HashTable<K,V,E,H>& x)
437                    {
438                	if (this != &x)
439                	    _rep = x._rep;
440                	return *this;
441                    }
442                
443                    /** Returns the size of this hash table (the number of entries). */
444                    Uint32 size() const { return _rep.size(); }
445                
446                    /** Clears the contents of this hash table. After this is called, the
447                	size() method returns zero.
448                    */
449                    void clear() { _rep.clear(); }
450                
451                    /** Inserts new key-value pair into hash table.
452                	@param key key component.
453                	@param value value component.
454                	@return true on success; false if duplicate key.
455                    */
456 mike  1.11         Boolean insert(const K& key, const V& value)
457                    {
458                	return _rep.insert(
459                	    H::hash(key), new _Bucket<K, V, E>(key, value), &key);
460                    }
461                
462                    /** Checks to see if hash table contains an entry with the given key.
463                	@param key key to be searched for
464                	@return true if hash table contains an entry with the given key.
465                    */
466 mike  1.11.2.1     Boolean contains(const K& key) const
467 mike  1.11         {
468                	V value;
469                	return lookup(key, value);
470                    }
471                
472                    /** Looks up the entry with the given key.
473                	@param key key of entry to be located.
474                	@param value output value.
475                	@return true if found; false otherwise.
476                    */
477 mike  1.11.2.1     Boolean lookup(const K& key, V& value) const;
478 mike  1.11     
479                    /** Removes the entry with the given key.
480                	@param key key of entry to be removed.
481                	@return true on success; false otherwise.
482                    */
483                    Boolean remove(const K& key)
484                    {
485                	return _rep.remove(H::hash(key), &key);
486                    }
487                
488                    /** Obtains an iterator for this object. */
489                    Iterator start() const 
490                    {
491                	return Iterator(
492                	    _rep.getChains(), _rep.getChains() + _rep.getNumChains());
493                    }
494                
495                private:
496                
497                    _HashTableRep _rep;
498                };
499 mike  1.11     
500                template<class K, class V, class E, class H>
501 mike  1.11.2.1 inline Boolean HashTable<K, V, E, H>::lookup(const K& key, V& value) const
502 mike  1.11     {
503                    _Bucket<K, V, E>* bucket 
504                	= (_Bucket<K, V, E>*)_rep.lookup(H::hash(key), &key);
505                
506                    if (bucket)
507                    {
508                	value = bucket->getValue();
509                	return true;
510                    }
511                
512                    return false;
513                }
514                
515                PEGASUS_NAMESPACE_END
516                
517                #endif /* Pegasus_HashTable_h */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2