(file) Return to HashTable.h CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.17 //%2004////////////////////////////////////////////////////////////////////////
  2 mike  1.11 //
  3 karl  1.17 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl  1.15 // IBM Corp.; EMC Corporation, The Open Group.
  7 karl  1.17 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 mike  1.11 //
 10            // Permission is hereby granted, free of charge, to any person obtaining a copy
 11 kumpf 1.13 // of this software and associated documentation files (the "Software"), to
 12            // deal in the Software without restriction, including without limitation the
 13            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 14 mike  1.11 // sell copies of the Software, and to permit persons to whom the Software is
 15            // furnished to do so, subject to the following conditions:
 16 r.kieninger 1.18 //
 17 kumpf       1.13 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 18 mike        1.11 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 19                  // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 20 kumpf       1.13 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 21                  // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22                  // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 23 mike        1.11 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 24                  // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 25                  //
 26                  //==============================================================================
 27                  //
 28                  // Author: Mike Brasher (mbrasher@bmc.com)
 29                  //
 30 r.kieninger 1.18 // Modified By: Carol Ann Krug Graves, Hewlett-Packard Company
 31 kumpf       1.16 //                  (carolann_graves@hp.com)
 32 mike        1.11 //
 33                  //%/////////////////////////////////////////////////////////////////////////////
 34                  
 35                  #ifndef Pegasus_HashTable_h
 36                  #define Pegasus_HashTable_h
 37                  
 38                  #include <Pegasus/Common/Config.h>
 39                  #include <Pegasus/Common/String.h>
 40 kumpf       1.14 #include <Pegasus/Common/Linkage.h>
 41 mike        1.11 
 42                  PEGASUS_NAMESPACE_BEGIN
 43                  
 44                  /*  This is the default hash function object used by the HashTable template.
 45                      Specializations are provided for common types.
 46                  */
 47                  template<class K>
 48                  struct HashFunc
 49                  {
 50                  };
 51                  
 52                  PEGASUS_TEMPLATE_SPECIALIZATION struct PEGASUS_COMMON_LINKAGE HashFunc<String>
 53                  {
 54                      static Uint32 hash(const String& str);
 55                  };
 56                  
 57                  PEGASUS_TEMPLATE_SPECIALIZATION struct HashFunc<Uint32>
 58                  {
 59                      static Uint32 hash(Uint32 x) { return x + 13; }
 60                  };
 61                  
 62 kumpf       1.16 /*
 63 r.kieninger 1.18     Hash function object that converts to lowercase.
 64 kumpf       1.16 
 65                      This function can be used for hash table keys constructed from strings that
 66 r.kieninger 1.18     should be treated as case insensitive (e.g. class names, namespace names,
 67                      system names).
 68 kumpf       1.16 
 69                      Note: this function converts to lower case based on the process locale.
 70                  */
 71                  struct HashLowerCaseFunc
 72                  {
 73                      static Uint32 hash (const String & str)
 74                      {
 75                          String cpy (str);
 76                          cpy.toLower ();
 77                          Uint32 h = 0;
 78                          for (Uint32 i = 0, n = cpy.size (); i < n; i++)
 79                              h = 5 * h + cpy [i];
 80                          return h;
 81                      }
 82                  };
 83                  
 84 mike        1.11 /*  This is a function object used by the HashTable to compare keys. This is
 85 r.kieninger 1.18     the default implementation. Others may be defined and passed in the
 86 mike        1.11     template argument list to perform other kinds of comparisons.
 87                  */
 88                  template<class K>
 89                  struct EqualFunc
 90                  {
 91                      static Boolean equal(const K& x, const K& y)
 92                      {
 93                  	return x == y;
 94 kumpf       1.16     }
 95                  };
 96                  
 97                  /*
 98 r.kieninger 1.18     Equal function object that can be used by HashTable to compare keys that
 99                      should be treated as case insensitive.
100 kumpf       1.16 
101                      This function can be used for hash table keys constructed from strings that
102 r.kieninger 1.18     should be treated as case insensitive (e.g. class names, namespace names,
103 kumpf       1.16     system names).
104                  
105                      Note: this function compares Strings based on the process locale.
106                  */
107                  struct EqualNoCaseFunc
108                  {
109                      static Boolean equal (const String & x, const String & y)
110                      {
111                          return (0 == String::compareNoCase (x, y));
112 mike        1.11     }
113                  };
114                  
115                  /*  Representation for a bucket. The HashTable class derives from this
116                      bucket to append a key and value. This base class just defines
117                      the pointer to the next bucket in the chain.
118                  */
119                  class PEGASUS_COMMON_LINKAGE _BucketBase
120                  {
121                  public:
122                  
123                      /* Default constructor. */
124                      _BucketBase() : next(0) { }
125                  
126 r.kieninger 1.18     /* Virtual destructor to ensure destruction of derived class
127 mike        1.11 	elements.
128                      */
129                      virtual ~_BucketBase();
130                  
131                      /* returns true if the key pointed to by the key argument is equal
132                  	to the internal key of this bucket. This method must be overridden
133                  	by the derived class.
134                      */
135                      virtual Boolean equal(const void* key) const = 0;
136                  
137                      /* Clone this bucket. */
138                      virtual _BucketBase* clone() const = 0;
139                  
140                      _BucketBase* next;
141                  };
142                  
143                  class _HashTableRep;
144                  
145                  /* This class implements a simple hash table forward iterator. */
146                  class PEGASUS_COMMON_LINKAGE _HashTableIteratorBase
147                  {
148 mike        1.11 public:
149 r.kieninger 1.18 
150 mike        1.11     _HashTableIteratorBase() : _first(0), _last(0), _bucket(0) { }
151                  
152                      operator int() const { return _bucket != 0; }
153                  
154                      _HashTableIteratorBase operator++(int);
155                  
156                      _HashTableIteratorBase& operator++();
157                  
158                      _HashTableIteratorBase(_BucketBase** first, _BucketBase** last);
159                  
160                  protected:
161                  
162                      _BucketBase** _first;
163                      _BucketBase** _last;
164                      _BucketBase* _bucket;
165                      friend class _HashTableRep;
166                  };
167                  
168                  // ATTN: reorganization not supported yet.
169                  
170                  /*- The _HashTableRep class is the representation class used by HashTable.
171 mike        1.11 
172                      This code is primarily an internal class used to implement the HashTable.
173                      But there may be occasions to use it directly.
174                  
175 r.kieninger 1.18     _HashTableRep parcels out much of the large code so that that code is not
176                      instantiated by the HashTable template class many times. This scheme helps
177 mike        1.11     reduce code bloat caused by templates. The HashTable template class below
178                      acts as kind of a wrapper around this class.
179                  
180                      _HashTableRep is implemented as an array of pointers to chains of hash
181                      buckets. The table initially allocates some number of chains (which can
182                      be controlled by the constructor) and then may increase the number of
183                      chains later (resulting in a reorganization of the hash table).
184                  */
185                  class PEGASUS_COMMON_LINKAGE _HashTableRep
186                  {
187                  public:
188                  
189                      /*- This constructor allocates an array of pointers to chains of buckets,
190                  	which of course are all empty at this time. The numChains argument
191                  	If the numChains argument is less than eight, then eight chains will
192                  	be created.
193                  	@param numChains specifies the initial number of chains.
194                      */
195                      _HashTableRep(Uint32 numChains);
196                  
197                      /*- Copy constructor. */
198 mike        1.11     _HashTableRep(const _HashTableRep& x);
199                  
200                      /*- Destructor. */
201                      ~_HashTableRep();
202                  
203                      /*- Assignment operator. */
204                      _HashTableRep& operator=(const _HashTableRep& x);
205                  
206                      /*- Returns the size of this hash table (the number of entries). */
207                      Uint32 size() const { return _size; }
208                  
209                      /*- Clears the contents of this hash table. After this is called, the
210                  	size() method returns zero.
211                      */
212                      void clear();
213                  
214                      /*- Inserts new key-value pair into hash table. Deletes the bucket on
215                  	failure so caller need not.
216                  	@param hashCode hash code generated by caller's hash function.
217                  	@param bucket bucket to be inserted.
218                  	@param key pointer to key.
219 mike        1.11 	@return true if insertion successful; false if duplicate key.
220                      */
221                      Boolean insert(Uint32 hashCode, _BucketBase* bucket, const void* key);
222                  
223                      /*- Finds the bucket with the given key. This method uses the
224                  	_BucketBase::equal() method to compare keys.
225                  	@param hashCode hash code generated by caller's hash function.
226                  	@param key void pointer to key.
227                  	@return pointer to bucket with that key or zero otherwise.
228                      */
229 mike        1.12     const _BucketBase* lookup(Uint32 hashCode, const void* key) const;
230 mike        1.11 
231                      /*- Removes the bucket with the given key. This method uses the
232                  	_BucketBase::equal() method to compare keys.
233                  	@param hashCode hash code generated by caller's hash function.
234                  	@param key void pointer to key.
235                  	@return true if entry found and removed and false otherwise.
236                      */
237                      Boolean remove(Uint32 hashCode, const void* key);
238                  
239                      _BucketBase** getChains() const { return _chains; }
240                  
241                      Uint32 getNumChains() const { return _numChains; }
242                  
243                  protected:
244                  
245                      Uint32 _size;
246                      Uint32 _numChains;
247                      _BucketBase** _chains;
248                  };
249                  
250                  /* The _Bucket class is used to implement the HashTable class.
251 mike        1.11 */
252                  template<class K, class V, class E>
253                  class _Bucket : public _BucketBase
254                  {
255                  public:
256                  
257                      _Bucket(const K& key, const V& value) : _key(key), _value(value) { }
258                  
259                      virtual ~_Bucket();
260                  
261                      virtual Boolean equal(const void* key) const;
262                  
263                      virtual _BucketBase* clone() const;
264                  
265                      K& getKey() { return _key; }
266                  
267                      V& getValue() { return _value; }
268                  
269                  private:
270                  
271                      K _key;
272 mike        1.11     V _value;
273                  };
274                  
275                  template<class K, class V, class E>
276                  Boolean _Bucket<K, V, E>::equal(const void* key) const
277                  {
278                      return E::equal(*((K*)key), _key);
279                  }
280                  
281                  template<class K, class V, class E>
282                  _Bucket<K, V, E>::~_Bucket()
283                  {
284                  
285                  }
286                  
287                  template<class K, class V, class E>
288                  _BucketBase* _Bucket<K, V, E>::clone() const
289                  {
290                      return new _Bucket<K, V, E>(_key, _value);
291                  }
292                  
293 mike        1.11 /* Iterator for HashTable class. */
294                  template<class K, class V, class E>
295                  class _HashTableIterator : public _HashTableIteratorBase
296                  {
297                  public:
298                  
299 r.kieninger 1.18     _HashTableIterator()
300 mike        1.11 	: _HashTableIteratorBase() { }
301                  
302                      _HashTableIterator(_BucketBase** first, _BucketBase** last)
303                  	: _HashTableIteratorBase(first, last) { }
304                  
305                      const K& key() const { return ((_Bucket<K, V, E>*)_bucket)->getKey(); }
306                  
307                      const V& value() const { return ((_Bucket<K, V, E>*)_bucket)->getValue(); }
308                  };
309                  
310 r.kieninger 1.18 /** The HashTable class provides a simple hash table implementation which
311 mike        1.11     associates key-value pairs.
312                  
313                      This implementation minimizes template bloat considerably by factoring out
314                      most of the code into a common non-template class (see _HashTableRep).
315                      The HashTable class is mostly a wrapper to add proper type semantics to the
316                      use of its representation class.
317                  
318                      Hashing as always is O(1).
319                  
320 r.kieninger 1.18     HashTable uses the most popular hash table implementation which utilizes
321 mike        1.11     an array of pointers to bucket chains. This is organized as follows:
322                  
323                          <pre>
324 r.kieninger 1.18            +---+
325 mike        1.11            |   |   +-----+-------+
326                           0 | ----->| key | value |
327                             |   |   +-----+-------+
328                             +---+
329                             |   |   +-----+-------+   +-----+-------+   +-----+-------+
330                           1 | ----->| key | value |-->| key | value |-->| key | value |
331                             |   |   +-----+-------+   +-----+-------+   +-----+-------+
332                             +---+
333                               .
334                               .
335                               .
336                             +---+
337                             |   |   +-----+-------+   +-----+-------+
338                          N-1| ----->| key | value |-->| key | value |
339                             |   |   +-----+-------+   +-----+-------+
340                             +---+
341                          </pre>
342                  
343 r.kieninger 1.18     To locate an item a hash function is applied to the key to produce an
344                      integer value. Then the modulo of that integer is taken with N to select
345 mike        1.11     a chain (as shown above). Then the chain is searched for a bucket whose
346                      key value is the same as the target key.
347                  
348                      The number of chains default to DEFAULT_NUM_CHAINS but should be about
349                      one-third the number of expected entries (so that the average chain
350                      will be three long). Making the number of chains too large will waste
351                      space causing the hash table to be very sparse. But for optimal efficiency,
352                      one might set the number of chains to be the same as the expected number
353                      of entries.
354                  
355 r.kieninger 1.18     This implementation does have NOT an adaptive growth algorithm yet which
356 mike        1.11     would allow it to increase the number of chains periodically based on some
357                      statistic (e.g., when the number of entries is more than three times the
358                      number of chains; this would keep the average chain length below three).
359                  
360                      The following example shows how to instantiate a HashTable which associates
361                      String keys with Uint32 values.
362                  
363                  	<pre>
364                  	typedef HashTable&lt;String, Uint32&gt; HT;
365                  	HT ht;
366                  	</pre>
367                  
368 r.kieninger 1.18     Some of the template arguments are defaulted in the above example (the
369 mike        1.11     third and forth). The instantiation is explicitly qualified like this
370                      (which by the way has exactly the same effect).
371                  
372                  	<pre>
373                  	typedef HashTable&lt;String, Uint32, EqualFunc&lt;String&gt;, HashFunc&lt;String&gt;&gt; HT;
374                  	</pre>
375                  
376                      The third and forth arguments are described more in detail later.
377                  
378                      Then, entries may be inserted like this:
379                  
380                  	<pre>
381                  	ht.insert("Red", 111);
382                  	ht.insert("Green", 222);
383                  	ht.insert("Blue", 222);
384                  	</pre>
385                  
386                      And entries may be looked up as follows:
387                  
388                  	<pre>
389                  	Uint32 value;
390 mike        1.11 	ht.lookup("Red", value);
391                  	</pre>
392                  
393                      And entries may be removed like this:
394                  
395                  	<pre>
396                  	h.remove("Red");
397                  	</pre>
398                  
399                      Iteration is done like this:
400                  
401                  	<pre>
402                  	for (HT::Iterator i = ht.start(); i; i++)
403                  	{
404                  	    // To access the key call i.key()!
405                  	    // To access the value call i.value()!
406                  	}
407                  	</pre>
408                  
409 r.kieninger 1.18     Note that only forward iteration is supported (no backwards iteration),
410                      AND that the hashtable MUST NOT be modified during the iteration!!!
411 mike        1.11 
412                      Equality of keys is determined using the EqualFunc class which is
413 r.kieninger 1.18     the default third argument of the template argument list. A new function
414                      object may be defined and passed to modify the behavior (for example, one
415                      might define equality of strings to ignore whitespace). Here is how to
416 mike        1.11     define and use a new equality function object:
417                  
418                  	<pre>
419                  	struct MyEqualFunc
420                  	{
421                  	    static Boolean equal(const String& x, const String& y)
422                  	    {
423                  		// do something here to test for equality!
424                  	    }
425                  	};
426                  
427                  	...
428                  
429                  	EqualFunc&lt;String, Uint32, MyEqualFunc&gt; ht;
430                  	</pre>
431                  
432 r.kieninger 1.18     When the lookup(), insert(), and remove() methods are called, the
433 mike        1.11     MyEqualFunc::equal() method will be used to determine equality.
434                  
435                      Hash functions are provided for common types (as part of the default
436                      HashFunc class). For other types it is possible to define a custom function
437                      object as follows:
438                  
439                  	<pre>
440                  	struct MyHashFunc
441                  	{
442                  	    static Uint32 hash(const String& x)
443                  	    {
444                  		// Do some hashing here!
445                  	    }
446                  	};
447                  
448                  	...
449                  
450                  	EqualFunc&lt;String, Uint32, MyEqualFunc, MyHashFunc&gt; ht;
451                  	</pre>
452                  
453 r.kieninger 1.18     As always, the hash function should provide a reasonably uniform
454 mike        1.11     distrubtion so that all of the entries don't get crowded into a few
455                      chains. Note that a hash function which returns zero, would force
456                      the pathalogical case in which all entries are placed in the first
457                      chain.
458                  */
459                  template<class K, class V, class E , class H >
460                  class HashTable
461                  {
462                  public:
463                  
464                      typedef _HashTableIterator<K, V, E> Iterator;
465                  
466                      /* By default, we create this many chains initially */
467                      enum { DEFAULT_NUM_CHAINS = 32 };
468                  
469                      /** Constructor.
470                  	@param numChains number of chains to create.
471                      */
472                      HashTable(Uint32 numChains = DEFAULT_NUM_CHAINS) : _rep(numChains)
473                      {
474                  
475 mike        1.11     }
476                  
477                      /** Copy constructor. */
478                      HashTable(const HashTable<K,V,E,H>& x) : _rep(x._rep)
479                      {
480                  
481                      }
482                  
483                      /** Assignment operator. */
484                      HashTable<K,V,E,H>& operator=(const HashTable<K,V,E,H>& x)
485                      {
486                  	if (this != &x)
487                  	    _rep = x._rep;
488                  	return *this;
489                      }
490                  
491                      /** Returns the size of this hash table (the number of entries). */
492                      Uint32 size() const { return _rep.size(); }
493                  
494                      /** Clears the contents of this hash table. After this is called, the
495                  	size() method returns zero.
496 mike        1.11     */
497                      void clear() { _rep.clear(); }
498                  
499                      /** Inserts new key-value pair into hash table.
500                  	@param key key component.
501                  	@param value value component.
502                  	@return true on success; false if duplicate key.
503                      */
504                      Boolean insert(const K& key, const V& value)
505                      {
506                  	return _rep.insert(
507                  	    H::hash(key), new _Bucket<K, V, E>(key, value), &key);
508                      }
509                  
510                      /** Checks to see if hash table contains an entry with the given key.
511                  	@param key key to be searched for
512                  	@return true if hash table contains an entry with the given key.
513                      */
514 mike        1.12     Boolean contains(const K& key) const
515 mike        1.11     {
516                  	V value;
517                  	return lookup(key, value);
518                      }
519                  
520                      /** Looks up the entry with the given key.
521                  	@param key key of entry to be located.
522                  	@param value output value.
523                  	@return true if found; false otherwise.
524                      */
525 mike        1.12     Boolean lookup(const K& key, V& value) const;
526 mike        1.11 
527                      /** Removes the entry with the given key.
528                  	@param key key of entry to be removed.
529                  	@return true on success; false otherwise.
530                      */
531                      Boolean remove(const K& key)
532                      {
533                  	return _rep.remove(H::hash(key), &key);
534                      }
535                  
536                      /** Obtains an iterator for this object. */
537 r.kieninger 1.18     Iterator start() const
538 mike        1.11     {
539                  	return Iterator(
540                  	    _rep.getChains(), _rep.getChains() + _rep.getNumChains());
541                      }
542                  
543                  private:
544                  
545                      _HashTableRep _rep;
546                  };
547                  
548                  template<class K, class V, class E, class H>
549 mike        1.12 inline Boolean HashTable<K, V, E, H>::lookup(const K& key, V& value) const
550 mike        1.11 {
551 r.kieninger 1.18     _Bucket<K, V, E>* bucket
552 mike        1.11 	= (_Bucket<K, V, E>*)_rep.lookup(H::hash(key), &key);
553                  
554                      if (bucket)
555                      {
556                  	value = bucket->getValue();
557                  	return true;
558                      }
559                  
560                      return false;
561                  }
562                  
563                  PEGASUS_NAMESPACE_END
564                  
565                  #endif /* Pegasus_HashTable_h */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2