(file) Return to strhash.cpp CVS log (file) (dir) Up to [OMI] / omi / strhash

  1 mike  1.1 /*
  2           **==============================================================================
  3           **
  4           ** Open Management Infrastructure (OMI)
  5           **
  6           ** Copyright (c) Microsoft Corporation
  7           ** 
  8           ** Licensed under the Apache License, Version 2.0 (the "License"); you may not 
  9           ** use this file except in compliance with the License. You may obtain a copy 
 10           ** of the License at 
 11           **
 12           **     http://www.apache.org/licenses/LICENSE-2.0 
 13           **
 14           ** THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15           ** KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 
 16           ** WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 
 17           ** MERCHANTABLITY OR NON-INFRINGEMENT. 
 18           **
 19           ** See the Apache 2 License for the specific language governing permissions 
 20           ** and limitations under the License.
 21           **
 22 mike  1.1 **==============================================================================
 23           */
 24           
 25           #include <iostream>
 26           #include <cctype>
 27           #include <vector>
 28           #include <string>
 29           #include <cstdio>
 30           #include <cctype>
 31           #include <cstdlib>
 32           #include <cstring>
 33           #include <map>
 34           
 35           using namespace std;
 36           const char* arg0;
 37           
 38           typedef pair<string, string> Pair;
 39           typedef vector<Pair> Vector;
 40           
 41           void LoadSpecFile(const char* path, Vector& pairs)
 42           {
 43 mike  1.1     // Open input file:
 44               FILE* is = fopen(path, "r");
 45           
 46               if (!is)
 47               {
 48                   fprintf(stderr, "%s: failed to open: %s\n", arg0, path);
 49                   exit(1);
 50               }
 51           
 52               // Read each line:
 53               char buf[1024];
 54               unsigned int line = 1;
 55           
 56               for (; fgets(buf, sizeof(buf), is) != NULL; line++)
 57               {
 58                   char* p;
 59                   vector<string> toks;
 60                   char* str;
 61                   char* tag;
 62           
 63                   // Skip leading spaces:
 64 mike  1.1         for (p = buf; isspace(*p); p++)
 65                       ;
 66           
 67                   // Skip comments:
 68                   if (p[0] == '#')
 69                       continue;
 70           
 71                   // Remove trailing spaces:
 72                   {
 73                       char* end = p + strlen(buf);
 74                       while (end != p && isspace(end[-1]))
 75                           *--end = '\0';
 76                   }
 77           
 78                   // Expect string
 79                   {
 80                       str = p;
 81           
 82                       while (*p && *p != ',')
 83                           p++;
 84                   }
 85 mike  1.1 
 86                   // Skip spaces:
 87                   while (isspace(*p))
 88                       p++;
 89           
 90                   if (!*p)
 91                   {
 92                       char buf[32];
 93                       sprintf(buf, "TAG%u", line);
 94                       pairs.push_back(Pair(str, buf));
 95                       continue;
 96                   }
 97           
 98                   // Expect ':'
 99                   if (*p != ',')
100                   {
101                       fprintf(stderr, "%s: syntax error on line %u\n", arg0, line);
102                       exit(1);
103                   }
104           
105                   *p++ = '\0';
106 mike  1.1 
107                   // Skip spaces:
108                   while (isspace(*p))
109                       p++;
110           
111                   // Expect tag
112                   {
113                       tag = p;
114           
115                       if (!isalpha(*p) && *p != '_')
116                       {
117                           fprintf(stderr, "%s: syntax error on line %u\n", arg0, line);
118                           exit(1);
119                       }
120           
121                       while (isalnum(*p) || *p == '_')
122                           p++;
123                   }
124           
125                   *p++ = '\0';
126           
127 mike  1.1         // Skip spaces:
128                   while (isspace(*p))
129                       p++;
130           
131                   if (*p)
132                   {
133                       fprintf(stderr, "%s: syntax error on line %u\n", arg0, line);
134                       exit(1);
135                   }
136           
137                   pairs.push_back(Pair(str, tag));
138               }
139           }
140           
141           static void _GenEnum(FILE* f, const Vector& pairs)
142           {
143               // Generate enumeration:
144               {
145                   fprintf(f, "enum\n");
146                   fprintf(f, "{\n");
147           
148 mike  1.1         for (size_t i = 0; i < pairs.size(); i++)
149                   {
150                       fprintf(f, "    %s = %u%s\n", pairs[i].second.c_str(), (int)(i + 1), ((i+1) == pairs.size() ? "" : ","));
151                   }
152           
153                   fprintf(f, "};\n");
154                   fprintf(f, "\n");
155               }
156           }
157           
158           #if 0
159           static void GenSimpleFunction(const char* fileName, const Vector& pairs)
160           {
161               FILE* f = fopen(fileName, "w");
162           
163               if (!f)
164               {
165                   fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName);
166                   exit(1);
167               }
168           
169 mike  1.1 
170               //_GenEnum(f, pairs);
171           
172               /* header */
173               fprintf(f, "static int SimpleStr(const char* s, size_t n)\n");
174               fprintf(f, "{\n");
175           
176               /* generate function body */
177               fprintf(f, "n=n;\n   ");
178               for (size_t i = 0; i < pairs.size(); i++)
179               {
180                   const string& str = pairs[i].first.c_str();
181                   const string& tok = pairs[i].second.c_str();
182           
183                   fprintf(f, " if (strcmp(s, \"%s\") == 0)\n",
184                          str.c_str());
185                   
186                   fprintf(f, "        ");
187                   fprintf(f, "return %s;\n", tok.c_str());
188                   fprintf(f, "    else");
189               }
190 mike  1.1 
191               fprintf(f, "\n        return 0; /* not found */\n");
192           
193               /* close the function */
194               //fprintf(f, "\n");
195               fprintf(f, "}\n\n");
196           
197           
198               fclose(f);
199           }
200           #endif
201           
202           #if 0
203           static void GenHashFunction(const char* fileName, const Vector& pairs)
204           {
205               FILE* f = fopen(fileName, "w");
206           
207               if (!f)
208               {
209                   fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName);
210                   exit(1);
211 mike  1.1     }
212           
213           
214               //_GenEnum(f, pairs);
215           
216               // Generate function
217               {
218                   typedef pair<unsigned int, Vector> MPair;
219                   typedef map<unsigned int, Vector> Map;
220                   Map m;
221           
222                   fprintf(f, "int HashStr(const char* s, size_t n)\n");
223                   fprintf(f, "{\n");
224                   fprintf(f, "    unsigned int h = (unsigned int)(n ^ s[0] ^ s[n-1]);\n");
225                   fprintf(f, "\n");
226                   fprintf(f, "    switch (h)\n");
227                   fprintf(f, "    {\n");
228           
229                   // Conslidate pairs with like hash codes
230                   for (size_t i = 0; i < pairs.size(); i++)
231                   {
232 mike  1.1             const char* s = pairs[i].first.c_str();
233                       size_t n = pairs[i].first.size();
234                       unsigned int h = n ^ s[0] ^ s[n-1];
235           
236                       Map::iterator pos = m.find(h);
237           
238                       if (pos == m.end())
239                       {
240                           Vector v;
241                           v.push_back(pairs[i]);
242                           m.insert(MPair(h,v));
243                       }
244                       else
245                       {
246                           (*pos).second.push_back(pairs[i]);
247                       }
248                   }
249           
250                   // Print cases:
251                   for (Map::iterator p = m.begin(); p != m.end(); p++)
252                   {
253 mike  1.1             const Vector& v = (*p).second;
254           
255                       fprintf(f, "        case %u:\n", (*p).first);
256           
257                       for (size_t i = 0; i < v.size(); i++)
258                       {
259                           const string& str = v[i].first.c_str();
260                           const string& tok = v[i].second.c_str();
261           
262                           fprintf(f, "            ");
263                           fprintf(f, "if (n == %u && memcmp(s, \"%s\", n) == 0)\n",
264                               (int)str.size(), str.c_str());
265           
266                           fprintf(f, "                ");
267                           fprintf(f, "return %s;\n", tok.c_str());
268                       }
269           
270                       fprintf(f, "            break;\n");
271                   }
272           
273                   fprintf(f, "    }\n");
274 mike  1.1         fprintf(f, "    /* Not found */\n");
275                   fprintf(f, "    return 0;\n");
276                   fprintf(f, "}\n");
277               }
278               fclose(f);
279           }
280           #endif
281           
282           typedef multimap<unsigned int, Pair> Map;
283           typedef pair<unsigned int, Pair> MPair;
284           static void _GenStringCmp(FILE* f, const Pair& p)
285           {
286               const string& str = p.first.c_str();
287               const string& tok = p.second.c_str();
288               
289               fprintf(f, "            ");
290               fprintf(f, "if (strcmp(s, \"%s\") == 0)\n",
291                       str.c_str());
292               
293               fprintf(f, "                ");
294               fprintf(f, "return %s;\n", tok.c_str());
295 mike  1.1 }
296           
297           
298           static void _GenSubSwitchForRange(
299               FILE* f, 
300               Map::const_iterator pos, 
301               Map::const_iterator range_end)
302           {
303               // find best character with min. number of collisions
304               unsigned int bestIndex = 0;
305               unsigned int bestCollision = 0xFFFFFFFF;
306               unsigned int n = pos->first;
307           
308               for ( unsigned int index = 0; index < n; index++ )
309               {
310                   vector<unsigned int> collisions(256, 0);
311                   unsigned int currentMax = 0;
312           
313           
314                   for ( Map::const_iterator it = pos; it != range_end; it++ )
315                   {
316 mike  1.1             unsigned char c = it->second.first[index];
317                       collisions[c]++;
318                       if (collisions[c] > currentMax)
319                           currentMax = collisions[c];
320                   }
321           
322           #if 0
323                   printf("n is %d, index %d, currentMax %d, best %d\n",
324                          n, index, currentMax, bestCollision);
325           #endif
326           
327                   if (bestCollision > currentMax)
328                   {
329                       bestCollision = currentMax;
330                       bestIndex = index;
331                   }
332           
333                   if (1 == bestCollision)
334                       break;
335               }
336           
337 mike  1.1 
338           #if 0
339               printf("n is %d, best index %d, best %d\n",
340                      n, bestIndex, bestCollision);
341           #endif
342           
343               // build a submap based on best index value
344               Map subMap;
345           
346               for ( Map::const_iterator it = pos; it != range_end; it++ )
347               {
348                   unsigned char c = it->second.first[bestIndex];
349           
350                   subMap.insert(MPair(c, it->second));
351               }
352           
353               fprintf(f, "        switch (s[%u])\n", bestIndex);
354               fprintf(f, "        {\n");
355           
356               // process submap
357               // generate cases for each length
358 mike  1.1     Map::const_iterator posSubMap = subMap.begin();
359           
360               while (posSubMap != subMap.end())
361               {
362                   unsigned int n = posSubMap->first;
363           
364                   Map::const_iterator range_end = subMap.upper_bound(n);
365           
366                   fprintf(f, "        case %u:\n", n);
367           
368                   while (posSubMap != range_end)
369                   {
370                       _GenStringCmp(f, posSubMap->second);
371                       posSubMap++;
372                   }
373           
374                   //printf("n is %d, number of items is %d\n", n, distance(posSubMap, range_end));
375                   fprintf(f, "        break;\n");
376           
377                   posSubMap = subMap.lower_bound(n+1);
378               }
379 mike  1.1 
380               fprintf(f, "        }\n");
381               fprintf(f, "\n");
382           
383           }
384           
385           static void GenHashStrHeader(
386               const string& fileName,
387               const Vector& pairs)
388           {
389               FILE* f = fopen(fileName.c_str(), "w");
390           
391               if (!f)
392               {
393                   fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName.c_str());
394                   exit(1);
395               }
396           
397               _GenEnum(f, pairs);
398           
399               /* file header */
400 mike  1.1     fprintf(f, "int HashStr(const char* s, size_t n);\n");
401               fprintf(f, "\n");
402           
403               printf("Created %s\n", fileName.c_str());
404           }
405           
406           static void GenHashStrFunction(
407               const string& fileName, 
408               const Vector& pairs)
409           {
410               FILE* f = fopen(fileName.c_str(), "w");
411           
412               if (!f)
413               {
414                   fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName.c_str());
415                   exit(1);
416               }
417           
418               //_GenEnum(f, pairs);
419           
420               /* file header */
421 mike  1.1     fprintf(f, "int HashStr(const char* s, size_t n)\n");
422               fprintf(f, "{\n");
423               fprintf(f, "\n");
424               fprintf(f, "    switch (n)\n");
425               fprintf(f, "    {\n");
426               /* create a map based on str len */
427           
428               Map m;
429           
430               // Conslidate pairs with like str length
431               for (size_t i = 0; i < pairs.size(); i++)
432               {
433                   size_t n = pairs[i].first.size();
434           
435                   m.insert(MPair(n, pairs[i]));   
436               }
437           
438           
439               // generate cases for each length
440               Map::const_iterator pos = m.begin();
441           
442 mike  1.1     while (pos != m.end())
443               {
444                   unsigned int n = pos->first;
445           
446                   Map::const_iterator range_end = m.upper_bound(n);
447           
448                   fprintf(f, "    case %u:\n", n);
449           
450                   if (1 >= distance(pos, range_end))
451                   {
452                       // special case - only few elements in range
453                       while (pos != range_end)
454                       {
455                           _GenStringCmp(f, pos->second);
456                           pos++;
457                       }
458           
459                   }
460                   else
461                   {
462                       _GenSubSwitchForRange(f, pos, range_end);
463 mike  1.1         }
464           
465                   //printf("n is %d, number of items is %d\n", n, distance(pos, range_end));
466                   fprintf(f, "    break;\n");
467           
468                   pos = m.lower_bound(n+1);
469               }
470           
471           
472               fprintf(f, "    }\n");
473               fprintf(f, "    /* Not found */\n");
474               fprintf(f, "    return 0;\n");
475               fprintf(f, "}\n");
476           
477               fclose(f);
478           
479               printf("Created %s\n", fileName.c_str());
480           }
481           
482           #if 0
483           static void GenUnittest(const char* fileName, const Vector& pairs)
484 mike  1.1 {
485               FILE* f = fopen(fileName, "w");
486           
487               if (!f)
488               {
489                   fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName);
490                   exit(1);
491               }
492           
493               /* file header */
494               fprintf(f, "#include <ut/ut.h>\n");
495               fprintf(f, "#include <string.h>\n");
496               fprintf(f, "#include \"../quickstr.h\"\n");
497               fprintf(f, "#include \"../quickstr.inc\"\n");
498               fprintf(f, "#include \"hash.inc\"\n");
499               fprintf(f, "#include \"simple.inc\"\n");
500           
501               fprintf(f, "\n");
502               fprintf(f, "static void setUp()\n{\n}\n");
503               fprintf(f, "static void cleanup()\n{\n}\n");
504           
505 mike  1.1     fprintf(f, "typedef int (*FN)(const char* s, size_t n);\n");
506           
507               fprintf(f, "static void testAllStrings(FN f)\n{\n");
508           
509               /* generate all tests */
510               for (size_t i = 0; i < pairs.size(); i++)
511               {
512                   const string& str = pairs[i].first.c_str();
513                   const string& tok = pairs[i].second.c_str();
514           
515                   fprintf(f, "    UT_ASSERT((*f)(\"%s\", %u) == %s);\n",
516                           str.c_str(), (int)str.size(), tok.c_str() );
517                   
518                }
519           
520               /* add comparison with not-exisitng strings */
521               fprintf(f, "    UT_ASSERT((*f)(\"*\", 1) == 0);\n") ;
522               fprintf(f, "    UT_ASSERT((*f)(\"**\", 2) == 0);\n") ;
523               fprintf(f, "    UT_ASSERT((*f)(\"***\", 3) == 0);\n") ;
524               fprintf(f, "    UT_ASSERT((*f)(\"****\", 4) == 0);\n") ;
525               fprintf(f, "    UT_ASSERT((*f)(\"*****\", 5) == 0);\n") ;
526 mike  1.1     fprintf(f, "    UT_ASSERT((*f)(\"******\", 6) == 0);\n") ;
527           
528           
529               /* Generate end of unittest file  */
530               fprintf(f, "}\n\n");
531               fprintf(f, "static void TestSimple()\n");
532               fprintf(f, "{\n");
533               fprintf(f, "    for(int i=0; i< 10000; i++) testAllStrings(SimpleStr);\n");
534               fprintf(f, "}\n");
535               fprintf(f, "\n");
536           
537               fprintf(f, "static void TestHash()\n");
538               fprintf(f, "{\n");
539               fprintf(f, "    for(int i=0; i< 10000; i++) testAllStrings(HashStr);\n");
540               fprintf(f, "}\n");
541               fprintf(f, "\n");
542           
543               fprintf(f, "static void TestHashStr()\n");
544               fprintf(f, "{\n");
545               fprintf(f, "    for(int i=0; i< 10000; i++) testAllStrings(HashStr);\n");
546               fprintf(f, "}\n");
547 mike  1.1     fprintf(f, "\n");
548           
549               fprintf(f, "static void RunTests()\n");
550               fprintf(f, "{\n");
551               fprintf(f, "    UT_TEST(TestSimple);\n");
552               fprintf(f, "    UT_TEST(TestHash);\n");
553               fprintf(f, "    UT_TEST(TestHashStr);\n");
554               fprintf(f, "}\n");
555               fprintf(f, "\n");
556               fprintf(f, "UT_ENTRY_POINT(RunTests);\n");
557           
558           
559               fclose(f);
560           }
561           #endif
562           
563           int main(int argc, char** argv)
564           {
565               arg0 = argv[0];
566           
567               if (argc < 2)
568 mike  1.1     {
569                   fprintf(stderr, "Usage: %s path\n", arg0);
570                   exit(1);
571               }
572           
573               Vector pairs;
574               LoadSpecFile(argv[1], pairs);
575           
576               if (pairs.empty())
577               {
578                   fprintf(stderr, "no data for building hash function\n");
579                   exit(1);
580               }
581           
582           #if 0
583               GenSimpleFunction("simple.inc", pairs);
584               GenHashFunction("hash.inc", pairs);
585           #endif
586           
587               // Determine the base filename:
588               string base = argv[1];
589 mike  1.1     {
590                   size_t pos = base.rfind('/');
591           
592                   if (pos != string::npos)
593                       base = base.substr(pos + 1);
594           
595                   pos = base.rfind('.');
596           
597                   if (pos != string::npos)
598                       base = base.substr(0, pos);
599               }
600           
601               GenHashStrHeader(base + ".h", pairs);
602               GenHashStrFunction(base + ".inc", pairs);
603           
604           #if 0
605               GenUnittest("tests/test_strhash.cpp", pairs);
606           #endif
607           
608               return 0;
609           }

ViewCVS 0.9.2