(file) Return to strhash.cpp CVS log (file) (dir) Up to [OMI] / omi / strhash

  1 mike  1.1 /*
  2           **==============================================================================
  3           **
  4           ** Open Management Infrastructure (OMI)
  5           **
  6           ** Copyright (c) Microsoft Corporation
  7           ** 
  8           ** Licensed under the Apache License, Version 2.0 (the "License"); you may not 
  9           ** use this file except in compliance with the License. You may obtain a copy 
 10           ** of the License at 
 11           **
 12           **     http://www.apache.org/licenses/LICENSE-2.0 
 13           **
 14           ** THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15           ** KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED 
 16           ** WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, 
 17           ** MERCHANTABLITY OR NON-INFRINGEMENT. 
 18           **
 19           ** See the Apache 2 License for the specific language governing permissions 
 20           ** and limitations under the License.
 21           **
 22 mike  1.1 **==============================================================================
 23           */
 24           
 25           #include <iostream>
 26           #include <cctype>
 27           #include <vector>
 28           #include <string>
 29           #include <cstdio>
 30           #include <cctype>
 31           #include <cstdlib>
 32           #include <cstring>
 33           #include <map>
 34           
 35           using namespace std;
 36           const char* arg0;
 37           
 38 krisbash 1.3 struct Tuple
 39              {
 40                  Tuple(char ch_, const string& str_, const string& tag_) :
 41                      ch(ch_), str(str_), tag(tag_)
 42                  {
 43                  }
 44              
 45                  char ch;
 46                  string str;
 47                  string tag;
 48              };
 49              
 50              typedef vector<Tuple> Vector;
 51              
 52              string BaseName(const string& name)
 53              {
 54                  string base = name;
 55              
 56                  size_t pos = base.rfind('/');
 57              
 58                  if (pos != string::npos)
 59 krisbash 1.3         base = base.substr(pos + 1);
 60              
 61                  pos = base.rfind('.');
 62              
 63                  if (pos != string::npos)
 64                      base = base.substr(0, pos);
 65              
 66                  return base;
 67              }
 68 mike     1.1 
 69 krisbash 1.3 void LoadSpecFile(const char* path, Vector& tuples)
 70 mike     1.1 {
 71                  // Open input file:
 72                  FILE* is = fopen(path, "r");
 73              
 74                  if (!is)
 75                  {
 76                      fprintf(stderr, "%s: failed to open: %s\n", arg0, path);
 77                      exit(1);
 78                  }
 79              
 80                  // Read each line:
 81                  char buf[1024];
 82                  unsigned int line = 1;
 83              
 84                  for (; fgets(buf, sizeof(buf), is) != NULL; line++)
 85                  {
 86                      char* p;
 87                      vector<string> toks;
 88                      char* str;
 89                      char* tag;
 90 krisbash 1.3         char ch;
 91 mike     1.1 
 92                      // Skip leading spaces:
 93                      for (p = buf; isspace(*p); p++)
 94                          ;
 95              
 96                      // Skip comments:
 97                      if (p[0] == '#')
 98                          continue;
 99              
100                      // Remove trailing spaces:
101                      {
102                          char* end = p + strlen(buf);
103                          while (end != p && isspace(end[-1]))
104                              *--end = '\0';
105                      }
106              
107 krisbash 1.3         // Ignore empty lines:
108              
109                      if (*p == '\0')
110                          continue;
111              
112                      // Get tag:
113 mike     1.1         {
114 krisbash 1.3             ch = *p++;
115              
116                          if (!isalpha(ch) && ch != '0')
117                          {
118                              fprintf(stderr, "%s: expected character identifier: %u\n", arg0, line);
119                              exit(1);
120                          }
121 mike     1.1 
122                          while (*p && *p != ',')
123                              p++;
124 krisbash 1.3 
125                          // Skip spaces:
126                          while (isspace(*p))
127                              p++;
128              
129                          // Expect ','
130                          if (*p != ',')
131                          {
132                              fprintf(stderr, "%s: expected comma and then string: %u\n", arg0, line);
133                              exit(1);
134                          }
135              
136                          p++;
137 mike     1.1         }
138              
139                      // Skip spaces:
140                      while (isspace(*p))
141                          p++;
142              
143 krisbash 1.3         // Get string:
144 mike     1.1         {
145 krisbash 1.3             str = p;
146              
147                          while (*p && *p != ',')
148                              p++;
149              
150                          // Skip spaces:
151                          while (isspace(*p))
152                              p++;
153              
154                          // Expect ','
155                          if (*p != ',')
156                          {
157                              fprintf(stderr, "%s: expected comman and then tag name: %u\n", arg0, line);
158                              exit(1);
159                          }
160 mike     1.1 
161 krisbash 1.3             *p++ = '\0';
162 mike     1.1         }
163              
164                      // Skip spaces:
165                      while (isspace(*p))
166                          p++;
167              
168                      // Expect tag
169                      {
170                          tag = p;
171              
172                          if (!isalpha(*p) && *p != '_')
173                          {
174                              fprintf(stderr, "%s: syntax error on line %u\n", arg0, line);
175                              exit(1);
176                          }
177              
178                          while (isalnum(*p) || *p == '_')
179                              p++;
180                      }
181              
182                      *p++ = '\0';
183 mike     1.1 
184                      // Skip spaces:
185                      while (isspace(*p))
186                          p++;
187              
188                      if (*p)
189                      {
190                          fprintf(stderr, "%s: syntax error on line %u\n", arg0, line);
191                          exit(1);
192                      }
193              
194 krisbash 1.3         tuples.push_back(Tuple(ch, str, tag));
195 mike     1.1     }
196              }
197              
198 krisbash 1.3 static void _GenEnum(FILE* f, const Vector& tuples)
199 mike     1.1 {
200                  // Generate enumeration:
201                  {
202                      fprintf(f, "enum\n");
203                      fprintf(f, "{\n");
204              
205 krisbash 1.3         for (size_t i = 0; i < tuples.size(); i++)
206 mike     1.1         {
207 krisbash 1.3             fprintf(f, "    %s = %u%s\n", 
208                              tuples[i].tag.c_str(), 
209                              (int)(i + 1), 
210                              ((i+1) == tuples.size() ? "" : ","));
211 mike     1.1         }
212              
213                      fprintf(f, "};\n");
214                      fprintf(f, "\n");
215                  }
216              }
217              
218 krisbash 1.3 typedef multimap<unsigned int, Tuple> Map;
219              typedef pair<unsigned int, Tuple> MTuple;
220              static void _GenStringCmp(FILE* f, const Tuple& p)
221 mike     1.1 {
222 krisbash 1.3     char ch = p.ch;
223                  const string& str = p.str.c_str();
224                  const string& tok = p.tag.c_str();
225                  
226                  fprintf(f, "            ");
227 mike     1.1 
228 krisbash 1.3     if (ch == '0')
229 mike     1.1     {
230 krisbash 1.3         fprintf(f, "if (HASHSTR_STRCMP(s, HASHSTR_T(\"%s\")) == 0)\n",
231                          str.c_str());
232 mike     1.1     }
233 krisbash 1.3     else
234 mike     1.1     {
235 krisbash 1.3         fprintf(f, "if (c == '%c' && HASHSTR_STRCMP(s, HASHSTR_T(\"%s\")) == 0)\n",
236                          ch, str.c_str());
237 mike     1.1     }
238                  
239                  fprintf(f, "                ");
240                  fprintf(f, "return %s;\n", tok.c_str());
241              }
242              
243              
244              static void _GenSubSwitchForRange(
245                  FILE* f, 
246                  Map::const_iterator pos, 
247                  Map::const_iterator range_end)
248              {
249                  // find best character with min. number of collisions
250                  unsigned int bestIndex = 0;
251                  unsigned int bestCollision = 0xFFFFFFFF;
252                  unsigned int n = pos->first;
253              
254                  for ( unsigned int index = 0; index < n; index++ )
255                  {
256                      vector<unsigned int> collisions(256, 0);
257                      unsigned int currentMax = 0;
258 mike     1.1 
259              
260                      for ( Map::const_iterator it = pos; it != range_end; it++ )
261                      {
262 krisbash 1.3             unsigned char c = it->second.str[index];
263 mike     1.1             collisions[c]++;
264                          if (collisions[c] > currentMax)
265                              currentMax = collisions[c];
266                      }
267              
268              #if 0
269                      printf("n is %d, index %d, currentMax %d, best %d\n",
270                             n, index, currentMax, bestCollision);
271              #endif
272              
273                      if (bestCollision > currentMax)
274                      {
275                          bestCollision = currentMax;
276                          bestIndex = index;
277                      }
278              
279                      if (1 == bestCollision)
280                          break;
281                  }
282              
283              
284 mike     1.1 #if 0
285                  printf("n is %d, best index %d, best %d\n",
286                         n, bestIndex, bestCollision);
287              #endif
288              
289                  // build a submap based on best index value
290                  Map subMap;
291              
292                  for ( Map::const_iterator it = pos; it != range_end; it++ )
293                  {
294 krisbash 1.3         unsigned char c = it->second.str[bestIndex];
295 mike     1.1 
296 krisbash 1.3         subMap.insert(MTuple(c, it->second));
297 mike     1.1     }
298              
299                  fprintf(f, "        switch (s[%u])\n", bestIndex);
300                  fprintf(f, "        {\n");
301              
302                  // process submap
303                  // generate cases for each length
304                  Map::const_iterator posSubMap = subMap.begin();
305              
306                  while (posSubMap != subMap.end())
307                  {
308                      unsigned int n = posSubMap->first;
309              
310                      Map::const_iterator range_end = subMap.upper_bound(n);
311              
312                      fprintf(f, "        case %u:\n", n);
313              
314                      while (posSubMap != range_end)
315                      {
316                          _GenStringCmp(f, posSubMap->second);
317                          posSubMap++;
318 mike     1.1         }
319              
320                      //printf("n is %d, number of items is %d\n", n, distance(posSubMap, range_end));
321                      fprintf(f, "        break;\n");
322              
323                      posSubMap = subMap.lower_bound(n+1);
324                  }
325              
326                  fprintf(f, "        }\n");
327                  fprintf(f, "\n");
328              
329              }
330              
331 krisbash 1.3 static void GenDoNotEditMessage(
332                  FILE* f)
333              {
334                  fprintf(f, "/*\n");
335                  fprintf(f, "**==============================================================================\n");
336                  fprintf(f, "**\n");
337                  fprintf(f, "** DO NOT EDIT THIS FILE!!! IT WAS AUTOMATICALLY GENERATED\n");
338                  fprintf(f, "**\n");
339                  fprintf(f, "**==============================================================================\n");
340                  fprintf(f, "*/\n");
341                  fprintf(f, "\n");
342              }
343              
344 mike     1.1 static void GenHashStrHeader(
345                  const string& fileName,
346 krisbash 1.3     const Vector& tuples)
347 mike     1.1 {
348                  FILE* f = fopen(fileName.c_str(), "w");
349              
350                  if (!f)
351                  {
352                      fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName.c_str());
353                      exit(1);
354                  }
355              
356 krisbash 1.3     GenDoNotEditMessage(f);
357              
358                  string base = BaseName(fileName);
359              
360                  fprintf(f, "#ifndef _%s_h\n", base.c_str());
361                  fprintf(f, "#define _%s_h\n", base.c_str());
362                  fprintf(f, "\n");
363              
364                  _GenEnum(f, tuples);
365              
366                  fprintf(f, "#if !defined(HASHSTR_CHAR)\n");
367                  fprintf(f, "# define HASHSTR_CHAR char\n");
368                  fprintf(f, "#endif\n\n");
369              
370                  fprintf(f, "#if !defined(HASHSTR_T)\n");
371                  fprintf(f, "# define HASHSTR_T(STR) STR\n");
372                  fprintf(f, "#endif\n\n");
373              
374                  fprintf(f, "#if !defined(HASHSTR_STRCMP)\n");
375                  fprintf(f, "# define HASHSTR_STRCMP strcmp\n");
376                  fprintf(f, "#endif\n\n");
377 mike     1.1 
378                  /* file header */
379 krisbash 1.3     fprintf(f, "int HashStr(HASHSTR_CHAR c, const HASHSTR_CHAR* s, size_t n);\n");
380 mike     1.1     fprintf(f, "\n");
381              
382 krisbash 1.3     fprintf(f, "#endif /* %s_h */\n", base.c_str());
383              
384 mike     1.1     printf("Created %s\n", fileName.c_str());
385              }
386              
387 krisbash 1.3 static void GenFastHashStrFunction(
388 mike     1.1     const string& fileName, 
389 krisbash 1.3     const string& base,
390                  const Vector& tuples)
391 mike     1.1 {
392                  FILE* f = fopen(fileName.c_str(), "w");
393              
394                  if (!f)
395                  {
396                      fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName.c_str());
397                      exit(1);
398                  }
399              
400 krisbash 1.3     GenDoNotEditMessage(f);
401              
402                  fprintf(f, "#include \"%s.h\"\n", base.c_str());
403                  fprintf(f, "\n");
404 mike     1.1 
405                  /* file header */
406 krisbash 1.3     fprintf(f, "int HashStr(HASHSTR_CHAR c, const HASHSTR_CHAR* s, size_t n)\n");
407 mike     1.1     fprintf(f, "{\n");
408                  fprintf(f, "\n");
409                  fprintf(f, "    switch (n)\n");
410                  fprintf(f, "    {\n");
411                  /* create a map based on str len */
412              
413                  Map m;
414              
415 krisbash 1.3     // Conslidate tuples with like str length
416                  for (size_t i = 0; i < tuples.size(); i++)
417 mike     1.1     {
418 krisbash 1.3         size_t n = tuples[i].str.size();
419 mike     1.1 
420 krisbash 1.3         m.insert(MTuple(n, tuples[i]));   
421 mike     1.1     }
422              
423                  // generate cases for each length
424                  Map::const_iterator pos = m.begin();
425              
426                  while (pos != m.end())
427                  {
428                      unsigned int n = pos->first;
429              
430                      Map::const_iterator range_end = m.upper_bound(n);
431              
432                      fprintf(f, "    case %u:\n", n);
433              
434                      if (1 >= distance(pos, range_end))
435                      {
436                          // special case - only few elements in range
437                          while (pos != range_end)
438                          {
439                              _GenStringCmp(f, pos->second);
440                              pos++;
441                          }
442 mike     1.1 
443                      }
444                      else
445                      {
446                          _GenSubSwitchForRange(f, pos, range_end);
447                      }
448              
449                      //printf("n is %d, number of items is %d\n", n, distance(pos, range_end));
450                      fprintf(f, "    break;\n");
451              
452                      pos = m.lower_bound(n+1);
453                  }
454              
455              
456                  fprintf(f, "    }\n");
457                  fprintf(f, "    /* Not found */\n");
458                  fprintf(f, "    return 0;\n");
459                  fprintf(f, "}\n");
460              
461                  fclose(f);
462              
463 mike     1.1     printf("Created %s\n", fileName.c_str());
464              }
465              
466 krisbash 1.3 static void GenSmallHashStrFunction(
467                  const string& fileName, 
468                  const string& base,
469                  const Vector& tuples)
470 mike     1.1 {
471 krisbash 1.3     FILE* f = fopen(fileName.c_str(), "w");
472 mike     1.1 
473                  if (!f)
474                  {
475 krisbash 1.3         fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName.c_str());
476 mike     1.1         exit(1);
477                  }
478              
479 krisbash 1.3     GenDoNotEditMessage(f);
480 mike     1.1 
481 krisbash 1.3     fprintf(f, "#include \"%s.h\"\n", base.c_str());
482 mike     1.1     fprintf(f, "\n");
483              
484 krisbash 1.3     Map m;
485 mike     1.1 
486 krisbash 1.3     // Conslidate tuples with like hash code:
487                  for (size_t i = 0; i < tuples.size(); i++)
488                  {
489                      const string& s = tuples[i].str;
490 mike     1.1 
491 krisbash 1.3         size_t n = tuples[i].str.size();
492 mike     1.1 
493 krisbash 1.3         unsigned char h = (unsigned char)s[0] ^ (unsigned char)s[n-1] ^ (unsigned char)n;
494 mike     1.1 
495 krisbash 1.3         m.insert(MTuple(h, tuples[i]));   
496                  }
497 mike     1.1 
498 krisbash 1.3     // generate cases for each length
499                  Map::const_iterator pos = m.begin();
500 mike     1.1 
501 krisbash 1.3     fprintf(f, "typedef struct _HashStrTuple\n");
502 mike     1.1     fprintf(f, "{\n");
503 krisbash 1.3     fprintf(f, "    unsigned char code;\n");
504                  fprintf(f, "    char ch;\n");
505                  fprintf(f, "    unsigned short tag;\n");
506                  fprintf(f, "    const HASHSTR_CHAR* str;\n");
507 mike     1.1     fprintf(f, "}\n");
508 krisbash 1.3     fprintf(f, "HashStrTuple;\n");
509 mike     1.1     fprintf(f, "\n");
510              
511 krisbash 1.3     fprintf(f, "static const HashStrTuple _tuples[] =\n");
512 mike     1.1     fprintf(f, "{\n");
513 krisbash 1.3 
514                  while (pos != m.end())
515                  {
516                      unsigned int h = pos->first;
517                      const Tuple& t = pos->second;
518              
519                      // Lookup tag id:
520              
521                      size_t tag = size_t(-1);
522              
523                      for (size_t i = 0; i < tuples.size(); i++)
524                      {
525                          if (tuples[i].tag == t.tag)
526                          {
527                              tag = i + 1;
528                          }
529                      }
530              
531                      if (tag == size_t(-1))
532                      {
533                          fprintf(stderr, "%s: tag not found: %s\n", arg0, t.tag.c_str());
534 krisbash 1.3             exit(1);
535                      }
536              
537                      fprintf(f, "    {\n");
538                      fprintf(f, "        0x%02X, /* code */\n", h);
539              
540                      if (t.ch == '0')
541                          fprintf(f, "        0, /* ch */\n");
542                      else
543                          fprintf(f, "        '%c', /* ch */\n", t.ch);
544              
545                      fprintf(f, "        %s,\n", t.tag.c_str());
546                      fprintf(f, "        HASHSTR_T(\"%s\")\n", t.str.c_str());
547                      fprintf(f, "    },\n");
548              
549                      pos++;
550                  }
551              
552                  fprintf(f, "    {\n");
553                  fprintf(f, "        0xFF, /* code */\n");
554                  fprintf(f, "    }\n");
555 krisbash 1.3 
556                  fprintf(f, "};\n");
557                  fprintf(f, "\n");
558                  fprintf(f, "static const size_t _ntuples = sizeof(_tuples) / sizeof(_tuples[0]) - 1;\n");
559 mike     1.1     fprintf(f, "\n");
560              
561 krisbash 1.3     /* file header */
562                  fprintf(f, "int HashStr(HASHSTR_CHAR c, const HASHSTR_CHAR* s, size_t n)\n");
563 mike     1.1     fprintf(f, "{\n");
564                  fprintf(f, "\n");
565 krisbash 1.3     fprintf(f, "    size_t i;\n");
566                  fprintf(f, "    size_t j;\n");
567                  fprintf(f, "    unsigned char h = (unsigned char)s[0] ^ (unsigned char)s[n-1] ^ (unsigned char)n;\n");
568                  fprintf(f, "\n");
569              
570                  fprintf(f, "    for (i = 0; h > _tuples[i].code; i++)\n");
571                  fprintf(f, "        ;\n");
572                  fprintf(f, "\n");
573                  fprintf(f, "    for (j = i; j < _ntuples && h == _tuples[j].code; j++)\n");
574                  fprintf(f, "    {\n");
575                  fprintf(f, "        if (c == _tuples[j].ch && HASHSTR_STRCMP(s, _tuples[j].str) == 0)\n");
576                  fprintf(f, "            return _tuples[j].tag;\n");
577                  fprintf(f, "    }\n");
578 mike     1.1 
579                  fprintf(f, "\n");
580              
581 krisbash 1.3     // ATTN:
582              
583                  fprintf(f, "    /* Not found */\n");
584                  fprintf(f, "    return 0;\n");
585                  fprintf(f, "}\n");
586 mike     1.1 
587                  fclose(f);
588 krisbash 1.3 
589                  printf("Created %s\n", fileName.c_str());
590 mike     1.1 }
591              
592              int main(int argc, char** argv)
593              {
594                  arg0 = argv[0];
595              
596                  if (argc < 2)
597                  {
598                      fprintf(stderr, "Usage: %s path\n", arg0);
599                      exit(1);
600                  }
601              
602 krisbash 1.3     Vector tuples;
603                  LoadSpecFile(argv[1], tuples);
604 mike     1.1 
605 krisbash 1.3 #if 0
606                  for (size_t i = 0; i < tuples.size(); i++)
607 mike     1.1     {
608 krisbash 1.3         const Tuple& t = tuples[i];
609                      printf("{%c}{%s}{%s}\n", t.ch, t.str.c_str(), t.tag.c_str());
610 mike     1.1     }
611              #endif
612              
613 krisbash 1.3     if (tuples.empty())
614 mike     1.1     {
615 krisbash 1.3         fprintf(stderr, "no data for building hash function\n");
616                      exit(1);
617 mike     1.1     }
618              
619 krisbash 1.3     // Determine the base filename:
620                  string base = BaseName(argv[1]);
621 mike     1.1 
622 krisbash 1.3     GenHashStrHeader(base + ".h", tuples);
623                  GenFastHashStrFunction(base + "_quick.inc", base, tuples);
624                  GenSmallHashStrFunction(base + "_small.inc", base, tuples);
625 mike     1.1 
626                  return 0;
627              }

ViewCVS 0.9.2