1 mike 1.1 /*
2 **==============================================================================
3 **
4 ** Open Management Infrastructure (OMI)
5 **
6 ** Copyright (c) Microsoft Corporation
7 **
8 ** Licensed under the Apache License, Version 2.0 (the "License"); you may not
9 ** use this file except in compliance with the License. You may obtain a copy
10 ** of the License at
11 **
12 ** http://www.apache.org/licenses/LICENSE-2.0
13 **
14 ** THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 ** KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
16 ** WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
17 ** MERCHANTABLITY OR NON-INFRINGEMENT.
18 **
19 ** See the Apache 2 License for the specific language governing permissions
20 ** and limitations under the License.
21 **
22 mike 1.1 **==============================================================================
23 */
24
25 #include <iostream>
26 #include <cctype>
27 #include <vector>
28 #include <string>
29 #include <cstdio>
30 #include <cctype>
31 #include <cstdlib>
32 #include <cstring>
33 #include <map>
34
35 using namespace std;
36 const char* arg0;
37
38 typedef pair<string, string> Pair;
39 typedef vector<Pair> Vector;
40
41 void LoadSpecFile(const char* path, Vector& pairs)
42 {
43 mike 1.1 // Open input file:
44 FILE* is = fopen(path, "r");
45
46 if (!is)
47 {
48 fprintf(stderr, "%s: failed to open: %s\n", arg0, path);
49 exit(1);
50 }
51
52 // Read each line:
53 char buf[1024];
54 unsigned int line = 1;
55
56 for (; fgets(buf, sizeof(buf), is) != NULL; line++)
57 {
58 char* p;
59 vector<string> toks;
60 char* str;
61 char* tag;
62
63 // Skip leading spaces:
64 mike 1.1 for (p = buf; isspace(*p); p++)
65 ;
66
67 // Skip comments:
68 if (p[0] == '#')
69 continue;
70
71 // Remove trailing spaces:
72 {
73 char* end = p + strlen(buf);
74 while (end != p && isspace(end[-1]))
75 *--end = '\0';
76 }
77
78 // Expect string
79 {
80 str = p;
81
82 while (*p && *p != ',')
83 p++;
84 }
85 mike 1.1
86 // Skip spaces:
87 while (isspace(*p))
88 p++;
89
90 if (!*p)
91 {
92 char buf[32];
93 sprintf(buf, "TAG%u", line);
94 pairs.push_back(Pair(str, buf));
95 continue;
96 }
97
98 // Expect ':'
99 if (*p != ',')
100 {
101 fprintf(stderr, "%s: syntax error on line %u\n", arg0, line);
102 exit(1);
103 }
104
105 *p++ = '\0';
106 mike 1.1
107 // Skip spaces:
108 while (isspace(*p))
109 p++;
110
111 // Expect tag
112 {
113 tag = p;
114
115 if (!isalpha(*p) && *p != '_')
116 {
117 fprintf(stderr, "%s: syntax error on line %u\n", arg0, line);
118 exit(1);
119 }
120
121 while (isalnum(*p) || *p == '_')
122 p++;
123 }
124
125 *p++ = '\0';
126
127 mike 1.1 // Skip spaces:
128 while (isspace(*p))
129 p++;
130
131 if (*p)
132 {
133 fprintf(stderr, "%s: syntax error on line %u\n", arg0, line);
134 exit(1);
135 }
136
137 pairs.push_back(Pair(str, tag));
138 }
139 }
140
141 static void _GenEnum(FILE* f, const Vector& pairs)
142 {
143 // Generate enumeration:
144 {
145 fprintf(f, "enum\n");
146 fprintf(f, "{\n");
147
148 mike 1.1 for (size_t i = 0; i < pairs.size(); i++)
149 {
150 fprintf(f, " %s = %u%s\n", pairs[i].second.c_str(), (int)(i + 1), ((i+1) == pairs.size() ? "" : ","));
151 }
152
153 fprintf(f, "};\n");
154 fprintf(f, "\n");
155 }
156 }
157
158 #if 0
159 static void GenSimpleFunction(const char* fileName, const Vector& pairs)
160 {
161 FILE* f = fopen(fileName, "w");
162
163 if (!f)
164 {
165 fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName);
166 exit(1);
167 }
168
169 mike 1.1
170 //_GenEnum(f, pairs);
171
172 /* header */
173 fprintf(f, "static int SimpleStr(const char* s, size_t n)\n");
174 fprintf(f, "{\n");
175
176 /* generate function body */
177 fprintf(f, "n=n;\n ");
178 for (size_t i = 0; i < pairs.size(); i++)
179 {
180 const string& str = pairs[i].first.c_str();
181 const string& tok = pairs[i].second.c_str();
182
183 fprintf(f, " if (strcmp(s, \"%s\") == 0)\n",
184 str.c_str());
185
186 fprintf(f, " ");
187 fprintf(f, "return %s;\n", tok.c_str());
188 fprintf(f, " else");
189 }
190 mike 1.1
191 fprintf(f, "\n return 0; /* not found */\n");
192
193 /* close the function */
194 //fprintf(f, "\n");
195 fprintf(f, "}\n\n");
196
197
198 fclose(f);
199 }
200 #endif
201
202 #if 0
203 static void GenHashFunction(const char* fileName, const Vector& pairs)
204 {
205 FILE* f = fopen(fileName, "w");
206
207 if (!f)
208 {
209 fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName);
210 exit(1);
211 mike 1.1 }
212
213
214 //_GenEnum(f, pairs);
215
216 // Generate function
217 {
218 typedef pair<unsigned int, Vector> MPair;
219 typedef map<unsigned int, Vector> Map;
220 Map m;
221
222 fprintf(f, "int HashStr(const char* s, size_t n)\n");
223 fprintf(f, "{\n");
224 fprintf(f, " unsigned int h = (unsigned int)(n ^ s[0] ^ s[n-1]);\n");
225 fprintf(f, "\n");
226 fprintf(f, " switch (h)\n");
227 fprintf(f, " {\n");
228
229 // Conslidate pairs with like hash codes
230 for (size_t i = 0; i < pairs.size(); i++)
231 {
232 mike 1.1 const char* s = pairs[i].first.c_str();
233 size_t n = pairs[i].first.size();
234 unsigned int h = n ^ s[0] ^ s[n-1];
235
236 Map::iterator pos = m.find(h);
237
238 if (pos == m.end())
239 {
240 Vector v;
241 v.push_back(pairs[i]);
242 m.insert(MPair(h,v));
243 }
244 else
245 {
246 (*pos).second.push_back(pairs[i]);
247 }
248 }
249
250 // Print cases:
251 for (Map::iterator p = m.begin(); p != m.end(); p++)
252 {
253 mike 1.1 const Vector& v = (*p).second;
254
255 fprintf(f, " case %u:\n", (*p).first);
256
257 for (size_t i = 0; i < v.size(); i++)
258 {
259 const string& str = v[i].first.c_str();
260 const string& tok = v[i].second.c_str();
261
262 fprintf(f, " ");
263 fprintf(f, "if (n == %u && memcmp(s, \"%s\", n) == 0)\n",
264 (int)str.size(), str.c_str());
265
266 fprintf(f, " ");
267 fprintf(f, "return %s;\n", tok.c_str());
268 }
269
270 fprintf(f, " break;\n");
271 }
272
273 fprintf(f, " }\n");
274 mike 1.1 fprintf(f, " /* Not found */\n");
275 fprintf(f, " return 0;\n");
276 fprintf(f, "}\n");
277 }
278 fclose(f);
279 }
280 #endif
281
282 typedef multimap<unsigned int, Pair> Map;
283 typedef pair<unsigned int, Pair> MPair;
284 static void _GenStringCmp(FILE* f, const Pair& p)
285 {
286 const string& str = p.first.c_str();
287 const string& tok = p.second.c_str();
288
289 fprintf(f, " ");
290 fprintf(f, "if (strcmp(s, \"%s\") == 0)\n",
291 str.c_str());
292
293 fprintf(f, " ");
294 fprintf(f, "return %s;\n", tok.c_str());
295 mike 1.1 }
296
297
298 static void _GenSubSwitchForRange(
299 FILE* f,
300 Map::const_iterator pos,
301 Map::const_iterator range_end)
302 {
303 // find best character with min. number of collisions
304 unsigned int bestIndex = 0;
305 unsigned int bestCollision = 0xFFFFFFFF;
306 unsigned int n = pos->first;
307
308 for ( unsigned int index = 0; index < n; index++ )
309 {
310 vector<unsigned int> collisions(256, 0);
311 unsigned int currentMax = 0;
312
313
314 for ( Map::const_iterator it = pos; it != range_end; it++ )
315 {
316 mike 1.1 unsigned char c = it->second.first[index];
317 collisions[c]++;
318 if (collisions[c] > currentMax)
319 currentMax = collisions[c];
320 }
321
322 #if 0
323 printf("n is %d, index %d, currentMax %d, best %d\n",
324 n, index, currentMax, bestCollision);
325 #endif
326
327 if (bestCollision > currentMax)
328 {
329 bestCollision = currentMax;
330 bestIndex = index;
331 }
332
333 if (1 == bestCollision)
334 break;
335 }
336
337 mike 1.1
338 #if 0
339 printf("n is %d, best index %d, best %d\n",
340 n, bestIndex, bestCollision);
341 #endif
342
343 // build a submap based on best index value
344 Map subMap;
345
346 for ( Map::const_iterator it = pos; it != range_end; it++ )
347 {
348 unsigned char c = it->second.first[bestIndex];
349
350 subMap.insert(MPair(c, it->second));
351 }
352
353 fprintf(f, " switch (s[%u])\n", bestIndex);
354 fprintf(f, " {\n");
355
356 // process submap
357 // generate cases for each length
358 mike 1.1 Map::const_iterator posSubMap = subMap.begin();
359
360 while (posSubMap != subMap.end())
361 {
362 unsigned int n = posSubMap->first;
363
364 Map::const_iterator range_end = subMap.upper_bound(n);
365
366 fprintf(f, " case %u:\n", n);
367
368 while (posSubMap != range_end)
369 {
370 _GenStringCmp(f, posSubMap->second);
371 posSubMap++;
372 }
373
374 //printf("n is %d, number of items is %d\n", n, distance(posSubMap, range_end));
375 fprintf(f, " break;\n");
376
377 posSubMap = subMap.lower_bound(n+1);
378 }
379 mike 1.1
380 fprintf(f, " }\n");
381 fprintf(f, "\n");
382
383 }
384
385 static void GenHashStrHeader(
386 const string& fileName,
387 const Vector& pairs)
388 {
389 FILE* f = fopen(fileName.c_str(), "w");
390
391 if (!f)
392 {
393 fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName.c_str());
394 exit(1);
395 }
396
397 _GenEnum(f, pairs);
398
399 /* file header */
400 mike 1.1 fprintf(f, "int HashStr(const char* s, size_t n);\n");
401 fprintf(f, "\n");
402
403 printf("Created %s\n", fileName.c_str());
404 }
405
406 static void GenHashStrFunction(
407 const string& fileName,
408 const Vector& pairs)
409 {
410 FILE* f = fopen(fileName.c_str(), "w");
411
412 if (!f)
413 {
414 fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName.c_str());
415 exit(1);
416 }
417
418 //_GenEnum(f, pairs);
419
420 /* file header */
421 mike 1.1 fprintf(f, "int HashStr(const char* s, size_t n)\n");
422 fprintf(f, "{\n");
423 fprintf(f, "\n");
424 fprintf(f, " switch (n)\n");
425 fprintf(f, " {\n");
426 /* create a map based on str len */
427
428 Map m;
429
430 // Conslidate pairs with like str length
431 for (size_t i = 0; i < pairs.size(); i++)
432 {
433 size_t n = pairs[i].first.size();
434
435 m.insert(MPair(n, pairs[i]));
436 }
437
438
439 // generate cases for each length
440 Map::const_iterator pos = m.begin();
441
442 mike 1.1 while (pos != m.end())
443 {
444 unsigned int n = pos->first;
445
446 Map::const_iterator range_end = m.upper_bound(n);
447
448 fprintf(f, " case %u:\n", n);
449
450 if (1 >= distance(pos, range_end))
451 {
452 // special case - only few elements in range
453 while (pos != range_end)
454 {
455 _GenStringCmp(f, pos->second);
456 pos++;
457 }
458
459 }
460 else
461 {
462 _GenSubSwitchForRange(f, pos, range_end);
463 mike 1.1 }
464
465 //printf("n is %d, number of items is %d\n", n, distance(pos, range_end));
466 fprintf(f, " break;\n");
467
468 pos = m.lower_bound(n+1);
469 }
470
471
472 fprintf(f, " }\n");
473 fprintf(f, " /* Not found */\n");
474 fprintf(f, " return 0;\n");
475 fprintf(f, "}\n");
476
477 fclose(f);
478
479 printf("Created %s\n", fileName.c_str());
480 }
481
482 #if 0
483 static void GenUnittest(const char* fileName, const Vector& pairs)
484 mike 1.1 {
485 FILE* f = fopen(fileName, "w");
486
487 if (!f)
488 {
489 fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName);
490 exit(1);
491 }
492
493 /* file header */
494 fprintf(f, "#include <ut/ut.h>\n");
495 fprintf(f, "#include <string.h>\n");
496 fprintf(f, "#include \"../quickstr.h\"\n");
497 fprintf(f, "#include \"../quickstr.inc\"\n");
498 fprintf(f, "#include \"hash.inc\"\n");
499 fprintf(f, "#include \"simple.inc\"\n");
500
501 fprintf(f, "\n");
502 fprintf(f, "static void setUp()\n{\n}\n");
503 fprintf(f, "static void cleanup()\n{\n}\n");
504
505 mike 1.1 fprintf(f, "typedef int (*FN)(const char* s, size_t n);\n");
506
507 fprintf(f, "static void testAllStrings(FN f)\n{\n");
508
509 /* generate all tests */
510 for (size_t i = 0; i < pairs.size(); i++)
511 {
512 const string& str = pairs[i].first.c_str();
513 const string& tok = pairs[i].second.c_str();
514
515 fprintf(f, " UT_ASSERT((*f)(\"%s\", %u) == %s);\n",
516 str.c_str(), (int)str.size(), tok.c_str() );
517
518 }
519
520 /* add comparison with not-exisitng strings */
521 fprintf(f, " UT_ASSERT((*f)(\"*\", 1) == 0);\n") ;
522 fprintf(f, " UT_ASSERT((*f)(\"**\", 2) == 0);\n") ;
523 fprintf(f, " UT_ASSERT((*f)(\"***\", 3) == 0);\n") ;
524 fprintf(f, " UT_ASSERT((*f)(\"****\", 4) == 0);\n") ;
525 fprintf(f, " UT_ASSERT((*f)(\"*****\", 5) == 0);\n") ;
526 mike 1.1 fprintf(f, " UT_ASSERT((*f)(\"******\", 6) == 0);\n") ;
527
528
529 /* Generate end of unittest file */
530 fprintf(f, "}\n\n");
531 fprintf(f, "static void TestSimple()\n");
532 fprintf(f, "{\n");
533 fprintf(f, " for(int i=0; i< 10000; i++) testAllStrings(SimpleStr);\n");
534 fprintf(f, "}\n");
535 fprintf(f, "\n");
536
537 fprintf(f, "static void TestHash()\n");
538 fprintf(f, "{\n");
539 fprintf(f, " for(int i=0; i< 10000; i++) testAllStrings(HashStr);\n");
540 fprintf(f, "}\n");
541 fprintf(f, "\n");
542
543 fprintf(f, "static void TestHashStr()\n");
544 fprintf(f, "{\n");
545 fprintf(f, " for(int i=0; i< 10000; i++) testAllStrings(HashStr);\n");
546 fprintf(f, "}\n");
547 mike 1.1 fprintf(f, "\n");
548
549 fprintf(f, "static void RunTests()\n");
550 fprintf(f, "{\n");
551 fprintf(f, " UT_TEST(TestSimple);\n");
552 fprintf(f, " UT_TEST(TestHash);\n");
553 fprintf(f, " UT_TEST(TestHashStr);\n");
554 fprintf(f, "}\n");
555 fprintf(f, "\n");
556 fprintf(f, "UT_ENTRY_POINT(RunTests);\n");
557
558
559 fclose(f);
560 }
561 #endif
562
563 int main(int argc, char** argv)
564 {
565 arg0 = argv[0];
566
567 if (argc < 2)
568 mike 1.1 {
569 fprintf(stderr, "Usage: %s path\n", arg0);
570 exit(1);
571 }
572
573 Vector pairs;
574 LoadSpecFile(argv[1], pairs);
575
576 if (pairs.empty())
577 {
578 fprintf(stderr, "no data for building hash function\n");
579 exit(1);
580 }
581
582 #if 0
583 GenSimpleFunction("simple.inc", pairs);
584 GenHashFunction("hash.inc", pairs);
585 #endif
586
587 // Determine the base filename:
588 string base = argv[1];
589 mike 1.1 {
590 size_t pos = base.rfind('/');
591
592 if (pos != string::npos)
593 base = base.substr(pos + 1);
594
595 pos = base.rfind('.');
596
597 if (pos != string::npos)
598 base = base.substr(0, pos);
599 }
600
601 GenHashStrHeader(base + ".h", pairs);
602 GenHashStrFunction(base + ".inc", pairs);
603
604 #if 0
605 GenUnittest("tests/test_strhash.cpp", pairs);
606 #endif
607
608 return 0;
609 }
|