/* **============================================================================== ** ** Open Management Infrastructure (OMI) ** ** Copyright (c) Microsoft Corporation ** ** Licensed under the Apache License, Version 2.0 (the "License"); you may not ** use this file except in compliance with the License. You may obtain a copy ** of the License at ** ** http://www.apache.org/licenses/LICENSE-2.0 ** ** THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY ** KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED ** WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, ** MERCHANTABLITY OR NON-INFRINGEMENT. ** ** See the Apache 2 License for the specific language governing permissions ** and limitations under the License. ** **============================================================================== */ #include #include #include #include #include #include #include #include #include using namespace std; const char* arg0; typedef pair Pair; typedef vector Vector; void LoadSpecFile(const char* path, Vector& pairs) { // Open input file: FILE* is = fopen(path, "r"); if (!is) { fprintf(stderr, "%s: failed to open: %s\n", arg0, path); exit(1); } // Read each line: char buf[1024]; unsigned int line = 1; for (; fgets(buf, sizeof(buf), is) != NULL; line++) { char* p; vector toks; char* str; char* tag; // Skip leading spaces: for (p = buf; isspace(*p); p++) ; // Skip comments: if (p[0] == '#') continue; // Remove trailing spaces: { char* end = p + strlen(buf); while (end != p && isspace(end[-1])) *--end = '\0'; } // Expect string { str = p; while (*p && *p != ',') p++; } // Skip spaces: while (isspace(*p)) p++; if (!*p) { char buf[32]; sprintf(buf, "TAG%u", line); pairs.push_back(Pair(str, buf)); continue; } // Expect ':' if (*p != ',') { fprintf(stderr, "%s: syntax error on line %u\n", arg0, line); exit(1); } *p++ = '\0'; // Skip spaces: while (isspace(*p)) p++; // Expect tag { tag = p; if (!isalpha(*p) && *p != '_') { fprintf(stderr, "%s: syntax error on line %u\n", arg0, line); exit(1); } while (isalnum(*p) || *p == '_') p++; } *p++ = '\0'; // Skip spaces: while (isspace(*p)) p++; if (*p) { fprintf(stderr, "%s: syntax error on line %u\n", arg0, line); exit(1); } pairs.push_back(Pair(str, tag)); } } static void _GenEnum(FILE* f, const Vector& pairs) { // Generate enumeration: { fprintf(f, "enum\n"); fprintf(f, "{\n"); for (size_t i = 0; i < pairs.size(); i++) { fprintf(f, " %s = %u%s\n", pairs[i].second.c_str(), (int)(i + 1), ((i+1) == pairs.size() ? "" : ",")); } fprintf(f, "};\n"); fprintf(f, "\n"); } } #if 0 static void GenSimpleFunction(const char* fileName, const Vector& pairs) { FILE* f = fopen(fileName, "w"); if (!f) { fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName); exit(1); } //_GenEnum(f, pairs); /* header */ fprintf(f, "static int SimpleStr(const char* s, size_t n)\n"); fprintf(f, "{\n"); /* generate function body */ fprintf(f, "n=n;\n "); for (size_t i = 0; i < pairs.size(); i++) { const string& str = pairs[i].first.c_str(); const string& tok = pairs[i].second.c_str(); fprintf(f, " if (strcmp(s, \"%s\") == 0)\n", str.c_str()); fprintf(f, " "); fprintf(f, "return %s;\n", tok.c_str()); fprintf(f, " else"); } fprintf(f, "\n return 0; /* not found */\n"); /* close the function */ //fprintf(f, "\n"); fprintf(f, "}\n\n"); fclose(f); } #endif #if 0 static void GenHashFunction(const char* fileName, const Vector& pairs) { FILE* f = fopen(fileName, "w"); if (!f) { fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName); exit(1); } //_GenEnum(f, pairs); // Generate function { typedef pair MPair; typedef map Map; Map m; fprintf(f, "int HashStr(const char* s, size_t n)\n"); fprintf(f, "{\n"); fprintf(f, " unsigned int h = (unsigned int)(n ^ s[0] ^ s[n-1]);\n"); fprintf(f, "\n"); fprintf(f, " switch (h)\n"); fprintf(f, " {\n"); // Conslidate pairs with like hash codes for (size_t i = 0; i < pairs.size(); i++) { const char* s = pairs[i].first.c_str(); size_t n = pairs[i].first.size(); unsigned int h = n ^ s[0] ^ s[n-1]; Map::iterator pos = m.find(h); if (pos == m.end()) { Vector v; v.push_back(pairs[i]); m.insert(MPair(h,v)); } else { (*pos).second.push_back(pairs[i]); } } // Print cases: for (Map::iterator p = m.begin(); p != m.end(); p++) { const Vector& v = (*p).second; fprintf(f, " case %u:\n", (*p).first); for (size_t i = 0; i < v.size(); i++) { const string& str = v[i].first.c_str(); const string& tok = v[i].second.c_str(); fprintf(f, " "); fprintf(f, "if (n == %u && memcmp(s, \"%s\", n) == 0)\n", (int)str.size(), str.c_str()); fprintf(f, " "); fprintf(f, "return %s;\n", tok.c_str()); } fprintf(f, " break;\n"); } fprintf(f, " }\n"); fprintf(f, " /* Not found */\n"); fprintf(f, " return 0;\n"); fprintf(f, "}\n"); } fclose(f); } #endif typedef multimap Map; typedef pair MPair; static void _GenStringCmp(FILE* f, const Pair& p) { const string& str = p.first.c_str(); const string& tok = p.second.c_str(); fprintf(f, " "); fprintf(f, "if (strcmp(s, \"%s\") == 0)\n", str.c_str()); fprintf(f, " "); fprintf(f, "return %s;\n", tok.c_str()); } static void _GenSubSwitchForRange( FILE* f, Map::const_iterator pos, Map::const_iterator range_end) { // find best character with min. number of collisions unsigned int bestIndex = 0; unsigned int bestCollision = 0xFFFFFFFF; unsigned int n = pos->first; for ( unsigned int index = 0; index < n; index++ ) { vector collisions(256, 0); unsigned int currentMax = 0; for ( Map::const_iterator it = pos; it != range_end; it++ ) { unsigned char c = it->second.first[index]; collisions[c]++; if (collisions[c] > currentMax) currentMax = collisions[c]; } #if 0 printf("n is %d, index %d, currentMax %d, best %d\n", n, index, currentMax, bestCollision); #endif if (bestCollision > currentMax) { bestCollision = currentMax; bestIndex = index; } if (1 == bestCollision) break; } #if 0 printf("n is %d, best index %d, best %d\n", n, bestIndex, bestCollision); #endif // build a submap based on best index value Map subMap; for ( Map::const_iterator it = pos; it != range_end; it++ ) { unsigned char c = it->second.first[bestIndex]; subMap.insert(MPair(c, it->second)); } fprintf(f, " switch (s[%u])\n", bestIndex); fprintf(f, " {\n"); // process submap // generate cases for each length Map::const_iterator posSubMap = subMap.begin(); while (posSubMap != subMap.end()) { unsigned int n = posSubMap->first; Map::const_iterator range_end = subMap.upper_bound(n); fprintf(f, " case %u:\n", n); while (posSubMap != range_end) { _GenStringCmp(f, posSubMap->second); posSubMap++; } //printf("n is %d, number of items is %d\n", n, distance(posSubMap, range_end)); fprintf(f, " break;\n"); posSubMap = subMap.lower_bound(n+1); } fprintf(f, " }\n"); fprintf(f, "\n"); } static void GenHashStrHeader( const string& fileName, const Vector& pairs) { FILE* f = fopen(fileName.c_str(), "w"); if (!f) { fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName.c_str()); exit(1); } _GenEnum(f, pairs); /* file header */ fprintf(f, "int HashStr(const char* s, size_t n);\n"); fprintf(f, "\n"); printf("Created %s\n", fileName.c_str()); } static void GenHashStrFunction( const string& fileName, const Vector& pairs) { FILE* f = fopen(fileName.c_str(), "w"); if (!f) { fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName.c_str()); exit(1); } //_GenEnum(f, pairs); /* file header */ fprintf(f, "int HashStr(const char* s, size_t n)\n"); fprintf(f, "{\n"); fprintf(f, "\n"); fprintf(f, " switch (n)\n"); fprintf(f, " {\n"); /* create a map based on str len */ Map m; // Conslidate pairs with like str length for (size_t i = 0; i < pairs.size(); i++) { size_t n = pairs[i].first.size(); m.insert(MPair(n, pairs[i])); } // generate cases for each length Map::const_iterator pos = m.begin(); while (pos != m.end()) { unsigned int n = pos->first; Map::const_iterator range_end = m.upper_bound(n); fprintf(f, " case %u:\n", n); if (1 >= distance(pos, range_end)) { // special case - only few elements in range while (pos != range_end) { _GenStringCmp(f, pos->second); pos++; } } else { _GenSubSwitchForRange(f, pos, range_end); } //printf("n is %d, number of items is %d\n", n, distance(pos, range_end)); fprintf(f, " break;\n"); pos = m.lower_bound(n+1); } fprintf(f, " }\n"); fprintf(f, " /* Not found */\n"); fprintf(f, " return 0;\n"); fprintf(f, "}\n"); fclose(f); printf("Created %s\n", fileName.c_str()); } #if 0 static void GenUnittest(const char* fileName, const Vector& pairs) { FILE* f = fopen(fileName, "w"); if (!f) { fprintf(stderr, "%s: failed to open: %s\n", arg0, fileName); exit(1); } /* file header */ fprintf(f, "#include \n"); fprintf(f, "#include \n"); fprintf(f, "#include \"../quickstr.h\"\n"); fprintf(f, "#include \"../quickstr.inc\"\n"); fprintf(f, "#include \"hash.inc\"\n"); fprintf(f, "#include \"simple.inc\"\n"); fprintf(f, "\n"); fprintf(f, "static void setUp()\n{\n}\n"); fprintf(f, "static void cleanup()\n{\n}\n"); fprintf(f, "typedef int (*FN)(const char* s, size_t n);\n"); fprintf(f, "static void testAllStrings(FN f)\n{\n"); /* generate all tests */ for (size_t i = 0; i < pairs.size(); i++) { const string& str = pairs[i].first.c_str(); const string& tok = pairs[i].second.c_str(); fprintf(f, " UT_ASSERT((*f)(\"%s\", %u) == %s);\n", str.c_str(), (int)str.size(), tok.c_str() ); } /* add comparison with not-exisitng strings */ fprintf(f, " UT_ASSERT((*f)(\"*\", 1) == 0);\n") ; fprintf(f, " UT_ASSERT((*f)(\"**\", 2) == 0);\n") ; fprintf(f, " UT_ASSERT((*f)(\"***\", 3) == 0);\n") ; fprintf(f, " UT_ASSERT((*f)(\"****\", 4) == 0);\n") ; fprintf(f, " UT_ASSERT((*f)(\"*****\", 5) == 0);\n") ; fprintf(f, " UT_ASSERT((*f)(\"******\", 6) == 0);\n") ; /* Generate end of unittest file */ fprintf(f, "}\n\n"); fprintf(f, "static void TestSimple()\n"); fprintf(f, "{\n"); fprintf(f, " for(int i=0; i< 10000; i++) testAllStrings(SimpleStr);\n"); fprintf(f, "}\n"); fprintf(f, "\n"); fprintf(f, "static void TestHash()\n"); fprintf(f, "{\n"); fprintf(f, " for(int i=0; i< 10000; i++) testAllStrings(HashStr);\n"); fprintf(f, "}\n"); fprintf(f, "\n"); fprintf(f, "static void TestHashStr()\n"); fprintf(f, "{\n"); fprintf(f, " for(int i=0; i< 10000; i++) testAllStrings(HashStr);\n"); fprintf(f, "}\n"); fprintf(f, "\n"); fprintf(f, "static void RunTests()\n"); fprintf(f, "{\n"); fprintf(f, " UT_TEST(TestSimple);\n"); fprintf(f, " UT_TEST(TestHash);\n"); fprintf(f, " UT_TEST(TestHashStr);\n"); fprintf(f, "}\n"); fprintf(f, "\n"); fprintf(f, "UT_ENTRY_POINT(RunTests);\n"); fclose(f); } #endif int main(int argc, char** argv) { arg0 = argv[0]; if (argc < 2) { fprintf(stderr, "Usage: %s path\n", arg0); exit(1); } Vector pairs; LoadSpecFile(argv[1], pairs); if (pairs.empty()) { fprintf(stderr, "no data for building hash function\n"); exit(1); } #if 0 GenSimpleFunction("simple.inc", pairs); GenHashFunction("hash.inc", pairs); #endif // Determine the base filename: string base = argv[1]; { size_t pos = base.rfind('/'); if (pos != string::npos) base = base.substr(pos + 1); pos = base.rfind('.'); if (pos != string::npos) base = base.substr(0, pos); } GenHashStrHeader(base + ".h", pairs); GenHashStrFunction(base + ".inc", pairs); #if 0 GenUnittest("tests/test_strhash.cpp", pairs); #endif return 0; }