![]() ![]() |
![]() |
1 karl 1.18 //%2006//////////////////////////////////////////////////////////////////////// | ||
2 david 1.1 // | ||
3 karl 1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.; | ||
6 karl 1.4 // IBM Corp.; EMC Corporation, The Open Group. | ||
7 karl 1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group. 9 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group. | ||
11 karl 1.18 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; 12 // EMC Corporation; Symantec Corporation; The Open Group. | ||
13 david 1.1 // 14 // Permission is hereby granted, free of charge, to any person obtaining a copy 15 // of this software and associated documentation files (the "Software"), to 16 // deal in the Software without restriction, including without limitation the 17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 18 // sell copies of the Software, and to permit persons to whom the Software is 19 // furnished to do so, subject to the following conditions: | ||
20 karl 1.18 // | ||
21 david 1.1 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN 22 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 24 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 // 30 //============================================================================== 31 // 32 // Author: Dave Rosckes (rosckes@us.ibm.com) 33 // | ||
34 david.dillard 1.12 // Modified By: David Dillard, VERITAS Software Corp. 35 // (david.dillard@veritas.com) | ||
36 yi.zhou 1.16 // Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com) | ||
37 david 1.1 // 38 //%///////////////////////////////////////////////////////////////////////////// 39 | ||
40 chuck 1.7 #include <Pegasus/Common/Config.h> 41 #include <Pegasus/Common/Array.h> | ||
42 yi.zhou 1.16 #include <Pegasus/Common/Logger.h> | ||
43 david 1.1 #include "CommonUTF.h" | ||
44 chuck 1.7 #include <cstdio> | ||
45 david 1.2 #include <cstring> | ||
46 david.dillard 1.15 #include <cctype> | ||
47 kumpf 1.3 | ||
48 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU 49 #include <unicode/uclean.h> 50 #endif 51 | ||
52 david 1.1 PEGASUS_NAMESPACE_BEGIN | ||
53 kumpf 1.3 | ||
54 karl 1.19 const Uint32 halfBase = 0x0010000UL; 55 const Uint32 halfMask = 0x3FFUL; 56 const int halfShift = 10; 57 const Uint8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 58 59 const Uint32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 60 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; 61 62 const char trailingBytesForUTF8[256] = { 63 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 64 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 65 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 66 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 67 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 68 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 69 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 70 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 71 }; | ||
72 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c) | ||
73 chuck 1.6 { 74 Uint8 n; 75 | ||
76 david.dillard 1.14 if (isdigit(c)) | ||
77 chuck 1.6 n = (c - '0'); | ||
78 david.dillard 1.14 else if (isupper(c)) | ||
79 chuck 1.6 n = (c - 'A' + 10); 80 else // if (islower(c)) 81 n = (c - 'a' + 10); 82 83 return n; 84 } 85 | ||
86 kumpf 1.3 // Note: Caller must ensure that "src" contains "size" bytes. | ||
87 chip 1.11 Boolean isValid_U8(const Uint8 *src, int size) | ||
88 david 1.1 { 89 Uint8 U8_char; 90 const Uint8 *srcptr = src+size; 91 switch (size) 92 { | ||
93 chip 1.11 case 4: 94 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF) 95 { 96 return false; 97 } 98 case 3: 99 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF) 100 { 101 return false; 102 } 103 case 2: 104 if ((U8_char = (*--srcptr)) > 0xBF) 105 { 106 return false; 107 } 108 switch (*src) 109 { 110 case 0xE0: 111 if (U8_char < 0xA0) 112 { 113 return false; 114 chip 1.11 } 115 break; 116 case 0xF0: 117 if (U8_char < 0x90) 118 { 119 return false; 120 } 121 break; 122 case 0xF4: 123 if (U8_char > 0x8F) 124 { 125 return false; 126 } 127 break; 128 default: 129 if (U8_char < 0x80) 130 { 131 return false; 132 } 133 } 134 case 1: 135 chip 1.11 if (*src >= 0x80 && *src < 0xC2) 136 { 137 return false; 138 } 139 if (*src > 0xF4) 140 { 141 return false; 142 } 143 break; | ||
144 david 1.2 default: | ||
145 chip 1.11 { 146 return false; | ||
147 david 1.2 } | ||
148 david 1.1 149 } 150 return true; | ||
151 chip 1.11 } | ||
152 david 1.1 153 int UTF16toUTF8(const Uint16** srcHead, | ||
154 chip 1.11 const Uint16* srcEnd, 155 Uint8** tgtHead, 156 Uint8* tgtEnd) | ||
157 david 1.1 { 158 int returnCode = 0; 159 const Uint16* src = *srcHead; 160 Uint8* tgt = *tgtHead; 161 while (src < srcEnd) 162 { | ||
163 mike 1.17 if (*src < 128) 164 { 165 if (tgt == tgtEnd) 166 { 167 returnCode = -1; 168 break; 169 } 170 171 *tgt++ = *src++; 172 continue; 173 } 174 | ||
175 chip 1.11 Uint32 tempchar; 176 Uint16 numberOfBytes = 0; 177 const Uint16* oldsrc = src; 178 tempchar = *src++; 179 if (tempchar >= FIRST_HIGH_SURROGATE 180 && tempchar <= LAST_HIGH_SURROGATE) 181 { 182 if (src < srcEnd) 183 { 184 Uint32 tempchar2 = *src; 185 if (tempchar2 >= FIRST_LOW_SURROGATE && 186 tempchar2 <= LAST_LOW_SURROGATE) 187 { 188 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift) 189 + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase; 190 ++src; 191 } 192 } 193 else 194 { 195 --src; 196 chip 1.11 returnCode = -1; 197 break; 198 } 199 } 200 if (tempchar < (Uint32)0x80) 201 { 202 numberOfBytes = 1; 203 } 204 else if (tempchar < (Uint32)0x800) 205 { 206 numberOfBytes = 2; 207 } 208 else if (tempchar < (Uint32)0x10000) 209 { 210 numberOfBytes = 3; 211 } 212 else if (tempchar < (Uint32)0x200000) 213 { 214 numberOfBytes = 4; 215 } 216 else 217 chip 1.11 { 218 numberOfBytes = 2; 219 tempchar = REPLACEMENT_CHARACTER; 220 } 221 222 tgt += numberOfBytes; 223 if (tgt > tgtEnd) 224 { 225 src = oldsrc; 226 tgt -= numberOfBytes; 227 returnCode = -1; 228 break; 229 } 230 231 switch (numberOfBytes) 232 { 233 case 4: 234 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF); 235 tempchar >>= 6; 236 case 3: 237 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF); 238 chip 1.11 tempchar >>= 6; 239 case 2: 240 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF); 241 tempchar >>= 6; 242 case 1: 243 *--tgt = (Uint8)(tempchar | firstByteMark[numberOfBytes]); 244 } 245 tgt += numberOfBytes; | ||
246 david 1.1 } 247 *srcHead = src; 248 *tgtHead = tgt; 249 return returnCode; 250 } 251 252 int UTF8toUTF16 (const Uint8** srcHead, | ||
253 chip 1.11 const Uint8* srcEnd, 254 Uint16** tgtHead, 255 Uint16* tgtEnd) | ||
256 david 1.1 { 257 int returnCode = 0; 258 const Uint8* src = *srcHead; 259 Uint16* tgt = *tgtHead; 260 while (src < srcEnd) 261 { | ||
262 chip 1.11 Uint32 tempchar = 0; 263 Uint16 moreBytes = trailingBytesForUTF8[*src]; 264 if (src + moreBytes >= srcEnd) 265 { 266 returnCode = -1; 267 break; 268 } 269 switch (moreBytes) 270 { 271 case 3: 272 tempchar += *src++; 273 tempchar <<= 6; 274 case 2: 275 tempchar += *src++; 276 tempchar <<= 6; 277 case 1: 278 tempchar += *src++; 279 tempchar <<= 6; 280 case 0: 281 tempchar += *src++; 282 } 283 chip 1.11 tempchar -= offsetsFromUTF8[moreBytes]; 284 285 if (tgt >= tgtEnd) 286 { 287 src -= (moreBytes+1); 288 returnCode = -1; break; 289 } 290 if (tempchar <= MAX_BYTE) 291 { 292 if ((tempchar >= FIRST_HIGH_SURROGATE && 293 tempchar <= LAST_LOW_SURROGATE) || 294 ((tempchar & 0xFFFE) == 0xFFFE)) 295 { 296 *tgt++ = REPLACEMENT_CHARACTER; 297 } 298 else 299 { 300 *tgt++ = (Uint16)tempchar; 301 } 302 } 303 else if (tempchar > MAX_UTF16) 304 chip 1.11 { 305 *tgt++ = REPLACEMENT_CHARACTER; 306 } 307 else 308 { 309 if (tgt + 1 >= tgtEnd) 310 { 311 src -= (moreBytes+1); 312 returnCode = -1; 313 break; 314 } 315 tempchar -= halfBase; 316 *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE); 317 *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE); 318 } | ||
319 david 1.1 } 320 *srcHead = src; 321 *tgtHead = tgt; 322 return returnCode; 323 } | ||
324 david 1.5 | ||
325 mike 1.17 Boolean isUTF8Aux(const char *legal) | ||
326 david 1.5 { 327 char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1; 328 329 // Validate that the string is long enough to hold all the expected bytes. 330 // Note that if legal[0] == 0, numBytes will be 1. 331 for (char i=1; i<numBytes; i++) 332 { 333 if (legal[i] == 0) 334 { 335 return false; 336 } 337 } 338 339 return (isValid_U8((const Uint8 *)legal, numBytes)); 340 } | ||
341 chuck 1.6 | ||
342 chuck 1.9 Boolean isUTF8Str(const char *legal) 343 { | ||
344 chip 1.11 /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE, | ||
345 chuck 1.9 0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84, 346 0x00};*/ | ||
347 chip 1.11 // char tmp_[] = "class"; 348 // char * tmp = legal; | ||
349 david.dillard 1.13 size_t count = 0; 350 const size_t size = strlen(legal); | ||
351 chip 1.11 // printf("size = %d\n",size); | ||
352 david.dillard 1.13 while(count<size) 353 { | ||
354 chip 1.11 // printf("count = %d\n",count); | ||
355 david.dillard 1.13 if(isUTF8(&legal[count]) == true){ 356 UTF8_NEXT(legal,count); | ||
357 chip 1.11 }else{ 358 // printf("bad string\n"); 359 return false; 360 } | ||
361 david.dillard 1.13 } | ||
362 chip 1.11 // printf("good string\n"); 363 return true; | ||
364 chuck 1.9 /* | ||
365 chip 1.11 printf("legal = %s\n\n", legal); 366 Uint32 count = 0; 367 Uint32 trailingBytes = 0; | ||
368 chuck 1.9 Uint32 size = strlen(legal); | ||
369 chip 1.11 printf("size of legal is %d\n",size); | ||
370 chuck 1.9 while(count<size-1) 371 { | ||
372 chip 1.11 printf("count = %d\n", count); | ||
373 chuck 1.9 if(isUTF8((char*)&legal[count]) == true){ | ||
374 chip 1.11 UTF8_NEXT(legal,trailingBytes); 375 count += trailingBytes; 376 } else{ 377 printf("CommonUTF8:: returning false; position[%d]",count); 378 return false; 379 } | ||
380 chuck 1.9 } | ||
381 chip 1.11 printf("CommonUTF8:: returning false; position[%d]",count); 382 return true;*/ | ||
383 chuck 1.9 } | ||
384 chuck 1.6 385 String escapeStringEncoder(const String& Str) 386 { 387 String escapeStr; 388 Uint16 escChar; 389 char hexencoding[6]; | ||
390 chip 1.11 | ||
391 chuck 1.6 for(Uint32 i = 0; i < Str.size(); ++i) 392 { | ||
393 chip 1.11 escChar = Str[i]; 394 if(escChar <= 0x7F) | ||
395 chuck 1.6 { | ||
396 chip 1.11 escapeStr.append(escChar); | ||
397 chuck 1.6 } | ||
398 chip 1.11 else 399 { 400 memset(hexencoding,0x00,sizeof(hexencoding)); | ||
401 chuck 1.6 sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16); 402 escapeStr.append(hexencoding); | ||
403 chip 1.11 } | ||
404 chuck 1.6 } 405 return(escapeStr); 406 } 407 408 String escapeStringDecoder(const String& Str) 409 { 410 Uint32 i; 411 | ||
412 chip 1.11 Array<Uint16> utf16Chars; | ||
413 chuck 1.6 414 for (i=0; i< Str.size(); ++i) 415 { 416 if (Str[i] == '%') 417 { 418 Uint8 digit1 = _hexCharToNumeric((Str[++i])); 419 Uint8 digit2 = _hexCharToNumeric((Str[++i])); 420 Uint8 digit3 = _hexCharToNumeric((Str[++i])); 421 Uint8 digit4 = _hexCharToNumeric((Str[++i])); 422 | ||
423 chip 1.11 Uint16 decodedChar = (digit1<<12) + (digit2<<8) + | ||
424 chuck 1.6 (digit3<< 4) + (digit4); 425 | ||
426 chip 1.11 utf16Chars.append(decodedChar); | ||
427 chuck 1.6 } 428 else 429 { | ||
430 chip 1.11 utf16Chars.append((Uint16)Str[i]); | ||
431 chuck 1.6 } 432 } 433 434 // If there was a string to decode... 435 if (Str.size() > 0) 436 { 437 utf16Chars.append('\0'); 438 return String((Char16 *)utf16Chars.getData()); 439 } 440 else 441 { 442 return String(); 443 } 444 } 445 | ||
#ifdef PEGASUS_HAS_ICU

// State shared by all callers of initICUSuccessful().
Boolean InitializeICU::_initAttempted = false;
Boolean InitializeICU::_initSuccessful = false;
Mutex InitializeICU::_initMutex;

// Calls u_init() at most once and reports whether that call succeeded.
// The first check of _initAttempted is made without holding _initMutex; the
// flag is checked again under the mutex before u_init() is invoked. A
// failure is logged as a warning and remembered, so later calls return false
// without trying again.
Boolean InitializeICU::initICUSuccessful()
{
    // An attempt has already been made: report its recorded outcome.
    if (_initAttempted)
    {
        return _initSuccessful;
    }

    AutoMutex lock(_initMutex);

    // Re-check now that the mutex is held.
    if (!_initAttempted)
    {
        UErrorCode status = U_ZERO_ERROR;

        // Initialize ICU
        u_init(&status);

        if (U_FAILURE(status))
        {
            _initSuccessful = false;
            Logger::put (Logger::STANDARD_LOG , System::CIMSERVER,
                Logger::WARNING,
                "ICU initialization failed with error: $0.",
                status);
        }
        else
        {
            _initSuccessful = true;
        }

        // Set last, after the outcome has been recorded.
        _initAttempted = true;
    }

    return _initSuccessful;
}

#endif

488 david 1.1 PEGASUS_NAMESPACE_END |
No CVS admin address has been configured |
Powered by ViewCVS 0.9.2 |