![]() ![]() |
![]() |
1 karl 1.18 //%2006//////////////////////////////////////////////////////////////////////// | ||
2 david 1.1 // | ||
3 karl 1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.; | ||
6 karl 1.4 // IBM Corp.; EMC Corporation, The Open Group. | ||
7 karl 1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group. 9 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group. | ||
11 karl 1.18 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; 12 // EMC Corporation; Symantec Corporation; The Open Group. | ||
13 david 1.1 // 14 // Permission is hereby granted, free of charge, to any person obtaining a copy 15 // of this software and associated documentation files (the "Software"), to 16 // deal in the Software without restriction, including without limitation the 17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 18 // sell copies of the Software, and to permit persons to whom the Software is 19 // furnished to do so, subject to the following conditions: | ||
20 karl 1.18 // | ||
21 david 1.1 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN 22 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 24 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 // | ||
30 kamal.locahana 1.22 //============================================================================= | ||
31 david 1.1 // | ||
32 kamal.locahana 1.22 //%//////////////////////////////////////////////////////////////////////////// | ||
33 david 1.1 | ||
34 chuck 1.7 #include <Pegasus/Common/Config.h> 35 #include <Pegasus/Common/Array.h> | ||
36 yi.zhou 1.16 #include <Pegasus/Common/Logger.h> | ||
37 david 1.1 #include "CommonUTF.h" | ||
38 chuck 1.7 #include <cstdio> | ||
39 david 1.2 #include <cstring> | ||
40 david.dillard 1.15 #include <cctype> | ||
41 kumpf 1.3 | ||
42 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU 43 #include <unicode/uclean.h> 44 #endif 45 | ||
46 david 1.1 PEGASUS_NAMESPACE_BEGIN | ||
47 kumpf 1.3 | ||
48 karl 1.19 const Uint32 halfBase = 0x0010000UL; 49 const Uint32 halfMask = 0x3FFUL; 50 const int halfShift = 10; 51 const Uint8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 52 53 const Uint32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 54 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; 55 56 const char trailingBytesForUTF8[256] = { 57 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 58 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 59 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 60 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 61 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 62 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 63 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 64 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 65 }; | ||
66 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c) | ||
67 chuck 1.6 { 68 Uint8 n; 69 | ||
70 david.dillard 1.14 if (isdigit(c)) | ||
71 chuck 1.6 n = (c - '0'); | ||
72 david.dillard 1.14 else if (isupper(c)) | ||
73 chuck 1.6 n = (c - 'A' + 10); 74 else // if (islower(c)) 75 n = (c - 'a' + 10); 76 77 return n; 78 } 79 | ||
80 kumpf 1.3 // Note: Caller must ensure that "src" contains "size" bytes. | ||
81 chip 1.11 Boolean isValid_U8(const Uint8 *src, int size) | ||
82 david 1.1 { 83 Uint8 U8_char; 84 const Uint8 *srcptr = src+size; 85 switch (size) 86 { | ||
87 chip 1.11 case 4: 88 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF) 89 { 90 return false; 91 } 92 case 3: 93 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF) 94 { 95 return false; 96 } 97 case 2: 98 if ((U8_char = (*--srcptr)) > 0xBF) 99 { 100 return false; 101 } 102 switch (*src) 103 { 104 case 0xE0: 105 if (U8_char < 0xA0) 106 { 107 return false; 108 chip 1.11 } 109 break; 110 case 0xF0: 111 if (U8_char < 0x90) 112 { 113 return false; 114 } 115 break; 116 case 0xF4: 117 if (U8_char > 0x8F) 118 { 119 return false; 120 } 121 break; 122 default: 123 if (U8_char < 0x80) 124 { 125 return false; 126 } 127 } 128 case 1: 129 chip 1.11 if (*src >= 0x80 && *src < 0xC2) 130 { 131 return false; 132 } 133 if (*src > 0xF4) 134 { 135 return false; 136 } 137 break; | ||
138 david 1.2 default: | ||
139 chip 1.11 { 140 return false; | ||
141 david 1.2 } | ||
142 david 1.1 143 } 144 return true; | ||
145 chip 1.11 } | ||
146 david 1.1 147 int UTF16toUTF8(const Uint16** srcHead, | ||
148 chip 1.11 const Uint16* srcEnd, 149 Uint8** tgtHead, 150 Uint8* tgtEnd) | ||
151 david 1.1 { 152 int returnCode = 0; 153 const Uint16* src = *srcHead; 154 Uint8* tgt = *tgtHead; 155 while (src < srcEnd) 156 { | ||
157 kumpf 1.20 if (*src < 128) 158 { 159 if (tgt == tgtEnd) 160 { 161 returnCode = -1; 162 break; 163 } 164 | ||
165 kamal.locahana 1.22 *tgt++ = (Uint8)*src++; | ||
166 kumpf 1.20 continue; 167 } | ||
168 mike 1.17 | ||
169 chip 1.11 Uint32 tempchar; 170 Uint16 numberOfBytes = 0; 171 const Uint16* oldsrc = src; 172 tempchar = *src++; 173 if (tempchar >= FIRST_HIGH_SURROGATE 174 && tempchar <= LAST_HIGH_SURROGATE) 175 { 176 if (src < srcEnd) 177 { 178 Uint32 tempchar2 = *src; 179 if (tempchar2 >= FIRST_LOW_SURROGATE && 180 tempchar2 <= LAST_LOW_SURROGATE) 181 { 182 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift) 183 + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase; 184 ++src; 185 } 186 } 187 else 188 { 189 --src; 190 chip 1.11 returnCode = -1; 191 break; 192 } 193 } 194 if (tempchar < (Uint32)0x80) 195 { 196 numberOfBytes = 1; 197 } 198 else if (tempchar < (Uint32)0x800) 199 { 200 numberOfBytes = 2; 201 } 202 else if (tempchar < (Uint32)0x10000) 203 { 204 numberOfBytes = 3; 205 } 206 else if (tempchar < (Uint32)0x200000) 207 { 208 numberOfBytes = 4; 209 } 210 else 211 chip 1.11 { 212 numberOfBytes = 2; 213 tempchar = REPLACEMENT_CHARACTER; 214 } 215 216 tgt += numberOfBytes; 217 if (tgt > tgtEnd) 218 { 219 src = oldsrc; 220 tgt -= numberOfBytes; 221 returnCode = -1; 222 break; 223 } 224 225 switch (numberOfBytes) 226 { 227 case 4: 228 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF); 229 tempchar >>= 6; 230 case 3: 231 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF); 232 chip 1.11 tempchar >>= 6; 233 case 2: 234 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF); 235 tempchar >>= 6; 236 case 1: 237 *--tgt = (Uint8)(tempchar | firstByteMark[numberOfBytes]); 238 } 239 tgt += numberOfBytes; | ||
240 david 1.1 } 241 *srcHead = src; 242 *tgtHead = tgt; 243 return returnCode; 244 } 245 246 int UTF8toUTF16 (const Uint8** srcHead, | ||
247 chip 1.11 const Uint8* srcEnd, 248 Uint16** tgtHead, 249 Uint16* tgtEnd) | ||
250 david 1.1 { 251 int returnCode = 0; 252 const Uint8* src = *srcHead; 253 Uint16* tgt = *tgtHead; 254 while (src < srcEnd) 255 { | ||
256 chip 1.11 Uint32 tempchar = 0; 257 Uint16 moreBytes = trailingBytesForUTF8[*src]; 258 if (src + moreBytes >= srcEnd) 259 { 260 returnCode = -1; 261 break; 262 } 263 switch (moreBytes) 264 { 265 case 3: 266 tempchar += *src++; 267 tempchar <<= 6; 268 case 2: 269 tempchar += *src++; 270 tempchar <<= 6; 271 case 1: 272 tempchar += *src++; 273 tempchar <<= 6; 274 case 0: 275 tempchar += *src++; 276 } 277 chip 1.11 tempchar -= offsetsFromUTF8[moreBytes]; 278 279 if (tgt >= tgtEnd) 280 { 281 src -= (moreBytes+1); 282 returnCode = -1; break; 283 } 284 if (tempchar <= MAX_BYTE) 285 { 286 if ((tempchar >= FIRST_HIGH_SURROGATE && 287 tempchar <= LAST_LOW_SURROGATE) || 288 ((tempchar & 0xFFFE) == 0xFFFE)) 289 { 290 *tgt++ = REPLACEMENT_CHARACTER; 291 } 292 else 293 { 294 *tgt++ = (Uint16)tempchar; 295 } 296 } 297 else if (tempchar > MAX_UTF16) 298 chip 1.11 { 299 *tgt++ = REPLACEMENT_CHARACTER; 300 } 301 else 302 { 303 if (tgt + 1 >= tgtEnd) 304 { 305 src -= (moreBytes+1); 306 returnCode = -1; 307 break; 308 } 309 tempchar -= halfBase; 310 *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE); 311 *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE); 312 } | ||
313 david 1.1 } 314 *srcHead = src; 315 *tgtHead = tgt; 316 return returnCode; 317 } | ||
318 david 1.5 | ||
319 mike 1.17 Boolean isUTF8Aux(const char *legal) | ||
320 david 1.5 { 321 char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1; 322 323 // Validate that the string is long enough to hold all the expected bytes. 324 // Note that if legal[0] == 0, numBytes will be 1. 325 for (char i=1; i<numBytes; i++) 326 { 327 if (legal[i] == 0) 328 { 329 return false; 330 } 331 } 332 | ||
333 kumpf 1.21 return isValid_U8((const Uint8 *)legal, numBytes); | ||
334 david 1.5 } | ||
335 chuck 1.6 | ||
336 chuck 1.9 Boolean isUTF8Str(const char *legal) 337 { | ||
338 chip 1.11 /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE, | ||
339 chuck 1.9 0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84, 340 0x00};*/ | ||
341 chip 1.11 // char tmp_[] = "class"; 342 // char * tmp = legal; | ||
343 david.dillard 1.13 size_t count = 0; 344 const size_t size = strlen(legal); | ||
345 chip 1.11 // printf("size = %d\n",size); | ||
346 kumpf 1.21 while (count<size) | ||
347 david.dillard 1.13 { | ||
348 chip 1.11 // printf("count = %d\n",count); | ||
349 kumpf 1.21 if (isUTF8(&legal[count]) == true) 350 { | ||
351 david.dillard 1.13 UTF8_NEXT(legal,count); | ||
352 kumpf 1.21 } 353 else 354 { | ||
355 chip 1.11 // printf("bad string\n"); 356 return false; 357 } | ||
358 david.dillard 1.13 } | ||
359 chip 1.11 // printf("good string\n"); 360 return true; | ||
361 chuck 1.9 /* | ||
362 chip 1.11 printf("legal = %s\n\n", legal); 363 Uint32 count = 0; 364 Uint32 trailingBytes = 0; | ||
365 kumpf 1.21 Uint32 size = strlen(legal); | ||
366 chip 1.11 printf("size of legal is %d\n",size); | ||
367 kumpf 1.21 while (count<size-1) 368 { 369 printf("count = %d\n", count); 370 if (isUTF8((char*)&legal[count]) == true) | ||
371 chuck 1.9 { | ||
372 kumpf 1.21 UTF8_NEXT(legal,trailingBytes); | ||
373 chip 1.11 count += trailingBytes; | ||
374 kumpf 1.21 } 375 else 376 { | ||
377 chip 1.11 printf("CommonUTF8:: returning false; position[%d]",count); | ||
378 kumpf 1.21 return false; | ||
379 chip 1.11 } | ||
380 kumpf 1.21 } 381 printf("CommonUTF8:: returning false; position[%d]",count); | ||
382 chip 1.11 return true;*/ | ||
383 chuck 1.9 } | ||
384 chuck 1.6 385 String escapeStringEncoder(const String& Str) 386 { 387 String escapeStr; 388 Uint16 escChar; 389 char hexencoding[6]; | ||
390 chip 1.11 | ||
391 kumpf 1.21 for (Uint32 i = 0; i < Str.size(); ++i) | ||
392 chuck 1.6 { | ||
393 kumpf 1.21 escChar = Str[i]; 394 if (escChar <= 0x7F) | ||
395 chuck 1.6 { | ||
396 kumpf 1.21 escapeStr.append(escChar); | ||
397 chuck 1.6 } | ||
398 kumpf 1.21 else 399 { 400 memset(hexencoding,0x00,sizeof(hexencoding)); | ||
401 chuck 1.6 sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16); 402 escapeStr.append(hexencoding); | ||
403 kumpf 1.21 } | ||
404 chip 1.11 } | ||
405 kumpf 1.21 return escapeStr; | ||
406 chuck 1.6 } 407 408 String escapeStringDecoder(const String& Str) 409 { 410 Uint32 i; 411 | ||
412 chip 1.11 Array<Uint16> utf16Chars; | ||
413 chuck 1.6 414 for (i=0; i< Str.size(); ++i) 415 { 416 if (Str[i] == '%') 417 { 418 Uint8 digit1 = _hexCharToNumeric((Str[++i])); 419 Uint8 digit2 = _hexCharToNumeric((Str[++i])); 420 Uint8 digit3 = _hexCharToNumeric((Str[++i])); 421 Uint8 digit4 = _hexCharToNumeric((Str[++i])); 422 | ||
423 chip 1.11 Uint16 decodedChar = (digit1<<12) + (digit2<<8) + | ||
424 chuck 1.6 (digit3<< 4) + (digit4); 425 | ||
426 chip 1.11 utf16Chars.append(decodedChar); | ||
427 chuck 1.6 } 428 else 429 { | ||
430 chip 1.11 utf16Chars.append((Uint16)Str[i]); | ||
431 chuck 1.6 } 432 } 433 434 // If there was a string to decode... 435 if (Str.size() > 0) 436 { 437 utf16Chars.append('\0'); 438 return String((Char16 *)utf16Chars.getData()); 439 } 440 else 441 { 442 return String(); 443 } 444 } 445 | ||
#ifdef PEGASUS_HAS_ICU

// Initialization state shared by all callers of initICUSuccessful().
Boolean InitializeICU::_initAttempted = false;
Boolean InitializeICU::_initSuccessful = false;
Mutex InitializeICU::_initMutex;

/**
    Initializes ICU on the first call and reports whether that
    initialization succeeded.

    The attempt is made once, under _initMutex; a failure is logged as a
    warning.  Later calls only report the recorded outcome.

    @return true if u_init() succeeded, false otherwise.
*/
Boolean InitializeICU::initICUSuccessful()
{
    // Unlocked check: once an attempt has been recorded the outcome is final.
    if (_initAttempted)
    {
        return _initSuccessful;
    }

    AutoMutex lock(_initMutex);

    // Check again now that the lock is held; another caller may have
    // completed the attempt in the meantime.
    if (!_initAttempted)
    {
        UErrorCode _status = U_ZERO_ERROR;

        // Initialize ICU
        u_init(&_status);

        if (!U_FAILURE(_status))
        {
            _initSuccessful = true;
        }
        else
        {
            _initSuccessful = false;
            Logger::put(
                Logger::STANDARD_LOG , System::CIMSERVER,
                Logger::WARNING,
                "ICU initialization failed with error: $0.",
                _status);
        }

        _initAttempted = true;
    }

    return _initSuccessful;
}

#endif

489 david 1.1 PEGASUS_NAMESPACE_END |
No CVS admin address has been configured |
Powered by ViewCVS 0.9.2 |