![]() ![]() |
![]() |
//%LICENSE////////////////////////////////////////////////////////////////
//
// Licensed to The Open Group (TOG) under one or more contributor license
// agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
// this work for additional information regarding copyright ownership.
// Each contributor licenses this file to you under the OpenPegasus Open
// Source License; you may not use this file except in compliance with the
// License.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//////////////////////////////////////////////////////////////////////////
//
//%////////////////////////////////////////////////////////////////////////////
31 david 1.1 | ||
32 chuck 1.7 #include <Pegasus/Common/Config.h> 33 #include <Pegasus/Common/Array.h> | ||
34 yi.zhou 1.16 #include <Pegasus/Common/Logger.h> | ||
35 david 1.1 #include "CommonUTF.h" | ||
36 thilo.boehm 1.25 #include <Pegasus/Common/String.h> | ||
37 chuck 1.7 #include <cstdio> | ||
38 david 1.2 #include <cstring> | ||
39 david.dillard 1.15 #include <cctype> | ||
40 kumpf 1.3 | ||
41 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU 42 #include <unicode/uclean.h> 43 #endif 44 | ||
45 david 1.1 PEGASUS_NAMESPACE_BEGIN | ||
46 kumpf 1.3 | ||
47 karl 1.19 const Uint32 halfBase = 0x0010000UL; 48 const Uint32 halfMask = 0x3FFUL; 49 const int halfShift = 10; 50 const Uint8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 51 52 const Uint32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 53 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; 54 55 const char trailingBytesForUTF8[256] = { 56 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 57 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 58 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 59 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 60 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 61 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 62 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 63 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 64 }; | ||
65 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c) | ||
66 chuck 1.6 { 67 Uint8 n; 68 | ||
69 david.dillard 1.14 if (isdigit(c)) | ||
70 chuck 1.6 n = (c - '0'); | ||
71 david.dillard 1.14 else if (isupper(c)) | ||
72 chuck 1.6 n = (c - 'A' + 10); 73 else // if (islower(c)) 74 n = (c - 'a' + 10); 75 76 return n; 77 } 78 | ||
79 kumpf 1.3 // Note: Caller must ensure that "src" contains "size" bytes. | ||
80 chip 1.11 Boolean isValid_U8(const Uint8 *src, int size) | ||
81 david 1.1 { 82 Uint8 U8_char; 83 const Uint8 *srcptr = src+size; 84 switch (size) 85 { | ||
86 chip 1.11 case 4: 87 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF) 88 { 89 return false; 90 } 91 case 3: 92 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF) 93 { 94 return false; 95 } 96 case 2: 97 if ((U8_char = (*--srcptr)) > 0xBF) 98 { 99 return false; 100 } 101 switch (*src) 102 { 103 case 0xE0: 104 if (U8_char < 0xA0) 105 { 106 return false; 107 chip 1.11 } 108 break; 109 case 0xF0: 110 if (U8_char < 0x90) 111 { 112 return false; 113 } 114 break; 115 case 0xF4: 116 if (U8_char > 0x8F) 117 { 118 return false; 119 } 120 break; 121 default: 122 if (U8_char < 0x80) 123 { 124 return false; 125 } 126 } 127 case 1: 128 chip 1.11 if (*src >= 0x80 && *src < 0xC2) 129 { 130 return false; 131 } 132 if (*src > 0xF4) 133 { 134 return false; 135 } 136 break; | ||
137 david 1.2 default: | ||
138 chip 1.11 { 139 return false; | ||
140 david 1.2 } | ||
141 david 1.1 142 } 143 return true; | ||
144 chip 1.11 } | ||
145 david 1.1 146 int UTF16toUTF8(const Uint16** srcHead, | ||
147 chip 1.11 const Uint16* srcEnd, 148 Uint8** tgtHead, 149 Uint8* tgtEnd) | ||
150 david 1.1 { 151 int returnCode = 0; 152 const Uint16* src = *srcHead; 153 Uint8* tgt = *tgtHead; 154 while (src < srcEnd) 155 { | ||
156 kumpf 1.20 if (*src < 128) 157 { 158 if (tgt == tgtEnd) 159 { 160 returnCode = -1; 161 break; 162 } 163 | ||
164 kamal.locahana 1.22 *tgt++ = (Uint8)*src++; | ||
165 kumpf 1.20 continue; 166 } | ||
167 mike 1.17 | ||
168 chip 1.11 Uint32 tempchar; 169 Uint16 numberOfBytes = 0; 170 const Uint16* oldsrc = src; 171 tempchar = *src++; 172 if (tempchar >= FIRST_HIGH_SURROGATE 173 && tempchar <= LAST_HIGH_SURROGATE) 174 { 175 if (src < srcEnd) 176 { 177 Uint32 tempchar2 = *src; 178 if (tempchar2 >= FIRST_LOW_SURROGATE && 179 tempchar2 <= LAST_LOW_SURROGATE) 180 { 181 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift) 182 + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase; 183 ++src; 184 } 185 } 186 else 187 { 188 --src; 189 chip 1.11 returnCode = -1; 190 break; 191 } 192 } 193 if (tempchar < (Uint32)0x80) 194 { 195 numberOfBytes = 1; 196 } 197 else if (tempchar < (Uint32)0x800) 198 { 199 numberOfBytes = 2; 200 } 201 else if (tempchar < (Uint32)0x10000) 202 { 203 numberOfBytes = 3; 204 } 205 else if (tempchar < (Uint32)0x200000) 206 { 207 numberOfBytes = 4; 208 } 209 else 210 chip 1.11 { 211 numberOfBytes = 2; 212 tempchar = REPLACEMENT_CHARACTER; 213 } 214 215 tgt += numberOfBytes; 216 if (tgt > tgtEnd) 217 { 218 src = oldsrc; 219 tgt -= numberOfBytes; 220 returnCode = -1; 221 break; 222 } 223 224 switch (numberOfBytes) 225 { 226 case 4: 227 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF); 228 tempchar >>= 6; 229 case 3: 230 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF); 231 chip 1.11 tempchar >>= 6; 232 case 2: 233 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF); 234 tempchar >>= 6; 235 case 1: 236 *--tgt = (Uint8)(tempchar | firstByteMark[numberOfBytes]); 237 } 238 tgt += numberOfBytes; | ||
239 david 1.1 } 240 *srcHead = src; 241 *tgtHead = tgt; 242 return returnCode; 243 } 244 245 int UTF8toUTF16 (const Uint8** srcHead, | ||
246 chip 1.11 const Uint8* srcEnd, 247 Uint16** tgtHead, 248 Uint16* tgtEnd) | ||
249 david 1.1 { 250 int returnCode = 0; 251 const Uint8* src = *srcHead; 252 Uint16* tgt = *tgtHead; 253 while (src < srcEnd) 254 { | ||
255 chip 1.11 Uint32 tempchar = 0; 256 Uint16 moreBytes = trailingBytesForUTF8[*src]; 257 if (src + moreBytes >= srcEnd) 258 { 259 returnCode = -1; 260 break; 261 } 262 switch (moreBytes) 263 { 264 case 3: 265 tempchar += *src++; 266 tempchar <<= 6; 267 case 2: 268 tempchar += *src++; 269 tempchar <<= 6; 270 case 1: 271 tempchar += *src++; 272 tempchar <<= 6; 273 case 0: 274 tempchar += *src++; 275 } 276 chip 1.11 tempchar -= offsetsFromUTF8[moreBytes]; 277 278 if (tgt >= tgtEnd) 279 { 280 src -= (moreBytes+1); 281 returnCode = -1; break; 282 } 283 if (tempchar <= MAX_BYTE) 284 { 285 if ((tempchar >= FIRST_HIGH_SURROGATE && 286 tempchar <= LAST_LOW_SURROGATE) || 287 ((tempchar & 0xFFFE) == 0xFFFE)) 288 { 289 *tgt++ = REPLACEMENT_CHARACTER; 290 } 291 else 292 { 293 *tgt++ = (Uint16)tempchar; 294 } 295 } 296 else if (tempchar > MAX_UTF16) 297 chip 1.11 { 298 *tgt++ = REPLACEMENT_CHARACTER; 299 } 300 else 301 { 302 if (tgt + 1 >= tgtEnd) 303 { 304 src -= (moreBytes+1); 305 returnCode = -1; 306 break; 307 } 308 tempchar -= halfBase; 309 *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE); 310 *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE); 311 } | ||
312 david 1.1 } 313 *srcHead = src; 314 *tgtHead = tgt; 315 return returnCode; 316 } | ||
317 david 1.5 | ||
318 mike 1.17 Boolean isUTF8Aux(const char *legal) | ||
319 david 1.5 { 320 char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1; 321 322 // Validate that the string is long enough to hold all the expected bytes. 323 // Note that if legal[0] == 0, numBytes will be 1. 324 for (char i=1; i<numBytes; i++) 325 { 326 if (legal[i] == 0) 327 { 328 return false; 329 } 330 } 331 | ||
332 kumpf 1.21 return isValid_U8((const Uint8 *)legal, numBytes); | ||
333 david 1.5 } | ||
334 chuck 1.6 | ||
335 chuck 1.9 Boolean isUTF8Str(const char *legal) 336 { | ||
337 chip 1.11 /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE, | ||
338 chuck 1.9 0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84, 339 0x00};*/ | ||
340 chip 1.11 // char tmp_[] = "class"; 341 // char * tmp = legal; | ||
342 david.dillard 1.13 size_t count = 0; 343 const size_t size = strlen(legal); | ||
344 chip 1.11 // printf("size = %d\n",size); | ||
345 kumpf 1.21 while (count<size) | ||
346 david.dillard 1.13 { | ||
347 chip 1.11 // printf("count = %d\n",count); | ||
348 kumpf 1.21 if (isUTF8(&legal[count]) == true) 349 { | ||
350 david.dillard 1.13 UTF8_NEXT(legal,count); | ||
351 kumpf 1.21 } 352 else 353 { | ||
354 chip 1.11 // printf("bad string\n"); 355 return false; 356 } | ||
357 david.dillard 1.13 } | ||
358 chip 1.11 // printf("good string\n"); 359 return true; | ||
360 chuck 1.9 /* | ||
361 chip 1.11 printf("legal = %s\n\n", legal); 362 Uint32 count = 0; 363 Uint32 trailingBytes = 0; | ||
364 kumpf 1.21 Uint32 size = strlen(legal); | ||
365 chip 1.11 printf("size of legal is %d\n",size); | ||
366 kumpf 1.21 while (count<size-1) 367 { 368 printf("count = %d\n", count); 369 if (isUTF8((char*)&legal[count]) == true) | ||
370 chuck 1.9 { | ||
371 kumpf 1.21 UTF8_NEXT(legal,trailingBytes); | ||
372 chip 1.11 count += trailingBytes; | ||
373 kumpf 1.21 } 374 else 375 { | ||
376 chip 1.11 printf("CommonUTF8:: returning false; position[%d]",count); | ||
377 kumpf 1.21 return false; | ||
378 chip 1.11 } | ||
379 kumpf 1.21 } 380 printf("CommonUTF8:: returning false; position[%d]",count); | ||
381 chip 1.11 return true;*/ | ||
382 chuck 1.9 } | ||
383 chuck 1.6 384 String escapeStringEncoder(const String& Str) 385 { 386 String escapeStr; 387 Uint16 escChar; 388 char hexencoding[6]; | ||
389 chip 1.11 | ||
390 kumpf 1.21 for (Uint32 i = 0; i < Str.size(); ++i) | ||
391 chuck 1.6 { | ||
392 kumpf 1.21 escChar = Str[i]; 393 if (escChar <= 0x7F) | ||
394 chuck 1.6 { | ||
395 kumpf 1.21 escapeStr.append(escChar); | ||
396 chuck 1.6 } | ||
397 kumpf 1.21 else 398 { 399 memset(hexencoding,0x00,sizeof(hexencoding)); | ||
400 chuck 1.6 sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16); 401 escapeStr.append(hexencoding); | ||
402 kumpf 1.21 } | ||
403 chip 1.11 } | ||
404 kumpf 1.21 return escapeStr; | ||
405 chuck 1.6 } 406 407 String escapeStringDecoder(const String& Str) 408 { 409 Uint32 i; 410 | ||
411 chip 1.11 Array<Uint16> utf16Chars; | ||
412 chuck 1.6 413 for (i=0; i< Str.size(); ++i) 414 { 415 if (Str[i] == '%') 416 { 417 Uint8 digit1 = _hexCharToNumeric((Str[++i])); 418 Uint8 digit2 = _hexCharToNumeric((Str[++i])); 419 Uint8 digit3 = _hexCharToNumeric((Str[++i])); 420 Uint8 digit4 = _hexCharToNumeric((Str[++i])); 421 | ||
422 chip 1.11 Uint16 decodedChar = (digit1<<12) + (digit2<<8) + | ||
423 chuck 1.6 (digit3<< 4) + (digit4); 424 | ||
425 chip 1.11 utf16Chars.append(decodedChar); | ||
426 chuck 1.6 } 427 else 428 { | ||
429 chip 1.11 utf16Chars.append((Uint16)Str[i]); | ||
430 chuck 1.6 } 431 } 432 433 // If there was a string to decode... 434 if (Str.size() > 0) 435 { 436 utf16Chars.append('\0'); 437 return String((Char16 *)utf16Chars.getData()); 438 } 439 else 440 { 441 return String(); 442 } 443 } 444 | ||
#ifdef PEGASUS_HAS_ICU

Boolean InitializeICU::_initAttempted = false;
Boolean InitializeICU::_initSuccessful = false;
Mutex InitializeICU::_initMutex;

// Initializes ICU through u_init() the first time it is called and remembers
// the outcome; later calls return the remembered outcome.
//
// Returns true when u_init() reported success. A failure is written to the
// standard log as a warning.
Boolean InitializeICU::initICUSuccessful()
{
    if (_initAttempted)
    {
        return _initSuccessful;
    }

    {
        // The AutoMutex object is scoped to this block.
        AutoMutex lock(_initMutex);

        // Test the flag again now that the mutex has been acquired; the
        // initialization may have been done in the meantime.
        if (!_initAttempted)
        {
            UErrorCode icuStatus = U_ZERO_ERROR;

            // Initialize ICU
            u_init(&icuStatus);

            _initSuccessful = U_FAILURE(icuStatus) ? false : true;

            if (!_initSuccessful)
            {
                Logger::put(
                    Logger::STANDARD_LOG, System::CIMSERVER,
                    Logger::WARNING,
                    "ICU initialization failed with error: $0.",
                    icuStatus);
            }

            _initAttempted = true;
        }
    }

    return _initSuccessful;
}

#endif
488 david 1.1 PEGASUS_NAMESPACE_END |
No CVS admin address has been configured |
Powered by ViewCVS 0.9.2 |