pegasus/src/Pegasus/Common/CommonUTF.cpp - annotate

Return to CommonUTF.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.18 //%2006////////////////////////////////////////////////////////////////////////
2 david 1.1 //
3 karl 1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.4 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group. 9 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 karl 1.18 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.; 12 // EMC Corporation; Symantec Corporation; The Open Group.
13 david 1.1 // 14 // Permission is hereby granted, free of charge, to any person obtaining a copy 15 // of this software and associated documentation files (the "Software"), to 16 // deal in the Software without restriction, including without limitation the 17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 18 // sell copies of the Software, and to permit persons to whom the Software is 19 // furnished to do so, subject to the following conditions:
20 karl 1.18 //
21 david 1.1 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN 22 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 24 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 29 // 30 //============================================================================== 31 // 32 // Author: Dave Rosckes (rosckes@us.ibm.com) 33 //
34 david.dillard 1.12 // Modified By: David Dillard, VERITAS Software Corp. 35 // (david.dillard@veritas.com)
36 yi.zhou 1.16 // Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com)
37 david 1.1 // 38 //%///////////////////////////////////////////////////////////////////////////// 39
40 chuck 1.7 #include <Pegasus/Common/Config.h> 41 #include <Pegasus/Common/Array.h>
42 yi.zhou 1.16 #include <Pegasus/Common/Logger.h>
43 david 1.1 #include "CommonUTF.h"
44 chuck 1.7 #include <cstdio>
45 david 1.2 #include <cstring>
46 david.dillard 1.15 #include <cctype>
47 kumpf 1.3
48 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU 49 #include <unicode/uclean.h> 50 #endif 51
52 david 1.1 PEGASUS_NAMESPACE_BEGIN
53 kumpf 1.3
54 karl 1.19 const Uint32 halfBase = 0x0010000UL; 55 const Uint32 halfMask = 0x3FFUL; 56 const int halfShift = 10; 57 const Uint8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; 58 59 const Uint32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 60 0x03C82080UL, 0xFA082080UL, 0x82082080UL }; 61 62 const char trailingBytesForUTF8[256] = { 63 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 64 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 65 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 66 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 67 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 68 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 69 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 70 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 71 };
72 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c)
73 chuck 1.6 { 74 Uint8 n; 75
76 david.dillard 1.14 if (isdigit(c))
77 chuck 1.6 n = (c - '0');
78 david.dillard 1.14 else if (isupper(c))
79 chuck 1.6 n = (c - 'A' + 10); 80 else // if (islower(c)) 81 n = (c - 'a' + 10); 82 83 return n; 84 } 85
86 kumpf 1.3 // Note: Caller must ensure that "src" contains "size" bytes.
87 chip 1.11 Boolean isValid_U8(const Uint8 *src, int size)
88 david 1.1 { 89 Uint8 U8_char; 90 const Uint8 *srcptr = src+size; 91 switch (size) 92 {
93 chip 1.11 case 4: 94 if ((U8_char = (--srcptr)) < 0x80 \|\| U8_char > 0xBF) 95 { 96 return false; 97 } 98 case 3: 99 if ((U8_char = (--srcptr)) < 0x80 \|\| U8_char > 0xBF) 100 { 101 return false; 102 } 103 case 2: 104 if ((U8_char = (--srcptr)) > 0xBF) 105 { 106 return false; 107 } 108 switch (src) 109 { 110 case 0xE0: 111 if (U8_char < 0xA0) 112 { 113 return false; 114 chip 1.11 } 115 break; 116 case 0xF0: 117 if (U8_char < 0x90) 118 { 119 return false; 120 } 121 break; 122 case 0xF4: 123 if (U8_char > 0x8F) 124 { 125 return false; 126 } 127 break; 128 default: 129 if (U8_char < 0x80) 130 { 131 return false; 132 } 133 } 134 case 1: 135 chip 1.11 if (src >= 0x80 && src < 0xC2) 136 { 137 return false; 138 } 139 if (*src > 0xF4) 140 { 141 return false; 142 } 143 break;
144 david 1.2 default:
145 chip 1.11 { 146 return false;
147 david 1.2 }
148 david 1.1 149 } 150 return true;
151 chip 1.11 }
152 david 1.1 153 int UTF16toUTF8(const Uint16** srcHead,
154 chip 1.11 const Uint16* srcEnd, 155 Uint8** tgtHead, 156 Uint8* tgtEnd)
157 david 1.1 { 158 int returnCode = 0; 159 const Uint16* src = srcHead; 160 Uint8 tgt = *tgtHead; 161 while (src < srcEnd) 162 {
163 mike 1.17 if (src < 128) 164 { 165 if (tgt == tgtEnd) 166 { 167 returnCode = -1; 168 break; 169 } 170 171 tgt++ = *src++; 172 continue; 173 } 174
175 chip 1.11 Uint32 tempchar; 176 Uint16 numberOfBytes = 0; 177 const Uint16* oldsrc = src; 178 tempchar = src++; 179 if (tempchar >= FIRST_HIGH_SURROGATE 180 && tempchar <= LAST_HIGH_SURROGATE) 181 { 182 if (src < srcEnd) 183 { 184 Uint32 tempchar2 = src; 185 if (tempchar2 >= FIRST_LOW_SURROGATE && 186 tempchar2 <= LAST_LOW_SURROGATE) 187 { 188 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift) 189 + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase; 190 ++src; 191 } 192 } 193 else 194 { 195 --src; 196 chip 1.11 returnCode = -1; 197 break; 198 } 199 } 200 if (tempchar < (Uint32)0x80) 201 { 202 numberOfBytes = 1; 203 } 204 else if (tempchar < (Uint32)0x800) 205 { 206 numberOfBytes = 2; 207 } 208 else if (tempchar < (Uint32)0x10000) 209 { 210 numberOfBytes = 3; 211 } 212 else if (tempchar < (Uint32)0x200000) 213 { 214 numberOfBytes = 4; 215 } 216 else 217 chip 1.11 { 218 numberOfBytes = 2; 219 tempchar = REPLACEMENT_CHARACTER; 220 } 221 222 tgt += numberOfBytes; 223 if (tgt > tgtEnd) 224 { 225 src = oldsrc; 226 tgt -= numberOfBytes; 227 returnCode = -1; 228 break; 229 } 230 231 switch (numberOfBytes) 232 { 233 case 4: 234 --tgt = (Uint8)((tempchar \| 0x80) & 0xBF); 235 tempchar >>= 6; 236 case 3: 237 --tgt = (Uint8)((tempchar \| 0x80) & 0xBF); 238 chip 1.11 tempchar >>= 6; 239 case 2: 240 --tgt = (Uint8)((tempchar \| 0x80) & 0xBF); 241 tempchar >>= 6; 242 case 1: 243 --tgt = (Uint8)(tempchar \| firstByteMark[numberOfBytes]); 244 } 245 tgt += numberOfBytes;
246 david 1.1 } 247 srcHead = src; 248 tgtHead = tgt; 249 return returnCode; 250 } 251 252 int UTF8toUTF16 (const Uint8** srcHead,
253 chip 1.11 const Uint8* srcEnd, 254 Uint16** tgtHead, 255 Uint16* tgtEnd)
256 david 1.1 { 257 int returnCode = 0; 258 const Uint8* src = srcHead; 259 Uint16 tgt = *tgtHead; 260 while (src < srcEnd) 261 {
262 chip 1.11 Uint32 tempchar = 0; 263 Uint16 moreBytes = trailingBytesForUTF8[src]; 264 if (src + moreBytes >= srcEnd) 265 { 266 returnCode = -1; 267 break; 268 } 269 switch (moreBytes) 270 { 271 case 3: 272 tempchar += src++; 273 tempchar <<= 6; 274 case 2: 275 tempchar += src++; 276 tempchar <<= 6; 277 case 1: 278 tempchar += src++; 279 tempchar <<= 6; 280 case 0: 281 tempchar += src++; 282 } 283 chip 1.11 tempchar -= offsetsFromUTF8[moreBytes]; 284 285 if (tgt >= tgtEnd) 286 { 287 src -= (moreBytes+1); 288 returnCode = -1; break; 289 } 290 if (tempchar <= MAX_BYTE) 291 { 292 if ((tempchar >= FIRST_HIGH_SURROGATE && 293 tempchar <= LAST_LOW_SURROGATE) \|\| 294 ((tempchar & 0xFFFE) == 0xFFFE)) 295 { 296 tgt++ = REPLACEMENT_CHARACTER; 297 } 298 else 299 { 300 tgt++ = (Uint16)tempchar; 301 } 302 } 303 else if (tempchar > MAX_UTF16) 304 chip 1.11 { 305 tgt++ = REPLACEMENT_CHARACTER; 306 } 307 else 308 { 309 if (tgt + 1 >= tgtEnd) 310 { 311 src -= (moreBytes+1); 312 returnCode = -1; 313 break; 314 } 315 tempchar -= halfBase; 316 tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE); 317 tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE); 318 }
319 david 1.1 } 320 srcHead = src; 321 tgtHead = tgt; 322 return returnCode; 323 }
324 david 1.5
325 mike 1.17 Boolean isUTF8Aux(const char *legal)
326 david 1.5 { 327 char numBytes = UTF_8_COUNT_TRAIL_BYTES(legal)+1; 328 329 // Validate that the string is long enough to hold all the expected bytes. 330 // Note that if legal[0] == 0, numBytes will be 1. 331 for (char i=1; i<numBytes; i++) 332 { 333 if (legal[i] == 0) 334 { 335 return false; 336 } 337 } 338 339 return (isValid_U8((const Uint8 )legal, numBytes)); 340 }
341 chuck 1.6
342 chuck 1.9 Boolean isUTF8Str(const char *legal) 343 {
344 chip 1.11 /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
345 chuck 1.9 0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84, 346 0x00};*/
347 chip 1.11 // char tmp_[] = "class"; 348 // char * tmp = legal;
349 david.dillard 1.13 size_t count = 0; 350 const size_t size = strlen(legal);
351 chip 1.11 // printf("size = %d\n",size);
352 david.dillard 1.13 while(count<size) 353 {
354 chip 1.11 // printf("count = %d\n",count);
355 david.dillard 1.13 if(isUTF8(&legal[count]) == true){ 356 UTF8_NEXT(legal,count);
357 chip 1.11 }else{ 358 // printf("bad string\n"); 359 return false; 360 }
361 david.dillard 1.13 }
362 chip 1.11 // printf("good string\n"); 363 return true;
364 chuck 1.9 /*
365 chip 1.11 printf("legal = %s\n\n", legal); 366 Uint32 count = 0; 367 Uint32 trailingBytes = 0;
368 chuck 1.9 Uint32 size = strlen(legal);
369 chip 1.11 printf("size of legal is %d\n",size);
370 chuck 1.9 while(count<size-1) 371 {
372 chip 1.11 printf("count = %d\n", count);
373 chuck 1.9 if(isUTF8((char*)&legal[count]) == true){
374 chip 1.11 UTF8_NEXT(legal,trailingBytes); 375 count += trailingBytes; 376 } else{ 377 printf("CommonUTF8:: returning false; position[%d]",count); 378 return false; 379 }
380 chuck 1.9 }
381 chip 1.11 printf("CommonUTF8:: returning false; position[%d]",count); 382 return true;*/
383 chuck 1.9 }
384 chuck 1.6 385 String escapeStringEncoder(const String& Str) 386 { 387 String escapeStr; 388 Uint16 escChar; 389 char hexencoding[6];
390 chip 1.11
391 chuck 1.6 for(Uint32 i = 0; i < Str.size(); ++i) 392 {
393 chip 1.11 escChar = Str[i]; 394 if(escChar <= 0x7F)
395 chuck 1.6 {
396 chip 1.11 escapeStr.append(escChar);
397 chuck 1.6 }
398 chip 1.11 else 399 { 400 memset(hexencoding,0x00,sizeof(hexencoding));
401 chuck 1.6 sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16); 402 escapeStr.append(hexencoding);
403 chip 1.11 }
404 chuck 1.6 } 405 return(escapeStr); 406 } 407 408 String escapeStringDecoder(const String& Str) 409 { 410 Uint32 i; 411
412 chip 1.11 Array<Uint16> utf16Chars;
413 chuck 1.6 414 for (i=0; i< Str.size(); ++i) 415 { 416 if (Str[i] == '%') 417 { 418 Uint8 digit1 = _hexCharToNumeric((Str[++i])); 419 Uint8 digit2 = _hexCharToNumeric((Str[++i])); 420 Uint8 digit3 = _hexCharToNumeric((Str[++i])); 421 Uint8 digit4 = _hexCharToNumeric((Str[++i])); 422
423 chip 1.11 Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
424 chuck 1.6 (digit3<< 4) + (digit4); 425
426 chip 1.11 utf16Chars.append(decodedChar);
427 chuck 1.6 } 428 else 429 {
430 chip 1.11 utf16Chars.append((Uint16)Str[i]);
431 chuck 1.6 } 432 } 433 434 // If there was a string to decode... 435 if (Str.size() > 0) 436 { 437 utf16Chars.append('\0'); 438 return String((Char16 *)utf16Chars.getData()); 439 } 440 else 441 { 442 return String(); 443 } 444 } 445
#ifdef PEGASUS_HAS_ICU

Boolean InitializeICU::_initAttempted = false;
Boolean InitializeICU::_initSuccessful = false;
Mutex InitializeICU::_initMutex;

// Initializes ICU through u_init() the first time it is called and reports
// whether that initialization succeeded. Later calls return the recorded
// result without calling u_init() again. A failure is written to the
// standard log as a warning.
//
// NOTE(review): _initAttempted is first read without holding _initMutex and
// re-checked after the lock is taken; whether that unlocked read is safe
// depends on how Boolean is declared -- confirm.
Boolean InitializeICU::initICUSuccessful()
{
    if (_initAttempted)
    {
        return _initSuccessful;
    }

    {
        AutoMutex lock(_initMutex);

        // Another thread may have completed the attempt while this one was
        // waiting for the mutex.
        if (!_initAttempted)
        {
            UErrorCode icuStatus = U_ZERO_ERROR;

            // Initialize ICU
            u_init(&icuStatus);

            if (U_FAILURE(icuStatus))
            {
                _initSuccessful = false;
                Logger::put (Logger::STANDARD_LOG , System::CIMSERVER,
                    Logger::WARNING,
                    "ICU initialization failed with error: $0.",
                    icuStatus);
            }
            else
            {
                _initSuccessful = true;
            }

            // Set last, after the result has been recorded.
            _initAttempted = true;
        }
    }

    return _initSuccessful;
}

#endif

488 david 1.1 PEGASUS_NAMESPACE_END

No CVS admin address has been configured