pegasus/src/Pegasus/Common/CommonUTF.cpp - annotate

Return to CommonUTF.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.10 //%2005////////////////////////////////////////////////////////////////////////
2 david 1.1 //
3 karl 1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.4 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group. 9 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 david 1.1 // 12 // Permission is hereby granted, free of charge, to any person obtaining a copy 13 // of this software and associated documentation files (the "Software"), to 14 // deal in the Software without restriction, including without limitation the 15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 16 // sell copies of the Software, and to permit persons to whom the Software is 17 // furnished to do so, subject to the following conditions:
18 chip 1.11 //
19 david 1.1 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN 20 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 22 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 25 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 27 // 28 //============================================================================== 29 // 30 // Author: Dave Rosckes (rosckes@us.ibm.com) 31 //
32 david.dillard 1.12 // Modified By: David Dillard, VERITAS Software Corp. 33 // (david.dillard@veritas.com)
34 yi.zhou 1.16 // Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com)
35 david 1.1 // 36 //%///////////////////////////////////////////////////////////////////////////// 37
38 chuck 1.7 #include <Pegasus/Common/Config.h> 39 #include <Pegasus/Common/Array.h>
40 yi.zhou 1.16 #include <Pegasus/Common/Logger.h>
41 david 1.1 #include "CommonUTF.h"
42 chuck 1.7 #include <cstdio>
43 david 1.2 #include <cstring>
44 david.dillard 1.15 #include <cctype>
45 kumpf 1.3
46 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU 47 #include <unicode/uclean.h> 48 #endif 49
50 david 1.1 PEGASUS_NAMESPACE_BEGIN
51 kumpf 1.3
52 chuck 1.6
53 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c)
54 chuck 1.6 { 55 Uint8 n; 56
57 david.dillard 1.14 if (isdigit(c))
58 chuck 1.6 n = (c - '0');
59 david.dillard 1.14 else if (isupper(c))
60 chuck 1.6 n = (c - 'A' + 10); 61 else // if (islower(c)) 62 n = (c - 'a' + 10); 63 64 return n; 65 } 66
67 kumpf 1.3 // Note: Caller must ensure that "src" contains "size" bytes.
68 chip 1.11 Boolean isValid_U8(const Uint8 *src, int size)
69 david 1.1 { 70 Uint8 U8_char; 71 const Uint8 *srcptr = src+size; 72 switch (size) 73 {
74 chip 1.11 case 4: 75 if ((U8_char = (--srcptr)) < 0x80 \|\| U8_char > 0xBF) 76 { 77 return false; 78 } 79 case 3: 80 if ((U8_char = (--srcptr)) < 0x80 \|\| U8_char > 0xBF) 81 { 82 return false; 83 } 84 case 2: 85 if ((U8_char = (--srcptr)) > 0xBF) 86 { 87 return false; 88 } 89 switch (src) 90 { 91 case 0xE0: 92 if (U8_char < 0xA0) 93 { 94 return false; 95 chip 1.11 } 96 break; 97 case 0xF0: 98 if (U8_char < 0x90) 99 { 100 return false; 101 } 102 break; 103 case 0xF4: 104 if (U8_char > 0x8F) 105 { 106 return false; 107 } 108 break; 109 default: 110 if (U8_char < 0x80) 111 { 112 return false; 113 } 114 } 115 case 1: 116 chip 1.11 if (src >= 0x80 && src < 0xC2) 117 { 118 return false; 119 } 120 if (*src > 0xF4) 121 { 122 return false; 123 } 124 break;
125 david 1.2 default:
126 chip 1.11 { 127 return false;
128 david 1.2 }
129 david 1.1 130 } 131 return true;
132 chip 1.11 }
133 david 1.1 134 int UTF16toUTF8(const Uint16** srcHead,
135 chip 1.11 const Uint16* srcEnd, 136 Uint8** tgtHead, 137 Uint8* tgtEnd)
138 david 1.1 { 139 int returnCode = 0; 140 const Uint16* src = srcHead; 141 Uint8 tgt = *tgtHead; 142 while (src < srcEnd) 143 {
144 chip 1.11 Uint32 tempchar; 145 Uint16 numberOfBytes = 0; 146 const Uint16* oldsrc = src; 147 tempchar = src++; 148 if (tempchar >= FIRST_HIGH_SURROGATE 149 && tempchar <= LAST_HIGH_SURROGATE) 150 { 151 if (src < srcEnd) 152 { 153 Uint32 tempchar2 = src; 154 if (tempchar2 >= FIRST_LOW_SURROGATE && 155 tempchar2 <= LAST_LOW_SURROGATE) 156 { 157 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift) 158 + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase; 159 ++src; 160 } 161 } 162 else 163 { 164 --src; 165 chip 1.11 returnCode = -1; 166 break; 167 } 168 } 169 if (tempchar < (Uint32)0x80) 170 { 171 numberOfBytes = 1; 172 } 173 else if (tempchar < (Uint32)0x800) 174 { 175 numberOfBytes = 2; 176 } 177 else if (tempchar < (Uint32)0x10000) 178 { 179 numberOfBytes = 3; 180 } 181 else if (tempchar < (Uint32)0x200000) 182 { 183 numberOfBytes = 4; 184 } 185 else 186 chip 1.11 { 187 numberOfBytes = 2; 188 tempchar = REPLACEMENT_CHARACTER; 189 } 190 191 tgt += numberOfBytes; 192 if (tgt > tgtEnd) 193 { 194 src = oldsrc; 195 tgt -= numberOfBytes; 196 returnCode = -1; 197 break; 198 } 199 200 switch (numberOfBytes) 201 { 202 case 4: 203 --tgt = (Uint8)((tempchar \| 0x80) & 0xBF); 204 tempchar >>= 6; 205 case 3: 206 --tgt = (Uint8)((tempchar \| 0x80) & 0xBF); 207 chip 1.11 tempchar >>= 6; 208 case 2: 209 --tgt = (Uint8)((tempchar \| 0x80) & 0xBF); 210 tempchar >>= 6; 211 case 1: 212 --tgt = (Uint8)(tempchar \| firstByteMark[numberOfBytes]); 213 } 214 tgt += numberOfBytes;
215 david 1.1 } 216 srcHead = src; 217 tgtHead = tgt; 218 return returnCode; 219 } 220 221 int UTF8toUTF16 (const Uint8** srcHead,
222 chip 1.11 const Uint8* srcEnd, 223 Uint16** tgtHead, 224 Uint16* tgtEnd)
225 david 1.1 { 226 int returnCode = 0; 227 const Uint8* src = srcHead; 228 Uint16 tgt = *tgtHead; 229 while (src < srcEnd) 230 {
231 chip 1.11 Uint32 tempchar = 0; 232 Uint16 moreBytes = trailingBytesForUTF8[src]; 233 if (src + moreBytes >= srcEnd) 234 { 235 returnCode = -1; 236 break; 237 } 238 switch (moreBytes) 239 { 240 case 3: 241 tempchar += src++; 242 tempchar <<= 6; 243 case 2: 244 tempchar += src++; 245 tempchar <<= 6; 246 case 1: 247 tempchar += src++; 248 tempchar <<= 6; 249 case 0: 250 tempchar += src++; 251 } 252 chip 1.11 tempchar -= offsetsFromUTF8[moreBytes]; 253 254 if (tgt >= tgtEnd) 255 { 256 src -= (moreBytes+1); 257 returnCode = -1; break; 258 } 259 if (tempchar <= MAX_BYTE) 260 { 261 if ((tempchar >= FIRST_HIGH_SURROGATE && 262 tempchar <= LAST_LOW_SURROGATE) \|\| 263 ((tempchar & 0xFFFE) == 0xFFFE)) 264 { 265 tgt++ = REPLACEMENT_CHARACTER; 266 } 267 else 268 { 269 tgt++ = (Uint16)tempchar; 270 } 271 } 272 else if (tempchar > MAX_UTF16) 273 chip 1.11 { 274 tgt++ = REPLACEMENT_CHARACTER; 275 } 276 else 277 { 278 if (tgt + 1 >= tgtEnd) 279 { 280 src -= (moreBytes+1); 281 returnCode = -1; 282 break; 283 } 284 tempchar -= halfBase; 285 tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE); 286 tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE); 287 }
288 david 1.1 } 289 srcHead = src; 290 tgtHead = tgt; 291 return returnCode; 292 }
293 david 1.5 294 Boolean isUTF8(const char legal) 295 { 296 char numBytes = UTF_8_COUNT_TRAIL_BYTES(legal)+1; 297 298 // Validate that the string is long enough to hold all the expected bytes. 299 // Note that if legal[0] == 0, numBytes will be 1. 300 for (char i=1; i<numBytes; i++) 301 { 302 if (legal[i] == 0) 303 { 304 return false; 305 } 306 } 307 308 return (isValid_U8((const Uint8 *)legal, numBytes)); 309 }
310 chuck 1.6
311 chuck 1.9 Boolean isUTF8Str(const char *legal) 312 {
313 chip 1.11 /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
314 chuck 1.9 0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84, 315 0x00};*/
316 chip 1.11 // char tmp_[] = "class"; 317 // char * tmp = legal;
318 david.dillard 1.13 size_t count = 0; 319 const size_t size = strlen(legal);
320 chip 1.11 // printf("size = %d\n",size);
321 david.dillard 1.13 while(count<size) 322 {
323 chip 1.11 // printf("count = %d\n",count);
324 david.dillard 1.13 if(isUTF8(&legal[count]) == true){ 325 UTF8_NEXT(legal,count);
326 chip 1.11 }else{ 327 // printf("bad string\n"); 328 return false; 329 }
330 david.dillard 1.13 }
331 chip 1.11 // printf("good string\n"); 332 return true;
333 chuck 1.9 /*
334 chip 1.11 printf("legal = %s\n\n", legal); 335 Uint32 count = 0; 336 Uint32 trailingBytes = 0;
337 chuck 1.9 Uint32 size = strlen(legal);
338 chip 1.11 printf("size of legal is %d\n",size);
339 chuck 1.9 while(count<size-1) 340 {
341 chip 1.11 printf("count = %d\n", count);
342 chuck 1.9 if(isUTF8((char*)&legal[count]) == true){
343 chip 1.11 UTF8_NEXT(legal,trailingBytes); 344 count += trailingBytes; 345 } else{ 346 printf("CommonUTF8:: returning false; position[%d]",count); 347 return false; 348 }
349 chuck 1.9 }
350 chip 1.11 printf("CommonUTF8:: returning false; position[%d]",count); 351 return true;*/
352 chuck 1.9 }
353 chuck 1.6 354 String escapeStringEncoder(const String& Str) 355 { 356 String escapeStr; 357 Uint16 escChar; 358 char hexencoding[6];
359 chip 1.11
360 chuck 1.6 for(Uint32 i = 0; i < Str.size(); ++i) 361 {
362 chip 1.11 escChar = Str[i]; 363 if(escChar <= 0x7F)
364 chuck 1.6 {
365 chip 1.11 escapeStr.append(escChar);
366 chuck 1.6 }
367 chip 1.11 else 368 { 369 memset(hexencoding,0x00,sizeof(hexencoding));
370 chuck 1.6 sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16); 371 escapeStr.append(hexencoding);
372 chip 1.11 }
373 chuck 1.6 } 374 return(escapeStr); 375 } 376 377 String escapeStringDecoder(const String& Str) 378 { 379 Uint32 i; 380
381 chip 1.11 Array<Uint16> utf16Chars;
382 chuck 1.6 383 for (i=0; i< Str.size(); ++i) 384 { 385 if (Str[i] == '%') 386 { 387 Uint8 digit1 = _hexCharToNumeric((Str[++i])); 388 Uint8 digit2 = _hexCharToNumeric((Str[++i])); 389 Uint8 digit3 = _hexCharToNumeric((Str[++i])); 390 Uint8 digit4 = _hexCharToNumeric((Str[++i])); 391
392 chip 1.11 Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
393 chuck 1.6 (digit3<< 4) + (digit4); 394
395 chip 1.11 utf16Chars.append(decodedChar);
396 chuck 1.6 } 397 else 398 {
399 chip 1.11 utf16Chars.append((Uint16)Str[i]);
400 chuck 1.6 } 401 } 402 403 // If there was a string to decode... 404 if (Str.size() > 0) 405 { 406 utf16Chars.append('\0'); 407 return String((Char16 *)utf16Chars.getData()); 408 } 409 else 410 { 411 return String(); 412 } 413 } 414
#ifdef PEGASUS_HAS_ICU

// Shared state for the one-time ICU initialization below.
Boolean InitializeICU::_initAttempted = false;
Boolean InitializeICU::_initSuccessful = false;
Mutex InitializeICU::_initMutex;

// Initializes ICU via u_init() the first time it is called and reports
// whether that initialization succeeded. Later calls return the recorded
// result without calling u_init() again. A failure is logged as a warning.
//
// The attempt flag is tested once without the mutex and once more while
// holding it (double-checked locking on plain Boolean flags).
Boolean InitializeICU::initICUSuccessful()
{
    // Already attempted: just report the recorded outcome.
    if (_initAttempted)
    {
        return _initSuccessful;
    }

    {
        AutoMutex lock(_initMutex);

        // Re-check under the lock; another caller may have got here first.
        if (!_initAttempted)
        {
            UErrorCode _status = U_ZERO_ERROR;

            // Initialize ICU
            u_init(&_status);

            const Boolean failed = U_FAILURE(_status);
            _initSuccessful = !failed;

            if (failed)
            {
                Logger::put (Logger::STANDARD_LOG , System::CIMSERVER,
                    Logger::WARNING,
                    "ICU initialization failed with error: $0.",
                    _status);
            }

            // Set last, after the result has been recorded.
            _initAttempted = true;
        }
    }

    return _initSuccessful;
}

#endif

457 david 1.1 PEGASUS_NAMESPACE_END

No CVS admin address has been configured