pegasus/src/Pegasus/Common/CommonUTF.cpp - annotate

Return to CommonUTF.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.10 //%2005////////////////////////////////////////////////////////////////////////
2 david 1.1 //
3 karl 1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.4 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group. 9 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 david 1.1 // 12 // Permission is hereby granted, free of charge, to any person obtaining a copy 13 // of this software and associated documentation files (the "Software"), to 14 // deal in the Software without restriction, including without limitation the 15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 16 // sell copies of the Software, and to permit persons to whom the Software is 17 // furnished to do so, subject to the following conditions:
18 chip 1.11 //
19 david 1.1 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN 20 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT 22 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 25 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 27 // 28 //============================================================================== 29 // 30 // Author: Dave Rosckes (rosckes@us.ibm.com) 31 //
32 david.dillard 1.12 // Modified By: David Dillard, VERITAS Software Corp. 33 // (david.dillard@veritas.com)
34 yi.zhou 1.16 // Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com)
35 david 1.1 // 36 //%///////////////////////////////////////////////////////////////////////////// 37
38 chuck 1.7 #include <Pegasus/Common/Config.h> 39 #include <Pegasus/Common/Array.h>
40 yi.zhou 1.16 #include <Pegasus/Common/Logger.h>
41 david 1.1 #include "CommonUTF.h"
42 chuck 1.7 #include <cstdio>
43 david 1.2 #include <cstring>
44 david.dillard 1.15 #include <cctype>
45 kumpf 1.3
46 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU 47 #include <unicode/uclean.h> 48 #endif 49
50 david 1.1 PEGASUS_NAMESPACE_BEGIN
51 kumpf 1.3
52 chuck 1.6
53 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c)
54 chuck 1.6 { 55 Uint8 n; 56
57 david.dillard 1.14 if (isdigit(c))
58 chuck 1.6 n = (c - '0');
59 david.dillard 1.14 else if (isupper(c))
60 chuck 1.6 n = (c - 'A' + 10); 61 else // if (islower(c)) 62 n = (c - 'a' + 10); 63 64 return n; 65 } 66
67 kumpf 1.3 // Note: Caller must ensure that "src" contains "size" bytes.
68 chip 1.11 Boolean isValid_U8(const Uint8 *src, int size)
69 david 1.1 { 70 Uint8 U8_char; 71 const Uint8 *srcptr = src+size; 72 switch (size) 73 {
74 chip 1.11 case 4: 75 if ((U8_char = (--srcptr)) < 0x80 \|\| U8_char > 0xBF) 76 { 77 return false; 78 } 79 case 3: 80 if ((U8_char = (--srcptr)) < 0x80 \|\| U8_char > 0xBF) 81 { 82 return false; 83 } 84 case 2: 85 if ((U8_char = (--srcptr)) > 0xBF) 86 { 87 return false; 88 } 89 switch (src) 90 { 91 case 0xE0: 92 if (U8_char < 0xA0) 93 { 94 return false; 95 chip 1.11 } 96 break; 97 case 0xF0: 98 if (U8_char < 0x90) 99 { 100 return false; 101 } 102 break; 103 case 0xF4: 104 if (U8_char > 0x8F) 105 { 106 return false; 107 } 108 break; 109 default: 110 if (U8_char < 0x80) 111 { 112 return false; 113 } 114 } 115 case 1: 116 chip 1.11 if (src >= 0x80 && src < 0xC2) 117 { 118 return false; 119 } 120 if (*src > 0xF4) 121 { 122 return false; 123 } 124 break;
125 david 1.2 default:
126 chip 1.11 { 127 return false;
128 david 1.2 }
129 david 1.1 130 } 131 return true;
132 chip 1.11 }
133 david 1.1 134 int UTF16toUTF8(const Uint16** srcHead,
135 chip 1.11 const Uint16* srcEnd, 136 Uint8** tgtHead, 137 Uint8* tgtEnd)
138 david 1.1 { 139 int returnCode = 0; 140 const Uint16* src = srcHead; 141 Uint8 tgt = *tgtHead; 142 while (src < srcEnd) 143 {
144 chip 1.11 Uint32 tempchar; 145 Uint16 numberOfBytes = 0; 146 const Uint16* oldsrc = src; 147 tempchar = src++; 148 if (tempchar >= FIRST_HIGH_SURROGATE 149 && tempchar <= LAST_HIGH_SURROGATE) 150 { 151 if (src < srcEnd) 152 { 153 Uint32 tempchar2 = src; 154 if (tempchar2 >= FIRST_LOW_SURROGATE && 155 tempchar2 <= LAST_LOW_SURROGATE) 156 { 157 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift) 158 + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase; 159 ++src; 160 } 161 } 162 else 163 { 164 --src; 165 chip 1.11 returnCode = -1; 166 break; 167 } 168 } 169 if (tempchar < (Uint32)0x80) 170 { 171 numberOfBytes = 1; 172 } 173 else if (tempchar < (Uint32)0x800) 174 { 175 numberOfBytes = 2; 176 } 177 else if (tempchar < (Uint32)0x10000) 178 { 179 numberOfBytes = 3; 180 } 181 else if (tempchar < (Uint32)0x200000) 182 { 183 numberOfBytes = 4; 184 } 185 else 186 chip 1.11 { 187 numberOfBytes = 2; 188 tempchar = REPLACEMENT_CHARACTER; 189 } 190 191 tgt += numberOfBytes; 192 if (tgt > tgtEnd) 193 { 194 src = oldsrc; 195 tgt -= numberOfBytes; 196 returnCode = -1; 197 break; 198 } 199 200 switch (numberOfBytes) 201 { 202 case 4: 203 --tgt = (Uint8)((tempchar \| 0x80) & 0xBF); 204 tempchar >>= 6; 205 case 3: 206 --tgt = (Uint8)((tempchar \| 0x80) & 0xBF); 207 chip 1.11 tempchar >>= 6; 208 case 2: 209 --tgt = (Uint8)((tempchar \| 0x80) & 0xBF); 210 tempchar >>= 6; 211 case 1: 212 --tgt = (Uint8)(tempchar \| firstByteMark[numberOfBytes]); 213 } 214 tgt += numberOfBytes;
215 david 1.1 } 216 srcHead = src; 217 tgtHead = tgt; 218 return returnCode; 219 } 220 221 int UTF8toUTF16 (const Uint8** srcHead,
222 chip 1.11 const Uint8* srcEnd, 223 Uint16** tgtHead, 224 Uint16* tgtEnd)
225 david 1.1 { 226 int returnCode = 0; 227 const Uint8* src = srcHead; 228 Uint16 tgt = *tgtHead; 229 while (src < srcEnd) 230 {
231 chip 1.11 Uint32 tempchar = 0; 232 Uint16 moreBytes = trailingBytesForUTF8[src]; 233 if (src + moreBytes >= srcEnd) 234 { 235 returnCode = -1; 236 break; 237 } 238 switch (moreBytes) 239 { 240 case 3: 241 tempchar += src++; 242 tempchar <<= 6; 243 case 2: 244 tempchar += src++; 245 tempchar <<= 6; 246 case 1: 247 tempchar += src++; 248 tempchar <<= 6; 249 case 0: 250 tempchar += src++; 251 } 252 chip 1.11 tempchar -= offsetsFromUTF8[moreBytes]; 253 254 if (tgt >= tgtEnd) 255 { 256 src -= (moreBytes+1); 257 returnCode = -1; break; 258 } 259 if (tempchar <= MAX_BYTE) 260 { 261 if ((tempchar >= FIRST_HIGH_SURROGATE && 262 tempchar <= LAST_LOW_SURROGATE) \|\| 263 ((tempchar & 0xFFFE) == 0xFFFE)) 264 { 265 tgt++ = REPLACEMENT_CHARACTER; 266 } 267 else 268 { 269 tgt++ = (Uint16)tempchar; 270 } 271 } 272 else if (tempchar > MAX_UTF16) 273 chip 1.11 { 274 tgt++ = REPLACEMENT_CHARACTER; 275 } 276 else 277 { 278 if (tgt + 1 >= tgtEnd) 279 { 280 src -= (moreBytes+1); 281 returnCode = -1; 282 break; 283 } 284 tempchar -= halfBase; 285 tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE); 286 tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE); 287 }
288 david 1.1 } 289 srcHead = src; 290 tgtHead = tgt; 291 return returnCode; 292 }
293 david 1.5 294 Boolean isUTF8(const char *legal) 295 {
296 mike 1.16.6.1 if (Uint8(*legal) < 128) 297 return false; 298
299 david 1.5 char numBytes = UTF_8_COUNT_TRAIL_BYTES(legal)+1; 300 301 // Validate that the string is long enough to hold all the expected bytes. 302 // Note that if legal[0] == 0, numBytes will be 1. 303 for (char i=1; i<numBytes; i++) 304 { 305 if (legal[i] == 0) 306 { 307 return false; 308 } 309 } 310 311 return (isValid_U8((const Uint8 )legal, numBytes)); 312 }
313 chuck 1.6
314 chuck 1.9 Boolean isUTF8Str(const char *legal) 315 {
316 chip 1.11 /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
317 chuck 1.9 0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84, 318 0x00};*/
319 chip 1.11 // char tmp_[] = "class"; 320 // char * tmp = legal;
321 david.dillard 1.13 size_t count = 0; 322 const size_t size = strlen(legal);
323 chip 1.11 // printf("size = %d\n",size);
324 david.dillard 1.13 while(count<size) 325 {
326 chip 1.11 // printf("count = %d\n",count);
327 david.dillard 1.13 if(isUTF8(&legal[count]) == true){ 328 UTF8_NEXT(legal,count);
329 chip 1.11 }else{ 330 // printf("bad string\n"); 331 return false; 332 }
333 david.dillard 1.13 }
334 chip 1.11 // printf("good string\n"); 335 return true;
336 chuck 1.9 /*
337 chip 1.11 printf("legal = %s\n\n", legal); 338 Uint32 count = 0; 339 Uint32 trailingBytes = 0;
340 chuck 1.9 Uint32 size = strlen(legal);
341 chip 1.11 printf("size of legal is %d\n",size);
342 chuck 1.9 while(count<size-1) 343 {
344 chip 1.11 printf("count = %d\n", count);
345 chuck 1.9 if(isUTF8((char*)&legal[count]) == true){
346 chip 1.11 UTF8_NEXT(legal,trailingBytes); 347 count += trailingBytes; 348 } else{ 349 printf("CommonUTF8:: returning false; position[%d]",count); 350 return false; 351 }
352 chuck 1.9 }
353 chip 1.11 printf("CommonUTF8:: returning false; position[%d]",count); 354 return true;*/
355 chuck 1.9 }
356 chuck 1.6 357 String escapeStringEncoder(const String& Str) 358 { 359 String escapeStr; 360 Uint16 escChar; 361 char hexencoding[6];
362 chip 1.11
363 chuck 1.6 for(Uint32 i = 0; i < Str.size(); ++i) 364 {
365 chip 1.11 escChar = Str[i]; 366 if(escChar <= 0x7F)
367 chuck 1.6 {
368 chip 1.11 escapeStr.append(escChar);
369 chuck 1.6 }
370 chip 1.11 else 371 { 372 memset(hexencoding,0x00,sizeof(hexencoding));
373 chuck 1.6 sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16); 374 escapeStr.append(hexencoding);
375 chip 1.11 }
376 chuck 1.6 } 377 return(escapeStr); 378 } 379 380 String escapeStringDecoder(const String& Str) 381 { 382 Uint32 i; 383
384 chip 1.11 Array<Uint16> utf16Chars;
385 chuck 1.6 386 for (i=0; i< Str.size(); ++i) 387 { 388 if (Str[i] == '%') 389 { 390 Uint8 digit1 = _hexCharToNumeric((Str[++i])); 391 Uint8 digit2 = _hexCharToNumeric((Str[++i])); 392 Uint8 digit3 = _hexCharToNumeric((Str[++i])); 393 Uint8 digit4 = _hexCharToNumeric((Str[++i])); 394
395 chip 1.11 Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
396 chuck 1.6 (digit3<< 4) + (digit4); 397
398 chip 1.11 utf16Chars.append(decodedChar);
399 chuck 1.6 } 400 else 401 {
402 chip 1.11 utf16Chars.append((Uint16)Str[i]);
403 chuck 1.6 } 404 } 405 406 // If there was a string to decode... 407 if (Str.size() > 0) 408 { 409 utf16Chars.append('\0'); 410 return String((Char16 *)utf16Chars.getData()); 411 } 412 else 413 { 414 return String(); 415 } 416 } 417
#ifdef PEGASUS_HAS_ICU

// State shared by all callers of initICUSuccessful().
Boolean InitializeICU::_initAttempted = false;
Boolean InitializeICU::_initSuccessful = false;
Mutex InitializeICU::_initMutex;

// Initializes ICU via u_init() the first time it is called and reports
// whether that initialization succeeded.
//
// Once an attempt has been recorded, later calls return the stored outcome
// without calling u_init() again. The attempt itself runs while holding
// _initMutex, and the "attempted" flag is re-checked after the mutex has
// been acquired. A failed initialization is logged as a warning.
Boolean InitializeICU::initICUSuccessful()
{
    if (_initAttempted)
    {
        return _initSuccessful;
    }

    AutoMutex lock(_initMutex);

    // Another caller may have completed the attempt while this one was
    // waiting for the mutex.
    if (!_initAttempted)
    {
        UErrorCode icuStatus = U_ZERO_ERROR;

        // Initialize ICU
        u_init(&icuStatus);

        _initSuccessful = !U_FAILURE(icuStatus);

        if (!_initSuccessful)
        {
            Logger::put (Logger::STANDARD_LOG , System::CIMSERVER,
                Logger::WARNING,
                "ICU initialization failed with error: $0.",
                icuStatus);
        }

        _initAttempted = true;
    }

    return _initSuccessful;
}

#endif

460 david 1.1 PEGASUS_NAMESPACE_END

No CVS admin address has been configured