1 karl 1.10 //%2005////////////////////////////////////////////////////////////////////////
|
2 david 1.1 //
|
3 karl 1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.4 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 david 1.1 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
13 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
16 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 chip 1.11 //
|
19 david 1.1 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
20 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
22 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
30 // Author: Dave Rosckes (rosckes@us.ibm.com)
31 //
|
32 david.dillard 1.12 // Modified By: David Dillard, VERITAS Software Corp.
33 // (david.dillard@veritas.com)
|
34 yi.zhou 1.16 // Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com)
|
35 david 1.1 //
36 //%/////////////////////////////////////////////////////////////////////////////
37
|
38 chuck 1.7 #include <Pegasus/Common/Config.h>
39 #include <Pegasus/Common/Array.h>
|
40 yi.zhou 1.16 #include <Pegasus/Common/Logger.h>
|
41 david 1.1 #include "CommonUTF.h"
|
42 chuck 1.7 #include <cstdio>
|
43 david 1.2 #include <cstring>
|
44 david.dillard 1.15 #include <cctype>
|
45 kumpf 1.3
|
46 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU
47 #include <unicode/uclean.h>
48 #endif
49
|
50 david 1.1 PEGASUS_NAMESPACE_BEGIN
|
51 kumpf 1.3
|
52 chuck 1.6
|
53 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c)
|
54 chuck 1.6 {
55 Uint8 n;
56
|
57 david.dillard 1.14 if (isdigit(c))
|
58 chuck 1.6 n = (c - '0');
|
59 david.dillard 1.14 else if (isupper(c))
|
60 chuck 1.6 n = (c - 'A' + 10);
61 else // if (islower(c))
62 n = (c - 'a' + 10);
63
64 return n;
65 }
66
|
67 kumpf 1.3 // Note: Caller must ensure that "src" contains "size" bytes.
|
68 chip 1.11 Boolean isValid_U8(const Uint8 *src, int size)
|
69 david 1.1 {
70 Uint8 U8_char;
71 const Uint8 *srcptr = src+size;
72 switch (size)
73 {
|
74 chip 1.11 case 4:
75 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
76 {
77 return false;
78 }
79 case 3:
80 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
81 {
82 return false;
83 }
84 case 2:
85 if ((U8_char = (*--srcptr)) > 0xBF)
86 {
87 return false;
88 }
89 switch (*src)
90 {
91 case 0xE0:
92 if (U8_char < 0xA0)
93 {
94 return false;
95 chip 1.11 }
96 break;
97 case 0xF0:
98 if (U8_char < 0x90)
99 {
100 return false;
101 }
102 break;
103 case 0xF4:
104 if (U8_char > 0x8F)
105 {
106 return false;
107 }
108 break;
109 default:
110 if (U8_char < 0x80)
111 {
112 return false;
113 }
114 }
115 case 1:
116 chip 1.11 if (*src >= 0x80 && *src < 0xC2)
117 {
118 return false;
119 }
120 if (*src > 0xF4)
121 {
122 return false;
123 }
124 break;
|
125 david 1.2 default:
|
126 chip 1.11 {
127 return false;
|
128 david 1.2 }
|
129 david 1.1
130 }
131 return true;
|
132 chip 1.11 }
|
133 david 1.1
134 int UTF16toUTF8(const Uint16** srcHead,
|
135 chip 1.11 const Uint16* srcEnd,
136 Uint8** tgtHead,
137 Uint8* tgtEnd)
|
138 david 1.1 {
139 int returnCode = 0;
140 const Uint16* src = *srcHead;
141 Uint8* tgt = *tgtHead;
142 while (src < srcEnd)
143 {
|
144 chip 1.11 Uint32 tempchar;
145 Uint16 numberOfBytes = 0;
146 const Uint16* oldsrc = src;
147 tempchar = *src++;
148 if (tempchar >= FIRST_HIGH_SURROGATE
149 && tempchar <= LAST_HIGH_SURROGATE)
150 {
151 if (src < srcEnd)
152 {
153 Uint32 tempchar2 = *src;
154 if (tempchar2 >= FIRST_LOW_SURROGATE &&
155 tempchar2 <= LAST_LOW_SURROGATE)
156 {
157 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
158 + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
159 ++src;
160 }
161 }
162 else
163 {
164 --src;
165 chip 1.11 returnCode = -1;
166 break;
167 }
168 }
169 if (tempchar < (Uint32)0x80)
170 {
171 numberOfBytes = 1;
172 }
173 else if (tempchar < (Uint32)0x800)
174 {
175 numberOfBytes = 2;
176 }
177 else if (tempchar < (Uint32)0x10000)
178 {
179 numberOfBytes = 3;
180 }
181 else if (tempchar < (Uint32)0x200000)
182 {
183 numberOfBytes = 4;
184 }
185 else
186 chip 1.11 {
187 numberOfBytes = 2;
188 tempchar = REPLACEMENT_CHARACTER;
189 }
190
191 tgt += numberOfBytes;
192 if (tgt > tgtEnd)
193 {
194 src = oldsrc;
195 tgt -= numberOfBytes;
196 returnCode = -1;
197 break;
198 }
199
200 switch (numberOfBytes)
201 {
202 case 4:
203 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
204 tempchar >>= 6;
205 case 3:
206 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
207 chip 1.11 tempchar >>= 6;
208 case 2:
209 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
210 tempchar >>= 6;
211 case 1:
212 *--tgt = (Uint8)(tempchar | firstByteMark[numberOfBytes]);
213 }
214 tgt += numberOfBytes;
|
215 david 1.1 }
216 *srcHead = src;
217 *tgtHead = tgt;
218 return returnCode;
219 }
220
221 int UTF8toUTF16 (const Uint8** srcHead,
|
222 chip 1.11 const Uint8* srcEnd,
223 Uint16** tgtHead,
224 Uint16* tgtEnd)
|
225 david 1.1 {
226 int returnCode = 0;
227 const Uint8* src = *srcHead;
228 Uint16* tgt = *tgtHead;
229 while (src < srcEnd)
230 {
|
231 chip 1.11 Uint32 tempchar = 0;
232 Uint16 moreBytes = trailingBytesForUTF8[*src];
233 if (src + moreBytes >= srcEnd)
234 {
235 returnCode = -1;
236 break;
237 }
238 switch (moreBytes)
239 {
240 case 3:
241 tempchar += *src++;
242 tempchar <<= 6;
243 case 2:
244 tempchar += *src++;
245 tempchar <<= 6;
246 case 1:
247 tempchar += *src++;
248 tempchar <<= 6;
249 case 0:
250 tempchar += *src++;
251 }
252 chip 1.11 tempchar -= offsetsFromUTF8[moreBytes];
253
254 if (tgt >= tgtEnd)
255 {
256 src -= (moreBytes+1);
257 returnCode = -1; break;
258 }
259 if (tempchar <= MAX_BYTE)
260 {
261 if ((tempchar >= FIRST_HIGH_SURROGATE &&
262 tempchar <= LAST_LOW_SURROGATE) ||
263 ((tempchar & 0xFFFE) == 0xFFFE))
264 {
265 *tgt++ = REPLACEMENT_CHARACTER;
266 }
267 else
268 {
269 *tgt++ = (Uint16)tempchar;
270 }
271 }
272 else if (tempchar > MAX_UTF16)
273 chip 1.11 {
274 *tgt++ = REPLACEMENT_CHARACTER;
275 }
276 else
277 {
278 if (tgt + 1 >= tgtEnd)
279 {
280 src -= (moreBytes+1);
281 returnCode = -1;
282 break;
283 }
284 tempchar -= halfBase;
285 *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
286 *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
287 }
|
288 david 1.1 }
289 *srcHead = src;
290 *tgtHead = tgt;
291 return returnCode;
292 }
|
293 david 1.5
294 Boolean isUTF8(const char *legal)
295 {
|
296 mike 1.16.6.1 if (Uint8(*legal) < 128)
297 return false;
298
|
299 david 1.5 char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
300
301 // Validate that the string is long enough to hold all the expected bytes.
302 // Note that if legal[0] == 0, numBytes will be 1.
303 for (char i=1; i<numBytes; i++)
304 {
305 if (legal[i] == 0)
306 {
307 return false;
308 }
309 }
310
311 return (isValid_U8((const Uint8 *)legal, numBytes));
312 }
|
313 chuck 1.6
|
314 chuck 1.9 Boolean isUTF8Str(const char *legal)
315 {
|
316 chip 1.11 /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
|
317 chuck 1.9 0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84,
318 0x00};*/
|
319 chip 1.11 // char tmp_[] = "class";
320 // char * tmp = legal;
|
321 david.dillard 1.13 size_t count = 0;
322 const size_t size = strlen(legal);
|
323 chip 1.11 // printf("size = %d\n",size);
|
324 david.dillard 1.13 while(count<size)
325 {
|
326 chip 1.11 // printf("count = %d\n",count);
|
327 david.dillard 1.13 if(isUTF8(&legal[count]) == true){
328 UTF8_NEXT(legal,count);
|
329 chip 1.11 }else{
330 // printf("bad string\n");
331 return false;
332 }
|
333 david.dillard 1.13 }
|
334 chip 1.11 // printf("good string\n");
335 return true;
|
336 chuck 1.9 /*
|
337 chip 1.11 printf("legal = %s\n\n", legal);
338 Uint32 count = 0;
339 Uint32 trailingBytes = 0;
|
340 chuck 1.9 Uint32 size = strlen(legal);
|
341 chip 1.11 printf("size of legal is %d\n",size);
|
342 chuck 1.9 while(count<size-1)
343 {
|
344 chip 1.11 printf("count = %d\n", count);
|
345 chuck 1.9 if(isUTF8((char*)&legal[count]) == true){
|
346 chip 1.11 UTF8_NEXT(legal,trailingBytes);
347 count += trailingBytes;
348 } else{
349 printf("CommonUTF8:: returning false; position[%d]",count);
350 return false;
351 }
|
352 chuck 1.9 }
|
353 chip 1.11 printf("CommonUTF8:: returning false; position[%d]",count);
354 return true;*/
|
355 chuck 1.9 }
|
356 chuck 1.6
357 String escapeStringEncoder(const String& Str)
358 {
359 String escapeStr;
360 Uint16 escChar;
361 char hexencoding[6];
|
362 chip 1.11
|
363 chuck 1.6 for(Uint32 i = 0; i < Str.size(); ++i)
364 {
|
365 chip 1.11 escChar = Str[i];
366 if(escChar <= 0x7F)
|
367 chuck 1.6 {
|
368 chip 1.11 escapeStr.append(escChar);
|
369 chuck 1.6 }
|
370 chip 1.11 else
371 {
372 memset(hexencoding,0x00,sizeof(hexencoding));
|
373 chuck 1.6 sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16);
374 escapeStr.append(hexencoding);
|
375 chip 1.11 }
|
376 chuck 1.6 }
377 return(escapeStr);
378 }
379
380 String escapeStringDecoder(const String& Str)
381 {
382 Uint32 i;
383
|
384 chip 1.11 Array<Uint16> utf16Chars;
|
385 chuck 1.6
386 for (i=0; i< Str.size(); ++i)
387 {
388 if (Str[i] == '%')
389 {
390 Uint8 digit1 = _hexCharToNumeric((Str[++i]));
391 Uint8 digit2 = _hexCharToNumeric((Str[++i]));
392 Uint8 digit3 = _hexCharToNumeric((Str[++i]));
393 Uint8 digit4 = _hexCharToNumeric((Str[++i]));
394
|
395 chip 1.11 Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
|
396 chuck 1.6 (digit3<< 4) + (digit4);
397
|
398 chip 1.11 utf16Chars.append(decodedChar);
|
399 chuck 1.6 }
400 else
401 {
|
402 chip 1.11 utf16Chars.append((Uint16)Str[i]);
|
403 chuck 1.6 }
404 }
405
406 // If there was a string to decode...
407 if (Str.size() > 0)
408 {
409 utf16Chars.append('\0');
410 return String((Char16 *)utf16Chars.getData());
411 }
412 else
413 {
414 return String();
415 }
416 }
417
|
418 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU
419
420 Boolean InitializeICU::_initAttempted = false;
421 Boolean InitializeICU::_initSuccessful = false;
422 Mutex InitializeICU::_initMutex;
423
424 Boolean InitializeICU::initICUSuccessful()
425 {
426 if (!_initAttempted)
427 {
428 {
429 AutoMutex lock(_initMutex);
430
431 if (!_initAttempted)
432 {
433 UErrorCode _status = U_ZERO_ERROR;
434
435 // Initialize ICU
436 u_init(&_status);
437
438 if (U_FAILURE(_status))
439 yi.zhou 1.16 {
440 _initSuccessful = false;
441 Logger::put (Logger::STANDARD_LOG , System::CIMSERVER,
442 Logger::WARNING,
443 "ICU initialization failed with error: $0.",
444 _status);
445 }
446 else
447 {
448 _initSuccessful = true;
449 }
450 _initAttempted = true;
451 }
452 }
453 }
454
455 return _initSuccessful;
456 }
457
458 #endif
459
|
460 david 1.1 PEGASUS_NAMESPACE_END
|