1 karl 1.10 //%2005////////////////////////////////////////////////////////////////////////
|
2 david 1.1 //
|
3 karl 1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.4 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 david 1.1 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
13 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
16 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 chip 1.11 //
|
19 david 1.1 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
20 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
22 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
30 // Author: Dave Rosckes (rosckes@us.ibm.com)
31 //
|
32 david.dillard 1.12 // Modified By: David Dillard, VERITAS Software Corp.
33 // (david.dillard@veritas.com)
|
34 yi.zhou 1.16 // Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com)
|
35 david 1.1 //
36 //%/////////////////////////////////////////////////////////////////////////////
37
|
38 chuck 1.7 #include <Pegasus/Common/Config.h>
39 #include <Pegasus/Common/Array.h>
|
40 yi.zhou 1.16 #include <Pegasus/Common/Logger.h>
|
41 david 1.1 #include "CommonUTF.h"
|
42 chuck 1.7 #include <cstdio>
|
43 david 1.2 #include <cstring>
|
44 david.dillard 1.15 #include <cctype>
|
45 kumpf 1.3
|
46 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU
47 #include <unicode/uclean.h>
48 #endif
49
|
50 david 1.1 PEGASUS_NAMESPACE_BEGIN
|
51 kumpf 1.3
|
52 chuck 1.6
|
53 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c)
|
54 chuck 1.6 {
55 Uint8 n;
56
|
57 david.dillard 1.14 if (isdigit(c))
|
58 chuck 1.6 n = (c - '0');
|
59 david.dillard 1.14 else if (isupper(c))
|
60 chuck 1.6 n = (c - 'A' + 10);
61 else // if (islower(c))
62 n = (c - 'a' + 10);
63
64 return n;
65 }
66
|
67 kumpf 1.3 // Note: Caller must ensure that "src" contains "size" bytes.
|
68 chip 1.11 Boolean isValid_U8(const Uint8 *src, int size)
|
69 david 1.1 {
70 Uint8 U8_char;
71 const Uint8 *srcptr = src+size;
72 switch (size)
73 {
|
74 chip 1.11 case 4:
75 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
76 {
77 return false;
78 }
79 case 3:
80 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
81 {
82 return false;
83 }
84 case 2:
85 if ((U8_char = (*--srcptr)) > 0xBF)
86 {
87 return false;
88 }
89 switch (*src)
90 {
91 case 0xE0:
92 if (U8_char < 0xA0)
93 {
94 return false;
95 chip 1.11 }
96 break;
97 case 0xF0:
98 if (U8_char < 0x90)
99 {
100 return false;
101 }
102 break;
103 case 0xF4:
104 if (U8_char > 0x8F)
105 {
106 return false;
107 }
108 break;
109 default:
110 if (U8_char < 0x80)
111 {
112 return false;
113 }
114 }
115 case 1:
116 chip 1.11 if (*src >= 0x80 && *src < 0xC2)
117 {
118 return false;
119 }
120 if (*src > 0xF4)
121 {
122 return false;
123 }
124 break;
|
125 david 1.2 default:
|
126 chip 1.11 {
127 return false;
|
128 david 1.2 }
|
129 david 1.1
130 }
131 return true;
|
132 chip 1.11 }
|
133 david 1.1
134 int UTF16toUTF8(const Uint16** srcHead,
|
135 chip 1.11 const Uint16* srcEnd,
136 Uint8** tgtHead,
137 Uint8* tgtEnd)
|
138 david 1.1 {
139 int returnCode = 0;
140 const Uint16* src = *srcHead;
141 Uint8* tgt = *tgtHead;
142 while (src < srcEnd)
143 {
|
144 chip 1.11 Uint32 tempchar;
145 Uint16 numberOfBytes = 0;
146 const Uint16* oldsrc = src;
147 tempchar = *src++;
148 if (tempchar >= FIRST_HIGH_SURROGATE
149 && tempchar <= LAST_HIGH_SURROGATE)
150 {
151 if (src < srcEnd)
152 {
153 Uint32 tempchar2 = *src;
154 if (tempchar2 >= FIRST_LOW_SURROGATE &&
155 tempchar2 <= LAST_LOW_SURROGATE)
156 {
157 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
158 + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
159 ++src;
160 }
161 }
162 else
163 {
164 --src;
165 chip 1.11 returnCode = -1;
166 break;
167 }
168 }
169 if (tempchar < (Uint32)0x80)
170 {
171 numberOfBytes = 1;
172 }
173 else if (tempchar < (Uint32)0x800)
174 {
175 numberOfBytes = 2;
176 }
177 else if (tempchar < (Uint32)0x10000)
178 {
179 numberOfBytes = 3;
180 }
181 else if (tempchar < (Uint32)0x200000)
182 {
183 numberOfBytes = 4;
184 }
185 else
186 chip 1.11 {
187 numberOfBytes = 2;
188 tempchar = REPLACEMENT_CHARACTER;
189 }
190
191 tgt += numberOfBytes;
192 if (tgt > tgtEnd)
193 {
194 src = oldsrc;
195 tgt -= numberOfBytes;
196 returnCode = -1;
197 break;
198 }
199
200 switch (numberOfBytes)
201 {
202 case 4:
203 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
204 tempchar >>= 6;
205 case 3:
206 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
207 chip 1.11 tempchar >>= 6;
208 case 2:
209 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
210 tempchar >>= 6;
211 case 1:
212 *--tgt = (Uint8)(tempchar | firstByteMark[numberOfBytes]);
213 }
214 tgt += numberOfBytes;
|
215 david 1.1 }
216 *srcHead = src;
217 *tgtHead = tgt;
218 return returnCode;
219 }
220
221 int UTF8toUTF16 (const Uint8** srcHead,
|
222 chip 1.11 const Uint8* srcEnd,
223 Uint16** tgtHead,
224 Uint16* tgtEnd)
|
225 david 1.1 {
226 int returnCode = 0;
227 const Uint8* src = *srcHead;
228 Uint16* tgt = *tgtHead;
229 while (src < srcEnd)
230 {
|
231 chip 1.11 Uint32 tempchar = 0;
232 Uint16 moreBytes = trailingBytesForUTF8[*src];
233 if (src + moreBytes >= srcEnd)
234 {
235 returnCode = -1;
236 break;
237 }
238 switch (moreBytes)
239 {
240 case 3:
241 tempchar += *src++;
242 tempchar <<= 6;
243 case 2:
244 tempchar += *src++;
245 tempchar <<= 6;
246 case 1:
247 tempchar += *src++;
248 tempchar <<= 6;
249 case 0:
250 tempchar += *src++;
251 }
252 chip 1.11 tempchar -= offsetsFromUTF8[moreBytes];
253
254 if (tgt >= tgtEnd)
255 {
256 src -= (moreBytes+1);
257 returnCode = -1; break;
258 }
259 if (tempchar <= MAX_BYTE)
260 {
261 if ((tempchar >= FIRST_HIGH_SURROGATE &&
262 tempchar <= LAST_LOW_SURROGATE) ||
263 ((tempchar & 0xFFFE) == 0xFFFE))
264 {
265 *tgt++ = REPLACEMENT_CHARACTER;
266 }
267 else
268 {
269 *tgt++ = (Uint16)tempchar;
270 }
271 }
272 else if (tempchar > MAX_UTF16)
273 chip 1.11 {
274 *tgt++ = REPLACEMENT_CHARACTER;
275 }
276 else
277 {
278 if (tgt + 1 >= tgtEnd)
279 {
280 src -= (moreBytes+1);
281 returnCode = -1;
282 break;
283 }
284 tempchar -= halfBase;
285 *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
286 *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
287 }
|
288 david 1.1 }
289 *srcHead = src;
290 *tgtHead = tgt;
291 return returnCode;
292 }
|
293 david 1.5
294 Boolean isUTF8(const char *legal)
295 {
296 char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
297
298 // Validate that the string is long enough to hold all the expected bytes.
299 // Note that if legal[0] == 0, numBytes will be 1.
300 for (char i=1; i<numBytes; i++)
301 {
302 if (legal[i] == 0)
303 {
304 return false;
305 }
306 }
307
308 return (isValid_U8((const Uint8 *)legal, numBytes));
309 }
|
310 chuck 1.6
|
311 chuck 1.9 Boolean isUTF8Str(const char *legal)
312 {
|
313 chip 1.11 /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
|
314 chuck 1.9 0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84,
315 0x00};*/
|
316 chip 1.11 // char tmp_[] = "class";
317 // char * tmp = legal;
|
318 david.dillard 1.13 size_t count = 0;
319 const size_t size = strlen(legal);
|
320 chip 1.11 // printf("size = %d\n",size);
|
321 david.dillard 1.13 while(count<size)
322 {
|
323 chip 1.11 // printf("count = %d\n",count);
|
324 david.dillard 1.13 if(isUTF8(&legal[count]) == true){
325 UTF8_NEXT(legal,count);
|
326 chip 1.11 }else{
327 // printf("bad string\n");
328 return false;
329 }
|
330 david.dillard 1.13 }
|
331 chip 1.11 // printf("good string\n");
332 return true;
|
333 chuck 1.9 /*
|
334 chip 1.11 printf("legal = %s\n\n", legal);
335 Uint32 count = 0;
336 Uint32 trailingBytes = 0;
|
337 chuck 1.9 Uint32 size = strlen(legal);
|
338 chip 1.11 printf("size of legal is %d\n",size);
|
339 chuck 1.9 while(count<size-1)
340 {
|
341 chip 1.11 printf("count = %d\n", count);
|
342 chuck 1.9 if(isUTF8((char*)&legal[count]) == true){
|
343 chip 1.11 UTF8_NEXT(legal,trailingBytes);
344 count += trailingBytes;
345 } else{
346 printf("CommonUTF8:: returning false; position[%d]",count);
347 return false;
348 }
|
349 chuck 1.9 }
|
350 chip 1.11 printf("CommonUTF8:: returning false; position[%d]",count);
351 return true;*/
|
352 chuck 1.9 }
|
353 chuck 1.6
354 String escapeStringEncoder(const String& Str)
355 {
356 String escapeStr;
357 Uint16 escChar;
358 char hexencoding[6];
|
359 chip 1.11
|
360 chuck 1.6 for(Uint32 i = 0; i < Str.size(); ++i)
361 {
|
362 chip 1.11 escChar = Str[i];
363 if(escChar <= 0x7F)
|
364 chuck 1.6 {
|
365 chip 1.11 escapeStr.append(escChar);
|
366 chuck 1.6 }
|
367 chip 1.11 else
368 {
369 memset(hexencoding,0x00,sizeof(hexencoding));
|
370 chuck 1.6 sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16);
371 escapeStr.append(hexencoding);
|
372 chip 1.11 }
|
373 chuck 1.6 }
374 return(escapeStr);
375 }
376
377 String escapeStringDecoder(const String& Str)
378 {
379 Uint32 i;
380
|
381 chip 1.11 Array<Uint16> utf16Chars;
|
382 chuck 1.6
383 for (i=0; i< Str.size(); ++i)
384 {
385 if (Str[i] == '%')
386 {
387 Uint8 digit1 = _hexCharToNumeric((Str[++i]));
388 Uint8 digit2 = _hexCharToNumeric((Str[++i]));
389 Uint8 digit3 = _hexCharToNumeric((Str[++i]));
390 Uint8 digit4 = _hexCharToNumeric((Str[++i]));
391
|
392 chip 1.11 Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
|
393 chuck 1.6 (digit3<< 4) + (digit4);
394
|
395 chip 1.11 utf16Chars.append(decodedChar);
|
396 chuck 1.6 }
397 else
398 {
|
399 chip 1.11 utf16Chars.append((Uint16)Str[i]);
|
400 chuck 1.6 }
401 }
402
403 // If there was a string to decode...
404 if (Str.size() > 0)
405 {
406 utf16Chars.append('\0');
407 return String((Char16 *)utf16Chars.getData());
408 }
409 else
410 {
411 return String();
412 }
413 }
414
|
415 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU
416
417 Boolean InitializeICU::_initAttempted = false;
418 Boolean InitializeICU::_initSuccessful = false;
419 Mutex InitializeICU::_initMutex;
420
421 Boolean InitializeICU::initICUSuccessful()
422 {
423 if (!_initAttempted)
424 {
425 {
426 AutoMutex lock(_initMutex);
427
428 if (!_initAttempted)
429 {
430 UErrorCode _status = U_ZERO_ERROR;
431
432 // Initialize ICU
433 u_init(&_status);
434
435 if (U_FAILURE(_status))
436 yi.zhou 1.16 {
437 _initSuccessful = false;
438 Logger::put (Logger::STANDARD_LOG , System::CIMSERVER,
439 Logger::WARNING,
440 "ICU initialization failed with error: $0.",
441 _status);
442 }
443 else
444 {
445 _initSuccessful = true;
446 }
447 _initAttempted = true;
448 }
449 }
450 }
451
452 return _initSuccessful;
453 }
454
455 #endif
456
|
457 david 1.1 PEGASUS_NAMESPACE_END
|