1 karl 1.18 //%2006////////////////////////////////////////////////////////////////////////
|
2 david 1.1 //
|
3 karl 1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.4 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.18 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 david 1.1 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
15 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
18 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.18 //
|
21 david 1.1 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
22 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
24 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
32 // Author: Dave Rosckes (rosckes@us.ibm.com)
33 //
|
34 david.dillard 1.12 // Modified By: David Dillard, VERITAS Software Corp.
35 // (david.dillard@veritas.com)
|
36 yi.zhou 1.16 // Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com)
|
37 david 1.1 //
38 //%/////////////////////////////////////////////////////////////////////////////
39
|
40 chuck 1.7 #include <Pegasus/Common/Config.h>
41 #include <Pegasus/Common/Array.h>
|
42 yi.zhou 1.16 #include <Pegasus/Common/Logger.h>
|
43 david 1.1 #include "CommonUTF.h"
|
44 chuck 1.7 #include <cstdio>
|
45 david 1.2 #include <cstring>
|
46 david.dillard 1.15 #include <cctype>
|
47 kumpf 1.3
|
48 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU
49 #include <unicode/uclean.h>
50 #endif
51
|
52 david 1.1 PEGASUS_NAMESPACE_BEGIN
|
53 kumpf 1.3
|
54 chuck 1.6
|
55 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c)
|
56 chuck 1.6 {
57 Uint8 n;
58
|
59 david.dillard 1.14 if (isdigit(c))
|
60 chuck 1.6 n = (c - '0');
|
61 david.dillard 1.14 else if (isupper(c))
|
62 chuck 1.6 n = (c - 'A' + 10);
63 else // if (islower(c))
64 n = (c - 'a' + 10);
65
66 return n;
67 }
68
|
69 kumpf 1.3 // Note: Caller must ensure that "src" contains "size" bytes.
|
70 chip 1.11 Boolean isValid_U8(const Uint8 *src, int size)
|
71 david 1.1 {
72 Uint8 U8_char;
73 const Uint8 *srcptr = src+size;
74 switch (size)
75 {
|
76 chip 1.11 case 4:
77 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
78 {
79 return false;
80 }
81 case 3:
82 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
83 {
84 return false;
85 }
86 case 2:
87 if ((U8_char = (*--srcptr)) > 0xBF)
88 {
89 return false;
90 }
91 switch (*src)
92 {
93 case 0xE0:
94 if (U8_char < 0xA0)
95 {
96 return false;
97 chip 1.11 }
98 break;
99 case 0xF0:
100 if (U8_char < 0x90)
101 {
102 return false;
103 }
104 break;
105 case 0xF4:
106 if (U8_char > 0x8F)
107 {
108 return false;
109 }
110 break;
111 default:
112 if (U8_char < 0x80)
113 {
114 return false;
115 }
116 }
117 case 1:
118 chip 1.11 if (*src >= 0x80 && *src < 0xC2)
119 {
120 return false;
121 }
122 if (*src > 0xF4)
123 {
124 return false;
125 }
126 break;
|
127 david 1.2 default:
|
128 chip 1.11 {
129 return false;
|
130 david 1.2 }
|
131 david 1.1
132 }
133 return true;
|
134 chip 1.11 }
|
135 david 1.1
136 int UTF16toUTF8(const Uint16** srcHead,
|
137 chip 1.11 const Uint16* srcEnd,
138 Uint8** tgtHead,
139 Uint8* tgtEnd)
|
140 david 1.1 {
141 int returnCode = 0;
142 const Uint16* src = *srcHead;
143 Uint8* tgt = *tgtHead;
144 while (src < srcEnd)
145 {
|
146 mike 1.17 if (*src < 128)
147 {
148 if (tgt == tgtEnd)
149 {
150 returnCode = -1;
151 break;
152 }
153
154 *tgt++ = *src++;
155 continue;
156 }
157
|
158 chip 1.11 Uint32 tempchar;
159 Uint16 numberOfBytes = 0;
160 const Uint16* oldsrc = src;
161 tempchar = *src++;
162 if (tempchar >= FIRST_HIGH_SURROGATE
163 && tempchar <= LAST_HIGH_SURROGATE)
164 {
165 if (src < srcEnd)
166 {
167 Uint32 tempchar2 = *src;
168 if (tempchar2 >= FIRST_LOW_SURROGATE &&
169 tempchar2 <= LAST_LOW_SURROGATE)
170 {
171 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
172 + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
173 ++src;
174 }
175 }
176 else
177 {
178 --src;
179 chip 1.11 returnCode = -1;
180 break;
181 }
182 }
183 if (tempchar < (Uint32)0x80)
184 {
185 numberOfBytes = 1;
186 }
187 else if (tempchar < (Uint32)0x800)
188 {
189 numberOfBytes = 2;
190 }
191 else if (tempchar < (Uint32)0x10000)
192 {
193 numberOfBytes = 3;
194 }
195 else if (tempchar < (Uint32)0x200000)
196 {
197 numberOfBytes = 4;
198 }
199 else
200 chip 1.11 {
201 numberOfBytes = 2;
202 tempchar = REPLACEMENT_CHARACTER;
203 }
204
205 tgt += numberOfBytes;
206 if (tgt > tgtEnd)
207 {
208 src = oldsrc;
209 tgt -= numberOfBytes;
210 returnCode = -1;
211 break;
212 }
213
214 switch (numberOfBytes)
215 {
216 case 4:
217 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
218 tempchar >>= 6;
219 case 3:
220 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
221 chip 1.11 tempchar >>= 6;
222 case 2:
223 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
224 tempchar >>= 6;
225 case 1:
226 *--tgt = (Uint8)(tempchar | firstByteMark[numberOfBytes]);
227 }
228 tgt += numberOfBytes;
|
229 david 1.1 }
230 *srcHead = src;
231 *tgtHead = tgt;
232 return returnCode;
233 }
234
235 int UTF8toUTF16 (const Uint8** srcHead,
|
236 chip 1.11 const Uint8* srcEnd,
237 Uint16** tgtHead,
238 Uint16* tgtEnd)
|
239 david 1.1 {
240 int returnCode = 0;
241 const Uint8* src = *srcHead;
242 Uint16* tgt = *tgtHead;
243 while (src < srcEnd)
244 {
|
245 chip 1.11 Uint32 tempchar = 0;
246 Uint16 moreBytes = trailingBytesForUTF8[*src];
247 if (src + moreBytes >= srcEnd)
248 {
249 returnCode = -1;
250 break;
251 }
252 switch (moreBytes)
253 {
254 case 3:
255 tempchar += *src++;
256 tempchar <<= 6;
257 case 2:
258 tempchar += *src++;
259 tempchar <<= 6;
260 case 1:
261 tempchar += *src++;
262 tempchar <<= 6;
263 case 0:
264 tempchar += *src++;
265 }
266 chip 1.11 tempchar -= offsetsFromUTF8[moreBytes];
267
268 if (tgt >= tgtEnd)
269 {
270 src -= (moreBytes+1);
271 returnCode = -1; break;
272 }
273 if (tempchar <= MAX_BYTE)
274 {
275 if ((tempchar >= FIRST_HIGH_SURROGATE &&
276 tempchar <= LAST_LOW_SURROGATE) ||
277 ((tempchar & 0xFFFE) == 0xFFFE))
278 {
279 *tgt++ = REPLACEMENT_CHARACTER;
280 }
281 else
282 {
283 *tgt++ = (Uint16)tempchar;
284 }
285 }
286 else if (tempchar > MAX_UTF16)
287 chip 1.11 {
288 *tgt++ = REPLACEMENT_CHARACTER;
289 }
290 else
291 {
292 if (tgt + 1 >= tgtEnd)
293 {
294 src -= (moreBytes+1);
295 returnCode = -1;
296 break;
297 }
298 tempchar -= halfBase;
299 *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
300 *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
301 }
|
302 david 1.1 }
303 *srcHead = src;
304 *tgtHead = tgt;
305 return returnCode;
306 }
|
307 david 1.5
|
308 mike 1.17 Boolean isUTF8Aux(const char *legal)
|
309 david 1.5 {
310 char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
311
312 // Validate that the string is long enough to hold all the expected bytes.
313 // Note that if legal[0] == 0, numBytes will be 1.
314 for (char i=1; i<numBytes; i++)
315 {
316 if (legal[i] == 0)
317 {
318 return false;
319 }
320 }
321
322 return (isValid_U8((const Uint8 *)legal, numBytes));
323 }
|
324 chuck 1.6
|
325 chuck 1.9 Boolean isUTF8Str(const char *legal)
326 {
|
327 chip 1.11 /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
|
328 chuck 1.9 0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84,
329 0x00};*/
|
330 chip 1.11 // char tmp_[] = "class";
331 // char * tmp = legal;
|
332 david.dillard 1.13 size_t count = 0;
333 const size_t size = strlen(legal);
|
334 chip 1.11 // printf("size = %d\n",size);
|
335 david.dillard 1.13 while(count<size)
336 {
|
337 chip 1.11 // printf("count = %d\n",count);
|
338 david.dillard 1.13 if(isUTF8(&legal[count]) == true){
339 UTF8_NEXT(legal,count);
|
340 chip 1.11 }else{
341 // printf("bad string\n");
342 return false;
343 }
|
344 david.dillard 1.13 }
|
345 chip 1.11 // printf("good string\n");
346 return true;
|
347 chuck 1.9 /*
|
348 chip 1.11 printf("legal = %s\n\n", legal);
349 Uint32 count = 0;
350 Uint32 trailingBytes = 0;
|
351 chuck 1.9 Uint32 size = strlen(legal);
|
352 chip 1.11 printf("size of legal is %d\n",size);
|
353 chuck 1.9 while(count<size-1)
354 {
|
355 chip 1.11 printf("count = %d\n", count);
|
356 chuck 1.9 if(isUTF8((char*)&legal[count]) == true){
|
357 chip 1.11 UTF8_NEXT(legal,trailingBytes);
358 count += trailingBytes;
359 } else{
360 printf("CommonUTF8:: returning false; position[%d]",count);
361 return false;
362 }
|
363 chuck 1.9 }
|
364 chip 1.11 printf("CommonUTF8:: returning false; position[%d]",count);
365 return true;*/
|
366 chuck 1.9 }
|
367 chuck 1.6
368 String escapeStringEncoder(const String& Str)
369 {
370 String escapeStr;
371 Uint16 escChar;
372 char hexencoding[6];
|
373 chip 1.11
|
374 chuck 1.6 for(Uint32 i = 0; i < Str.size(); ++i)
375 {
|
376 chip 1.11 escChar = Str[i];
377 if(escChar <= 0x7F)
|
378 chuck 1.6 {
|
379 chip 1.11 escapeStr.append(escChar);
|
380 chuck 1.6 }
|
381 chip 1.11 else
382 {
383 memset(hexencoding,0x00,sizeof(hexencoding));
|
384 chuck 1.6 sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16);
385 escapeStr.append(hexencoding);
|
386 chip 1.11 }
|
387 chuck 1.6 }
388 return(escapeStr);
389 }
390
391 String escapeStringDecoder(const String& Str)
392 {
393 Uint32 i;
394
|
395 chip 1.11 Array<Uint16> utf16Chars;
|
396 chuck 1.6
397 for (i=0; i< Str.size(); ++i)
398 {
399 if (Str[i] == '%')
400 {
401 Uint8 digit1 = _hexCharToNumeric((Str[++i]));
402 Uint8 digit2 = _hexCharToNumeric((Str[++i]));
403 Uint8 digit3 = _hexCharToNumeric((Str[++i]));
404 Uint8 digit4 = _hexCharToNumeric((Str[++i]));
405
|
406 chip 1.11 Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
|
407 chuck 1.6 (digit3<< 4) + (digit4);
408
|
409 chip 1.11 utf16Chars.append(decodedChar);
|
410 chuck 1.6 }
411 else
412 {
|
413 chip 1.11 utf16Chars.append((Uint16)Str[i]);
|
414 chuck 1.6 }
415 }
416
417 // If there was a string to decode...
418 if (Str.size() > 0)
419 {
420 utf16Chars.append('\0');
421 return String((Char16 *)utf16Chars.getData());
422 }
423 else
424 {
425 return String();
426 }
427 }
428
|
429 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU
430
431 Boolean InitializeICU::_initAttempted = false;
432 Boolean InitializeICU::_initSuccessful = false;
433 Mutex InitializeICU::_initMutex;
434
435 Boolean InitializeICU::initICUSuccessful()
436 {
437 if (!_initAttempted)
438 {
439 {
440 AutoMutex lock(_initMutex);
441
442 if (!_initAttempted)
443 {
444 UErrorCode _status = U_ZERO_ERROR;
445
446 // Initialize ICU
447 u_init(&_status);
448
449 if (U_FAILURE(_status))
450 yi.zhou 1.16 {
451 _initSuccessful = false;
452 Logger::put (Logger::STANDARD_LOG , System::CIMSERVER,
453 Logger::WARNING,
454 "ICU initialization failed with error: $0.",
455 _status);
456 }
457 else
458 {
459 _initSuccessful = true;
460 }
461 _initAttempted = true;
462 }
463 }
464 }
465
466 return _initSuccessful;
467 }
468
469 #endif
470
|
471 david 1.1 PEGASUS_NAMESPACE_END
|