1 karl 1.10 //%2005////////////////////////////////////////////////////////////////////////
|
2 david 1.1 //
|
3 karl 1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.4 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 david 1.1 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
13 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
16 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 chip 1.11 //
|
19 david 1.1 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
20 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
22 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
30 // Author: Dave Rosckes (rosckes@us.ibm.com)
31 //
|
32 david.dillard 1.12 // Modified By: David Dillard, VERITAS Software Corp.
33 // (david.dillard@veritas.com)
|
34 yi.zhou 1.16 // Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com)
|
35 david 1.1 //
36 //%/////////////////////////////////////////////////////////////////////////////
37
|
38 chuck 1.7 #include <Pegasus/Common/Config.h>
39 #include <Pegasus/Common/Array.h>
|
40 yi.zhou 1.16 #include <Pegasus/Common/Logger.h>
|
41 david 1.1 #include "CommonUTF.h"
|
42 chuck 1.7 #include <cstdio>
|
43 david 1.2 #include <cstring>
|
44 david.dillard 1.15 #include <cctype>
|
45 kumpf 1.3
|
46 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU
47 #include <unicode/uclean.h>
48 #endif
49
|
50 david 1.1 PEGASUS_NAMESPACE_BEGIN
|
51 kumpf 1.3
|
52 chuck 1.6
|
53 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c)
|
54 chuck 1.6 {
55 Uint8 n;
56
|
57 david.dillard 1.14 if (isdigit(c))
|
58 chuck 1.6 n = (c - '0');
|
59 david.dillard 1.14 else if (isupper(c))
|
60 chuck 1.6 n = (c - 'A' + 10);
61 else // if (islower(c))
62 n = (c - 'a' + 10);
63
64 return n;
65 }
66
|
67 kumpf 1.3 // Note: Caller must ensure that "src" contains "size" bytes.
|
68 chip 1.11 Boolean isValid_U8(const Uint8 *src, int size)
|
69 david 1.1 {
70 Uint8 U8_char;
71 const Uint8 *srcptr = src+size;
72 switch (size)
73 {
|
74 chip 1.11 case 4:
75 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
76 {
77 return false;
78 }
79 case 3:
80 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
81 {
82 return false;
83 }
84 case 2:
85 if ((U8_char = (*--srcptr)) > 0xBF)
86 {
87 return false;
88 }
89 switch (*src)
90 {
91 case 0xE0:
92 if (U8_char < 0xA0)
93 {
94 return false;
95 chip 1.11 }
96 break;
97 case 0xF0:
98 if (U8_char < 0x90)
99 {
100 return false;
101 }
102 break;
103 case 0xF4:
104 if (U8_char > 0x8F)
105 {
106 return false;
107 }
108 break;
109 default:
110 if (U8_char < 0x80)
111 {
112 return false;
113 }
114 }
115 case 1:
116 chip 1.11 if (*src >= 0x80 && *src < 0xC2)
117 {
118 return false;
119 }
120 if (*src > 0xF4)
121 {
122 return false;
123 }
124 break;
|
125 david 1.2 default:
|
126 chip 1.11 {
127 return false;
|
128 david 1.2 }
|
129 david 1.1
130 }
131 return true;
|
132 chip 1.11 }
|
133 david 1.1
134 int UTF16toUTF8(const Uint16** srcHead,
|
135 chip 1.11 const Uint16* srcEnd,
136 Uint8** tgtHead,
137 Uint8* tgtEnd)
|
138 david 1.1 {
139 int returnCode = 0;
140 const Uint16* src = *srcHead;
141 Uint8* tgt = *tgtHead;
142 while (src < srcEnd)
143 {
|
144 mike 1.17 if (*src < 128)
145 {
146 if (tgt == tgtEnd)
147 {
148 returnCode = -1;
149 break;
150 }
151
152 *tgt++ = *src++;
153 continue;
154 }
155
|
156 chip 1.11 Uint32 tempchar;
157 Uint16 numberOfBytes = 0;
158 const Uint16* oldsrc = src;
159 tempchar = *src++;
160 if (tempchar >= FIRST_HIGH_SURROGATE
161 && tempchar <= LAST_HIGH_SURROGATE)
162 {
163 if (src < srcEnd)
164 {
165 Uint32 tempchar2 = *src;
166 if (tempchar2 >= FIRST_LOW_SURROGATE &&
167 tempchar2 <= LAST_LOW_SURROGATE)
168 {
169 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
170 + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
171 ++src;
172 }
173 }
174 else
175 {
176 --src;
177 chip 1.11 returnCode = -1;
178 break;
179 }
180 }
181 if (tempchar < (Uint32)0x80)
182 {
183 numberOfBytes = 1;
184 }
185 else if (tempchar < (Uint32)0x800)
186 {
187 numberOfBytes = 2;
188 }
189 else if (tempchar < (Uint32)0x10000)
190 {
191 numberOfBytes = 3;
192 }
193 else if (tempchar < (Uint32)0x200000)
194 {
195 numberOfBytes = 4;
196 }
197 else
198 chip 1.11 {
199 numberOfBytes = 2;
200 tempchar = REPLACEMENT_CHARACTER;
201 }
202
203 tgt += numberOfBytes;
204 if (tgt > tgtEnd)
205 {
206 src = oldsrc;
207 tgt -= numberOfBytes;
208 returnCode = -1;
209 break;
210 }
211
212 switch (numberOfBytes)
213 {
214 case 4:
215 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
216 tempchar >>= 6;
217 case 3:
218 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
219 chip 1.11 tempchar >>= 6;
220 case 2:
221 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
222 tempchar >>= 6;
223 case 1:
224 *--tgt = (Uint8)(tempchar | firstByteMark[numberOfBytes]);
225 }
226 tgt += numberOfBytes;
|
227 david 1.1 }
228 *srcHead = src;
229 *tgtHead = tgt;
230 return returnCode;
231 }
232
233 int UTF8toUTF16 (const Uint8** srcHead,
|
234 chip 1.11 const Uint8* srcEnd,
235 Uint16** tgtHead,
236 Uint16* tgtEnd)
|
237 david 1.1 {
238 int returnCode = 0;
239 const Uint8* src = *srcHead;
240 Uint16* tgt = *tgtHead;
241 while (src < srcEnd)
242 {
|
243 chip 1.11 Uint32 tempchar = 0;
244 Uint16 moreBytes = trailingBytesForUTF8[*src];
245 if (src + moreBytes >= srcEnd)
246 {
247 returnCode = -1;
248 break;
249 }
250 switch (moreBytes)
251 {
252 case 3:
253 tempchar += *src++;
254 tempchar <<= 6;
255 case 2:
256 tempchar += *src++;
257 tempchar <<= 6;
258 case 1:
259 tempchar += *src++;
260 tempchar <<= 6;
261 case 0:
262 tempchar += *src++;
263 }
264 chip 1.11 tempchar -= offsetsFromUTF8[moreBytes];
265
266 if (tgt >= tgtEnd)
267 {
268 src -= (moreBytes+1);
269 returnCode = -1; break;
270 }
271 if (tempchar <= MAX_BYTE)
272 {
273 if ((tempchar >= FIRST_HIGH_SURROGATE &&
274 tempchar <= LAST_LOW_SURROGATE) ||
275 ((tempchar & 0xFFFE) == 0xFFFE))
276 {
277 *tgt++ = REPLACEMENT_CHARACTER;
278 }
279 else
280 {
281 *tgt++ = (Uint16)tempchar;
282 }
283 }
284 else if (tempchar > MAX_UTF16)
285 chip 1.11 {
286 *tgt++ = REPLACEMENT_CHARACTER;
287 }
288 else
289 {
290 if (tgt + 1 >= tgtEnd)
291 {
292 src -= (moreBytes+1);
293 returnCode = -1;
294 break;
295 }
296 tempchar -= halfBase;
297 *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
298 *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
299 }
|
300 david 1.1 }
301 *srcHead = src;
302 *tgtHead = tgt;
303 return returnCode;
304 }
|
305 david 1.5
|
306 mike 1.17 Boolean isUTF8Aux(const char *legal)
|
307 david 1.5 {
308 char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
309
310 // Validate that the string is long enough to hold all the expected bytes.
311 // Note that if legal[0] == 0, numBytes will be 1.
312 for (char i=1; i<numBytes; i++)
313 {
314 if (legal[i] == 0)
315 {
316 return false;
317 }
318 }
319
320 return (isValid_U8((const Uint8 *)legal, numBytes));
321 }
|
322 chuck 1.6
|
323 chuck 1.9 Boolean isUTF8Str(const char *legal)
324 {
|
325 chip 1.11 /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
|
326 chuck 1.9 0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84,
327 0x00};*/
|
328 chip 1.11 // char tmp_[] = "class";
329 // char * tmp = legal;
|
330 david.dillard 1.13 size_t count = 0;
331 const size_t size = strlen(legal);
|
332 chip 1.11 // printf("size = %d\n",size);
|
333 david.dillard 1.13 while(count<size)
334 {
|
335 chip 1.11 // printf("count = %d\n",count);
|
336 david.dillard 1.13 if(isUTF8(&legal[count]) == true){
337 UTF8_NEXT(legal,count);
|
338 chip 1.11 }else{
339 // printf("bad string\n");
340 return false;
341 }
|
342 david.dillard 1.13 }
|
343 chip 1.11 // printf("good string\n");
344 return true;
|
345 chuck 1.9 /*
|
346 chip 1.11 printf("legal = %s\n\n", legal);
347 Uint32 count = 0;
348 Uint32 trailingBytes = 0;
|
349 chuck 1.9 Uint32 size = strlen(legal);
|
350 chip 1.11 printf("size of legal is %d\n",size);
|
351 chuck 1.9 while(count<size-1)
352 {
|
353 chip 1.11 printf("count = %d\n", count);
|
354 chuck 1.9 if(isUTF8((char*)&legal[count]) == true){
|
355 chip 1.11 UTF8_NEXT(legal,trailingBytes);
356 count += trailingBytes;
357 } else{
358 printf("CommonUTF8:: returning false; position[%d]",count);
359 return false;
360 }
|
361 chuck 1.9 }
|
362 chip 1.11 printf("CommonUTF8:: returning false; position[%d]",count);
363 return true;*/
|
364 chuck 1.9 }
|
365 chuck 1.6
366 String escapeStringEncoder(const String& Str)
367 {
368 String escapeStr;
369 Uint16 escChar;
370 char hexencoding[6];
|
371 chip 1.11
|
372 chuck 1.6 for(Uint32 i = 0; i < Str.size(); ++i)
373 {
|
374 chip 1.11 escChar = Str[i];
375 if(escChar <= 0x7F)
|
376 chuck 1.6 {
|
377 chip 1.11 escapeStr.append(escChar);
|
378 chuck 1.6 }
|
379 chip 1.11 else
380 {
381 memset(hexencoding,0x00,sizeof(hexencoding));
|
382 chuck 1.6 sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16);
383 escapeStr.append(hexencoding);
|
384 chip 1.11 }
|
385 chuck 1.6 }
386 return(escapeStr);
387 }
388
389 String escapeStringDecoder(const String& Str)
390 {
391 Uint32 i;
392
|
393 chip 1.11 Array<Uint16> utf16Chars;
|
394 chuck 1.6
395 for (i=0; i< Str.size(); ++i)
396 {
397 if (Str[i] == '%')
398 {
399 Uint8 digit1 = _hexCharToNumeric((Str[++i]));
400 Uint8 digit2 = _hexCharToNumeric((Str[++i]));
401 Uint8 digit3 = _hexCharToNumeric((Str[++i]));
402 Uint8 digit4 = _hexCharToNumeric((Str[++i]));
403
|
404 chip 1.11 Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
|
405 chuck 1.6 (digit3<< 4) + (digit4);
406
|
407 chip 1.11 utf16Chars.append(decodedChar);
|
408 chuck 1.6 }
409 else
410 {
|
411 chip 1.11 utf16Chars.append((Uint16)Str[i]);
|
412 chuck 1.6 }
413 }
414
415 // If there was a string to decode...
416 if (Str.size() > 0)
417 {
418 utf16Chars.append('\0');
419 return String((Char16 *)utf16Chars.getData());
420 }
421 else
422 {
423 return String();
424 }
425 }
426
|
427 yi.zhou 1.16 #ifdef PEGASUS_HAS_ICU
428
429 Boolean InitializeICU::_initAttempted = false;
430 Boolean InitializeICU::_initSuccessful = false;
431 Mutex InitializeICU::_initMutex;
432
433 Boolean InitializeICU::initICUSuccessful()
434 {
435 if (!_initAttempted)
436 {
437 {
438 AutoMutex lock(_initMutex);
439
440 if (!_initAttempted)
441 {
442 UErrorCode _status = U_ZERO_ERROR;
443
444 // Initialize ICU
445 u_init(&_status);
446
447 if (U_FAILURE(_status))
448 yi.zhou 1.16 {
449 _initSuccessful = false;
450 Logger::put (Logger::STANDARD_LOG , System::CIMSERVER,
451 Logger::WARNING,
452 "ICU initialization failed with error: $0.",
453 _status);
454 }
455 else
456 {
457 _initSuccessful = true;
458 }
459 _initAttempted = true;
460 }
461 }
462 }
463
464 return _initSuccessful;
465 }
466
467 #endif
468
|
469 david 1.1 PEGASUS_NAMESPACE_END
|