1 karl 1.4 //%2003////////////////////////////////////////////////////////////////////////
|
2 david 1.1 //
|
3 karl 1.4 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Development
4 // Company, L. P., IBM Corp., The Open Group, Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L. P.;
6 // IBM Corp.; EMC Corporation, The Open Group.
|
7 david 1.1 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
9 // of this software and associated documentation files (the "Software"), to
10 // deal in the Software without restriction, including without limitation the
11 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
12 // sell copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
14 //
15 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
16 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
17 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
18 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
19 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 //
24 //==============================================================================
25 //
26 // Author: Dave Rosckes (rosckes@us.ibm.com)
27 //
28 david 1.1 //
29 //%/////////////////////////////////////////////////////////////////////////////
30
|
31 chuck 1.7 #include <Pegasus/Common/Config.h>
32 #include <Pegasus/Common/Array.h>
|
33 david 1.1 #include "CommonUTF.h"
|
34 chuck 1.7 #include <cctype>
35 #include <cstdio>
|
36 david 1.2 #include <cstring>
|
37 kumpf 1.3
|
38 david 1.1 PEGASUS_NAMESPACE_BEGIN
|
39 kumpf 1.3
|
40 chuck 1.6
41 inline Uint8 _hexCharToNumeric(const Uint16 c)
42 {
43 Uint8 n;
44
45 if (isdigit(c))
46 n = (c - '0');
47 else if (isupper(c))
48 n = (c - 'A' + 10);
49 else // if (islower(c))
50 n = (c - 'a' + 10);
51
52 return n;
53 }
54
|
55 kumpf 1.3 // Note: Caller must ensure that "src" contains "size" bytes.
|
56 david 1.1 int isValid_U8(const Uint8 *src, int size)
57 {
58 Uint8 U8_char;
59 const Uint8 *srcptr = src+size;
60 switch (size)
61 {
62 case 4:
63 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
64 {
65 return false;
66 }
67 case 3:
68 if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
69 {
70 return false;
71 }
72 case 2:
73 if ((U8_char = (*--srcptr)) > 0xBF)
74 {
75 return false;
76 }
77 david 1.1 switch (*src)
78 {
79 case 0xE0:
80 if (U8_char < 0xA0)
81 {
82 return false;
83 }
84 break;
85 case 0xF0:
86 if (U8_char < 0x90)
87 {
88 return false;
89 }
90 break;
91 case 0xF4:
92 if (U8_char > 0x8F)
93 {
94 return false;
95 }
96 break;
97 default:
98 david 1.1 if (U8_char < 0x80)
99 {
100 return false;
101 }
102 }
103 case 1:
104 if (*src >= 0x80 && *src < 0xC2)
105 {
106 return false;
107 }
108 if (*src > 0xF4)
109 {
110 return false;
111 }
112 break;
|
113 david 1.2 default:
114 {
115 return false;
116 }
|
117 david 1.1
118 }
119 return true;
120 }
121
122 int UTF16toUTF8(const Uint16** srcHead,
123 const Uint16* srcEnd,
124 Uint8** tgtHead,
125 Uint8* tgtEnd)
126 {
127 int returnCode = 0;
128 const Uint16* src = *srcHead;
129 Uint8* tgt = *tgtHead;
130 while (src < srcEnd)
131 {
132 Uint32 tempchar;
133 Uint16 numberOfBytes = 0;
134 const Uint16* oldsrc = src;
135 tempchar = *src++;
136 if (tempchar >= FIRST_HIGH_SURROGATE
137 && tempchar <= LAST_HIGH_SURROGATE)
138 david 1.1 {
139 if (src < srcEnd)
140 {
141 Uint32 tempchar2 = *src;
142 if (tempchar2 >= FIRST_LOW_SURROGATE &&
143 tempchar2 <= LAST_LOW_SURROGATE)
144 {
145 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
146 + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
147 ++src;
148 }
149 }
150 else
151 {
152 --src;
153 returnCode = -1;
154 break;
155 }
156 }
157 if (tempchar < (Uint32)0x80)
158 {
159 david 1.1 numberOfBytes = 1;
160 }
161 else if (tempchar < (Uint32)0x800)
162 {
163 numberOfBytes = 2;
164 }
165 else if (tempchar < (Uint32)0x10000)
166 {
167 numberOfBytes = 3;
168 }
169 else if (tempchar < (Uint32)0x200000)
170 {
171 numberOfBytes = 4;
172 }
173 else
174 {
175 numberOfBytes = 2;
176 tempchar = REPLACEMENT_CHARACTER;
177 }
178
179 tgt += numberOfBytes;
180 david 1.1 if (tgt > tgtEnd)
181 {
182 src = oldsrc;
183 tgt -= numberOfBytes;
184 returnCode = -1;
185 break;
186 }
187
188 switch (numberOfBytes)
189 {
190 case 4:
191 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
192 tempchar >>= 6;
193 case 3:
194 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
195 tempchar >>= 6;
196 case 2:
197 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
198 tempchar >>= 6;
199 case 1:
200 *--tgt = (Uint8)(tempchar | firstByteMark[numberOfBytes]);
201 david 1.1 }
202 tgt += numberOfBytes;
203 }
204 *srcHead = src;
205 *tgtHead = tgt;
206 return returnCode;
207 }
208
209 int UTF8toUTF16 (const Uint8** srcHead,
210 const Uint8* srcEnd,
211 Uint16** tgtHead,
212 Uint16* tgtEnd)
213 {
214 int returnCode = 0;
215 const Uint8* src = *srcHead;
216 Uint16* tgt = *tgtHead;
217 while (src < srcEnd)
218 {
219 Uint32 tempchar = 0;
220 Uint16 moreBytes = trailingBytesForUTF8[*src];
221 if (src + moreBytes >= srcEnd)
222 david 1.1 {
223 returnCode = -1;
224 break;
225 }
226 switch (moreBytes)
227 {
228 case 3:
229 tempchar += *src++;
230 tempchar <<= 6;
231 case 2:
232 tempchar += *src++;
233 tempchar <<= 6;
234 case 1:
235 tempchar += *src++;
236 tempchar <<= 6;
237 case 0:
238 tempchar += *src++;
239 }
240 tempchar -= offsetsFromUTF8[moreBytes];
241
242 if (tgt >= tgtEnd)
243 david 1.1 {
244 src -= (moreBytes+1);
245 returnCode = -1; break;
246 }
247 if (tempchar <= MAX_BYTE)
248 {
249 if ((tempchar >= FIRST_HIGH_SURROGATE &&
250 tempchar <= LAST_LOW_SURROGATE) ||
251 ((tempchar & 0xFFFE) == 0xFFFE))
252 {
253 *tgt++ = REPLACEMENT_CHARACTER;
254 }
255 else
256 {
257 *tgt++ = (Uint16)tempchar;
258 }
259 }
260 else if (tempchar > MAX_UTF16)
261 {
262 *tgt++ = REPLACEMENT_CHARACTER;
263 }
264 david 1.1 else
265 {
266 if (tgt + 1 >= tgtEnd)
267 {
268 src -= (moreBytes+1);
269 returnCode = -1;
270 break;
271 }
272 tempchar -= halfBase;
273 *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
274 *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
275 }
276 }
277 *srcHead = src;
278 *tgtHead = tgt;
279 return returnCode;
280 }
|
281 david 1.5
282 Boolean isUTF8(const char *legal)
283 {
284 char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
285
286 // Validate that the string is long enough to hold all the expected bytes.
287 // Note that if legal[0] == 0, numBytes will be 1.
288 for (char i=1; i<numBytes; i++)
289 {
290 if (legal[i] == 0)
291 {
292 return false;
293 }
294 }
295
296 return (isValid_U8((const Uint8 *)legal, numBytes));
297 }
|
298 chuck 1.6
|
299 humberto 1.7.4.1 Boolean isUTF8Str(const char *legal)
300 {
301 /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
302 0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84,
303 0x00};*/
304 // char tmp_[] = "class";
305 // char * tmp = legal;
306 Uint32 count = 0;
307 Uint32 size = strlen(legal);
308 // printf("size = %d\n",size);
309 while(count<size)
310 {
311 // printf("count = %d\n",count);
312 if(isUTF8(&legal[count]) == true){
313 UTF8_NEXT(legal,count);
314 }else{
315 // printf("bad string\n");
316 return false;
317 }
318 }
319 // printf("good string\n");
320 humberto 1.7.4.1 return true;
321 /*
322 printf("legal = %s\n\n", legal);
323 Uint32 count = 0;
324 Uint32 trailingBytes = 0;
325 Uint32 size = strlen(legal);
326 printf("size of legal is %d\n",size);
327 while(count<size-1)
328 {
329 printf("count = %d\n", count);
330 if(isUTF8((char*)&legal[count]) == true){
331 UTF8_NEXT(legal,trailingBytes);
332 count += trailingBytes;
333 } else{
334 printf("CommonUTF8:: returning false; position[%d]",count);
335 return false;
336 }
337 }
338 printf("CommonUTF8:: returning false; position[%d]",count);
339 return true;*/
340 }
|
341 chuck 1.6
342 String escapeStringEncoder(const String& Str)
343 {
344 String escapeStr;
345 Uint16 escChar;
346 char hexencoding[6];
347
348 for(Uint32 i = 0; i < Str.size(); ++i)
349 {
350 escChar = Str[i];
351 if(escChar <= 0x7F)
352 {
353 escapeStr.append(escChar);
354 }
355 else
356 {
357 memset(hexencoding,0x00,sizeof(hexencoding));
358 sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16);
359 escapeStr.append(hexencoding);
360 }
361 }
362 chuck 1.6 return(escapeStr);
363 }
364
365 String escapeStringDecoder(const String& Str)
366 {
367 Uint32 i;
368
369 Array<Uint16> utf16Chars;
370
371 for (i=0; i< Str.size(); ++i)
372 {
373 if (Str[i] == '%')
374 {
375 Uint8 digit1 = _hexCharToNumeric((Str[++i]));
376 Uint8 digit2 = _hexCharToNumeric((Str[++i]));
377 Uint8 digit3 = _hexCharToNumeric((Str[++i]));
378 Uint8 digit4 = _hexCharToNumeric((Str[++i]));
379
380 Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
381 (digit3<< 4) + (digit4);
382
383 chuck 1.6 utf16Chars.append(decodedChar);
384 }
385 else
386 {
387 utf16Chars.append((Uint16)Str[i]);
388 }
389 }
390
391 // If there was a string to decode...
392 if (Str.size() > 0)
393 {
394 utf16Chars.append('\0');
395 return String((Char16 *)utf16Chars.getData());
396 }
397 else
398 {
399 return String();
400 }
401 }
402
|
403 david 1.1 PEGASUS_NAMESPACE_END
|