(file) Return to CommonUTF.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.18 //%2006////////////////////////////////////////////////////////////////////////
  2 david 1.1  //
  3 karl  1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl  1.4  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl  1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9            // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 karl  1.18 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 12            // EMC Corporation; Symantec Corporation; The Open Group.
 13 david 1.1  //
 14            // Permission is hereby granted, free of charge, to any person obtaining a copy
 15            // of this software and associated documentation files (the "Software"), to
 16            // deal in the Software without restriction, including without limitation the
 17            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 18            // sell copies of the Software, and to permit persons to whom the Software is
 19            // furnished to do so, subject to the following conditions:
 20 karl  1.18 // 
 21 david 1.1  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 22            // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 23            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 24            // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 25            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 26            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 27            // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 28            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 29            //
 30 kamal.locahana 1.22 //=============================================================================
 31 david          1.1  //
 32 kamal.locahana 1.22 //%////////////////////////////////////////////////////////////////////////////
 33 david          1.1  
 34 chuck          1.7  #include <Pegasus/Common/Config.h>
 35                     #include <Pegasus/Common/Array.h>
 36 yi.zhou        1.16 #include <Pegasus/Common/Logger.h>
 37 david          1.1  #include "CommonUTF.h"
 38 chuck          1.7  #include <cstdio>
 39 david          1.2  #include <cstring>
 40 david.dillard  1.15 #include <cctype>
 41 kumpf          1.3  
 42 yi.zhou        1.16 #ifdef PEGASUS_HAS_ICU
 43                     #include <unicode/uclean.h>
 44                     #endif
 45                     
 46 david          1.1  PEGASUS_NAMESPACE_BEGIN
 47 kumpf          1.3  
 48 karl           1.19 const Uint32 halfBase = 0x0010000UL;
 49                     const Uint32 halfMask = 0x3FFUL;
 50                     const int halfShift  = 10;
 51                     const Uint8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 52                     
 53                     const Uint32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
 54                                  0x03C82080UL, 0xFA082080UL, 0x82082080UL };
 55                     
 56                     const char trailingBytesForUTF8[256] = {
 57                         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 58                         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 59                         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 60                         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 61                         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 62                         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 63                         1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 64                         2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
 65                     };
 66 david.dillard  1.12 inline Uint8 _hexCharToNumeric(Char16 c)
 67 chuck          1.6  {
 68                         Uint8 n;
 69                     
 70 david.dillard  1.14     if (isdigit(c))
 71 chuck          1.6          n = (c - '0');
 72 david.dillard  1.14     else if (isupper(c))
 73 chuck          1.6          n = (c - 'A' + 10);
 74                         else // if (islower(c))
 75                             n = (c - 'a' + 10);
 76                     
 77                         return n;
 78                     }
 79                     
 80 kumpf          1.3  // Note: Caller must ensure that "src" contains "size" bytes.
 81 chip           1.11 Boolean isValid_U8(const Uint8 *src, int size)
 82 david          1.1  {
 83                         Uint8 U8_char;
 84                         const Uint8 *srcptr = src+size;
 85                         switch (size)
 86                         {
 87 chip           1.11     case 4:
 88                             if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 89                             {
 90                             return false;
 91                             }
 92                         case 3:
 93                             if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 94                             {
 95                             return false;
 96                             }
 97                         case 2:
 98                             if ((U8_char = (*--srcptr)) > 0xBF)
 99                             {
100                             return false;
101                             }
102                             switch (*src)
103                             {
104                             case 0xE0:
105                                 if (U8_char < 0xA0)
106                                 {
107                                 return false;
108 chip           1.11             }
109                                 break;
110                             case 0xF0:
111                                 if (U8_char < 0x90)
112                                 {
113                                 return false;
114                                 }
115                                 break;
116                             case 0xF4:
117                                 if (U8_char > 0x8F)
118                                 {
119                                 return false;
120                                 }
121                                 break;
122                             default:
123                                 if (U8_char < 0x80)
124                                 {
125                                 return false;
126                                 }
127                             }
128                         case 1:
129 chip           1.11         if (*src >= 0x80 && *src < 0xC2)
130                             {
131                             return false;
132                             }
133                             if (*src > 0xF4)
134                             {
135                             return false;
136                             }
137                             break;
138 david          1.2          default:
139 chip           1.11         {
140                             return false;
141 david          1.2              }
142 david          1.1  
143                         }
144                         return true;
145 chip           1.11 }
146 david          1.1  
147                     int UTF16toUTF8(const Uint16** srcHead,
148 chip           1.11         const Uint16* srcEnd,
149                             Uint8** tgtHead,
150                             Uint8* tgtEnd)
151 david          1.1  {
152                         int returnCode = 0;
153                         const Uint16* src = *srcHead;
154                         Uint8* tgt = *tgtHead;
155                         while (src < srcEnd)
156                         {
157 kumpf          1.20         if (*src < 128)
158                             {
159                                 if (tgt == tgtEnd)
160                                 {
161                                     returnCode = -1;
162                                     break;
163                                 }
164                     
165 kamal.locahana 1.22             *tgt++ = (Uint8)*src++;
166 kumpf          1.20             continue;
167                             }
168 mike           1.17 
169 chip           1.11     Uint32 tempchar;
170                         Uint16 numberOfBytes = 0;
171                         const Uint16* oldsrc = src;
172                         tempchar = *src++;
173                         if (tempchar >= FIRST_HIGH_SURROGATE
174                             && tempchar <= LAST_HIGH_SURROGATE)
175                         {
176                             if (src < srcEnd)
177                             {
178                             Uint32 tempchar2 = *src;
179                             if (tempchar2 >= FIRST_LOW_SURROGATE &&
180                                 tempchar2 <= LAST_LOW_SURROGATE)
181                             {
182                                 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
183                                   + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
184                                 ++src;
185                             }
186                             }
187                             else
188                             {
189                             --src;
190 chip           1.11         returnCode = -1;
191                             break;
192                             }
193                         }
194                         if (tempchar < (Uint32)0x80)
195                         {
196                             numberOfBytes = 1;
197                         }
198                         else if (tempchar < (Uint32)0x800)
199                         {
200                             numberOfBytes = 2;
201                         }
202                         else if (tempchar < (Uint32)0x10000)
203                         {
204                             numberOfBytes = 3;
205                         }
206                         else if (tempchar < (Uint32)0x200000)
207                         {
208                             numberOfBytes = 4;
209                         }
210                         else
211 chip           1.11     {
212                             numberOfBytes = 2;
213                             tempchar = REPLACEMENT_CHARACTER;
214                         }
215                     
216                         tgt += numberOfBytes;
217                         if (tgt > tgtEnd)
218                         {
219                             src = oldsrc;
220                             tgt -= numberOfBytes;
221                             returnCode = -1;
222                             break;
223                         }
224                     
225                         switch (numberOfBytes)
226                         {
227                             case 4:
228                             *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
229                             tempchar >>= 6;
230                             case 3:
231                             *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
232 chip           1.11         tempchar >>= 6;
233                             case 2:
234                             *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
235                             tempchar >>= 6;
236                             case 1:
237                             *--tgt =  (Uint8)(tempchar | firstByteMark[numberOfBytes]);
238                         }
239                         tgt += numberOfBytes;
240 david          1.1      }
241                         *srcHead = src;
242                         *tgtHead = tgt;
243                         return returnCode;
244                     }
245                     
246                     int UTF8toUTF16 (const Uint8** srcHead,
247 chip           1.11          const Uint8* srcEnd,
248                              Uint16** tgtHead,
249                              Uint16* tgtEnd)
250 david          1.1  {
251                         int returnCode = 0;
252                         const Uint8* src = *srcHead;
253                         Uint16* tgt = *tgtHead;
254                         while (src < srcEnd)
255                         {
256 chip           1.11     Uint32 tempchar = 0;
257                         Uint16 moreBytes = trailingBytesForUTF8[*src];
258                         if (src + moreBytes >= srcEnd)
259                         {
260                             returnCode = -1;
261                             break;
262                         }
263                         switch (moreBytes)
264                         {
265                             case 3:
266                             tempchar += *src++;
267                             tempchar <<= 6;
268                             case 2:
269                             tempchar += *src++;
270                             tempchar <<= 6;
271                             case 1:
272                             tempchar += *src++;
273                             tempchar <<= 6;
274                             case 0:
275                             tempchar += *src++;
276                         }
277 chip           1.11     tempchar -= offsetsFromUTF8[moreBytes];
278                     
279                         if (tgt >= tgtEnd)
280                         {
281                             src -= (moreBytes+1);
282                             returnCode = -1; break;
283                         }
284                         if (tempchar <= MAX_BYTE)
285                         {
286                             if ((tempchar >= FIRST_HIGH_SURROGATE &&
287                              tempchar <= LAST_LOW_SURROGATE) ||
288                             ((tempchar & 0xFFFE) == 0xFFFE))
289                             {
290                             *tgt++ = REPLACEMENT_CHARACTER;
291                             }
292                             else
293                             {
294                             *tgt++ = (Uint16)tempchar;
295                             }
296                         }
297                         else if (tempchar > MAX_UTF16)
298 chip           1.11     {
299                             *tgt++ = REPLACEMENT_CHARACTER;
300                         }
301                         else
302                         {
303                             if (tgt + 1 >= tgtEnd)
304                             {
305                             src -= (moreBytes+1);
306                             returnCode = -1;
307                             break;
308                             }
309                             tempchar -= halfBase;
310                             *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
311                             *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
312                         }
313 david          1.1      }
314                         *srcHead = src;
315                         *tgtHead = tgt;
316                         return returnCode;
317                     }
318 david          1.5  
319 mike           1.17 Boolean isUTF8Aux(const char *legal)
320 david          1.5  {
321                         char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
322                     
323                         // Validate that the string is long enough to hold all the expected bytes.
324                         // Note that if legal[0] == 0, numBytes will be 1.
325                         for (char i=1; i<numBytes; i++)
326                         {
327                             if (legal[i] == 0)
328                             {
329                                 return false;
330                             }
331                         }
332                     
333 kumpf          1.21     return isValid_U8((const Uint8 *)legal, numBytes);
334 david          1.5  }
335 chuck          1.6  
336 chuck          1.9  Boolean isUTF8Str(const char *legal)
337                     {
338 chip           1.11     /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
339 chuck          1.9                        0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84,
340                                           0x00};*/
341 chip           1.11 //  char tmp_[] = "class";
342                     //  char * tmp = legal;
343 david.dillard  1.13     size_t count = 0;
344                         const size_t size = strlen(legal);
345 chip           1.11 //  printf("size = %d\n",size);
346 kumpf          1.21     while (count<size)
347 david.dillard  1.13     {
348 chip           1.11 //      printf("count = %d\n",count);
349 kumpf          1.21         if (isUTF8(&legal[count]) == true)
350                             {
351 david.dillard  1.13             UTF8_NEXT(legal,count);
352 kumpf          1.21         }
353                             else
354                             {
355 chip           1.11 //          printf("bad string\n");
356                                 return false;
357                             }
358 david.dillard  1.13     }
359 chip           1.11 //  printf("good string\n");
360                         return true;
361 chuck          1.9  /*
362 chip           1.11     printf("legal = %s\n\n", legal);
363                         Uint32 count = 0;
364                         Uint32 trailingBytes = 0;
365 kumpf          1.21     Uint32 size = strlen(legal);
366 chip           1.11     printf("size of legal is %d\n",size);
367 kumpf          1.21     while (count<size-1)
368                         {
369                             printf("count = %d\n", count);
370                             if (isUTF8((char*)&legal[count]) == true)
371 chuck          1.9          {
372 kumpf          1.21             UTF8_NEXT(legal,trailingBytes);
373 chip           1.11             count += trailingBytes;
374 kumpf          1.21         }
375                             else
376                             {
377 chip           1.11             printf("CommonUTF8:: returning false; position[%d]",count);
378 kumpf          1.21             return false;
379 chip           1.11         }
380 kumpf          1.21     }
381                         printf("CommonUTF8:: returning false; position[%d]",count);
382 chip           1.11     return true;*/
383 chuck          1.9  }
384 chuck          1.6  
385                     String escapeStringEncoder(const String& Str)
386                     {
387                         String escapeStr;
388                         Uint16 escChar;
389                         char hexencoding[6];
390 chip           1.11 
391 kumpf          1.21     for (Uint32 i = 0; i < Str.size(); ++i)
392 chuck          1.6      {
393 kumpf          1.21         escChar = Str[i];
394                             if (escChar <= 0x7F)
395 chuck          1.6          {
396 kumpf          1.21             escapeStr.append(escChar);
397 chuck          1.6          }
398 kumpf          1.21         else
399                             {
400                                 memset(hexencoding,0x00,sizeof(hexencoding));
401 chuck          1.6              sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16);
402                                 escapeStr.append(hexencoding);
403 kumpf          1.21         }
404 chip           1.11     }
405 kumpf          1.21     return escapeStr;
406 chuck          1.6  }
407                     
408                     String escapeStringDecoder(const String& Str)
409                     {
410                         Uint32 i;
411                     
412 chip           1.11     Array<Uint16> utf16Chars;
413 chuck          1.6  
414                         for (i=0; i< Str.size(); ++i)
415                         {
416                             if (Str[i] == '%')
417                             {
418                                 Uint8 digit1 = _hexCharToNumeric((Str[++i]));
419                                 Uint8 digit2 = _hexCharToNumeric((Str[++i]));
420                                 Uint8 digit3 = _hexCharToNumeric((Str[++i]));
421                                 Uint8 digit4 = _hexCharToNumeric((Str[++i]));
422                     
423 chip           1.11         Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
424 chuck          1.6                                   (digit3<< 4) + (digit4);
425                     
426 chip           1.11             utf16Chars.append(decodedChar);
427 chuck          1.6          }
428                             else
429                             {
430 chip           1.11             utf16Chars.append((Uint16)Str[i]);
431 chuck          1.6          }
432                         }
433                     
434                         // If there was a string to decode...
435                         if (Str.size() > 0)
436                         {
437                             utf16Chars.append('\0');
438                             return String((Char16 *)utf16Chars.getData());
439                         }
440                         else
441                         {
442                             return String();
443                         }
444                     }
445                     
#ifdef PEGASUS_HAS_ICU

// State shared by all callers of InitializeICU::initICUSuccessful().
Boolean InitializeICU::_initAttempted = false;
Boolean InitializeICU::_initSuccessful = false;
Mutex InitializeICU::_initMutex;

// Initializes ICU the first time it is called and reports whether that
// initialization succeeded.
//
// u_init() is called at most once, while _initMutex is held. A failure is
// written to the log as a warning. Later calls only return the recorded
// result.
//
// NOTE(review): _initAttempted is first read without holding _initMutex --
// confirm that this is acceptable on all supported platforms.
//
// @return  true if u_init() succeeded, false otherwise.
Boolean InitializeICU::initICUSuccessful()
{
    // Once an attempt has been recorded there is nothing left to do.
    if (_initAttempted)
    {
        return _initSuccessful;
    }

    {
        AutoMutex lock(_initMutex);

        // Another thread may have completed the attempt while this one was
        // waiting for the mutex.
        if (!_initAttempted)
        {
            UErrorCode status = U_ZERO_ERROR;

            // Initialize ICU
            u_init(&status);

            if (U_FAILURE(status))
            {
                _initSuccessful = false;
                Logger::put(
                    Logger::STANDARD_LOG , System::CIMSERVER,
                    Logger::WARNING,
                    "ICU initialization failed with error: $0.",
                    status);
            }
            else
            {
                _initSuccessful = true;
            }

            _initAttempted = true;
        }
    }

    return _initSuccessful;
}

#endif
488                     
489 david          1.1  PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2