(file) Return to CommonUTF.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.18 //%2006////////////////////////////////////////////////////////////////////////
  2 david 1.1  //
  3 karl  1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl  1.4  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl  1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9            // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 karl  1.18 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 12            // EMC Corporation; Symantec Corporation; The Open Group.
 13 david 1.1  //
 14            // Permission is hereby granted, free of charge, to any person obtaining a copy
 15            // of this software and associated documentation files (the "Software"), to
 16            // deal in the Software without restriction, including without limitation the
 17            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 18            // sell copies of the Software, and to permit persons to whom the Software is
 19            // furnished to do so, subject to the following conditions:
 20 karl  1.18 // 
 21 david 1.1  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 22            // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 23            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 24            // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 25            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 26            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 27            // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 28            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 29            //
 30            //==============================================================================
 31            //
 32            // Author: Dave Rosckes   (rosckes@us.ibm.com)
 33            //
 34 david.dillard 1.12 // Modified By: David Dillard, VERITAS Software Corp.
 35                    //                  (david.dillard@veritas.com)
 36 yi.zhou       1.16 //              Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com)
 37 david         1.1  //
 38                    //%/////////////////////////////////////////////////////////////////////////////
 39                    
 40 chuck         1.7  #include <Pegasus/Common/Config.h>
 41                    #include <Pegasus/Common/Array.h>
 42 yi.zhou       1.16 #include <Pegasus/Common/Logger.h>
 43 david         1.1  #include "CommonUTF.h"
 44 chuck         1.7  #include <cstdio>
 45 david         1.2  #include <cstring>
 46 david.dillard 1.15 #include <cctype>
 47 kumpf         1.3  
 48 yi.zhou       1.16 #ifdef PEGASUS_HAS_ICU
 49                    #include <unicode/uclean.h>
 50                    #endif
 51                    
 52 david         1.1  PEGASUS_NAMESPACE_BEGIN
 53 kumpf         1.3  
 54 karl          1.19 const Uint32 halfBase = 0x0010000UL;  // added after combining a surrogate pair; subtracted before splitting one
 55                    const Uint32 halfMask = 0x3FFUL;  // selects the low 10 bits used for the low surrogate
 56                    const int halfShift  = 10;  // shift applied to the high-surrogate part
                       // Marker OR-ed into the first byte of an encoded sequence by UTF16toUTF8();
                       // indexed by the total number of bytes in the sequence (1..4 are used there).
 57                    const Uint8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 58                    
                       // Value subtracted from the raw accumulated byte sum in UTF8toUTF16();
                       // indexed by the number of trailing bytes of the sequence.
 59                    const Uint32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
 60                                 0x03C82080UL, 0xFA082080UL, 0x82082080UL };
 61                    
                       // Number of bytes that follow a given first byte; indexed by the first
                       // byte's value: 0x00-0xBF -> 0, 0xC0-0xDF -> 1, 0xE0-0xEF -> 2,
                       // 0xF0-0xF7 -> 3, 0xF8-0xFB -> 4, 0xFC-0xFF -> 5.
 62                    const char trailingBytesForUTF8[256] = {
 63                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 64                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 65                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 66                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 67                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 68                        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 69                        1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 70                        2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
 71                    };
 72 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c)
 73 chuck         1.6  {
 74                        Uint8 n;
 75                    
 76 david.dillard 1.14     if (isdigit(c))
 77 chuck         1.6          n = (c - '0');
 78 david.dillard 1.14     else if (isupper(c))
 79 chuck         1.6          n = (c - 'A' + 10);
 80                        else // if (islower(c))
 81                            n = (c - 'a' + 10);
 82                    
 83                        return n;
 84                    }
 85                    
 86 kumpf         1.3  // Note: Caller must ensure that "src" contains "size" bytes.
 87 chip          1.11 Boolean isValid_U8(const Uint8 *src, int size)
 88 david         1.1  {
 89                        Uint8 U8_char;
 90                        const Uint8 *srcptr = src+size;
 91                        switch (size)
 92                        {
 93 chip          1.11     case 4:
 94                            if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 95                            {
 96                            return false;
 97                            }
 98                        case 3:
 99                            if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
100                            {
101                            return false;
102                            }
103                        case 2:
104                            if ((U8_char = (*--srcptr)) > 0xBF)
105                            {
106                            return false;
107                            }
108                            switch (*src)
109                            {
110                            case 0xE0:
111                                if (U8_char < 0xA0)
112                                {
113                                return false;
114 chip          1.11             }
115                                break;
116                            case 0xF0:
117                                if (U8_char < 0x90)
118                                {
119                                return false;
120                                }
121                                break;
122                            case 0xF4:
123                                if (U8_char > 0x8F)
124                                {
125                                return false;
126                                }
127                                break;
128                            default:
129                                if (U8_char < 0x80)
130                                {
131                                return false;
132                                }
133                            }
134                        case 1:
135 chip          1.11         if (*src >= 0x80 && *src < 0xC2)
136                            {
137                            return false;
138                            }
139                            if (*src > 0xF4)
140                            {
141                            return false;
142                            }
143                            break;
144 david         1.2          default:
145 chip          1.11         {
146                            return false;
147 david         1.2              }
148 david         1.1  
149                        }
150                        return true;
151 chip          1.11 }
152 david         1.1  
153                    int UTF16toUTF8(const Uint16** srcHead,
154 chip          1.11         const Uint16* srcEnd,
155                            Uint8** tgtHead,
156                            Uint8* tgtEnd)
157 david         1.1  {
158                        int returnCode = 0;
159                        const Uint16* src = *srcHead;
160                        Uint8* tgt = *tgtHead;
161                        while (src < srcEnd)
162                        {
163 mike          1.17 	if (*src < 128)
164                    	{
165                    	    if (tgt == tgtEnd)
166                    	    {
167                    		returnCode = -1;
168                    		break;
169                    	    }
170                    
171                    	    *tgt++ = *src++;
172                    	    continue;
173                    	}
174                    
175 chip          1.11     Uint32 tempchar;
176                        Uint16 numberOfBytes = 0;
177                        const Uint16* oldsrc = src;
178                        tempchar = *src++;
179                        if (tempchar >= FIRST_HIGH_SURROGATE
180                            && tempchar <= LAST_HIGH_SURROGATE)
181                        {
182                            if (src < srcEnd)
183                            {
184                            Uint32 tempchar2 = *src;
185                            if (tempchar2 >= FIRST_LOW_SURROGATE &&
186                                tempchar2 <= LAST_LOW_SURROGATE)
187                            {
188                                tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
189                                  + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
190                                ++src;
191                            }
192                            }
193                            else
194                            {
195                            --src;
196 chip          1.11         returnCode = -1;
197                            break;
198                            }
199                        }
200                        if (tempchar < (Uint32)0x80)
201                        {
202                            numberOfBytes = 1;
203                        }
204                        else if (tempchar < (Uint32)0x800)
205                        {
206                            numberOfBytes = 2;
207                        }
208                        else if (tempchar < (Uint32)0x10000)
209                        {
210                            numberOfBytes = 3;
211                        }
212                        else if (tempchar < (Uint32)0x200000)
213                        {
214                            numberOfBytes = 4;
215                        }
216                        else
217 chip          1.11     {
218                            numberOfBytes = 2;
219                            tempchar = REPLACEMENT_CHARACTER;
220                        }
221                    
222                        tgt += numberOfBytes;
223                        if (tgt > tgtEnd)
224                        {
225                            src = oldsrc;
226                            tgt -= numberOfBytes;
227                            returnCode = -1;
228                            break;
229                        }
230                    
231                        switch (numberOfBytes)
232                        {
233                            case 4:
234                            *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
235                            tempchar >>= 6;
236                            case 3:
237                            *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
238 chip          1.11         tempchar >>= 6;
239                            case 2:
240                            *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
241                            tempchar >>= 6;
242                            case 1:
243                            *--tgt =  (Uint8)(tempchar | firstByteMark[numberOfBytes]);
244                        }
245                        tgt += numberOfBytes;
246 david         1.1      }
247                        *srcHead = src;
248                        *tgtHead = tgt;
249                        return returnCode;
250                    }
251                    
252                    int UTF8toUTF16 (const Uint8** srcHead,
253 chip          1.11          const Uint8* srcEnd,
254                             Uint16** tgtHead,
255                             Uint16* tgtEnd)
256 david         1.1  {
257                        int returnCode = 0;
258                        const Uint8* src = *srcHead;
259                        Uint16* tgt = *tgtHead;
260                        while (src < srcEnd)
261                        {
262 chip          1.11     Uint32 tempchar = 0;
263                        Uint16 moreBytes = trailingBytesForUTF8[*src];
264                        if (src + moreBytes >= srcEnd)
265                        {
266                            returnCode = -1;
267                            break;
268                        }
269                        switch (moreBytes)
270                        {
271                            case 3:
272                            tempchar += *src++;
273                            tempchar <<= 6;
274                            case 2:
275                            tempchar += *src++;
276                            tempchar <<= 6;
277                            case 1:
278                            tempchar += *src++;
279                            tempchar <<= 6;
280                            case 0:
281                            tempchar += *src++;
282                        }
283 chip          1.11     tempchar -= offsetsFromUTF8[moreBytes];
284                    
285                        if (tgt >= tgtEnd)
286                        {
287                            src -= (moreBytes+1);
288                            returnCode = -1; break;
289                        }
290                        if (tempchar <= MAX_BYTE)
291                        {
292                            if ((tempchar >= FIRST_HIGH_SURROGATE &&
293                             tempchar <= LAST_LOW_SURROGATE) ||
294                            ((tempchar & 0xFFFE) == 0xFFFE))
295                            {
296                            *tgt++ = REPLACEMENT_CHARACTER;
297                            }
298                            else
299                            {
300                            *tgt++ = (Uint16)tempchar;
301                            }
302                        }
303                        else if (tempchar > MAX_UTF16)
304 chip          1.11     {
305                            *tgt++ = REPLACEMENT_CHARACTER;
306                        }
307                        else
308                        {
309                            if (tgt + 1 >= tgtEnd)
310                            {
311                            src -= (moreBytes+1);
312                            returnCode = -1;
313                            break;
314                            }
315                            tempchar -= halfBase;
316                            *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
317                            *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
318                        }
319 david         1.1      }
320                        *srcHead = src;
321                        *tgtHead = tgt;
322                        return returnCode;
323                    }
324 david         1.5  
325 mike          1.17 Boolean isUTF8Aux(const char *legal)
326 david         1.5  {
327                        char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
328                    
329                        // Validate that the string is long enough to hold all the expected bytes.
330                        // Note that if legal[0] == 0, numBytes will be 1.
331                        for (char i=1; i<numBytes; i++)
332                        {
333                            if (legal[i] == 0)
334                            {
335                                return false;
336                            }
337                        }
338                    
339                        return (isValid_U8((const Uint8 *)legal, numBytes));
340                    }
341 chuck         1.6  
342 chuck         1.9  Boolean isUTF8Str(const char *legal)
343                    {
344 chip          1.11     /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
345 chuck         1.9                        0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84,
346                                          0x00};*/
347 chip          1.11 //  char tmp_[] = "class";
348                    //  char * tmp = legal;
349 david.dillard 1.13     size_t count = 0;
350                        const size_t size = strlen(legal);
351 chip          1.11 //  printf("size = %d\n",size);
352 david.dillard 1.13     while(count<size)
353                        {
354 chip          1.11 //      printf("count = %d\n",count);
355 david.dillard 1.13         if(isUTF8(&legal[count]) == true){
356                                UTF8_NEXT(legal,count);
357 chip          1.11         }else{
358                    //          printf("bad string\n");
359                                return false;
360                            }
361 david.dillard 1.13     }
362 chip          1.11 //  printf("good string\n");
363                        return true;
364 chuck         1.9  /*
365 chip          1.11     printf("legal = %s\n\n", legal);
366                        Uint32 count = 0;
367                        Uint32 trailingBytes = 0;
368 chuck         1.9          Uint32 size = strlen(legal);
369 chip          1.11     printf("size of legal is %d\n",size);
370 chuck         1.9          while(count<size-1)
371                            {
372 chip          1.11         printf("count = %d\n", count);
373 chuck         1.9                  if(isUTF8((char*)&legal[count]) == true){
374 chip          1.11                     UTF8_NEXT(legal,trailingBytes);
375                                count += trailingBytes;
376                            } else{
377                                printf("CommonUTF8:: returning false; position[%d]",count);
378                                 return false;
379                            }
380 chuck         1.9          }
381 chip          1.11      printf("CommonUTF8:: returning false; position[%d]",count);
382                        return true;*/
383 chuck         1.9  }
384 chuck         1.6  
385                    String escapeStringEncoder(const String& Str)
386                    {
387                        String escapeStr;
388                        Uint16 escChar;
389                        char hexencoding[6];
390 chip          1.11 
391 chuck         1.6      for(Uint32 i = 0; i < Str.size(); ++i)
392                        {
393 chip          1.11     escChar = Str[i];
394                        if(escChar <= 0x7F)
395 chuck         1.6          {
396 chip          1.11         escapeStr.append(escChar);
397 chuck         1.6          }
398 chip          1.11     else
399                        {
400                            memset(hexencoding,0x00,sizeof(hexencoding));
401 chuck         1.6              sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16);
402                                escapeStr.append(hexencoding);
403 chip          1.11     }
404 chuck         1.6      }
405                        return(escapeStr);
406                    }
407                    
408                    String escapeStringDecoder(const String& Str)
409                    {
410                        Uint32 i;
411                    
412 chip          1.11     Array<Uint16> utf16Chars;
413 chuck         1.6  
414                        for (i=0; i< Str.size(); ++i)
415                        {
416                            if (Str[i] == '%')
417                            {
418                                Uint8 digit1 = _hexCharToNumeric((Str[++i]));
419                                Uint8 digit2 = _hexCharToNumeric((Str[++i]));
420                                Uint8 digit3 = _hexCharToNumeric((Str[++i]));
421                                Uint8 digit4 = _hexCharToNumeric((Str[++i]));
422                    
423 chip          1.11         Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
424 chuck         1.6                                   (digit3<< 4) + (digit4);
425                    
426 chip          1.11             utf16Chars.append(decodedChar);
427 chuck         1.6          }
428                            else
429                            {
430 chip          1.11             utf16Chars.append((Uint16)Str[i]);
431 chuck         1.6          }
432                        }
433                    
434                        // If there was a string to decode...
435                        if (Str.size() > 0)
436                        {
437                            utf16Chars.append('\0');
438                            return String((Char16 *)utf16Chars.getData());
439                        }
440                        else
441                        {
442                            return String();
443                        }
444                    }
445                    
446 yi.zhou       1.16 #ifdef PEGASUS_HAS_ICU
447                    
448                    Boolean InitializeICU::_initAttempted = false;
449                    Boolean InitializeICU::_initSuccessful = false;
450                    Mutex InitializeICU::_initMutex;
451                    
452                    Boolean InitializeICU::initICUSuccessful()
453                    {
454                        if (!_initAttempted)
455                        {
456                    	{
457                                AutoMutex lock(_initMutex);
458                    
459                    	    if (!_initAttempted)
460                    	    {
461                                    UErrorCode _status = U_ZERO_ERROR;
462                    
463                    		// Initialize ICU
464                                    u_init(&_status);
465                    
466                                    if (U_FAILURE(_status))
467 yi.zhou       1.16                 {
468                                        _initSuccessful = false;
469                                        Logger::put (Logger::STANDARD_LOG , System::CIMSERVER,
470                    				 Logger::WARNING,
471                                                     "ICU initialization failed with error: $0.", 
472                    				 _status);
473                                    }
474                                    else
475                                    {
476                                        _initSuccessful = true;
477                                    }
478                                    _initAttempted = true;
479                    	    }
480                    	}
481                        }
482                    
483                        return _initSuccessful;
484                    }
485                    
486                    #endif
487                    
488 david         1.1  PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2