(file) Return to CommonUTF.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.18 //%2006////////////////////////////////////////////////////////////////////////
  2 david 1.1  //
  3 karl  1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl  1.4  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl  1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9            // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 karl  1.18 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 12            // EMC Corporation; Symantec Corporation; The Open Group.
 13 david 1.1  //
 14            // Permission is hereby granted, free of charge, to any person obtaining a copy
 15            // of this software and associated documentation files (the "Software"), to
 16            // deal in the Software without restriction, including without limitation the
 17            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 18            // sell copies of the Software, and to permit persons to whom the Software is
 19            // furnished to do so, subject to the following conditions:
 20 karl  1.18 // 
 21 david 1.1  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 22            // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 23            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 24            // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 25            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 26            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 27            // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 28            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 29            //
 30            //==============================================================================
 31            //
 32            // Author: Dave Rosckes   (rosckes@us.ibm.com)
 33            //
 34 david.dillard 1.12 // Modified By: David Dillard, VERITAS Software Corp.
 35                    //                  (david.dillard@veritas.com)
 36 yi.zhou       1.16 //              Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com)
 37 david         1.1  //
 38                    //%/////////////////////////////////////////////////////////////////////////////
 39                    
 40 chuck         1.7  #include <Pegasus/Common/Config.h>
 41                    #include <Pegasus/Common/Array.h>
 42 yi.zhou       1.16 #include <Pegasus/Common/Logger.h>
 43 david         1.1  #include "CommonUTF.h"
 44 chuck         1.7  #include <cstdio>
 45 david         1.2  #include <cstring>
 46 david.dillard 1.15 #include <cctype>
 47 kumpf         1.3  
 48 yi.zhou       1.16 #ifdef PEGASUS_HAS_ICU
 49                    #include <unicode/uclean.h>
 50                    #endif
 51                    
 52 david         1.1  PEGASUS_NAMESPACE_BEGIN
 53 kumpf         1.3  
 54 chuck         1.6  
 55 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c)
 56 chuck         1.6  {
 57                        Uint8 n;
 58                    
 59 david.dillard 1.14     if (isdigit(c))
 60 chuck         1.6          n = (c - '0');
 61 david.dillard 1.14     else if (isupper(c))
 62 chuck         1.6          n = (c - 'A' + 10);
 63                        else // if (islower(c))
 64                            n = (c - 'a' + 10);
 65                    
 66                        return n;
 67                    }
 68                    
 69 kumpf         1.3  // Note: Caller must ensure that "src" contains "size" bytes.
 70 chip          1.11 Boolean isValid_U8(const Uint8 *src, int size)
 71 david         1.1  {
 72                        Uint8 U8_char;
 73                        const Uint8 *srcptr = src+size;
 74                        switch (size)
 75                        {
 76 chip          1.11     case 4:
 77                            if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 78                            {
 79                            return false;
 80                            }
 81                        case 3:
 82                            if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 83                            {
 84                            return false;
 85                            }
 86                        case 2:
 87                            if ((U8_char = (*--srcptr)) > 0xBF)
 88                            {
 89                            return false;
 90                            }
 91                            switch (*src)
 92                            {
 93                            case 0xE0:
 94                                if (U8_char < 0xA0)
 95                                {
 96                                return false;
 97 chip          1.11             }
 98                                break;
 99                            case 0xF0:
100                                if (U8_char < 0x90)
101                                {
102                                return false;
103                                }
104                                break;
105                            case 0xF4:
106                                if (U8_char > 0x8F)
107                                {
108                                return false;
109                                }
110                                break;
111                            default:
112                                if (U8_char < 0x80)
113                                {
114                                return false;
115                                }
116                            }
117                        case 1:
118 chip          1.11         if (*src >= 0x80 && *src < 0xC2)
119                            {
120                            return false;
121                            }
122                            if (*src > 0xF4)
123                            {
124                            return false;
125                            }
126                            break;
127 david         1.2          default:
128 chip          1.11         {
129                            return false;
130 david         1.2              }
131 david         1.1  
132                        }
133                        return true;
134 chip          1.11 }
135 david         1.1  
136                    int UTF16toUTF8(const Uint16** srcHead,
137 chip          1.11         const Uint16* srcEnd,
138                            Uint8** tgtHead,
139                            Uint8* tgtEnd)
140 david         1.1  {
141                        int returnCode = 0;
142                        const Uint16* src = *srcHead;
143                        Uint8* tgt = *tgtHead;
144                        while (src < srcEnd)
145                        {
146 mike          1.17 	if (*src < 128)
147                    	{
148                    	    if (tgt == tgtEnd)
149                    	    {
150                    		returnCode = -1;
151                    		break;
152                    	    }
153                    
154                    	    *tgt++ = *src++;
155                    	    continue;
156                    	}
157                    
158 chip          1.11     Uint32 tempchar;
159                        Uint16 numberOfBytes = 0;
160                        const Uint16* oldsrc = src;
161                        tempchar = *src++;
162                        if (tempchar >= FIRST_HIGH_SURROGATE
163                            && tempchar <= LAST_HIGH_SURROGATE)
164                        {
165                            if (src < srcEnd)
166                            {
167                            Uint32 tempchar2 = *src;
168                            if (tempchar2 >= FIRST_LOW_SURROGATE &&
169                                tempchar2 <= LAST_LOW_SURROGATE)
170                            {
171                                tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
172                                  + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
173                                ++src;
174                            }
175                            }
176                            else
177                            {
178                            --src;
179 chip          1.11         returnCode = -1;
180                            break;
181                            }
182                        }
183                        if (tempchar < (Uint32)0x80)
184                        {
185                            numberOfBytes = 1;
186                        }
187                        else if (tempchar < (Uint32)0x800)
188                        {
189                            numberOfBytes = 2;
190                        }
191                        else if (tempchar < (Uint32)0x10000)
192                        {
193                            numberOfBytes = 3;
194                        }
195                        else if (tempchar < (Uint32)0x200000)
196                        {
197                            numberOfBytes = 4;
198                        }
199                        else
200 chip          1.11     {
201                            numberOfBytes = 2;
202                            tempchar = REPLACEMENT_CHARACTER;
203                        }
204                    
205                        tgt += numberOfBytes;
206                        if (tgt > tgtEnd)
207                        {
208                            src = oldsrc;
209                            tgt -= numberOfBytes;
210                            returnCode = -1;
211                            break;
212                        }
213                    
214                        switch (numberOfBytes)
215                        {
216                            case 4:
217                            *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
218                            tempchar >>= 6;
219                            case 3:
220                            *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
221 chip          1.11         tempchar >>= 6;
222                            case 2:
223                            *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
224                            tempchar >>= 6;
225                            case 1:
226                            *--tgt =  (Uint8)(tempchar | firstByteMark[numberOfBytes]);
227                        }
228                        tgt += numberOfBytes;
229 david         1.1      }
230                        *srcHead = src;
231                        *tgtHead = tgt;
232                        return returnCode;
233                    }
234                    
235                    int UTF8toUTF16 (const Uint8** srcHead,
236 chip          1.11          const Uint8* srcEnd,
237                             Uint16** tgtHead,
238                             Uint16* tgtEnd)
239 david         1.1  {
240                        int returnCode = 0;
241                        const Uint8* src = *srcHead;
242                        Uint16* tgt = *tgtHead;
243                        while (src < srcEnd)
244                        {
245 chip          1.11     Uint32 tempchar = 0;
246                        Uint16 moreBytes = trailingBytesForUTF8[*src];
247                        if (src + moreBytes >= srcEnd)
248                        {
249                            returnCode = -1;
250                            break;
251                        }
252                        switch (moreBytes)
253                        {
254                            case 3:
255                            tempchar += *src++;
256                            tempchar <<= 6;
257                            case 2:
258                            tempchar += *src++;
259                            tempchar <<= 6;
260                            case 1:
261                            tempchar += *src++;
262                            tempchar <<= 6;
263                            case 0:
264                            tempchar += *src++;
265                        }
266 chip          1.11     tempchar -= offsetsFromUTF8[moreBytes];
267                    
268                        if (tgt >= tgtEnd)
269                        {
270                            src -= (moreBytes+1);
271                            returnCode = -1; break;
272                        }
273                        if (tempchar <= MAX_BYTE)
274                        {
275                            if ((tempchar >= FIRST_HIGH_SURROGATE &&
276                             tempchar <= LAST_LOW_SURROGATE) ||
277                            ((tempchar & 0xFFFE) == 0xFFFE))
278                            {
279                            *tgt++ = REPLACEMENT_CHARACTER;
280                            }
281                            else
282                            {
283                            *tgt++ = (Uint16)tempchar;
284                            }
285                        }
286                        else if (tempchar > MAX_UTF16)
287 chip          1.11     {
288                            *tgt++ = REPLACEMENT_CHARACTER;
289                        }
290                        else
291                        {
292                            if (tgt + 1 >= tgtEnd)
293                            {
294                            src -= (moreBytes+1);
295                            returnCode = -1;
296                            break;
297                            }
298                            tempchar -= halfBase;
299                            *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
300                            *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
301                        }
302 david         1.1      }
303                        *srcHead = src;
304                        *tgtHead = tgt;
305                        return returnCode;
306                    }
307 david         1.5  
308 mike          1.17 Boolean isUTF8Aux(const char *legal)
309 david         1.5  {
310                        char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
311                    
312                        // Validate that the string is long enough to hold all the expected bytes.
313                        // Note that if legal[0] == 0, numBytes will be 1.
314                        for (char i=1; i<numBytes; i++)
315                        {
316                            if (legal[i] == 0)
317                            {
318                                return false;
319                            }
320                        }
321                    
322                        return (isValid_U8((const Uint8 *)legal, numBytes));
323                    }
324 chuck         1.6  
325 chuck         1.9  Boolean isUTF8Str(const char *legal)
326                    {
327 chip          1.11     /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
328 chuck         1.9                        0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84,
329                                          0x00};*/
330 chip          1.11 //  char tmp_[] = "class";
331                    //  char * tmp = legal;
332 david.dillard 1.13     size_t count = 0;
333                        const size_t size = strlen(legal);
334 chip          1.11 //  printf("size = %d\n",size);
335 david.dillard 1.13     while(count<size)
336                        {
337 chip          1.11 //      printf("count = %d\n",count);
338 david.dillard 1.13         if(isUTF8(&legal[count]) == true){
339                                UTF8_NEXT(legal,count);
340 chip          1.11         }else{
341                    //          printf("bad string\n");
342                                return false;
343                            }
344 david.dillard 1.13     }
345 chip          1.11 //  printf("good string\n");
346                        return true;
347 chuck         1.9  /*
348 chip          1.11     printf("legal = %s\n\n", legal);
349                        Uint32 count = 0;
350                        Uint32 trailingBytes = 0;
351 chuck         1.9          Uint32 size = strlen(legal);
352 chip          1.11     printf("size of legal is %d\n",size);
353 chuck         1.9          while(count<size-1)
354                            {
355 chip          1.11         printf("count = %d\n", count);
356 chuck         1.9                  if(isUTF8((char*)&legal[count]) == true){
357 chip          1.11                     UTF8_NEXT(legal,trailingBytes);
358                                count += trailingBytes;
359                            } else{
360                                printf("CommonUTF8:: returning false; position[%d]",count);
361                                 return false;
362                            }
363 chuck         1.9          }
364 chip          1.11      printf("CommonUTF8:: returning false; position[%d]",count);
365                        return true;*/
366 chuck         1.9  }
367 chuck         1.6  
368                    String escapeStringEncoder(const String& Str)
369                    {
370                        String escapeStr;
371                        Uint16 escChar;
372                        char hexencoding[6];
373 chip          1.11 
374 chuck         1.6      for(Uint32 i = 0; i < Str.size(); ++i)
375                        {
376 chip          1.11     escChar = Str[i];
377                        if(escChar <= 0x7F)
378 chuck         1.6          {
379 chip          1.11         escapeStr.append(escChar);
380 chuck         1.6          }
381 chip          1.11     else
382                        {
383                            memset(hexencoding,0x00,sizeof(hexencoding));
384 chuck         1.6              sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16);
385                                escapeStr.append(hexencoding);
386 chip          1.11     }
387 chuck         1.6      }
388                        return(escapeStr);
389                    }
390                    
391                    String escapeStringDecoder(const String& Str)
392                    {
393                        Uint32 i;
394                    
395 chip          1.11     Array<Uint16> utf16Chars;
396 chuck         1.6  
397                        for (i=0; i< Str.size(); ++i)
398                        {
399                            if (Str[i] == '%')
400                            {
401                                Uint8 digit1 = _hexCharToNumeric((Str[++i]));
402                                Uint8 digit2 = _hexCharToNumeric((Str[++i]));
403                                Uint8 digit3 = _hexCharToNumeric((Str[++i]));
404                                Uint8 digit4 = _hexCharToNumeric((Str[++i]));
405                    
406 chip          1.11         Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
407 chuck         1.6                                   (digit3<< 4) + (digit4);
408                    
409 chip          1.11             utf16Chars.append(decodedChar);
410 chuck         1.6          }
411                            else
412                            {
413 chip          1.11             utf16Chars.append((Uint16)Str[i]);
414 chuck         1.6          }
415                        }
416                    
417                        // If there was a string to decode...
418                        if (Str.size() > 0)
419                        {
420                            utf16Chars.append('\0');
421                            return String((Char16 *)utf16Chars.getData());
422                        }
423                        else
424                        {
425                            return String();
426                        }
427                    }
428                    
429 yi.zhou       1.16 #ifdef PEGASUS_HAS_ICU
430                    
431                    Boolean InitializeICU::_initAttempted = false;
432                    Boolean InitializeICU::_initSuccessful = false;
433                    Mutex InitializeICU::_initMutex;
434                    
435                    Boolean InitializeICU::initICUSuccessful()
436                    {
437                        if (!_initAttempted)
438                        {
439                    	{
440                                AutoMutex lock(_initMutex);
441                    
442                    	    if (!_initAttempted)
443                    	    {
444                                    UErrorCode _status = U_ZERO_ERROR;
445                    
446                    		// Initialize ICU
447                                    u_init(&_status);
448                    
449                                    if (U_FAILURE(_status))
450 yi.zhou       1.16                 {
451                                        _initSuccessful = false;
452                                        Logger::put (Logger::STANDARD_LOG , System::CIMSERVER,
453                    				 Logger::WARNING,
454                                                     "ICU initialization failed with error: $0.", 
455                    				 _status);
456                                    }
457                                    else
458                                    {
459                                        _initSuccessful = true;
460                                    }
461                                    _initAttempted = true;
462                    	    }
463                    	}
464                        }
465                    
466                        return _initSuccessful;
467                    }
468                    
469                    #endif
470                    
471 david         1.1  PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2