(file) Return to CommonUTF.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.10 //%2005////////////////////////////////////////////////////////////////////////
  2 david 1.1  //
  3 karl  1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl  1.4  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl  1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9            // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 david 1.1  //
 12            // Permission is hereby granted, free of charge, to any person obtaining a copy
 13            // of this software and associated documentation files (the "Software"), to
 14            // deal in the Software without restriction, including without limitation the
 15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 16            // sell copies of the Software, and to permit persons to whom the Software is
 17            // furnished to do so, subject to the following conditions:
 18 chip  1.11 //
 19 david 1.1  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 20            // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 21            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 22            // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 23            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 24            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 25            // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 26            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 27            //
 28            //==============================================================================
 29            //
 30            // Author: Dave Rosckes   (rosckes@us.ibm.com)
 31            //
 32 david.dillard 1.12 // Modified By: David Dillard, VERITAS Software Corp.
 33                    //                  (david.dillard@veritas.com)
 34 yi.zhou       1.16 //              Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com)
 35 david         1.1  //
 36                    //%/////////////////////////////////////////////////////////////////////////////
 37                    
 38 chuck         1.7  #include <Pegasus/Common/Config.h>
 39                    #include <Pegasus/Common/Array.h>
 40 yi.zhou       1.16 #include <Pegasus/Common/Logger.h>
 41 david         1.1  #include "CommonUTF.h"
 42 chuck         1.7  #include <cstdio>
 43 david         1.2  #include <cstring>
 44 david.dillard 1.15 #include <cctype>
 45 kumpf         1.3  
 46 yi.zhou       1.16 #ifdef PEGASUS_HAS_ICU
 47                    #include <unicode/uclean.h>
 48                    #endif
 49                    
 50 david         1.1  PEGASUS_NAMESPACE_BEGIN
 51 kumpf         1.3  
 52 chuck         1.6  
 53 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c)
 54 chuck         1.6  {
 55                        Uint8 n;
 56                    
 57 david.dillard 1.14     if (isdigit(c))
 58 chuck         1.6          n = (c - '0');
 59 david.dillard 1.14     else if (isupper(c))
 60 chuck         1.6          n = (c - 'A' + 10);
 61                        else // if (islower(c))
 62                            n = (c - 'a' + 10);
 63                    
 64                        return n;
 65                    }
 66                    
 67 kumpf         1.3  // Note: Caller must ensure that "src" contains "size" bytes.
 68 chip          1.11 Boolean isValid_U8(const Uint8 *src, int size)
 69 david         1.1  {
 70                        Uint8 U8_char;
 71                        const Uint8 *srcptr = src+size;
 72                        switch (size)
 73                        {
 74 chip          1.11     case 4:
 75                            if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 76                            {
 77                            return false;
 78                            }
 79                        case 3:
 80                            if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 81                            {
 82                            return false;
 83                            }
 84                        case 2:
 85                            if ((U8_char = (*--srcptr)) > 0xBF)
 86                            {
 87                            return false;
 88                            }
 89                            switch (*src)
 90                            {
 91                            case 0xE0:
 92                                if (U8_char < 0xA0)
 93                                {
 94                                return false;
 95 chip          1.11             }
 96                                break;
 97                            case 0xF0:
 98                                if (U8_char < 0x90)
 99                                {
100                                return false;
101                                }
102                                break;
103                            case 0xF4:
104                                if (U8_char > 0x8F)
105                                {
106                                return false;
107                                }
108                                break;
109                            default:
110                                if (U8_char < 0x80)
111                                {
112                                return false;
113                                }
114                            }
115                        case 1:
116 chip          1.11         if (*src >= 0x80 && *src < 0xC2)
117                            {
118                            return false;
119                            }
120                            if (*src > 0xF4)
121                            {
122                            return false;
123                            }
124                            break;
125 david         1.2          default:
126 chip          1.11         {
127                            return false;
128 david         1.2              }
129 david         1.1  
130                        }
131                        return true;
132 chip          1.11 }
133 david         1.1  
134                    int UTF16toUTF8(const Uint16** srcHead,
135 chip          1.11         const Uint16* srcEnd,
136                            Uint8** tgtHead,
137                            Uint8* tgtEnd)
138 david         1.1  {
139                        int returnCode = 0;
140                        const Uint16* src = *srcHead;
141                        Uint8* tgt = *tgtHead;
142                        while (src < srcEnd)
143                        {
144 mike          1.16.2.1 	if (*src < 128)
145                        	{
146                        	    if (tgt == tgtEnd)
147                        	    {
148                        		returnCode = -1;
149                        		break;
150                        	    }
151                        
152                        	    *tgt++ = *src++;
153                        	    continue;
154                        	}
155                        
156 chip          1.11         Uint32 tempchar;
157                            Uint16 numberOfBytes = 0;
158                            const Uint16* oldsrc = src;
159                            tempchar = *src++;
160                            if (tempchar >= FIRST_HIGH_SURROGATE
161                                && tempchar <= LAST_HIGH_SURROGATE)
162                            {
163                                if (src < srcEnd)
164                                {
165                                Uint32 tempchar2 = *src;
166                                if (tempchar2 >= FIRST_LOW_SURROGATE &&
167                                    tempchar2 <= LAST_LOW_SURROGATE)
168                                {
169                                    tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
170                                      + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
171                                    ++src;
172                                }
173                                }
174                                else
175                                {
176                                --src;
177 chip          1.11             returnCode = -1;
178                                break;
179                                }
180                            }
181                            if (tempchar < (Uint32)0x80)
182                            {
183                                numberOfBytes = 1;
184                            }
185                            else if (tempchar < (Uint32)0x800)
186                            {
187                                numberOfBytes = 2;
188                            }
189                            else if (tempchar < (Uint32)0x10000)
190                            {
191                                numberOfBytes = 3;
192                            }
193                            else if (tempchar < (Uint32)0x200000)
194                            {
195                                numberOfBytes = 4;
196                            }
197                            else
198 chip          1.11         {
199                                numberOfBytes = 2;
200                                tempchar = REPLACEMENT_CHARACTER;
201                            }
202                        
203                            tgt += numberOfBytes;
204                            if (tgt > tgtEnd)
205                            {
206                                src = oldsrc;
207                                tgt -= numberOfBytes;
208                                returnCode = -1;
209                                break;
210                            }
211                        
212                            switch (numberOfBytes)
213                            {
214                                case 4:
215                                *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
216                                tempchar >>= 6;
217                                case 3:
218                                *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
219 chip          1.11             tempchar >>= 6;
220                                case 2:
221                                *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
222                                tempchar >>= 6;
223                                case 1:
224                                *--tgt =  (Uint8)(tempchar | firstByteMark[numberOfBytes]);
225                            }
226                            tgt += numberOfBytes;
227 david         1.1          }
228                            *srcHead = src;
229                            *tgtHead = tgt;
230                            return returnCode;
231                        }
232                        
233                        int UTF8toUTF16 (const Uint8** srcHead,
234 chip          1.11              const Uint8* srcEnd,
235                                 Uint16** tgtHead,
236                                 Uint16* tgtEnd)
237 david         1.1      {
238                            int returnCode = 0;
239                            const Uint8* src = *srcHead;
240                            Uint16* tgt = *tgtHead;
241                            while (src < srcEnd)
242                            {
243 chip          1.11         Uint32 tempchar = 0;
244                            Uint16 moreBytes = trailingBytesForUTF8[*src];
245                            if (src + moreBytes >= srcEnd)
246                            {
247                                returnCode = -1;
248                                break;
249                            }
250                            switch (moreBytes)
251                            {
252                                case 3:
253                                tempchar += *src++;
254                                tempchar <<= 6;
255                                case 2:
256                                tempchar += *src++;
257                                tempchar <<= 6;
258                                case 1:
259                                tempchar += *src++;
260                                tempchar <<= 6;
261                                case 0:
262                                tempchar += *src++;
263                            }
264 chip          1.11         tempchar -= offsetsFromUTF8[moreBytes];
265                        
266                            if (tgt >= tgtEnd)
267                            {
268                                src -= (moreBytes+1);
269                                returnCode = -1; break;
270                            }
271                            if (tempchar <= MAX_BYTE)
272                            {
273                                if ((tempchar >= FIRST_HIGH_SURROGATE &&
274                                 tempchar <= LAST_LOW_SURROGATE) ||
275                                ((tempchar & 0xFFFE) == 0xFFFE))
276                                {
277                                *tgt++ = REPLACEMENT_CHARACTER;
278                                }
279                                else
280                                {
281                                *tgt++ = (Uint16)tempchar;
282                                }
283                            }
284                            else if (tempchar > MAX_UTF16)
285 chip          1.11         {
286                                *tgt++ = REPLACEMENT_CHARACTER;
287                            }
288                            else
289                            {
290                                if (tgt + 1 >= tgtEnd)
291                                {
292                                src -= (moreBytes+1);
293                                returnCode = -1;
294                                break;
295                                }
296                                tempchar -= halfBase;
297                                *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
298                                *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
299                            }
300 david         1.1          }
301                            *srcHead = src;
302                            *tgtHead = tgt;
303                            return returnCode;
304                        }
305 david         1.5      
306                        Boolean isUTF8(const char *legal)
307                        {
308                            char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
309                        
310                            // Validate that the string is long enough to hold all the expected bytes.
311                            // Note that if legal[0] == 0, numBytes will be 1.
312                            for (char i=1; i<numBytes; i++)
313                            {
314                                if (legal[i] == 0)
315                                {
316                                    return false;
317                                }
318                            }
319                        
320                            return (isValid_U8((const Uint8 *)legal, numBytes));
321                        }
322 chuck         1.6      
323 chuck         1.9      Boolean isUTF8Str(const char *legal)
324                        {
325 chip          1.11         /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
326 chuck         1.9                            0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84,
327                                              0x00};*/
328 chip          1.11     //  char tmp_[] = "class";
329                        //  char * tmp = legal;
330 david.dillard 1.13         size_t count = 0;
331                            const size_t size = strlen(legal);
332 chip          1.11     //  printf("size = %d\n",size);
333 david.dillard 1.13         while(count<size)
334                            {
335 chip          1.11     //      printf("count = %d\n",count);
336 david.dillard 1.13             if(isUTF8(&legal[count]) == true){
337                                    UTF8_NEXT(legal,count);
338 chip          1.11             }else{
339                        //          printf("bad string\n");
340                                    return false;
341                                }
342 david.dillard 1.13         }
343 chip          1.11     //  printf("good string\n");
344                            return true;
345 chuck         1.9      /*
346 chip          1.11         printf("legal = %s\n\n", legal);
347                            Uint32 count = 0;
348                            Uint32 trailingBytes = 0;
349 chuck         1.9              Uint32 size = strlen(legal);
350 chip          1.11         printf("size of legal is %d\n",size);
351 chuck         1.9              while(count<size-1)
352                                {
353 chip          1.11             printf("count = %d\n", count);
354 chuck         1.9                      if(isUTF8((char*)&legal[count]) == true){
355 chip          1.11                         UTF8_NEXT(legal,trailingBytes);
356                                    count += trailingBytes;
357                                } else{
358                                    printf("CommonUTF8:: returning false; position[%d]",count);
359                                     return false;
360                                }
361 chuck         1.9              }
362 chip          1.11          printf("CommonUTF8:: returning false; position[%d]",count);
363                            return true;*/
364 chuck         1.9      }
365 chuck         1.6      
366                        String escapeStringEncoder(const String& Str)
367                        {
368                            String escapeStr;
369                            Uint16 escChar;
370                            char hexencoding[6];
371 chip          1.11     
372 chuck         1.6          for(Uint32 i = 0; i < Str.size(); ++i)
373                            {
374 chip          1.11         escChar = Str[i];
375                            if(escChar <= 0x7F)
376 chuck         1.6              {
377 chip          1.11             escapeStr.append(escChar);
378 chuck         1.6              }
379 chip          1.11         else
380                            {
381                                memset(hexencoding,0x00,sizeof(hexencoding));
382 chuck         1.6                  sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16);
383                                    escapeStr.append(hexencoding);
384 chip          1.11         }
385 chuck         1.6          }
386                            return(escapeStr);
387                        }
388                        
389                        String escapeStringDecoder(const String& Str)
390                        {
391                            Uint32 i;
392                        
393 chip          1.11         Array<Uint16> utf16Chars;
394 chuck         1.6      
395                            for (i=0; i< Str.size(); ++i)
396                            {
397                                if (Str[i] == '%')
398                                {
399                                    Uint8 digit1 = _hexCharToNumeric((Str[++i]));
400                                    Uint8 digit2 = _hexCharToNumeric((Str[++i]));
401                                    Uint8 digit3 = _hexCharToNumeric((Str[++i]));
402                                    Uint8 digit4 = _hexCharToNumeric((Str[++i]));
403                        
404 chip          1.11             Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
405 chuck         1.6                                       (digit3<< 4) + (digit4);
406                        
407 chip          1.11                 utf16Chars.append(decodedChar);
408 chuck         1.6              }
409                                else
410                                {
411 chip          1.11                 utf16Chars.append((Uint16)Str[i]);
412 chuck         1.6              }
413                            }
414                        
415                            // If there was a string to decode...
416                            if (Str.size() > 0)
417                            {
418                                utf16Chars.append('\0');
419                                return String((Char16 *)utf16Chars.getData());
420                            }
421                            else
422                            {
423                                return String();
424                            }
425                        }
426                        
427 yi.zhou       1.16     #ifdef PEGASUS_HAS_ICU
428                        
429                        Boolean InitializeICU::_initAttempted = false;
430                        Boolean InitializeICU::_initSuccessful = false;
431                        Mutex InitializeICU::_initMutex;
432                        
433                        Boolean InitializeICU::initICUSuccessful()
434                        {
435                            if (!_initAttempted)
436                            {
437                        	{
438                                    AutoMutex lock(_initMutex);
439                        
440                        	    if (!_initAttempted)
441                        	    {
442                                        UErrorCode _status = U_ZERO_ERROR;
443                        
444                        		// Initialize ICU
445                                        u_init(&_status);
446                        
447                                        if (U_FAILURE(_status))
448 yi.zhou       1.16                     {
449                                            _initSuccessful = false;
450                                            Logger::put (Logger::STANDARD_LOG , System::CIMSERVER,
451                        				 Logger::WARNING,
452                                                         "ICU initialization failed with error: $0.", 
453                        				 _status);
454                                        }
455                                        else
456                                        {
457                                            _initSuccessful = true;
458                                        }
459                                        _initAttempted = true;
460                        	    }
461                        	}
462                            }
463                        
464                            return _initSuccessful;
465                        }
466                        
467                        #endif
468                        
469 david         1.1      PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2