(file) Return to CommonUTF.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.10 //%2005////////////////////////////////////////////////////////////////////////
  2 david 1.1  //
  3 karl  1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl  1.4  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl  1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9            // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 david 1.1  //
 12            // Permission is hereby granted, free of charge, to any person obtaining a copy
 13            // of this software and associated documentation files (the "Software"), to
 14            // deal in the Software without restriction, including without limitation the
 15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 16            // sell copies of the Software, and to permit persons to whom the Software is
 17            // furnished to do so, subject to the following conditions:
 18 chip  1.11 //
 19 david 1.1  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 20            // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 21            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 22            // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 23            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 24            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 25            // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 26            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 27            //
 28            //==============================================================================
 29            //
 30            // Author: Dave Rosckes   (rosckes@us.ibm.com)
 31            //
 32 david.dillard 1.12 // Modified By: David Dillard, VERITAS Software Corp.
 33                    //                  (david.dillard@veritas.com)
 34 yi.zhou       1.16 //              Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com)
 35 david         1.1  //
 36                    //%/////////////////////////////////////////////////////////////////////////////
 37                    
 38 chuck         1.7  #include <Pegasus/Common/Config.h>
 39                    #include <Pegasus/Common/Array.h>
 40 yi.zhou       1.16 #include <Pegasus/Common/Logger.h>
 41 david         1.1  #include "CommonUTF.h"
 42 chuck         1.7  #include <cstdio>
 43 david         1.2  #include <cstring>
 44 david.dillard 1.15 #include <cctype>
 45 kumpf         1.3  
 46 yi.zhou       1.16 #ifdef PEGASUS_HAS_ICU
 47                    #include <unicode/uclean.h>
 48                    #endif
 49                    
 50 david         1.1  PEGASUS_NAMESPACE_BEGIN
 51 kumpf         1.3  
 52 chuck         1.6  
 53 david.dillard 1.12 inline Uint8 _hexCharToNumeric(Char16 c)
 54 chuck         1.6  {
 55                        Uint8 n;
 56                    
 57 david.dillard 1.14     if (isdigit(c))
 58 chuck         1.6          n = (c - '0');
 59 david.dillard 1.14     else if (isupper(c))
 60 chuck         1.6          n = (c - 'A' + 10);
 61                        else // if (islower(c))
 62                            n = (c - 'a' + 10);
 63                    
 64                        return n;
 65                    }
 66                    
 67 kumpf         1.3  // Note: Caller must ensure that "src" contains "size" bytes.
 68 chip          1.11 Boolean isValid_U8(const Uint8 *src, int size)
 69 david         1.1  {
 70                        Uint8 U8_char;
 71                        const Uint8 *srcptr = src+size;
 72                        switch (size)
 73                        {
 74 chip          1.11     case 4:
 75                            if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 76                            {
 77                            return false;
 78                            }
 79                        case 3:
 80                            if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 81                            {
 82                            return false;
 83                            }
 84                        case 2:
 85                            if ((U8_char = (*--srcptr)) > 0xBF)
 86                            {
 87                            return false;
 88                            }
 89                            switch (*src)
 90                            {
 91                            case 0xE0:
 92                                if (U8_char < 0xA0)
 93                                {
 94                                return false;
 95 chip          1.11             }
 96                                break;
 97                            case 0xF0:
 98                                if (U8_char < 0x90)
 99                                {
100                                return false;
101                                }
102                                break;
103                            case 0xF4:
104                                if (U8_char > 0x8F)
105                                {
106                                return false;
107                                }
108                                break;
109                            default:
110                                if (U8_char < 0x80)
111                                {
112                                return false;
113                                }
114                            }
115                        case 1:
116 chip          1.11         if (*src >= 0x80 && *src < 0xC2)
117                            {
118                            return false;
119                            }
120                            if (*src > 0xF4)
121                            {
122                            return false;
123                            }
124                            break;
125 david         1.2          default:
126 chip          1.11         {
127                            return false;
128 david         1.2              }
129 david         1.1  
130                        }
131                        return true;
132 chip          1.11 }
133 david         1.1  
134                    int UTF16toUTF8(const Uint16** srcHead,
135 chip          1.11         const Uint16* srcEnd,
136                            Uint8** tgtHead,
137                            Uint8* tgtEnd)
138 david         1.1  {
139                        int returnCode = 0;
140                        const Uint16* src = *srcHead;
141                        Uint8* tgt = *tgtHead;
142                        while (src < srcEnd)
143                        {
144 chip          1.11     Uint32 tempchar;
145                        Uint16 numberOfBytes = 0;
146                        const Uint16* oldsrc = src;
147                        tempchar = *src++;
148                        if (tempchar >= FIRST_HIGH_SURROGATE
149                            && tempchar <= LAST_HIGH_SURROGATE)
150                        {
151                            if (src < srcEnd)
152                            {
153                            Uint32 tempchar2 = *src;
154                            if (tempchar2 >= FIRST_LOW_SURROGATE &&
155                                tempchar2 <= LAST_LOW_SURROGATE)
156                            {
157                                tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
158                                  + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
159                                ++src;
160                            }
161                            }
162                            else
163                            {
164                            --src;
165 chip          1.11         returnCode = -1;
166                            break;
167                            }
168                        }
169                        if (tempchar < (Uint32)0x80)
170                        {
171                            numberOfBytes = 1;
172                        }
173                        else if (tempchar < (Uint32)0x800)
174                        {
175                            numberOfBytes = 2;
176                        }
177                        else if (tempchar < (Uint32)0x10000)
178                        {
179                            numberOfBytes = 3;
180                        }
181                        else if (tempchar < (Uint32)0x200000)
182                        {
183                            numberOfBytes = 4;
184                        }
185                        else
186 chip          1.11     {
187                            numberOfBytes = 2;
188                            tempchar = REPLACEMENT_CHARACTER;
189                        }
190                    
191                        tgt += numberOfBytes;
192                        if (tgt > tgtEnd)
193                        {
194                            src = oldsrc;
195                            tgt -= numberOfBytes;
196                            returnCode = -1;
197                            break;
198                        }
199                    
200                        switch (numberOfBytes)
201                        {
202                            case 4:
203                            *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
204                            tempchar >>= 6;
205                            case 3:
206                            *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
207 chip          1.11         tempchar >>= 6;
208                            case 2:
209                            *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
210                            tempchar >>= 6;
211                            case 1:
212                            *--tgt =  (Uint8)(tempchar | firstByteMark[numberOfBytes]);
213                        }
214                        tgt += numberOfBytes;
215 david         1.1      }
216                        *srcHead = src;
217                        *tgtHead = tgt;
218                        return returnCode;
219                    }
220                    
221                    int UTF8toUTF16 (const Uint8** srcHead,
222 chip          1.11          const Uint8* srcEnd,
223                             Uint16** tgtHead,
224                             Uint16* tgtEnd)
225 david         1.1  {
226                        int returnCode = 0;
227                        const Uint8* src = *srcHead;
228                        Uint16* tgt = *tgtHead;
229                        while (src < srcEnd)
230                        {
231 chip          1.11     Uint32 tempchar = 0;
232                        Uint16 moreBytes = trailingBytesForUTF8[*src];
233                        if (src + moreBytes >= srcEnd)
234                        {
235                            returnCode = -1;
236                            break;
237                        }
238                        switch (moreBytes)
239                        {
240                            case 3:
241                            tempchar += *src++;
242                            tempchar <<= 6;
243                            case 2:
244                            tempchar += *src++;
245                            tempchar <<= 6;
246                            case 1:
247                            tempchar += *src++;
248                            tempchar <<= 6;
249                            case 0:
250                            tempchar += *src++;
251                        }
252 chip          1.11     tempchar -= offsetsFromUTF8[moreBytes];
253                    
254                        if (tgt >= tgtEnd)
255                        {
256                            src -= (moreBytes+1);
257                            returnCode = -1; break;
258                        }
259                        if (tempchar <= MAX_BYTE)
260                        {
261                            if ((tempchar >= FIRST_HIGH_SURROGATE &&
262                             tempchar <= LAST_LOW_SURROGATE) ||
263                            ((tempchar & 0xFFFE) == 0xFFFE))
264                            {
265                            *tgt++ = REPLACEMENT_CHARACTER;
266                            }
267                            else
268                            {
269                            *tgt++ = (Uint16)tempchar;
270                            }
271                        }
272                        else if (tempchar > MAX_UTF16)
273 chip          1.11     {
274                            *tgt++ = REPLACEMENT_CHARACTER;
275                        }
276                        else
277                        {
278                            if (tgt + 1 >= tgtEnd)
279                            {
280                            src -= (moreBytes+1);
281                            returnCode = -1;
282                            break;
283                            }
284                            tempchar -= halfBase;
285                            *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
286                            *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
287                        }
288 david         1.1      }
289                        *srcHead = src;
290                        *tgtHead = tgt;
291                        return returnCode;
292                    }
293 david         1.5  
294                    Boolean isUTF8(const char *legal)
295                    {
296                        char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
297                    
298                        // Validate that the string is long enough to hold all the expected bytes.
299                        // Note that if legal[0] == 0, numBytes will be 1.
300                        for (char i=1; i<numBytes; i++)
301                        {
302                            if (legal[i] == 0)
303                            {
304                                return false;
305                            }
306                        }
307                    
308                        return (isValid_U8((const Uint8 *)legal, numBytes));
309                    }
310 chuck         1.6  
311 chuck         1.9  Boolean isUTF8Str(const char *legal)
312                    {
313 chip          1.11     /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
314 chuck         1.9                        0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84,
315                                          0x00};*/
316 chip          1.11 //  char tmp_[] = "class";
317                    //  char * tmp = legal;
318 david.dillard 1.13     size_t count = 0;
319                        const size_t size = strlen(legal);
320 chip          1.11 //  printf("size = %d\n",size);
321 david.dillard 1.13     while(count<size)
322                        {
323 chip          1.11 //      printf("count = %d\n",count);
324 david.dillard 1.13         if(isUTF8(&legal[count]) == true){
325                                UTF8_NEXT(legal,count);
326 chip          1.11         }else{
327                    //          printf("bad string\n");
328                                return false;
329                            }
330 david.dillard 1.13     }
331 chip          1.11 //  printf("good string\n");
332                        return true;
333 chuck         1.9  /*
334 chip          1.11     printf("legal = %s\n\n", legal);
335                        Uint32 count = 0;
336                        Uint32 trailingBytes = 0;
337 chuck         1.9          Uint32 size = strlen(legal);
338 chip          1.11     printf("size of legal is %d\n",size);
339 chuck         1.9          while(count<size-1)
340                            {
341 chip          1.11         printf("count = %d\n", count);
342 chuck         1.9                  if(isUTF8((char*)&legal[count]) == true){
343 chip          1.11                     UTF8_NEXT(legal,trailingBytes);
344                                count += trailingBytes;
345                            } else{
346                                printf("CommonUTF8:: returning false; position[%d]",count);
347                                 return false;
348                            }
349 chuck         1.9          }
350 chip          1.11      printf("CommonUTF8:: returning false; position[%d]",count);
351                        return true;*/
352 chuck         1.9  }
353 chuck         1.6  
354                    String escapeStringEncoder(const String& Str)
355                    {
356                        String escapeStr;
357                        Uint16 escChar;
358                        char hexencoding[6];
359 chip          1.11 
360 chuck         1.6      for(Uint32 i = 0; i < Str.size(); ++i)
361                        {
362 chip          1.11     escChar = Str[i];
363                        if(escChar <= 0x7F)
364 chuck         1.6          {
365 chip          1.11         escapeStr.append(escChar);
366 chuck         1.6          }
367 chip          1.11     else
368                        {
369                            memset(hexencoding,0x00,sizeof(hexencoding));
370 chuck         1.6              sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16);
371                                escapeStr.append(hexencoding);
372 chip          1.11     }
373 chuck         1.6      }
374                        return(escapeStr);
375                    }
376                    
377                    String escapeStringDecoder(const String& Str)
378                    {
379                        Uint32 i;
380                    
381 chip          1.11     Array<Uint16> utf16Chars;
382 chuck         1.6  
383                        for (i=0; i< Str.size(); ++i)
384                        {
385                            if (Str[i] == '%')
386                            {
387                                Uint8 digit1 = _hexCharToNumeric((Str[++i]));
388                                Uint8 digit2 = _hexCharToNumeric((Str[++i]));
389                                Uint8 digit3 = _hexCharToNumeric((Str[++i]));
390                                Uint8 digit4 = _hexCharToNumeric((Str[++i]));
391                    
392 chip          1.11         Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
393 chuck         1.6                                   (digit3<< 4) + (digit4);
394                    
395 chip          1.11             utf16Chars.append(decodedChar);
396 chuck         1.6          }
397                            else
398                            {
399 chip          1.11             utf16Chars.append((Uint16)Str[i]);
400 chuck         1.6          }
401                        }
402                    
403                        // If there was a string to decode...
404                        if (Str.size() > 0)
405                        {
406                            utf16Chars.append('\0');
407                            return String((Char16 *)utf16Chars.getData());
408                        }
409                        else
410                        {
411                            return String();
412                        }
413                    }
414                    
415 yi.zhou       1.16 #ifdef PEGASUS_HAS_ICU
416                    
417                    Boolean InitializeICU::_initAttempted = false;
418                    Boolean InitializeICU::_initSuccessful = false;
419                    Mutex InitializeICU::_initMutex;
420                    
421                    Boolean InitializeICU::initICUSuccessful()
422                    {
423                        if (!_initAttempted)
424                        {
425                    	{
426                                AutoMutex lock(_initMutex);
427                    
428                    	    if (!_initAttempted)
429                    	    {
430                                    UErrorCode _status = U_ZERO_ERROR;
431                    
432                    		// Initialize ICU
433                                    u_init(&_status);
434                    
435                                    if (U_FAILURE(_status))
436 yi.zhou       1.16                 {
437                                        _initSuccessful = false;
438                                        Logger::put (Logger::STANDARD_LOG , System::CIMSERVER,
439                    				 Logger::WARNING,
440                                                     "ICU initialization failed with error: $0.", 
441                    				 _status);
442                                    }
443                                    else
444                                    {
445                                        _initSuccessful = true;
446                                    }
447                                    _initAttempted = true;
448                    	    }
449                    	}
450                        }
451                    
452                        return _initSuccessful;
453                    }
454                    
455                    #endif
456                    
457 david         1.1  PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2