(file) Return to CommonUTF.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.8 //%2004////////////////////////////////////////////////////////////////////////
  2 david 1.1 //
  3 karl  1.8 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4           // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5           // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl  1.4 // IBM Corp.; EMC Corporation, The Open Group.
  7 karl  1.8 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8           // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 david 1.1 //
 10           // Permission is hereby granted, free of charge, to any person obtaining a copy
 11           // of this software and associated documentation files (the "Software"), to
 12           // deal in the Software without restriction, including without limitation the
 13           // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 14           // sell copies of the Software, and to permit persons to whom the Software is
 15           // furnished to do so, subject to the following conditions:
 16           // 
 17           // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 18           // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 19           // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 20           // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 21           // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22           // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 23           // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 24           // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 25           //
 26           //==============================================================================
 27           //
 28           // Author: Dave Rosckes   (rosckes@us.ibm.com)
 29           //
 30 david 1.1 //
 31           //%/////////////////////////////////////////////////////////////////////////////
 32           
 33 chuck 1.7 #include <Pegasus/Common/Config.h>
 34           #include <Pegasus/Common/Array.h>
 35 david 1.1 #include "CommonUTF.h"
 36 chuck 1.7 #include <cctype>
 37           #include <cstdio>
 38 david 1.2 #include <cstring>
 39 kumpf 1.3 
 40 david 1.1 PEGASUS_NAMESPACE_BEGIN
 41 kumpf 1.3 
 42 chuck 1.6 
 43           inline Uint8 _hexCharToNumeric(const Uint16 c)
 44           {
 45               Uint8 n;
 46           
 47               if (isdigit(c))
 48                   n = (c - '0');
 49               else if (isupper(c))
 50                   n = (c - 'A' + 10);
 51               else // if (islower(c))
 52                   n = (c - 'a' + 10);
 53           
 54               return n;
 55           }
 56           
 57 kumpf 1.3 // Note: Caller must ensure that "src" contains "size" bytes.
 58 david 1.1 int isValid_U8(const Uint8 *src, int size)
 59           {
 60               Uint8 U8_char;
 61               const Uint8 *srcptr = src+size;
 62               switch (size)
 63               {
 64           	case 4:
 65           	    if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 66           	    {
 67           		return false;
 68           	    }
 69           	case 3:
 70           	    if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 71           	    {
 72           		return false;
 73           	    }
 74           	case 2:
 75           	    if ((U8_char = (*--srcptr)) > 0xBF)
 76           	    {
 77           		return false;
 78           	    }
 79 david 1.1 	    switch (*src)
 80           	    {
 81           		case 0xE0:
 82           		    if (U8_char < 0xA0)
 83           		    {
 84           			return false;
 85           		    }
 86           		    break;
 87           		case 0xF0:
 88           		    if (U8_char < 0x90)
 89           		    {
 90           			return false;
 91           		    }
 92           		    break;
 93           		case 0xF4:
 94           		    if (U8_char > 0x8F)
 95           		    {
 96           			return false;
 97           		    }
 98           		    break;
 99           		default:
100 david 1.1 		    if (U8_char < 0x80)
101           		    {
102           			return false;
103           		    }
104           	    }
105           	case 1:
106           	    if (*src >= 0x80 && *src < 0xC2)
107           	    {
108           		return false;
109           	    }
110           	    if (*src > 0xF4)
111           	    {
112           		return false;
113           	    }
114           	    break;
115 david 1.2         default:
116           	    {
117           		return false;
118                       }
119 david 1.1 
120               }
121               return true;
122           }	
123           
124           int UTF16toUTF8(const Uint16** srcHead,
125           		const Uint16* srcEnd, 
126           		Uint8** tgtHead,
127           		Uint8* tgtEnd)
128           {
129               int returnCode = 0;
130               const Uint16* src = *srcHead;
131               Uint8* tgt = *tgtHead;
132               while (src < srcEnd)
133               {
134           	Uint32 tempchar;
135           	Uint16 numberOfBytes = 0;
136           	const Uint16* oldsrc = src; 
137           	tempchar = *src++;
138           	if (tempchar >= FIRST_HIGH_SURROGATE
139           	    && tempchar <= LAST_HIGH_SURROGATE)
140 david 1.1 	{
141           	    if (src < srcEnd)
142           	    {
143           		Uint32 tempchar2 = *src;
144           		if (tempchar2 >= FIRST_LOW_SURROGATE &&
145           		    tempchar2 <= LAST_LOW_SURROGATE)
146           		{
147           		    tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
148           		      + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
149           		    ++src;
150           		} 
151           	    }
152           	    else
153           	    { 
154           		--src;
155           		returnCode = -1;
156           		break;
157           	    }
158           	}
159           	if (tempchar < (Uint32)0x80)
160           	{
161 david 1.1 	    numberOfBytes = 1;
162           	}
163           	else if (tempchar < (Uint32)0x800)
164           	{
165           	    numberOfBytes = 2;
166           	}
167           	else if (tempchar < (Uint32)0x10000)
168           	{
169           	    numberOfBytes = 3;
170           	}
171           	else if (tempchar < (Uint32)0x200000)
172           	{
173           	    numberOfBytes = 4;
174           	}
175           	else
176           	{
177           	    numberOfBytes = 2;
178           	    tempchar = REPLACEMENT_CHARACTER;
179           	}
180           
181           	tgt += numberOfBytes;
182 david 1.1 	if (tgt > tgtEnd)
183           	{
184           	    src = oldsrc;
185           	    tgt -= numberOfBytes;
186           	    returnCode = -1;
187           	    break;
188           	}
189           
190           	switch (numberOfBytes)
191           	{ 
192           	    case 4:
193           		*--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
194           		tempchar >>= 6;
195           	    case 3:
196           		*--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
197           		tempchar >>= 6;
198           	    case 2:
199           		*--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
200           		tempchar >>= 6;
201           	    case 1:
202           		*--tgt =  (Uint8)(tempchar | firstByteMark[numberOfBytes]);
203 david 1.1 	}
204           	tgt += numberOfBytes;
205               }
206               *srcHead = src;
207               *tgtHead = tgt;
208               return returnCode;
209           }
210           
211           int UTF8toUTF16 (const Uint8** srcHead,
212           		 const Uint8* srcEnd, 
213           		 Uint16** tgtHead,
214           		 Uint16* tgtEnd)
215           {
216               int returnCode = 0;
217               const Uint8* src = *srcHead;
218               Uint16* tgt = *tgtHead;
219               while (src < srcEnd)
220               {
221           	Uint32 tempchar = 0;
222           	Uint16 moreBytes = trailingBytesForUTF8[*src];
223           	if (src + moreBytes >= srcEnd)
224 david 1.1 	{
225           	    returnCode = -1;
226           	    break;
227           	}
228           	switch (moreBytes)
229           	{
230           	    case 3:
231           		tempchar += *src++;
232           		tempchar <<= 6;
233           	    case 2:
234           		tempchar += *src++;
235           		tempchar <<= 6;
236           	    case 1:
237           		tempchar += *src++;
238           		tempchar <<= 6;
239           	    case 0:
240           		tempchar += *src++;
241           	}
242           	tempchar -= offsetsFromUTF8[moreBytes];
243           
244           	if (tgt >= tgtEnd)
245 david 1.1 	{
246           	    src -= (moreBytes+1); 
247           	    returnCode = -1; break;
248           	}
249           	if (tempchar <= MAX_BYTE)
250           	{	
251           	    if ((tempchar >= FIRST_HIGH_SURROGATE &&
252           		 tempchar <= LAST_LOW_SURROGATE) ||
253           		((tempchar & 0xFFFE) ==	0xFFFE))
254           	    {
255           		*tgt++ = REPLACEMENT_CHARACTER;
256           	    }
257           	    else
258           	    {
259           		*tgt++ = (Uint16)tempchar; 
260           	    }
261           	}
262           	else if (tempchar > MAX_UTF16)
263           	{
264           	    *tgt++ = REPLACEMENT_CHARACTER;
265           	}
266 david 1.1 	else
267           	{
268           	    if (tgt + 1 >= tgtEnd)
269           	    {
270           		src -= (moreBytes+1);
271           		returnCode = -1;
272           		break;
273           	    }
274           	    tempchar -= halfBase;
275           	    *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
276           	    *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
277           	}
278               }
279               *srcHead = src;
280               *tgtHead = tgt;
281               return returnCode;
282           }
283 david 1.5 
284           Boolean isUTF8(const char *legal)
285           {
286               char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
287           
288               // Validate that the string is long enough to hold all the expected bytes.
289               // Note that if legal[0] == 0, numBytes will be 1.
290               for (char i=1; i<numBytes; i++)
291               {
292                   if (legal[i] == 0)
293                   {
294                       return false;
295                   }
296               }
297           
298               return (isValid_U8((const Uint8 *)legal, numBytes));
299           }
300 chuck 1.6 
301           
302           String escapeStringEncoder(const String& Str)
303           {
304               String escapeStr;
305               Uint16 escChar;
306               char hexencoding[6];
307               
308               for(Uint32 i = 0; i < Str.size(); ++i)
309               {
310           	escChar = Str[i];
311           	if(escChar <= 0x7F)
312                   {
313           	    escapeStr.append(escChar);
314                   }
315           	else
316           	{
317           	    memset(hexencoding,0x00,sizeof(hexencoding));
318                       sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16);
319                       escapeStr.append(hexencoding);
320           	}
321 chuck 1.6     }
322               return(escapeStr);
323           }
324           
325           String escapeStringDecoder(const String& Str)
326           {
327               Uint32 i;
328           
329               Array<Uint16> utf16Chars; 
330           
331               for (i=0; i< Str.size(); ++i)
332               {
333                   if (Str[i] == '%')
334                   {
335                       Uint8 digit1 = _hexCharToNumeric((Str[++i]));
336                       Uint8 digit2 = _hexCharToNumeric((Str[++i]));
337                       Uint8 digit3 = _hexCharToNumeric((Str[++i]));
338                       Uint8 digit4 = _hexCharToNumeric((Str[++i]));
339           
340           	    Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
341                                            (digit3<< 4) + (digit4);
342 chuck 1.6 
343                       utf16Chars.append(decodedChar);				
344                   }
345                   else
346                   {
347                       utf16Chars.append((Uint16)Str[i]);	
348                   }
349               }
350           
351               // If there was a string to decode...
352               if (Str.size() > 0)
353               {
354                   utf16Chars.append('\0');
355                   return String((Char16 *)utf16Chars.getData());
356               }
357               else
358               {
359                   return String();
360               }
361           }
362           
363 david 1.1 PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2