(file) Return to CommonUTF.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 david 1.1 //%/////////////////////////////////////////////////////////////////////////////
  2           //
  3           // Copyright (c) 2003 BMC Software, Hewlett-Packard Company, IBM,
  4           // The Open Group, Tivoli Systems
  5           //
  6           // Permission is hereby granted, free of charge, to any person obtaining a copy
  7           // of this software and associated documentation files (the "Software"), to
  8           // deal in the Software without restriction, including without limitation the
  9           // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 10           // sell copies of the Software, and to permit persons to whom the Software is
 11           // furnished to do so, subject to the following conditions:
 12           // 
 13           // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 14           // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 15           // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 16           // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 17           // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 18           // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 19           // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 20           // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21           //
 22 david 1.1 //==============================================================================
 23           //
 24           // Author: Dave Rosckes   (rosckes@us.ibm.com)
 25           //
 26           //
 27           //%/////////////////////////////////////////////////////////////////////////////
 28           
 29           #include "CommonUTF.h"
 30 david 1.2 #include <cstring>
 31 kumpf 1.3 
 32 david 1.1 PEGASUS_NAMESPACE_BEGIN
 33 kumpf 1.3 
 34           // Note: Caller must ensure that "src" contains "size" bytes.
 35 david 1.1 int isValid_U8(const Uint8 *src, int size)
 36           {
 37               Uint8 U8_char;
 38               const Uint8 *srcptr = src+size;
 39               switch (size)
 40               {
 41           	case 4:
 42           	    if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 43           	    {
 44           		return false;
 45           	    }
 46           	case 3:
 47           	    if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
 48           	    {
 49           		return false;
 50           	    }
 51           	case 2:
 52           	    if ((U8_char = (*--srcptr)) > 0xBF)
 53           	    {
 54           		return false;
 55           	    }
 56 david 1.1 	    switch (*src)
 57           	    {
 58           		case 0xE0:
 59           		    if (U8_char < 0xA0)
 60           		    {
 61           			return false;
 62           		    }
 63           		    break;
 64           		case 0xF0:
 65           		    if (U8_char < 0x90)
 66           		    {
 67           			return false;
 68           		    }
 69           		    break;
 70           		case 0xF4:
 71           		    if (U8_char > 0x8F)
 72           		    {
 73           			return false;
 74           		    }
 75           		    break;
 76           		default:
 77 david 1.1 		    if (U8_char < 0x80)
 78           		    {
 79           			return false;
 80           		    }
 81           	    }
 82           	case 1:
 83           	    if (*src >= 0x80 && *src < 0xC2)
 84           	    {
 85           		return false;
 86           	    }
 87           	    if (*src > 0xF4)
 88           	    {
 89           		return false;
 90           	    }
 91           	    break;
 92 david 1.2         default:
 93           	    {
 94           		return false;
 95                       }
 96 david 1.1 
 97               }
 98               return true;
 99           }	
100           
101           int UTF16toUTF8(const Uint16** srcHead,
102           		const Uint16* srcEnd, 
103           		Uint8** tgtHead,
104           		Uint8* tgtEnd)
105           {
106               int returnCode = 0;
107               const Uint16* src = *srcHead;
108               Uint8* tgt = *tgtHead;
109               while (src < srcEnd)
110               {
111           	Uint32 tempchar;
112           	Uint16 numberOfBytes = 0;
113           	const Uint16* oldsrc = src; 
114           	tempchar = *src++;
115           	if (tempchar >= FIRST_HIGH_SURROGATE
116           	    && tempchar <= LAST_HIGH_SURROGATE)
117 david 1.1 	{
118           	    if (src < srcEnd)
119           	    {
120           		Uint32 tempchar2 = *src;
121           		if (tempchar2 >= FIRST_LOW_SURROGATE &&
122           		    tempchar2 <= LAST_LOW_SURROGATE)
123           		{
124           		    tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
125           		      + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
126           		    ++src;
127           		} 
128           	    }
129           	    else
130           	    { 
131           		--src;
132           		returnCode = -1;
133           		break;
134           	    }
135           	}
136           	if (tempchar < (Uint32)0x80)
137           	{
138 david 1.1 	    numberOfBytes = 1;
139           	}
140           	else if (tempchar < (Uint32)0x800)
141           	{
142           	    numberOfBytes = 2;
143           	}
144           	else if (tempchar < (Uint32)0x10000)
145           	{
146           	    numberOfBytes = 3;
147           	}
148           	else if (tempchar < (Uint32)0x200000)
149           	{
150           	    numberOfBytes = 4;
151           	}
152           	else
153           	{
154           	    numberOfBytes = 2;
155           	    tempchar = REPLACEMENT_CHARACTER;
156           	}
157           
158           	tgt += numberOfBytes;
159 david 1.1 	if (tgt > tgtEnd)
160           	{
161           	    src = oldsrc;
162           	    tgt -= numberOfBytes;
163           	    returnCode = -1;
164           	    break;
165           	}
166           
167           	switch (numberOfBytes)
168           	{ 
169           	    case 4:
170           		*--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
171           		tempchar >>= 6;
172           	    case 3:
173           		*--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
174           		tempchar >>= 6;
175           	    case 2:
176           		*--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
177           		tempchar >>= 6;
178           	    case 1:
179           		*--tgt =  (Uint8)(tempchar | firstByteMark[numberOfBytes]);
180 david 1.1 	}
181           	tgt += numberOfBytes;
182               }
183               *srcHead = src;
184               *tgtHead = tgt;
185               return returnCode;
186           }
187           
188           int UTF8toUTF16 (const Uint8** srcHead,
189           		 const Uint8* srcEnd, 
190           		 Uint16** tgtHead,
191           		 Uint16* tgtEnd)
192           {
193               int returnCode = 0;
194               const Uint8* src = *srcHead;
195               Uint16* tgt = *tgtHead;
196               while (src < srcEnd)
197               {
198           	Uint32 tempchar = 0;
199           	Uint16 moreBytes = trailingBytesForUTF8[*src];
200           	if (src + moreBytes >= srcEnd)
201 david 1.1 	{
202           	    returnCode = -1;
203           	    break;
204           	}
205           	switch (moreBytes)
206           	{
207           	    case 3:
208           		tempchar += *src++;
209           		tempchar <<= 6;
210           	    case 2:
211           		tempchar += *src++;
212           		tempchar <<= 6;
213           	    case 1:
214           		tempchar += *src++;
215           		tempchar <<= 6;
216           	    case 0:
217           		tempchar += *src++;
218           	}
219           	tempchar -= offsetsFromUTF8[moreBytes];
220           
221           	if (tgt >= tgtEnd)
222 david 1.1 	{
223           	    src -= (moreBytes+1); 
224           	    returnCode = -1; break;
225           	}
226           	if (tempchar <= MAX_BYTE)
227           	{	
228           	    if ((tempchar >= FIRST_HIGH_SURROGATE &&
229           		 tempchar <= LAST_LOW_SURROGATE) ||
230           		((tempchar & 0xFFFE) ==	0xFFFE))
231           	    {
232           		*tgt++ = REPLACEMENT_CHARACTER;
233           	    }
234           	    else
235           	    {
236           		*tgt++ = (Uint16)tempchar; 
237           	    }
238           	}
239           	else if (tempchar > MAX_UTF16)
240           	{
241           	    *tgt++ = REPLACEMENT_CHARACTER;
242           	}
243 david 1.1 	else
244           	{
245           	    if (tgt + 1 >= tgtEnd)
246           	    {
247           		src -= (moreBytes+1);
248           		returnCode = -1;
249           		break;
250           	    }
251           	    tempchar -= halfBase;
252           	    *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
253           	    *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
254           	}
255               }
256               *srcHead = src;
257               *tgtHead = tgt;
258               return returnCode;
259           }
260           PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2