1 david 1.1 //%/////////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (c) 2003 BMC Software, Hewlett-Packard Company, IBM,
4 // The Open Group, Tivoli Systems
5 //
6 // Permission is hereby granted, free of charge, to any person obtaining a copy
7 // of this software and associated documentation files (the "Software"), to
8 // deal in the Software without restriction, including without limitation the
9 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 // sell copies of the Software, and to permit persons to whom the Software is
11 // furnished to do so, subject to the following conditions:
12 //
13 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
14 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
15 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
16 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
17 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
19 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 //
22 david 1.1 //==============================================================================
23 //
24 // Author: Dave Rosckes (rosckes@us.ibm.com)
25 //
26 //
27 //%/////////////////////////////////////////////////////////////////////////////
28
29 #include "CommonUTF.h"
|
96 david 1.1
97 }
98 return true;
99 }
100
101 int UTF16toUTF8(const Uint16** srcHead,
102 const Uint16* srcEnd,
103 Uint8** tgtHead,
104 Uint8* tgtEnd)
105 {
106 int returnCode = 0;
107 const Uint16* src = *srcHead;
108 Uint8* tgt = *tgtHead;
109 while (src < srcEnd)
110 {
111 Uint32 tempchar;
112 Uint16 numberOfBytes = 0;
113 const Uint16* oldsrc = src;
114 tempchar = *src++;
115 if (tempchar >= FIRST_HIGH_SURROGATE
116 && tempchar <= LAST_HIGH_SURROGATE)
117 david 1.1 {
118 if (src < srcEnd)
119 {
120 Uint32 tempchar2 = *src;
121 if (tempchar2 >= FIRST_LOW_SURROGATE &&
122 tempchar2 <= LAST_LOW_SURROGATE)
123 {
124 tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
125 + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
126 ++src;
127 }
128 }
129 else
130 {
131 --src;
132 returnCode = -1;
133 break;
134 }
135 }
136 if (tempchar < (Uint32)0x80)
137 {
138 david 1.1 numberOfBytes = 1;
139 }
140 else if (tempchar < (Uint32)0x800)
141 {
142 numberOfBytes = 2;
143 }
144 else if (tempchar < (Uint32)0x10000)
145 {
146 numberOfBytes = 3;
147 }
148 else if (tempchar < (Uint32)0x200000)
149 {
150 numberOfBytes = 4;
151 }
152 else
153 {
154 numberOfBytes = 2;
155 tempchar = REPLACEMENT_CHARACTER;
156 }
157
158 tgt += numberOfBytes;
159 david 1.1 if (tgt > tgtEnd)
160 {
161 src = oldsrc;
162 tgt -= numberOfBytes;
163 returnCode = -1;
164 break;
165 }
166
167 switch (numberOfBytes)
168 {
169 case 4:
170 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
171 tempchar >>= 6;
172 case 3:
173 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
174 tempchar >>= 6;
175 case 2:
176 *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
177 tempchar >>= 6;
178 case 1:
179 *--tgt = (Uint8)(tempchar | firstByteMark[numberOfBytes]);
180 david 1.1 }
181 tgt += numberOfBytes;
182 }
183 *srcHead = src;
184 *tgtHead = tgt;
185 return returnCode;
186 }
187
188 int UTF8toUTF16 (const Uint8** srcHead,
189 const Uint8* srcEnd,
190 Uint16** tgtHead,
191 Uint16* tgtEnd)
192 {
193 int returnCode = 0;
194 const Uint8* src = *srcHead;
195 Uint16* tgt = *tgtHead;
196 while (src < srcEnd)
197 {
198 Uint32 tempchar = 0;
199 Uint16 moreBytes = trailingBytesForUTF8[*src];
200 if (src + moreBytes >= srcEnd)
201 david 1.1 {
202 returnCode = -1;
203 break;
204 }
205 switch (moreBytes)
206 {
207 case 3:
208 tempchar += *src++;
209 tempchar <<= 6;
210 case 2:
211 tempchar += *src++;
212 tempchar <<= 6;
213 case 1:
214 tempchar += *src++;
215 tempchar <<= 6;
216 case 0:
217 tempchar += *src++;
218 }
219 tempchar -= offsetsFromUTF8[moreBytes];
220
221 if (tgt >= tgtEnd)
222 david 1.1 {
223 src -= (moreBytes+1);
224 returnCode = -1; break;
225 }
226 if (tempchar <= MAX_BYTE)
227 {
228 if ((tempchar >= FIRST_HIGH_SURROGATE &&
229 tempchar <= LAST_LOW_SURROGATE) ||
230 ((tempchar & 0xFFFE) == 0xFFFE))
231 {
232 *tgt++ = REPLACEMENT_CHARACTER;
233 }
234 else
235 {
236 *tgt++ = (Uint16)tempchar;
237 }
238 }
239 else if (tempchar > MAX_UTF16)
240 {
241 *tgt++ = REPLACEMENT_CHARACTER;
242 }
243 david 1.1 else
244 {
245 if (tgt + 1 >= tgtEnd)
246 {
247 src -= (moreBytes+1);
248 returnCode = -1;
249 break;
250 }
251 tempchar -= halfBase;
252 *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
253 *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
254 }
255 }
256 *srcHead = src;
257 *tgtHead = tgt;
258 return returnCode;
259 }
260 PEGASUS_NAMESPACE_END
|