1 karl 1.98 //%2005////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 mike 1.27 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
16 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 david.dillard 1.105 //
|
19 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
20 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
22 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
25 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
|
30 mike 1.111.6.12 // Author: Mike Brasher (mbrasher@austin.rr.com)
|
31 mike 1.27 //
|
32 mike 1.111.6.1 // Modified By:
33 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
34 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
35 // David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
36 // Mike Brasher (mike-brasher@austin.rr.com)
|
37 mike 1.27 //
38 //%/////////////////////////////////////////////////////////////////////////////
39
|
40 mike 1.111.6.1 #include <cassert>
|
41 kumpf 1.48 #include "InternalException.h"
|
42 david 1.69 #include "CommonUTF.h"
|
43 mike 1.111.6.1 #include "MessageLoader.h"
|
44 mike 1.111.6.2 #include "StringRep.h"
|
45 david 1.69
46 #ifdef PEGASUS_HAS_ICU
|
47 chuck 1.99 #include <unicode/ustring.h>
48 #include <unicode/uchar.h>
|
49 david 1.69 #endif
50
|
51 mike 1.27 PEGASUS_NAMESPACE_BEGIN
52
|
53 mike 1.111.6.1 //==============================================================================
54 //
55 // Compile-time macros (undefined by default).
56 //
57 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
58 //
59 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
60 //
61 //==============================================================================
62
63 //==============================================================================
|
64 kumpf 1.39 //
|
65 mike 1.111.6.1 // File-scope definitions:
|
66 kumpf 1.54 //
|
67 mike 1.111.6.1 //==============================================================================
68
|
69 mike 1.111.6.10 // Note: this table is much faster than the system toupper(). Please do not
70 // change.
71
|
72 mike 1.111.6.6 const Uint8 _toUpperTable[256] =
|
73 mike 1.111.6.1 {
74 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
75 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
76 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
77 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
78 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
79 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
80 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
81 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
82 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
83 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
84 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
85 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
86 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
87 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
88 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
89 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
90 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
91 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
92 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
93 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
94 mike 1.111.6.1 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
95 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
96 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
97 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
98 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
99 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
100 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
101 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
102 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
103 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
104 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
105 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
106 };
107
|
108 mike 1.111.6.10 // Note: this table is much faster than the system tolower(). Please do not
109 // change.
110
|
111 mike 1.111.6.6 const Uint8 _toLowerTable[256] =
|
112 mike 1.111.6.1 {
113 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
114 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
115 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
116 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
117 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
118 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
119 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
120 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
121 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
122 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
123 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
124 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
125 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
126 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
127 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
128 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
129 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
130 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
131 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
132 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
133 mike 1.111.6.1 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
134 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
135 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
136 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
137 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
138 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
139 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
140 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
141 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
142 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
143 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
144 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
145 };
|
146 kumpf 1.54
|
147 mike 1.111.6.10 // Converts 16-bit characters to upper case. This routine is faster than the
148 // system toupper(). Please do not change.
|
149 mike 1.111.6.6 inline Uint16 _toUpper(Uint16 x)
|
150 kumpf 1.54 {
|
151 mike 1.111.6.6 return (x & 0xFF00) ? x : _toUpperTable[x];
|
152 kumpf 1.54 }
153
|
154 mike 1.111.6.10 // Converts 16-bit characters to lower case. This routine is faster than the
155 // system tolower(). Please do not change.
|
156 mike 1.111.6.6 inline Uint16 _toLower(Uint16 x)
|
157 kumpf 1.54 {
|
158 mike 1.111.6.6 return (x & 0xFF00) ? x : _toLowerTable[x];
|
159 mike 1.111.6.1 }
|
160 kumpf 1.82
|
161 mike 1.111.6.6 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
|
162 mike 1.111.6.5 static Uint32 _roundUpToPow2(Uint32 x)
|
163 mike 1.111.6.1 {
|
164 mike 1.111.6.7 #ifndef PEGASUS_STRING_NO_THROW
165
|
166 mike 1.111.6.6 if (x > 0x0FFFFFFF)
|
167 mike 1.111.6.8 throw PEGASUS_STD(bad_alloc)();
|
168 mike 1.111.6.6
|
169 mike 1.111.6.7 #endif
170
|
171 mike 1.111.6.1 if (x < 8)
|
172 mike 1.111.6.8 return 8;
|
173 mike 1.111.6.1
174 x--;
175 x |= (x >> 1);
176 x |= (x >> 2);
177 x |= (x >> 4);
178 x |= (x >> 8);
179 x |= (x >> 16);
180 x++;
181
182 return x;
183 }
184
// Copies n elements from q to p, converting each element from Q to P by
// assignment. The loops are deliberately unrolled (by eight, then by four)
// for speed; please keep them unrolled.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    for (; n >= 8; n -= 8, p += 8, q += 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    for (; n >= 4; n -= 4, p += 4, q += 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
    }

    // At most three elements are left.
    for (; n; n--)
        *p++ = *q++;
}
220
|
221 mike 1.111.6.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
|
222 kumpf 1.54 {
|
223 mike 1.111.6.10 // The following employs loop unrolling for efficiency. Please do not
224 // eliminate.
225
|
226 mike 1.111.6.1 while (n >= 4)
227 {
|
228 mike 1.111.6.8 if (s[0] == c)
229 return (Uint16*)s;
230 if (s[1] == c)
231 return (Uint16*)&s[1];
232 if (s[2] == c)
233 return (Uint16*)&s[2];
234 if (s[3] == c)
235 return (Uint16*)&s[3];
|
236 mike 1.111.6.1
|
237 mike 1.111.6.8 n -= 4;
238 s += 4;
|
239 mike 1.111.6.1 }
240
241 if (n)
242 {
|
243 mike 1.111.6.8 if (*s == c)
244 return (Uint16*)s;
245 s++;
246 n--;
|
247 mike 1.111.6.1 }
248
249 if (n)
250 {
|
251 mike 1.111.6.8 if (*s == c)
252 return (Uint16*)s;
253 s++;
254 n--;
|
255 mike 1.111.6.1 }
256
257 if (n && *s == c)
|
258 mike 1.111.6.8 return (Uint16*)s;
|
259 mike 1.111.6.1
260 // Not found!
261 return 0;
|
262 kumpf 1.54 }
263
|
264 mike 1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2)
|
265 kumpf 1.54 {
|
266 mike 1.111.6.1 while (*s1 && *s2)
|
267 kumpf 1.82 {
|
268 mike 1.111.6.1 int r = *s1++ - *s2++;
269
270 if (r)
271 return r;
|
272 kumpf 1.82 }
|
273 mike 1.111.6.1
274 if (*s2)
275 return -1;
276 else if (*s1)
277 return 1;
278
279 return 0;
|
280 kumpf 1.54 }
281
|
282 mike 1.111.6.6 static int _compareNoUTF8(const Uint16* s1, const char* s2)
|
283 kumpf 1.56 {
|
284 mike 1.111.6.1 Uint16 c1;
285 Uint16 c2;
286
287 do
|
288 kumpf 1.81 {
|
289 mike 1.111.6.8 c1 = *s1++;
290 c2 = *s2++;
|
291 mike 1.111.6.1
|
292 mike 1.111.6.8 if (c1 == 0)
293 return c1 - c2;
|
294 kumpf 1.81 }
|
295 mike 1.111.6.1 while (c1 == c2);
296
297 return c1 - c2;
|
298 kumpf 1.56 }
299
|
300 mike 1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
|
301 kumpf 1.54 {
|
302 mike 1.111.6.1 // This should only be called when s1 and s2 have the same length.
303
304 while (n-- && (*s1++ - *s2++) == 0)
|
305 mike 1.111.6.8 ;
|
306 mike 1.111.6.1
307 return s1[-1] - s2[-1];
|
308 kumpf 1.54 }
309
|
310 mike 1.111.6.1 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
311 {
312 memcpy(s1, s2, n * sizeof(Uint16));
313 }
|
314 kumpf 1.39
|
315 mike 1.111.6.10 void StringThrowOutOfBounds()
|
316 mike 1.111.6.1 {
317 throw IndexOutOfBoundsException();
318 }
|
319 mike 1.27
|
320 mike 1.111.6.10 inline void _checkNullPointer(const void* ptr)
|
321 mike 1.27 {
|
322 mike 1.111.6.10 #ifdef PEGASUS_STRING_NO_THROW
323
|
324 mike 1.111.6.1 if (!ptr)
|
325 mike 1.111.6.8 throw NullPointer();
|
326 mike 1.111.6.10
|
327 mike 1.111.6.1 #endif
|
328 mike 1.111.6.10 }
329
330 static void _StringThrowBadUTF8(Uint32 index)
331 {
332 MessageLoaderParms parms(
333 "Common.String.BAD_UTF8",
334 "The byte sequence starting at index $0 "
335 "is not valid UTF-8 encoding.",
336 index);
337 throw Exception(parms);
338 }
|
339 mike 1.111.6.1
|
340 mike 1.111.6.10 static size_t _copyFromUTF8(
341 Uint16* dest,
342 const char* src,
343 size_t n,
344 size_t& utf8_error_index)
|
345 mike 1.111.6.1 {
346 Uint16* p = dest;
347 const Uint8* q = (const Uint8*)src;
|
348 mike 1.27
|
349 mike 1.111.6.1 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
350 // Use loop-unrolling.
|
351 mike 1.27
|
352 mike 1.111.6.1 while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
353 {
|
354 mike 1.111.6.8 p[0] = q[0];
355 p[1] = q[1];
356 p[2] = q[2];
357 p[3] = q[3];
358 p[4] = q[4];
359 p[5] = q[5];
360 p[6] = q[6];
361 p[7] = q[7];
362 p += 8;
363 q += 8;
364 n -= 8;
|
365 mike 1.111.6.1 }
366
367 while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
368 {
|
369 mike 1.111.6.8 p[0] = q[0];
370 p[1] = q[1];
371 p[2] = q[2];
372 p[3] = q[3];
373 p += 4;
374 q += 4;
375 n -= 4;
|
376 mike 1.111.6.1 }
377
378 switch (n)
379 {
|
380 mike 1.111.6.8 case 0:
381 return p - dest;
382 case 1:
383 if (q[0] < 128)
384 {
385 p[0] = q[0];
386 return p + 1 - dest;
387 }
388 break;
389 case 2:
390 if (((q[0]|q[1]) & 0x80) == 0)
391 {
392 p[0] = q[0];
393 p[1] = q[1];
394 return p + 2 - dest;
395 }
396 break;
397 case 3:
398 if (((q[0]|q[1]|q[2]) & 0x80) == 0)
399 {
400 p[0] = q[0];
401 mike 1.111.6.8 p[1] = q[1];
402 p[2] = q[2];
403 return p + 3 - dest;
404 }
405 break;
|
406 mike 1.111.6.1 }
407
408 // Process remaining characters.
409
410 while (n)
411 {
|
412 mike 1.111.6.8 // Optimize for 7-bit ASCII case.
|
413 mike 1.111.6.1
|
414 mike 1.111.6.8 if (*q < 128)
415 {
416 *p++ = *q++;
417 n--;
418 }
419 else
420 {
421 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
422
423 if (c > n || !isValid_U8(q, c) ||
424 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
425 {
|
426 mike 1.111.6.10 utf8_error_index = q - (const Uint8*)src;
427 return size_t(-1);
|
428 mike 1.111.6.8 }
|
429 mike 1.111.6.1
|
430 mike 1.111.6.8 n -= c;
431 }
|
432 mike 1.111.6.1 }
433
434 return p - dest;
435 }
436
437 // Note: dest must be at least three times src (plus an extra byte for
438 // terminator).
|
439 mike 1.111.6.6 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
|
440 mike 1.111.6.1 {
|
441 mike 1.111.6.10 // The following employs loop unrolling for efficiency. Please do not
442 // eliminate.
443
|
444 mike 1.111.6.1 const Uint16* q = src;
445 Uint8* p = (Uint8*)dest;
446
447 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
448 {
|
449 mike 1.111.6.8 p[0] = q[0];
450 p[1] = q[1];
451 p[2] = q[2];
452 p[3] = q[3];
453 p += 4;
454 q += 4;
455 n -= 4;
|
456 mike 1.111.6.1 }
457
458 switch (n)
459 {
|
460 mike 1.111.6.8 case 0:
461 return p - (Uint8*)dest;
462 case 1:
463 if (q[0] < 128)
464 {
465 p[0] = q[0];
466 return p + 1 - (Uint8*)dest;
467 }
468 break;
469 case 2:
470 if (q[0] < 128 && q[1] < 128)
471 {
472 p[0] = q[0];
473 p[1] = q[1];
474 return p + 2 - (Uint8*)dest;
475 }
476 break;
477 case 3:
478 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
479 {
480 p[0] = q[0];
481 mike 1.111.6.8 p[1] = q[1];
482 p[2] = q[2];
483 return p + 3 - (Uint8*)dest;
484 }
485 break;
|
486 mike 1.111.6.1 }
487
488 // If this line was reached, there must be characters greater than 128.
489
490 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
|
491 mike 1.27
|
492 mike 1.111.6.1 return p - (Uint8*)dest;
493 }
494
|
495 mike 1.111.6.10 static inline size_t _convert(
496 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
|
497 mike 1.111.6.1 {
498 #ifdef PEGASUS_STRING_NO_UTF8
499 _copy(p, q, n);
|
500 mike 1.27 return n;
|
501 mike 1.111.6.1 #else
|
502 mike 1.111.6.10 return _copyFromUTF8(p, q, n, utf8_error_index);
|
503 mike 1.111.6.1 #endif
|
504 mike 1.27 }
505
|
506 mike 1.111.6.1 //==============================================================================
|
507 chuck 1.102 //
|
508 mike 1.111.6.1 // class CString
|
509 chuck 1.102 //
|
510 mike 1.111.6.1 //==============================================================================
511
512 CString::CString(const CString& cstr) : _rep(0)
|
513 chuck 1.102 {
|
514 mike 1.111.6.1 if (cstr._rep)
|
515 chuck 1.102 {
|
516 mike 1.111.6.8 size_t n = strlen(cstr._rep) + 1;
|
517 mike 1.111.6.1 _rep = (char*)operator new(n);
|
518 mike 1.111.6.8 memcpy(_rep, cstr._rep, n);
|
519 mike 1.111.6.1 }
520 }
521
522 CString& CString::operator=(const CString& cstr)
523 {
524 if (&cstr != this)
525 {
526 if (_rep)
|
527 david.dillard 1.105 {
|
528 mike 1.111.6.1 operator delete(_rep);
529 _rep = 0;
|
530 chuck 1.102 }
|
531 mike 1.111.6.1
532 if (cstr._rep)
|
533 chuck 1.102 {
|
534 mike 1.111.6.8 size_t n = strlen(cstr._rep) + 1;
|
535 mike 1.111.6.1 _rep = (char*)operator new(n);
|
536 mike 1.111.6.8 memcpy(_rep, cstr._rep, n);
|
537 chuck 1.102 }
|
538 mike 1.111.6.1 }
|
539 chuck 1.102
|
540 mike 1.111.6.1 return *this;
541 }
542
543 //==============================================================================
544 //
545 // class StringRep
546 //
547 //==============================================================================
548
|
549 mike 1.111.6.6 StringRep StringRep::_emptyRep;
|
550 chuck 1.102
|
551 mike 1.111.6.1 inline StringRep* StringRep::alloc(size_t cap)
552 {
|
553 mike 1.111.6.7 #ifndef PEGASUS_STRING_NO_THROW
554
|
555 mike 1.111.6.6 // Any string bigger than this is seriously suspect.
556 if (cap > 0x0FFFFFFF)
|
557 mike 1.111.6.8 throw PEGASUS_STD(bad_alloc)();
|
558 mike 1.111.6.6
|
559 mike 1.111.6.7 #endif
560
|
561 mike 1.111.6.1 StringRep* rep = (StringRep*)::operator new(
|
562 mike 1.111.6.8 sizeof(StringRep) + cap * sizeof(Uint16));
|
563 mike 1.111.6.1 rep->cap = cap;
|
564 mike 1.111.6.12 new(&rep->refs) NewAtomicInt(1);
|
565 mike 1.111.6.1
566 return rep;
|
567 chuck 1.102 }
568
|
569 mike 1.111.6.1 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
570 kumpf 1.43 {
|
571 mike 1.111.6.12 if (cap > rep->cap || rep->refs.get() != 1)
|
572 mike 1.111.6.1 {
|
573 mike 1.111.6.8 size_t n = _roundUpToPow2(cap);
574 StringRep* newRep = StringRep::alloc(n);
575 newRep->size = rep->size;
576 _copy(newRep->data, rep->data, rep->size + 1);
577 StringRep::unref(rep);
578 rep = newRep;
|
579 mike 1.111.6.1 }
580 }
|
581 kumpf 1.43
|
582 mike 1.111.6.1 StringRep* StringRep::create(const Uint16* data, size_t size)
583 {
584 StringRep* rep = StringRep::alloc(size);
585 rep->size = size;
586 _copy(rep->data, data, size);
587 rep->data[size] = '\0';
588 return rep;
589 }
590
|
591 mike 1.111.6.6 StringRep* StringRep::copyOnWrite(StringRep* rep)
|
592 mike 1.111.6.1 {
593 // Return a new copy of rep. Release rep.
594
|
595 mike 1.111.6.6 StringRep* newRep = StringRep::alloc(rep->size);
596 newRep->size = rep->size;
597 _copy(newRep->data, rep->data, rep->size);
598 newRep->data[newRep->size] = '\0';
|
599 mike 1.111.6.1 StringRep::unref(rep);
|
600 mike 1.111.6.6 return newRep;
|
601 mike 1.111.6.1 }
|
602 kumpf 1.43
|
603 mike 1.111.6.1 StringRep* StringRep::create(const char* data, size_t size)
|
604 mike 1.27 {
|
605 mike 1.111.6.1 StringRep* rep = StringRep::alloc(size);
|
606 mike 1.111.6.10 Uint32 utf8_error_index;
607 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
608
609 #ifndef PEGASUS_STRING_NO_THROW
610 if (rep->size == size_t(-1))
611 {
612 StringRep::free(rep);
613 _StringThrowBadUTF8(utf8_error_index);
614 }
615 #endif
616
|
617 mike 1.111.6.1 rep->data[rep->size] = '\0';
618
619 return rep;
|
620 mike 1.27 }
621
|
622 mike 1.111.6.1 StringRep* StringRep::createASCII7(const char* data, size_t size)
|
623 mike 1.27 {
|
624 mike 1.111.6.1 StringRep* rep = StringRep::alloc(size);
625 _copy((Uint16*)rep->data, data, size);
626 rep->data[rep->size = size] = '\0';
627 return rep;
|
628 kumpf 1.39 }
|
629 tony 1.66
|
630 mike 1.111.6.1 Uint32 StringRep::length(const Uint16* str)
631 {
632 // Note: We could unroll this but it is rarely called.
633
634 const Uint16* end = (Uint16*)str;
635
636 while (*end++)
|
637 mike 1.111.6.8 ;
|
638 mike 1.111.6.1
639 return end - str - 1;
640 }
641
642 //==============================================================================
643 //
644 // class String
645 //
646 //==============================================================================
647
648 const String String::EMPTY;
|
649 mike 1.27
|
650 kumpf 1.39 String::String(const String& str, Uint32 n)
651 {
|
652 mike 1.111.6.6 _checkBounds(n, str._rep->size);
|
653 mike 1.111.6.1 _rep = StringRep::create(str._rep->data, n);
|
654 kumpf 1.39 }
655
656 String::String(const Char16* str)
657 {
|
658 mike 1.111.6.6 _checkNullPointer(str);
|
659 mike 1.111.6.1 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
660 mike 1.27 }
661
|
662 kumpf 1.39 String::String(const Char16* str, Uint32 n)
663 {
|
664 mike 1.111.6.6 _checkNullPointer(str);
|
665 mike 1.111.6.1 _rep = StringRep::create((Uint16*)str, n);
|
666 kumpf 1.39 }
667
668 String::String(const char* str)
|
669 mike 1.27 {
|
670 mike 1.111.6.6 _checkNullPointer(str);
|
671 mike 1.111.6.10
672 // Set this just in case create() throws an exception.
673 _rep = &StringRep::_emptyRep;
|
674 mike 1.111.6.1 _rep = StringRep::create(str, strlen(str));
675 }
|
676 david.dillard 1.105
|
677 mike 1.111.6.1 String::String(const char* str, String::ASCII7Tag tag)
678 {
|
679 mike 1.111.6.6 _checkNullPointer(str);
|
680 mike 1.111.6.1 _rep = StringRep::createASCII7(str, strlen(str));
|
681 mike 1.27 }
682
|
683 kumpf 1.39 String::String(const char* str, Uint32 n)
|
684 mike 1.27 {
|
685 mike 1.111.6.6 _checkNullPointer(str);
|
686 mike 1.111.6.10
687 // Set this just in case create() throws an exception.
688 _rep = &StringRep::_emptyRep;
|
689 mike 1.111.6.1 _rep = StringRep::create(str, n);
690 }
|
691 david.dillard 1.105
|
692 mike 1.111.6.1 String::String(const char* str, size_t n, String::ASCII7Tag tag)
693 {
|
694 mike 1.111.6.6 _checkNullPointer(str);
|
695 mike 1.111.6.1 _rep = StringRep::createASCII7(str, n);
|
696 kumpf 1.39 }
|
697 mike 1.27
|
698 mike 1.111.6.1 String::String(const String& s1, const String& s2)
|
699 kumpf 1.39 {
|
700 mike 1.111.6.1 size_t n1 = s1._rep->size;
701 size_t n2 = s2._rep->size;
702 size_t n = n1 + n2;
703 _rep = StringRep::alloc(n);
704 _copy(_rep->data, s1._rep->data, n1);
705 _copy(_rep->data + n1, s2._rep->data, n2);
706 _rep->size = n;
707 _rep->data[n] = '\0';
|
708 mike 1.27 }
709
|
710 mike 1.111.6.1 String::String(const String& s1, const char* s2)
|
711 mike 1.27 {
|
712 mike 1.111.6.6 _checkNullPointer(s2);
|
713 mike 1.111.6.1 size_t n1 = s1._rep->size;
714 size_t n2 = strlen(s2);
715 _rep = StringRep::alloc(n1 + n2);
716 _copy(_rep->data, s1._rep->data, n1);
|
717 mike 1.111.6.10 size_t utf8_error_index;
718 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
719
720 #ifndef PEGASUS_STRING_NO_THROW
721 if (tmp == size_t(-1))
722 {
723 StringRep::free(_rep);
724 _rep = &StringRep::_emptyRep;
725 _StringThrowBadUTF8(utf8_error_index);
726 }
727 #endif
728
729 _rep->size = n1 + tmp;
|
730 mike 1.111.6.1 _rep->data[_rep->size] = '\0';
|
731 mike 1.27 }
732
|
733 mike 1.111.6.1 String::String(const char* s1, const String& s2)
|
734 mike 1.27 {
|
735 mike 1.111.6.6 _checkNullPointer(s1);
|
736 mike 1.111.6.1 size_t n1 = strlen(s1);
737 size_t n2 = s2._rep->size;
738 _rep = StringRep::alloc(n1 + n2);
|
739 mike 1.111.6.10 size_t utf8_error_index;
740 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
741
742 #ifndef PEGASUS_STRING_NO_THROW
743 if (tmp == size_t(-1))
744 {
745 StringRep::free(_rep);
746 _rep = &StringRep::_emptyRep;
747 _StringThrowBadUTF8(utf8_error_index);
748 }
749 #endif
750
751 _rep->size = n2 + tmp;
|
752 mike 1.111.6.1 _copy(_rep->data + n1, s2._rep->data, n2);
753 _rep->data[_rep->size] = '\0';
|
754 mike 1.27 }
755
|
756 mike 1.111.6.1 String& String::assign(const String& str)
|
757 mike 1.27 {
|
758 mike 1.111.6.1 if (_rep != str._rep)
|
759 david.dillard 1.105 {
|
760 mike 1.111.6.8 StringRep::unref(_rep);
761 StringRep::ref(_rep = str._rep);
|
762 david.dillard 1.105 }
763
|
764 mike 1.27 return *this;
765 }
766
767 String& String::assign(const Char16* str, Uint32 n)
768 {
|
769 mike 1.111.6.6 _checkNullPointer(str);
|
770 mike 1.111.6.1
|
771 mike 1.111.6.12 if (n > _rep->cap || _rep->refs.get() != 1)
|
772 david.dillard 1.105 {
|
773 mike 1.111.6.8 StringRep::unref(_rep);
774 _rep = StringRep::alloc(n);
|
775 david.dillard 1.105 }
776
|
777 mike 1.111.6.1 _rep->size = n;
778 _copy(_rep->data, (Uint16*)str, n);
779 _rep->data[n] = '\0';
780
|
781 mike 1.27 return *this;
782 }
783
|
784 mike 1.111.6.1 String& String::assign(const char* str, Uint32 n)
|
785 chuck 1.102 {
|
786 mike 1.111.6.6 _checkNullPointer(str);
|
787 mike 1.111.6.1
|
788 mike 1.111.6.12 if (n > _rep->cap || _rep->refs.get() != 1)
|
789 david.dillard 1.105 {
|
790 mike 1.111.6.8 StringRep::unref(_rep);
791 _rep = StringRep::alloc(n);
|
792 david.dillard 1.105 }
793
|
794 mike 1.111.6.10 size_t utf8_error_index;
795 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
796
797 #ifndef PEGASUS_STRING_NO_THROW
798 if (_rep->size == size_t(-1))
799 {
800 StringRep::free(_rep);
801 _rep = &StringRep::_emptyRep;
802 _StringThrowBadUTF8(utf8_error_index);
803 }
804 #endif
805
|
806 mike 1.111.6.1 _rep->data[_rep->size] = 0;
807
|
808 chuck 1.102 return *this;
809 }
810
|
811 mike 1.111.6.1 String& String::assignASCII7(const char* str, Uint32 n)
|
812 mike 1.27 {
|
813 mike 1.111.6.6 _checkNullPointer(str);
|
814 mike 1.111.6.1
|
815 mike 1.111.6.12 if (n > _rep->cap || _rep->refs.get() != 1)
|
816 david.dillard 1.105 {
|
817 mike 1.111.6.8 StringRep::unref(_rep);
818 _rep = StringRep::alloc(n);
|
819 david.dillard 1.105 }
820
|
821 mike 1.111.6.1 _copy(_rep->data, str, n);
822 _rep->data[_rep->size = n] = 0;
823
|
824 mike 1.27 return *this;
825 }
826
|
827 kumpf 1.39 void String::clear()
828 {
|
829 mike 1.111.6.1 if (_rep->size)
830 {
|
831 mike 1.111.6.12 if (_rep->refs.get() == 1)
|
832 mike 1.111.6.8 {
833 _rep->size = 0;
834 _rep->data[0] = '\0';
835 }
836 else
837 {
838 StringRep::unref(_rep);
839 _rep = &StringRep::_emptyRep;
840 }
|
841 mike 1.111.6.1 }
|
842 kumpf 1.39 }
843
|
844 mike 1.111.6.1 void String::reserveCapacity(Uint32 cap)
|
845 kumpf 1.39 {
|
846 mike 1.111.6.1 _reserve(_rep, cap);
|
847 kumpf 1.39 }
848
|
849 mike 1.111.6.1 CString String::getCString() const
850 {
|
851 mike 1.111.6.10 // A UTF8 string can have three times as many characters as its UTF16
852 // counterpart, so we allocate extra memory for the worst case. In the
853 // best case, we may need only one third of the memory allocated. But
854 // downsizing the string afterwards is expensive and unecessary since
855 // CString objects are usually short-lived (disappearing after only a few
856 // instructions). CString objects are typically created on the stack as
857 // means to obtain a char* pointer.
858
|
859 mike 1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
860 char* str = (char*)operator new(_rep->size + 1);
861 _copy(str, _rep->data, _rep->size);
862 str[_rep->size] = '\0';
863 return CString(str);
|
864 gs.keenan 1.110 #else
|
865 mike 1.111.6.1 Uint32 n = 3 * _rep->size;
866 char* str = (char*)operator new(n + 1);
|
867 mike 1.111.6.6 size_t size = _copyToUTF8(str, _rep->data, _rep->size);
|
868 mike 1.111.6.1 str[size] = '\0';
869 return CString(str);
|
870 gs.keenan 1.110 #endif
|
871 kumpf 1.39 }
872
|
873 mike 1.111.6.1 String& String::append(const Char16* str, Uint32 n)
|
874 kumpf 1.39 {
|
875 mike 1.111.6.6 _checkNullPointer(str);
|
876 kumpf 1.39
|
877 mike 1.111.6.6 size_t oldSize = _rep->size;
878 size_t newSize = oldSize + n;
879 _reserve(_rep, newSize);
880 _copy(_rep->data + oldSize, (Uint16*)str, n);
881 _rep->size = newSize;
882 _rep->data[newSize] = '\0';
|
883 mike 1.27
|
884 mike 1.111.6.1 return *this;
|
885 mike 1.27 }
886
|
887 mike 1.111.6.1 String& String::append(const String& str)
|
888 mike 1.27 {
|
889 mike 1.111.6.1 return append((Char16*)str._rep->data, str._rep->size);
|
890 mike 1.27 }
891
|
892 mike 1.111.6.1 String& String::append(const char* str, Uint32 size)
|
893 kumpf 1.39 {
|
894 mike 1.111.6.6 _checkNullPointer(str);
|
895 mike 1.111.6.1
|
896 mike 1.111.6.6 size_t oldSize = _rep->size;
897 size_t cap = oldSize + size;
|
898 mike 1.111.6.1
899 _reserve(_rep, cap);
|
900 mike 1.111.6.10 size_t utf8_error_index;
901 size_t tmp = _convert(
902 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
903
904 #ifndef PEGASUS_STRING_NO_THROW
905 if (tmp == size_t(-1))
906 {
907 StringRep::free(_rep);
908 _rep = &StringRep::_emptyRep;
909 _StringThrowBadUTF8(utf8_error_index);
910 }
911 #endif
912
913 _rep->size += tmp;
|
914 mike 1.111.6.1 _rep->data[_rep->size] = '\0';
|
915 kumpf 1.39
|
916 mike 1.27 return *this;
917 }
918
|
919 mike 1.111.6.1 void String::remove(Uint32 index, Uint32 n)
|
920 mike 1.27 {
|
921 mike 1.111.6.1 if (n == PEG_NOT_FOUND)
922 n = _rep->size - index;
|
923 mike 1.27
|
924 mike 1.111.6.6 _checkBounds(index + n, _rep->size);
|
925 mike 1.27
|
926 mike 1.111.6.12 if (_rep->refs.get() != 1)
|
927 mike 1.111.6.8 _rep = StringRep::copyOnWrite(_rep);
|
928 mike 1.27
|
929 mike 1.111.6.1 assert(index + n <= _rep->size);
930
931 size_t rem = _rep->size - (index + n);
932 Uint16* data = _rep->data;
933
934 if (rem)
935 memmove(data + index, data + index + n, rem * sizeof(Uint16));
936
937 _rep->size -= n;
938 data[_rep->size] = '\0';
|
939 mike 1.27 }
940
|
941 mike 1.111.6.1 String String::subString(Uint32 index, Uint32 n) const
|
942 mike 1.27 {
|
943 mike 1.111.6.1 // Note: this implementation is very permissive but used for
944 // backwards compatibility.
945
946 if (index < _rep->size)
|
947 mike 1.27 {
|
948 mike 1.111.6.8 if (n == PEG_NOT_FOUND || n > _rep->size - index)
949 n = _rep->size - index;
|
950 mike 1.27
|
951 mike 1.111.6.8 return String((Char16*)_rep->data + index, n);
|
952 mike 1.27 }
|
953 david.dillard 1.105
954 return String();
|
955 mike 1.27 }
956
957 Uint32 String::find(Char16 c) const
958 {
|
959 mike 1.111.6.1 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
960 mike 1.27
|
961 mike 1.111.6.1 if (p)
|
962 mike 1.111.6.8 return p - _rep->data;
|
963 mike 1.27
964 return PEG_NOT_FOUND;
965 }
966
|
967 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
968 mike 1.30 {
|
969 mike 1.111.6.6 _checkBounds(index, _rep->size);
|
970 mike 1.30
|
971 mike 1.111.6.1 if (index >= _rep->size)
|
972 mike 1.111.6.8 return PEG_NOT_FOUND;
|
973 mike 1.111.6.1
974 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
975
976 if (p)
|
977 mike 1.111.6.8 return p - _rep->data;
|
978 mike 1.30
979 return PEG_NOT_FOUND;
980 }
981
|
982 mike 1.111.6.6 Uint32 StringFindAux(
|
983 mike 1.111.6.5 const StringRep* _rep, const Char16* s, Uint32 n)
|
984 mike 1.27 {
|
985 mike 1.111.6.6 _checkNullPointer(s);
|
986 mike 1.27
|
987 mike 1.111.6.1 const Uint16* data = _rep->data;
988 size_t rem = _rep->size;
|
989 mike 1.30
|
990 mike 1.111.6.1 while (n <= rem)
|
991 mike 1.27 {
|
992 mike 1.111.6.8 Uint16* p = (Uint16*)_find(data, rem, s[0]);
|
993 david.dillard 1.105
|
994 mike 1.111.6.8 if (!p)
995 break;
|
996 mike 1.111.6.1
|
997 mike 1.111.6.8 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
998 return p - _rep->data;
|
999 mike 1.111.6.1
|
1000 mike 1.111.6.8 p++;
1001 rem -= p - data;
1002 data = p;
|
1003 mike 1.27 }
|
1004 mike 1.111.6.1
|
1005 mike 1.27 return PEG_NOT_FOUND;
1006 }
1007
|
1008 mike 1.111.6.1 Uint32 String::find(const char* s) const
1009 {
|
1010 mike 1.111.6.6 _checkNullPointer(s);
|
1011 mike 1.111.6.1
1012 // Note: could optimize away creation of temporary, but this is rarely
1013 // called.
1014 return find(String(s));
1015 }
1016
|
1017 mike 1.27 Uint32 String::reverseFind(Char16 c) const
1018 {
|
1019 mike 1.111.6.1 Uint16 x = c;
1020 Uint16* p = _rep->data;
1021 Uint16* q = _rep->data + _rep->size;
|
1022 mike 1.27
|
1023 mike 1.111.6.1 while (q != p)
|
1024 mike 1.27 {
|
1025 mike 1.111.6.8 if (*--q == x)
1026 return q - p;
|
1027 mike 1.27 }
1028
1029 return PEG_NOT_FOUND;
1030 }
1031
1032 void String::toLower()
1033 {
|
1034 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1035 mike 1.111.6.1
|
1036 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
1037 david 1.90 {
|
1038 mike 1.111.6.12 if (_rep->refs.get() != 1)
|
1039 mike 1.111.6.8 _rep = StringRep::copyOnWrite(_rep);
|
1040 mike 1.111.6.1
|
1041 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
1042 mike 1.111.6.1 // Since context-sensitive casing looks at adjacent chars, this
1043 // prevents optimizations where the us-ascii is converted before
1044 // calling ICU.
|
1045 yi.zhou 1.108 // The string may shrink or expand after the convert.
1046
|
1047 mike 1.111.6.8 //// First calculate size of resulting string. u_strToLower() returns
1048 //// only the size when zero is passed as the destination size argument.
|
1049 mike 1.111.6.1
|
1050 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1051
|
1052 mike 1.111.6.6 int32_t newSize = u_strToLower(
|
1053 mike 1.111.6.8 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
1054 mike 1.111.6.1
1055 err = U_ZERO_ERROR;
|
1056 chuck 1.99
|
1057 mike 1.111.6.8 //// Reserve enough space for the result.
|
1058 mike 1.111.6.1
|
1059 mike 1.111.6.8 if ((Uint32)newSize > _rep->cap)
1060 _reserve(_rep, newSize);
|
1061 mike 1.111.6.1
|
1062 mike 1.111.6.8 //// Perform the conversion (overlapping buffers are allowed).
|
1063 yi.zhou 1.108
|
1064 mike 1.111.6.6 u_strToLower((UChar*)_rep->data, newSize,
|
1065 mike 1.111.6.8 (UChar*)_rep->data, _rep->size, NULL, &err);
|
1066 mike 1.111.6.1
|
1067 mike 1.111.6.8 _rep->size = newSize;
1068 return;
|
1069 david 1.90 }
|
1070 mike 1.111.6.1
1071 #endif /* PEGASUS_HAS_ICU */
1072
|
1073 mike 1.111.6.12 if (_rep->refs.get() != 1)
|
1074 mike 1.111.6.8 _rep = StringRep::copyOnWrite(_rep);
|
1075 mike 1.111.6.1
1076 Uint16* p = _rep->data;
1077 size_t n = _rep->size;
1078
1079 for (; n--; p++)
|
1080 david 1.90 {
|
1081 mike 1.111.6.8 if (!(*p & 0xFF00))
1082 *p = _toLower(*p);
|
1083 mike 1.27 }
|
1084 kumpf 1.39 }
1085
|
1086 chuck 1.99 void String::toUpper()
|
1087 david 1.90 {
1088 #ifdef PEGASUS_HAS_ICU
|
1089 mike 1.111.6.1
|
1090 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
1091 chuck 1.99 {
|
1092 mike 1.111.6.12 if (_rep->refs.get() != 1)
|
1093 mike 1.111.6.8 _rep = StringRep::copyOnWrite(_rep);
|
1094 mike 1.111.6.1
|
1095 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
1096 mike 1.111.6.1 // Since context-sensitive casing looks at adjacent chars, this
1097 // prevents optimizations where the us-ascii is converted before
1098 // calling ICU.
|
1099 yi.zhou 1.108 // The string may shrink or expand after the convert.
1100
|
1101 mike 1.111.6.8 //// First calculate size of resulting string. u_strToUpper() returns
1102 //// only the size when zero is passed as the destination size argument.
|
1103 mike 1.111.6.1
|
1104 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1105
|
1106 mike 1.111.6.6 int32_t newSize = u_strToUpper(
|
1107 mike 1.111.6.8 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
1108 chuck 1.99
|
1109 mike 1.111.6.1 err = U_ZERO_ERROR;
|
1110 yi.zhou 1.108
|
1111 mike 1.111.6.8 //// Reserve enough space for the result.
|
1112 david 1.90
|
1113 mike 1.111.6.8 if ((Uint32)newSize > _rep->cap)
1114 _reserve(_rep, newSize);
|
1115 kumpf 1.39
|
1116 mike 1.111.6.8 //// Perform the conversion (overlapping buffers are allowed).
|
1117 mike 1.27
|
1118 mike 1.111.6.6 u_strToUpper((UChar*)_rep->data, newSize,
|
1119 mike 1.111.6.8 (UChar*)_rep->data, _rep->size, NULL, &err);
|
1120 mike 1.111.6.1
|
1121 mike 1.111.6.8 _rep->size = newSize;
|
1122 mike 1.111.6.1
|
1123 mike 1.111.6.8 return;
|
1124 mike 1.27 }
1125
|
1126 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1127
|
1128 mike 1.111.6.12 if (_rep->refs.get() != 1)
|
1129 mike 1.111.6.8 _rep = StringRep::copyOnWrite(_rep);
|
1130 mike 1.111.6.1
1131 Uint16* p = _rep->data;
1132 size_t n = _rep->size;
1133
1134 for (; n--; p++)
|
1135 mike 1.111.6.8 *p = _toUpper(*p);
|
1136 mike 1.27 }
1137
|
1138 mike 1.111.6.1 int String::compare(const String& s1, const String& s2, Uint32 n)
|
1139 mike 1.30 {
|
1140 mike 1.111.6.1 assert(n <= s1._rep->size);
1141 assert(n <= s2._rep->size);
|
1142 kumpf 1.43
|
1143 mike 1.111.6.1 // Ignoring error in which n is greater than s1.size() or s2.size()
1144 return _compare(s1._rep->data, s2._rep->data, n);
1145 }
|
1146 mike 1.30
|
1147 mike 1.111.6.1 int String::compare(const String& s1, const String& s2)
1148 {
1149 return _compare(s1._rep->data, s2._rep->data);
1150 }
|
1151 mike 1.30
|
1152 mike 1.111.6.1 int String::compare(const String& s1, const char* s2)
1153 {
|
1154 mike 1.111.6.6 _checkNullPointer(s2);
|
1155 mike 1.30
|
1156 mike 1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
|
1157 mike 1.111.6.6 return _compareNoUTF8(s1._rep->data, s2);
|
1158 mike 1.111.6.1 #else
1159 // ATTN: optimize this!
1160 return String::compare(s1, String(s2));
1161 #endif
|
1162 mike 1.30 }
1163
|
1164 mike 1.111.6.1 int String::compareNoCase(const String& str1, const String& str2)
|
1165 kumpf 1.40 {
|
1166 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1167 mike 1.111.6.1
|
1168 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1169 {
|
1170 mike 1.111.6.1 return u_strcasecmp(
|
1171 mike 1.111.6.8 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
|
1172 yi.zhou 1.108 }
|
1173 kumpf 1.40
|
1174 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */
|
1175 kumpf 1.40
|
1176 mike 1.111.6.1 const Uint16* s1 = str1._rep->data;
1177 const Uint16* s2 = str2._rep->data;
1178
1179 while (*s1 && *s2)
1180 {
|
1181 mike 1.111.6.6 int r = _toLower(*s1++) - _toLower(*s2++);
|
1182 kumpf 1.40
|
1183 david.dillard 1.105 if (r)
1184 return r;
|
1185 kumpf 1.40 }
1186
|
1187 mike 1.111.6.1 if (*s2)
|
1188 david.dillard 1.105 return -1;
|
1189 mike 1.111.6.1 else if (*s1)
|
1190 david.dillard 1.105 return 1;
|
1191 kumpf 1.40
1192 return 0;
1193 }
1194
|
1195 mike 1.111.6.6 Boolean StringEqualNoCase(const String& s1, const String& s2)
|
1196 mike 1.27 {
|
1197 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1198 kumpf 1.39
|
1199 mike 1.111.6.1 return String::compareNoCase(s1, s2) == 0;
|
1200 kumpf 1.39
|
1201 mike 1.111.6.1 #else /* PEGASUS_HAS_ICU */
1202
|
1203 mike 1.111.6.10 // The following employs loop unrolling for efficiency. Please do not
1204 // eliminate.
1205
|
1206 mike 1.111.6.5 Uint16* p = (Uint16*)s1.getChar16Data();
1207 Uint16* q = (Uint16*)s2.getChar16Data();
1208 Uint32 n = s2.size();
|
1209 mike 1.111.6.1
1210 while (n >= 8)
1211 {
|
1212 mike 1.111.6.8 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1213 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1214 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1215 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1216 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1217 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1218 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1219 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1220 {
1221 return false;
1222 }
1223
1224 n -= 8;
1225 p += 8;
1226 q += 8;
|
1227 mike 1.111.6.1 }
1228
1229 while (n >= 4)
1230 {
|
1231 mike 1.111.6.8 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1232 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1233 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1234 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1235 {
1236 return false;
1237 }
1238
1239 n -= 4;
1240 p += 4;
1241 q += 4;
|
1242 mike 1.111.6.1 }
|
1243 mike 1.27
|
1244 kumpf 1.39 while (n--)
1245 {
|
1246 mike 1.111.6.8 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1247 return false;
|
1248 mike 1.111.6.1
|
1249 mike 1.111.6.8 p++;
1250 q++;
|
1251 kumpf 1.39 }
|
1252 mike 1.28
|
1253 kumpf 1.39 return true;
|
1254 david 1.69
|
1255 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1256 }
|
1257 mike 1.27
|
1258 mike 1.111.6.1 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1259 david 1.69 {
|
1260 mike 1.111.6.6 _checkNullPointer(s2);
|
1261 david 1.69
|
1262 mike 1.111.6.1 #if defined(PEGASUS_HAS_ICU)
|
1263 david 1.69
|
1264 mike 1.111.6.1 return String::equalNoCase(s1, String(s2));
|
1265 david.dillard 1.105
|
1266 mike 1.111.6.1 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1267 david 1.71
|
1268 mike 1.111.6.1 const Uint16* p1 = (Uint16*)s1._rep->data;
1269 const char* p2 = s2;
1270 size_t n = s1._rep->size;
|
1271 kumpf 1.42
|
1272 mike 1.111.6.1 while (n--)
1273 {
|
1274 mike 1.111.6.8 if (!*p2)
1275 return false;
|
1276 karl 1.36
|
1277 mike 1.111.6.8 if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1278 return false;
|
1279 mike 1.111.6.1 }
|
1280 david.dillard 1.105
|
1281 mike 1.111.6.1 if (*p2)
|
1282 mike 1.111.6.8 return false;
|
1283 mike 1.111.6.1
1284 return true;
|
1285 david.dillard 1.105
|
1286 mike 1.111.6.1 #else /* PEGASUS_HAS_ICU */
|
1287 chuck 1.78
|
1288 mike 1.111.6.1 // ATTN: optimize this!
1289 return String::equalNoCase(s1, String(s2));
|
1290 david.dillard 1.105
|
1291 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1292 }
|
1293 karl 1.36
|
1294 mike 1.111.6.1 Boolean String::equal(const String& s1, const String& s2)
|
1295 karl 1.36 {
|
1296 mike 1.111.6.1 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
|
1297 mike 1.111.6.8 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
|
1298 karl 1.36 }
1299
|
1300 mike 1.111.6.1 Boolean String::equal(const String& s1, const char* s2)
|
1301 karl 1.36 {
|
1302 mike 1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
|
1303 kumpf 1.39
|
1304 mike 1.111.6.6 _checkNullPointer(s2);
|
1305 kumpf 1.39
|
1306 mike 1.111.6.1 const Uint16* p = (Uint16*)s1._rep->data;
1307 const char* q = s2;
|
1308 kumpf 1.39
|
1309 mike 1.111.6.1 while (*p && *q)
1310 {
|
1311 mike 1.111.6.8 if (*p++ != Uint16(*q++))
1312 return false;
|
1313 mike 1.111.6.1 }
|
1314 kumpf 1.39
|
1315 mike 1.111.6.1 return !(*p || *q);
|
1316 kumpf 1.39
|
1317 mike 1.111.6.1 #else /* PEGASUS_STRING_NO_UTF8 */
|
1318 kumpf 1.39
|
1319 mike 1.111.6.1 return String::equal(s1, String(s2));
1320
1321 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1322 kumpf 1.39 }
1323
|
1324 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1325 kumpf 1.39 {
|
1326 david 1.69 #if defined(PEGASUS_OS_OS400)
|
1327 mike 1.111.6.1
|
1328 david 1.93 CString cstr = str.getCString();
|
1329 david 1.69 const char* utf8str = cstr;
1330 os << utf8str;
|
1331 mike 1.111.6.1 return os;
1332 #else
1333
1334 #if defined(PEGASUS_HAS_ICU)
|
1335 david 1.69
|
1336 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1337 {
|
1338 david.dillard 1.105 char *buf = NULL;
1339 const int size = str.size() * 6;
|
1340 mike 1.111.6.1 UnicodeString UniStr(
|
1341 mike 1.111.6.8 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1342 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1343 buf = new char[bufsize+1];
1344 UniStr.extract(0,bufsize,buf);
1345 os << buf;
1346 os.flush();
1347 delete [] buf;
|
1348 mike 1.111.6.1 return os;
|
1349 yi.zhou 1.108 }
|
1350 mike 1.111.6.1
1351 #endif // PEGASUS_HAS_ICU
1352
1353 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1354 yi.zhou 1.108 {
|
1355 mike 1.111.6.1 Uint16 code = str[i];
|
1356 david.dillard 1.105
|
1357 mike 1.111.6.8 if (code > 0 && !(code & 0xFF00))
1358 os << char(code);
|
1359 mike 1.111.6.1 else
|
1360 mike 1.111.6.8 {
|
1361 mike 1.111.6.1 // Print in hex format:
1362 char buffer[8];
1363 sprintf(buffer, "\\x%04X", code);
1364 os << buffer;
|
1365 david.dillard 1.105 }
|
1366 yi.zhou 1.108 }
|
1367 kumpf 1.39
1368 return os;
|
1369 mike 1.111.6.1 #endif // PEGASUS_OS_OS400
|
1370 kumpf 1.39 }
1371
|
1372 mike 1.111.6.6 void StringAppendCharAux(StringRep*& _rep)
|
1373 kumpf 1.39 {
|
1374 mike 1.111.6.1 StringRep* tmp;
|
1375 kumpf 1.39
|
1376 mike 1.111.6.1 if (_rep->cap)
1377 {
|
1378 mike 1.111.6.8 tmp = StringRep::alloc(2 * _rep->cap);
1379 tmp->size = _rep->size;
1380 _copy(tmp->data, _rep->data, _rep->size);
|
1381 mike 1.111.6.1 }
1382 else
1383 {
|
1384 mike 1.111.6.8 tmp = StringRep::alloc(8);
1385 tmp->size = 0;
|
1386 mike 1.111.6.1 }
|
1387 kumpf 1.39
|
1388 mike 1.111.6.1 StringRep::unref(_rep);
1389 _rep = tmp;
|
1390 kumpf 1.39 }
1391
|
1392 mike 1.111.6.1 PEGASUS_NAMESPACE_END
|
1393 kumpf 1.39
|
1394 mike 1.111.6.1 /*
1395 ================================================================================
|
1396 kumpf 1.39
|
1397 mike 1.111.6.1 String optimizations:
1398
1399 1. Added mechanism allowing certain functions to be inlined only when
|
1400 mike 1.111.6.8 used by internal Pegasus modules. External modules (i.e., providers)
1401 link to a non-inline version, which allows for binary compatibility.
|
1402 mike 1.111.6.1
1403 2. Implemented copy-on-write with atomic increment/decrement. This
|
1404 mike 1.111.6.8 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1405 for the 'ni1000' benchmark.
|
1406 mike 1.111.6.1
|
1407 mike 1.111.6.8 3. Employed loop unrolling in several places. For example, see:
|
1408 mike 1.111.6.1
|
1409 mike 1.111.6.8 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
|
1410 mike 1.111.6.1
1411 4. Used the "empty-rep" optimization (described in whitepaper from the
|
1412 mike 1.111.6.8 GCC Developers Summit). This reduced default construction to a simple
1413 pointer assignment.
|
1414 mike 1.111.6.1
|
1415 mike 1.111.6.8 inline String::String() : _rep(&_emptyRep) { }
|
1416 mike 1.111.6.1
1417 5. Implemented Uint16 versions of toupper() and tolower() using tables.
|
1418 mike 1.111.6.8 For example:
|
1419 mike 1.111.6.1
|
1420 mike 1.111.6.8 static const char _upper[] =
1421 {
1422 0,1,2,...255
1423 };
1424
1425 inline Uint16 _toUpper(Uint16 x)
1426 {
1427 return (x & 0xFF00) ? x : _upper[x];
1428 }
|
1429 mike 1.111.6.1
|
1430 mike 1.111.6.8 This outperforms the system implementation by avoiding an anding
1431 operation.
|
1432 mike 1.111.6.1
1433 6. Implemented char* version of the following member functions to
|
1434 mike 1.111.6.8 eliminate unnecessary creation of anonymous string objects
1435 (temporaries).
|
1436 mike 1.111.6.1
|
1437 mike 1.111.6.8 String(const String& s1, const char* s2);
1438 String(const char* s1, const String& s2);
1439 String& String::operator=(const char* str);
1440 Uint32 String::find(const char* s) const;
1441 bool String::equal(const String& s1, const char* s2);
1442 static int String::compare(const String& s1, const char* s2);
1443 String& String::append(const char* str);
1444 String& String::append(const char* str, Uint32 size);
1445 static bool String::equalNoCase(const String& s1, const char* s2);
1446 String& operator=(const char* str)
1447 String& String::assign(const char* str)
1448 String& String::append(const char* str)
1449 Boolean operator==(const String& s1, const char* s2)
1450 Boolean operator==(const char* s1, const String& s2)
1451 Boolean operator!=(const String& s1, const char* s2)
1452 Boolean operator!=(const char* s1, const String& s2)
1453 Boolean operator<(const String& s1, const char* s2)
1454 Boolean operator<(const char* s1, const String& s2)
1455 Boolean operator>(const String& s1, const char* s2)
1456 Boolean operator>(const char* s1, const String& s2)
1457 Boolean operator<=(const String& s1, const char* s2)
1458 mike 1.111.6.8 Boolean operator<=(const char* s1, const String& s2)
1459 Boolean operator>=(const String& s1, const char* s2)
1460 Boolean operator>=(const char* s1, const String& s2)
1461 String operator+(const String& s1, const char* s2)
1462 String operator+(const char* s1, const String& s2)
|
1463 mike 1.111.6.1
|
1464 mike 1.111.6.5 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
|
1465 mike 1.111.6.1 power of two (algorithm from the book "Hacker's Delight").
1466
|
1467 mike 1.111.6.8 static Uint32 _roundUpToPow2(Uint32 x)
1468 {
1469 if (x < 8)
1470 return 8;
1471
1472 x--;
1473 x |= (x >> 1);
1474 x |= (x >> 2);
1475 x |= (x >> 4);
1476 x |= (x >> 8);
1477 x |= (x >> 16);
1478 x++;
|
1479 mike 1.111.6.1
|
1480 mike 1.111.6.8 return x;
1481 }
|
1482 mike 1.111.6.1
1483 8. Implemented "concatenating constructors" to eliminate temporaries
|
1484 mike 1.111.6.8 created by operator+(). This scheme employs the "return-value
1485 optimization" described by Stan Lippman.
|
1486 mike 1.111.6.1
|
1487 mike 1.111.6.8 inline String operator+(const String& s1, const String& s2)
1488 {
1489 return String(s1, s2, 0);
1490 }
|
1491 mike 1.111.6.1
1492 9. Experimented to find the optimal initial size for a short string.
|
1493 mike 1.111.6.8 Eight seems to offer the best tradeoff between space and time.
|
1494 mike 1.111.6.1
1495 10. Inlined all members of the Char16 class.
1496
1497 11. Used Uint16 internally in the String class. This showed no improvement
|
1498 mike 1.111.6.8 since Char16 was already fully inlined and was essentially reduced to
1499 Uint16 in any case.
|
1500 mike 1.111.6.1
1501 12. Implemented conditional logic (#if) allowing error checking logic to
|
1502 mike 1.111.6.8 be excluded to better performance. Examples include bounds checking
1503 and null-pointer checking.
|
1504 mike 1.111.6.1
1505 13. Used memcpy() and memcmp() where possible. These are implemented using
|
1506 mike 1.111.6.8 the rep family of instructions under Intel and are much faster.
|
1507 mike 1.111.6.1
1508 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
|
1509 mike 1.111.6.8 copy routine overhead.
|
1510 mike 1.111.6.1
1511 15. Added ASCII7 form of the constructor and assign().
1512
|
1513 mike 1.111.6.8 String s("hello world", String::ASCII7);
|
1514 mike 1.111.6.1
|
1515 mike 1.111.6.8 s.assignASCII7("hello world");
|
1516 mike 1.111.6.1
|
1517 mike 1.111.6.8 This avoids slower UTF8 processing when not needed.
|
1518 mike 1.111.6.1
|
1519 mike 1.111.6.5 ================================================================================
1520
1521 TO-DO:
1522
|
1523 mike 1.111.6.8 (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
|
1524 mike 1.111.6.5
|
1525 mike 1.111.6.8 (+) [DONE] Submit BUG-2754 (Windows buffer limit).
|
1526 mike 1.111.6.5
1527 (+) [DONE] Eliminate char versions of find() and append().
1528
1529 (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1530
1531 (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1532
1533 (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1534
1535 (+) [DONE] Comment StringRep allocation layout.
1536
1537 (+) [DONE] Conceal private inline functions.
|
1538 mike 1.111.6.1
|
1539 mike 1.111.6.5 (+) [DONE] Shorten inclusion of StringInline.h in String.h.
|
1540 mike 1.111.6.1
|
1541 mike 1.111.6.5 (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
|
1542 mike 1.111.6.8 rid of altogether.
|
1543 mike 1.111.6.1
|
1544 mike 1.111.6.6 (+) [DONE] useCamelNotationOnAllFunctionNames.
1545
|
1546 mike 1.111.6.7 (+) [DONE] Check for overflow condition in StringRep::alloc().
|
1547 mike 1.111.6.1
|
1548 mike 1.111.6.9 (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
1549
|
1550 mike 1.111.6.10 (+) [DONE] Fix throw-related memory leak.
|
1551 mike 1.111.6.1
|
1552 mike 1.111.6.10 -----------
|
1553 mike 1.111.6.1
|
1554 mike 1.111.6.8 (+) DOC++ String.h
1555
|
1556 mike 1.111.6.5 (+) Look at PEP223 for coding security guidelines.
|
1557 mike 1.111.6.1
|
1558 mike 1.111.6.6 (+) Replace AtomicInt with new Atomic implementation.
|
1559 mike 1.111.6.1
|
1560 mike 1.111.6.11 (+) Hide appendASCII7() function.
|
1561 mike 1.111.6.3
|
1562 mike 1.111.6.1 ================================================================================
1563 */
|