1 karl 1.119 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.119 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.27 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.119 //
|
21 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
|
32 mike 1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
|
33 mike 1.27 //
|
34 david.dillard 1.116 // Modified By:
|
35 mike 1.112 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
36 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
|
37 david.dillard 1.116 // David Dillard, Symantec Corp. (david_dillard@symantec.com)
|
38 mike 1.112 // Mike Brasher (mike-brasher@austin.rr.com)
|
39 mike 1.27 //
40 //%/////////////////////////////////////////////////////////////////////////////
41
|
42 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
|
43 mike 1.113 #include <cstring>
|
44 kumpf 1.48 #include "InternalException.h"
|
45 david 1.69 #include "CommonUTF.h"
|
46 mike 1.112 #include "MessageLoader.h"
47 #include "StringRep.h"
|
48 david 1.69
49 #ifdef PEGASUS_HAS_ICU
|
50 chuck 1.99 #include <unicode/ustring.h>
51 #include <unicode/uchar.h>
|
52 david 1.69 #endif
53
|
54 mike 1.112 PEGASUS_NAMESPACE_BEGIN
|
55 mike 1.28
|
56 mike 1.112 //==============================================================================
57 //
58 // Compile-time macros (undefined by default).
59 //
60 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
|
61 david.dillard 1.116 //
|
62 mike 1.112 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
63 //
64 //==============================================================================
|
65 mike 1.27
|
66 mike 1.112 //==============================================================================
|
67 kumpf 1.39 //
|
68 mike 1.112 // File-scope definitions:
|
69 kumpf 1.54 //
|
70 mike 1.112 //==============================================================================
71
72 // Note: this table is much faster than the system toupper(). Please do not
73 // change.
|
74 kumpf 1.54
|
75 david.dillard 1.116 const Uint8 _toUpperTable[256] =
|
76 kumpf 1.54 {
|
77 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
78 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
79 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
80 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
81 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
82 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
83 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
84 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
85 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
86 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
87 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
88 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
89 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
90 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
91 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
92 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
93 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
94 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
95 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
96 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
97 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
98 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
99 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
100 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
101 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
102 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
103 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
104 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
105 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
106 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
107 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
108 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
109 };
110
111 // Note: this table is much faster than the system tulower(). Please do not
112 // change.
113
|
114 david.dillard 1.116 const Uint8 _toLowerTable[256] =
|
115 mike 1.112 {
116 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
117 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
118 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
119 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
120 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
121 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
122 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
123 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
124 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
125 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
126 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
127 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
128 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
129 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
130 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
131 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
132 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
133 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
134 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
135 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
136 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
137 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
138 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
139 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
140 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
141 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
142 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
143 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
144 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
145 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
146 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
147 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
148 };
149
150 // Converts 16-bit characters to upper case. This routine is faster than the
151 // system toupper(). Please do not change.
152 inline Uint16 _toUpper(Uint16 x)
153 {
154 return (x & 0xFF00) ? x : _toUpperTable[x];
|
155 kumpf 1.54 }
156
|
157 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the
158 // system toupper(). Please do not change.
159 inline Uint16 _toLower(Uint16 x)
|
160 kumpf 1.54 {
|
161 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x];
162 }
163
164 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
165 static Uint32 _roundUpToPow2(Uint32 x)
166 {
167 #ifndef PEGASUS_STRING_NO_THROW
168
|
169 dave.sudlik 1.119.2.1 // Check for potential overflow in x
170 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
|
171 mike 1.112
172 #endif
173
174 if (x < 8)
175 return 8;
176
177 x--;
178 x |= (x >> 1);
179 x |= (x >> 2);
180 x |= (x >> 4);
181 x |= (x >> 8);
182 x |= (x >> 16);
183 x++;
184
185 return x;
186 }
187
// Copies n elements from q to p, converting each element from Q to P by
// plain assignment. The ranges are processed front to back.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // The loops below are deliberately unrolled for efficiency. Please do
    // not collapse them.

    // Blocks of eight elements.
    for (; n >= 8; n -= 8, p += 8, q += 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    // Blocks of four elements.
    for (; n >= 4; n -= 4, p += 4, q += 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
    }

    // Remaining zero to three elements.
    for (; n != 0; --n)
        *p++ = *q++;
}
223
224 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
225 {
226 // The following employs loop unrolling for efficiency. Please do not
227 // eliminate.
228
229 while (n >= 4)
230 {
231 if (s[0] == c)
232 return (Uint16*)s;
233 if (s[1] == c)
234 mike 1.112 return (Uint16*)&s[1];
235 if (s[2] == c)
236 return (Uint16*)&s[2];
237 if (s[3] == c)
238 return (Uint16*)&s[3];
|
239 kumpf 1.82
|
240 mike 1.112 n -= 4;
241 s += 4;
242 }
243
244 if (n)
245 {
246 if (*s == c)
247 return (Uint16*)s;
248 s++;
249 n--;
250 }
251
252 if (n)
253 {
254 if (*s == c)
255 return (Uint16*)s;
256 s++;
257 n--;
258 }
259
260 if (n && *s == c)
261 mike 1.112 return (Uint16*)s;
262
263 // Not found!
264 return 0;
265 }
266
267 static int _compare(const Uint16* s1, const Uint16* s2)
268 {
269 while (*s1 && *s2)
270 {
271 int r = *s1++ - *s2++;
272
273 if (r)
274 return r;
275 }
276
277 if (*s2)
278 return -1;
279 else if (*s1)
280 return 1;
281
282 mike 1.112 return 0;
283 }
284
285 static int _compareNoUTF8(const Uint16* s1, const char* s2)
286 {
287 Uint16 c1;
288 Uint16 c2;
289
290 do
291 {
292 c1 = *s1++;
293 c2 = *s2++;
294
295 if (c1 == 0)
296 return c1 - c2;
297 }
298 while (c1 == c2);
299
300 return c1 - c2;
301 }
302
303 mike 1.112 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
304 {
305 memcpy(s1, s2, n * sizeof(Uint16));
306 }
307
308 void StringThrowOutOfBounds()
309 {
310 throw IndexOutOfBoundsException();
311 }
312
313 inline void _checkNullPointer(const void* ptr)
314 {
|
315 kumpf 1.117 #ifndef PEGASUS_STRING_NO_THROW
|
316 mike 1.112
317 if (!ptr)
318 throw NullPointer();
319
320 #endif
321 }
322
323 static void _StringThrowBadUTF8(Uint32 index)
324 {
325 MessageLoaderParms parms(
326 "Common.String.BAD_UTF8",
327 "The byte sequence starting at index $0 "
328 "is not valid UTF-8 encoding.",
329 index);
330 throw Exception(parms);
331 }
332
333 static size_t _copyFromUTF8(
|
334 david.dillard 1.116 Uint16* dest,
335 const char* src,
|
336 mike 1.112 size_t n,
337 size_t& utf8_error_index)
338 {
339 Uint16* p = dest;
340 const Uint8* q = (const Uint8*)src;
341
342 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
343 // Use loop-unrolling.
344
345 while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
346 {
347 p[0] = q[0];
348 p[1] = q[1];
349 p[2] = q[2];
350 p[3] = q[3];
351 p[4] = q[4];
352 p[5] = q[5];
353 p[6] = q[6];
354 p[7] = q[7];
355 p += 8;
356 q += 8;
357 mike 1.112 n -= 8;
358 }
359
360 while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
361 {
362 p[0] = q[0];
363 p[1] = q[1];
364 p[2] = q[2];
365 p[3] = q[3];
366 p += 4;
367 q += 4;
368 n -= 4;
369 }
370
371 switch (n)
372 {
373 case 0:
374 return p - dest;
375 case 1:
376 if (q[0] < 128)
377 {
378 mike 1.112 p[0] = q[0];
379 return p + 1 - dest;
380 }
381 break;
382 case 2:
383 if (((q[0]|q[1]) & 0x80) == 0)
384 {
385 p[0] = q[0];
386 p[1] = q[1];
387 return p + 2 - dest;
388 }
389 break;
390 case 3:
391 if (((q[0]|q[1]|q[2]) & 0x80) == 0)
392 {
393 p[0] = q[0];
394 p[1] = q[1];
395 p[2] = q[2];
396 return p + 3 - dest;
397 }
398 break;
399 mike 1.112 }
400
401 // Process remaining characters.
402
403 while (n)
404 {
405 // Optimize for 7-bit ASCII case.
406
407 if (*q < 128)
408 {
409 *p++ = *q++;
410 n--;
411 }
412 else
413 {
414 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
415
416 if (c > n || !isValid_U8(q, c) ||
417 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
418 {
419 utf8_error_index = q - (const Uint8*)src;
420 mike 1.112 return size_t(-1);
421 }
422
423 n -= c;
424 }
425 }
426
427 return p - dest;
428 }
429
|
430 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
|
431 mike 1.112 // terminator).
432 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
433 {
434 // The following employs loop unrolling for efficiency. Please do not
435 // eliminate.
436
437 const Uint16* q = src;
438 Uint8* p = (Uint8*)dest;
439
440 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
|
441 kumpf 1.82 {
|
442 mike 1.112 p[0] = q[0];
443 p[1] = q[1];
444 p[2] = q[2];
445 p[3] = q[3];
446 p += 4;
447 q += 4;
448 n -= 4;
|
449 kumpf 1.82 }
|
450 mike 1.112
451 switch (n)
452 {
453 case 0:
454 return p - (Uint8*)dest;
455 case 1:
456 if (q[0] < 128)
457 {
458 p[0] = q[0];
459 return p + 1 - (Uint8*)dest;
460 }
461 break;
462 case 2:
463 if (q[0] < 128 && q[1] < 128)
464 {
465 p[0] = q[0];
466 p[1] = q[1];
467 return p + 2 - (Uint8*)dest;
468 }
469 break;
470 case 3:
471 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
472 {
473 p[0] = q[0];
474 p[1] = q[1];
475 p[2] = q[2];
476 return p + 3 - (Uint8*)dest;
477 }
478 break;
479 }
480
481 // If this line was reached, there must be characters greater than 128.
482
483 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
484
485 return p - (Uint8*)dest;
|
486 kumpf 1.54 }
487
|
488 mike 1.112 static inline size_t _convert(
489 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
|
490 kumpf 1.54 {
|
491 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
492 _copy(p, q, n);
493 return n;
494 #else
495 return _copyFromUTF8(p, q, n, utf8_error_index);
496 #endif
|
497 kumpf 1.54 }
498
|
499 mike 1.112 //==============================================================================
500 //
501 // class CString
502 //
503 //==============================================================================
504
505 CString::CString(const CString& cstr) : _rep(0)
|
506 kumpf 1.54 {
|
507 mike 1.112 if (cstr._rep)
|
508 kumpf 1.82 {
|
509 mike 1.112 size_t n = strlen(cstr._rep) + 1;
510 _rep = (char*)operator new(n);
511 memcpy(_rep, cstr._rep, n);
|
512 kumpf 1.82 }
|
513 kumpf 1.54 }
514
|
515 kumpf 1.56 CString& CString::operator=(const CString& cstr)
516 {
|
517 kumpf 1.82 if (&cstr != this)
|
518 kumpf 1.81 {
|
519 kumpf 1.82 if (_rep)
520 {
|
521 mike 1.112 operator delete(_rep);
|
522 kumpf 1.82 _rep = 0;
523 }
|
524 mike 1.112
|
525 kumpf 1.82 if (cstr._rep)
526 {
|
527 mike 1.112 size_t n = strlen(cstr._rep) + 1;
528 _rep = (char*)operator new(n);
529 memcpy(_rep, cstr._rep, n);
|
530 kumpf 1.82 }
|
531 kumpf 1.81 }
|
532 mike 1.112
|
533 kumpf 1.56 return *this;
534 }
535
|
536 mike 1.112 //==============================================================================
|
537 kumpf 1.54 //
|
538 mike 1.112 // class StringRep
|
539 kumpf 1.39 //
|
540 mike 1.112 //==============================================================================
|
541 kumpf 1.39
|
542 mike 1.112 StringRep StringRep::_emptyRep;
|
543 mike 1.27
|
544 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
|
545 mike 1.27 {
|
546 mike 1.112 #ifndef PEGASUS_STRING_NO_THROW
|
547 dave.sudlik 1.119.2.1
548 // Check for potential overflow in cap
549 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
|
550 mike 1.27
|
551 mike 1.112 #endif
|
552 mike 1.27
|
553 mike 1.112 StringRep* rep = (StringRep*)::operator new(
554 sizeof(StringRep) + cap * sizeof(Uint16));
555 rep->cap = cap;
556 new(&rep->refs) AtomicInt(1);
557
558 return rep;
|
559 mike 1.27 }
560
|
561 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
562 chuck 1.102 {
|
563 mike 1.114 if (cap > rep->cap || rep->refs.get() != 1)
|
564 chuck 1.102 {
|
565 mike 1.112 size_t n = _roundUpToPow2(cap);
566 StringRep* newRep = StringRep::alloc(n);
567 newRep->size = rep->size;
568 _copy(newRep->data, rep->data, rep->size + 1);
569 StringRep::unref(rep);
570 rep = newRep;
571 }
572 }
|
573 david.dillard 1.105
|
574 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
575 {
576 StringRep* rep = StringRep::alloc(size);
577 rep->size = size;
578 _copy(rep->data, data, size);
579 rep->data[size] = '\0';
580 return rep;
581 }
|
582 chuck 1.102
|
583 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
584 {
585 // Return a new copy of rep. Release rep.
|
586 chuck 1.102
|
587 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size);
588 newRep->size = rep->size;
589 _copy(newRep->data, rep->data, rep->size);
590 newRep->data[newRep->size] = '\0';
591 StringRep::unref(rep);
592 return newRep;
|
593 chuck 1.102 }
594
|
595 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
|
596 kumpf 1.43 {
|
597 mike 1.112 StringRep* rep = StringRep::alloc(size);
598 size_t utf8_error_index;
599 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
600
601 #ifndef PEGASUS_STRING_NO_THROW
602 if (rep->size == size_t(-1))
603 {
604 StringRep::free(rep);
605 _StringThrowBadUTF8(utf8_error_index);
606 }
607 #endif
|
608 kumpf 1.43
|
609 mike 1.112 rep->data[rep->size] = '\0';
|
610 kumpf 1.43
|
611 mike 1.112 return rep;
|
612 mike 1.27 }
613
|
614 mike 1.112 Uint32 StringRep::length(const Uint16* str)
|
615 mike 1.27 {
|
616 mike 1.112 // Note: We could unroll this but it is rarely called.
617
618 const Uint16* end = (Uint16*)str;
619
620 while (*end++)
621 ;
622
623 return end - str - 1;
|
624 kumpf 1.39 }
|
625 tony 1.66
|
626 mike 1.112 //==============================================================================
627 //
628 // class String
629 //
630 //==============================================================================
631
632 const String String::EMPTY;
|
633 mike 1.27
|
634 kumpf 1.39 String::String(const String& str, Uint32 n)
635 {
|
636 mike 1.112 _checkBounds(n, str._rep->size);
637 _rep = StringRep::create(str._rep->data, n);
|
638 kumpf 1.39 }
639
640 String::String(const Char16* str)
641 {
|
642 mike 1.112 _checkNullPointer(str);
643 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
644 mike 1.27 }
645
|
646 kumpf 1.39 String::String(const Char16* str, Uint32 n)
647 {
|
648 mike 1.112 _checkNullPointer(str);
649 _rep = StringRep::create((Uint16*)str, n);
|
650 kumpf 1.39 }
651
652 String::String(const char* str)
|
653 mike 1.27 {
|
654 mike 1.112 _checkNullPointer(str);
|
655 david.dillard 1.105
|
656 mike 1.112 // Set this just in case create() throws an exception.
657 _rep = &StringRep::_emptyRep;
658 _rep = StringRep::create(str, strlen(str));
|
659 mike 1.27 }
660
|
661 kumpf 1.39 String::String(const char* str, Uint32 n)
|
662 mike 1.27 {
|
663 mike 1.112 _checkNullPointer(str);
|
664 david.dillard 1.105
|
665 mike 1.112 // Set this just in case create() throws an exception.
666 _rep = &StringRep::_emptyRep;
667 _rep = StringRep::create(str, n);
|
668 kumpf 1.39 }
|
669 mike 1.27
|
670 mike 1.112 String::String(const String& s1, const String& s2)
|
671 kumpf 1.39 {
|
672 mike 1.112 size_t n1 = s1._rep->size;
673 size_t n2 = s2._rep->size;
674 size_t n = n1 + n2;
675 _rep = StringRep::alloc(n);
676 _copy(_rep->data, s1._rep->data, n1);
677 _copy(_rep->data + n1, s2._rep->data, n2);
678 _rep->size = n;
679 _rep->data[n] = '\0';
|
680 mike 1.27 }
681
|
682 mike 1.112 String::String(const String& s1, const char* s2)
|
683 mike 1.27 {
|
684 mike 1.112 _checkNullPointer(s2);
685 size_t n1 = s1._rep->size;
686 size_t n2 = strlen(s2);
687 _rep = StringRep::alloc(n1 + n2);
688 _copy(_rep->data, s1._rep->data, n1);
689 size_t utf8_error_index;
690 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
691
692 #ifndef PEGASUS_STRING_NO_THROW
693 if (tmp == size_t(-1))
|
694 kumpf 1.82 {
|
695 mike 1.112 StringRep::free(_rep);
696 _rep = &StringRep::_emptyRep;
697 _StringThrowBadUTF8(utf8_error_index);
|
698 kumpf 1.82 }
|
699 mike 1.112 #endif
700
701 _rep->size = n1 + tmp;
702 _rep->data[_rep->size] = '\0';
|
703 mike 1.27 }
704
|
705 mike 1.112 String::String(const char* s1, const String& s2)
|
706 mike 1.27 {
|
707 mike 1.112 _checkNullPointer(s1);
708 size_t n1 = strlen(s1);
709 size_t n2 = s2._rep->size;
710 _rep = StringRep::alloc(n1 + n2);
711 size_t utf8_error_index;
712 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
713
714 #ifndef PEGASUS_STRING_NO_THROW
715 if (tmp == size_t(-1))
716 {
717 StringRep::free(_rep);
718 _rep = &StringRep::_emptyRep;
719 _StringThrowBadUTF8(utf8_error_index);
720 }
721 #endif
722
723 _rep->size = n2 + tmp;
724 _copy(_rep->data + n1, s2._rep->data, n2);
725 _rep->data[_rep->size] = '\0';
|
726 mike 1.27 }
727
|
728 mike 1.112 String& String::assign(const String& str)
|
729 mike 1.27 {
|
730 mike 1.112 if (_rep != str._rep)
|
731 david.dillard 1.105 {
|
732 mike 1.112 StringRep::unref(_rep);
733 StringRep::ref(_rep = str._rep);
|
734 david.dillard 1.105 }
735
|
736 mike 1.27 return *this;
737 }
738
739 String& String::assign(const Char16* str, Uint32 n)
740 {
|
741 mike 1.112 _checkNullPointer(str);
742
|
743 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
744 david.dillard 1.105 {
|
745 mike 1.112 StringRep::unref(_rep);
746 _rep = StringRep::alloc(n);
|
747 david.dillard 1.105 }
748
|
749 mike 1.112 _rep->size = n;
750 _copy(_rep->data, (Uint16*)str, n);
751 _rep->data[n] = '\0';
752
|
753 mike 1.27 return *this;
754 }
755
|
756 mike 1.112 String& String::assign(const char* str, Uint32 n)
|
757 chuck 1.102 {
|
758 mike 1.112 _checkNullPointer(str);
759
|
760 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
761 david.dillard 1.105 {
|
762 mike 1.112 StringRep::unref(_rep);
763 _rep = StringRep::alloc(n);
|
764 david.dillard 1.105 }
765
|
766 mike 1.112 size_t utf8_error_index;
767 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
|
768 chuck 1.102
|
769 mike 1.112 #ifndef PEGASUS_STRING_NO_THROW
770 if (_rep->size == size_t(-1))
|
771 david.dillard 1.105 {
|
772 mike 1.112 StringRep::free(_rep);
773 _rep = &StringRep::_emptyRep;
774 _StringThrowBadUTF8(utf8_error_index);
|
775 david.dillard 1.105 }
|
776 mike 1.112 #endif
777
778 _rep->data[_rep->size] = 0;
|
779 david.dillard 1.105
|
780 mike 1.27 return *this;
781 }
782
|
783 kumpf 1.39 void String::clear()
784 {
|
785 mike 1.112 if (_rep->size)
786 {
|
787 mike 1.114 if (_rep->refs.get() == 1)
|
788 mike 1.112 {
789 _rep->size = 0;
790 _rep->data[0] = '\0';
791 }
792 else
793 {
794 StringRep::unref(_rep);
795 _rep = &StringRep::_emptyRep;
796 }
797 }
|
798 kumpf 1.39 }
799
|
800 mike 1.112 void String::reserveCapacity(Uint32 cap)
|
801 kumpf 1.39 {
|
802 mike 1.112 _reserve(_rep, cap);
|
803 kumpf 1.39 }
804
|
805 mike 1.112 CString String::getCString() const
806 {
|
807 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16
808 // counterpart, so we allocate extra memory for the worst case. In the
|
809 mike 1.112 // best case, we may need only one third of the memory allocated. But
|
810 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since
811 // CString objects are usually short-lived (disappearing after only a few
|
812 mike 1.112 // instructions). CString objects are typically created on the stack as
813 // means to obtain a char* pointer.
814
815 #ifdef PEGASUS_STRING_NO_UTF8
816 char* str = (char*)operator new(_rep->size + 1);
817 _copy(str, _rep->data, _rep->size);
818 str[_rep->size] = '\0';
819 return CString(str);
|
820 gs.keenan 1.110 #else
|
821 mike 1.112 Uint32 n = 3 * _rep->size;
822 char* str = (char*)operator new(n + 1);
823 size_t size = _copyToUTF8(str, _rep->data, _rep->size);
824 str[size] = '\0';
825 return CString(str);
|
826 gs.keenan 1.110 #endif
|
827 kumpf 1.39 }
828
|
829 mike 1.112 String& String::append(const Char16* str, Uint32 n)
|
830 kumpf 1.39 {
|
831 mike 1.112 _checkNullPointer(str);
832
833 size_t oldSize = _rep->size;
834 size_t newSize = oldSize + n;
835 _reserve(_rep, newSize);
836 _copy(_rep->data + oldSize, (Uint16*)str, n);
837 _rep->size = newSize;
838 _rep->data[newSize] = '\0';
839
840 return *this;
|
841 kumpf 1.39 }
842
|
843 mike 1.112 String& String::append(const String& str)
|
844 mike 1.27 {
|
845 mike 1.112 return append((Char16*)str._rep->data, str._rep->size);
|
846 mike 1.27 }
847
|
848 mike 1.112 String& String::append(const char* str, Uint32 size)
|
849 mike 1.27 {
|
850 mike 1.112 _checkNullPointer(str);
851
852 size_t oldSize = _rep->size;
853 size_t cap = oldSize + size;
854
855 _reserve(_rep, cap);
856 size_t utf8_error_index;
857 size_t tmp = _convert(
858 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
859
860 #ifndef PEGASUS_STRING_NO_THROW
861 if (tmp == size_t(-1))
862 {
863 StringRep::free(_rep);
864 _rep = &StringRep::_emptyRep;
865 _StringThrowBadUTF8(utf8_error_index);
866 }
867 #endif
|
868 mike 1.27
|
869 mike 1.112 _rep->size += tmp;
870 _rep->data[_rep->size] = '\0';
|
871 mike 1.27
|
872 kumpf 1.39 return *this;
873 }
874
|
875 mike 1.112 void String::remove(Uint32 index, Uint32 n)
|
876 mike 1.27 {
|
877 mike 1.112 if (n == PEG_NOT_FOUND)
878 n = _rep->size - index;
879
880 _checkBounds(index + n, _rep->size);
881
|
882 mike 1.114 if (_rep->refs.get() != 1)
|
883 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
|
884 mike 1.27
|
885 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
|
886 mike 1.27
|
887 mike 1.112 size_t rem = _rep->size - (index + n);
888 Uint16* data = _rep->data;
|
889 mike 1.27
|
890 mike 1.112 if (rem)
891 memmove(data + index, data + index + n, rem * sizeof(Uint16));
|
892 mike 1.27
|
893 mike 1.112 _rep->size -= n;
894 data[_rep->size] = '\0';
|
895 mike 1.27 }
896
|
897 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
|
898 mike 1.27 {
|
899 mike 1.112 // Note: this implementation is very permissive but used for
900 // backwards compatibility.
901
902 if (index < _rep->size)
|
903 mike 1.27 {
|
904 mike 1.112 if (n == PEG_NOT_FOUND || n > _rep->size - index)
905 n = _rep->size - index;
|
906 mike 1.27
|
907 mike 1.112 return String((Char16*)_rep->data + index, n);
|
908 mike 1.27 }
|
909 david.dillard 1.105
910 return String();
|
911 mike 1.27 }
912
913 Uint32 String::find(Char16 c) const
914 {
|
915 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
916 mike 1.27
|
917 mike 1.112 if (p)
|
918 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
919 mike 1.27
920 return PEG_NOT_FOUND;
921 }
922
|
923 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
924 mike 1.30 {
|
925 mike 1.112 _checkBounds(index, _rep->size);
926
927 if (index >= _rep->size)
928 return PEG_NOT_FOUND;
929
930 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
|
931 mike 1.30
|
932 mike 1.112 if (p)
|
933 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
934 mike 1.30
935 return PEG_NOT_FOUND;
936 }
937
|
938 mike 1.112 Uint32 StringFindAux(
939 const StringRep* _rep, const Char16* s, Uint32 n)
|
940 mike 1.27 {
|
941 mike 1.112 _checkNullPointer(s);
|
942 mike 1.27
|
943 mike 1.112 const Uint16* data = _rep->data;
944 size_t rem = _rep->size;
945
946 while (n <= rem)
|
947 mike 1.30 {
|
948 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]);
949
950 if (!p)
951 break;
|
952 mike 1.30
|
953 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
|
954 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
955 david.dillard 1.105
|
956 mike 1.112 p++;
957 rem -= p - data;
958 data = p;
|
959 mike 1.27 }
|
960 mike 1.112
|
961 mike 1.27 return PEG_NOT_FOUND;
962 }
963
|
964 mike 1.112 Uint32 String::find(const char* s) const
965 {
966 _checkNullPointer(s);
967
968 // Note: could optimize away creation of temporary, but this is rarely
969 // called.
970 return find(String(s));
971 }
972
|
973 mike 1.27 Uint32 String::reverseFind(Char16 c) const
974 {
|
975 mike 1.112 Uint16 x = c;
976 Uint16* p = _rep->data;
977 Uint16* q = _rep->data + _rep->size;
|
978 mike 1.27
|
979 mike 1.112 while (q != p)
|
980 mike 1.27 {
|
981 mike 1.112 if (*--q == x)
|
982 david.dillard 1.116 return static_cast<Uint32>(q - p);
|
983 mike 1.27 }
984
985 return PEG_NOT_FOUND;
986 }
987
988 void String::toLower()
989 {
|
990 david 1.69 #ifdef PEGASUS_HAS_ICU
|
991 mike 1.112
|
992 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
993 david 1.90 {
|
994 mike 1.114 if (_rep->refs.get() != 1)
|
995 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
996
|
997 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
998 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
999 // prevents optimizations where the us-ascii is converted before
|
1000 mike 1.112 // calling ICU.
|
1001 yi.zhou 1.108 // The string may shrink or expand after the convert.
1002
|
1003 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns
1004 //// only the size when zero is passed as the destination size argument.
1005
|
1006 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1007
|
1008 mike 1.112 int32_t newSize = u_strToLower(
1009 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
1010 david.dillard 1.116
|
1011 mike 1.112 err = U_ZERO_ERROR;
1012
1013 //// Reserve enough space for the result.
1014
1015 if ((Uint32)newSize > _rep->cap)
1016 _reserve(_rep, newSize);
1017
1018 //// Perform the conversion (overlapping buffers are allowed).
|
1019 chuck 1.99
|
1020 mike 1.112 u_strToLower((UChar*)_rep->data, newSize,
1021 (UChar*)_rep->data, _rep->size, NULL, &err);
|
1022 yi.zhou 1.108
|
1023 mike 1.112 _rep->size = newSize;
1024 return;
|
1025 david 1.90 }
|
1026 mike 1.112
1027 #endif /* PEGASUS_HAS_ICU */
1028
|
1029 mike 1.114 if (_rep->refs.get() != 1)
|
1030 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1031
1032 Uint16* p = _rep->data;
1033 size_t n = _rep->size;
1034
1035 for (; n--; p++)
|
1036 david 1.90 {
|
1037 mike 1.112 if (!(*p & 0xFF00))
1038 *p = _toLower(*p);
|
1039 mike 1.27 }
|
1040 kumpf 1.39 }
1041
|
1042 chuck 1.99 void String::toUpper()
|
1043 david 1.90 {
1044 #ifdef PEGASUS_HAS_ICU
|
1045 mike 1.112
|
1046 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
1047 chuck 1.99 {
|
1048 mike 1.114 if (_rep->refs.get() != 1)
|
1049 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1050
|
1051 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
1052 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
1053 // prevents optimizations where the us-ascii is converted before
|
1054 mike 1.112 // calling ICU.
|
1055 yi.zhou 1.108 // The string may shrink or expand after the convert.
1056
|
1057 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns
1058 //// only the size when zero is passed as the destination size argument.
1059
|
1060 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1061
|
1062 mike 1.112 int32_t newSize = u_strToUpper(
1063 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1064
1065 err = U_ZERO_ERROR;
1066
1067 //// Reserve enough space for the result.
1068
1069 if ((Uint32)newSize > _rep->cap)
1070 _reserve(_rep, newSize);
1071
1072 //// Perform the conversion (overlapping buffers are allowed).
1073
1074 u_strToUpper((UChar*)_rep->data, newSize,
1075 (UChar*)_rep->data, _rep->size, NULL, &err);
|
1076 chuck 1.99
|
1077 mike 1.112 _rep->size = newSize;
|
1078 yi.zhou 1.108
|
1079 mike 1.112 return;
|
1080 david 1.91 }
|
1081 mike 1.112
1082 #endif /* PEGASUS_HAS_ICU */
1083
|
1084 mike 1.114 if (_rep->refs.get() != 1)
|
1085 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1086
1087 Uint16* p = _rep->data;
1088 size_t n = _rep->size;
1089
1090 for (; n--; p++)
1091 *p = _toUpper(*p);
|
1092 david 1.90 }
1093
|
1094 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
1095 kumpf 1.39 {
|
1096 kumpf 1.118 const Uint16* p1 = s1._rep->data;
1097 const Uint16* p2 = s2._rep->data;
|
1098 mike 1.27
|
1099 kumpf 1.118 while (n--)
1100 {
1101 int r = *p1++ - *p2++;
1102 if (r)
1103 {
1104 return r;
1105 }
1106 else if (!p1[-1])
1107 {
1108 // We must have encountered a null terminator in both s1 and s2
1109 return 0;
1110 }
1111 }
1112 return 0;
|
1113 mike 1.27 }
1114
|
1115 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
1116 mike 1.30 {
|
1117 mike 1.112 return _compare(s1._rep->data, s2._rep->data);
1118 }
|
1119 kumpf 1.43
|
1120 mike 1.112 int String::compare(const String& s1, const char* s2)
1121 {
1122 _checkNullPointer(s2);
|
1123 mike 1.30
|
1124 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
1125 return _compareNoUTF8(s1._rep->data, s2);
1126 #else
1127 // ATTN: optimize this!
1128 return String::compare(s1, String(s2));
1129 #endif
|
1130 mike 1.30 }
1131
|
1132 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
|
1133 kumpf 1.40 {
|
1134 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1135 mike 1.112
|
1136 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1137 {
|
1138 mike 1.112 return u_strcasecmp(
1139 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
|
1140 yi.zhou 1.108 }
|
1141 kumpf 1.40
|
1142 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1143
1144 const Uint16* s1 = str1._rep->data;
1145 const Uint16* s2 = str2._rep->data;
1146
1147 while (*s1 && *s2)
|
1148 kumpf 1.40 {
|
1149 mike 1.112 int r = _toLower(*s1++) - _toLower(*s2++);
|
1150 kumpf 1.40
|
1151 david.dillard 1.105 if (r)
1152 return r;
|
1153 kumpf 1.40 }
1154
|
1155 mike 1.112 if (*s2)
|
1156 david.dillard 1.105 return -1;
|
1157 mike 1.112 else if (*s1)
|
1158 david.dillard 1.105 return 1;
|
1159 kumpf 1.40
1160 return 0;
1161 }
1162
|
1163 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
|
1164 mike 1.27 {
|
1165 mike 1.112 #ifdef PEGASUS_HAS_ICU
1166
1167 return String::compareNoCase(s1, s2) == 0;
1168
1169 #else /* PEGASUS_HAS_ICU */
|
1170 mike 1.27
|
1171 mike 1.112 // The following employs loop unrolling for efficiency. Please do not
1172 // eliminate.
|
1173 kumpf 1.39
|
1174 mike 1.112 Uint16* p = (Uint16*)s1.getChar16Data();
1175 Uint16* q = (Uint16*)s2.getChar16Data();
1176 Uint32 n = s2.size();
1177
1178 while (n >= 8)
1179 {
1180 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1181 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1182 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1183 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1184 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1185 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1186 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1187 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1188 {
1189 return false;
1190 }
|
1191 kumpf 1.39
|
1192 mike 1.112 n -= 8;
1193 p += 8;
1194 q += 8;
1195 }
|
1196 mike 1.27
|
1197 mike 1.112 while (n >= 4)
|
1198 kumpf 1.39 {
|
1199 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1200 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1201 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1202 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
|
1203 david.dillard 1.105 {
|
1204 mike 1.112 return false;
|
1205 david.dillard 1.105 }
|
1206 mike 1.112
1207 n -= 4;
1208 p += 4;
1209 q += 4;
1210 }
1211
1212 while (n--)
1213 {
1214 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
|
1215 david.dillard 1.105 return false;
|
1216 mike 1.112
1217 p++;
1218 q++;
|
1219 kumpf 1.39 }
|
1220 mike 1.28
|
1221 kumpf 1.39 return true;
|
1222 mike 1.112
1223 #endif /* PEGASUS_HAS_ICU */
|
1224 david 1.69 }
1225
|
1226 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1227 david 1.69 {
|
1228 mike 1.112 _checkNullPointer(s2);
|
1229 david 1.69
|
1230 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1231 david 1.69
|
1232 mike 1.112 return String::equalNoCase(s1, String(s2));
|
1233 david 1.69
|
1234 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1235 david 1.69
|
1236 mike 1.112 const Uint16* p1 = (Uint16*)s1._rep->data;
1237 const char* p2 = s2;
1238 size_t n = s1._rep->size;
|
1239 david.dillard 1.105
|
1240 mike 1.112 while (n--)
1241 {
1242 if (!*p2)
1243 return false;
|
1244 david 1.71
|
1245 mike 1.112 if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1246 return false;
1247 }
|
1248 kumpf 1.42
|
1249 mike 1.112 if (*p2)
1250 return false;
|
1251 david.dillard 1.116
|
1252 mike 1.112 return true;
|
1253 karl 1.36
|
1254 mike 1.112 #else /* PEGASUS_HAS_ICU */
|
1255 david.dillard 1.105
|
1256 mike 1.112 // ATTN: optimize this!
1257 return String::equalNoCase(s1, String(s2));
|
1258 david.dillard 1.105
|
1259 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1260 }
|
1261 chuck 1.78
|
1262 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
|
1263 karl 1.36 {
|
1264 david.dillard 1.116 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
|
1265 mike 1.112 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
|
1266 karl 1.36 }
1267
|
1268 mike 1.112 Boolean String::equal(const String& s1, const char* s2)
1269 {
1270 #ifdef PEGASUS_STRING_NO_UTF8
|
1271 kumpf 1.35
|
1272 mike 1.112 _checkNullPointer(s2);
|
1273 kumpf 1.39
|
1274 mike 1.112 const Uint16* p = (Uint16*)s1._rep->data;
1275 const char* q = s2;
|
1276 kumpf 1.39
|
1277 mike 1.112 while (*p && *q)
1278 {
1279 if (*p++ != Uint16(*q++))
1280 return false;
1281 }
|
1282 kumpf 1.39
|
1283 mike 1.112 return !(*p || *q);
|
1284 kumpf 1.39
|
1285 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
|
1286 kumpf 1.39
|
1287 mike 1.112 return String::equal(s1, String(s2));
|
1288 kumpf 1.39
|
1289 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1290 kumpf 1.39 }
1291
|
1292 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1293 kumpf 1.39 {
|
1294 mike 1.112 #if defined(PEGASUS_OS_OS400)
|
1295 david 1.72
|
1296 david 1.93 CString cstr = str.getCString();
|
1297 david 1.69 const char* utf8str = cstr;
|
1298 mike 1.112 os << utf8str;
1299 return os;
|
1300 david.dillard 1.116 #else
|
1301 david 1.69
|
1302 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1303 david 1.69
|
1304 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1305 {
|
1306 david.dillard 1.105 char *buf = NULL;
1307 const int size = str.size() * 6;
|
1308 mike 1.112 UnicodeString UniStr(
1309 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1310 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1311 buf = new char[bufsize+1];
1312 UniStr.extract(0,bufsize,buf);
1313 os << buf;
1314 os.flush();
1315 delete [] buf;
|
1316 david.dillard 1.116 return os;
|
1317 yi.zhou 1.108 }
|
1318 mike 1.112
|
1319 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
|
1320 mike 1.112
1321 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1322 yi.zhou 1.108 {
|
1323 mike 1.112 Uint16 code = str[i];
|
1324 david.dillard 1.105
|
1325 mike 1.112 if (code > 0 && !(code & 0xFF00))
1326 os << char(code);
1327 else
1328 {
1329 // Print in hex format:
1330 char buffer[8];
1331 sprintf(buffer, "\\x%04X", code);
1332 os << buffer;
|
1333 david.dillard 1.105 }
|
1334 yi.zhou 1.108 }
|
1335 kumpf 1.39
1336 return os;
|
1337 mike 1.112 #endif // PEGASUS_OS_OS400
|
1338 kumpf 1.39 }
1339
|
1340 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
|
1341 kumpf 1.39 {
|
1342 mike 1.112 StringRep* tmp;
1343
1344 if (_rep->cap)
1345 {
1346 tmp = StringRep::alloc(2 * _rep->cap);
1347 tmp->size = _rep->size;
1348 _copy(tmp->data, _rep->data, _rep->size);
1349 }
1350 else
1351 {
1352 tmp = StringRep::alloc(8);
1353 tmp->size = 0;
1354 }
1355
1356 StringRep::unref(_rep);
1357 _rep = tmp;
|
1358 kumpf 1.39 }
1359
|
1360 mike 1.112 PEGASUS_NAMESPACE_END
1361
1362 /*
1363 ================================================================================
1364
1365 String optimizations:
1366
1367 1. Added mechanism allowing certain functions to be inlined only when
1368 used by internal Pegasus modules. External modules (i.e., providers)
1369 link to a non-inline version, which allows for binary compatibility.
1370
1371 2. Implemented copy-on-write with atomic increment/decrement. This
1372 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1373 for the 'ni1000' benchmark.
1374
1375 3. Employed loop unrolling in several places. For example, see:
1376
1377 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1378
1379 4. Used the "empty-rep" optimization (described in whitepaper from the
1380 GCC Developers Summit). This reduced default construction to a simple
1381 mike 1.112 pointer assignment.
1382
1383 inline String::String() : _rep(&_emptyRep) { }
1384
1385 5. Implemented Uint16 versions of toupper() and tolower() using tables.
1386 For example:
1387
1388 static const char _upper[] =
1389 {
1390 0,1,2,...255
1391 };
1392
1393 inline Uint16 _toUpper(Uint16 x)
1394 {
1395 return (x & 0xFF00) ? x : _upper[x];
1396 }
1397
|
1398 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
|
1399 mike 1.112 operation.
1400
|
1401 david.dillard 1.116 6. Implemented char* version of the following member functions to
1402 eliminate unnecessary creation of anonymous string objects
|
1403 mike 1.112 (temporaries).
1404
1405 String(const String& s1, const char* s2);
1406 String(const char* s1, const String& s2);
1407 String& String::operator=(const char* str);
1408 Uint32 String::find(const char* s) const;
1409 bool String::equal(const String& s1, const char* s2);
1410 static int String::compare(const String& s1, const char* s2);
1411 String& String::append(const char* str);
1412 String& String::append(const char* str, Uint32 size);
1413 static bool String::equalNoCase(const String& s1, const char* s2);
1414 String& operator=(const char* str)
1415 String& String::assign(const char* str)
1416 String& String::append(const char* str)
1417 Boolean operator==(const String& s1, const char* s2)
1418 Boolean operator==(const char* s1, const String& s2)
1419 Boolean operator!=(const String& s1, const char* s2)
1420 Boolean operator!=(const char* s1, const String& s2)
1421 Boolean operator<(const String& s1, const char* s2)
1422 Boolean operator<(const char* s1, const String& s2)
1423 Boolean operator>(const String& s1, const char* s2)
1424 mike 1.112 Boolean operator>(const char* s1, const String& s2)
1425 Boolean operator<=(const String& s1, const char* s2)
1426 Boolean operator<=(const char* s1, const String& s2)
1427 Boolean operator>=(const String& s1, const char* s2)
1428 Boolean operator>=(const char* s1, const String& s2)
1429 String operator+(const String& s1, const char* s2)
1430 String operator+(const char* s1, const String& s2)
1431
|
1432 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
|
1433 mike 1.112 power of two (algorithm from the book "Hacker's Delight").
1434
1435 static Uint32 _roundUpToPow2(Uint32 x)
1436 {
1437 if (x < 8)
1438 return 8;
1439
1440 x--;
1441 x |= (x >> 1);
1442 x |= (x >> 2);
1443 x |= (x >> 4);
1444 x |= (x >> 8);
1445 x |= (x >> 16);
1446 x++;
1447
1448 return x;
1449 }
1450
1451 8. Implemented "concatenating constructors" to eliminate temporaries
|
1452 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
|
1453 mike 1.112 optimization" described by Stan Lippman.
1454
1455 inline String operator+(const String& s1, const String& s2)
1456 {
1457 return String(s1, s2, 0);
1458 }
1459
1460 9. Experimented to find the optimal initial size for a short string.
1461 Eight seems to offer the best tradeoff between space and time.
1462
1463 10. Inlined all members of the Char16 class.
1464
1465 11. Used Uint16 internally in the String class. This showed no improvement
1466 since Char16 was already fully inlined and was essentially reduced to
1467 Uint16 in any case.
1468
1469 12. Implemented conditional logic (#if) allowing error checking logic to
|
1470 david.dillard 1.116 be excluded to improve performance. Examples include bounds checking
|
1471 mike 1.112 and null-pointer checking.
1472
1473 13. Used memcpy() and memcmp() where possible. These are implemented using
1474 the rep family of instructions under Intel and are much faster.
1475
|
1476 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
|
1477 mike 1.112 copy routine overhead.
1478
1479 15. Added ASCII7 form of the constructor and assign().
1480
1481 String s("hello world", String::ASCII7);
1482
1483 s.assignASCII7("hello world");
1484
1485 This avoids slower UTF8 processing when not needed.
1486
1487 ================================================================================
1488
1489 TO-DO:
1490
1491 (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
1492
1493 (+) [DONE] Submit BUG-2754 (Windows buffer limit).
1494
1495 (+) [DONE] Eliminate char versions of find() and append().
1496
1497 (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1498 mike 1.112
1499 (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1500
1501 (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1502
1503 (+) [DONE] Comment StringRep allocation layout.
1504
1505 (+) [DONE] Conceal private inline functions.
1506
1507 (+) [DONE] Shorten inclusion of StringInline.h in String.h.
1508
1509 (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
1510 rid of altogether.
1511
1512 (+) [DONE] useCamelNotationOnAllFunctionNames.
1513
1514 (+) [DONE] Check for overflow condition in StringRep::alloc().
1515
1516 (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
1517
1518 (+) [DONE] Fix throw-related memory leak.
1519 mike 1.112
1520 (+) [DONE] Look at PEP223 for coding security guidelines.
1521
1522 (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).
|
1523 kumpf 1.39
|
1524 mike 1.112 (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
|
1525 kumpf 1.39
|
1526 mike 1.112 (+) DOC++ String.h - will open new bug?
|
1527 kumpf 1.39
|
1528 mike 1.112 (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression
1529 on certain platforms).
|
1530 kumpf 1.39
|
1531 mike 1.112 ================================================================================
1532 */
|