1 karl 1.119 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.119 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.27 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.119 //
|
21 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
|
32 mike 1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
|
33 mike 1.27 //
|
34 david.dillard 1.116 // Modified By:
|
35 mike 1.112 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
36 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
|
37 david.dillard 1.116 // David Dillard, Symantec Corp. (david_dillard@symantec.com)
|
38 mike 1.112 // Mike Brasher (mike-brasher@austin.rr.com)
|
39 mike 1.27 //
40 //%/////////////////////////////////////////////////////////////////////////////
41
|
42 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
|
43 mike 1.113 #include <cstring>
|
44 kumpf 1.48 #include "InternalException.h"
|
45 david 1.69 #include "CommonUTF.h"
|
46 mike 1.112 #include "MessageLoader.h"
47 #include "StringRep.h"
|
48 david 1.69
49 #ifdef PEGASUS_HAS_ICU
|
50 chuck 1.99 #include <unicode/ustring.h>
51 #include <unicode/uchar.h>
|
52 david 1.69 #endif
53
|
54 mike 1.112 PEGASUS_NAMESPACE_BEGIN
|
55 mike 1.28
|
56 mike 1.112 //==============================================================================
57 //
58 // Compile-time macros (undefined by default).
59 //
60 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
|
61 david.dillard 1.116 //
|
62 mike 1.112 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
63 //
64 //==============================================================================
|
65 mike 1.27
|
66 mike 1.112 //==============================================================================
|
67 kumpf 1.39 //
|
68 mike 1.112 // File-scope definitions:
|
69 kumpf 1.54 //
|
70 mike 1.112 //==============================================================================
71
72 // Note: this table is much faster than the system toupper(). Please do not
73 // change.
|
74 kumpf 1.54
|
75 david.dillard 1.116 const Uint8 _toUpperTable[256] =
|
76 kumpf 1.54 {
|
77 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
78 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
79 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
80 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
81 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
82 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
83 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
84 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
85 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
86 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
87 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
88 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
89 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
90 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
91 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
92 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
93 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
94 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
95 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
96 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
97 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
98 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
99 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
100 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
101 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
102 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
103 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
104 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
105 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
106 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
107 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
108 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
109 };
110
111 // Note: this table is much faster than the system tulower(). Please do not
112 // change.
113
|
114 david.dillard 1.116 const Uint8 _toLowerTable[256] =
|
115 mike 1.112 {
116 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
117 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
118 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
119 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
120 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
121 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
122 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
123 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
124 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
125 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
126 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
127 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
128 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
129 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
130 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
131 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
132 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
133 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
134 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
135 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
136 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
137 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
138 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
139 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
140 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
141 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
142 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
143 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
144 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
145 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
146 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
147 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
148 };
149
150 // Converts 16-bit characters to upper case. This routine is faster than the
151 // system toupper(). Please do not change.
152 inline Uint16 _toUpper(Uint16 x)
153 {
154 return (x & 0xFF00) ? x : _toUpperTable[x];
|
155 kumpf 1.54 }
156
|
157 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the
158 // system toupper(). Please do not change.
159 inline Uint16 _toLower(Uint16 x)
|
160 kumpf 1.54 {
|
161 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x];
162 }
163
164 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
165 static Uint32 _roundUpToPow2(Uint32 x)
166 {
167 #ifndef PEGASUS_STRING_NO_THROW
168
|
169 dave.sudlik 1.119.2.1 // Check for potential overflow in x
170 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
|
171 mike 1.112
172 #endif
173
174 if (x < 8)
175 return 8;
176
177 x--;
178 x |= (x >> 1);
179 x |= (x >> 2);
180 x |= (x >> 4);
181 x |= (x >> 8);
182 x |= (x >> 16);
183 x++;
184
185 return x;
186 }
187
// Element-wise copy of n items from q to p, converting each element from Q
// to P by plain assignment. The ranges are walked front to back.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // The following employs loop unrolling for efficiency. Please do not
    // eliminate.

    // Blocks of eight.
    for (; n >= 8; p += 8, q += 8, n -= 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    // At most one block of four remains.
    for (; n >= 4; p += 4, q += 4, n -= 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
    }

    // Up to three trailing elements.
    for (; n != 0; n--)
        *p++ = *q++;
}
223
224 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
225 {
226 // The following employs loop unrolling for efficiency. Please do not
227 // eliminate.
228
229 while (n >= 4)
230 {
231 if (s[0] == c)
232 return (Uint16*)s;
233 if (s[1] == c)
234 mike 1.112 return (Uint16*)&s[1];
235 if (s[2] == c)
236 return (Uint16*)&s[2];
237 if (s[3] == c)
238 return (Uint16*)&s[3];
|
239 kumpf 1.82
|
240 mike 1.112 n -= 4;
241 s += 4;
242 }
243
244 if (n)
245 {
246 if (*s == c)
247 return (Uint16*)s;
248 s++;
249 n--;
250 }
251
252 if (n)
253 {
254 if (*s == c)
255 return (Uint16*)s;
256 s++;
257 n--;
258 }
259
260 if (n && *s == c)
261 mike 1.112 return (Uint16*)s;
262
263 // Not found!
264 return 0;
265 }
266
267 static int _compare(const Uint16* s1, const Uint16* s2)
268 {
269 while (*s1 && *s2)
270 {
271 int r = *s1++ - *s2++;
272
273 if (r)
274 return r;
275 }
276
277 if (*s2)
278 return -1;
279 else if (*s1)
280 return 1;
281
282 mike 1.112 return 0;
283 }
284
285 static int _compareNoUTF8(const Uint16* s1, const char* s2)
286 {
287 Uint16 c1;
288 Uint16 c2;
289
290 do
291 {
292 c1 = *s1++;
293 c2 = *s2++;
294
295 if (c1 == 0)
296 return c1 - c2;
297 }
298 while (c1 == c2);
299
300 return c1 - c2;
301 }
302
303 mike 1.112 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
304 {
305 memcpy(s1, s2, n * sizeof(Uint16));
306 }
307
308 void StringThrowOutOfBounds()
309 {
310 throw IndexOutOfBoundsException();
311 }
312
313 inline void _checkNullPointer(const void* ptr)
314 {
|
315 kumpf 1.117 #ifndef PEGASUS_STRING_NO_THROW
|
316 mike 1.112
317 if (!ptr)
318 throw NullPointer();
319
320 #endif
321 }
322
323 static void _StringThrowBadUTF8(Uint32 index)
324 {
325 MessageLoaderParms parms(
326 "Common.String.BAD_UTF8",
327 "The byte sequence starting at index $0 "
328 "is not valid UTF-8 encoding.",
329 index);
330 throw Exception(parms);
331 }
332
333 static size_t _copyFromUTF8(
|
334 david.dillard 1.116 Uint16* dest,
335 const char* src,
|
336 mike 1.112 size_t n,
337 size_t& utf8_error_index)
338 {
339 Uint16* p = dest;
340 const Uint8* q = (const Uint8*)src;
341
342 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
343 // Use loop-unrolling.
344
345 while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
346 {
347 p[0] = q[0];
348 p[1] = q[1];
349 p[2] = q[2];
350 p[3] = q[3];
351 p[4] = q[4];
352 p[5] = q[5];
353 p[6] = q[6];
354 p[7] = q[7];
355 p += 8;
356 q += 8;
357 mike 1.112 n -= 8;
358 }
359
360 while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
361 {
362 p[0] = q[0];
363 p[1] = q[1];
364 p[2] = q[2];
365 p[3] = q[3];
366 p += 4;
367 q += 4;
368 n -= 4;
369 }
370
371 switch (n)
372 {
373 case 0:
374 return p - dest;
375 case 1:
376 if (q[0] < 128)
377 {
378 mike 1.112 p[0] = q[0];
379 return p + 1 - dest;
380 }
381 break;
382 case 2:
383 if (((q[0]|q[1]) & 0x80) == 0)
384 {
385 p[0] = q[0];
386 p[1] = q[1];
387 return p + 2 - dest;
388 }
389 break;
390 case 3:
391 if (((q[0]|q[1]|q[2]) & 0x80) == 0)
392 {
393 p[0] = q[0];
394 p[1] = q[1];
395 p[2] = q[2];
396 return p + 3 - dest;
397 }
398 break;
399 mike 1.112 }
400
401 // Process remaining characters.
402
403 while (n)
404 {
405 // Optimize for 7-bit ASCII case.
406
407 if (*q < 128)
408 {
409 *p++ = *q++;
410 n--;
411 }
412 else
413 {
414 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
415
416 if (c > n || !isValid_U8(q, c) ||
417 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
418 {
419 utf8_error_index = q - (const Uint8*)src;
420 mike 1.112 return size_t(-1);
421 }
422
423 n -= c;
424 }
425 }
426
427 return p - dest;
428 }
429
|
430 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
|
431 mike 1.112 // terminator).
432 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
433 {
434 // The following employs loop unrolling for efficiency. Please do not
435 // eliminate.
436
437 const Uint16* q = src;
438 Uint8* p = (Uint8*)dest;
439
440 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
|
441 kumpf 1.82 {
|
442 mike 1.112 p[0] = q[0];
443 p[1] = q[1];
444 p[2] = q[2];
445 p[3] = q[3];
446 p += 4;
447 q += 4;
448 n -= 4;
|
449 kumpf 1.82 }
|
450 mike 1.112
451 switch (n)
452 {
453 case 0:
454 return p - (Uint8*)dest;
455 case 1:
456 if (q[0] < 128)
457 {
458 p[0] = q[0];
459 return p + 1 - (Uint8*)dest;
460 }
461 break;
462 case 2:
463 if (q[0] < 128 && q[1] < 128)
464 {
465 p[0] = q[0];
466 p[1] = q[1];
467 return p + 2 - (Uint8*)dest;
468 }
469 break;
470 case 3:
471 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
472 {
473 p[0] = q[0];
474 p[1] = q[1];
475 p[2] = q[2];
476 return p + 3 - (Uint8*)dest;
477 }
478 break;
479 }
480
481 // If this line was reached, there must be characters greater than 128.
482
483 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
484
485 return p - (Uint8*)dest;
|
486 kumpf 1.54 }
487
|
488 mike 1.112 static inline size_t _convert(
489 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
|
490 kumpf 1.54 {
|
491 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
492 _copy(p, q, n);
493 return n;
494 #else
495 return _copyFromUTF8(p, q, n, utf8_error_index);
496 #endif
|
497 kumpf 1.54 }
498
|
499 mike 1.112 //==============================================================================
500 //
501 // class CString
502 //
503 //==============================================================================
504
505 CString::CString(const CString& cstr) : _rep(0)
|
506 kumpf 1.54 {
|
507 mike 1.112 if (cstr._rep)
|
508 kumpf 1.82 {
|
509 mike 1.112 size_t n = strlen(cstr._rep) + 1;
510 _rep = (char*)operator new(n);
511 memcpy(_rep, cstr._rep, n);
|
512 kumpf 1.82 }
|
513 kumpf 1.54 }
514
|
515 kumpf 1.56 CString& CString::operator=(const CString& cstr)
516 {
|
517 kumpf 1.82 if (&cstr != this)
|
518 kumpf 1.81 {
|
519 kumpf 1.82 if (_rep)
520 {
|
521 mike 1.112 operator delete(_rep);
|
522 kumpf 1.82 _rep = 0;
523 }
|
524 mike 1.112
|
525 kumpf 1.82 if (cstr._rep)
526 {
|
527 mike 1.112 size_t n = strlen(cstr._rep) + 1;
528 _rep = (char*)operator new(n);
529 memcpy(_rep, cstr._rep, n);
|
530 kumpf 1.82 }
|
531 kumpf 1.81 }
|
532 mike 1.112
|
533 kumpf 1.56 return *this;
534 }
535
|
536 mike 1.112 //==============================================================================
|
537 kumpf 1.54 //
|
538 mike 1.112 // class StringRep
|
539 kumpf 1.39 //
|
540 mike 1.112 //==============================================================================
|
541 kumpf 1.39
|
542 mike 1.112 StringRep StringRep::_emptyRep;
|
543 mike 1.27
|
544 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
|
545 mike 1.27 {
|
546 mike 1.112 #ifndef PEGASUS_STRING_NO_THROW
|
547 dave.sudlik 1.119.2.1
548 // Check for potential overflow in cap
549 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
|
550 mike 1.27
|
551 mike 1.112 #endif
|
552 mike 1.27
|
553 mike 1.112 StringRep* rep = (StringRep*)::operator new(
554 sizeof(StringRep) + cap * sizeof(Uint16));
555 rep->cap = cap;
556 new(&rep->refs) AtomicInt(1);
557
558 return rep;
|
559 mike 1.27 }
560
|
561 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
562 chuck 1.102 {
|
563 mike 1.114 if (cap > rep->cap || rep->refs.get() != 1)
|
564 chuck 1.102 {
|
565 mike 1.112 size_t n = _roundUpToPow2(cap);
566 StringRep* newRep = StringRep::alloc(n);
567 newRep->size = rep->size;
568 _copy(newRep->data, rep->data, rep->size + 1);
569 StringRep::unref(rep);
570 rep = newRep;
571 }
572 }
|
573 david.dillard 1.105
|
574 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
575 {
576 StringRep* rep = StringRep::alloc(size);
577 rep->size = size;
578 _copy(rep->data, data, size);
579 rep->data[size] = '\0';
580 return rep;
581 }
|
582 chuck 1.102
|
583 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
584 {
585 // Return a new copy of rep. Release rep.
|
586 chuck 1.102
|
587 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size);
588 newRep->size = rep->size;
589 _copy(newRep->data, rep->data, rep->size);
590 newRep->data[newRep->size] = '\0';
591 StringRep::unref(rep);
592 return newRep;
|
593 chuck 1.102 }
594
|
595 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
|
596 kumpf 1.43 {
|
597 mike 1.112 StringRep* rep = StringRep::alloc(size);
598 size_t utf8_error_index;
599 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
600
601 #ifndef PEGASUS_STRING_NO_THROW
602 if (rep->size == size_t(-1))
603 {
604 StringRep::free(rep);
605 _StringThrowBadUTF8(utf8_error_index);
606 }
607 #endif
|
608 kumpf 1.43
|
609 mike 1.112 rep->data[rep->size] = '\0';
|
610 kumpf 1.43
|
611 mike 1.112 return rep;
|
612 mike 1.27 }
613
|
614 mike 1.112 Uint32 StringRep::length(const Uint16* str)
|
615 mike 1.27 {
|
616 mike 1.112 // Note: We could unroll this but it is rarely called.
617
618 const Uint16* end = (Uint16*)str;
619
620 while (*end++)
621 ;
622
623 return end - str - 1;
|
624 kumpf 1.39 }
|
625 tony 1.66
|
626 mike 1.112 //==============================================================================
627 //
628 // class String
629 //
630 //==============================================================================
631
632 const String String::EMPTY;
|
633 mike 1.27
|
634 kumpf 1.39 String::String(const String& str, Uint32 n)
635 {
|
636 mike 1.112 _checkBounds(n, str._rep->size);
637 _rep = StringRep::create(str._rep->data, n);
|
638 kumpf 1.39 }
639
640 String::String(const Char16* str)
641 {
|
642 mike 1.112 _checkNullPointer(str);
643 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
644 mike 1.27 }
645
|
646 kumpf 1.39 String::String(const Char16* str, Uint32 n)
647 {
|
648 mike 1.112 _checkNullPointer(str);
649 _rep = StringRep::create((Uint16*)str, n);
|
650 kumpf 1.39 }
651
652 String::String(const char* str)
|
653 mike 1.27 {
|
654 mike 1.112 _checkNullPointer(str);
|
655 david.dillard 1.105
|
656 mike 1.112 // Set this just in case create() throws an exception.
657 _rep = &StringRep::_emptyRep;
658 _rep = StringRep::create(str, strlen(str));
|
659 mike 1.27 }
660
|
661 kumpf 1.39 String::String(const char* str, Uint32 n)
|
662 mike 1.27 {
|
663 mike 1.112 _checkNullPointer(str);
|
664 david.dillard 1.105
|
665 mike 1.112 // Set this just in case create() throws an exception.
666 _rep = &StringRep::_emptyRep;
667 _rep = StringRep::create(str, n);
|
668 kumpf 1.39 }
|
669 mike 1.27
|
670 mike 1.112 String::String(const String& s1, const String& s2)
|
671 kumpf 1.39 {
|
672 mike 1.112 size_t n1 = s1._rep->size;
673 size_t n2 = s2._rep->size;
674 size_t n = n1 + n2;
675 _rep = StringRep::alloc(n);
676 _copy(_rep->data, s1._rep->data, n1);
677 _copy(_rep->data + n1, s2._rep->data, n2);
678 _rep->size = n;
679 _rep->data[n] = '\0';
|
680 mike 1.27 }
681
|
682 mike 1.112 String::String(const String& s1, const char* s2)
|
683 mike 1.27 {
|
684 mike 1.112 _checkNullPointer(s2);
685 size_t n1 = s1._rep->size;
686 size_t n2 = strlen(s2);
687 _rep = StringRep::alloc(n1 + n2);
688 _copy(_rep->data, s1._rep->data, n1);
689 size_t utf8_error_index;
690 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
691
692 #ifndef PEGASUS_STRING_NO_THROW
693 if (tmp == size_t(-1))
|
694 kumpf 1.82 {
|
695 mike 1.112 StringRep::free(_rep);
696 _rep = &StringRep::_emptyRep;
697 _StringThrowBadUTF8(utf8_error_index);
|
698 kumpf 1.82 }
|
699 mike 1.112 #endif
700
701 _rep->size = n1 + tmp;
702 _rep->data[_rep->size] = '\0';
|
703 mike 1.27 }
704
|
705 mike 1.112 String::String(const char* s1, const String& s2)
|
706 mike 1.27 {
|
707 mike 1.112 _checkNullPointer(s1);
708 size_t n1 = strlen(s1);
709 size_t n2 = s2._rep->size;
710 _rep = StringRep::alloc(n1 + n2);
711 size_t utf8_error_index;
712 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
713
714 #ifndef PEGASUS_STRING_NO_THROW
715 if (tmp == size_t(-1))
716 {
717 StringRep::free(_rep);
718 _rep = &StringRep::_emptyRep;
719 _StringThrowBadUTF8(utf8_error_index);
720 }
721 #endif
722
723 _rep->size = n2 + tmp;
724 _copy(_rep->data + n1, s2._rep->data, n2);
725 _rep->data[_rep->size] = '\0';
|
726 mike 1.27 }
727
|
728 mike 1.112 String& String::assign(const String& str)
|
729 mike 1.27 {
|
730 mike 1.112 if (_rep != str._rep)
|
731 david.dillard 1.105 {
|
732 mike 1.112 StringRep::unref(_rep);
733 StringRep::ref(_rep = str._rep);
|
734 david.dillard 1.105 }
735
|
736 mike 1.27 return *this;
737 }
738
739 String& String::assign(const Char16* str, Uint32 n)
740 {
|
741 mike 1.112 _checkNullPointer(str);
742
|
743 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
744 david.dillard 1.105 {
|
745 mike 1.112 StringRep::unref(_rep);
746 _rep = StringRep::alloc(n);
|
747 david.dillard 1.105 }
748
|
749 mike 1.112 _rep->size = n;
750 _copy(_rep->data, (Uint16*)str, n);
751 _rep->data[n] = '\0';
752
|
753 mike 1.27 return *this;
754 }
755
|
756 mike 1.112 String& String::assign(const char* str, Uint32 n)
|
757 chuck 1.102 {
|
758 mike 1.112 _checkNullPointer(str);
759
|
760 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
761 david.dillard 1.105 {
|
762 mike 1.112 StringRep::unref(_rep);
763 _rep = StringRep::alloc(n);
|
764 david.dillard 1.105 }
765
|
766 mike 1.112 size_t utf8_error_index;
767 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
|
768 chuck 1.102
|
769 mike 1.112 #ifndef PEGASUS_STRING_NO_THROW
770 if (_rep->size == size_t(-1))
|
771 david.dillard 1.105 {
|
772 mike 1.112 StringRep::free(_rep);
773 _rep = &StringRep::_emptyRep;
774 _StringThrowBadUTF8(utf8_error_index);
|
775 david.dillard 1.105 }
|
776 mike 1.112 #endif
777
778 _rep->data[_rep->size] = 0;
|
779 david.dillard 1.105
|
780 mike 1.27 return *this;
781 }
782
|
783 kumpf 1.39 void String::clear()
784 {
|
785 mike 1.112 if (_rep->size)
786 {
|
787 mike 1.114 if (_rep->refs.get() == 1)
|
788 mike 1.112 {
789 _rep->size = 0;
790 _rep->data[0] = '\0';
791 }
792 else
793 {
794 StringRep::unref(_rep);
795 _rep = &StringRep::_emptyRep;
796 }
797 }
|
798 kumpf 1.39 }
799
|
800 mike 1.112 void String::reserveCapacity(Uint32 cap)
|
801 kumpf 1.39 {
|
802 mike 1.112 _reserve(_rep, cap);
|
803 kumpf 1.39 }
804
|
805 mike 1.112 CString String::getCString() const
806 {
|
807 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16
808 // counterpart, so we allocate extra memory for the worst case. In the
|
809 mike 1.112 // best case, we may need only one third of the memory allocated. But
|
810 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since
811 // CString objects are usually short-lived (disappearing after only a few
|
812 mike 1.112 // instructions). CString objects are typically created on the stack as
813 // means to obtain a char* pointer.
814
815 #ifdef PEGASUS_STRING_NO_UTF8
816 char* str = (char*)operator new(_rep->size + 1);
817 _copy(str, _rep->data, _rep->size);
818 str[_rep->size] = '\0';
819 return CString(str);
|
820 gs.keenan 1.110 #else
|
821 mike 1.112 Uint32 n = 3 * _rep->size;
822 char* str = (char*)operator new(n + 1);
823 size_t size = _copyToUTF8(str, _rep->data, _rep->size);
824 str[size] = '\0';
825 return CString(str);
|
826 gs.keenan 1.110 #endif
|
827 kumpf 1.39 }
828
|
829 mike 1.112 String& String::append(const Char16* str, Uint32 n)
|
830 kumpf 1.39 {
|
831 mike 1.112 _checkNullPointer(str);
832
833 size_t oldSize = _rep->size;
834 size_t newSize = oldSize + n;
835 _reserve(_rep, newSize);
836 _copy(_rep->data + oldSize, (Uint16*)str, n);
837 _rep->size = newSize;
838 _rep->data[newSize] = '\0';
839
840 return *this;
|
841 kumpf 1.39 }
842
|
843 mike 1.112 String& String::append(const String& str)
|
844 mike 1.27 {
|
845 w.otsuka 1.119.2.2 return append((Char16*)(&(str._rep->data[0])), str._rep->size);
|
846 mike 1.27 }
847
|
848 mike 1.112 String& String::append(const char* str, Uint32 size)
|
849 mike 1.27 {
|
850 mike 1.112 _checkNullPointer(str);
851
852 size_t oldSize = _rep->size;
853 size_t cap = oldSize + size;
854
855 _reserve(_rep, cap);
856 size_t utf8_error_index;
857 size_t tmp = _convert(
858 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
859
860 #ifndef PEGASUS_STRING_NO_THROW
861 if (tmp == size_t(-1))
862 {
863 StringRep::free(_rep);
864 _rep = &StringRep::_emptyRep;
865 _StringThrowBadUTF8(utf8_error_index);
866 }
867 #endif
|
868 mike 1.27
|
869 mike 1.112 _rep->size += tmp;
870 _rep->data[_rep->size] = '\0';
|
871 mike 1.27
|
872 kumpf 1.39 return *this;
873 }
874
|
875 mike 1.112 void String::remove(Uint32 index, Uint32 n)
|
876 mike 1.27 {
|
877 mike 1.112 if (n == PEG_NOT_FOUND)
878 n = _rep->size - index;
879
880 _checkBounds(index + n, _rep->size);
881
|
882 mike 1.114 if (_rep->refs.get() != 1)
|
883 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
|
884 mike 1.27
|
885 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
|
886 mike 1.27
|
887 mike 1.112 size_t rem = _rep->size - (index + n);
888 Uint16* data = _rep->data;
|
889 mike 1.27
|
890 mike 1.112 if (rem)
891 memmove(data + index, data + index + n, rem * sizeof(Uint16));
|
892 mike 1.27
|
893 mike 1.112 _rep->size -= n;
894 data[_rep->size] = '\0';
|
895 mike 1.27 }
896
|
897 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
|
898 mike 1.27 {
|
899 mike 1.112 // Note: this implementation is very permissive but used for
900 // backwards compatibility.
901
902 if (index < _rep->size)
|
903 mike 1.27 {
|
904 mike 1.112 if (n == PEG_NOT_FOUND || n > _rep->size - index)
905 n = _rep->size - index;
|
906 mike 1.27
|
907 w.otsuka 1.119.2.2 return String((Char16*)(_rep->data + index), n);
|
908 mike 1.27 }
|
909 david.dillard 1.105
910 return String();
|
911 mike 1.27 }
912
913 Uint32 String::find(Char16 c) const
914 {
|
915 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
916 mike 1.27
|
917 mike 1.112 if (p)
|
918 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
919 mike 1.27
920 return PEG_NOT_FOUND;
921 }
922
|
923 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
924 mike 1.30 {
|
925 mike 1.112 _checkBounds(index, _rep->size);
926
927 if (index >= _rep->size)
928 return PEG_NOT_FOUND;
929
930 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
|
931 mike 1.30
|
932 mike 1.112 if (p)
|
933 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
934 mike 1.30
935 return PEG_NOT_FOUND;
936 }
937
|
938 mike 1.112 Uint32 StringFindAux(
939 const StringRep* _rep, const Char16* s, Uint32 n)
|
940 mike 1.27 {
|
941 mike 1.112 _checkNullPointer(s);
|
942 mike 1.27
|
943 mike 1.112 const Uint16* data = _rep->data;
944 size_t rem = _rep->size;
945
946 while (n <= rem)
|
947 mike 1.30 {
|
948 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]);
949
950 if (!p)
951 break;
|
952 mike 1.30
|
953 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
|
954 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
955 david.dillard 1.105
|
956 mike 1.112 p++;
957 rem -= p - data;
958 data = p;
|
959 mike 1.27 }
|
960 mike 1.112
|
961 mike 1.27 return PEG_NOT_FOUND;
962 }
963
|
964 mike 1.112 Uint32 String::find(const char* s) const
965 {
966 _checkNullPointer(s);
967
968 // Note: could optimize away creation of temporary, but this is rarely
969 // called.
970 return find(String(s));
971 }
972
|
973 mike 1.27 Uint32 String::reverseFind(Char16 c) const
974 {
|
975 mike 1.112 Uint16 x = c;
976 Uint16* p = _rep->data;
977 Uint16* q = _rep->data + _rep->size;
|
978 mike 1.27
|
979 mike 1.112 while (q != p)
|
980 mike 1.27 {
|
981 mike 1.112 if (*--q == x)
|
982 david.dillard 1.116 return static_cast<Uint32>(q - p);
|
983 mike 1.27 }
984
985 return PEG_NOT_FOUND;
986 }
987
988 void String::toLower()
989 {
|
990 david 1.69 #ifdef PEGASUS_HAS_ICU
|
991 mike 1.112
|
992 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
993 david 1.90 {
|
994 mike 1.114 if (_rep->refs.get() != 1)
|
995 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
996
|
997 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
998 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
999 // prevents optimizations where the us-ascii is converted before
|
1000 mike 1.112 // calling ICU.
|
1001 yi.zhou 1.108 // The string may shrink or expand after the convert.
1002
|
1003 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns
1004 //// only the size when zero is passed as the destination size argument.
1005
|
1006 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1007
|
1008 mike 1.112 int32_t newSize = u_strToLower(
1009 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
1010 david.dillard 1.116
|
1011 mike 1.112 err = U_ZERO_ERROR;
1012
1013 //// Reserve enough space for the result.
1014
1015 if ((Uint32)newSize > _rep->cap)
1016 _reserve(_rep, newSize);
1017
1018 //// Perform the conversion (overlapping buffers are allowed).
|
1019 chuck 1.99
|
1020 mike 1.112 u_strToLower((UChar*)_rep->data, newSize,
1021 (UChar*)_rep->data, _rep->size, NULL, &err);
|
1022 yi.zhou 1.108
|
1023 mike 1.112 _rep->size = newSize;
1024 return;
|
1025 david 1.90 }
|
1026 mike 1.112
1027 #endif /* PEGASUS_HAS_ICU */
1028
|
1029 mike 1.114 if (_rep->refs.get() != 1)
|
1030 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1031
1032 Uint16* p = _rep->data;
1033 size_t n = _rep->size;
1034
1035 for (; n--; p++)
|
1036 david 1.90 {
|
1037 mike 1.112 if (!(*p & 0xFF00))
1038 *p = _toLower(*p);
|
1039 mike 1.27 }
|
1040 kumpf 1.39 }
1041
|
1042 chuck 1.99 void String::toUpper()
|
1043 david 1.90 {
1044 #ifdef PEGASUS_HAS_ICU
|
1045 mike 1.112
|
1046 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
1047 chuck 1.99 {
|
1048 mike 1.114 if (_rep->refs.get() != 1)
|
1049 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1050
|
1051 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
1052 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
1053 // prevents optimizations where the us-ascii is converted before
|
1054 mike 1.112 // calling ICU.
|
1055 yi.zhou 1.108 // The string may shrink or expand after the convert.
1056
|
1057 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns
1058 //// only the size when zero is passed as the destination size argument.
1059
|
1060 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1061
|
1062 mike 1.112 int32_t newSize = u_strToUpper(
1063 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1064
1065 err = U_ZERO_ERROR;
1066
1067 //// Reserve enough space for the result.
1068
1069 if ((Uint32)newSize > _rep->cap)
1070 _reserve(_rep, newSize);
1071
1072 //// Perform the conversion (overlapping buffers are allowed).
1073
1074 u_strToUpper((UChar*)_rep->data, newSize,
1075 (UChar*)_rep->data, _rep->size, NULL, &err);
|
1076 chuck 1.99
|
1077 mike 1.112 _rep->size = newSize;
|
1078 yi.zhou 1.108
|
1079 mike 1.112 return;
|
1080 david 1.91 }
|
1081 mike 1.112
1082 #endif /* PEGASUS_HAS_ICU */
1083
|
1084 mike 1.114 if (_rep->refs.get() != 1)
|
1085 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1086
1087 Uint16* p = _rep->data;
1088 size_t n = _rep->size;
1089
1090 for (; n--; p++)
1091 *p = _toUpper(*p);
|
1092 david 1.90 }
1093
|
1094 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
1095 kumpf 1.39 {
|
1096 kumpf 1.118 const Uint16* p1 = s1._rep->data;
1097 const Uint16* p2 = s2._rep->data;
|
1098 mike 1.27
|
1099 kumpf 1.118 while (n--)
1100 {
1101 int r = *p1++ - *p2++;
1102 if (r)
1103 {
1104 return r;
1105 }
1106 else if (!p1[-1])
1107 {
1108 // We must have encountered a null terminator in both s1 and s2
1109 return 0;
1110 }
1111 }
1112 return 0;
|
1113 mike 1.27 }
1114
|
1115 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
1116 mike 1.30 {
|
1117 mike 1.112 return _compare(s1._rep->data, s2._rep->data);
1118 }
|
1119 kumpf 1.43
|
1120 mike 1.112 int String::compare(const String& s1, const char* s2)
1121 {
1122 _checkNullPointer(s2);
|
1123 mike 1.30
|
1124 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
1125 return _compareNoUTF8(s1._rep->data, s2);
1126 #else
1127 // ATTN: optimize this!
1128 return String::compare(s1, String(s2));
1129 #endif
|
1130 mike 1.30 }
1131
|
1132 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
|
1133 kumpf 1.40 {
|
1134 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1135 mike 1.112
|
1136 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1137 {
|
1138 mike 1.112 return u_strcasecmp(
|
1139 dave.sudlik 1.119.2.3 (const UChar*)str1._rep->data,
1140 (const UChar*)str2._rep->data,
1141 U_FOLD_CASE_DEFAULT
1142 );
|
1143 yi.zhou 1.108 }
|
1144 kumpf 1.40
|
1145 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1146
1147 const Uint16* s1 = str1._rep->data;
1148 const Uint16* s2 = str2._rep->data;
1149
1150 while (*s1 && *s2)
|
1151 kumpf 1.40 {
|
1152 mike 1.112 int r = _toLower(*s1++) - _toLower(*s2++);
|
1153 kumpf 1.40
|
1154 david.dillard 1.105 if (r)
1155 return r;
|
1156 kumpf 1.40 }
1157
|
1158 mike 1.112 if (*s2)
|
1159 david.dillard 1.105 return -1;
|
1160 mike 1.112 else if (*s1)
|
1161 david.dillard 1.105 return 1;
|
1162 kumpf 1.40
1163 return 0;
1164 }
1165
|
1166 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
|
1167 mike 1.27 {
|
1168 mike 1.112 #ifdef PEGASUS_HAS_ICU
1169
1170 return String::compareNoCase(s1, s2) == 0;
1171
1172 #else /* PEGASUS_HAS_ICU */
|
1173 mike 1.27
|
1174 mike 1.112 // The following employs loop unrolling for efficiency. Please do not
1175 // eliminate.
|
1176 kumpf 1.39
|
1177 mike 1.112 Uint16* p = (Uint16*)s1.getChar16Data();
1178 Uint16* q = (Uint16*)s2.getChar16Data();
1179 Uint32 n = s2.size();
1180
1181 while (n >= 8)
1182 {
1183 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1184 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1185 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1186 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1187 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1188 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1189 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1190 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1191 {
1192 return false;
1193 }
|
1194 kumpf 1.39
|
1195 mike 1.112 n -= 8;
1196 p += 8;
1197 q += 8;
1198 }
|
1199 mike 1.27
|
1200 mike 1.112 while (n >= 4)
|
1201 kumpf 1.39 {
|
1202 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1203 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1204 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1205 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
|
1206 david.dillard 1.105 {
|
1207 mike 1.112 return false;
|
1208 david.dillard 1.105 }
|
1209 mike 1.112
1210 n -= 4;
1211 p += 4;
1212 q += 4;
1213 }
1214
1215 while (n--)
1216 {
1217 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
|
1218 david.dillard 1.105 return false;
|
1219 mike 1.112
1220 p++;
1221 q++;
|
1222 kumpf 1.39 }
|
1223 mike 1.28
|
1224 kumpf 1.39 return true;
|
1225 mike 1.112
1226 #endif /* PEGASUS_HAS_ICU */
|
1227 david 1.69 }
1228
|
1229 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1230 david 1.69 {
|
1231 mike 1.112 _checkNullPointer(s2);
|
1232 david 1.69
|
1233 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1234 david 1.69
|
1235 mike 1.112 return String::equalNoCase(s1, String(s2));
|
1236 david 1.69
|
1237 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1238 david 1.69
|
1239 mike 1.112 const Uint16* p1 = (Uint16*)s1._rep->data;
1240 const char* p2 = s2;
1241 size_t n = s1._rep->size;
|
1242 david.dillard 1.105
|
1243 mike 1.112 while (n--)
1244 {
1245 if (!*p2)
1246 return false;
|
1247 david 1.71
|
1248 mike 1.112 if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1249 return false;
1250 }
|
1251 kumpf 1.42
|
1252 mike 1.112 if (*p2)
1253 return false;
|
1254 david.dillard 1.116
|
1255 mike 1.112 return true;
|
1256 karl 1.36
|
1257 mike 1.112 #else /* PEGASUS_HAS_ICU */
|
1258 david.dillard 1.105
|
1259 mike 1.112 // ATTN: optimize this!
1260 return String::equalNoCase(s1, String(s2));
|
1261 david.dillard 1.105
|
1262 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1263 }
|
1264 chuck 1.78
|
1265 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
|
1266 karl 1.36 {
|
1267 david.dillard 1.116 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
|
1268 mike 1.112 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
|
1269 karl 1.36 }
1270
|
1271 mike 1.112 Boolean String::equal(const String& s1, const char* s2)
1272 {
1273 #ifdef PEGASUS_STRING_NO_UTF8
|
1274 kumpf 1.35
|
1275 mike 1.112 _checkNullPointer(s2);
|
1276 kumpf 1.39
|
1277 mike 1.112 const Uint16* p = (Uint16*)s1._rep->data;
1278 const char* q = s2;
|
1279 kumpf 1.39
|
1280 mike 1.112 while (*p && *q)
1281 {
1282 if (*p++ != Uint16(*q++))
1283 return false;
1284 }
|
1285 kumpf 1.39
|
1286 mike 1.112 return !(*p || *q);
|
1287 kumpf 1.39
|
1288 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
|
1289 kumpf 1.39
|
1290 mike 1.112 return String::equal(s1, String(s2));
|
1291 kumpf 1.39
|
1292 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1293 kumpf 1.39 }
1294
|
1295 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1296 kumpf 1.39 {
|
1297 mike 1.112 #if defined(PEGASUS_OS_OS400)
|
1298 david 1.72
|
1299 david 1.93 CString cstr = str.getCString();
|
1300 david 1.69 const char* utf8str = cstr;
|
1301 mike 1.112 os << utf8str;
1302 return os;
|
1303 david.dillard 1.116 #else
|
1304 david 1.69
|
1305 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1306 david 1.69
|
1307 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1308 {
|
1309 david.dillard 1.105 char *buf = NULL;
1310 const int size = str.size() * 6;
|
1311 mike 1.112 UnicodeString UniStr(
1312 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1313 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1314 buf = new char[bufsize+1];
1315 UniStr.extract(0,bufsize,buf);
1316 os << buf;
1317 os.flush();
1318 delete [] buf;
|
1319 david.dillard 1.116 return os;
|
1320 yi.zhou 1.108 }
|
1321 mike 1.112
|
1322 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
|
1323 mike 1.112
1324 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1325 yi.zhou 1.108 {
|
1326 mike 1.112 Uint16 code = str[i];
|
1327 david.dillard 1.105
|
1328 mike 1.112 if (code > 0 && !(code & 0xFF00))
1329 os << char(code);
1330 else
1331 {
1332 // Print in hex format:
1333 char buffer[8];
1334 sprintf(buffer, "\\x%04X", code);
1335 os << buffer;
|
1336 david.dillard 1.105 }
|
1337 yi.zhou 1.108 }
|
1338 kumpf 1.39
1339 return os;
|
1340 mike 1.112 #endif // PEGASUS_OS_OS400
|
1341 kumpf 1.39 }
1342
|
1343 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
|
1344 kumpf 1.39 {
|
1345 mike 1.112 StringRep* tmp;
1346
1347 if (_rep->cap)
1348 {
1349 tmp = StringRep::alloc(2 * _rep->cap);
1350 tmp->size = _rep->size;
1351 _copy(tmp->data, _rep->data, _rep->size);
1352 }
1353 else
1354 {
1355 tmp = StringRep::alloc(8);
1356 tmp->size = 0;
1357 }
1358
1359 StringRep::unref(_rep);
1360 _rep = tmp;
|
1361 kumpf 1.39 }
1362
|
1363 mike 1.112 PEGASUS_NAMESPACE_END
1364
1365 /*
1366 ================================================================================
1367
1368 String optimizations:
1369
1370 1. Added mechanism allowing certain functions to be inlined only when
1371 used by internal Pegasus modules. External modules (i.e., providers)
1372 link to a non-inline version, which allows for binary compatibility.
1373
1374 2. Implemented copy-on-write with atomic increment/decrement. This
1375 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1376 for the 'ni1000' benchmark.
1377
1378 3. Employed loop unrolling in several places. For example, see:
1379
1380 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1381
1382 4. Used the "empty-rep" optimization (described in whitepaper from the
1383 GCC Developers Summit). This reduced default construction to a simple
1384 mike 1.112 pointer assignment.
1385
1386 inline String::String() : _rep(&_emptyRep) { }
1387
1388 5. Implemented Uint16 versions of toupper() and tolower() using tables.
1389 For example:
1390
1391 static const char _upper[] =
1392 {
1393 0,1,2,...255
1394 };
1395
1396 inline Uint16 _toUpper(Uint16 x)
1397 {
1398 return (x & 0xFF00) ? x : _upper[x];
1399 }
1400
|
1401 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
|
1402 mike 1.112 operation.
1403
|
1404 david.dillard 1.116 6. Implemented char* version of the following member functions to
1405 eliminate unnecessary creation of anonymous string objects
|
1406 mike 1.112 (temporaries).
1407
1408 String(const String& s1, const char* s2);
1409 String(const char* s1, const String& s2);
1410 String& String::operator=(const char* str);
1411 Uint32 String::find(const char* s) const;
1412 bool String::equal(const String& s1, const char* s2);
1413 static int String::compare(const String& s1, const char* s2);
1414 String& String::append(const char* str);
1415 String& String::append(const char* str, Uint32 size);
1416 static bool String::equalNoCase(const String& s1, const char* s2);
1417 String& operator=(const char* str)
1418 String& String::assign(const char* str)
1419 String& String::append(const char* str)
1420 Boolean operator==(const String& s1, const char* s2)
1421 Boolean operator==(const char* s1, const String& s2)
1422 Boolean operator!=(const String& s1, const char* s2)
1423 Boolean operator!=(const char* s1, const String& s2)
1424 Boolean operator<(const String& s1, const char* s2)
1425 Boolean operator<(const char* s1, const String& s2)
1426 Boolean operator>(const String& s1, const char* s2)
1427 mike 1.112 Boolean operator>(const char* s1, const String& s2)
1428 Boolean operator<=(const String& s1, const char* s2)
1429 Boolean operator<=(const char* s1, const String& s2)
1430 Boolean operator>=(const String& s1, const char* s2)
1431 Boolean operator>=(const char* s1, const String& s2)
1432 String operator+(const String& s1, const char* s2)
1433 String operator+(const char* s1, const String& s2)
1434
|
1435 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
|
1436 mike 1.112 power of two (algorithm from the book "Hacker's Delight").
1437
1438 static Uint32 _roundUpToPow2(Uint32 x)
1439 {
1440 if (x < 8)
1441 return 8;
1442
1443 x--;
1444 x |= (x >> 1);
1445 x |= (x >> 2);
1446 x |= (x >> 4);
1447 x |= (x >> 8);
1448 x |= (x >> 16);
1449 x++;
1450
1451 return x;
1452 }
1453
1454 8. Implemented "concatenating constructors" to eliminate temporaries
|
1455 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
|
1456 mike 1.112 optimization" described by Stan Lippman.
1457
1458 inline String operator+(const String& s1, const String& s2)
1459 {
1460 return String(s1, s2, 0);
1461 }
1462
1463 9. Experimented to find the optimal initial size for a short string.
1464 Eight seems to offer the best tradeoff between space and time.
1465
1466 10. Inlined all members of the Char16 class.
1467
1468 11. Used Uint16 internally in the String class. This showed no improvement
1469 since Char16 was already fully inlined and was essentially reduced to
1470 Uint16 in any case.
1471
1472 12. Implemented conditional logic (#if) allowing error checking logic to
|
1473 david.dillard 1.116 be excluded for better performance. Examples include bounds checking
|
1474 mike 1.112 and null-pointer checking.
1475
1476 13. Used memcpy() and memcmp() where possible. These are implemented using
1477 the rep family of instructions under Intel and are much faster.
1478
|
1479 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
|
1480 mike 1.112 copy routine overhead.
1481
1482 15. Added ASCII7 form of the constructor and assign().
1483
1484 String s("hello world", String::ASCII7);
1485
1486 s.assignASCII7("hello world");
1487
1488 This avoids slower UTF8 processing when not needed.
1489
1490 ================================================================================
1491
1492 TO-DO:
1493
1494 (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
1495
1496 (+) [DONE] Submit BUG-2754 (Windows buffer limit).
1497
1498 (+) [DONE] Eliminate char versions of find() and append().
1499
1500 (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1501 mike 1.112
1502 (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1503
1504 (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1505
1506 (+) [DONE] Comment StringRep allocation layout.
1507
1508 (+) [DONE] Conceal private inline functions.
1509
1510 (+) [DONE] Shorten inclusion of StringInline.h in String.h.
1511
1512 (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
1513 rid of altogether.
1514
1515 (+) [DONE] useCamelNotationOnAllFunctionNames.
1516
1517 (+) [DONE] Check for overflow condition in StringRep::alloc().
1518
1519 (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
1520
1521 (+) [DONE] Fix throw-related memory leak.
1522 mike 1.112
1523 (+) [DONE] Look at PEP223 for coding security guidelines.
1524
1525 (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).
|
1526 kumpf 1.39
|
1527 mike 1.112 (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
|
1528 kumpf 1.39
|
1529 mike 1.112 (+) DOC++ String.h - will open new bug?
|
1530 kumpf 1.39
|
1531 mike 1.112 (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression
1532 on certain platforms).
|
1533 kumpf 1.39
|
1534 mike 1.112 ================================================================================
1535 */
|