1 karl 1.119 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.119 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.27 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.119 //
|
21 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
|
32 mike 1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
|
33 mike 1.27 //
|
34 david.dillard 1.116 // Modified By:
|
35 mike 1.112 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
36 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
|
37 david.dillard 1.116 // David Dillard, Symantec Corp. (david_dillard@symantec.com)
|
38 mike 1.112 // Mike Brasher (mike-brasher@austin.rr.com)
|
39 mike 1.27 //
40 //%/////////////////////////////////////////////////////////////////////////////
41
|
42 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
|
43 mike 1.113 #include <cstring>
|
44 kumpf 1.48 #include "InternalException.h"
|
45 david 1.69 #include "CommonUTF.h"
|
46 mike 1.112 #include "MessageLoader.h"
47 #include "StringRep.h"
|
48 david 1.69
49 #ifdef PEGASUS_HAS_ICU
|
50 chuck 1.99 #include <unicode/ustring.h>
51 #include <unicode/uchar.h>
|
52 david 1.69 #endif
53
|
54 mike 1.112 PEGASUS_NAMESPACE_BEGIN
|
55 mike 1.28
|
56 mike 1.112 //==============================================================================
57 //
58 // Compile-time macros (undefined by default).
59 //
60 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
61 //
62 //==============================================================================
|
63 mike 1.27
|
64 mike 1.112 //==============================================================================
|
65 kumpf 1.39 //
|
66 mike 1.112 // File-scope definitions:
|
67 kumpf 1.54 //
|
68 mike 1.112 //==============================================================================
69
70 // Note: this table is much faster than the system toupper(). Please do not
71 // change.
|
72 kumpf 1.54
|
73 david.dillard 1.116 const Uint8 _toUpperTable[256] =
|
74 kumpf 1.54 {
|
75 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
76 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
77 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
78 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
79 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
80 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
81 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
82 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
83 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
84 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
85 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
86 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
87 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
88 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
89 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
90 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
91 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
92 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
93 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
94 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
95 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
96 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
97 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
98 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
99 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
100 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
101 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
102 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
103 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
104 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
105 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
106 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
107 };
108
109 // Note: this table is much faster than the system tulower(). Please do not
110 // change.
111
|
112 david.dillard 1.116 const Uint8 _toLowerTable[256] =
|
113 mike 1.112 {
114 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
115 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
116 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
117 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
118 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
119 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
120 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
121 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
122 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
123 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
124 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
125 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
126 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
127 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
128 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
129 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
130 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
131 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
132 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
133 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
134 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
135 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
136 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
137 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
138 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
139 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
140 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
141 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
142 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
143 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
144 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
145 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
146 };
147
148 // Converts 16-bit characters to upper case. This routine is faster than the
149 // system toupper(). Please do not change.
150 inline Uint16 _toUpper(Uint16 x)
151 {
152 return (x & 0xFF00) ? x : _toUpperTable[x];
|
153 kumpf 1.54 }
154
|
155 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the
156 // system toupper(). Please do not change.
157 inline Uint16 _toLower(Uint16 x)
|
158 kumpf 1.54 {
|
159 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x];
160 }
161
162 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
163 static Uint32 _roundUpToPow2(Uint32 x)
164 {
|
165 dave.sudlik 1.120 // Check for potential overflow in x
166 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
|
167 mike 1.112
168 if (x < 8)
169 return 8;
170
171 x--;
172 x |= (x >> 1);
173 x |= (x >> 2);
174 x |= (x >> 4);
175 x |= (x >> 8);
176 x |= (x >> 16);
177 x++;
178
179 return x;
180 }
181
// Copies n elements from q to p, converting each element from Q to P by
// assignment.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // The loops below are deliberately unrolled for efficiency. Please do not
    // eliminate.

    // Blocks of eight.
    for (; n >= 8; p += 8, q += 8, n -= 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    // Blocks of four.
    for (; n >= 4; p += 4, q += 4, n -= 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
    }

    // Remaining zero to three elements.
    for (; n != 0; --n)
        *p++ = *q++;
}
217
218 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
219 {
220 // The following employs loop unrolling for efficiency. Please do not
221 // eliminate.
222
223 while (n >= 4)
224 {
225 if (s[0] == c)
226 return (Uint16*)s;
227 if (s[1] == c)
228 return (Uint16*)&s[1];
229 if (s[2] == c)
230 mike 1.112 return (Uint16*)&s[2];
231 if (s[3] == c)
232 return (Uint16*)&s[3];
|
233 kumpf 1.82
|
234 mike 1.112 n -= 4;
235 s += 4;
236 }
237
238 if (n)
239 {
240 if (*s == c)
241 return (Uint16*)s;
242 s++;
243 n--;
244 }
245
246 if (n)
247 {
248 if (*s == c)
249 return (Uint16*)s;
250 s++;
251 n--;
252 }
253
254 if (n && *s == c)
255 mike 1.112 return (Uint16*)s;
256
257 // Not found!
258 return 0;
259 }
260
261 static int _compare(const Uint16* s1, const Uint16* s2)
262 {
263 while (*s1 && *s2)
264 {
265 int r = *s1++ - *s2++;
266
267 if (r)
268 return r;
269 }
270
271 if (*s2)
272 return -1;
273 else if (*s1)
274 return 1;
275
276 mike 1.112 return 0;
277 }
278
279 static int _compareNoUTF8(const Uint16* s1, const char* s2)
280 {
281 Uint16 c1;
282 Uint16 c2;
283
284 do
285 {
286 c1 = *s1++;
287 c2 = *s2++;
288
289 if (c1 == 0)
290 return c1 - c2;
291 }
292 while (c1 == c2);
293
294 return c1 - c2;
295 }
296
297 mike 1.112 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
298 {
299 memcpy(s1, s2, n * sizeof(Uint16));
300 }
301
302 void StringThrowOutOfBounds()
303 {
304 throw IndexOutOfBoundsException();
305 }
306
307 inline void _checkNullPointer(const void* ptr)
308 {
309 if (!ptr)
310 throw NullPointer();
311 }
312
313 static void _StringThrowBadUTF8(Uint32 index)
314 {
315 MessageLoaderParms parms(
316 "Common.String.BAD_UTF8",
317 "The byte sequence starting at index $0 "
318 mike 1.112 "is not valid UTF-8 encoding.",
319 index);
320 throw Exception(parms);
321 }
322
323 static size_t _copyFromUTF8(
|
324 david.dillard 1.116 Uint16* dest,
325 const char* src,
|
326 mike 1.112 size_t n,
327 size_t& utf8_error_index)
328 {
329 Uint16* p = dest;
330 const Uint8* q = (const Uint8*)src;
331
332 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
333 // Use loop-unrolling.
334
335 while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
336 {
337 p[0] = q[0];
338 p[1] = q[1];
339 p[2] = q[2];
340 p[3] = q[3];
341 p[4] = q[4];
342 p[5] = q[5];
343 p[6] = q[6];
344 p[7] = q[7];
345 p += 8;
346 q += 8;
347 mike 1.112 n -= 8;
348 }
349
350 while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
351 {
352 p[0] = q[0];
353 p[1] = q[1];
354 p[2] = q[2];
355 p[3] = q[3];
356 p += 4;
357 q += 4;
358 n -= 4;
359 }
360
361 switch (n)
362 {
363 case 0:
364 return p - dest;
365 case 1:
366 if (q[0] < 128)
367 {
368 mike 1.112 p[0] = q[0];
369 return p + 1 - dest;
370 }
371 break;
372 case 2:
373 if (((q[0]|q[1]) & 0x80) == 0)
374 {
375 p[0] = q[0];
376 p[1] = q[1];
377 return p + 2 - dest;
378 }
379 break;
380 case 3:
381 if (((q[0]|q[1]|q[2]) & 0x80) == 0)
382 {
383 p[0] = q[0];
384 p[1] = q[1];
385 p[2] = q[2];
386 return p + 3 - dest;
387 }
388 break;
389 mike 1.112 }
390
391 // Process remaining characters.
392
393 while (n)
394 {
395 // Optimize for 7-bit ASCII case.
396
397 if (*q < 128)
398 {
399 *p++ = *q++;
400 n--;
401 }
402 else
403 {
404 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
405
406 if (c > n || !isValid_U8(q, c) ||
407 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
408 {
409 utf8_error_index = q - (const Uint8*)src;
410 mike 1.112 return size_t(-1);
411 }
412
413 n -= c;
414 }
415 }
416
417 return p - dest;
418 }
419
|
420 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
|
421 mike 1.112 // terminator).
422 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
423 {
424 // The following employs loop unrolling for efficiency. Please do not
425 // eliminate.
426
427 const Uint16* q = src;
428 Uint8* p = (Uint8*)dest;
429
430 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
|
431 kumpf 1.82 {
|
432 mike 1.112 p[0] = q[0];
433 p[1] = q[1];
434 p[2] = q[2];
435 p[3] = q[3];
436 p += 4;
437 q += 4;
438 n -= 4;
|
439 kumpf 1.82 }
|
440 mike 1.112
441 switch (n)
442 {
443 case 0:
444 return p - (Uint8*)dest;
445 case 1:
446 if (q[0] < 128)
447 {
448 p[0] = q[0];
449 return p + 1 - (Uint8*)dest;
450 }
451 break;
452 case 2:
453 if (q[0] < 128 && q[1] < 128)
454 {
455 p[0] = q[0];
456 p[1] = q[1];
457 return p + 2 - (Uint8*)dest;
458 }
459 break;
460 case 3:
461 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
462 {
463 p[0] = q[0];
464 p[1] = q[1];
465 p[2] = q[2];
466 return p + 3 - (Uint8*)dest;
467 }
468 break;
469 }
470
471 // If this line was reached, there must be characters greater than 128.
472
473 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
474
475 return p - (Uint8*)dest;
|
476 kumpf 1.54 }
477
|
478 mike 1.112 static inline size_t _convert(
479 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
|
480 kumpf 1.54 {
|
481 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
482 _copy(p, q, n);
483 return n;
484 #else
485 return _copyFromUTF8(p, q, n, utf8_error_index);
486 #endif
|
487 kumpf 1.54 }
488
|
489 mike 1.112 //==============================================================================
490 //
491 // class CString
492 //
493 //==============================================================================
494
495 CString::CString(const CString& cstr) : _rep(0)
|
496 kumpf 1.54 {
|
497 mike 1.112 if (cstr._rep)
|
498 kumpf 1.82 {
|
499 mike 1.112 size_t n = strlen(cstr._rep) + 1;
500 _rep = (char*)operator new(n);
501 memcpy(_rep, cstr._rep, n);
|
502 kumpf 1.82 }
|
503 kumpf 1.54 }
504
|
505 kumpf 1.56 CString& CString::operator=(const CString& cstr)
506 {
|
507 kumpf 1.82 if (&cstr != this)
|
508 kumpf 1.81 {
|
509 kumpf 1.82 if (_rep)
510 {
|
511 mike 1.112 operator delete(_rep);
|
512 kumpf 1.82 _rep = 0;
513 }
|
514 mike 1.112
|
515 kumpf 1.82 if (cstr._rep)
516 {
|
517 mike 1.112 size_t n = strlen(cstr._rep) + 1;
518 _rep = (char*)operator new(n);
519 memcpy(_rep, cstr._rep, n);
|
520 kumpf 1.82 }
|
521 kumpf 1.81 }
|
522 mike 1.112
|
523 kumpf 1.56 return *this;
524 }
525
|
526 mike 1.112 //==============================================================================
|
527 kumpf 1.54 //
|
528 mike 1.112 // class StringRep
|
529 kumpf 1.39 //
|
530 mike 1.112 //==============================================================================
|
531 kumpf 1.39
|
// Shared representation for empty strings: clear() and the UTF-8 error paths
// in this file point _rep at this object instead of allocating a new one.
532 mike 1.112 StringRep StringRep::_emptyRep;
|
533 mike 1.27
|
534 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
|
535 mike 1.27 {
|
536 dave.sudlik 1.120 // Check for potential overflow in cap
537 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
|
538 mike 1.27
|
539 mike 1.112 StringRep* rep = (StringRep*)::operator new(
540 sizeof(StringRep) + cap * sizeof(Uint16));
541 rep->cap = cap;
542 new(&rep->refs) AtomicInt(1);
543
544 return rep;
|
545 mike 1.27 }
546
|
547 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
548 chuck 1.102 {
|
549 mike 1.114 if (cap > rep->cap || rep->refs.get() != 1)
|
550 chuck 1.102 {
|
551 mike 1.112 size_t n = _roundUpToPow2(cap);
552 StringRep* newRep = StringRep::alloc(n);
553 newRep->size = rep->size;
554 _copy(newRep->data, rep->data, rep->size + 1);
555 StringRep::unref(rep);
556 rep = newRep;
557 }
558 }
|
559 david.dillard 1.105
|
560 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
561 {
562 StringRep* rep = StringRep::alloc(size);
563 rep->size = size;
564 _copy(rep->data, data, size);
565 rep->data[size] = '\0';
566 return rep;
567 }
|
568 chuck 1.102
|
569 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
570 {
571 // Return a new copy of rep. Release rep.
|
572 chuck 1.102
|
573 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size);
574 newRep->size = rep->size;
575 _copy(newRep->data, rep->data, rep->size);
576 newRep->data[newRep->size] = '\0';
577 StringRep::unref(rep);
578 return newRep;
|
579 chuck 1.102 }
580
|
581 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
|
582 kumpf 1.43 {
|
583 mike 1.112 StringRep* rep = StringRep::alloc(size);
584 size_t utf8_error_index;
585 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
586
587 if (rep->size == size_t(-1))
588 {
589 StringRep::free(rep);
590 _StringThrowBadUTF8(utf8_error_index);
591 }
|
592 kumpf 1.43
|
593 mike 1.112 rep->data[rep->size] = '\0';
|
594 kumpf 1.43
|
595 mike 1.112 return rep;
|
596 mike 1.27 }
597
|
598 mike 1.112 Uint32 StringRep::length(const Uint16* str)
|
599 mike 1.27 {
|
600 mike 1.112 // Note: We could unroll this but it is rarely called.
601
602 const Uint16* end = (Uint16*)str;
603
604 while (*end++)
605 ;
606
607 return end - str - 1;
|
608 kumpf 1.39 }
|
609 tony 1.66
|
610 mike 1.112 //==============================================================================
611 //
612 // class String
613 //
614 //==============================================================================
615
// Definition of the static String::EMPTY constant (default-constructed).
616 const String String::EMPTY;
|
617 mike 1.27
|
618 kumpf 1.39 String::String(const String& str, Uint32 n)
619 {
|
620 mike 1.112 _checkBounds(n, str._rep->size);
621 _rep = StringRep::create(str._rep->data, n);
|
622 kumpf 1.39 }
623
624 String::String(const Char16* str)
625 {
|
626 mike 1.112 _checkNullPointer(str);
627 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
628 mike 1.27 }
629
|
630 kumpf 1.39 String::String(const Char16* str, Uint32 n)
631 {
|
632 mike 1.112 _checkNullPointer(str);
633 _rep = StringRep::create((Uint16*)str, n);
|
634 kumpf 1.39 }
635
636 String::String(const char* str)
|
637 mike 1.27 {
|
638 mike 1.112 _checkNullPointer(str);
|
639 david.dillard 1.105
|
640 mike 1.112 // Set this just in case create() throws an exception.
641 _rep = &StringRep::_emptyRep;
642 _rep = StringRep::create(str, strlen(str));
|
643 mike 1.27 }
644
|
645 kumpf 1.39 String::String(const char* str, Uint32 n)
|
646 mike 1.27 {
|
647 mike 1.112 _checkNullPointer(str);
|
648 david.dillard 1.105
|
649 mike 1.112 // Set this just in case create() throws an exception.
650 _rep = &StringRep::_emptyRep;
651 _rep = StringRep::create(str, n);
|
652 kumpf 1.39 }
|
653 mike 1.27
|
654 mike 1.112 String::String(const String& s1, const String& s2)
|
655 kumpf 1.39 {
|
656 mike 1.112 size_t n1 = s1._rep->size;
657 size_t n2 = s2._rep->size;
658 size_t n = n1 + n2;
659 _rep = StringRep::alloc(n);
660 _copy(_rep->data, s1._rep->data, n1);
661 _copy(_rep->data + n1, s2._rep->data, n2);
662 _rep->size = n;
663 _rep->data[n] = '\0';
|
664 mike 1.27 }
665
|
666 mike 1.112 String::String(const String& s1, const char* s2)
|
667 mike 1.27 {
|
668 mike 1.112 _checkNullPointer(s2);
669 size_t n1 = s1._rep->size;
670 size_t n2 = strlen(s2);
671 _rep = StringRep::alloc(n1 + n2);
672 _copy(_rep->data, s1._rep->data, n1);
673 size_t utf8_error_index;
674 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
675
676 if (tmp == size_t(-1))
|
677 kumpf 1.82 {
|
678 mike 1.112 StringRep::free(_rep);
679 _rep = &StringRep::_emptyRep;
680 _StringThrowBadUTF8(utf8_error_index);
|
681 kumpf 1.82 }
|
682 mike 1.112
683 _rep->size = n1 + tmp;
684 _rep->data[_rep->size] = '\0';
|
685 mike 1.27 }
686
|
687 mike 1.112 String::String(const char* s1, const String& s2)
|
688 mike 1.27 {
|
689 mike 1.112 _checkNullPointer(s1);
690 size_t n1 = strlen(s1);
691 size_t n2 = s2._rep->size;
692 _rep = StringRep::alloc(n1 + n2);
693 size_t utf8_error_index;
694 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
695
696 if (tmp == size_t(-1))
697 {
698 StringRep::free(_rep);
699 _rep = &StringRep::_emptyRep;
700 _StringThrowBadUTF8(utf8_error_index);
701 }
702
703 _rep->size = n2 + tmp;
704 _copy(_rep->data + n1, s2._rep->data, n2);
705 _rep->data[_rep->size] = '\0';
|
706 mike 1.27 }
707
|
708 mike 1.112 String& String::assign(const String& str)
|
709 mike 1.27 {
|
710 mike 1.112 if (_rep != str._rep)
|
711 david.dillard 1.105 {
|
712 mike 1.112 StringRep::unref(_rep);
713 StringRep::ref(_rep = str._rep);
|
714 david.dillard 1.105 }
715
|
716 mike 1.27 return *this;
717 }
718
719 String& String::assign(const Char16* str, Uint32 n)
720 {
|
721 mike 1.112 _checkNullPointer(str);
722
|
723 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
724 david.dillard 1.105 {
|
725 mike 1.112 StringRep::unref(_rep);
726 _rep = StringRep::alloc(n);
|
727 david.dillard 1.105 }
728
|
729 mike 1.112 _rep->size = n;
730 _copy(_rep->data, (Uint16*)str, n);
731 _rep->data[n] = '\0';
732
|
733 mike 1.27 return *this;
734 }
735
|
736 mike 1.112 String& String::assign(const char* str, Uint32 n)
|
737 chuck 1.102 {
|
738 mike 1.112 _checkNullPointer(str);
739
|
740 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
741 david.dillard 1.105 {
|
742 mike 1.112 StringRep::unref(_rep);
743 _rep = StringRep::alloc(n);
|
744 david.dillard 1.105 }
745
|
746 mike 1.112 size_t utf8_error_index;
747 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
|
748 chuck 1.102
|
749 mike 1.112 if (_rep->size == size_t(-1))
|
750 david.dillard 1.105 {
|
751 mike 1.112 StringRep::free(_rep);
752 _rep = &StringRep::_emptyRep;
753 _StringThrowBadUTF8(utf8_error_index);
|
754 david.dillard 1.105 }
|
755 mike 1.112
756 _rep->data[_rep->size] = 0;
|
757 david.dillard 1.105
|
758 mike 1.27 return *this;
759 }
760
|
761 kumpf 1.39 void String::clear()
762 {
|
763 mike 1.112 if (_rep->size)
764 {
|
765 mike 1.114 if (_rep->refs.get() == 1)
|
766 mike 1.112 {
767 _rep->size = 0;
768 _rep->data[0] = '\0';
769 }
770 else
771 {
772 StringRep::unref(_rep);
773 _rep = &StringRep::_emptyRep;
774 }
775 }
|
776 kumpf 1.39 }
777
|
778 mike 1.112 void String::reserveCapacity(Uint32 cap)
|
779 kumpf 1.39 {
|
780 mike 1.112 _reserve(_rep, cap);
|
781 kumpf 1.39 }
782
|
783 mike 1.112 CString String::getCString() const
784 {
|
785 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16
786 // counterpart, so we allocate extra memory for the worst case. In the
|
787 mike 1.112 // best case, we may need only one third of the memory allocated. But
|
788 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since
789 // CString objects are usually short-lived (disappearing after only a few
|
790 mike 1.112 // instructions). CString objects are typically created on the stack as
791 // means to obtain a char* pointer.
792
793 #ifdef PEGASUS_STRING_NO_UTF8
794 char* str = (char*)operator new(_rep->size + 1);
795 _copy(str, _rep->data, _rep->size);
796 str[_rep->size] = '\0';
797 return CString(str);
|
798 gs.keenan 1.110 #else
|
799 mike 1.112 Uint32 n = 3 * _rep->size;
800 char* str = (char*)operator new(n + 1);
801 size_t size = _copyToUTF8(str, _rep->data, _rep->size);
802 str[size] = '\0';
803 return CString(str);
|
804 gs.keenan 1.110 #endif
|
805 kumpf 1.39 }
806
|
807 mike 1.112 String& String::append(const Char16* str, Uint32 n)
|
808 kumpf 1.39 {
|
809 mike 1.112 _checkNullPointer(str);
810
811 size_t oldSize = _rep->size;
812 size_t newSize = oldSize + n;
813 _reserve(_rep, newSize);
814 _copy(_rep->data + oldSize, (Uint16*)str, n);
815 _rep->size = newSize;
816 _rep->data[newSize] = '\0';
817
818 return *this;
|
819 kumpf 1.39 }
820
|
821 mike 1.112 String& String::append(const String& str)
|
822 mike 1.27 {
|
823 w.otsuka 1.121 return append((Char16*)(&(str._rep->data[0])), str._rep->size);
|
824 mike 1.27 }
825
|
826 mike 1.112 String& String::append(const char* str, Uint32 size)
|
827 mike 1.27 {
|
828 mike 1.112 _checkNullPointer(str);
829
830 size_t oldSize = _rep->size;
831 size_t cap = oldSize + size;
832
833 _reserve(_rep, cap);
834 size_t utf8_error_index;
835 size_t tmp = _convert(
836 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
837
838 if (tmp == size_t(-1))
839 {
840 StringRep::free(_rep);
841 _rep = &StringRep::_emptyRep;
842 _StringThrowBadUTF8(utf8_error_index);
843 }
|
844 mike 1.27
|
845 mike 1.112 _rep->size += tmp;
846 _rep->data[_rep->size] = '\0';
|
847 mike 1.27
|
848 kumpf 1.39 return *this;
849 }
850
|
851 mike 1.112 void String::remove(Uint32 index, Uint32 n)
|
852 mike 1.27 {
|
853 mike 1.112 if (n == PEG_NOT_FOUND)
854 n = _rep->size - index;
855
856 _checkBounds(index + n, _rep->size);
857
|
858 mike 1.114 if (_rep->refs.get() != 1)
|
859 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
|
860 mike 1.27
|
861 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
|
862 mike 1.27
|
863 mike 1.112 size_t rem = _rep->size - (index + n);
864 Uint16* data = _rep->data;
|
865 mike 1.27
|
866 mike 1.112 if (rem)
867 memmove(data + index, data + index + n, rem * sizeof(Uint16));
|
868 mike 1.27
|
869 mike 1.112 _rep->size -= n;
870 data[_rep->size] = '\0';
|
871 mike 1.27 }
872
|
873 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
|
874 mike 1.27 {
|
875 mike 1.112 // Note: this implementation is very permissive but used for
876 // backwards compatibility.
877
878 if (index < _rep->size)
|
879 mike 1.27 {
|
880 mike 1.112 if (n == PEG_NOT_FOUND || n > _rep->size - index)
881 n = _rep->size - index;
|
882 mike 1.27
|
883 w.otsuka 1.121 return String((Char16*)(_rep->data + index), n);
|
884 mike 1.27 }
|
885 david.dillard 1.105
886 return String();
|
887 mike 1.27 }
888
889 Uint32 String::find(Char16 c) const
890 {
|
891 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
892 mike 1.27
|
893 mike 1.112 if (p)
|
894 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
895 mike 1.27
896 return PEG_NOT_FOUND;
897 }
898
|
899 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
900 mike 1.30 {
|
901 mike 1.112 _checkBounds(index, _rep->size);
902
903 if (index >= _rep->size)
904 return PEG_NOT_FOUND;
905
906 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
|
907 mike 1.30
|
908 mike 1.112 if (p)
|
909 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
910 mike 1.30
911 return PEG_NOT_FOUND;
912 }
913
|
914 mike 1.112 Uint32 StringFindAux(
915 const StringRep* _rep, const Char16* s, Uint32 n)
|
916 mike 1.27 {
|
917 mike 1.112 _checkNullPointer(s);
|
918 mike 1.27
|
919 mike 1.112 const Uint16* data = _rep->data;
920 size_t rem = _rep->size;
921
922 while (n <= rem)
|
923 mike 1.30 {
|
924 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]);
925
926 if (!p)
927 break;
|
928 mike 1.30
|
929 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
|
930 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
931 david.dillard 1.105
|
932 mike 1.112 p++;
933 rem -= p - data;
934 data = p;
|
935 mike 1.27 }
|
936 mike 1.112
|
937 mike 1.27 return PEG_NOT_FOUND;
938 }
939
|
940 mike 1.112 Uint32 String::find(const char* s) const
941 {
942 _checkNullPointer(s);
943
944 // Note: could optimize away creation of temporary, but this is rarely
945 // called.
946 return find(String(s));
947 }
948
|
949 mike 1.27 Uint32 String::reverseFind(Char16 c) const
950 {
|
951 mike 1.112 Uint16 x = c;
952 Uint16* p = _rep->data;
953 Uint16* q = _rep->data + _rep->size;
|
954 mike 1.27
|
955 mike 1.112 while (q != p)
|
956 mike 1.27 {
|
957 mike 1.112 if (*--q == x)
|
958 david.dillard 1.116 return static_cast<Uint32>(q - p);
|
959 mike 1.27 }
960
961 return PEG_NOT_FOUND;
962 }
963
964 void String::toLower()
965 {
|
966 david 1.69 #ifdef PEGASUS_HAS_ICU
|
967 mike 1.112
|
968 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
969 david 1.90 {
|
970 mike 1.114 if (_rep->refs.get() != 1)
|
971 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
972
|
973 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
974 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
975 // prevents optimizations where the us-ascii is converted before
|
976 mike 1.112 // calling ICU.
|
977 yi.zhou 1.108 // The string may shrink or expand after the convert.
978
|
979 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns
980 //// only the size when zero is passed as the destination size argument.
981
|
982 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
983
|
984 mike 1.112 int32_t newSize = u_strToLower(
985 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
986 david.dillard 1.116
|
987 mike 1.112 err = U_ZERO_ERROR;
988
989 //// Reserve enough space for the result.
990
991 if ((Uint32)newSize > _rep->cap)
992 _reserve(_rep, newSize);
993
994 //// Perform the conversion (overlapping buffers are allowed).
|
995 chuck 1.99
|
996 mike 1.112 u_strToLower((UChar*)_rep->data, newSize,
997 (UChar*)_rep->data, _rep->size, NULL, &err);
|
998 yi.zhou 1.108
|
999 mike 1.112 _rep->size = newSize;
1000 return;
|
1001 david 1.90 }
|
1002 mike 1.112
1003 #endif /* PEGASUS_HAS_ICU */
1004
|
1005 mike 1.114 if (_rep->refs.get() != 1)
|
1006 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1007
1008 Uint16* p = _rep->data;
1009 size_t n = _rep->size;
1010
1011 for (; n--; p++)
|
1012 david 1.90 {
|
1013 mike 1.112 if (!(*p & 0xFF00))
1014 *p = _toLower(*p);
|
1015 mike 1.27 }
|
1016 kumpf 1.39 }
1017
|
1018 chuck 1.99 void String::toUpper()
|
1019 david 1.90 {
1020 #ifdef PEGASUS_HAS_ICU
|
1021 mike 1.112
|
1022 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
1023 chuck 1.99 {
|
1024 mike 1.114 if (_rep->refs.get() != 1)
|
1025 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1026
|
1027 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
1028 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
1029 // prevents optimizations where the us-ascii is converted before
|
1030 mike 1.112 // calling ICU.
|
1031 yi.zhou 1.108 // The string may shrink or expand after the convert.
1032
|
1033 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns
1034 //// only the size when zero is passed as the destination size argument.
1035
|
1036 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1037
|
1038 mike 1.112 int32_t newSize = u_strToUpper(
1039 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1040
1041 err = U_ZERO_ERROR;
1042
1043 //// Reserve enough space for the result.
1044
1045 if ((Uint32)newSize > _rep->cap)
1046 _reserve(_rep, newSize);
1047
1048 //// Perform the conversion (overlapping buffers are allowed).
1049
1050 u_strToUpper((UChar*)_rep->data, newSize,
1051 (UChar*)_rep->data, _rep->size, NULL, &err);
|
1052 chuck 1.99
|
1053 mike 1.112 _rep->size = newSize;
|
1054 yi.zhou 1.108
|
1055 mike 1.112 return;
|
1056 david 1.91 }
|
1057 mike 1.112
1058 #endif /* PEGASUS_HAS_ICU */
1059
|
1060 mike 1.114 if (_rep->refs.get() != 1)
|
1061 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1062
1063 Uint16* p = _rep->data;
1064 size_t n = _rep->size;
1065
1066 for (; n--; p++)
1067 *p = _toUpper(*p);
|
1068 david 1.90 }
1069
|
1070 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
1071 kumpf 1.39 {
|
1072 kumpf 1.118 const Uint16* p1 = s1._rep->data;
1073 const Uint16* p2 = s2._rep->data;
|
1074 mike 1.27
|
1075 kumpf 1.118 while (n--)
1076 {
1077 int r = *p1++ - *p2++;
1078 if (r)
1079 {
1080 return r;
1081 }
1082 else if (!p1[-1])
1083 {
1084 // We must have encountered a null terminator in both s1 and s2
1085 return 0;
1086 }
1087 }
1088 return 0;
|
1089 mike 1.27 }
1090
|
1091 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
1092 mike 1.30 {
|
1093 mike 1.112 return _compare(s1._rep->data, s2._rep->data);
1094 }
|
1095 kumpf 1.43
|
1096 mike 1.112 int String::compare(const String& s1, const char* s2)
1097 {
1098 _checkNullPointer(s2);
|
1099 mike 1.30
|
1100 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
1101 return _compareNoUTF8(s1._rep->data, s2);
1102 #else
1103 // ATTN: optimize this!
1104 return String::compare(s1, String(s2));
1105 #endif
|
1106 mike 1.30 }
1107
|
1108 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
|
1109 kumpf 1.40 {
|
1110 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1111 mike 1.112
|
1112 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1113 {
|
1114 mike 1.112 return u_strcasecmp(
1115 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
|
1116 yi.zhou 1.108 }
|
1117 kumpf 1.40
|
1118 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1119
1120 const Uint16* s1 = str1._rep->data;
1121 const Uint16* s2 = str2._rep->data;
1122
1123 while (*s1 && *s2)
|
1124 kumpf 1.40 {
|
1125 mike 1.112 int r = _toLower(*s1++) - _toLower(*s2++);
|
1126 kumpf 1.40
|
1127 david.dillard 1.105 if (r)
1128 return r;
|
1129 kumpf 1.40 }
1130
|
1131 mike 1.112 if (*s2)
|
1132 david.dillard 1.105 return -1;
|
1133 mike 1.112 else if (*s1)
|
1134 david.dillard 1.105 return 1;
|
1135 kumpf 1.40
1136 return 0;
1137 }
1138
|
1139 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
|
1140 mike 1.27 {
|
1141 mike 1.112 #ifdef PEGASUS_HAS_ICU
1142
1143 return String::compareNoCase(s1, s2) == 0;
1144
1145 #else /* PEGASUS_HAS_ICU */
|
1146 mike 1.27
|
1147 mike 1.112 // The following employs loop unrolling for efficiency. Please do not
1148 // eliminate.
|
1149 kumpf 1.39
|
1150 mike 1.112 Uint16* p = (Uint16*)s1.getChar16Data();
1151 Uint16* q = (Uint16*)s2.getChar16Data();
1152 Uint32 n = s2.size();
1153
1154 while (n >= 8)
1155 {
1156 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1157 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1158 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1159 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1160 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1161 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1162 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1163 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1164 {
1165 return false;
1166 }
|
1167 kumpf 1.39
|
1168 mike 1.112 n -= 8;
1169 p += 8;
1170 q += 8;
1171 }
|
1172 mike 1.27
|
1173 mike 1.112 while (n >= 4)
|
1174 kumpf 1.39 {
|
1175 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1176 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1177 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1178 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
|
1179 david.dillard 1.105 {
|
1180 mike 1.112 return false;
|
1181 david.dillard 1.105 }
|
1182 mike 1.112
1183 n -= 4;
1184 p += 4;
1185 q += 4;
1186 }
1187
1188 while (n--)
1189 {
1190 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
|
1191 david.dillard 1.105 return false;
|
1192 mike 1.112
1193 p++;
1194 q++;
|
1195 kumpf 1.39 }
|
1196 mike 1.28
|
1197 kumpf 1.39 return true;
|
1198 mike 1.112
1199 #endif /* PEGASUS_HAS_ICU */
|
1200 david 1.69 }
1201
|
1202 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1203 david 1.69 {
|
1204 mike 1.112 _checkNullPointer(s2);
|
1205 david 1.69
|
1206 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1207 david 1.69
|
1208 mike 1.112 return String::equalNoCase(s1, String(s2));
|
1209 david 1.69
|
1210 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1211 david 1.69
|
1212 mike 1.112 const Uint16* p1 = (Uint16*)s1._rep->data;
1213 const char* p2 = s2;
1214 size_t n = s1._rep->size;
|
1215 david.dillard 1.105
|
1216 mike 1.112 while (n--)
1217 {
1218 if (!*p2)
1219 return false;
|
1220 david 1.71
|
1221 mike 1.112 if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1222 return false;
1223 }
|
1224 kumpf 1.42
|
1225 mike 1.112 if (*p2)
1226 return false;
|
1227 david.dillard 1.116
|
1228 mike 1.112 return true;
|
1229 karl 1.36
|
1230 mike 1.112 #else /* PEGASUS_HAS_ICU */
|
1231 david.dillard 1.105
|
1232 mike 1.112 // ATTN: optimize this!
1233 return String::equalNoCase(s1, String(s2));
|
1234 david.dillard 1.105
|
1235 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1236 }
|
1237 chuck 1.78
|
1238 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
|
1239 karl 1.36 {
|
1240 david.dillard 1.116 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
|
1241 mike 1.112 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
|
1242 karl 1.36 }
1243
|
1244 mike 1.112 Boolean String::equal(const String& s1, const char* s2)
1245 {
1246 #ifdef PEGASUS_STRING_NO_UTF8
|
1247 kumpf 1.35
|
1248 mike 1.112 _checkNullPointer(s2);
|
1249 kumpf 1.39
|
1250 mike 1.112 const Uint16* p = (Uint16*)s1._rep->data;
1251 const char* q = s2;
|
1252 kumpf 1.39
|
1253 mike 1.112 while (*p && *q)
1254 {
1255 if (*p++ != Uint16(*q++))
1256 return false;
1257 }
|
1258 kumpf 1.39
|
1259 mike 1.112 return !(*p || *q);
|
1260 kumpf 1.39
|
1261 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
|
1262 kumpf 1.39
|
1263 mike 1.112 return String::equal(s1, String(s2));
|
1264 kumpf 1.39
|
1265 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1266 kumpf 1.39 }
1267
|
1268 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1269 kumpf 1.39 {
|
1270 mike 1.112 #if defined(PEGASUS_OS_OS400)
|
1271 david 1.72
|
1272 david 1.93 CString cstr = str.getCString();
|
1273 david 1.69 const char* utf8str = cstr;
|
1274 mike 1.112 os << utf8str;
1275 return os;
|
1276 david.dillard 1.116 #else
|
1277 david 1.69
|
1278 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1279 david 1.69
|
1280 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1281 {
|
1282 david.dillard 1.105 char *buf = NULL;
1283 const int size = str.size() * 6;
|
1284 mike 1.112 UnicodeString UniStr(
1285 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1286 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1287 buf = new char[bufsize+1];
1288 UniStr.extract(0,bufsize,buf);
1289 os << buf;
1290 os.flush();
1291 delete [] buf;
|
1292 david.dillard 1.116 return os;
|
1293 yi.zhou 1.108 }
|
1294 mike 1.112
|
1295 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
|
1296 mike 1.112
1297 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1298 yi.zhou 1.108 {
|
1299 mike 1.112 Uint16 code = str[i];
|
1300 david.dillard 1.105
|
1301 mike 1.112 if (code > 0 && !(code & 0xFF00))
1302 os << char(code);
1303 else
1304 {
1305 // Print in hex format:
1306 char buffer[8];
1307 sprintf(buffer, "\\x%04X", code);
1308 os << buffer;
|
1309 david.dillard 1.105 }
|
1310 yi.zhou 1.108 }
|
1311 kumpf 1.39
1312 return os;
|
1313 mike 1.112 #endif // PEGASUS_OS_OS400
|
1314 kumpf 1.39 }
1315
|
1316 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
|
1317 kumpf 1.39 {
|
1318 mike 1.112 StringRep* tmp;
1319
1320 if (_rep->cap)
1321 {
1322 tmp = StringRep::alloc(2 * _rep->cap);
1323 tmp->size = _rep->size;
1324 _copy(tmp->data, _rep->data, _rep->size);
1325 }
1326 else
1327 {
1328 tmp = StringRep::alloc(8);
1329 tmp->size = 0;
1330 }
1331
1332 StringRep::unref(_rep);
1333 _rep = tmp;
|
1334 kumpf 1.39 }
1335
|
1336 mike 1.112 PEGASUS_NAMESPACE_END
1337
1338 /*
1339 ================================================================================
1340
1341 String optimizations:
1342
1343 1. Added mechanism allowing certain functions to be inlined only when
1344 used by internal Pegasus modules. External modules (i.e., providers)
1345 link to a non-inline version, which allows for binary compatibility.
1346
1347 2. Implemented copy-on-write with atomic increment/decrement. This
1348 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1349 for the 'ni1000' benchmark.
1350
1351 3. Employed loop unrolling in several places. For example, see:
1352
1353 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1354
1355 4. Used the "empty-rep" optimization (described in whitepaper from the
1356 GCC Developers Summit). This reduced default construction to a simple
1357 mike 1.112 pointer assignment.
1358
1359 inline String::String() : _rep(&_emptyRep) { }
1360
1361 5. Implemented Uint16 versions of toupper() and tolower() using tables.
1362 For example:
1363
1364 static const char _upper[] =
1365 {
1366 0,1,2,...255
1367 };
1368
1369 inline Uint16 _toUpper(Uint16 x)
1370 {
1371 return (x & 0xFF00) ? x : _upper[x];
1372 }
1373
|
1374 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
|
1375 mike 1.112 operation.
1376
|
1377 david.dillard 1.116 6. Implemented char* version of the following member functions to
1378 eliminate unnecessary creation of anonymous string objects
|
1379 mike 1.112 (temporaries).
1380
1381 String(const String& s1, const char* s2);
1382 String(const char* s1, const String& s2);
1383 String& String::operator=(const char* str);
1384 Uint32 String::find(const char* s) const;
1385 bool String::equal(const String& s1, const char* s2);
1386 static int String::compare(const String& s1, const char* s2);
1387 String& String::append(const char* str);
1388 String& String::append(const char* str, Uint32 size);
1389 static bool String::equalNoCase(const String& s1, const char* s2);
1390 String& operator=(const char* str)
1391 String& String::assign(const char* str)
1392 String& String::append(const char* str)
1393 Boolean operator==(const String& s1, const char* s2)
1394 Boolean operator==(const char* s1, const String& s2)
1395 Boolean operator!=(const String& s1, const char* s2)
1396 Boolean operator!=(const char* s1, const String& s2)
1397 Boolean operator<(const String& s1, const char* s2)
1398 Boolean operator<(const char* s1, const String& s2)
1399 Boolean operator>(const String& s1, const char* s2)
1400 mike 1.112 Boolean operator>(const char* s1, const String& s2)
1401 Boolean operator<=(const String& s1, const char* s2)
1402 Boolean operator<=(const char* s1, const String& s2)
1403 Boolean operator>=(const String& s1, const char* s2)
1404 Boolean operator>=(const char* s1, const String& s2)
1405 String operator+(const String& s1, const char* s2)
1406 String operator+(const char* s1, const String& s2)
1407
|
1408 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
|
1409 mike 1.112 power of two (algorithm from the book "Hacker's Delight").
1410
1411 static Uint32 _roundUpToPow2(Uint32 x)
1412 {
1413 if (x < 8)
1414 return 8;
1415
1416 x--;
1417 x |= (x >> 1);
1418 x |= (x >> 2);
1419 x |= (x >> 4);
1420 x |= (x >> 8);
1421 x |= (x >> 16);
1422 x++;
1423
1424 return x;
1425 }
1426
1427 8. Implemented "concatenating constructors" to eliminate temporaries
|
1428 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
|
1429 mike 1.112 optimization" described by Stan Lippman.
1430
1431 inline String operator+(const String& s1, const String& s2)
1432 {
1433 return String(s1, s2, 0);
1434 }
1435
1436 9. Experimented to find the optimal initial size for a short string.
1437 Eight seems to offer the best tradeoff between space and time.
1438
1439 10. Inlined all members of the Char16 class.
1440
1441 11. Used Uint16 internally in the String class. This showed no improvement
1442 since Char16 was already fully inlined and was essentially reduced to
1443 Uint16 in any case.
1444
1445 12. Implemented conditional logic (#if) allowing error checking logic to
|
1446 david.dillard 1.116 be excluded to better performance. Examples include bounds checking
|
1447 mike 1.112 and null-pointer checking.
1448
1449 13. Used memcpy() and memcmp() where possible. These are implemented using
1450 the rep family of instructions under Intel and are much faster.
1451
|
1452 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
|
1453 mike 1.112 copy routine overhead.
1454
1455 15. Added ASCII7 form of the constructor and assign().
1456
1457 String s("hello world", String::ASCII7);
1458
1459 s.assignASCII7("hello world");
1460
1461 This avoids slower UTF8 processing when not needed.
1462
1463 ================================================================================
1464
1465 TO-DO:
1466
1467 (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
1468
1469 (+) [DONE] Submit BUG-2754 (Windows buffer limit).
1470
1471 (+) [DONE] Eliminate char versions of find() and append().
1472
1473 (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1474 mike 1.112
1475 (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1476
1477 (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1478
1479 (+) [DONE] Comment StringRep allocation layout.
1480
1481 (+) [DONE] Conceal private inline functions.
1482
1483 (+) [DONE] Shorten inclusion of StringInline.h in String.h.
1484
1485 (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
1486 rid of altogether.
1487
1488 (+) [DONE] useCamelNotationOnAllFunctionNames.
1489
1490 (+) [DONE] Check for overflow condition in StringRep::alloc().
1491
1492 (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
1493
1494 (+) [DONE] Fix throw-related memory leak.
1495 mike 1.112
1496 (+) [DONE] Look at PEP223 for coding security guidelines.
1497
1498 (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).
|
1499 kumpf 1.39
|
1500 mike 1.112 (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
|
1501 kumpf 1.39
|
1502 mike 1.112 (+) DOC++ String.h - will open new bug?
|
1503 kumpf 1.39
|
1504 mike 1.112 (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression
1505 on certain platforms).
|
1506 kumpf 1.39
|
1507 mike 1.112 ================================================================================
1508 */
|