1 karl 1.98 //%2005////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 mike 1.27 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
16 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 david.dillard 1.105 //
|
19 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
20 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
22 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
25 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
|
30 mike 1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
|
31 mike 1.27 //
|
32 david.dillard 1.116 // Modified By:
|
33 mike 1.112 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
34 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
|
35 david.dillard 1.116 // David Dillard, Symantec Corp. (david_dillard@symantec.com)
|
36 mike 1.112 // Mike Brasher (mike-brasher@austin.rr.com)
|
37 mike 1.27 //
38 //%/////////////////////////////////////////////////////////////////////////////
39
|
40 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
|
41 mike 1.113 #include <cstring>
|
42 kumpf 1.48 #include "InternalException.h"
|
43 david 1.69 #include "CommonUTF.h"
|
44 mike 1.112 #include "MessageLoader.h"
45 #include "StringRep.h"
|
46 david 1.69
47 #ifdef PEGASUS_HAS_ICU
|
48 chuck 1.99 #include <unicode/ustring.h>
49 #include <unicode/uchar.h>
|
50 david 1.69 #endif
51
|
52 mike 1.112 PEGASUS_NAMESPACE_BEGIN
|
53 mike 1.28
|
54 mike 1.112 //==============================================================================
55 //
56 // Compile-time macros (undefined by default).
57 //
58 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
|
59 david.dillard 1.116 //
|
60 mike 1.112 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
61 //
62 //==============================================================================
|
63 mike 1.27
|
64 mike 1.112 //==============================================================================
|
65 kumpf 1.39 //
|
66 mike 1.112 // File-scope definitions:
|
67 kumpf 1.54 //
|
68 mike 1.112 //==============================================================================
69
70 // Note: this table is much faster than the system toupper(). Please do not
71 // change.
|
72 kumpf 1.54
|
73 david.dillard 1.116 const Uint8 _toUpperTable[256] =
|
74 kumpf 1.54 {
|
75 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
76 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
77 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
78 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
79 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
80 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
81 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
82 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
83 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
84 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
85 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
86 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
87 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
88 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
89 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
90 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
91 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
92 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
93 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
94 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
95 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
96 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
97 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
98 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
99 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
100 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
101 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
102 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
103 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
104 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
105 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
106 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
107 };
108
109 // Note: this table is much faster than the system tulower(). Please do not
110 // change.
111
|
112 david.dillard 1.116 const Uint8 _toLowerTable[256] =
|
113 mike 1.112 {
114 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
115 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
116 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
117 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
118 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
119 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
120 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
121 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
122 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
123 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
124 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
125 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
126 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
127 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
128 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
129 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
130 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
131 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
132 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
133 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
134 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
135 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
136 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
137 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
138 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
139 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
140 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
141 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
142 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
143 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
144 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
145 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
146 };
147
148 // Converts 16-bit characters to upper case. This routine is faster than the
149 // system toupper(). Please do not change.
150 inline Uint16 _toUpper(Uint16 x)
151 {
152 return (x & 0xFF00) ? x : _toUpperTable[x];
|
153 kumpf 1.54 }
154
|
155 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the
156 // system toupper(). Please do not change.
157 inline Uint16 _toLower(Uint16 x)
|
158 kumpf 1.54 {
|
159 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x];
160 }
161
162 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
163 static Uint32 _roundUpToPow2(Uint32 x)
164 {
165 #ifndef PEGASUS_STRING_NO_THROW
166
167 if (x > 0x0FFFFFFF)
168 throw PEGASUS_STD(bad_alloc)();
169
170 #endif
171
172 if (x < 8)
173 return 8;
174
175 x--;
176 x |= (x >> 1);
177 x |= (x >> 2);
178 x |= (x >> 4);
179 x |= (x >> 8);
180 mike 1.112 x |= (x >> 16);
181 x++;
182
183 return x;
184 }
185
// Copies n elements from q to p, converting each element from Q to P by
// plain assignment. The ranges are walked front to back.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // The following employs loop unrolling for efficiency. Please do not
    // eliminate.

    // Blocks of eight.
    for (; n >= 8; p += 8, q += 8, n -= 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    // At most one block of four can remain.
    for (; n >= 4; p += 4, q += 4, n -= 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
    }

    // Leftover elements (fewer than four).
    for (; n; n--)
        *p++ = *q++;
}
221
222 mike 1.112 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
223 {
224 // The following employs loop unrolling for efficiency. Please do not
225 // eliminate.
226
227 while (n >= 4)
228 {
229 if (s[0] == c)
230 return (Uint16*)s;
231 if (s[1] == c)
232 return (Uint16*)&s[1];
233 if (s[2] == c)
234 return (Uint16*)&s[2];
235 if (s[3] == c)
236 return (Uint16*)&s[3];
|
237 kumpf 1.82
|
238 mike 1.112 n -= 4;
239 s += 4;
240 }
241
242 if (n)
243 {
244 if (*s == c)
245 return (Uint16*)s;
246 s++;
247 n--;
248 }
249
250 if (n)
251 {
252 if (*s == c)
253 return (Uint16*)s;
254 s++;
255 n--;
256 }
257
258 if (n && *s == c)
259 mike 1.112 return (Uint16*)s;
260
261 // Not found!
262 return 0;
263 }
264
265 static int _compare(const Uint16* s1, const Uint16* s2)
266 {
267 while (*s1 && *s2)
268 {
269 int r = *s1++ - *s2++;
270
271 if (r)
272 return r;
273 }
274
275 if (*s2)
276 return -1;
277 else if (*s1)
278 return 1;
279
280 mike 1.112 return 0;
281 }
282
283 static int _compareNoUTF8(const Uint16* s1, const char* s2)
284 {
285 Uint16 c1;
286 Uint16 c2;
287
288 do
289 {
290 c1 = *s1++;
291 c2 = *s2++;
292
293 if (c1 == 0)
294 return c1 - c2;
295 }
296 while (c1 == c2);
297
298 return c1 - c2;
299 }
300
301 mike 1.112 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
302 {
303 // This should only be called when s1 and s2 have the same length.
304
305 while (n-- && (*s1++ - *s2++) == 0)
306 ;
307
308 return s1[-1] - s2[-1];
309 }
310
311 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
312 {
313 memcpy(s1, s2, n * sizeof(Uint16));
314 }
315
316 void StringThrowOutOfBounds()
317 {
318 throw IndexOutOfBoundsException();
319 }
320
321 inline void _checkNullPointer(const void* ptr)
322 mike 1.112 {
323 #ifdef PEGASUS_STRING_NO_THROW
324
325 if (!ptr)
326 throw NullPointer();
327
328 #endif
329 }
330
331 static void _StringThrowBadUTF8(Uint32 index)
332 {
333 MessageLoaderParms parms(
334 "Common.String.BAD_UTF8",
335 "The byte sequence starting at index $0 "
336 "is not valid UTF-8 encoding.",
337 index);
338 throw Exception(parms);
339 }
340
341 static size_t _copyFromUTF8(
|
342 david.dillard 1.116 Uint16* dest,
343 const char* src,
|
344 mike 1.112 size_t n,
345 size_t& utf8_error_index)
346 {
347 Uint16* p = dest;
348 const Uint8* q = (const Uint8*)src;
349
350 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
351 // Use loop-unrolling.
352
353 while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
354 {
355 p[0] = q[0];
356 p[1] = q[1];
357 p[2] = q[2];
358 p[3] = q[3];
359 p[4] = q[4];
360 p[5] = q[5];
361 p[6] = q[6];
362 p[7] = q[7];
363 p += 8;
364 q += 8;
365 mike 1.112 n -= 8;
366 }
367
368 while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
369 {
370 p[0] = q[0];
371 p[1] = q[1];
372 p[2] = q[2];
373 p[3] = q[3];
374 p += 4;
375 q += 4;
376 n -= 4;
377 }
378
379 switch (n)
380 {
381 case 0:
382 return p - dest;
383 case 1:
384 if (q[0] < 128)
385 {
386 mike 1.112 p[0] = q[0];
387 return p + 1 - dest;
388 }
389 break;
390 case 2:
391 if (((q[0]|q[1]) & 0x80) == 0)
392 {
393 p[0] = q[0];
394 p[1] = q[1];
395 return p + 2 - dest;
396 }
397 break;
398 case 3:
399 if (((q[0]|q[1]|q[2]) & 0x80) == 0)
400 {
401 p[0] = q[0];
402 p[1] = q[1];
403 p[2] = q[2];
404 return p + 3 - dest;
405 }
406 break;
407 mike 1.112 }
408
409 // Process remaining characters.
410
411 while (n)
412 {
413 // Optimize for 7-bit ASCII case.
414
415 if (*q < 128)
416 {
417 *p++ = *q++;
418 n--;
419 }
420 else
421 {
422 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
423
424 if (c > n || !isValid_U8(q, c) ||
425 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
426 {
427 utf8_error_index = q - (const Uint8*)src;
428 mike 1.112 return size_t(-1);
429 }
430
431 n -= c;
432 }
433 }
434
435 return p - dest;
436 }
437
|
438 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
|
439 mike 1.112 // terminator).
440 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
441 {
442 // The following employs loop unrolling for efficiency. Please do not
443 // eliminate.
444
445 const Uint16* q = src;
446 Uint8* p = (Uint8*)dest;
447
448 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
|
449 kumpf 1.82 {
|
450 mike 1.112 p[0] = q[0];
451 p[1] = q[1];
452 p[2] = q[2];
453 p[3] = q[3];
454 p += 4;
455 q += 4;
456 n -= 4;
|
457 kumpf 1.82 }
|
458 mike 1.112
459 switch (n)
460 {
461 case 0:
462 return p - (Uint8*)dest;
463 case 1:
464 if (q[0] < 128)
465 {
466 p[0] = q[0];
467 return p + 1 - (Uint8*)dest;
468 }
469 break;
470 case 2:
471 if (q[0] < 128 && q[1] < 128)
472 {
473 p[0] = q[0];
474 p[1] = q[1];
475 return p + 2 - (Uint8*)dest;
476 }
477 break;
478 case 3:
479 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
480 {
481 p[0] = q[0];
482 p[1] = q[1];
483 p[2] = q[2];
484 return p + 3 - (Uint8*)dest;
485 }
486 break;
487 }
488
489 // If this line was reached, there must be characters greater than 128.
490
491 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
492
493 return p - (Uint8*)dest;
|
494 kumpf 1.54 }
495
|
496 mike 1.112 static inline size_t _convert(
497 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
|
498 kumpf 1.54 {
|
499 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
500 _copy(p, q, n);
501 return n;
502 #else
503 return _copyFromUTF8(p, q, n, utf8_error_index);
504 #endif
|
505 kumpf 1.54 }
506
|
507 mike 1.112 //==============================================================================
508 //
509 // class CString
510 //
511 //==============================================================================
512
513 CString::CString(const CString& cstr) : _rep(0)
|
514 kumpf 1.54 {
|
515 mike 1.112 if (cstr._rep)
|
516 kumpf 1.82 {
|
517 mike 1.112 size_t n = strlen(cstr._rep) + 1;
518 _rep = (char*)operator new(n);
519 memcpy(_rep, cstr._rep, n);
|
520 kumpf 1.82 }
|
521 kumpf 1.54 }
522
|
523 kumpf 1.56 CString& CString::operator=(const CString& cstr)
524 {
|
525 kumpf 1.82 if (&cstr != this)
|
526 kumpf 1.81 {
|
527 kumpf 1.82 if (_rep)
528 {
|
529 mike 1.112 operator delete(_rep);
|
530 kumpf 1.82 _rep = 0;
531 }
|
532 mike 1.112
|
533 kumpf 1.82 if (cstr._rep)
534 {
|
535 mike 1.112 size_t n = strlen(cstr._rep) + 1;
536 _rep = (char*)operator new(n);
537 memcpy(_rep, cstr._rep, n);
|
538 kumpf 1.82 }
|
539 kumpf 1.81 }
|
540 mike 1.112
|
541 kumpf 1.56 return *this;
542 }
543
|
544 mike 1.112 //==============================================================================
|
545 kumpf 1.54 //
|
546 mike 1.112 // class StringRep
|
547 kumpf 1.39 //
|
548 mike 1.112 //==============================================================================
|
549 kumpf 1.39
|
550 mike 1.112 StringRep StringRep::_emptyRep;
|
551 mike 1.27
|
552 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
|
553 mike 1.27 {
|
554 mike 1.112 #ifndef PEGASUS_STRING_NO_THROW
|
555 mike 1.27
|
556 mike 1.112 // Any string bigger than this is seriously suspect.
557 if (cap > 0x0FFFFFFF)
558 throw PEGASUS_STD(bad_alloc)();
|
559 mike 1.27
|
560 mike 1.112 #endif
|
561 mike 1.27
|
562 mike 1.112 StringRep* rep = (StringRep*)::operator new(
563 sizeof(StringRep) + cap * sizeof(Uint16));
564 rep->cap = cap;
565 new(&rep->refs) AtomicInt(1);
566
567 return rep;
|
568 mike 1.27 }
569
|
570 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
571 chuck 1.102 {
|
572 mike 1.114 if (cap > rep->cap || rep->refs.get() != 1)
|
573 chuck 1.102 {
|
574 mike 1.112 size_t n = _roundUpToPow2(cap);
575 StringRep* newRep = StringRep::alloc(n);
576 newRep->size = rep->size;
577 _copy(newRep->data, rep->data, rep->size + 1);
578 StringRep::unref(rep);
579 rep = newRep;
580 }
581 }
|
582 david.dillard 1.105
|
583 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
584 {
585 StringRep* rep = StringRep::alloc(size);
586 rep->size = size;
587 _copy(rep->data, data, size);
588 rep->data[size] = '\0';
589 return rep;
590 }
|
591 chuck 1.102
|
592 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
593 {
594 // Return a new copy of rep. Release rep.
|
595 chuck 1.102
|
596 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size);
597 newRep->size = rep->size;
598 _copy(newRep->data, rep->data, rep->size);
599 newRep->data[newRep->size] = '\0';
600 StringRep::unref(rep);
601 return newRep;
|
602 chuck 1.102 }
603
|
604 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
|
605 kumpf 1.43 {
|
606 mike 1.112 StringRep* rep = StringRep::alloc(size);
607 size_t utf8_error_index;
608 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
609
610 #ifndef PEGASUS_STRING_NO_THROW
611 if (rep->size == size_t(-1))
612 {
613 StringRep::free(rep);
614 _StringThrowBadUTF8(utf8_error_index);
615 }
616 #endif
|
617 kumpf 1.43
|
618 mike 1.112 rep->data[rep->size] = '\0';
|
619 kumpf 1.43
|
620 mike 1.112 return rep;
|
621 mike 1.27 }
622
|
623 mike 1.112 Uint32 StringRep::length(const Uint16* str)
|
624 mike 1.27 {
|
625 mike 1.112 // Note: We could unroll this but it is rarely called.
626
627 const Uint16* end = (Uint16*)str;
628
629 while (*end++)
630 ;
631
632 return end - str - 1;
|
633 kumpf 1.39 }
|
634 tony 1.66
|
635 mike 1.112 //==============================================================================
636 //
637 // class String
638 //
639 //==============================================================================
640
641 const String String::EMPTY;
|
642 mike 1.27
|
643 kumpf 1.39 String::String(const String& str, Uint32 n)
644 {
|
645 mike 1.112 _checkBounds(n, str._rep->size);
646 _rep = StringRep::create(str._rep->data, n);
|
647 kumpf 1.39 }
648
649 String::String(const Char16* str)
650 {
|
651 mike 1.112 _checkNullPointer(str);
652 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
653 mike 1.27 }
654
|
655 kumpf 1.39 String::String(const Char16* str, Uint32 n)
656 {
|
657 mike 1.112 _checkNullPointer(str);
658 _rep = StringRep::create((Uint16*)str, n);
|
659 kumpf 1.39 }
660
661 String::String(const char* str)
|
662 mike 1.27 {
|
663 mike 1.112 _checkNullPointer(str);
|
664 david.dillard 1.105
|
665 mike 1.112 // Set this just in case create() throws an exception.
666 _rep = &StringRep::_emptyRep;
667 _rep = StringRep::create(str, strlen(str));
|
668 mike 1.27 }
669
|
670 kumpf 1.39 String::String(const char* str, Uint32 n)
|
671 mike 1.27 {
|
672 mike 1.112 _checkNullPointer(str);
|
673 david.dillard 1.105
|
674 mike 1.112 // Set this just in case create() throws an exception.
675 _rep = &StringRep::_emptyRep;
676 _rep = StringRep::create(str, n);
|
677 kumpf 1.39 }
|
678 mike 1.27
|
679 mike 1.112 String::String(const String& s1, const String& s2)
|
680 kumpf 1.39 {
|
681 mike 1.112 size_t n1 = s1._rep->size;
682 size_t n2 = s2._rep->size;
683 size_t n = n1 + n2;
684 _rep = StringRep::alloc(n);
685 _copy(_rep->data, s1._rep->data, n1);
686 _copy(_rep->data + n1, s2._rep->data, n2);
687 _rep->size = n;
688 _rep->data[n] = '\0';
|
689 mike 1.27 }
690
|
691 mike 1.112 String::String(const String& s1, const char* s2)
|
692 mike 1.27 {
|
693 mike 1.112 _checkNullPointer(s2);
694 size_t n1 = s1._rep->size;
695 size_t n2 = strlen(s2);
696 _rep = StringRep::alloc(n1 + n2);
697 _copy(_rep->data, s1._rep->data, n1);
698 size_t utf8_error_index;
699 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
700
701 #ifndef PEGASUS_STRING_NO_THROW
702 if (tmp == size_t(-1))
|
703 kumpf 1.82 {
|
704 mike 1.112 StringRep::free(_rep);
705 _rep = &StringRep::_emptyRep;
706 _StringThrowBadUTF8(utf8_error_index);
|
707 kumpf 1.82 }
|
708 mike 1.112 #endif
709
710 _rep->size = n1 + tmp;
711 _rep->data[_rep->size] = '\0';
|
712 mike 1.27 }
713
|
714 mike 1.112 String::String(const char* s1, const String& s2)
|
715 mike 1.27 {
|
716 mike 1.112 _checkNullPointer(s1);
717 size_t n1 = strlen(s1);
718 size_t n2 = s2._rep->size;
719 _rep = StringRep::alloc(n1 + n2);
720 size_t utf8_error_index;
721 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
722
723 #ifndef PEGASUS_STRING_NO_THROW
724 if (tmp == size_t(-1))
725 {
726 StringRep::free(_rep);
727 _rep = &StringRep::_emptyRep;
728 _StringThrowBadUTF8(utf8_error_index);
729 }
730 #endif
731
732 _rep->size = n2 + tmp;
733 _copy(_rep->data + n1, s2._rep->data, n2);
734 _rep->data[_rep->size] = '\0';
|
735 mike 1.27 }
736
|
737 mike 1.112 String& String::assign(const String& str)
|
738 mike 1.27 {
|
739 mike 1.112 if (_rep != str._rep)
|
740 david.dillard 1.105 {
|
741 mike 1.112 StringRep::unref(_rep);
742 StringRep::ref(_rep = str._rep);
|
743 david.dillard 1.105 }
744
|
745 mike 1.27 return *this;
746 }
747
748 String& String::assign(const Char16* str, Uint32 n)
749 {
|
750 mike 1.112 _checkNullPointer(str);
751
|
752 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
753 david.dillard 1.105 {
|
754 mike 1.112 StringRep::unref(_rep);
755 _rep = StringRep::alloc(n);
|
756 david.dillard 1.105 }
757
|
758 mike 1.112 _rep->size = n;
759 _copy(_rep->data, (Uint16*)str, n);
760 _rep->data[n] = '\0';
761
|
762 mike 1.27 return *this;
763 }
764
|
765 mike 1.112 String& String::assign(const char* str, Uint32 n)
|
766 chuck 1.102 {
|
767 mike 1.112 _checkNullPointer(str);
768
|
769 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
770 david.dillard 1.105 {
|
771 mike 1.112 StringRep::unref(_rep);
772 _rep = StringRep::alloc(n);
|
773 david.dillard 1.105 }
774
|
775 mike 1.112 size_t utf8_error_index;
776 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
|
777 chuck 1.102
|
778 mike 1.112 #ifndef PEGASUS_STRING_NO_THROW
779 if (_rep->size == size_t(-1))
|
780 david.dillard 1.105 {
|
781 mike 1.112 StringRep::free(_rep);
782 _rep = &StringRep::_emptyRep;
783 _StringThrowBadUTF8(utf8_error_index);
|
784 david.dillard 1.105 }
|
785 mike 1.112 #endif
786
787 _rep->data[_rep->size] = 0;
|
788 david.dillard 1.105
|
789 mike 1.27 return *this;
790 }
791
|
792 kumpf 1.39 void String::clear()
793 {
|
794 mike 1.112 if (_rep->size)
795 {
|
796 mike 1.114 if (_rep->refs.get() == 1)
|
797 mike 1.112 {
798 _rep->size = 0;
799 _rep->data[0] = '\0';
800 }
801 else
802 {
803 StringRep::unref(_rep);
804 _rep = &StringRep::_emptyRep;
805 }
806 }
|
807 kumpf 1.39 }
808
|
809 mike 1.112 void String::reserveCapacity(Uint32 cap)
|
810 kumpf 1.39 {
|
811 mike 1.112 _reserve(_rep, cap);
|
812 kumpf 1.39 }
813
|
814 mike 1.112 CString String::getCString() const
815 {
|
816 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16
817 // counterpart, so we allocate extra memory for the worst case. In the
|
818 mike 1.112 // best case, we may need only one third of the memory allocated. But
|
819 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since
820 // CString objects are usually short-lived (disappearing after only a few
|
821 mike 1.112 // instructions). CString objects are typically created on the stack as
822 // means to obtain a char* pointer.
823
824 #ifdef PEGASUS_STRING_NO_UTF8
825 char* str = (char*)operator new(_rep->size + 1);
826 _copy(str, _rep->data, _rep->size);
827 str[_rep->size] = '\0';
828 return CString(str);
|
829 gs.keenan 1.110 #else
|
830 mike 1.112 Uint32 n = 3 * _rep->size;
831 char* str = (char*)operator new(n + 1);
832 size_t size = _copyToUTF8(str, _rep->data, _rep->size);
833 str[size] = '\0';
834 return CString(str);
|
835 gs.keenan 1.110 #endif
|
836 kumpf 1.39 }
837
|
838 mike 1.112 String& String::append(const Char16* str, Uint32 n)
|
839 kumpf 1.39 {
|
840 mike 1.112 _checkNullPointer(str);
841
842 size_t oldSize = _rep->size;
843 size_t newSize = oldSize + n;
844 _reserve(_rep, newSize);
845 _copy(_rep->data + oldSize, (Uint16*)str, n);
846 _rep->size = newSize;
847 _rep->data[newSize] = '\0';
848
849 return *this;
|
850 kumpf 1.39 }
851
|
852 mike 1.112 String& String::append(const String& str)
|
853 mike 1.27 {
|
854 mike 1.112 return append((Char16*)str._rep->data, str._rep->size);
|
855 mike 1.27 }
856
|
857 mike 1.112 String& String::append(const char* str, Uint32 size)
|
858 mike 1.27 {
|
859 mike 1.112 _checkNullPointer(str);
860
861 size_t oldSize = _rep->size;
862 size_t cap = oldSize + size;
863
864 _reserve(_rep, cap);
865 size_t utf8_error_index;
866 size_t tmp = _convert(
867 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
868
869 #ifndef PEGASUS_STRING_NO_THROW
870 if (tmp == size_t(-1))
871 {
872 StringRep::free(_rep);
873 _rep = &StringRep::_emptyRep;
874 _StringThrowBadUTF8(utf8_error_index);
875 }
876 #endif
|
877 mike 1.27
|
878 mike 1.112 _rep->size += tmp;
879 _rep->data[_rep->size] = '\0';
|
880 mike 1.27
|
881 kumpf 1.39 return *this;
882 }
883
|
884 mike 1.112 void String::remove(Uint32 index, Uint32 n)
|
885 mike 1.27 {
|
886 mike 1.112 if (n == PEG_NOT_FOUND)
887 n = _rep->size - index;
888
889 _checkBounds(index + n, _rep->size);
890
|
891 mike 1.114 if (_rep->refs.get() != 1)
|
892 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
|
893 mike 1.27
|
894 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
|
895 mike 1.27
|
896 mike 1.112 size_t rem = _rep->size - (index + n);
897 Uint16* data = _rep->data;
|
898 mike 1.27
|
899 mike 1.112 if (rem)
900 memmove(data + index, data + index + n, rem * sizeof(Uint16));
|
901 mike 1.27
|
902 mike 1.112 _rep->size -= n;
903 data[_rep->size] = '\0';
|
904 mike 1.27 }
905
|
906 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
|
907 mike 1.27 {
|
908 mike 1.112 // Note: this implementation is very permissive but used for
909 // backwards compatibility.
910
911 if (index < _rep->size)
|
912 mike 1.27 {
|
913 mike 1.112 if (n == PEG_NOT_FOUND || n > _rep->size - index)
914 n = _rep->size - index;
|
915 mike 1.27
|
916 mike 1.112 return String((Char16*)_rep->data + index, n);
|
917 mike 1.27 }
|
918 david.dillard 1.105
919 return String();
|
920 mike 1.27 }
921
922 Uint32 String::find(Char16 c) const
923 {
|
924 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
925 mike 1.27
|
926 mike 1.112 if (p)
|
927 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
928 mike 1.27
929 return PEG_NOT_FOUND;
930 }
931
|
932 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
933 mike 1.30 {
|
934 mike 1.112 _checkBounds(index, _rep->size);
935
936 if (index >= _rep->size)
937 return PEG_NOT_FOUND;
938
939 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
|
940 mike 1.30
|
941 mike 1.112 if (p)
|
942 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
943 mike 1.30
944 return PEG_NOT_FOUND;
945 }
946
|
947 mike 1.112 Uint32 StringFindAux(
948 const StringRep* _rep, const Char16* s, Uint32 n)
|
949 mike 1.27 {
|
950 mike 1.112 _checkNullPointer(s);
|
951 mike 1.27
|
952 mike 1.112 const Uint16* data = _rep->data;
953 size_t rem = _rep->size;
954
955 while (n <= rem)
|
956 mike 1.30 {
|
957 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]);
958
959 if (!p)
960 break;
|
961 mike 1.30
|
962 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
|
963 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
964 david.dillard 1.105
|
965 mike 1.112 p++;
966 rem -= p - data;
967 data = p;
|
968 mike 1.27 }
|
969 mike 1.112
|
970 mike 1.27 return PEG_NOT_FOUND;
971 }
972
|
973 mike 1.112 Uint32 String::find(const char* s) const
974 {
975 _checkNullPointer(s);
976
977 // Note: could optimize away creation of temporary, but this is rarely
978 // called.
979 return find(String(s));
980 }
981
|
982 mike 1.27 Uint32 String::reverseFind(Char16 c) const
983 {
|
984 mike 1.112 Uint16 x = c;
985 Uint16* p = _rep->data;
986 Uint16* q = _rep->data + _rep->size;
|
987 mike 1.27
|
988 mike 1.112 while (q != p)
|
989 mike 1.27 {
|
990 mike 1.112 if (*--q == x)
|
991 david.dillard 1.116 return static_cast<Uint32>(q - p);
|
992 mike 1.27 }
993
994 return PEG_NOT_FOUND;
995 }
996
997 void String::toLower()
998 {
|
999 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1000 mike 1.112
|
1001 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
1002 david 1.90 {
|
1003 mike 1.114 if (_rep->refs.get() != 1)
|
1004 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1005
|
1006 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
1007 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
1008 // prevents optimizations where the us-ascii is converted before
|
1009 mike 1.112 // calling ICU.
|
1010 yi.zhou 1.108 // The string may shrink or expand after the convert.
1011
|
1012 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns
1013 //// only the size when zero is passed as the destination size argument.
1014
|
1015 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1016
|
1017 mike 1.112 int32_t newSize = u_strToLower(
1018 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
1019 david.dillard 1.116
|
1020 mike 1.112 err = U_ZERO_ERROR;
1021
1022 //// Reserve enough space for the result.
1023
1024 if ((Uint32)newSize > _rep->cap)
1025 _reserve(_rep, newSize);
1026
1027 //// Perform the conversion (overlapping buffers are allowed).
|
1028 chuck 1.99
|
1029 mike 1.112 u_strToLower((UChar*)_rep->data, newSize,
1030 (UChar*)_rep->data, _rep->size, NULL, &err);
|
1031 yi.zhou 1.108
|
1032 mike 1.112 _rep->size = newSize;
1033 return;
|
1034 david 1.90 }
|
1035 mike 1.112
1036 #endif /* PEGASUS_HAS_ICU */
1037
|
1038 mike 1.114 if (_rep->refs.get() != 1)
|
1039 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1040
1041 Uint16* p = _rep->data;
1042 size_t n = _rep->size;
1043
1044 for (; n--; p++)
|
1045 david 1.90 {
|
1046 mike 1.112 if (!(*p & 0xFF00))
1047 *p = _toLower(*p);
|
1048 mike 1.27 }
|
1049 kumpf 1.39 }
1050
|
1051 chuck 1.99 void String::toUpper()
|
1052 david 1.90 {
1053 #ifdef PEGASUS_HAS_ICU
|
1054 mike 1.112
|
1055 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
1056 chuck 1.99 {
|
1057 mike 1.114 if (_rep->refs.get() != 1)
|
1058 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1059
|
1060 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
1061 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
1062 // prevents optimizations where the us-ascii is converted before
|
1063 mike 1.112 // calling ICU.
|
1064 yi.zhou 1.108 // The string may shrink or expand after the convert.
1065
|
1066 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns
1067 //// only the size when zero is passed as the destination size argument.
1068
|
1069 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1070
|
1071 mike 1.112 int32_t newSize = u_strToUpper(
1072 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1073
1074 err = U_ZERO_ERROR;
1075
1076 //// Reserve enough space for the result.
1077
1078 if ((Uint32)newSize > _rep->cap)
1079 _reserve(_rep, newSize);
1080
1081 //// Perform the conversion (overlapping buffers are allowed).
1082
1083 u_strToUpper((UChar*)_rep->data, newSize,
1084 (UChar*)_rep->data, _rep->size, NULL, &err);
|
1085 chuck 1.99
|
1086 mike 1.112 _rep->size = newSize;
|
1087 yi.zhou 1.108
|
1088 mike 1.112 return;
|
1089 david 1.91 }
|
1090 mike 1.112
1091 #endif /* PEGASUS_HAS_ICU */
1092
|
1093 mike 1.114 if (_rep->refs.get() != 1)
|
1094 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1095
1096 Uint16* p = _rep->data;
1097 size_t n = _rep->size;
1098
1099 for (; n--; p++)
1100 *p = _toUpper(*p);
|
1101 david 1.90 }
1102
|
1103 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
1104 kumpf 1.39 {
|
1105 jim.wunderlich 1.115 PEGASUS_ASSERT(n <= s1._rep->size);
1106 PEGASUS_ASSERT(n <= s2._rep->size);
|
1107 mike 1.27
|
1108 mike 1.112 // Ignoring error in which n is greater than s1.size() or s2.size()
1109 return _compare(s1._rep->data, s2._rep->data, n);
|
1110 mike 1.27 }
1111
|
1112 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
1113 mike 1.30 {
|
1114 mike 1.112 return _compare(s1._rep->data, s2._rep->data);
1115 }
|
1116 kumpf 1.43
|
1117 mike 1.112 int String::compare(const String& s1, const char* s2)
1118 {
1119 _checkNullPointer(s2);
|
1120 mike 1.30
|
1121 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
1122 return _compareNoUTF8(s1._rep->data, s2);
1123 #else
1124 // ATTN: optimize this!
1125 return String::compare(s1, String(s2));
1126 #endif
|
1127 mike 1.30 }
1128
|
1129 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
|
1130 kumpf 1.40 {
|
1131 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1132 mike 1.112
|
1133 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1134 {
|
1135 mike 1.112 return u_strcasecmp(
1136 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
|
1137 yi.zhou 1.108 }
|
1138 kumpf 1.40
|
1139 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1140
1141 const Uint16* s1 = str1._rep->data;
1142 const Uint16* s2 = str2._rep->data;
1143
1144 while (*s1 && *s2)
|
1145 kumpf 1.40 {
|
1146 mike 1.112 int r = _toLower(*s1++) - _toLower(*s2++);
|
1147 kumpf 1.40
|
1148 david.dillard 1.105 if (r)
1149 return r;
|
1150 kumpf 1.40 }
1151
|
1152 mike 1.112 if (*s2)
|
1153 david.dillard 1.105 return -1;
|
1154 mike 1.112 else if (*s1)
|
1155 david.dillard 1.105 return 1;
|
1156 kumpf 1.40
1157 return 0;
1158 }
1159
|
1160 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
|
1161 mike 1.27 {
|
1162 mike 1.112 #ifdef PEGASUS_HAS_ICU
1163
1164 return String::compareNoCase(s1, s2) == 0;
1165
1166 #else /* PEGASUS_HAS_ICU */
|
1167 mike 1.27
|
1168 mike 1.112 // The following employs loop unrolling for efficiency. Please do not
1169 // eliminate.
|
1170 kumpf 1.39
|
1171 mike 1.112 Uint16* p = (Uint16*)s1.getChar16Data();
1172 Uint16* q = (Uint16*)s2.getChar16Data();
1173 Uint32 n = s2.size();
1174
1175 while (n >= 8)
1176 {
1177 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1178 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1179 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1180 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1181 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1182 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1183 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1184 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1185 {
1186 return false;
1187 }
|
1188 kumpf 1.39
|
1189 mike 1.112 n -= 8;
1190 p += 8;
1191 q += 8;
1192 }
|
1193 mike 1.27
|
1194 mike 1.112 while (n >= 4)
|
1195 kumpf 1.39 {
|
1196 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1197 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1198 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1199 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
|
1200 david.dillard 1.105 {
|
1201 mike 1.112 return false;
|
1202 david.dillard 1.105 }
|
1203 mike 1.112
1204 n -= 4;
1205 p += 4;
1206 q += 4;
1207 }
1208
1209 while (n--)
1210 {
1211 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
|
1212 david.dillard 1.105 return false;
|
1213 mike 1.112
1214 p++;
1215 q++;
|
1216 kumpf 1.39 }
|
1217 mike 1.28
|
1218 kumpf 1.39 return true;
|
1219 mike 1.112
1220 #endif /* PEGASUS_HAS_ICU */
|
1221 david 1.69 }
1222
|
1223 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1224 david 1.69 {
|
1225 mike 1.112 _checkNullPointer(s2);
|
1226 david 1.69
|
1227 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1228 david 1.69
|
1229 mike 1.112 return String::equalNoCase(s1, String(s2));
|
1230 david 1.69
|
1231 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1232 david 1.69
|
1233 mike 1.112 const Uint16* p1 = (Uint16*)s1._rep->data;
1234 const char* p2 = s2;
1235 size_t n = s1._rep->size;
|
1236 david.dillard 1.105
|
1237 mike 1.112 while (n--)
1238 {
1239 if (!*p2)
1240 return false;
|
1241 david 1.71
|
1242 mike 1.112 if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1243 return false;
1244 }
|
1245 kumpf 1.42
|
1246 mike 1.112 if (*p2)
1247 return false;
|
1248 david.dillard 1.116
|
1249 mike 1.112 return true;
|
1250 karl 1.36
|
1251 mike 1.112 #else /* PEGASUS_HAS_ICU */
|
1252 david.dillard 1.105
|
1253 mike 1.112 // ATTN: optimize this!
1254 return String::equalNoCase(s1, String(s2));
|
1255 david.dillard 1.105
|
1256 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1257 }
|
1258 chuck 1.78
|
1259 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
|
1260 karl 1.36 {
|
1261 david.dillard 1.116 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
|
1262 mike 1.112 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
|
1263 karl 1.36 }
1264
|
1265 mike 1.112 Boolean String::equal(const String& s1, const char* s2)
1266 {
1267 #ifdef PEGASUS_STRING_NO_UTF8
|
1268 kumpf 1.35
|
1269 mike 1.112 _checkNullPointer(s2);
|
1270 kumpf 1.39
|
1271 mike 1.112 const Uint16* p = (Uint16*)s1._rep->data;
1272 const char* q = s2;
|
1273 kumpf 1.39
|
1274 mike 1.112 while (*p && *q)
1275 {
1276 if (*p++ != Uint16(*q++))
1277 return false;
1278 }
|
1279 kumpf 1.39
|
1280 mike 1.112 return !(*p || *q);
|
1281 kumpf 1.39
|
1282 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
|
1283 kumpf 1.39
|
1284 mike 1.112 return String::equal(s1, String(s2));
|
1285 kumpf 1.39
|
1286 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1287 kumpf 1.39 }
1288
|
1289 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1290 kumpf 1.39 {
|
1291 mike 1.112 #if defined(PEGASUS_OS_OS400)
|
1292 david 1.72
|
1293 david 1.93 CString cstr = str.getCString();
|
1294 david 1.69 const char* utf8str = cstr;
|
1295 mike 1.112 os << utf8str;
1296 return os;
|
1297 david.dillard 1.116 #else
|
1298 david 1.69
|
1299 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1300 david 1.69
|
1301 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1302 {
|
1303 david.dillard 1.105 char *buf = NULL;
1304 const int size = str.size() * 6;
|
1305 mike 1.112 UnicodeString UniStr(
1306 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1307 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1308 buf = new char[bufsize+1];
1309 UniStr.extract(0,bufsize,buf);
1310 os << buf;
1311 os.flush();
1312 delete [] buf;
|
1313 david.dillard 1.116 return os;
|
1314 yi.zhou 1.108 }
|
1315 mike 1.112
|
1316 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
|
1317 mike 1.112
1318 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1319 yi.zhou 1.108 {
|
1320 mike 1.112 Uint16 code = str[i];
|
1321 david.dillard 1.105
|
1322 mike 1.112 if (code > 0 && !(code & 0xFF00))
1323 os << char(code);
1324 else
1325 {
1326 // Print in hex format:
1327 char buffer[8];
1328 sprintf(buffer, "\\x%04X", code);
1329 os << buffer;
|
1330 david.dillard 1.105 }
|
1331 yi.zhou 1.108 }
|
1332 kumpf 1.39
1333 return os;
|
1334 mike 1.112 #endif // PEGASUS_OS_OS400
|
1335 kumpf 1.39 }
1336
|
1337 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
|
1338 kumpf 1.39 {
|
1339 mike 1.112 StringRep* tmp;
1340
1341 if (_rep->cap)
1342 {
1343 tmp = StringRep::alloc(2 * _rep->cap);
1344 tmp->size = _rep->size;
1345 _copy(tmp->data, _rep->data, _rep->size);
1346 }
1347 else
1348 {
1349 tmp = StringRep::alloc(8);
1350 tmp->size = 0;
1351 }
1352
1353 StringRep::unref(_rep);
1354 _rep = tmp;
|
1355 kumpf 1.39 }
1356
|
1357 mike 1.112 PEGASUS_NAMESPACE_END
1358
1359 /*
1360 ================================================================================
1361
1362 String optimizations:
1363
1364 1. Added mechanism allowing certain functions to be inlined only when
1365 used by internal Pegasus modules. External modules (i.e., providers)
1366 link to a non-inline version, which allows for binary compatibility.
1367
1368 2. Implemented copy-on-write with atomic increment/decrement. This
1369 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1370 for the 'ni1000' benchmark.
1371
1372 3. Employed loop unrolling in several places. For example, see:
1373
1374 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1375
1376 4. Used the "empty-rep" optimization (described in whitepaper from the
1377 GCC Developers Summit). This reduced default construction to a simple
1378 mike 1.112 pointer assignment.
1379
1380 inline String::String() : _rep(&_emptyRep) { }
1381
1382 5. Implemented Uint16 versions of toupper() and tolower() using tables.
1383 For example:
1384
1385 static const char _upper[] =
1386 {
1387 0,1,2,...255
1388 };
1389
1390 inline Uint16 _toUpper(Uint16 x)
1391 {
1392 return (x & 0xFF00) ? x : _upper[x];
1393 }
1394
|
1395 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
|
1396 mike 1.112 operation.
1397
|
1398 david.dillard 1.116 6. Implemented char* version of the following member functions to
1399 eliminate unnecessary creation of anonymous string objects
|
1400 mike 1.112 (temporaries).
1401
1402 String(const String& s1, const char* s2);
1403 String(const char* s1, const String& s2);
1404 String& String::operator=(const char* str);
1405 Uint32 String::find(const char* s) const;
1406 bool String::equal(const String& s1, const char* s2);
1407 static int String::compare(const String& s1, const char* s2);
1408 String& String::append(const char* str);
1409 String& String::append(const char* str, Uint32 size);
1410 static bool String::equalNoCase(const String& s1, const char* s2);
1411 String& operator=(const char* str)
1412 String& String::assign(const char* str)
1413 String& String::append(const char* str)
1414 Boolean operator==(const String& s1, const char* s2)
1415 Boolean operator==(const char* s1, const String& s2)
1416 Boolean operator!=(const String& s1, const char* s2)
1417 Boolean operator!=(const char* s1, const String& s2)
1418 Boolean operator<(const String& s1, const char* s2)
1419 Boolean operator<(const char* s1, const String& s2)
1420 Boolean operator>(const String& s1, const char* s2)
1421 mike 1.112 Boolean operator>(const char* s1, const String& s2)
1422 Boolean operator<=(const String& s1, const char* s2)
1423 Boolean operator<=(const char* s1, const String& s2)
1424 Boolean operator>=(const String& s1, const char* s2)
1425 Boolean operator>=(const char* s1, const String& s2)
1426 String operator+(const String& s1, const char* s2)
1427 String operator+(const char* s1, const String& s2)
1428
|
1429 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
|
1430 mike 1.112 power of two (algorithm from the book "Hacker's Delight").
1431
1432 static Uint32 _roundUpToPow2(Uint32 x)
1433 {
1434 if (x < 8)
1435 return 8;
1436
1437 x--;
1438 x |= (x >> 1);
1439 x |= (x >> 2);
1440 x |= (x >> 4);
1441 x |= (x >> 8);
1442 x |= (x >> 16);
1443 x++;
1444
1445 return x;
1446 }
1447
1448 8. Implemented "concatenating constructors" to eliminate temporaries
|
1449 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
|
1450 mike 1.112 optimization" described by Stan Lippman.
1451
1452 inline String operator+(const String& s1, const String& s2)
1453 {
1454 return String(s1, s2, 0);
1455 }
1456
1457 9. Experimented to find the optimal initial size for a short string.
1458 Eight seems to offer the best tradeoff between space and time.
1459
1460 10. Inlined all members of the Char16 class.
1461
1462 11. Used Uint16 internally in the String class. This showed no improvement
1463 since Char16 was already fully inlined and was essentially reduced to
1464 Uint16 in any case.
1465
1466 12. Implemented conditional logic (#if) allowing error checking logic to
|
1467 david.dillard 1.116 be excluded for better performance. Examples include bounds checking
|
1468 mike 1.112 and null-pointer checking.
1469
1470 13. Used memcpy() and memcmp() where possible. These are implemented using
1471 the rep family of instructions under Intel and are much faster.
1472
|
1473 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
|
1474 mike 1.112 copy routine overhead.
1475
1476 15. Added ASCII7 form of the constructor and assign().
1477
1478 String s("hello world", String::ASCII7);
1479
1480 s.assignASCII7("hello world");
1481
1482 This avoids slower UTF8 processing when not needed.
1483
1484 ================================================================================
1485
1486 TO-DO:
1487
1488 (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
1489
1490 (+) [DONE] Submit BUG-2754 (Windows buffer limit).
1491
1492 (+) [DONE] Eliminate char versions of find() and append().
1493
1494 (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1495 mike 1.112
1496 (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1497
1498 (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1499
1500 (+) [DONE] Comment StringRep allocation layout.
1501
1502 (+) [DONE] Conceal private inline functions.
1503
1504 (+) [DONE] Shorten inclusion of StringInline.h in String.h.
1505
1506 (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
1507 rid of altogether.
1508
1509 (+) [DONE] useCamelNotationOnAllFunctionNames.
1510
1511 (+) [DONE] Check for overflow condition in StringRep::alloc().
1512
1513 (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
1514
1515 (+) [DONE] Fix throw-related memory leak.
1516 mike 1.112
1517 (+) [DONE] Look at PEP223 for coding security guidelines.
1518
1519 (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).
|
1520 kumpf 1.39
|
1521 mike 1.112 (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
|
1522 kumpf 1.39
|
1523 mike 1.112 (+) DOC++ String.h - will open new bug?
|
1524 kumpf 1.39
|
1525 mike 1.112 (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression
1526 on certain platforms).
|
1527 kumpf 1.39
|
1528 mike 1.112 ================================================================================
1529 */
|