1 karl 1.119 //%2006////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 karl 1.119 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
12 // EMC Corporation; Symantec Corporation; The Open Group.
|
13 mike 1.27 //
14 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
15 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
16 // deal in the Software without restriction, including without limitation the
17 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
18 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
19 // furnished to do so, subject to the following conditions:
|
20 karl 1.119 //
|
21 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
22 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
23 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
24 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
25 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
27 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 //
30 //==============================================================================
31 //
32 //%/////////////////////////////////////////////////////////////////////////////
33
|
34 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
|
35 mike 1.113 #include <cstring>
|
36 kumpf 1.48 #include "InternalException.h"
|
37 david 1.69 #include "CommonUTF.h"
|
38 mike 1.112 #include "MessageLoader.h"
39 #include "StringRep.h"
|
40 david 1.69
41 #ifdef PEGASUS_HAS_ICU
|
42 chuck 1.99 #include <unicode/ustring.h>
43 #include <unicode/uchar.h>
|
44 david 1.69 #endif
45
|
46 mike 1.112 PEGASUS_NAMESPACE_BEGIN
|
47 mike 1.28
|
48 mike 1.112 //==============================================================================
49 //
50 // Compile-time macros (undefined by default).
51 //
52 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
53 //
54 //==============================================================================
|
55 mike 1.27
|
56 mike 1.112 //==============================================================================
|
57 kumpf 1.39 //
|
58 mike 1.112 // File-scope definitions:
|
59 kumpf 1.54 //
|
60 mike 1.112 //==============================================================================
61
62 // Note: this table is much faster than the system toupper(). Please do not
63 // change.
|
64 kumpf 1.54
|
65 david.dillard 1.116 const Uint8 _toUpperTable[256] =
|
66 kumpf 1.54 {
|
67 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
68 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
69 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
70 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
71 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
72 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
73 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
74 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
75 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
76 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
77 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
78 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
79 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
80 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
81 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
82 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
83 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
84 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
85 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
86 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
87 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
88 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
89 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
90 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
91 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
92 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
93 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
94 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
95 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
96 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
97 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
98 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
99 };
100
101 // Note: this table is much faster than the system tulower(). Please do not
102 // change.
103
|
104 david.dillard 1.116 const Uint8 _toLowerTable[256] =
|
105 mike 1.112 {
106 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
107 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
108 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
109 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
110 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
111 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
112 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
113 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
114 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
115 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
116 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
117 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
118 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
119 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
120 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
121 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
122 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
123 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
124 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
125 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
126 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
127 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
128 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
129 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
130 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
131 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
132 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
133 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
134 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
135 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
136 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
137 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
138 };
139
140 // Converts 16-bit characters to upper case. This routine is faster than the
141 // system toupper(). Please do not change.
142 inline Uint16 _toUpper(Uint16 x)
143 {
144 return (x & 0xFF00) ? x : _toUpperTable[x];
|
145 kumpf 1.54 }
146
|
147 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the
148 // system toupper(). Please do not change.
149 inline Uint16 _toLower(Uint16 x)
|
150 kumpf 1.54 {
|
151 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x];
152 }
153
154 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
155 static Uint32 _roundUpToPow2(Uint32 x)
156 {
|
157 dave.sudlik 1.120 // Check for potential overflow in x
158 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
|
159 mike 1.112
160 if (x < 8)
161 return 8;
162
163 x--;
164 x |= (x >> 1);
165 x |= (x >> 2);
166 x |= (x >> 4);
167 x |= (x >> 8);
168 x |= (x >> 16);
169 x++;
170
171 return x;
172 }
173
// Copies n elements from q to p, converting each element from Q to P by
// plain assignment. The regions are walked front to back.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // The following employs loop unrolling for efficiency. Please do not
    // eliminate.

    // Groups of eight elements.
    for (size_t groups = n >> 3; groups != 0; --groups)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
        p += 8;
        q += 8;
    }

    n &= 7;

    // At most one group of four can remain.
    if (n >= 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p += 4;
        q += 4;
        n -= 4;
    }

    // Zero to three trailing elements.
    for (size_t i = 0; i < n; ++i)
        p[i] = q[i];
}
209
210 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
211 {
212 // The following employs loop unrolling for efficiency. Please do not
213 // eliminate.
214
215 while (n >= 4)
216 {
217 if (s[0] == c)
218 return (Uint16*)s;
219 if (s[1] == c)
220 return (Uint16*)&s[1];
221 if (s[2] == c)
222 mike 1.112 return (Uint16*)&s[2];
223 if (s[3] == c)
224 return (Uint16*)&s[3];
|
225 kumpf 1.82
|
226 mike 1.112 n -= 4;
227 s += 4;
228 }
229
230 if (n)
231 {
232 if (*s == c)
233 return (Uint16*)s;
234 s++;
235 n--;
236 }
237
238 if (n)
239 {
240 if (*s == c)
241 return (Uint16*)s;
242 s++;
243 n--;
244 }
245
246 if (n && *s == c)
247 mike 1.112 return (Uint16*)s;
248
249 // Not found!
250 return 0;
251 }
252
253 static int _compare(const Uint16* s1, const Uint16* s2)
254 {
255 while (*s1 && *s2)
256 {
257 int r = *s1++ - *s2++;
258
259 if (r)
260 return r;
261 }
262
263 if (*s2)
264 return -1;
265 else if (*s1)
266 return 1;
267
268 mike 1.112 return 0;
269 }
270
271 static int _compareNoUTF8(const Uint16* s1, const char* s2)
272 {
273 Uint16 c1;
274 Uint16 c2;
275
276 do
277 {
278 c1 = *s1++;
279 c2 = *s2++;
280
281 if (c1 == 0)
282 return c1 - c2;
283 }
284 while (c1 == c2);
285
286 return c1 - c2;
287 }
288
289 mike 1.112 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
290 {
291 memcpy(s1, s2, n * sizeof(Uint16));
292 }
293
294 void StringThrowOutOfBounds()
295 {
296 throw IndexOutOfBoundsException();
297 }
298
299 inline void _checkNullPointer(const void* ptr)
300 {
301 if (!ptr)
302 throw NullPointer();
303 }
304
305 static void _StringThrowBadUTF8(Uint32 index)
306 {
307 MessageLoaderParms parms(
308 "Common.String.BAD_UTF8",
309 "The byte sequence starting at index $0 "
310 mike 1.112 "is not valid UTF-8 encoding.",
311 index);
312 throw Exception(parms);
313 }
314
315 static size_t _copyFromUTF8(
|
316 david.dillard 1.116 Uint16* dest,
317 const char* src,
|
318 mike 1.112 size_t n,
319 size_t& utf8_error_index)
320 {
321 Uint16* p = dest;
322 const Uint8* q = (const Uint8*)src;
323
324 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
325 // Use loop-unrolling.
326
327 while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
328 {
329 p[0] = q[0];
330 p[1] = q[1];
331 p[2] = q[2];
332 p[3] = q[3];
333 p[4] = q[4];
334 p[5] = q[5];
335 p[6] = q[6];
336 p[7] = q[7];
337 p += 8;
338 q += 8;
339 mike 1.112 n -= 8;
340 }
341
342 while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
343 {
344 p[0] = q[0];
345 p[1] = q[1];
346 p[2] = q[2];
347 p[3] = q[3];
348 p += 4;
349 q += 4;
350 n -= 4;
351 }
352
353 switch (n)
354 {
355 case 0:
356 return p - dest;
357 case 1:
358 if (q[0] < 128)
359 {
360 mike 1.112 p[0] = q[0];
361 return p + 1 - dest;
362 }
363 break;
364 case 2:
365 if (((q[0]|q[1]) & 0x80) == 0)
366 {
367 p[0] = q[0];
368 p[1] = q[1];
369 return p + 2 - dest;
370 }
371 break;
372 case 3:
373 if (((q[0]|q[1]|q[2]) & 0x80) == 0)
374 {
375 p[0] = q[0];
376 p[1] = q[1];
377 p[2] = q[2];
378 return p + 3 - dest;
379 }
380 break;
381 mike 1.112 }
382
383 // Process remaining characters.
384
385 while (n)
386 {
387 // Optimize for 7-bit ASCII case.
388
389 if (*q < 128)
390 {
391 *p++ = *q++;
392 n--;
393 }
394 else
395 {
396 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
397
398 if (c > n || !isValid_U8(q, c) ||
399 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
400 {
401 utf8_error_index = q - (const Uint8*)src;
402 mike 1.112 return size_t(-1);
403 }
404
405 n -= c;
406 }
407 }
408
409 return p - dest;
410 }
411
|
412 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
|
413 mike 1.112 // terminator).
414 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
415 {
416 // The following employs loop unrolling for efficiency. Please do not
417 // eliminate.
418
419 const Uint16* q = src;
420 Uint8* p = (Uint8*)dest;
421
422 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
|
423 kumpf 1.82 {
|
424 mike 1.112 p[0] = q[0];
425 p[1] = q[1];
426 p[2] = q[2];
427 p[3] = q[3];
428 p += 4;
429 q += 4;
430 n -= 4;
|
431 kumpf 1.82 }
|
432 mike 1.112
433 switch (n)
434 {
435 case 0:
436 return p - (Uint8*)dest;
437 case 1:
438 if (q[0] < 128)
439 {
440 p[0] = q[0];
441 return p + 1 - (Uint8*)dest;
442 }
443 break;
444 case 2:
445 if (q[0] < 128 && q[1] < 128)
446 {
447 p[0] = q[0];
448 p[1] = q[1];
449 return p + 2 - (Uint8*)dest;
450 }
451 break;
452 case 3:
453 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
454 {
455 p[0] = q[0];
456 p[1] = q[1];
457 p[2] = q[2];
458 return p + 3 - (Uint8*)dest;
459 }
460 break;
461 }
462
463 // If this line was reached, there must be characters greater than 128.
464
465 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
466
467 return p - (Uint8*)dest;
|
468 kumpf 1.54 }
469
|
470 mike 1.112 static inline size_t _convert(
471 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
|
472 kumpf 1.54 {
|
473 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
474 _copy(p, q, n);
475 return n;
476 #else
477 return _copyFromUTF8(p, q, n, utf8_error_index);
478 #endif
|
479 kumpf 1.54 }
480
|
481 mike 1.112 //==============================================================================
482 //
483 // class CString
484 //
485 //==============================================================================
486
487 CString::CString(const CString& cstr) : _rep(0)
|
488 kumpf 1.54 {
|
489 mike 1.112 if (cstr._rep)
|
490 kumpf 1.82 {
|
491 mike 1.112 size_t n = strlen(cstr._rep) + 1;
492 _rep = (char*)operator new(n);
493 memcpy(_rep, cstr._rep, n);
|
494 kumpf 1.82 }
|
495 kumpf 1.54 }
496
|
497 kumpf 1.56 CString& CString::operator=(const CString& cstr)
498 {
|
499 kumpf 1.82 if (&cstr != this)
|
500 kumpf 1.81 {
|
501 kumpf 1.82 if (_rep)
502 {
|
503 mike 1.112 operator delete(_rep);
|
504 kumpf 1.82 _rep = 0;
505 }
|
506 mike 1.112
|
507 kumpf 1.82 if (cstr._rep)
508 {
|
509 mike 1.112 size_t n = strlen(cstr._rep) + 1;
510 _rep = (char*)operator new(n);
511 memcpy(_rep, cstr._rep, n);
|
512 kumpf 1.82 }
|
513 kumpf 1.81 }
|
514 mike 1.112
|
515 kumpf 1.56 return *this;
516 }
517
|
518 mike 1.112 //==============================================================================
|
519 kumpf 1.54 //
|
520 mike 1.112 // class StringRep
|
521 kumpf 1.39 //
|
522 mike 1.112 //==============================================================================
|
523 kumpf 1.39
|
524 mike 1.112 StringRep StringRep::_emptyRep;
|
525 mike 1.27
|
526 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
|
527 mike 1.27 {
|
528 dave.sudlik 1.120 // Check for potential overflow in cap
529 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
|
530 mike 1.27
|
531 mike 1.112 StringRep* rep = (StringRep*)::operator new(
532 sizeof(StringRep) + cap * sizeof(Uint16));
533 rep->cap = cap;
534 new(&rep->refs) AtomicInt(1);
535
536 return rep;
|
537 mike 1.27 }
538
|
539 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
540 chuck 1.102 {
|
541 mike 1.114 if (cap > rep->cap || rep->refs.get() != 1)
|
542 chuck 1.102 {
|
543 mike 1.112 size_t n = _roundUpToPow2(cap);
544 StringRep* newRep = StringRep::alloc(n);
545 newRep->size = rep->size;
546 _copy(newRep->data, rep->data, rep->size + 1);
547 StringRep::unref(rep);
548 rep = newRep;
549 }
550 }
|
551 david.dillard 1.105
|
552 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
553 {
554 StringRep* rep = StringRep::alloc(size);
555 rep->size = size;
556 _copy(rep->data, data, size);
557 rep->data[size] = '\0';
558 return rep;
559 }
|
560 chuck 1.102
|
561 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
562 {
563 // Return a new copy of rep. Release rep.
|
564 chuck 1.102
|
565 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size);
566 newRep->size = rep->size;
567 _copy(newRep->data, rep->data, rep->size);
568 newRep->data[newRep->size] = '\0';
569 StringRep::unref(rep);
570 return newRep;
|
571 chuck 1.102 }
572
|
573 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
|
574 kumpf 1.43 {
|
575 mike 1.112 StringRep* rep = StringRep::alloc(size);
576 size_t utf8_error_index;
577 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
578
579 if (rep->size == size_t(-1))
580 {
581 StringRep::free(rep);
|
582 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
583 mike 1.112 }
|
584 kumpf 1.43
|
585 mike 1.112 rep->data[rep->size] = '\0';
|
586 kumpf 1.43
|
587 mike 1.112 return rep;
|
588 mike 1.27 }
589
|
590 mike 1.112 Uint32 StringRep::length(const Uint16* str)
|
591 mike 1.27 {
|
592 mike 1.112 // Note: We could unroll this but it is rarely called.
593
594 const Uint16* end = (Uint16*)str;
595
596 while (*end++)
597 ;
598
|
599 a.dunfey 1.125 return (Uint32)(end - str - 1);
|
600 kumpf 1.39 }
|
601 tony 1.66
|
602 mike 1.112 //==============================================================================
603 //
604 // class String
605 //
606 //==============================================================================
607
608 const String String::EMPTY;
|
609 mike 1.27
|
610 kumpf 1.39 String::String(const String& str, Uint32 n)
611 {
|
612 mike 1.112 _checkBounds(n, str._rep->size);
613 _rep = StringRep::create(str._rep->data, n);
|
614 kumpf 1.39 }
615
616 String::String(const Char16* str)
617 {
|
618 mike 1.112 _checkNullPointer(str);
619 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
620 mike 1.27 }
621
|
622 kumpf 1.39 String::String(const Char16* str, Uint32 n)
623 {
|
624 mike 1.112 _checkNullPointer(str);
625 _rep = StringRep::create((Uint16*)str, n);
|
626 kumpf 1.39 }
627
628 String::String(const char* str)
|
629 mike 1.27 {
|
630 mike 1.112 _checkNullPointer(str);
|
631 david.dillard 1.105
|
632 mike 1.112 // Set this just in case create() throws an exception.
633 _rep = &StringRep::_emptyRep;
634 _rep = StringRep::create(str, strlen(str));
|
635 mike 1.27 }
636
|
637 kumpf 1.39 String::String(const char* str, Uint32 n)
|
638 mike 1.27 {
|
639 mike 1.112 _checkNullPointer(str);
|
640 david.dillard 1.105
|
641 mike 1.112 // Set this just in case create() throws an exception.
642 _rep = &StringRep::_emptyRep;
643 _rep = StringRep::create(str, n);
|
644 kumpf 1.39 }
|
645 mike 1.27
|
646 mike 1.112 String::String(const String& s1, const String& s2)
|
647 kumpf 1.39 {
|
648 mike 1.112 size_t n1 = s1._rep->size;
649 size_t n2 = s2._rep->size;
650 size_t n = n1 + n2;
651 _rep = StringRep::alloc(n);
652 _copy(_rep->data, s1._rep->data, n1);
653 _copy(_rep->data + n1, s2._rep->data, n2);
654 _rep->size = n;
655 _rep->data[n] = '\0';
|
656 mike 1.27 }
657
|
658 mike 1.112 String::String(const String& s1, const char* s2)
|
659 mike 1.27 {
|
660 mike 1.112 _checkNullPointer(s2);
661 size_t n1 = s1._rep->size;
662 size_t n2 = strlen(s2);
663 _rep = StringRep::alloc(n1 + n2);
664 _copy(_rep->data, s1._rep->data, n1);
665 size_t utf8_error_index;
666 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
667
668 if (tmp == size_t(-1))
|
669 kumpf 1.82 {
|
670 mike 1.112 StringRep::free(_rep);
671 _rep = &StringRep::_emptyRep;
|
672 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
673 kumpf 1.82 }
|
674 mike 1.112
675 _rep->size = n1 + tmp;
676 _rep->data[_rep->size] = '\0';
|
677 mike 1.27 }
678
|
679 mike 1.112 String::String(const char* s1, const String& s2)
|
680 mike 1.27 {
|
681 mike 1.112 _checkNullPointer(s1);
682 size_t n1 = strlen(s1);
683 size_t n2 = s2._rep->size;
684 _rep = StringRep::alloc(n1 + n2);
685 size_t utf8_error_index;
686 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
687
688 if (tmp == size_t(-1))
689 {
690 StringRep::free(_rep);
691 _rep = &StringRep::_emptyRep;
|
692 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
693 mike 1.112 }
694
695 _rep->size = n2 + tmp;
696 _copy(_rep->data + n1, s2._rep->data, n2);
697 _rep->data[_rep->size] = '\0';
|
698 mike 1.27 }
699
|
700 mike 1.112 String& String::assign(const String& str)
|
701 mike 1.27 {
|
702 mike 1.112 if (_rep != str._rep)
|
703 david.dillard 1.105 {
|
704 mike 1.112 StringRep::unref(_rep);
705 StringRep::ref(_rep = str._rep);
|
706 david.dillard 1.105 }
707
|
708 mike 1.27 return *this;
709 }
710
711 String& String::assign(const Char16* str, Uint32 n)
712 {
|
713 mike 1.112 _checkNullPointer(str);
714
|
715 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
716 david.dillard 1.105 {
|
717 mike 1.112 StringRep::unref(_rep);
718 _rep = StringRep::alloc(n);
|
719 david.dillard 1.105 }
720
|
721 mike 1.112 _rep->size = n;
722 _copy(_rep->data, (Uint16*)str, n);
723 _rep->data[n] = '\0';
724
|
725 mike 1.27 return *this;
726 }
727
|
728 mike 1.112 String& String::assign(const char* str, Uint32 n)
|
729 chuck 1.102 {
|
730 mike 1.112 _checkNullPointer(str);
731
|
732 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
733 david.dillard 1.105 {
|
734 mike 1.112 StringRep::unref(_rep);
735 _rep = StringRep::alloc(n);
|
736 david.dillard 1.105 }
737
|
738 mike 1.112 size_t utf8_error_index;
739 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
|
740 chuck 1.102
|
741 mike 1.112 if (_rep->size == size_t(-1))
|
742 david.dillard 1.105 {
|
743 mike 1.112 StringRep::free(_rep);
744 _rep = &StringRep::_emptyRep;
|
745 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
746 david.dillard 1.105 }
|
747 mike 1.112
748 _rep->data[_rep->size] = 0;
|
749 david.dillard 1.105
|
750 mike 1.27 return *this;
751 }
752
|
753 kumpf 1.39 void String::clear()
754 {
|
755 mike 1.112 if (_rep->size)
756 {
|
757 mike 1.114 if (_rep->refs.get() == 1)
|
758 mike 1.112 {
759 _rep->size = 0;
760 _rep->data[0] = '\0';
761 }
762 else
763 {
764 StringRep::unref(_rep);
765 _rep = &StringRep::_emptyRep;
766 }
767 }
|
768 kumpf 1.39 }
769
|
770 mike 1.112 void String::reserveCapacity(Uint32 cap)
|
771 kumpf 1.39 {
|
772 mike 1.112 _reserve(_rep, cap);
|
773 kumpf 1.39 }
774
|
775 mike 1.112 CString String::getCString() const
776 {
|
777 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16
778 // counterpart, so we allocate extra memory for the worst case. In the
|
779 mike 1.112 // best case, we may need only one third of the memory allocated. But
|
780 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since
781 // CString objects are usually short-lived (disappearing after only a few
|
782 mike 1.112 // instructions). CString objects are typically created on the stack as
783 // means to obtain a char* pointer.
784
785 #ifdef PEGASUS_STRING_NO_UTF8
786 char* str = (char*)operator new(_rep->size + 1);
787 _copy(str, _rep->data, _rep->size);
788 str[_rep->size] = '\0';
789 return CString(str);
|
790 gs.keenan 1.110 #else
|
791 a.dunfey 1.125 Uint32 n = (Uint32)(3 * _rep->size);
|
792 mike 1.112 char* str = (char*)operator new(n + 1);
793 size_t size = _copyToUTF8(str, _rep->data, _rep->size);
794 str[size] = '\0';
795 return CString(str);
|
796 gs.keenan 1.110 #endif
|
797 kumpf 1.39 }
798
|
799 mike 1.112 String& String::append(const Char16* str, Uint32 n)
|
800 kumpf 1.39 {
|
801 mike 1.112 _checkNullPointer(str);
802
803 size_t oldSize = _rep->size;
804 size_t newSize = oldSize + n;
|
805 a.dunfey 1.125 _reserve(_rep, (Uint32)newSize);
|
806 mike 1.112 _copy(_rep->data + oldSize, (Uint16*)str, n);
807 _rep->size = newSize;
808 _rep->data[newSize] = '\0';
809
810 return *this;
|
811 kumpf 1.39 }
812
|
813 mike 1.112 String& String::append(const String& str)
|
814 mike 1.27 {
|
815 a.dunfey 1.125 return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
|
816 mike 1.27 }
817
|
818 mike 1.112 String& String::append(const char* str, Uint32 size)
|
819 mike 1.27 {
|
820 mike 1.112 _checkNullPointer(str);
821
822 size_t oldSize = _rep->size;
823 size_t cap = oldSize + size;
824
|
825 a.dunfey 1.125 _reserve(_rep, (Uint32)cap);
|
826 mike 1.112 size_t utf8_error_index;
827 size_t tmp = _convert(
828 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
829
830 if (tmp == size_t(-1))
831 {
832 StringRep::free(_rep);
833 _rep = &StringRep::_emptyRep;
|
834 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
835 mike 1.112 }
|
836 mike 1.27
|
837 mike 1.112 _rep->size += tmp;
838 _rep->data[_rep->size] = '\0';
|
839 mike 1.27
|
840 kumpf 1.39 return *this;
841 }
842
|
843 mike 1.112 void String::remove(Uint32 index, Uint32 n)
|
844 mike 1.27 {
|
845 mike 1.112 if (n == PEG_NOT_FOUND)
|
846 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
|
847 mike 1.112
848 _checkBounds(index + n, _rep->size);
849
|
850 mike 1.114 if (_rep->refs.get() != 1)
|
851 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
|
852 mike 1.27
|
853 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
|
854 mike 1.27
|
855 mike 1.112 size_t rem = _rep->size - (index + n);
856 Uint16* data = _rep->data;
|
857 mike 1.27
|
858 mike 1.112 if (rem)
859 memmove(data + index, data + index + n, rem * sizeof(Uint16));
|
860 mike 1.27
|
861 mike 1.112 _rep->size -= n;
862 data[_rep->size] = '\0';
|
863 mike 1.27 }
864
|
865 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
|
866 mike 1.27 {
|
867 mike 1.112 // Note: this implementation is very permissive but used for
868 // backwards compatibility.
869
870 if (index < _rep->size)
|
871 mike 1.27 {
|
872 mike 1.112 if (n == PEG_NOT_FOUND || n > _rep->size - index)
|
873 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
|
874 mike 1.27
|
875 w.otsuka 1.121 return String((Char16*)(_rep->data + index), n);
|
876 mike 1.27 }
|
877 david.dillard 1.105
878 return String();
|
879 mike 1.27 }
880
881 Uint32 String::find(Char16 c) const
882 {
|
883 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
884 mike 1.27
|
885 mike 1.112 if (p)
|
886 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
887 mike 1.27
888 return PEG_NOT_FOUND;
889 }
890
|
891 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
892 mike 1.30 {
|
893 mike 1.112 _checkBounds(index, _rep->size);
894
895 if (index >= _rep->size)
896 return PEG_NOT_FOUND;
897
898 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
|
899 mike 1.30
|
900 mike 1.112 if (p)
|
901 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
902 mike 1.30
903 return PEG_NOT_FOUND;
904 }
905
|
906 mike 1.112 Uint32 StringFindAux(
907 const StringRep* _rep, const Char16* s, Uint32 n)
|
908 mike 1.27 {
|
909 mike 1.112 _checkNullPointer(s);
|
910 mike 1.27
|
911 mike 1.112 const Uint16* data = _rep->data;
912 size_t rem = _rep->size;
913
914 while (n <= rem)
|
915 mike 1.30 {
|
916 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]);
917
918 if (!p)
919 break;
|
920 mike 1.30
|
921 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
|
922 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
923 david.dillard 1.105
|
924 mike 1.112 p++;
925 rem -= p - data;
926 data = p;
|
927 mike 1.27 }
|
928 mike 1.112
|
929 mike 1.27 return PEG_NOT_FOUND;
930 }
931
|
932 mike 1.112 Uint32 String::find(const char* s) const
933 {
934 _checkNullPointer(s);
935
936 // Note: could optimize away creation of temporary, but this is rarely
937 // called.
938 return find(String(s));
939 }
940
|
941 mike 1.27 Uint32 String::reverseFind(Char16 c) const
942 {
|
943 mike 1.112 Uint16 x = c;
944 Uint16* p = _rep->data;
945 Uint16* q = _rep->data + _rep->size;
|
946 mike 1.27
|
947 mike 1.112 while (q != p)
|
948 mike 1.27 {
|
949 mike 1.112 if (*--q == x)
|
950 david.dillard 1.116 return static_cast<Uint32>(q - p);
|
951 mike 1.27 }
952
953 return PEG_NOT_FOUND;
954 }
955
956 void String::toLower()
957 {
|
958 david 1.69 #ifdef PEGASUS_HAS_ICU
|
959 mike 1.112
|
960 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
961 david 1.90 {
|
962 mike 1.114 if (_rep->refs.get() != 1)
|
963 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
964
|
965 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
966 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
967 // prevents optimizations where the us-ascii is converted before
|
968 mike 1.112 // calling ICU.
|
969 yi.zhou 1.108 // The string may shrink or expand after the convert.
970
|
971 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns
972 //// only the size when zero is passed as the destination size argument.
973
|
974 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
975
|
976 mike 1.112 int32_t newSize = u_strToLower(
977 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
978 david.dillard 1.116
|
979 mike 1.112 err = U_ZERO_ERROR;
980
981 //// Reserve enough space for the result.
982
983 if ((Uint32)newSize > _rep->cap)
984 _reserve(_rep, newSize);
985
986 //// Perform the conversion (overlapping buffers are allowed).
|
987 chuck 1.99
|
988 mike 1.112 u_strToLower((UChar*)_rep->data, newSize,
989 (UChar*)_rep->data, _rep->size, NULL, &err);
|
990 yi.zhou 1.108
|
991 mike 1.112 _rep->size = newSize;
992 return;
|
993 david 1.90 }
|
994 mike 1.112
995 #endif /* PEGASUS_HAS_ICU */
996
|
997 mike 1.114 if (_rep->refs.get() != 1)
|
998 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
999
1000 Uint16* p = _rep->data;
1001 size_t n = _rep->size;
1002
1003 for (; n--; p++)
|
1004 david 1.90 {
|
1005 mike 1.112 if (!(*p & 0xFF00))
1006 *p = _toLower(*p);
|
1007 mike 1.27 }
|
1008 kumpf 1.39 }
1009
|
1010 chuck 1.99 void String::toUpper()
|
1011 david 1.90 {
1012 #ifdef PEGASUS_HAS_ICU
|
1013 mike 1.112
|
1014 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
1015 chuck 1.99 {
|
1016 mike 1.114 if (_rep->refs.get() != 1)
|
1017 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1018
|
1019 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
1020 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
1021 // prevents optimizations where the us-ascii is converted before
|
1022 mike 1.112 // calling ICU.
|
1023 yi.zhou 1.108 // The string may shrink or expand after the convert.
1024
|
1025 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns
1026 //// only the size when zero is passed as the destination size argument.
1027
|
1028 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1029
|
1030 mike 1.112 int32_t newSize = u_strToUpper(
1031 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1032
1033 err = U_ZERO_ERROR;
1034
1035 //// Reserve enough space for the result.
1036
1037 if ((Uint32)newSize > _rep->cap)
1038 _reserve(_rep, newSize);
1039
1040 //// Perform the conversion (overlapping buffers are allowed).
1041
1042 u_strToUpper((UChar*)_rep->data, newSize,
1043 (UChar*)_rep->data, _rep->size, NULL, &err);
|
1044 chuck 1.99
|
1045 mike 1.112 _rep->size = newSize;
|
1046 yi.zhou 1.108
|
1047 mike 1.112 return;
|
1048 david 1.91 }
|
1049 mike 1.112
1050 #endif /* PEGASUS_HAS_ICU */
1051
|
1052 mike 1.114 if (_rep->refs.get() != 1)
|
1053 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1054
1055 Uint16* p = _rep->data;
1056 size_t n = _rep->size;
1057
1058 for (; n--; p++)
1059 *p = _toUpper(*p);
|
1060 david 1.90 }
1061
|
1062 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
1063 kumpf 1.39 {
|
1064 kumpf 1.118 const Uint16* p1 = s1._rep->data;
1065 const Uint16* p2 = s2._rep->data;
|
1066 mike 1.27
|
1067 kumpf 1.118 while (n--)
1068 {
1069 int r = *p1++ - *p2++;
1070 if (r)
1071 {
1072 return r;
1073 }
1074 else if (!p1[-1])
1075 {
1076 // We must have encountered a null terminator in both s1 and s2
1077 return 0;
1078 }
1079 }
1080 return 0;
|
1081 mike 1.27 }
1082
|
1083 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
1084 mike 1.30 {
|
1085 mike 1.112 return _compare(s1._rep->data, s2._rep->data);
1086 }
|
1087 kumpf 1.43
|
1088 mike 1.112 int String::compare(const String& s1, const char* s2)
1089 {
1090 _checkNullPointer(s2);
|
1091 mike 1.30
|
1092 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
1093 return _compareNoUTF8(s1._rep->data, s2);
1094 #else
1095 // ATTN: optimize this!
1096 return String::compare(s1, String(s2));
1097 #endif
|
1098 mike 1.30 }
1099
|
1100 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
|
1101 kumpf 1.40 {
|
1102 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1103 mike 1.112
|
1104 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1105 {
|
1106 mike 1.112 return u_strcasecmp(
|
1107 dave.sudlik 1.124 (const UChar*)str1._rep->data,
1108 (const UChar*)str2._rep->data,
1109 U_FOLD_CASE_DEFAULT
1110 );
|
1111 yi.zhou 1.108 }
|
1112 kumpf 1.40
|
1113 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1114
1115 const Uint16* s1 = str1._rep->data;
1116 const Uint16* s2 = str2._rep->data;
1117
1118 while (*s1 && *s2)
|
1119 kumpf 1.40 {
|
1120 mike 1.112 int r = _toLower(*s1++) - _toLower(*s2++);
|
1121 kumpf 1.40
|
1122 david.dillard 1.105 if (r)
1123 return r;
|
1124 kumpf 1.40 }
1125
|
1126 mike 1.112 if (*s2)
|
1127 david.dillard 1.105 return -1;
|
1128 mike 1.112 else if (*s1)
|
1129 david.dillard 1.105 return 1;
|
1130 kumpf 1.40
1131 return 0;
1132 }
1133
|
1134 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
|
1135 mike 1.27 {
|
1136 mike 1.112 #ifdef PEGASUS_HAS_ICU
1137
1138 return String::compareNoCase(s1, s2) == 0;
1139
1140 #else /* PEGASUS_HAS_ICU */
|
1141 mike 1.27
|
1142 mike 1.112 // The following employs loop unrolling for efficiency. Please do not
1143 // eliminate.
|
1144 kumpf 1.39
|
1145 mike 1.112 Uint16* p = (Uint16*)s1.getChar16Data();
1146 Uint16* q = (Uint16*)s2.getChar16Data();
1147 Uint32 n = s2.size();
1148
1149 while (n >= 8)
1150 {
1151 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1152 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1153 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1154 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1155 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1156 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1157 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1158 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1159 {
1160 return false;
1161 }
|
1162 kumpf 1.39
|
1163 mike 1.112 n -= 8;
1164 p += 8;
1165 q += 8;
1166 }
|
1167 mike 1.27
|
1168 mike 1.112 while (n >= 4)
|
1169 kumpf 1.39 {
|
1170 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1171 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1172 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1173 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
|
1174 david.dillard 1.105 {
|
1175 mike 1.112 return false;
|
1176 david.dillard 1.105 }
|
1177 mike 1.112
1178 n -= 4;
1179 p += 4;
1180 q += 4;
1181 }
1182
1183 while (n--)
1184 {
1185 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
|
1186 david.dillard 1.105 return false;
|
1187 mike 1.112
1188 p++;
1189 q++;
|
1190 kumpf 1.39 }
|
1191 mike 1.28
|
1192 kumpf 1.39 return true;
|
1193 mike 1.112
1194 #endif /* PEGASUS_HAS_ICU */
|
1195 david 1.69 }
1196
|
1197 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1198 david 1.69 {
|
1199 mike 1.112 _checkNullPointer(s2);
|
1200 david 1.69
|
1201 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1202 david 1.69
|
1203 mike 1.112 return String::equalNoCase(s1, String(s2));
|
1204 david 1.69
|
1205 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1206 david 1.69
|
1207 mike 1.112 const Uint16* p1 = (Uint16*)s1._rep->data;
1208 const char* p2 = s2;
1209 size_t n = s1._rep->size;
|
1210 david.dillard 1.105
|
1211 mike 1.112 while (n--)
1212 {
1213 if (!*p2)
1214 return false;
|
1215 david 1.71
|
1216 mike 1.112 if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1217 return false;
1218 }
|
1219 kumpf 1.42
|
1220 mike 1.112 if (*p2)
1221 return false;
|
1222 david.dillard 1.116
|
1223 mike 1.112 return true;
|
1224 karl 1.36
|
1225 mike 1.112 #else /* PEGASUS_HAS_ICU */
|
1226 david.dillard 1.105
|
1227 mike 1.112 // ATTN: optimize this!
1228 return String::equalNoCase(s1, String(s2));
|
1229 david.dillard 1.105
|
1230 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1231 }
|
1232 chuck 1.78
|
1233 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
|
1234 karl 1.36 {
|
1235 david.dillard 1.116 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
|
1236 mike 1.112 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
|
1237 karl 1.36 }
1238
|
1239 mike 1.112 Boolean String::equal(const String& s1, const char* s2)
1240 {
1241 #ifdef PEGASUS_STRING_NO_UTF8
|
1242 kumpf 1.35
|
1243 mike 1.112 _checkNullPointer(s2);
|
1244 kumpf 1.39
|
1245 mike 1.112 const Uint16* p = (Uint16*)s1._rep->data;
1246 const char* q = s2;
|
1247 kumpf 1.39
|
1248 mike 1.112 while (*p && *q)
1249 {
1250 if (*p++ != Uint16(*q++))
1251 return false;
1252 }
|
1253 kumpf 1.39
|
1254 mike 1.112 return !(*p || *q);
|
1255 kumpf 1.39
|
1256 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
|
1257 kumpf 1.39
|
1258 mike 1.112 return String::equal(s1, String(s2));
|
1259 kumpf 1.39
|
1260 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1261 kumpf 1.39 }
1262
|
1263 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1264 kumpf 1.39 {
|
1265 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1266 david 1.69
|
1267 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1268 {
|
1269 david.dillard 1.105 char *buf = NULL;
1270 const int size = str.size() * 6;
|
1271 mike 1.112 UnicodeString UniStr(
1272 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1273 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1274 buf = new char[bufsize+1];
1275 UniStr.extract(0,bufsize,buf);
1276 os << buf;
1277 os.flush();
1278 delete [] buf;
|
1279 david.dillard 1.116 return os;
|
1280 yi.zhou 1.108 }
|
1281 mike 1.112
|
1282 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
|
1283 mike 1.112
1284 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1285 yi.zhou 1.108 {
|
1286 mike 1.112 Uint16 code = str[i];
|
1287 david.dillard 1.105
|
1288 mike 1.112 if (code > 0 && !(code & 0xFF00))
1289 os << char(code);
1290 else
1291 {
1292 // Print in hex format:
1293 char buffer[8];
1294 sprintf(buffer, "\\x%04X", code);
1295 os << buffer;
|
1296 david.dillard 1.105 }
|
1297 yi.zhou 1.108 }
|
1298 kumpf 1.39
1299 return os;
1300 }
1301
|
1302 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
|
1303 kumpf 1.39 {
|
1304 mike 1.112 StringRep* tmp;
1305
1306 if (_rep->cap)
1307 {
1308 tmp = StringRep::alloc(2 * _rep->cap);
1309 tmp->size = _rep->size;
1310 _copy(tmp->data, _rep->data, _rep->size);
1311 }
1312 else
1313 {
1314 tmp = StringRep::alloc(8);
1315 tmp->size = 0;
1316 }
1317
1318 StringRep::unref(_rep);
1319 _rep = tmp;
|
1320 kumpf 1.39 }
1321
|
1322 mike 1.112 PEGASUS_NAMESPACE_END
1323
1324 /*
1325 ================================================================================
1326
1327 String optimizations:
1328
1329 1. Added mechanism allowing certain functions to be inlined only when
1330 used by internal Pegasus modules. External modules (i.e., providers)
1331 link to a non-inline version, which allows for binary compatibility.
1332
1333 2. Implemented copy-on-write with atomic increment/decrement. This
1334 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1335 for the 'ni1000' benchmark.
1336
1337 3. Employed loop unrolling in several places. For example, see:
1338
1339 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1340
1341 4. Used the "empty-rep" optimization (described in whitepaper from the
1342 GCC Developers Summit). This reduced default construction to a simple
1343 mike 1.112 pointer assignment.
1344
1345 inline String::String() : _rep(&_emptyRep) { }
1346
1347 5. Implemented Uint16 versions of toupper() and tolower() using tables.
1348 For example:
1349
1350 static const char _upper[] =
1351 {
1352 0,1,2,...255
1353 };
1354
1355 inline Uint16 _toUpper(Uint16 x)
1356 {
1357 return (x & 0xFF00) ? x : _upper[x];
1358 }
1359
|
1360 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
|
1361 mike 1.112 operation.
1362
|
1363 david.dillard 1.116 6. Implemented char* version of the following member functions to
1364 eliminate unnecessary creation of anonymous string objects
|
1365 mike 1.112 (temporaries).
1366
1367 String(const String& s1, const char* s2);
1368 String(const char* s1, const String& s2);
1369 String& String::operator=(const char* str);
1370 Uint32 String::find(const char* s) const;
1371 bool String::equal(const String& s1, const char* s2);
1372 static int String::compare(const String& s1, const char* s2);
1373 String& String::append(const char* str);
1374 String& String::append(const char* str, Uint32 size);
1375 static bool String::equalNoCase(const String& s1, const char* s2);
1376 String& operator=(const char* str)
1377 String& String::assign(const char* str)
1378 String& String::append(const char* str)
1379 Boolean operator==(const String& s1, const char* s2)
1380 Boolean operator==(const char* s1, const String& s2)
1381 Boolean operator!=(const String& s1, const char* s2)
1382 Boolean operator!=(const char* s1, const String& s2)
1383 Boolean operator<(const String& s1, const char* s2)
1384 Boolean operator<(const char* s1, const String& s2)
1385 Boolean operator>(const String& s1, const char* s2)
1386 mike 1.112 Boolean operator>(const char* s1, const String& s2)
1387 Boolean operator<=(const String& s1, const char* s2)
1388 Boolean operator<=(const char* s1, const String& s2)
1389 Boolean operator>=(const String& s1, const char* s2)
1390 Boolean operator>=(const char* s1, const String& s2)
1391 String operator+(const String& s1, const char* s2)
1392 String operator+(const char* s1, const String& s2)
1393
|
1394 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
|
1395 mike 1.112 power of two (algorithm from the book "Hacker's Delight").
1396
1397 static Uint32 _roundUpToPow2(Uint32 x)
1398 {
1399 if (x < 8)
1400 return 8;
1401
1402 x--;
1403 x |= (x >> 1);
1404 x |= (x >> 2);
1405 x |= (x >> 4);
1406 x |= (x >> 8);
1407 x |= (x >> 16);
1408 x++;
1409
1410 return x;
1411 }
1412
1413 8. Implemented "concatenating constructors" to eliminate temporaries
|
1414 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
|
1415 mike 1.112 optimization" described by Stan Lippman.
1416
1417 inline String operator+(const String& s1, const String& s2)
1418 {
1419 return String(s1, s2, 0);
1420 }
1421
1422 9. Experimented to find the optimal initial size for a short string.
1423 Eight seems to offer the best tradeoff between space and time.
1424
1425 10. Inlined all members of the Char16 class.
1426
1427 11. Used Uint16 internally in the String class. This showed no improvement
1428 since Char16 was already fully inlined and was essentially reduced to
1429 Uint16 in any case.
1430
1431 12. Implemented conditional logic (#if) allowing error checking logic to
|
1432 david.dillard 1.116 be excluded for better performance. Examples include bounds checking
|
1433 mike 1.112 and null-pointer checking.
1434
1435 13. Used memcpy() and memcmp() where possible. These are implemented using
1436 the rep family of instructions under Intel and are much faster.
1437
|
1438 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
|
1439 mike 1.112 copy routine overhead.
1440
1441 15. Added ASCII7 form of the constructor and assign().
1442
1443 String s("hello world", String::ASCII7);
1444
1445 s.assignASCII7("hello world");
1446
1447 This avoids slower UTF8 processing when not needed.
1448
1449 ================================================================================
1450 */
|