1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
|
2 martin 1.134 //
|
3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license
4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
5 // this work for additional information regarding copyright ownership.
6 // Each contributor licenses this file to you under the OpenPegasus Open
7 // Source License; you may not use this file except in compliance with the
8 // License.
|
9 martin 1.134 //
|
10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
|
16 martin 1.134 //
|
17 martin 1.133 // The above copyright notice and this permission notice shall be included
18 // in all copies or substantial portions of the Software.
|
19 martin 1.134 //
|
20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27 martin 1.134 //
|
28 martin 1.133 //////////////////////////////////////////////////////////////////////////
|
29 mike 1.27 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
|
32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
|
33 mike 1.113 #include <cstring>
|
34 kumpf 1.48 #include "InternalException.h"
|
35 david 1.69 #include "CommonUTF.h"
|
36 mike 1.112 #include "MessageLoader.h"
37 #include "StringRep.h"
|
38 david 1.69
39 #ifdef PEGASUS_HAS_ICU
|
40 kumpf 1.132 # include <unicode/ures.h>
41 # include <unicode/ustring.h>
42 # include <unicode/uchar.h>
|
43 david 1.69 #endif
44
|
45 mike 1.112 PEGASUS_NAMESPACE_BEGIN
|
46 mike 1.28
|
47 mike 1.112 //==============================================================================
48 //
49 // Compile-time macros (undefined by default).
50 //
51 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
52 //
53 //==============================================================================
|
54 mike 1.27
|
55 mike 1.112 //==============================================================================
|
56 kumpf 1.39 //
|
57 mike 1.112 // File-scope definitions:
|
58 kumpf 1.54 //
|
59 mike 1.112 //==============================================================================
60
61 // Note: this table is much faster than the system toupper(). Please do not
62 // change.
|
63 kumpf 1.54
|
64 david.dillard 1.116 const Uint8 _toUpperTable[256] =
|
65 kumpf 1.54 {
|
66 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
67 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
68 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
69 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
70 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
71 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
72 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
73 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
74 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
75 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
76 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
77 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
78 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
79 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
80 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
81 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
82 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
83 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
84 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
85 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
86 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
87 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
88 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
89 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
90 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
91 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
92 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
93 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
94 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
95 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
96 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
97 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
98 };
99
// Note: this table is much faster than the system tolower(). Please do not
// change.
102
|
103 david.dillard 1.116 const Uint8 _toLowerTable[256] =
|
104 mike 1.112 {
105 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
106 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
107 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
108 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
109 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
110 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
111 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
112 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
113 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
114 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
115 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
116 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
117 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
118 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
119 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
120 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
121 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
122 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
123 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
124 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
125 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
126 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
127 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
128 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
129 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
130 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
131 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
132 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
133 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
134 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
135 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
136 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
137 };
138
139 // Converts 16-bit characters to upper case. This routine is faster than the
140 // system toupper(). Please do not change.
141 inline Uint16 _toUpper(Uint16 x)
142 {
143 return (x & 0xFF00) ? x : _toUpperTable[x];
|
144 kumpf 1.54 }
145
|
// Converts 16-bit characters to lower case. This routine is faster than the
// system tolower(). Please do not change.
148 inline Uint16 _toLower(Uint16 x)
|
149 kumpf 1.54 {
|
150 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x];
151 }
152
153 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
154 static Uint32 _roundUpToPow2(Uint32 x)
155 {
|
156 dave.sudlik 1.120 // Check for potential overflow in x
157 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
|
158 mike 1.112
159 if (x < 8)
160 return 8;
161
162 x--;
163 x |= (x >> 1);
164 x |= (x >> 2);
165 x |= (x >> 4);
166 x |= (x >> 8);
167 x |= (x >> 16);
168 x++;
169
170 return x;
171 }
172
// Copies n elements from q to p, converting each element from Q to P by
// plain assignment.
//
// The copy is deliberately unrolled (blocks of eight, then blocks of four,
// then single elements) for efficiency. Please do not collapse it.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // Blocks of eight.
    for (; n >= 8; n -= 8, p += 8, q += 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    // Blocks of four.
    for (; n >= 4; n -= 4, p += 4, q += 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
    }

    // Whatever is left (at most three elements).
    for (; n != 0; n--)
        *p++ = *q++;
}
208
209 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
210 {
211 // The following employs loop unrolling for efficiency. Please do not
212 // eliminate.
213
214 while (n >= 4)
215 {
216 if (s[0] == c)
217 return (Uint16*)s;
218 if (s[1] == c)
219 return (Uint16*)&s[1];
220 if (s[2] == c)
221 mike 1.112 return (Uint16*)&s[2];
222 if (s[3] == c)
223 return (Uint16*)&s[3];
|
224 kumpf 1.82
|
225 mike 1.112 n -= 4;
226 s += 4;
227 }
228
229 if (n)
230 {
231 if (*s == c)
232 return (Uint16*)s;
233 s++;
234 n--;
235 }
236
237 if (n)
238 {
239 if (*s == c)
240 return (Uint16*)s;
241 s++;
242 n--;
243 }
244
245 if (n && *s == c)
246 mike 1.112 return (Uint16*)s;
247
248 // Not found!
249 return 0;
250 }
251
252 static int _compare(const Uint16* s1, const Uint16* s2)
253 {
254 while (*s1 && *s2)
255 {
256 int r = *s1++ - *s2++;
257
258 if (r)
259 return r;
260 }
261
262 if (*s2)
263 return -1;
264 else if (*s1)
265 return 1;
266
267 mike 1.112 return 0;
268 }
269
|
#ifdef PEGASUS_STRING_NO_UTF8
// Compares a zero-terminated UTF-16 string against a zero-terminated char
// string without any UTF-8 decoding: each char of s2 is converted to Uint16
// by plain assignment. Returns the difference of the first differing pair,
// or the difference at the point where s1 ends.
static int _compareNoUTF8(const Uint16* s1, const char* s2)
{
    for (;;)
    {
        Uint16 left = *s1++;
        Uint16 right = *s2++;

        // Stop at the end of s1 or at the first mismatch.
        if (left == 0 || left != right)
            return left - right;
    }
}
#endif
|
289 mike 1.112
290 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
291 {
292 memcpy(s1, s2, n * sizeof(Uint16));
293 }
294
295 void StringThrowOutOfBounds()
296 {
297 throw IndexOutOfBoundsException();
298 }
299
300 inline void _checkNullPointer(const void* ptr)
301 {
302 if (!ptr)
303 throw NullPointer();
304 }
305
306 static void _StringThrowBadUTF8(Uint32 index)
307 {
308 MessageLoaderParms parms(
309 "Common.String.BAD_UTF8",
310 mike 1.112 "The byte sequence starting at index $0 "
311 "is not valid UTF-8 encoding.",
312 index);
313 throw Exception(parms);
314 }
315
316 static size_t _copyFromUTF8(
|
317 david.dillard 1.116 Uint16* dest,
318 const char* src,
|
319 mike 1.112 size_t n,
320 size_t& utf8_error_index)
321 {
322 Uint16* p = dest;
323 const Uint8* q = (const Uint8*)src;
324
325 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
326 // Use loop-unrolling.
327
328 while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
329 {
330 p[0] = q[0];
331 p[1] = q[1];
332 p[2] = q[2];
333 p[3] = q[3];
334 p[4] = q[4];
335 p[5] = q[5];
336 p[6] = q[6];
337 p[7] = q[7];
338 p += 8;
339 q += 8;
340 mike 1.112 n -= 8;
341 }
342
343 while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
344 {
345 p[0] = q[0];
346 p[1] = q[1];
347 p[2] = q[2];
348 p[3] = q[3];
349 p += 4;
350 q += 4;
351 n -= 4;
352 }
353
354 switch (n)
355 {
356 case 0:
357 return p - dest;
358 case 1:
359 if (q[0] < 128)
360 {
361 mike 1.112 p[0] = q[0];
362 return p + 1 - dest;
363 }
364 break;
365 case 2:
366 if (((q[0]|q[1]) & 0x80) == 0)
367 {
368 p[0] = q[0];
369 p[1] = q[1];
370 return p + 2 - dest;
371 }
372 break;
373 case 3:
374 if (((q[0]|q[1]|q[2]) & 0x80) == 0)
375 {
376 p[0] = q[0];
377 p[1] = q[1];
378 p[2] = q[2];
379 return p + 3 - dest;
380 }
381 break;
382 mike 1.112 }
383
384 // Process remaining characters.
385
386 while (n)
387 {
388 // Optimize for 7-bit ASCII case.
389
390 if (*q < 128)
391 {
392 *p++ = *q++;
393 n--;
394 }
395 else
396 {
397 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
398
399 if (c > n || !isValid_U8(q, c) ||
400 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
401 {
402 utf8_error_index = q - (const Uint8*)src;
403 mike 1.112 return size_t(-1);
404 }
405
406 n -= c;
407 }
408 }
409
410 return p - dest;
411 }
412
|
413 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
|
414 mike 1.112 // terminator).
415 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
416 {
417 // The following employs loop unrolling for efficiency. Please do not
418 // eliminate.
419
420 const Uint16* q = src;
421 Uint8* p = (Uint8*)dest;
422
423 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
|
424 kumpf 1.82 {
|
425 mike 1.112 p[0] = q[0];
426 p[1] = q[1];
427 p[2] = q[2];
428 p[3] = q[3];
429 p += 4;
430 q += 4;
431 n -= 4;
|
432 kumpf 1.82 }
|
433 mike 1.112
434 switch (n)
435 {
436 case 0:
437 return p - (Uint8*)dest;
438 case 1:
439 if (q[0] < 128)
440 {
441 p[0] = q[0];
442 return p + 1 - (Uint8*)dest;
443 }
444 break;
445 case 2:
446 if (q[0] < 128 && q[1] < 128)
447 {
448 p[0] = q[0];
449 p[1] = q[1];
450 return p + 2 - (Uint8*)dest;
451 }
452 break;
453 case 3:
454 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
455 {
456 p[0] = q[0];
457 p[1] = q[1];
458 p[2] = q[2];
459 return p + 3 - (Uint8*)dest;
460 }
461 break;
462 }
463
464 // If this line was reached, there must be characters greater than 128.
465
466 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
467
468 return p - (Uint8*)dest;
|
469 kumpf 1.54 }
470
|
471 mike 1.112 static inline size_t _convert(
472 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
|
473 kumpf 1.54 {
|
474 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
475 _copy(p, q, n);
476 return n;
477 #else
478 return _copyFromUTF8(p, q, n, utf8_error_index);
479 #endif
|
480 kumpf 1.54 }
481
|
482 mike 1.112 //==============================================================================
483 //
484 // class CString
485 //
486 //==============================================================================
487
488 CString::CString(const CString& cstr) : _rep(0)
|
489 kumpf 1.54 {
|
490 mike 1.112 if (cstr._rep)
|
491 kumpf 1.82 {
|
492 mike 1.112 size_t n = strlen(cstr._rep) + 1;
493 _rep = (char*)operator new(n);
494 memcpy(_rep, cstr._rep, n);
|
495 kumpf 1.82 }
|
496 kumpf 1.54 }
497
|
498 kumpf 1.56 CString& CString::operator=(const CString& cstr)
499 {
|
500 kumpf 1.82 if (&cstr != this)
|
501 kumpf 1.81 {
|
502 kumpf 1.82 if (_rep)
503 {
|
504 mike 1.112 operator delete(_rep);
|
505 kumpf 1.82 _rep = 0;
506 }
|
507 mike 1.112
|
508 kumpf 1.82 if (cstr._rep)
509 {
|
510 mike 1.112 size_t n = strlen(cstr._rep) + 1;
511 _rep = (char*)operator new(n);
512 memcpy(_rep, cstr._rep, n);
|
513 kumpf 1.82 }
|
514 kumpf 1.81 }
|
515 mike 1.112
|
516 kumpf 1.56 return *this;
517 }
518
|
519 mike 1.112 //==============================================================================
|
520 kumpf 1.54 //
|
521 mike 1.112 // class StringRep
|
522 kumpf 1.39 //
|
523 mike 1.112 //==============================================================================
|
524 kumpf 1.39
|
525 mike 1.112 StringRep StringRep::_emptyRep;
|
526 mike 1.27
|
527 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
|
528 mike 1.27 {
|
529 dave.sudlik 1.120 // Check for potential overflow in cap
530 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
|
531 mike 1.27
|
532 mike 1.112 StringRep* rep = (StringRep*)::operator new(
533 sizeof(StringRep) + cap * sizeof(Uint16));
534 rep->cap = cap;
535 new(&rep->refs) AtomicInt(1);
536
537 return rep;
|
538 mike 1.27 }
539
|
540 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
541 chuck 1.102 {
|
542 mike 1.114 if (cap > rep->cap || rep->refs.get() != 1)
|
543 chuck 1.102 {
|
544 mike 1.112 size_t n = _roundUpToPow2(cap);
545 StringRep* newRep = StringRep::alloc(n);
546 newRep->size = rep->size;
547 _copy(newRep->data, rep->data, rep->size + 1);
548 StringRep::unref(rep);
549 rep = newRep;
550 }
551 }
|
552 david.dillard 1.105
|
553 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
554 {
555 StringRep* rep = StringRep::alloc(size);
556 rep->size = size;
557 _copy(rep->data, data, size);
558 rep->data[size] = '\0';
559 return rep;
560 }
|
561 chuck 1.102
|
562 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
563 {
564 // Return a new copy of rep. Release rep.
|
565 chuck 1.102
|
566 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size);
567 newRep->size = rep->size;
568 _copy(newRep->data, rep->data, rep->size);
569 newRep->data[newRep->size] = '\0';
570 StringRep::unref(rep);
571 return newRep;
|
572 chuck 1.102 }
573
|
574 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
|
575 kumpf 1.43 {
|
576 mike 1.112 StringRep* rep = StringRep::alloc(size);
577 size_t utf8_error_index;
578 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
579
580 if (rep->size == size_t(-1))
581 {
582 StringRep::free(rep);
|
583 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
584 mike 1.112 }
|
585 kumpf 1.43
|
586 mike 1.112 rep->data[rep->size] = '\0';
|
587 kumpf 1.43
|
588 mike 1.112 return rep;
|
589 mike 1.27 }
590
|
591 mike 1.112 Uint32 StringRep::length(const Uint16* str)
|
592 mike 1.27 {
|
593 mike 1.112 // Note: We could unroll this but it is rarely called.
594
595 const Uint16* end = (Uint16*)str;
596
597 while (*end++)
598 ;
599
|
600 a.dunfey 1.125 return (Uint32)(end - str - 1);
|
601 kumpf 1.39 }
|
602 tony 1.66
|
603 mike 1.112 //==============================================================================
604 //
605 // class String
606 //
607 //==============================================================================
608
609 const String String::EMPTY;
|
610 mike 1.27
|
611 kumpf 1.39 String::String(const String& str, Uint32 n)
612 {
|
613 mike 1.112 _checkBounds(n, str._rep->size);
614 _rep = StringRep::create(str._rep->data, n);
|
615 kumpf 1.39 }
616
617 String::String(const Char16* str)
618 {
|
619 mike 1.112 _checkNullPointer(str);
620 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
621 mike 1.27 }
622
|
623 kumpf 1.39 String::String(const Char16* str, Uint32 n)
624 {
|
625 mike 1.112 _checkNullPointer(str);
626 _rep = StringRep::create((Uint16*)str, n);
|
627 kumpf 1.39 }
628
629 String::String(const char* str)
|
630 mike 1.27 {
|
631 mike 1.112 _checkNullPointer(str);
|
632 david.dillard 1.105
|
633 mike 1.112 // Set this just in case create() throws an exception.
634 _rep = &StringRep::_emptyRep;
635 _rep = StringRep::create(str, strlen(str));
|
636 mike 1.27 }
637
|
638 kumpf 1.39 String::String(const char* str, Uint32 n)
|
639 mike 1.27 {
|
640 mike 1.112 _checkNullPointer(str);
|
641 david.dillard 1.105
|
642 mike 1.112 // Set this just in case create() throws an exception.
643 _rep = &StringRep::_emptyRep;
644 _rep = StringRep::create(str, n);
|
645 kumpf 1.39 }
|
646 mike 1.27
|
647 mike 1.112 String::String(const String& s1, const String& s2)
|
648 kumpf 1.39 {
|
649 mike 1.112 size_t n1 = s1._rep->size;
650 size_t n2 = s2._rep->size;
651 size_t n = n1 + n2;
652 _rep = StringRep::alloc(n);
653 _copy(_rep->data, s1._rep->data, n1);
654 _copy(_rep->data + n1, s2._rep->data, n2);
655 _rep->size = n;
656 _rep->data[n] = '\0';
|
657 mike 1.27 }
658
|
659 mike 1.112 String::String(const String& s1, const char* s2)
|
660 mike 1.27 {
|
661 mike 1.112 _checkNullPointer(s2);
662 size_t n1 = s1._rep->size;
663 size_t n2 = strlen(s2);
664 _rep = StringRep::alloc(n1 + n2);
665 _copy(_rep->data, s1._rep->data, n1);
666 size_t utf8_error_index;
667 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
668
669 if (tmp == size_t(-1))
|
670 kumpf 1.82 {
|
671 mike 1.112 StringRep::free(_rep);
672 _rep = &StringRep::_emptyRep;
|
673 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
674 kumpf 1.82 }
|
675 mike 1.112
676 _rep->size = n1 + tmp;
677 _rep->data[_rep->size] = '\0';
|
678 mike 1.27 }
679
|
680 mike 1.112 String::String(const char* s1, const String& s2)
|
681 mike 1.27 {
|
682 mike 1.112 _checkNullPointer(s1);
683 size_t n1 = strlen(s1);
684 size_t n2 = s2._rep->size;
685 _rep = StringRep::alloc(n1 + n2);
686 size_t utf8_error_index;
687 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
688
689 if (tmp == size_t(-1))
690 {
691 StringRep::free(_rep);
692 _rep = &StringRep::_emptyRep;
|
693 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
694 mike 1.112 }
695
696 _rep->size = n2 + tmp;
697 _copy(_rep->data + n1, s2._rep->data, n2);
698 _rep->data[_rep->size] = '\0';
|
699 mike 1.27 }
700
|
701 mike 1.112 String& String::assign(const String& str)
|
702 mike 1.27 {
|
703 mike 1.112 if (_rep != str._rep)
|
704 david.dillard 1.105 {
|
705 mike 1.112 StringRep::unref(_rep);
706 StringRep::ref(_rep = str._rep);
|
707 david.dillard 1.105 }
708
|
709 mike 1.27 return *this;
710 }
711
712 String& String::assign(const Char16* str, Uint32 n)
713 {
|
714 mike 1.112 _checkNullPointer(str);
715
|
716 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
717 david.dillard 1.105 {
|
718 mike 1.112 StringRep::unref(_rep);
719 _rep = StringRep::alloc(n);
|
720 david.dillard 1.105 }
721
|
722 mike 1.112 _rep->size = n;
723 _copy(_rep->data, (Uint16*)str, n);
724 _rep->data[n] = '\0';
725
|
726 mike 1.27 return *this;
727 }
728
|
729 mike 1.112 String& String::assign(const char* str, Uint32 n)
|
730 chuck 1.102 {
|
731 mike 1.112 _checkNullPointer(str);
732
|
733 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
734 david.dillard 1.105 {
|
735 mike 1.112 StringRep::unref(_rep);
736 _rep = StringRep::alloc(n);
|
737 david.dillard 1.105 }
738
|
739 mike 1.112 size_t utf8_error_index;
740 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
|
741 chuck 1.102
|
742 mike 1.112 if (_rep->size == size_t(-1))
|
743 david.dillard 1.105 {
|
744 mike 1.112 StringRep::free(_rep);
745 _rep = &StringRep::_emptyRep;
|
746 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
747 david.dillard 1.105 }
|
748 mike 1.112
749 _rep->data[_rep->size] = 0;
|
750 david.dillard 1.105
|
751 mike 1.27 return *this;
752 }
753
|
754 kumpf 1.39 void String::clear()
755 {
|
756 mike 1.112 if (_rep->size)
757 {
|
758 mike 1.114 if (_rep->refs.get() == 1)
|
759 mike 1.112 {
760 _rep->size = 0;
761 _rep->data[0] = '\0';
762 }
763 else
764 {
765 StringRep::unref(_rep);
766 _rep = &StringRep::_emptyRep;
767 }
768 }
|
769 kumpf 1.39 }
770
|
771 mike 1.112 void String::reserveCapacity(Uint32 cap)
|
772 kumpf 1.39 {
|
773 mike 1.112 _reserve(_rep, cap);
|
774 kumpf 1.39 }
775
|
776 mike 1.112 CString String::getCString() const
777 {
|
778 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16
779 // counterpart, so we allocate extra memory for the worst case. In the
|
780 mike 1.112 // best case, we may need only one third of the memory allocated. But
|
781 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since
782 // CString objects are usually short-lived (disappearing after only a few
|
783 mike 1.112 // instructions). CString objects are typically created on the stack as
784 // means to obtain a char* pointer.
785
786 #ifdef PEGASUS_STRING_NO_UTF8
787 char* str = (char*)operator new(_rep->size + 1);
788 _copy(str, _rep->data, _rep->size);
789 str[_rep->size] = '\0';
790 return CString(str);
|
791 gs.keenan 1.110 #else
|
792 a.dunfey 1.125 Uint32 n = (Uint32)(3 * _rep->size);
|
793 mike 1.112 char* str = (char*)operator new(n + 1);
794 size_t size = _copyToUTF8(str, _rep->data, _rep->size);
795 str[size] = '\0';
796 return CString(str);
|
797 gs.keenan 1.110 #endif
|
798 kumpf 1.39 }
799
|
800 mike 1.112 String& String::append(const Char16* str, Uint32 n)
|
801 kumpf 1.39 {
|
802 mike 1.112 _checkNullPointer(str);
803
804 size_t oldSize = _rep->size;
805 size_t newSize = oldSize + n;
|
806 a.dunfey 1.125 _reserve(_rep, (Uint32)newSize);
|
807 mike 1.112 _copy(_rep->data + oldSize, (Uint16*)str, n);
808 _rep->size = newSize;
809 _rep->data[newSize] = '\0';
810
811 return *this;
|
812 kumpf 1.39 }
813
|
814 mike 1.112 String& String::append(const String& str)
|
815 mike 1.27 {
|
816 a.dunfey 1.125 return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
|
817 mike 1.27 }
818
|
819 mike 1.112 String& String::append(const char* str, Uint32 size)
|
820 mike 1.27 {
|
821 mike 1.112 _checkNullPointer(str);
822
823 size_t oldSize = _rep->size;
824 size_t cap = oldSize + size;
825
|
826 a.dunfey 1.125 _reserve(_rep, (Uint32)cap);
|
827 mike 1.112 size_t utf8_error_index;
828 size_t tmp = _convert(
829 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
830
831 if (tmp == size_t(-1))
832 {
833 StringRep::free(_rep);
834 _rep = &StringRep::_emptyRep;
|
835 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
836 mike 1.112 }
|
837 mike 1.27
|
838 mike 1.112 _rep->size += tmp;
839 _rep->data[_rep->size] = '\0';
|
840 mike 1.27
|
841 kumpf 1.39 return *this;
842 }
843
|
844 mike 1.112 void String::remove(Uint32 index, Uint32 n)
|
845 mike 1.27 {
|
846 mike 1.112 if (n == PEG_NOT_FOUND)
|
847 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
|
848 mike 1.112
849 _checkBounds(index + n, _rep->size);
850
|
851 mike 1.114 if (_rep->refs.get() != 1)
|
852 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
|
853 mike 1.27
|
854 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
|
855 mike 1.27
|
856 mike 1.112 size_t rem = _rep->size - (index + n);
857 Uint16* data = _rep->data;
|
858 mike 1.27
|
859 mike 1.112 if (rem)
860 memmove(data + index, data + index + n, rem * sizeof(Uint16));
|
861 mike 1.27
|
862 mike 1.112 _rep->size -= n;
863 data[_rep->size] = '\0';
|
864 mike 1.27 }
865
|
866 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
|
867 mike 1.27 {
|
868 mike 1.112 // Note: this implementation is very permissive but used for
869 // backwards compatibility.
870
871 if (index < _rep->size)
|
872 mike 1.27 {
|
873 mike 1.112 if (n == PEG_NOT_FOUND || n > _rep->size - index)
|
874 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
|
875 mike 1.27
|
876 w.otsuka 1.121 return String((Char16*)(_rep->data + index), n);
|
877 mike 1.27 }
|
878 david.dillard 1.105
879 return String();
|
880 mike 1.27 }
881
882 Uint32 String::find(Char16 c) const
883 {
|
884 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
885 mike 1.27
|
886 mike 1.112 if (p)
|
887 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
888 mike 1.27
889 return PEG_NOT_FOUND;
890 }
891
|
892 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
893 mike 1.30 {
|
894 mike 1.112 _checkBounds(index, _rep->size);
895
896 if (index >= _rep->size)
897 return PEG_NOT_FOUND;
898
899 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
|
900 mike 1.30
|
901 mike 1.112 if (p)
|
902 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
903 mike 1.30
904 return PEG_NOT_FOUND;
905 }
906
|
907 mike 1.112 Uint32 StringFindAux(
908 const StringRep* _rep, const Char16* s, Uint32 n)
|
909 mike 1.27 {
|
910 mike 1.112 _checkNullPointer(s);
|
911 mike 1.27
|
912 mike 1.112 const Uint16* data = _rep->data;
913 size_t rem = _rep->size;
914
915 while (n <= rem)
|
916 mike 1.30 {
|
917 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]);
918
919 if (!p)
920 break;
|
921 mike 1.30
|
922 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
|
923 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
924 david.dillard 1.105
|
925 mike 1.112 p++;
926 rem -= p - data;
927 data = p;
|
928 mike 1.27 }
|
929 mike 1.112
|
930 mike 1.27 return PEG_NOT_FOUND;
931 }
932
|
933 mike 1.112 Uint32 String::find(const char* s) const
934 {
935 _checkNullPointer(s);
936
937 // Note: could optimize away creation of temporary, but this is rarely
938 // called.
939 return find(String(s));
940 }
941
|
942 mike 1.27 Uint32 String::reverseFind(Char16 c) const
943 {
|
944 mike 1.112 Uint16 x = c;
945 Uint16* p = _rep->data;
946 Uint16* q = _rep->data + _rep->size;
|
947 mike 1.27
|
948 mike 1.112 while (q != p)
|
949 mike 1.27 {
|
950 mike 1.112 if (*--q == x)
|
951 david.dillard 1.116 return static_cast<Uint32>(q - p);
|
952 mike 1.27 }
953
954 return PEG_NOT_FOUND;
955 }
956
957 void String::toLower()
958 {
|
959 david 1.69 #ifdef PEGASUS_HAS_ICU
|
960 mike 1.112
|
961 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
962 david 1.90 {
|
963 mike 1.114 if (_rep->refs.get() != 1)
|
964 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
965
|
966 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
967 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
968 // prevents optimizations where the us-ascii is converted before
|
969 mike 1.112 // calling ICU.
|
970 yi.zhou 1.108 // The string may shrink or expand after the convert.
971
|
972 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns
973 //// only the size when zero is passed as the destination size argument.
974
|
975 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
976
|
977 mike 1.112 int32_t newSize = u_strToLower(
978 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
979 david.dillard 1.116
|
980 mike 1.112 err = U_ZERO_ERROR;
981
982 //// Reserve enough space for the result.
983
984 if ((Uint32)newSize > _rep->cap)
985 _reserve(_rep, newSize);
986
987 //// Perform the conversion (overlapping buffers are allowed).
|
988 chuck 1.99
|
989 mike 1.112 u_strToLower((UChar*)_rep->data, newSize,
990 (UChar*)_rep->data, _rep->size, NULL, &err);
|
991 yi.zhou 1.108
|
992 mike 1.112 _rep->size = newSize;
993 return;
|
994 david 1.90 }
|
995 mike 1.112
996 #endif /* PEGASUS_HAS_ICU */
997
|
998 mike 1.114 if (_rep->refs.get() != 1)
|
999 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1000
1001 Uint16* p = _rep->data;
1002 size_t n = _rep->size;
1003
1004 for (; n--; p++)
|
1005 david 1.90 {
|
1006 mike 1.112 if (!(*p & 0xFF00))
1007 *p = _toLower(*p);
|
1008 mike 1.27 }
|
1009 kumpf 1.39 }
1010
|
1011 chuck 1.99 void String::toUpper()
|
1012 david 1.90 {
1013 #ifdef PEGASUS_HAS_ICU
|
1014 mike 1.112
|
1015 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
1016 chuck 1.99 {
|
1017 mike 1.114 if (_rep->refs.get() != 1)
|
1018 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1019
|
1020 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
1021 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
1022 // prevents optimizations where the us-ascii is converted before
|
1023 mike 1.112 // calling ICU.
|
1024 yi.zhou 1.108 // The string may shrink or expand after the convert.
1025
|
1026 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns
1027 //// only the size when zero is passed as the destination size argument.
1028
|
1029 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1030
|
1031 mike 1.112 int32_t newSize = u_strToUpper(
1032 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1033
1034 err = U_ZERO_ERROR;
1035
1036 //// Reserve enough space for the result.
1037
1038 if ((Uint32)newSize > _rep->cap)
1039 _reserve(_rep, newSize);
1040
1041 //// Perform the conversion (overlapping buffers are allowed).
1042
1043 u_strToUpper((UChar*)_rep->data, newSize,
1044 (UChar*)_rep->data, _rep->size, NULL, &err);
|
1045 chuck 1.99
|
1046 mike 1.112 _rep->size = newSize;
|
1047 yi.zhou 1.108
|
1048 mike 1.112 return;
|
1049 david 1.91 }
|
1050 mike 1.112
1051 #endif /* PEGASUS_HAS_ICU */
1052
|
1053 mike 1.114 if (_rep->refs.get() != 1)
|
1054 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
1055
1056 Uint16* p = _rep->data;
1057 size_t n = _rep->size;
1058
1059 for (; n--; p++)
1060 *p = _toUpper(*p);
|
1061 david 1.90 }
1062
|
1063 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
1064 kumpf 1.39 {
|
1065 kumpf 1.118 const Uint16* p1 = s1._rep->data;
1066 const Uint16* p2 = s2._rep->data;
|
1067 mike 1.27
|
1068 kumpf 1.118 while (n--)
1069 {
1070 int r = *p1++ - *p2++;
1071 if (r)
1072 {
1073 return r;
1074 }
1075 else if (!p1[-1])
1076 {
1077 // We must have encountered a null terminator in both s1 and s2
1078 return 0;
1079 }
1080 }
1081 return 0;
|
1082 mike 1.27 }
1083
|
1084 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
1085 mike 1.30 {
|
1086 mike 1.112 return _compare(s1._rep->data, s2._rep->data);
1087 }
|
1088 kumpf 1.43
|
1089 mike 1.112 int String::compare(const String& s1, const char* s2)
1090 {
1091 _checkNullPointer(s2);
|
1092 mike 1.30
|
1093 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
1094 return _compareNoUTF8(s1._rep->data, s2);
1095 #else
1096 // ATTN: optimize this!
1097 return String::compare(s1, String(s2));
1098 #endif
|
1099 mike 1.30 }
1100
|
1101 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
|
1102 kumpf 1.40 {
|
1103 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1104 mike 1.112
|
1105 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1106 {
|
1107 mike 1.112 return u_strcasecmp(
|
1108 dave.sudlik 1.124 (const UChar*)str1._rep->data,
1109 (const UChar*)str2._rep->data,
1110 U_FOLD_CASE_DEFAULT
1111 );
|
1112 yi.zhou 1.108 }
|
1113 kumpf 1.40
|
1114 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1115
1116 const Uint16* s1 = str1._rep->data;
1117 const Uint16* s2 = str2._rep->data;
1118
1119 while (*s1 && *s2)
|
1120 kumpf 1.40 {
|
1121 mike 1.112 int r = _toLower(*s1++) - _toLower(*s2++);
|
1122 kumpf 1.40
|
1123 david.dillard 1.105 if (r)
1124 return r;
|
1125 kumpf 1.40 }
1126
|
1127 mike 1.112 if (*s2)
|
1128 david.dillard 1.105 return -1;
|
1129 mike 1.112 else if (*s1)
|
1130 david.dillard 1.105 return 1;
|
1131 kumpf 1.40
1132 return 0;
1133 }
1134
|
1135 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
|
1136 mike 1.27 {
|
1137 mike 1.112 #ifdef PEGASUS_HAS_ICU
1138
1139 return String::compareNoCase(s1, s2) == 0;
1140
1141 #else /* PEGASUS_HAS_ICU */
|
1142 mike 1.27
|
1143 mike 1.112 // The following employs loop unrolling for efficiency. Please do not
1144 // eliminate.
|
1145 kumpf 1.39
|
1146 mike 1.112 Uint16* p = (Uint16*)s1.getChar16Data();
1147 Uint16* q = (Uint16*)s2.getChar16Data();
1148 Uint32 n = s2.size();
1149
1150 while (n >= 8)
1151 {
1152 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1153 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1154 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1155 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1156 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1157 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1158 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1159 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1160 {
1161 return false;
1162 }
|
1163 kumpf 1.39
|
1164 mike 1.112 n -= 8;
1165 p += 8;
1166 q += 8;
1167 }
|
1168 mike 1.27
|
1169 mike 1.112 while (n >= 4)
|
1170 kumpf 1.39 {
|
1171 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1172 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1173 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1174 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
|
1175 david.dillard 1.105 {
|
1176 mike 1.112 return false;
|
1177 david.dillard 1.105 }
|
1178 mike 1.112
1179 n -= 4;
1180 p += 4;
1181 q += 4;
1182 }
1183
1184 while (n--)
1185 {
1186 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
|
1187 david.dillard 1.105 return false;
|
1188 mike 1.112
1189 p++;
1190 q++;
|
1191 kumpf 1.39 }
|
1192 mike 1.28
|
1193 kumpf 1.39 return true;
|
1194 mike 1.112
1195 #endif /* PEGASUS_HAS_ICU */
|
1196 david 1.69 }
1197
|
1198 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1199 david 1.69 {
|
1200 mike 1.112 _checkNullPointer(s2);
|
1201 david 1.69
|
1202 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1203 david 1.69
|
1204 mike 1.112 return String::equalNoCase(s1, String(s2));
|
1205 david 1.69
|
1206 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1207 david 1.69
|
1208 mike 1.112 const Uint16* p1 = (Uint16*)s1._rep->data;
1209 const char* p2 = s2;
1210 size_t n = s1._rep->size;
|
1211 david.dillard 1.105
|
1212 mike 1.112 while (n--)
1213 {
1214 if (!*p2)
1215 return false;
|
1216 david 1.71
|
1217 mike 1.112 if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1218 return false;
1219 }
|
1220 kumpf 1.42
|
1221 mike 1.112 if (*p2)
1222 return false;
|
1223 david.dillard 1.116
|
1224 mike 1.112 return true;
|
1225 karl 1.36
|
1226 mike 1.112 #else /* PEGASUS_HAS_ICU */
|
1227 david.dillard 1.105
|
1228 mike 1.112 // ATTN: optimize this!
1229 return String::equalNoCase(s1, String(s2));
|
1230 david.dillard 1.105
|
1231 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1232 }
|
1233 chuck 1.78
|
1234 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
|
1235 karl 1.36 {
|
1236 marek 1.131 return (s1._rep == s2._rep) ||
|
1237 kumpf 1.135 (s1._rep->size == s2._rep->size) &&
|
1238 marek 1.131 memcmp(s1._rep->data,
|
1239 kumpf 1.135 s2._rep->data,
|
1240 marek 1.131 s1._rep->size * sizeof(Uint16)) == 0;
|
1241 karl 1.36 }
1242
|
1243 mike 1.112 Boolean String::equal(const String& s1, const char* s2)
1244 {
1245 #ifdef PEGASUS_STRING_NO_UTF8
|
1246 kumpf 1.35
|
1247 mike 1.112 _checkNullPointer(s2);
|
1248 kumpf 1.39
|
1249 mike 1.112 const Uint16* p = (Uint16*)s1._rep->data;
1250 const char* q = s2;
|
1251 kumpf 1.39
|
1252 mike 1.112 while (*p && *q)
1253 {
1254 if (*p++ != Uint16(*q++))
1255 return false;
1256 }
|
1257 kumpf 1.39
|
1258 mike 1.112 return !(*p || *q);
|
1259 kumpf 1.39
|
1260 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
|
1261 kumpf 1.39
|
1262 mike 1.112 return String::equal(s1, String(s2));
|
1263 kumpf 1.39
|
1264 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1265 kumpf 1.39 }
1266
|
1267 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1268 kumpf 1.39 {
|
1269 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1270 david 1.69
|
1271 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1272 {
|
1273 david.dillard 1.105 char *buf = NULL;
1274 const int size = str.size() * 6;
|
1275 mike 1.112 UnicodeString UniStr(
1276 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1277 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1278 buf = new char[bufsize+1];
1279 UniStr.extract(0,bufsize,buf);
1280 os << buf;
1281 os.flush();
1282 delete [] buf;
|
1283 david.dillard 1.116 return os;
|
1284 yi.zhou 1.108 }
|
1285 mike 1.112
|
1286 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
|
1287 mike 1.112
1288 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1289 yi.zhou 1.108 {
|
1290 mike 1.112 Uint16 code = str[i];
|
1291 david.dillard 1.105
|
1292 mike 1.112 if (code > 0 && !(code & 0xFF00))
1293 os << char(code);
1294 else
1295 {
1296 // Print in hex format:
1297 char buffer[8];
1298 sprintf(buffer, "\\x%04X", code);
1299 os << buffer;
|
1300 david.dillard 1.105 }
|
1301 yi.zhou 1.108 }
|
1302 kumpf 1.39
1303 return os;
1304 }
1305
|
1306 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
|
1307 kumpf 1.39 {
|
1308 mike 1.112 StringRep* tmp;
1309
1310 if (_rep->cap)
1311 {
1312 tmp = StringRep::alloc(2 * _rep->cap);
1313 tmp->size = _rep->size;
1314 _copy(tmp->data, _rep->data, _rep->size);
1315 }
1316 else
1317 {
1318 tmp = StringRep::alloc(8);
1319 tmp->size = 0;
1320 }
1321
1322 StringRep::unref(_rep);
1323 _rep = tmp;
|
1324 kumpf 1.39 }
1325
|
1326 thilo.boehm 1.128 void AssignASCII(String& s, const char* str, Uint32 n)
1327 {
1328 class StringLayout
1329 {
1330 public:
1331 StringRep* rep;
1332 };
1333
|
1334 kumpf 1.130 StringLayout* that = reinterpret_cast<StringLayout*>(&s);
|
1335 thilo.boehm 1.128
1336 _checkNullPointer(str);
1337
1338 if (n > that->rep->cap || that->rep->refs.get() != 1)
1339 {
1340 StringRep::unref(that->rep);
1341 that->rep = StringRep::alloc(n);
1342 }
1343
1344 _copy(that->rep->data, str, n);
1345 that->rep->size = n;
1346 that->rep->data[that->rep->size] = 0;
1347 }
1348
|
1349 mike 1.112 PEGASUS_NAMESPACE_END
1350
1351 /*
1352 ================================================================================
1353
1354 String optimizations:
1355
1356 1. Added mechanism allowing certain functions to be inlined only when
1357 used by internal Pegasus modules. External modules (i.e., providers)
1358 link to a non-inline version, which allows for binary compatibility.
1359
1360 2. Implemented copy-on-write with atomic increment/decrement. This
1361 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1362 for the 'ni1000' benchmark.
1363
1364 3. Employed loop unrolling in several places. For example, see:
1365
1366 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1367
1368 4. Used the "empty-rep" optimization (described in whitepaper from the
1369 GCC Developers Summit). This reduced default construction to a simple
1370 mike 1.112 pointer assignment.
1371
1372 inline String::String() : _rep(&_emptyRep) { }
1373
1374 5. Implemented Uint16 versions of toupper() and tolower() using tables.
1375 For example:
1376
1377 static const char _upper[] =
1378 {
1379 0,1,2,...255
1380 };
1381
1382 inline Uint16 _toUpper(Uint16 x)
1383 {
1384 return (x & 0xFF00) ? x : _upper[x];
1385 }
1386
|
1387 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
|
1388 mike 1.112 operation.
1389
|
1390 david.dillard 1.116 6. Implemented char* version of the following member functions to
1391 eliminate unnecessary creation of anonymous string objects
|
1392 mike 1.112 (temporaries).
1393
1394 String(const String& s1, const char* s2);
1395 String(const char* s1, const String& s2);
1396 String& String::operator=(const char* str);
1397 Uint32 String::find(const char* s) const;
1398 bool String::equal(const String& s1, const char* s2);
1399 static int String::compare(const String& s1, const char* s2);
1400 String& String::append(const char* str);
1401 String& String::append(const char* str, Uint32 size);
1402 static bool String::equalNoCase(const String& s1, const char* s2);
1403 String& operator=(const char* str)
1404 String& String::assign(const char* str)
1405 String& String::append(const char* str)
1406 Boolean operator==(const String& s1, const char* s2)
1407 Boolean operator==(const char* s1, const String& s2)
1408 Boolean operator!=(const String& s1, const char* s2)
1409 Boolean operator!=(const char* s1, const String& s2)
1410 Boolean operator<(const String& s1, const char* s2)
1411 Boolean operator<(const char* s1, const String& s2)
1412 Boolean operator>(const String& s1, const char* s2)
1413 mike 1.112 Boolean operator>(const char* s1, const String& s2)
1414 Boolean operator<=(const String& s1, const char* s2)
1415 Boolean operator<=(const char* s1, const String& s2)
1416 Boolean operator>=(const String& s1, const char* s2)
1417 Boolean operator>=(const char* s1, const String& s2)
1418 String operator+(const String& s1, const char* s2)
1419 String operator+(const char* s1, const String& s2)
1420
|
1421 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
|
1422 mike 1.112 power of two (algorithm from the book "Hacker's Delight").
1423
1424 static Uint32 _roundUpToPow2(Uint32 x)
1425 {
1426 if (x < 8)
1427 return 8;
1428
1429 x--;
1430 x |= (x >> 1);
1431 x |= (x >> 2);
1432 x |= (x >> 4);
1433 x |= (x >> 8);
1434 x |= (x >> 16);
1435 x++;
1436
1437 return x;
1438 }
1439
1440 8. Implemented "concatenating constructors" to eliminate temporaries
|
1441 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
|
1442 mike 1.112 optimization" described by Stan Lippman.
1443
1444 inline String operator+(const String& s1, const String& s2)
1445 {
1446 return String(s1, s2, 0);
1447 }
1448
1449 9. Experimented to find the optimal initial size for a short string.
1450 Eight seems to offer the best tradeoff between space and time.
1451
1452 10. Inlined all members of the Char16 class.
1453
1454 11. Used Uint16 internally in the String class. This showed no improvement
1455 since Char16 was already fully inlined and was essentially reduced to
1456 Uint16 in any case.
1457
1458 12. Implemented conditional logic (#if) allowing error checking logic to
|
1459 david.dillard 1.116 be excluded for better performance. Examples include bounds checking
|
1460 mike 1.112 and null-pointer checking.
1461
1462 13. Used memcpy() and memcmp() where possible. These are implemented using
1463 the rep family of instructions under Intel and are much faster.
1464
|
1465 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
|
1466 mike 1.112 copy routine overhead.
1467
1468 15. Added ASCII7 form of the constructor and assign().
1469
1470 String s("hello world", String::ASCII7);
1471
1472 s.assignASCII7("hello world");
1473
1474 This avoids slower UTF8 processing when not needed.
1475
1476 ================================================================================
1477 */
|