1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
|
2 martin 1.134 //
|
3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license
4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
5 // this work for additional information regarding copyright ownership.
6 // Each contributor licenses this file to you under the OpenPegasus Open
7 // Source License; you may not use this file except in compliance with the
8 // License.
|
9 martin 1.134 //
|
10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
|
16 martin 1.134 //
|
17 martin 1.133 // The above copyright notice and this permission notice shall be included
18 // in all copies or substantial portions of the Software.
|
19 martin 1.134 //
|
20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27 martin 1.134 //
|
28 martin 1.133 //////////////////////////////////////////////////////////////////////////
|
29 mike 1.27 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
|
32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
|
33 mike 1.113 #include <cstring>
|
34 kumpf 1.48 #include "InternalException.h"
|
35 mike 1.112 #include "MessageLoader.h"
36 #include "StringRep.h"
|
37 david 1.69
38 #ifdef PEGASUS_HAS_ICU
|
39 kumpf 1.132 # include <unicode/ures.h>
40 # include <unicode/ustring.h>
41 # include <unicode/uchar.h>
|
42 david 1.69 #endif
43
|
44 mike 1.112 PEGASUS_NAMESPACE_BEGIN
|
45 mike 1.28
|
46 mike 1.112 //==============================================================================
47 //
48 // Compile-time macros (undefined by default).
49 //
50 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
51 //
52 //==============================================================================
|
53 mike 1.27
|
54 mike 1.112 //==============================================================================
|
55 kumpf 1.39 //
|
56 mike 1.112 // File-scope definitions:
|
57 kumpf 1.54 //
|
58 mike 1.112 //==============================================================================
59
60 // Note: this table is much faster than the system toupper(). Please do not
61 // change.
|
62 kumpf 1.54
|
63 david.dillard 1.116 const Uint8 _toUpperTable[256] =
|
64 kumpf 1.54 {
|
65 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
66 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
67 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
68 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
69 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
70 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
71 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
72 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
73 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
74 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
75 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
76 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
77 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
78 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
79 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
80 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
81 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
82 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
83 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
84 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
85 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
86 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
87 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
88 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
89 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
90 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
91 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
92 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
93 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
94 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
95 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
96 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
97 };
98
|
99 dev.meetei 1.139 // Note: this table is much faster than the system tolower(). Please do not
|
100 mike 1.112 // change.
101
|
102 david.dillard 1.116 const Uint8 _toLowerTable[256] =
|
103 mike 1.112 {
104 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
105 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
106 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
107 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
108 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
109 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
110 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
111 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
112 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
113 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
114 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
115 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
116 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
117 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
118 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
119 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
120 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
121 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
122 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
123 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
124 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
125 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
126 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
127 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
128 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
129 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
130 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
131 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
132 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
133 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
134 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
135 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
136 };
137
138 // Converts 16-bit characters to upper case. This routine is faster than the
139 // system toupper(). Please do not change.
140 inline Uint16 _toUpper(Uint16 x)
141 {
142 return (x & 0xFF00) ? x : _toUpperTable[x];
|
143 kumpf 1.54 }
144
|
145 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the
146 // system toupper(). Please do not change.
147 inline Uint16 _toLower(Uint16 x)
|
148 kumpf 1.54 {
|
149 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x];
150 }
151
152 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
153 static Uint32 _roundUpToPow2(Uint32 x)
154 {
|
155 dave.sudlik 1.120 // Check for potential overflow in x
156 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
|
157 mike 1.112
158 if (x < 8)
159 return 8;
160
161 x--;
162 x |= (x >> 1);
163 x |= (x >> 2);
164 x |= (x >> 4);
165 x |= (x >> 8);
166 x |= (x >> 16);
167 x++;
168
169 return x;
170 }
171
172 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
173 {
174 // The following employs loop unrolling for efficiency. Please do not
175 // eliminate.
176
177 while (n >= 4)
178 mike 1.112 {
179 if (s[0] == c)
180 return (Uint16*)s;
181 if (s[1] == c)
182 return (Uint16*)&s[1];
183 if (s[2] == c)
184 return (Uint16*)&s[2];
185 if (s[3] == c)
186 return (Uint16*)&s[3];
|
187 kumpf 1.82
|
188 mike 1.112 n -= 4;
189 s += 4;
190 }
191
192 if (n)
193 {
194 if (*s == c)
195 return (Uint16*)s;
196 s++;
197 n--;
198 }
199
200 if (n)
201 {
202 if (*s == c)
203 return (Uint16*)s;
204 s++;
205 n--;
206 }
207
208 if (n && *s == c)
209 mike 1.112 return (Uint16*)s;
210
211 // Not found!
212 return 0;
213 }
214
215 static int _compare(const Uint16* s1, const Uint16* s2)
216 {
217 while (*s1 && *s2)
218 {
219 int r = *s1++ - *s2++;
220
221 if (r)
222 return r;
223 }
224
225 if (*s2)
226 return -1;
227 else if (*s1)
228 return 1;
229
230 mike 1.112 return 0;
231 }
232
|
#ifdef PEGASUS_STRING_NO_UTF8
// Compares a zero-terminated UTF-16 string with a zero-terminated char
// string, one element at a time (each char is converted to Uint16 with no
// UTF-8 decoding). Returns the difference of the first pair that differs,
// or of the pair at which s1 terminates.
static int _compareNoUTF8(const Uint16* s1, const char* s2)
{
    for (;;)
    {
        Uint16 c1 = *s1++;
        Uint16 c2 = *s2++;

        // Stop at the end of s1 or at the first mismatch.
        if (c1 == 0 || c1 != c2)
            return c1 - c2;
    }
}
#endif
|
252 mike 1.112
253 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
254 {
255 memcpy(s1, s2, n * sizeof(Uint16));
256 }
257
258 void StringThrowOutOfBounds()
259 {
260 throw IndexOutOfBoundsException();
261 }
262
263 inline void _checkNullPointer(const void* ptr)
264 {
265 if (!ptr)
266 throw NullPointer();
267 }
268
|
269 thilo.boehm 1.138 #define BADUTF8_MAX_CLEAR_CHAR 40
270 #define BADUTF8_MAX_CHAR_TO_HEX 10
271
272 static void _formatBadUTF8Chars(
273 char* buffer,
274 Uint32 index,
275 const char* q,
276 size_t n )
|
277 mike 1.112 {
|
278 thilo.boehm 1.138
279 char tmp[20];
280 const char* start;
281
282 size_t clearChar =
283 (( index < BADUTF8_MAX_CLEAR_CHAR ) ? index : BADUTF8_MAX_CLEAR_CHAR );
284 size_t charToHex =
285 ((n-index-1) < BADUTF8_MAX_CHAR_TO_HEX ?
286 (n-index-1) : BADUTF8_MAX_CHAR_TO_HEX );
287
288 if (index < BADUTF8_MAX_CLEAR_CHAR)
289 {
290 start = q;
291 } else
292 {
293 start = &(q[ index - BADUTF8_MAX_CLEAR_CHAR]);
294 }
295
296 // Intialize the buffer with the first character as '\0' to be able to use
297 // strnchat() and strcat()
298 buffer[0] = 0;
299 thilo.boehm 1.138 // Start the buffer with the valid UTF8 chars
300 strncat(buffer,start,clearChar);
301 for (size_t i = clearChar, j = 0; j <= charToHex; i++,j++ )
302 {
303 tmp[0] = 0;
304 sprintf(&(tmp[0])," 0x%02X",(Uint8)start[i]);
305 strncat(buffer,&(tmp[0]),5);
306 }
307
308 }
309
310 static void _StringThrowBadUTF8(Uint32 index, const char* q, size_t n)
311 {
312 char buffer[1024];
313
314 _formatBadUTF8Chars(&(buffer[0]),index,q,n);
315
|
316 mike 1.112 MessageLoaderParms parms(
|
317 thilo.boehm 1.138 "Common.String.BAD_UTF8_LONG",
|
318 mike 1.112 "The byte sequence starting at index $0 "
|
319 thilo.boehm 1.138 "is not valid UTF-8 encoding: $1",
320 index,buffer);
321
|
322 mike 1.112 throw Exception(parms);
323 }
324
|
325 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
|
326 mike 1.112 // terminator).
327 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
328 {
329 // The following employs loop unrolling for efficiency. Please do not
330 // eliminate.
331
332 const Uint16* q = src;
333 Uint8* p = (Uint8*)dest;
334
335 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
|
336 kumpf 1.82 {
|
337 mike 1.112 p[0] = q[0];
338 p[1] = q[1];
339 p[2] = q[2];
340 p[3] = q[3];
341 p += 4;
342 q += 4;
343 n -= 4;
|
344 kumpf 1.82 }
|
345 mike 1.112
346 switch (n)
347 {
348 case 0:
349 return p - (Uint8*)dest;
350 case 1:
351 if (q[0] < 128)
352 {
353 p[0] = q[0];
354 return p + 1 - (Uint8*)dest;
355 }
356 break;
357 case 2:
358 if (q[0] < 128 && q[1] < 128)
359 {
360 p[0] = q[0];
361 p[1] = q[1];
362 return p + 2 - (Uint8*)dest;
363 }
364 break;
365 case 3:
366 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
367 {
368 p[0] = q[0];
369 p[1] = q[1];
370 p[2] = q[2];
371 return p + 3 - (Uint8*)dest;
372 }
373 break;
374 }
375
376 // If this line was reached, there must be characters greater than 128.
377
378 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
379
380 return p - (Uint8*)dest;
|
381 kumpf 1.54 }
382
|
383 mike 1.112 //==============================================================================
384 //
385 // class CString
386 //
387 //==============================================================================
388
389 CString::CString(const CString& cstr) : _rep(0)
|
390 kumpf 1.54 {
|
391 mike 1.112 if (cstr._rep)
|
392 kumpf 1.82 {
|
393 mike 1.112 size_t n = strlen(cstr._rep) + 1;
394 _rep = (char*)operator new(n);
395 memcpy(_rep, cstr._rep, n);
|
396 kumpf 1.82 }
|
397 kumpf 1.54 }
398
|
399 kumpf 1.56 CString& CString::operator=(const CString& cstr)
400 {
|
401 kumpf 1.82 if (&cstr != this)
|
402 kumpf 1.81 {
|
403 kumpf 1.82 if (_rep)
404 {
|
405 mike 1.112 operator delete(_rep);
|
406 kumpf 1.82 _rep = 0;
407 }
|
408 mike 1.112
|
409 kumpf 1.82 if (cstr._rep)
410 {
|
411 mike 1.112 size_t n = strlen(cstr._rep) + 1;
412 _rep = (char*)operator new(n);
413 memcpy(_rep, cstr._rep, n);
|
414 kumpf 1.82 }
|
415 kumpf 1.81 }
|
416 mike 1.112
|
417 kumpf 1.56 return *this;
418 }
419
|
420 mike 1.112 //==============================================================================
|
421 kumpf 1.54 //
|
422 mike 1.112 // class StringRep
|
423 kumpf 1.39 //
|
424 mike 1.112 //==============================================================================
|
425 kumpf 1.39
|
426 mike 1.112 StringRep StringRep::_emptyRep;
|
427 mike 1.27
|
428 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
|
429 mike 1.27 {
|
430 dave.sudlik 1.120 // Check for potential overflow in cap
431 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
|
432 mike 1.27
|
433 mike 1.112 StringRep* rep = (StringRep*)::operator new(
434 sizeof(StringRep) + cap * sizeof(Uint16));
435 rep->cap = cap;
436 new(&rep->refs) AtomicInt(1);
437
438 return rep;
|
439 mike 1.27 }
440
|
441 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
442 chuck 1.102 {
|
443 mike 1.114 if (cap > rep->cap || rep->refs.get() != 1)
|
444 chuck 1.102 {
|
445 mike 1.112 size_t n = _roundUpToPow2(cap);
446 StringRep* newRep = StringRep::alloc(n);
447 newRep->size = rep->size;
448 _copy(newRep->data, rep->data, rep->size + 1);
449 StringRep::unref(rep);
450 rep = newRep;
451 }
452 }
|
453 david.dillard 1.105
|
454 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
455 {
456 StringRep* rep = StringRep::alloc(size);
457 rep->size = size;
458 _copy(rep->data, data, size);
459 rep->data[size] = '\0';
460 return rep;
461 }
|
462 chuck 1.102
|
463 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
464 {
465 // Return a new copy of rep. Release rep.
|
466 chuck 1.102
|
467 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size);
468 newRep->size = rep->size;
469 _copy(newRep->data, rep->data, rep->size);
470 newRep->data[newRep->size] = '\0';
471 StringRep::unref(rep);
472 return newRep;
|
473 chuck 1.102 }
474
|
475 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
|
476 kumpf 1.43 {
|
477 mike 1.112 StringRep* rep = StringRep::alloc(size);
478 size_t utf8_error_index;
479 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
480
481 if (rep->size == size_t(-1))
482 {
483 StringRep::free(rep);
|
484 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index, data,size);
|
485 mike 1.112 }
|
486 kumpf 1.43
|
487 mike 1.112 rep->data[rep->size] = '\0';
|
488 kumpf 1.43
|
489 mike 1.112 return rep;
|
490 mike 1.27 }
491
|
492 mike 1.112 Uint32 StringRep::length(const Uint16* str)
|
493 mike 1.27 {
|
494 mike 1.112 // Note: We could unroll this but it is rarely called.
495
496 const Uint16* end = (Uint16*)str;
497
498 while (*end++)
499 ;
500
|
501 a.dunfey 1.125 return (Uint32)(end - str - 1);
|
502 kumpf 1.39 }
|
503 tony 1.66
|
504 mike 1.112 //==============================================================================
505 //
506 // class String
507 //
508 //==============================================================================
509
510 const String String::EMPTY;
|
511 mike 1.27
|
512 kumpf 1.39 String::String(const String& str, Uint32 n)
513 {
|
514 mike 1.112 _checkBounds(n, str._rep->size);
515 _rep = StringRep::create(str._rep->data, n);
|
516 kumpf 1.39 }
517
518 String::String(const Char16* str)
519 {
|
520 mike 1.112 _checkNullPointer(str);
521 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
522 mike 1.27 }
523
|
524 kumpf 1.39 String::String(const Char16* str, Uint32 n)
525 {
|
526 mike 1.112 _checkNullPointer(str);
527 _rep = StringRep::create((Uint16*)str, n);
|
528 kumpf 1.39 }
529
530 String::String(const char* str)
|
531 mike 1.27 {
|
532 mike 1.112 _checkNullPointer(str);
|
533 david.dillard 1.105
|
534 mike 1.112 // Set this just in case create() throws an exception.
535 _rep = &StringRep::_emptyRep;
536 _rep = StringRep::create(str, strlen(str));
|
537 mike 1.27 }
538
|
539 kumpf 1.39 String::String(const char* str, Uint32 n)
|
540 mike 1.27 {
|
541 mike 1.112 _checkNullPointer(str);
|
542 david.dillard 1.105
|
543 mike 1.112 // Set this just in case create() throws an exception.
544 _rep = &StringRep::_emptyRep;
545 _rep = StringRep::create(str, n);
|
546 kumpf 1.39 }
|
547 mike 1.27
|
548 mike 1.112 String::String(const String& s1, const String& s2)
|
549 kumpf 1.39 {
|
550 mike 1.112 size_t n1 = s1._rep->size;
551 size_t n2 = s2._rep->size;
552 size_t n = n1 + n2;
553 _rep = StringRep::alloc(n);
554 _copy(_rep->data, s1._rep->data, n1);
555 _copy(_rep->data + n1, s2._rep->data, n2);
556 _rep->size = n;
557 _rep->data[n] = '\0';
|
558 mike 1.27 }
559
|
560 mike 1.112 String::String(const String& s1, const char* s2)
|
561 mike 1.27 {
|
562 mike 1.112 _checkNullPointer(s2);
563 size_t n1 = s1._rep->size;
564 size_t n2 = strlen(s2);
565 _rep = StringRep::alloc(n1 + n2);
566 _copy(_rep->data, s1._rep->data, n1);
567 size_t utf8_error_index;
568 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
569
570 if (tmp == size_t(-1))
|
571 kumpf 1.82 {
|
572 mike 1.112 StringRep::free(_rep);
573 _rep = &StringRep::_emptyRep;
|
574 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,s2,n2);
|
575 kumpf 1.82 }
|
576 mike 1.112
577 _rep->size = n1 + tmp;
578 _rep->data[_rep->size] = '\0';
|
579 mike 1.27 }
580
|
581 mike 1.112 String::String(const char* s1, const String& s2)
|
582 mike 1.27 {
|
583 mike 1.112 _checkNullPointer(s1);
584 size_t n1 = strlen(s1);
585 size_t n2 = s2._rep->size;
586 _rep = StringRep::alloc(n1 + n2);
587 size_t utf8_error_index;
588 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
589
590 if (tmp == size_t(-1))
591 {
592 StringRep::free(_rep);
593 _rep = &StringRep::_emptyRep;
|
594 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,s1,n1);
|
595 mike 1.112 }
596
597 _rep->size = n2 + tmp;
598 _copy(_rep->data + n1, s2._rep->data, n2);
599 _rep->data[_rep->size] = '\0';
|
600 mike 1.27 }
601
|
602 mike 1.112 String& String::assign(const String& str)
|
603 mike 1.27 {
|
604 mike 1.112 if (_rep != str._rep)
|
605 david.dillard 1.105 {
|
606 mike 1.112 StringRep::unref(_rep);
607 StringRep::ref(_rep = str._rep);
|
608 david.dillard 1.105 }
609
|
610 mike 1.27 return *this;
611 }
612
613 String& String::assign(const Char16* str, Uint32 n)
614 {
|
615 mike 1.112 _checkNullPointer(str);
616
|
617 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
618 david.dillard 1.105 {
|
619 mike 1.112 StringRep::unref(_rep);
620 _rep = StringRep::alloc(n);
|
621 david.dillard 1.105 }
622
|
623 mike 1.112 _rep->size = n;
624 _copy(_rep->data, (Uint16*)str, n);
625 _rep->data[n] = '\0';
626
|
627 mike 1.27 return *this;
628 }
629
|
630 mike 1.112 String& String::assign(const char* str, Uint32 n)
|
631 chuck 1.102 {
|
632 mike 1.112 _checkNullPointer(str);
633
|
634 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
635 david.dillard 1.105 {
|
636 mike 1.112 StringRep::unref(_rep);
637 _rep = StringRep::alloc(n);
|
638 david.dillard 1.105 }
639
|
640 mike 1.112 size_t utf8_error_index;
641 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
|
642 chuck 1.102
|
643 mike 1.112 if (_rep->size == size_t(-1))
|
644 david.dillard 1.105 {
|
645 mike 1.112 StringRep::free(_rep);
646 _rep = &StringRep::_emptyRep;
|
647 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,str,n);
|
648 david.dillard 1.105 }
|
649 mike 1.112
650 _rep->data[_rep->size] = 0;
|
651 david.dillard 1.105
|
652 mike 1.27 return *this;
653 }
654
|
655 kumpf 1.39 void String::clear()
656 {
|
657 mike 1.112 if (_rep->size)
658 {
|
659 mike 1.114 if (_rep->refs.get() == 1)
|
660 mike 1.112 {
661 _rep->size = 0;
662 _rep->data[0] = '\0';
663 }
664 else
665 {
666 StringRep::unref(_rep);
667 _rep = &StringRep::_emptyRep;
668 }
669 }
|
670 kumpf 1.39 }
671
|
672 mike 1.112 void String::reserveCapacity(Uint32 cap)
|
673 kumpf 1.39 {
|
674 mike 1.112 _reserve(_rep, cap);
|
675 kumpf 1.39 }
676
|
677 mike 1.112 CString String::getCString() const
678 {
|
679 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16
680 // counterpart, so we allocate extra memory for the worst case. In the
|
681 mike 1.112 // best case, we may need only one third of the memory allocated. But
|
682 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since
683 // CString objects are usually short-lived (disappearing after only a few
|
684 mike 1.112 // instructions). CString objects are typically created on the stack as
685 // means to obtain a char* pointer.
686
687 #ifdef PEGASUS_STRING_NO_UTF8
688 char* str = (char*)operator new(_rep->size + 1);
689 _copy(str, _rep->data, _rep->size);
690 str[_rep->size] = '\0';
691 return CString(str);
|
692 gs.keenan 1.110 #else
|
693 a.dunfey 1.125 Uint32 n = (Uint32)(3 * _rep->size);
|
694 mike 1.112 char* str = (char*)operator new(n + 1);
695 size_t size = _copyToUTF8(str, _rep->data, _rep->size);
696 str[size] = '\0';
697 return CString(str);
|
698 gs.keenan 1.110 #endif
|
699 kumpf 1.39 }
700
|
701 mike 1.112 String& String::append(const Char16* str, Uint32 n)
|
702 kumpf 1.39 {
|
703 mike 1.112 _checkNullPointer(str);
704
705 size_t oldSize = _rep->size;
706 size_t newSize = oldSize + n;
|
707 a.dunfey 1.125 _reserve(_rep, (Uint32)newSize);
|
708 mike 1.112 _copy(_rep->data + oldSize, (Uint16*)str, n);
709 _rep->size = newSize;
710 _rep->data[newSize] = '\0';
711
712 return *this;
|
713 kumpf 1.39 }
714
|
715 mike 1.112 String& String::append(const String& str)
|
716 mike 1.27 {
|
717 a.dunfey 1.125 return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
|
718 mike 1.27 }
719
|
720 mike 1.112 String& String::append(const char* str, Uint32 size)
|
721 mike 1.27 {
|
722 mike 1.112 _checkNullPointer(str);
723
724 size_t oldSize = _rep->size;
725 size_t cap = oldSize + size;
726
|
727 a.dunfey 1.125 _reserve(_rep, (Uint32)cap);
|
728 mike 1.112 size_t utf8_error_index;
729 size_t tmp = _convert(
730 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
731
732 if (tmp == size_t(-1))
733 {
734 StringRep::free(_rep);
735 _rep = &StringRep::_emptyRep;
|
736 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,str,size);
|
737 mike 1.112 }
|
738 mike 1.27
|
739 mike 1.112 _rep->size += tmp;
740 _rep->data[_rep->size] = '\0';
|
741 mike 1.27
|
742 kumpf 1.39 return *this;
743 }
744
|
745 mike 1.112 void String::remove(Uint32 index, Uint32 n)
|
746 mike 1.27 {
|
747 mike 1.112 if (n == PEG_NOT_FOUND)
|
748 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
|
749 mike 1.112
750 _checkBounds(index + n, _rep->size);
751
|
752 mike 1.114 if (_rep->refs.get() != 1)
|
753 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
|
754 mike 1.27
|
755 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
|
756 mike 1.27
|
757 mike 1.112 size_t rem = _rep->size - (index + n);
758 Uint16* data = _rep->data;
|
759 mike 1.27
|
760 mike 1.112 if (rem)
761 memmove(data + index, data + index + n, rem * sizeof(Uint16));
|
762 mike 1.27
|
763 mike 1.112 _rep->size -= n;
764 data[_rep->size] = '\0';
|
765 mike 1.27 }
766
|
767 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
|
768 mike 1.27 {
|
769 mike 1.112 // Note: this implementation is very permissive but used for
770 // backwards compatibility.
771
772 if (index < _rep->size)
|
773 mike 1.27 {
|
774 mike 1.112 if (n == PEG_NOT_FOUND || n > _rep->size - index)
|
775 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
|
776 mike 1.27
|
777 w.otsuka 1.121 return String((Char16*)(_rep->data + index), n);
|
778 mike 1.27 }
|
779 david.dillard 1.105
780 return String();
|
781 mike 1.27 }
782
783 Uint32 String::find(Char16 c) const
784 {
|
785 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
786 mike 1.27
|
787 mike 1.112 if (p)
|
788 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
789 mike 1.27
790 return PEG_NOT_FOUND;
791 }
792
|
793 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
794 mike 1.30 {
|
795 mike 1.112 _checkBounds(index, _rep->size);
796
797 if (index >= _rep->size)
798 return PEG_NOT_FOUND;
799
800 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
|
801 mike 1.30
|
802 mike 1.112 if (p)
|
803 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
804 mike 1.30
805 return PEG_NOT_FOUND;
806 }
807
|
808 mike 1.112 Uint32 StringFindAux(
809 const StringRep* _rep, const Char16* s, Uint32 n)
|
810 mike 1.27 {
|
811 mike 1.112 _checkNullPointer(s);
|
812 mike 1.27
|
813 mike 1.112 const Uint16* data = _rep->data;
814 size_t rem = _rep->size;
815
816 while (n <= rem)
|
817 mike 1.30 {
|
818 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]);
819
820 if (!p)
821 break;
|
822 mike 1.30
|
823 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
|
824 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
825 david.dillard 1.105
|
826 mike 1.112 p++;
827 rem -= p - data;
828 data = p;
|
829 mike 1.27 }
|
830 mike 1.112
|
831 mike 1.27 return PEG_NOT_FOUND;
832 }
833
|
834 mike 1.112 Uint32 String::find(const char* s) const
835 {
836 _checkNullPointer(s);
837
838 // Note: could optimize away creation of temporary, but this is rarely
839 // called.
840 return find(String(s));
841 }
842
|
843 mike 1.27 Uint32 String::reverseFind(Char16 c) const
844 {
|
845 mike 1.112 Uint16 x = c;
846 Uint16* p = _rep->data;
847 Uint16* q = _rep->data + _rep->size;
|
848 mike 1.27
|
849 mike 1.112 while (q != p)
|
850 mike 1.27 {
|
851 mike 1.112 if (*--q == x)
|
852 david.dillard 1.116 return static_cast<Uint32>(q - p);
|
853 mike 1.27 }
854
855 return PEG_NOT_FOUND;
856 }
857
858 void String::toLower()
859 {
|
860 david 1.69 #ifdef PEGASUS_HAS_ICU
|
861 mike 1.112
|
862 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
863 david 1.90 {
|
864 mike 1.114 if (_rep->refs.get() != 1)
|
865 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
866
|
867 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
868 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
869 // prevents optimizations where the us-ascii is converted before
|
870 mike 1.112 // calling ICU.
|
871 yi.zhou 1.108 // The string may shrink or expand after the convert.
872
|
873 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns
874 //// only the size when zero is passed as the destination size argument.
875
|
876 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
877
|
878 mike 1.112 int32_t newSize = u_strToLower(
879 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
880 david.dillard 1.116
|
881 mike 1.112 err = U_ZERO_ERROR;
882
883 //// Reserve enough space for the result.
884
885 if ((Uint32)newSize > _rep->cap)
886 _reserve(_rep, newSize);
887
888 //// Perform the conversion (overlapping buffers are allowed).
|
889 chuck 1.99
|
890 mike 1.112 u_strToLower((UChar*)_rep->data, newSize,
891 (UChar*)_rep->data, _rep->size, NULL, &err);
|
892 yi.zhou 1.108
|
893 mike 1.112 _rep->size = newSize;
894 return;
|
895 david 1.90 }
|
896 mike 1.112
897 #endif /* PEGASUS_HAS_ICU */
898
|
899 mike 1.114 if (_rep->refs.get() != 1)
|
900 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
901
902 Uint16* p = _rep->data;
903 size_t n = _rep->size;
904
905 for (; n--; p++)
|
906 david 1.90 {
|
907 mike 1.112 if (!(*p & 0xFF00))
908 *p = _toLower(*p);
|
909 mike 1.27 }
|
910 kumpf 1.39 }
911
|
912 chuck 1.99 void String::toUpper()
|
913 david 1.90 {
914 #ifdef PEGASUS_HAS_ICU
|
915 mike 1.112
|
916 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
917 chuck 1.99 {
|
918 mike 1.114 if (_rep->refs.get() != 1)
|
919 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
920
|
921 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
922 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
923 // prevents optimizations where the us-ascii is converted before
|
924 mike 1.112 // calling ICU.
|
925 yi.zhou 1.108 // The string may shrink or expand after the convert.
926
|
927 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns
928 //// only the size when zero is passed as the destination size argument.
929
|
930 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
931
|
932 mike 1.112 int32_t newSize = u_strToUpper(
933 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
934
935 err = U_ZERO_ERROR;
936
937 //// Reserve enough space for the result.
938
939 if ((Uint32)newSize > _rep->cap)
940 _reserve(_rep, newSize);
941
942 //// Perform the conversion (overlapping buffers are allowed).
943
944 u_strToUpper((UChar*)_rep->data, newSize,
945 (UChar*)_rep->data, _rep->size, NULL, &err);
|
946 chuck 1.99
|
947 mike 1.112 _rep->size = newSize;
|
948 yi.zhou 1.108
|
949 mike 1.112 return;
|
950 david 1.91 }
|
951 mike 1.112
952 #endif /* PEGASUS_HAS_ICU */
953
|
954 mike 1.114 if (_rep->refs.get() != 1)
|
955 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
956
957 Uint16* p = _rep->data;
958 size_t n = _rep->size;
959
960 for (; n--; p++)
961 *p = _toUpper(*p);
|
962 david 1.90 }
963
|
964 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
965 kumpf 1.39 {
|
966 kumpf 1.118 const Uint16* p1 = s1._rep->data;
967 const Uint16* p2 = s2._rep->data;
|
968 mike 1.27
|
969 kumpf 1.118 while (n--)
970 {
971 int r = *p1++ - *p2++;
972 if (r)
973 {
974 return r;
975 }
976 else if (!p1[-1])
977 {
978 // We must have encountered a null terminator in both s1 and s2
979 return 0;
980 }
981 }
982 return 0;
|
983 mike 1.27 }
984
|
985 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
986 mike 1.30 {
|
987 mike 1.112 return _compare(s1._rep->data, s2._rep->data);
988 }
|
989 kumpf 1.43
|
990 mike 1.112 int String::compare(const String& s1, const char* s2)
991 {
992 _checkNullPointer(s2);
|
993 mike 1.30
|
994 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
995 return _compareNoUTF8(s1._rep->data, s2);
996 #else
997 // ATTN: optimize this!
998 return String::compare(s1, String(s2));
999 #endif
|
1000 mike 1.30 }
1001
|
1002 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
|
1003 kumpf 1.40 {
|
1004 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1005 mike 1.112
|
1006 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1007 {
|
1008 mike 1.112 return u_strcasecmp(
|
1009 dave.sudlik 1.124 (const UChar*)str1._rep->data,
1010 (const UChar*)str2._rep->data,
1011 U_FOLD_CASE_DEFAULT
1012 );
|
1013 yi.zhou 1.108 }
|
1014 kumpf 1.40
|
1015 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1016
1017 const Uint16* s1 = str1._rep->data;
1018 const Uint16* s2 = str2._rep->data;
1019
1020 while (*s1 && *s2)
|
1021 kumpf 1.40 {
|
1022 mike 1.112 int r = _toLower(*s1++) - _toLower(*s2++);
|
1023 kumpf 1.40
|
1024 david.dillard 1.105 if (r)
1025 return r;
|
1026 kumpf 1.40 }
1027
|
1028 mike 1.112 if (*s2)
|
1029 david.dillard 1.105 return -1;
|
1030 mike 1.112 else if (*s1)
|
1031 david.dillard 1.105 return 1;
|
1032 kumpf 1.40
1033 return 0;
1034 }
1035
|
1036 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
|
1037 mike 1.27 {
|
1038 mike 1.112 #ifdef PEGASUS_HAS_ICU
1039
1040 return String::compareNoCase(s1, s2) == 0;
1041
1042 #else /* PEGASUS_HAS_ICU */
|
1043 mike 1.27
|
1044 mike 1.112 // The following employs loop unrolling for efficiency. Please do not
1045 // eliminate.
|
1046 kumpf 1.39
|
1047 mike 1.112 Uint16* p = (Uint16*)s1.getChar16Data();
1048 Uint16* q = (Uint16*)s2.getChar16Data();
1049 Uint32 n = s2.size();
1050
1051 while (n >= 8)
1052 {
1053 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1054 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1055 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1056 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1057 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1058 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1059 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1060 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1061 {
1062 return false;
1063 }
|
1064 kumpf 1.39
|
1065 mike 1.112 n -= 8;
1066 p += 8;
1067 q += 8;
1068 }
|
1069 mike 1.27
|
1070 mike 1.112 while (n >= 4)
|
1071 kumpf 1.39 {
|
1072 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1073 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1074 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1075 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
|
1076 david.dillard 1.105 {
|
1077 mike 1.112 return false;
|
1078 david.dillard 1.105 }
|
1079 mike 1.112
1080 n -= 4;
1081 p += 4;
1082 q += 4;
1083 }
1084
1085 while (n--)
1086 {
1087 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
|
1088 david.dillard 1.105 return false;
|
1089 mike 1.112
1090 p++;
1091 q++;
|
1092 kumpf 1.39 }
|
1093 mike 1.28
|
1094 kumpf 1.39 return true;
|
1095 mike 1.112
1096 #endif /* PEGASUS_HAS_ICU */
|
1097 david 1.69 }
1098
|
1099 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1100 david 1.69 {
|
1101 mike 1.112 _checkNullPointer(s2);
|
1102 david 1.69
|
1103 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1104 david 1.69
|
1105 mike 1.112 return String::equalNoCase(s1, String(s2));
|
1106 david 1.69
|
1107 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1108 david 1.69
|
1109 mike 1.112 const Uint16* p1 = (Uint16*)s1._rep->data;
1110 const char* p2 = s2;
1111 size_t n = s1._rep->size;
|
1112 david.dillard 1.105
|
1113 mike 1.112 while (n--)
1114 {
1115 if (!*p2)
1116 return false;
|
1117 david 1.71
|
1118 mike 1.112 if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1119 return false;
1120 }
|
1121 kumpf 1.42
|
1122 mike 1.112 if (*p2)
1123 return false;
|
1124 david.dillard 1.116
|
1125 mike 1.112 return true;
|
1126 karl 1.36
|
1127 mike 1.112 #else /* PEGASUS_HAS_ICU */
|
1128 david.dillard 1.105
|
1129 mike 1.112 // ATTN: optimize this!
1130 return String::equalNoCase(s1, String(s2));
|
1131 david.dillard 1.105
|
1132 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1133 }
|
1134 chuck 1.78
|
1135 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
|
1136 karl 1.36 {
|
1137 marek 1.131 return (s1._rep == s2._rep) ||
|
1138 marek 1.137 ((s1._rep->size == s2._rep->size) &&
1139 memcmp(s1._rep->data,
1140 s2._rep->data,
1141 s1._rep->size * sizeof(Uint16)) == 0);
|
1142 karl 1.36 }
1143
|
1144 mike 1.112 Boolean String::equal(const String& s1, const char* s2)
1145 {
1146 #ifdef PEGASUS_STRING_NO_UTF8
|
1147 kumpf 1.35
|
1148 mike 1.112 _checkNullPointer(s2);
|
1149 kumpf 1.39
|
1150 mike 1.112 const Uint16* p = (Uint16*)s1._rep->data;
1151 const char* q = s2;
|
1152 kumpf 1.39
|
1153 mike 1.112 while (*p && *q)
1154 {
1155 if (*p++ != Uint16(*q++))
1156 return false;
1157 }
|
1158 kumpf 1.39
|
1159 mike 1.112 return !(*p || *q);
|
1160 kumpf 1.39
|
1161 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
|
1162 kumpf 1.39
|
1163 mike 1.112 return String::equal(s1, String(s2));
|
1164 kumpf 1.39
|
1165 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1166 kumpf 1.39 }
1167
|
1168 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1169 kumpf 1.39 {
|
1170 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1171 david 1.69
|
1172 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1173 {
|
1174 david.dillard 1.105 char *buf = NULL;
1175 const int size = str.size() * 6;
|
1176 mike 1.112 UnicodeString UniStr(
1177 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1178 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1179 buf = new char[bufsize+1];
1180 UniStr.extract(0,bufsize,buf);
1181 os << buf;
1182 os.flush();
1183 delete [] buf;
|
1184 david.dillard 1.116 return os;
|
1185 yi.zhou 1.108 }
|
1186 mike 1.112
|
1187 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
|
1188 mike 1.112
1189 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1190 yi.zhou 1.108 {
|
1191 mike 1.112 Uint16 code = str[i];
|
1192 david.dillard 1.105
|
1193 mike 1.112 if (code > 0 && !(code & 0xFF00))
1194 os << char(code);
1195 else
1196 {
1197 // Print in hex format:
1198 char buffer[8];
1199 sprintf(buffer, "\\x%04X", code);
1200 os << buffer;
|
1201 david.dillard 1.105 }
|
1202 yi.zhou 1.108 }
|
1203 kumpf 1.39
1204 return os;
1205 }
1206
|
1207 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
|
1208 kumpf 1.39 {
|
1209 mike 1.112 StringRep* tmp;
1210
1211 if (_rep->cap)
1212 {
1213 tmp = StringRep::alloc(2 * _rep->cap);
1214 tmp->size = _rep->size;
1215 _copy(tmp->data, _rep->data, _rep->size);
1216 }
1217 else
1218 {
1219 tmp = StringRep::alloc(8);
1220 tmp->size = 0;
1221 }
1222
1223 StringRep::unref(_rep);
1224 _rep = tmp;
|
1225 kumpf 1.39 }
1226
|
1227 thilo.boehm 1.128 void AssignASCII(String& s, const char* str, Uint32 n)
1228 {
1229 class StringLayout
1230 {
1231 public:
1232 StringRep* rep;
1233 };
1234
|
1235 kumpf 1.130 StringLayout* that = reinterpret_cast<StringLayout*>(&s);
|
1236 thilo.boehm 1.128
1237 _checkNullPointer(str);
1238
1239 if (n > that->rep->cap || that->rep->refs.get() != 1)
1240 {
1241 StringRep::unref(that->rep);
1242 that->rep = StringRep::alloc(n);
1243 }
1244
1245 _copy(that->rep->data, str, n);
1246 that->rep->size = n;
1247 that->rep->data[that->rep->size] = 0;
1248 }
1249
|
1250 mike 1.112 PEGASUS_NAMESPACE_END
1251
1252 /*
1253 ================================================================================
1254
1255 String optimizations:
1256
1257 1. Added mechanism allowing certain functions to be inlined only when
1258 used by internal Pegasus modules. External modules (i.e., providers)
1259 link to a non-inline version, which allows for binary compatibility.
1260
    2.  Implemented copy-on-write with atomic increment/decrement. This
        yielded a 10% improvement for the 'gc' benchmark and an 11%
        improvement for the 'ni1000' benchmark.
1264
1265 3. Employed loop unrolling in several places. For example, see:
1266
1267 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1268
1269 4. Used the "empty-rep" optimization (described in whitepaper from the
1270 GCC Developers Summit). This reduced default construction to a simple
1271 mike 1.112 pointer assignment.
1272
1273 inline String::String() : _rep(&_emptyRep) { }
1274
1275 5. Implemented Uint16 versions of toupper() and tolower() using tables.
1276 For example:
1277
1278 static const char _upper[] =
1279 {
1280 0,1,2,...255
1281 };
1282
1283 inline Uint16 _toUpper(Uint16 x)
1284 {
1285 return (x & 0xFF00) ? x : _upper[x];
1286 }
1287
|
1288 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
|
1289 mike 1.112 operation.
1290
|
    6.  Implemented char* version of the following member functions to
        eliminate unnecessary creation of anonymous string objects
        (temporaries).
1294
1295 String(const String& s1, const char* s2);
1296 String(const char* s1, const String& s2);
1297 String& String::operator=(const char* str);
1298 Uint32 String::find(const char* s) const;
1299 bool String::equal(const String& s1, const char* s2);
1300 static int String::compare(const String& s1, const char* s2);
1301 String& String::append(const char* str);
1302 String& String::append(const char* str, Uint32 size);
1303 static bool String::equalNoCase(const String& s1, const char* s2);
1304 String& operator=(const char* str)
1305 String& String::assign(const char* str)
1306 String& String::append(const char* str)
1307 Boolean operator==(const String& s1, const char* s2)
1308 Boolean operator==(const char* s1, const String& s2)
1309 Boolean operator!=(const String& s1, const char* s2)
1310 Boolean operator!=(const char* s1, const String& s2)
1311 Boolean operator<(const String& s1, const char* s2)
1312 Boolean operator<(const char* s1, const String& s2)
1313 Boolean operator>(const String& s1, const char* s2)
1314 mike 1.112 Boolean operator>(const char* s1, const String& s2)
1315 Boolean operator<=(const String& s1, const char* s2)
1316 Boolean operator<=(const char* s1, const String& s2)
1317 Boolean operator>=(const String& s1, const char* s2)
1318 Boolean operator>=(const char* s1, const String& s2)
1319 String operator+(const String& s1, const char* s2)
1320 String operator+(const char* s1, const String& s2)
1321
|
1322 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
|
1323 mike 1.112 power of two (algorithm from the book "Hacker's Delight").
1324
1325 static Uint32 _roundUpToPow2(Uint32 x)
1326 {
1327 if (x < 8)
1328 return 8;
1329
1330 x--;
1331 x |= (x >> 1);
1332 x |= (x >> 2);
1333 x |= (x >> 4);
1334 x |= (x >> 8);
1335 x |= (x >> 16);
1336 x++;
1337
1338 return x;
1339 }
1340
1341 8. Implemented "concatenating constructors" to eliminate temporaries
|
1342 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
|
1343 mike 1.112 optimization" described by Stan Lippman.
1344
1345 inline String operator+(const String& s1, const String& s2)
1346 {
1347 return String(s1, s2, 0);
1348 }
1349
    9.  Experimented to find the optimal initial size for a short string.
        Eight seems to offer the best tradeoff between space and time.
1352
1353 10. Inlined all members of the Char16 class.
1354
    11. Used Uint16 internally in the String class. This showed no
        improvement since Char16 was already fully inlined and was
        essentially reduced to Uint16 in any case.
1358
    12. Implemented conditional logic (#if) allowing error-checking logic to
        be excluded for better performance. Examples include bounds checking
        and null-pointer checking.
1362
    13. Used memcpy() and memcmp() where possible. These are implemented using
        the rep family of instructions under Intel and are much faster.
1365
|
1366 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
|
1367 mike 1.112 copy routine overhead.
1368
1369 15. Added ASCII7 form of the constructor and assign().
1370
1371 String s("hello world", String::ASCII7);
1372
1373 s.assignASCII7("hello world");
1374
1375 This avoids slower UTF8 processing when not needed.
1376
1377 ================================================================================
1378 */
|