1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
|
2 martin 1.134 //
|
3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license
4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
5 // this work for additional information regarding copyright ownership.
6 // Each contributor licenses this file to you under the OpenPegasus Open
7 // Source License; you may not use this file except in compliance with the
8 // License.
|
9 martin 1.134 //
|
10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
|
16 martin 1.134 //
|
17 martin 1.133 // The above copyright notice and this permission notice shall be included
18 // in all copies or substantial portions of the Software.
|
19 martin 1.134 //
|
20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27 martin 1.134 //
|
28 martin 1.133 //////////////////////////////////////////////////////////////////////////
|
29 mike 1.27 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
|
32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
|
33 mike 1.113 #include <cstring>
|
34 kumpf 1.48 #include "InternalException.h"
|
35 mike 1.112 #include "MessageLoader.h"
36 #include "StringRep.h"
|
37 karl 1.140 #include <Pegasus/Common/Pegasus_inl.h>
38 #include <cstdarg>
|
39 david 1.69
40 #ifdef PEGASUS_HAS_ICU
|
41 kumpf 1.132 # include <unicode/ures.h>
42 # include <unicode/ustring.h>
43 # include <unicode/uchar.h>
|
44 david 1.69 #endif
45
|
46 mike 1.112 PEGASUS_NAMESPACE_BEGIN
|
47 mike 1.28
|
48 mike 1.112 //==============================================================================
49 //
50 // Compile-time macros (undefined by default).
51 //
52 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
53 //
54 //==============================================================================
|
55 mike 1.27
|
56 mike 1.112 //==============================================================================
|
57 kumpf 1.39 //
|
58 mike 1.112 // File-scope definitions:
|
59 kumpf 1.54 //
|
60 mike 1.112 //==============================================================================
61
62 // Note: this table is much faster than the system toupper(). Please do not
63 // change.
|
64 kumpf 1.54
|
65 david.dillard 1.116 const Uint8 _toUpperTable[256] =
|
66 kumpf 1.54 {
|
67 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
68 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
69 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
70 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
71 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
72 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
73 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
74 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
75 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
76 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
77 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
78 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
79 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
80 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
81 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
82 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
83 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
84 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
85 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
86 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
87 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
88 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
89 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
90 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
91 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
92 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
93 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
94 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
95 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
96 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
97 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
98 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
99 };
100
|
101 dev.meetei 1.139 // Note: this table is much faster than the system tolower(). Please do not
|
102 mike 1.112 // change.
103
|
104 david.dillard 1.116 const Uint8 _toLowerTable[256] =
|
105 mike 1.112 {
106 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
107 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
108 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
109 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
110 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
111 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
112 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
113 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
114 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
115 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
116 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
117 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
118 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
119 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
120 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
121 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
122 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
123 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
124 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
125 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
126 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
127 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
128 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
129 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
130 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
131 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
132 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
133 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
134 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
135 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
136 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
137 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
138 };
139
140 // Converts 16-bit characters to upper case. This routine is faster than the
141 // system toupper(). Please do not change.
142 inline Uint16 _toUpper(Uint16 x)
143 {
144 return (x & 0xFF00) ? x : _toUpperTable[x];
|
145 kumpf 1.54 }
146
|
147 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the
148 // system toupper(). Please do not change.
149 inline Uint16 _toLower(Uint16 x)
|
150 kumpf 1.54 {
|
151 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x];
152 }
153
154 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
155 static Uint32 _roundUpToPow2(Uint32 x)
156 {
|
157 dave.sudlik 1.120 // Check for potential overflow in x
158 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
|
159 mike 1.112
160 if (x < 8)
161 return 8;
162
163 x--;
164 x |= (x >> 1);
165 x |= (x >> 2);
166 x |= (x >> 4);
167 x |= (x >> 8);
168 x |= (x >> 16);
169 x++;
170
171 return x;
172 }
173
174 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
175 {
176 // The following employs loop unrolling for efficiency. Please do not
177 // eliminate.
178
179 while (n >= 4)
180 mike 1.112 {
181 if (s[0] == c)
182 return (Uint16*)s;
183 if (s[1] == c)
184 return (Uint16*)&s[1];
185 if (s[2] == c)
186 return (Uint16*)&s[2];
187 if (s[3] == c)
188 return (Uint16*)&s[3];
|
189 kumpf 1.82
|
190 mike 1.112 n -= 4;
191 s += 4;
192 }
193
194 if (n)
195 {
196 if (*s == c)
197 return (Uint16*)s;
198 s++;
199 n--;
200 }
201
202 if (n)
203 {
204 if (*s == c)
205 return (Uint16*)s;
206 s++;
207 n--;
208 }
209
210 if (n && *s == c)
211 mike 1.112 return (Uint16*)s;
212
213 // Not found!
214 return 0;
215 }
216
217 static int _compare(const Uint16* s1, const Uint16* s2)
218 {
219 while (*s1 && *s2)
220 {
221 int r = *s1++ - *s2++;
222
223 if (r)
224 return r;
225 }
226
227 if (*s2)
228 return -1;
229 else if (*s1)
230 return 1;
231
232 mike 1.112 return 0;
233 }
234
|
#ifdef PEGASUS_STRING_NO_UTF8
// Compares a zero-terminated 16-bit string with a zero-terminated char
// string, element by element, with each char converted to Uint16.
// Returns the difference of the first differing pair, or of the pair at
// which s1 ends (zero when both end together).
static int _compareNoUTF8(const Uint16* s1, const char* s2)
{
    for (;;)
    {
        Uint16 c1 = *s1++;
        Uint16 c2 = *s2++;

        // Stop at the end of s1 or at the first mismatch.
        if (c1 == 0 || c1 != c2)
            return c1 - c2;
    }
}
#endif
|
254 mike 1.112
255 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
256 {
257 memcpy(s1, s2, n * sizeof(Uint16));
258 }
259
260 void StringThrowOutOfBounds()
261 {
262 throw IndexOutOfBoundsException();
263 }
264
265 inline void _checkNullPointer(const void* ptr)
266 {
267 if (!ptr)
268 throw NullPointer();
269 }
270
|
271 thilo.boehm 1.138 #define BADUTF8_MAX_CLEAR_CHAR 40
272 #define BADUTF8_MAX_CHAR_TO_HEX 10
273
274 static void _formatBadUTF8Chars(
275 char* buffer,
276 Uint32 index,
277 const char* q,
278 size_t n )
|
279 mike 1.112 {
|
280 thilo.boehm 1.138
281 char tmp[20];
282 const char* start;
283
284 size_t clearChar =
285 (( index < BADUTF8_MAX_CLEAR_CHAR ) ? index : BADUTF8_MAX_CLEAR_CHAR );
286 size_t charToHex =
287 ((n-index-1) < BADUTF8_MAX_CHAR_TO_HEX ?
288 (n-index-1) : BADUTF8_MAX_CHAR_TO_HEX );
289
290 if (index < BADUTF8_MAX_CLEAR_CHAR)
291 {
292 start = q;
293 } else
294 {
295 start = &(q[ index - BADUTF8_MAX_CLEAR_CHAR]);
296 }
297
298 // Intialize the buffer with the first character as '\0' to be able to use
299 // strnchat() and strcat()
300 buffer[0] = 0;
301 thilo.boehm 1.138 // Start the buffer with the valid UTF8 chars
302 strncat(buffer,start,clearChar);
303 for (size_t i = clearChar, j = 0; j <= charToHex; i++,j++ )
304 {
305 tmp[0] = 0;
306 sprintf(&(tmp[0])," 0x%02X",(Uint8)start[i]);
307 strncat(buffer,&(tmp[0]),5);
308 }
309
310 }
311
312 static void _StringThrowBadUTF8(Uint32 index, const char* q, size_t n)
313 {
314 char buffer[1024];
315
316 _formatBadUTF8Chars(&(buffer[0]),index,q,n);
317
|
318 mike 1.112 MessageLoaderParms parms(
|
319 thilo.boehm 1.138 "Common.String.BAD_UTF8_LONG",
|
320 mike 1.112 "The byte sequence starting at index $0 "
|
321 thilo.boehm 1.138 "is not valid UTF-8 encoding: $1",
322 index,buffer);
323
|
324 mike 1.112 throw Exception(parms);
325 }
326
|
327 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
|
328 mike 1.112 // terminator).
329 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
330 {
331 // The following employs loop unrolling for efficiency. Please do not
332 // eliminate.
333
334 const Uint16* q = src;
335 Uint8* p = (Uint8*)dest;
336
337 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
|
338 kumpf 1.82 {
|
339 mike 1.112 p[0] = q[0];
340 p[1] = q[1];
341 p[2] = q[2];
342 p[3] = q[3];
343 p += 4;
344 q += 4;
345 n -= 4;
|
346 kumpf 1.82 }
|
347 mike 1.112
348 switch (n)
349 {
350 case 0:
351 return p - (Uint8*)dest;
352 case 1:
353 if (q[0] < 128)
354 {
355 p[0] = q[0];
356 return p + 1 - (Uint8*)dest;
357 }
358 break;
359 case 2:
360 if (q[0] < 128 && q[1] < 128)
361 {
362 p[0] = q[0];
363 p[1] = q[1];
364 return p + 2 - (Uint8*)dest;
365 }
366 break;
367 case 3:
368 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
369 {
370 p[0] = q[0];
371 p[1] = q[1];
372 p[2] = q[2];
373 return p + 3 - (Uint8*)dest;
374 }
375 break;
376 }
377
378 // If this line was reached, there must be characters greater than 128.
379
380 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
381
382 return p - (Uint8*)dest;
|
383 kumpf 1.54 }
384
|
385 mike 1.112 //==============================================================================
386 //
387 // class CString
388 //
389 //==============================================================================
390
391 CString::CString(const CString& cstr) : _rep(0)
|
392 kumpf 1.54 {
|
393 mike 1.112 if (cstr._rep)
|
394 kumpf 1.82 {
|
395 mike 1.112 size_t n = strlen(cstr._rep) + 1;
396 _rep = (char*)operator new(n);
397 memcpy(_rep, cstr._rep, n);
|
398 kumpf 1.82 }
|
399 kumpf 1.54 }
400
|
401 kumpf 1.56 CString& CString::operator=(const CString& cstr)
402 {
|
403 kumpf 1.82 if (&cstr != this)
|
404 kumpf 1.81 {
|
405 kumpf 1.82 if (_rep)
406 {
|
407 mike 1.112 operator delete(_rep);
|
408 kumpf 1.82 _rep = 0;
409 }
|
410 mike 1.112
|
411 kumpf 1.82 if (cstr._rep)
412 {
|
413 mike 1.112 size_t n = strlen(cstr._rep) + 1;
414 _rep = (char*)operator new(n);
415 memcpy(_rep, cstr._rep, n);
|
416 kumpf 1.82 }
|
417 kumpf 1.81 }
|
418 mike 1.112
|
419 kumpf 1.56 return *this;
420 }
421
|
422 mike 1.112 //==============================================================================
|
423 kumpf 1.54 //
|
424 mike 1.112 // class StringRep
|
425 kumpf 1.39 //
|
426 mike 1.112 //==============================================================================
|
427 kumpf 1.39
|
// Definition of the static StringRep instance that String objects in this
// file point at instead of a heap-allocated representation: String::clear()
// switches to it when the current representation is shared, and the UTF-8
// conversion failure paths switch to it before throwing.
428 mike 1.112 StringRep StringRep::_emptyRep;
|
429 mike 1.27
|
430 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
|
431 mike 1.27 {
|
432 dave.sudlik 1.120 // Check for potential overflow in cap
433 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
|
434 mike 1.27
|
435 mike 1.112 StringRep* rep = (StringRep*)::operator new(
436 sizeof(StringRep) + cap * sizeof(Uint16));
437 rep->cap = cap;
438 new(&rep->refs) AtomicInt(1);
439
440 return rep;
|
441 mike 1.27 }
442
|
443 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
444 chuck 1.102 {
|
445 mike 1.114 if (cap > rep->cap || rep->refs.get() != 1)
|
446 chuck 1.102 {
|
447 mike 1.112 size_t n = _roundUpToPow2(cap);
448 StringRep* newRep = StringRep::alloc(n);
449 newRep->size = rep->size;
450 _copy(newRep->data, rep->data, rep->size + 1);
451 StringRep::unref(rep);
452 rep = newRep;
453 }
454 }
|
455 david.dillard 1.105
|
456 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
457 {
458 StringRep* rep = StringRep::alloc(size);
459 rep->size = size;
460 _copy(rep->data, data, size);
461 rep->data[size] = '\0';
462 return rep;
463 }
|
464 chuck 1.102
|
465 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
466 {
467 // Return a new copy of rep. Release rep.
|
468 chuck 1.102
|
469 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size);
470 newRep->size = rep->size;
471 _copy(newRep->data, rep->data, rep->size);
472 newRep->data[newRep->size] = '\0';
473 StringRep::unref(rep);
474 return newRep;
|
475 chuck 1.102 }
476
|
477 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
|
478 kumpf 1.43 {
|
479 mike 1.112 StringRep* rep = StringRep::alloc(size);
480 size_t utf8_error_index;
481 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
482
483 if (rep->size == size_t(-1))
484 {
485 StringRep::free(rep);
|
486 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index, data,size);
|
487 mike 1.112 }
|
488 kumpf 1.43
|
489 mike 1.112 rep->data[rep->size] = '\0';
|
490 kumpf 1.43
|
491 mike 1.112 return rep;
|
492 mike 1.27 }
493
|
494 mike 1.112 Uint32 StringRep::length(const Uint16* str)
|
495 mike 1.27 {
|
496 mike 1.112 // Note: We could unroll this but it is rarely called.
497
498 const Uint16* end = (Uint16*)str;
499
500 while (*end++)
501 ;
502
|
503 a.dunfey 1.125 return (Uint32)(end - str - 1);
|
504 kumpf 1.39 }
|
505 tony 1.66
|
506 mike 1.112 //==============================================================================
507 //
508 // class String
509 //
510 //==============================================================================
511
// Definition of the static constant String::EMPTY, default-constructed.
512 const String String::EMPTY;
|
513 mike 1.27
|
514 kumpf 1.39 String::String(const String& str, Uint32 n)
515 {
|
516 mike 1.112 _checkBounds(n, str._rep->size);
517 _rep = StringRep::create(str._rep->data, n);
|
518 kumpf 1.39 }
519
520 String::String(const Char16* str)
521 {
|
522 mike 1.112 _checkNullPointer(str);
523 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
524 mike 1.27 }
525
|
526 kumpf 1.39 String::String(const Char16* str, Uint32 n)
527 {
|
528 mike 1.112 _checkNullPointer(str);
529 _rep = StringRep::create((Uint16*)str, n);
|
530 kumpf 1.39 }
531
532 String::String(const char* str)
|
533 mike 1.27 {
|
534 mike 1.112 _checkNullPointer(str);
|
535 david.dillard 1.105
|
536 mike 1.112 // Set this just in case create() throws an exception.
537 _rep = &StringRep::_emptyRep;
538 _rep = StringRep::create(str, strlen(str));
|
539 mike 1.27 }
540
|
541 kumpf 1.39 String::String(const char* str, Uint32 n)
|
542 mike 1.27 {
|
543 mike 1.112 _checkNullPointer(str);
|
544 david.dillard 1.105
|
545 mike 1.112 // Set this just in case create() throws an exception.
546 _rep = &StringRep::_emptyRep;
547 _rep = StringRep::create(str, n);
|
548 kumpf 1.39 }
|
549 mike 1.27
|
550 mike 1.112 String::String(const String& s1, const String& s2)
|
551 kumpf 1.39 {
|
552 mike 1.112 size_t n1 = s1._rep->size;
553 size_t n2 = s2._rep->size;
554 size_t n = n1 + n2;
555 _rep = StringRep::alloc(n);
556 _copy(_rep->data, s1._rep->data, n1);
557 _copy(_rep->data + n1, s2._rep->data, n2);
558 _rep->size = n;
559 _rep->data[n] = '\0';
|
560 mike 1.27 }
561
|
562 mike 1.112 String::String(const String& s1, const char* s2)
|
563 mike 1.27 {
|
564 mike 1.112 _checkNullPointer(s2);
565 size_t n1 = s1._rep->size;
566 size_t n2 = strlen(s2);
567 _rep = StringRep::alloc(n1 + n2);
568 _copy(_rep->data, s1._rep->data, n1);
569 size_t utf8_error_index;
570 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
571
572 if (tmp == size_t(-1))
|
573 kumpf 1.82 {
|
574 mike 1.112 StringRep::free(_rep);
575 _rep = &StringRep::_emptyRep;
|
576 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,s2,n2);
|
577 kumpf 1.82 }
|
578 mike 1.112
579 _rep->size = n1 + tmp;
580 _rep->data[_rep->size] = '\0';
|
581 mike 1.27 }
582
|
583 mike 1.112 String::String(const char* s1, const String& s2)
|
584 mike 1.27 {
|
585 mike 1.112 _checkNullPointer(s1);
586 size_t n1 = strlen(s1);
587 size_t n2 = s2._rep->size;
588 _rep = StringRep::alloc(n1 + n2);
589 size_t utf8_error_index;
590 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
591
592 if (tmp == size_t(-1))
593 {
594 StringRep::free(_rep);
595 _rep = &StringRep::_emptyRep;
|
596 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,s1,n1);
|
597 mike 1.112 }
598
599 _rep->size = n2 + tmp;
600 _copy(_rep->data + n1, s2._rep->data, n2);
601 _rep->data[_rep->size] = '\0';
|
602 mike 1.27 }
603
|
604 mike 1.112 String& String::assign(const String& str)
|
605 mike 1.27 {
|
606 mike 1.112 if (_rep != str._rep)
|
607 david.dillard 1.105 {
|
608 mike 1.112 StringRep::unref(_rep);
609 StringRep::ref(_rep = str._rep);
|
610 david.dillard 1.105 }
611
|
612 mike 1.27 return *this;
613 }
614
615 String& String::assign(const Char16* str, Uint32 n)
616 {
|
617 mike 1.112 _checkNullPointer(str);
618
|
619 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
620 david.dillard 1.105 {
|
621 mike 1.112 StringRep::unref(_rep);
622 _rep = StringRep::alloc(n);
|
623 david.dillard 1.105 }
624
|
625 mike 1.112 _rep->size = n;
626 _copy(_rep->data, (Uint16*)str, n);
627 _rep->data[n] = '\0';
628
|
629 mike 1.27 return *this;
630 }
631
|
632 mike 1.112 String& String::assign(const char* str, Uint32 n)
|
633 chuck 1.102 {
|
634 mike 1.112 _checkNullPointer(str);
635
|
636 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
637 david.dillard 1.105 {
|
638 mike 1.112 StringRep::unref(_rep);
639 _rep = StringRep::alloc(n);
|
640 david.dillard 1.105 }
641
|
642 mike 1.112 size_t utf8_error_index;
643 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
|
644 chuck 1.102
|
645 mike 1.112 if (_rep->size == size_t(-1))
|
646 david.dillard 1.105 {
|
647 mike 1.112 StringRep::free(_rep);
648 _rep = &StringRep::_emptyRep;
|
649 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,str,n);
|
650 david.dillard 1.105 }
|
651 mike 1.112
652 _rep->data[_rep->size] = 0;
|
653 david.dillard 1.105
|
654 mike 1.27 return *this;
655 }
656
|
657 kumpf 1.39 void String::clear()
658 {
|
659 mike 1.112 if (_rep->size)
660 {
|
661 mike 1.114 if (_rep->refs.get() == 1)
|
662 mike 1.112 {
663 _rep->size = 0;
664 _rep->data[0] = '\0';
665 }
666 else
667 {
668 StringRep::unref(_rep);
669 _rep = &StringRep::_emptyRep;
670 }
671 }
|
672 kumpf 1.39 }
673
|
674 mike 1.112 void String::reserveCapacity(Uint32 cap)
|
675 kumpf 1.39 {
|
676 mike 1.112 _reserve(_rep, cap);
|
677 kumpf 1.39 }
678
|
679 mike 1.112 CString String::getCString() const
680 {
|
681 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16
682 // counterpart, so we allocate extra memory for the worst case. In the
|
683 mike 1.112 // best case, we may need only one third of the memory allocated. But
|
684 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since
685 // CString objects are usually short-lived (disappearing after only a few
|
686 mike 1.112 // instructions). CString objects are typically created on the stack as
687 // means to obtain a char* pointer.
688
689 #ifdef PEGASUS_STRING_NO_UTF8
690 char* str = (char*)operator new(_rep->size + 1);
691 _copy(str, _rep->data, _rep->size);
692 str[_rep->size] = '\0';
693 return CString(str);
|
694 gs.keenan 1.110 #else
|
695 a.dunfey 1.125 Uint32 n = (Uint32)(3 * _rep->size);
|
696 mike 1.112 char* str = (char*)operator new(n + 1);
697 size_t size = _copyToUTF8(str, _rep->data, _rep->size);
698 str[size] = '\0';
699 return CString(str);
|
700 gs.keenan 1.110 #endif
|
701 kumpf 1.39 }
702
|
703 mike 1.112 String& String::append(const Char16* str, Uint32 n)
|
704 kumpf 1.39 {
|
705 mike 1.112 _checkNullPointer(str);
706
707 size_t oldSize = _rep->size;
708 size_t newSize = oldSize + n;
|
709 a.dunfey 1.125 _reserve(_rep, (Uint32)newSize);
|
710 mike 1.112 _copy(_rep->data + oldSize, (Uint16*)str, n);
711 _rep->size = newSize;
712 _rep->data[newSize] = '\0';
713
714 return *this;
|
715 kumpf 1.39 }
716
|
717 mike 1.112 String& String::append(const String& str)
|
718 mike 1.27 {
|
719 a.dunfey 1.125 return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
|
720 mike 1.27 }
721
|
722 mike 1.112 String& String::append(const char* str, Uint32 size)
|
723 mike 1.27 {
|
724 mike 1.112 _checkNullPointer(str);
725
726 size_t oldSize = _rep->size;
727 size_t cap = oldSize + size;
728
|
729 a.dunfey 1.125 _reserve(_rep, (Uint32)cap);
|
730 mike 1.112 size_t utf8_error_index;
731 size_t tmp = _convert(
732 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
733
734 if (tmp == size_t(-1))
735 {
736 StringRep::free(_rep);
737 _rep = &StringRep::_emptyRep;
|
738 thilo.boehm 1.138 _StringThrowBadUTF8((Uint32)utf8_error_index,str,size);
|
739 mike 1.112 }
|
740 mike 1.27
|
741 mike 1.112 _rep->size += tmp;
742 _rep->data[_rep->size] = '\0';
|
743 mike 1.27
|
744 kumpf 1.39 return *this;
745 }
746
|
747 mike 1.112 void String::remove(Uint32 index, Uint32 n)
|
748 mike 1.27 {
|
749 mike 1.112 if (n == PEG_NOT_FOUND)
|
750 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
|
751 mike 1.112
752 _checkBounds(index + n, _rep->size);
753
|
754 mike 1.114 if (_rep->refs.get() != 1)
|
755 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
|
756 mike 1.27
|
757 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
|
758 mike 1.27
|
759 mike 1.112 size_t rem = _rep->size - (index + n);
760 Uint16* data = _rep->data;
|
761 mike 1.27
|
762 mike 1.112 if (rem)
763 memmove(data + index, data + index + n, rem * sizeof(Uint16));
|
764 mike 1.27
|
765 mike 1.112 _rep->size -= n;
766 data[_rep->size] = '\0';
|
767 mike 1.27 }
768
|
769 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
|
770 mike 1.27 {
|
771 mike 1.112 // Note: this implementation is very permissive but used for
772 // backwards compatibility.
773
774 if (index < _rep->size)
|
775 mike 1.27 {
|
776 mike 1.112 if (n == PEG_NOT_FOUND || n > _rep->size - index)
|
777 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
|
778 mike 1.27
|
779 w.otsuka 1.121 return String((Char16*)(_rep->data + index), n);
|
780 mike 1.27 }
|
781 david.dillard 1.105
782 return String();
|
783 mike 1.27 }
784
785 Uint32 String::find(Char16 c) const
786 {
|
787 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
788 mike 1.27
|
789 mike 1.112 if (p)
|
790 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
791 mike 1.27
792 return PEG_NOT_FOUND;
793 }
794
|
795 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
796 mike 1.30 {
|
797 mike 1.112 _checkBounds(index, _rep->size);
798
799 if (index >= _rep->size)
800 return PEG_NOT_FOUND;
801
802 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
|
803 mike 1.30
|
804 mike 1.112 if (p)
|
805 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
806 mike 1.30
807 return PEG_NOT_FOUND;
808 }
809
|
810 mike 1.112 Uint32 StringFindAux(
811 const StringRep* _rep, const Char16* s, Uint32 n)
|
812 mike 1.27 {
|
813 mike 1.112 _checkNullPointer(s);
|
814 mike 1.27
|
815 mike 1.112 const Uint16* data = _rep->data;
816 size_t rem = _rep->size;
817
818 while (n <= rem)
|
819 mike 1.30 {
|
820 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]);
821
822 if (!p)
823 break;
|
824 mike 1.30
|
825 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
|
826 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
827 david.dillard 1.105
|
828 mike 1.112 p++;
829 rem -= p - data;
830 data = p;
|
831 mike 1.27 }
|
832 mike 1.112
|
833 mike 1.27 return PEG_NOT_FOUND;
834 }
835
|
836 mike 1.112 Uint32 String::find(const char* s) const
837 {
838 _checkNullPointer(s);
839
840 // Note: could optimize away creation of temporary, but this is rarely
841 // called.
842 return find(String(s));
843 }
844
|
845 mike 1.27 Uint32 String::reverseFind(Char16 c) const
846 {
|
847 mike 1.112 Uint16 x = c;
848 Uint16* p = _rep->data;
849 Uint16* q = _rep->data + _rep->size;
|
850 mike 1.27
|
851 mike 1.112 while (q != p)
|
852 mike 1.27 {
|
853 mike 1.112 if (*--q == x)
|
854 david.dillard 1.116 return static_cast<Uint32>(q - p);
|
855 mike 1.27 }
856
857 return PEG_NOT_FOUND;
858 }
859
860 void String::toLower()
861 {
|
862 david 1.69 #ifdef PEGASUS_HAS_ICU
|
863 mike 1.112
|
864 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
865 david 1.90 {
|
866 mike 1.114 if (_rep->refs.get() != 1)
|
867 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
868
|
869 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
870 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
871 // prevents optimizations where the us-ascii is converted before
|
872 mike 1.112 // calling ICU.
|
873 yi.zhou 1.108 // The string may shrink or expand after the convert.
874
|
875 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns
876 //// only the size when zero is passed as the destination size argument.
877
|
878 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
879
|
880 mike 1.112 int32_t newSize = u_strToLower(
881 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
882 david.dillard 1.116
|
883 mike 1.112 err = U_ZERO_ERROR;
884
885 //// Reserve enough space for the result.
886
887 if ((Uint32)newSize > _rep->cap)
888 _reserve(_rep, newSize);
889
890 //// Perform the conversion (overlapping buffers are allowed).
|
891 chuck 1.99
|
892 mike 1.112 u_strToLower((UChar*)_rep->data, newSize,
893 (UChar*)_rep->data, _rep->size, NULL, &err);
|
894 yi.zhou 1.108
|
895 mike 1.112 _rep->size = newSize;
896 return;
|
897 david 1.90 }
|
898 mike 1.112
899 #endif /* PEGASUS_HAS_ICU */
900
|
901 mike 1.114 if (_rep->refs.get() != 1)
|
902 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
903
904 Uint16* p = _rep->data;
905 size_t n = _rep->size;
906
907 for (; n--; p++)
|
908 david 1.90 {
|
909 mike 1.112 if (!(*p & 0xFF00))
910 *p = _toLower(*p);
|
911 mike 1.27 }
|
912 kumpf 1.39 }
913
|
914 chuck 1.99 void String::toUpper()
|
915 david 1.90 {
916 #ifdef PEGASUS_HAS_ICU
|
917 mike 1.112
|
918 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
919 chuck 1.99 {
|
920 mike 1.114 if (_rep->refs.get() != 1)
|
921 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
922
|
923 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
924 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
925 // prevents optimizations where the us-ascii is converted before
|
926 mike 1.112 // calling ICU.
|
927 yi.zhou 1.108 // The string may shrink or expand after the convert.
928
|
929 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns
930 //// only the size when zero is passed as the destination size argument.
931
|
932 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
933
|
934 mike 1.112 int32_t newSize = u_strToUpper(
935 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
936
937 err = U_ZERO_ERROR;
938
939 //// Reserve enough space for the result.
940
941 if ((Uint32)newSize > _rep->cap)
942 _reserve(_rep, newSize);
943
944 //// Perform the conversion (overlapping buffers are allowed).
945
946 u_strToUpper((UChar*)_rep->data, newSize,
947 (UChar*)_rep->data, _rep->size, NULL, &err);
|
948 chuck 1.99
|
949 mike 1.112 _rep->size = newSize;
|
950 yi.zhou 1.108
|
951 mike 1.112 return;
|
952 david 1.91 }
|
953 mike 1.112
954 #endif /* PEGASUS_HAS_ICU */
955
|
956 mike 1.114 if (_rep->refs.get() != 1)
|
957 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
958
959 Uint16* p = _rep->data;
960 size_t n = _rep->size;
961
962 for (; n--; p++)
963 *p = _toUpper(*p);
|
964 david 1.90 }
965
|
966 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
967 kumpf 1.39 {
|
968 kumpf 1.118 const Uint16* p1 = s1._rep->data;
969 const Uint16* p2 = s2._rep->data;
|
970 mike 1.27
|
971 kumpf 1.118 while (n--)
972 {
973 int r = *p1++ - *p2++;
974 if (r)
975 {
976 return r;
977 }
978 else if (!p1[-1])
979 {
980 // We must have encountered a null terminator in both s1 and s2
981 return 0;
982 }
983 }
984 return 0;
|
985 mike 1.27 }
986
|
987 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
988 mike 1.30 {
|
989 mike 1.112 return _compare(s1._rep->data, s2._rep->data);
990 }
|
991 kumpf 1.43
|
992 mike 1.112 int String::compare(const String& s1, const char* s2)
993 {
994 _checkNullPointer(s2);
|
995 mike 1.30
|
996 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
997 return _compareNoUTF8(s1._rep->data, s2);
998 #else
999 // ATTN: optimize this!
1000 return String::compare(s1, String(s2));
1001 #endif
|
1002 mike 1.30 }
1003
|
1004 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
|
1005 kumpf 1.40 {
|
1006 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1007 mike 1.112
|
1008 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1009 {
|
1010 mike 1.112 return u_strcasecmp(
|
1011 dave.sudlik 1.124 (const UChar*)str1._rep->data,
1012 (const UChar*)str2._rep->data,
1013 U_FOLD_CASE_DEFAULT
1014 );
|
1015 yi.zhou 1.108 }
|
1016 kumpf 1.40
|
1017 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1018
1019 const Uint16* s1 = str1._rep->data;
1020 const Uint16* s2 = str2._rep->data;
1021
1022 while (*s1 && *s2)
|
1023 kumpf 1.40 {
|
1024 mike 1.112 int r = _toLower(*s1++) - _toLower(*s2++);
|
1025 kumpf 1.40
|
1026 david.dillard 1.105 if (r)
1027 return r;
|
1028 kumpf 1.40 }
1029
|
1030 mike 1.112 if (*s2)
|
1031 david.dillard 1.105 return -1;
|
1032 mike 1.112 else if (*s1)
|
1033 david.dillard 1.105 return 1;
|
1034 kumpf 1.40
1035 return 0;
1036 }
1037
|
1038 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
|
1039 mike 1.27 {
|
1040 mike 1.112 #ifdef PEGASUS_HAS_ICU
1041
1042 return String::compareNoCase(s1, s2) == 0;
1043
1044 #else /* PEGASUS_HAS_ICU */
|
1045 mike 1.27
|
1046 mike 1.112 // The following employs loop unrolling for efficiency. Please do not
1047 // eliminate.
|
1048 kumpf 1.39
|
1049 mike 1.112 Uint16* p = (Uint16*)s1.getChar16Data();
1050 Uint16* q = (Uint16*)s2.getChar16Data();
1051 Uint32 n = s2.size();
1052
1053 while (n >= 8)
1054 {
1055 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1056 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1057 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1058 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1059 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1060 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1061 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1062 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1063 {
1064 return false;
1065 }
|
1066 kumpf 1.39
|
1067 mike 1.112 n -= 8;
1068 p += 8;
1069 q += 8;
1070 }
|
1071 mike 1.27
|
1072 mike 1.112 while (n >= 4)
|
1073 kumpf 1.39 {
|
1074 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1075 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1076 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1077 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
|
1078 david.dillard 1.105 {
|
1079 mike 1.112 return false;
|
1080 david.dillard 1.105 }
|
1081 mike 1.112
1082 n -= 4;
1083 p += 4;
1084 q += 4;
1085 }
1086
1087 while (n--)
1088 {
1089 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
|
1090 david.dillard 1.105 return false;
|
1091 mike 1.112
1092 p++;
1093 q++;
|
1094 kumpf 1.39 }
|
1095 mike 1.28
|
1096 kumpf 1.39 return true;
|
1097 mike 1.112
1098 #endif /* PEGASUS_HAS_ICU */
|
1099 david 1.69 }
1100
|
1101 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1102 david 1.69 {
|
1103 mike 1.112 _checkNullPointer(s2);
|
1104 david 1.69
|
1105 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1106 david 1.69
|
1107 mike 1.112 return String::equalNoCase(s1, String(s2));
|
1108 david 1.69
|
1109 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1110 david 1.69
|
1111 mike 1.112 const Uint16* p1 = (Uint16*)s1._rep->data;
1112 const char* p2 = s2;
1113 size_t n = s1._rep->size;
|
1114 david.dillard 1.105
|
1115 mike 1.112 while (n--)
1116 {
1117 if (!*p2)
1118 return false;
|
1119 david 1.71
|
1120 mike 1.112 if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1121 return false;
1122 }
|
1123 kumpf 1.42
|
1124 mike 1.112 if (*p2)
1125 return false;
|
1126 david.dillard 1.116
|
1127 mike 1.112 return true;
|
1128 karl 1.36
|
1129 mike 1.112 #else /* PEGASUS_HAS_ICU */
|
1130 david.dillard 1.105
|
1131 mike 1.112 // ATTN: optimize this!
1132 return String::equalNoCase(s1, String(s2));
|
1133 david.dillard 1.105
|
1134 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1135 }
|
1136 chuck 1.78
|
1137 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
|
1138 karl 1.36 {
|
1139 marek 1.131 return (s1._rep == s2._rep) ||
|
1140 marek 1.137 ((s1._rep->size == s2._rep->size) &&
1141 memcmp(s1._rep->data,
1142 s2._rep->data,
1143 s1._rep->size * sizeof(Uint16)) == 0);
|
1144 karl 1.36 }
1145
|
1146 karl 1.140 void String::appendPrintf(const char* format, ...)
1147 {
1148 va_list ap;
1149 va_start(ap, format);
1150
1151 // Format into allocated memory
1152 ////char* rtnCharPtr = _charVPrintf(format, ap);
1153
1154 // Iniitial allocation size. This is a guess assuming that
1155 // most printfs are one or two lines long
1156 int allocSize = 256;
1157 int rtnSize;
1158 char *p;
1159
1160 // initial allocate for output
1161 if ((p = (char*)malloc(allocSize)) == NULL)
1162 {
1163 return;
1164 }
1165
1166 // repeat formatting with increased realloc until it works.
1167 karl 1.140 do
1168 {
1169 rtnSize = vsnprintf(p, allocSize, format, ap);
1170
1171 // return if successful; i.e. if not negative and
1172 // returns less than allocated size.
1173 if (rtnSize > -1 && rtnSize < allocSize)
1174 {
1175 break;
1176 }
1177
1178 // increment alloc size. Positive return is
1179 // expected size and negative is error.
1180 allocSize = (rtnSize > -1)? (rtnSize + 1) : allocSize * 2;
1181
1182 } while((p = (char*)peg_inln_realloc(p, allocSize)) != NULL);
1183
1184 // get here only with error in malloc.
1185
1186 va_end(ap);
1187
1188 karl 1.140 // Free allocated memory append printf output to current string
1189 append(p, rtnSize);
1190 free(p);
1191 }
1192
|
1193 mike 1.112 Boolean String::equal(const String& s1, const char* s2)
1194 {
1195 #ifdef PEGASUS_STRING_NO_UTF8
|
1196 kumpf 1.35
|
1197 mike 1.112 _checkNullPointer(s2);
|
1198 kumpf 1.39
|
1199 mike 1.112 const Uint16* p = (Uint16*)s1._rep->data;
1200 const char* q = s2;
|
1201 kumpf 1.39
|
1202 mike 1.112 while (*p && *q)
1203 {
1204 if (*p++ != Uint16(*q++))
1205 return false;
1206 }
|
1207 kumpf 1.39
|
1208 mike 1.112 return !(*p || *q);
|
1209 kumpf 1.39
|
1210 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
|
1211 kumpf 1.39
|
1212 mike 1.112 return String::equal(s1, String(s2));
|
1213 kumpf 1.39
|
1214 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1215 kumpf 1.39 }
1216
|
1217 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1218 kumpf 1.39 {
|
1219 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1220 david 1.69
|
1221 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1222 {
|
1223 david.dillard 1.105 char *buf = NULL;
1224 const int size = str.size() * 6;
|
1225 mike 1.112 UnicodeString UniStr(
1226 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1227 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1228 buf = new char[bufsize+1];
1229 UniStr.extract(0,bufsize,buf);
1230 os << buf;
1231 os.flush();
1232 delete [] buf;
|
1233 david.dillard 1.116 return os;
|
1234 yi.zhou 1.108 }
|
1235 mike 1.112
|
1236 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
|
1237 mike 1.112
1238 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1239 yi.zhou 1.108 {
|
1240 mike 1.112 Uint16 code = str[i];
|
1241 david.dillard 1.105
|
1242 mike 1.112 if (code > 0 && !(code & 0xFF00))
1243 os << char(code);
1244 else
1245 {
1246 // Print in hex format:
1247 char buffer[8];
1248 sprintf(buffer, "\\x%04X", code);
1249 os << buffer;
|
1250 david.dillard 1.105 }
|
1251 yi.zhou 1.108 }
|
1252 kumpf 1.39
1253 return os;
1254 }
1255
|
1256 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
|
1257 kumpf 1.39 {
|
1258 mike 1.112 StringRep* tmp;
1259
1260 if (_rep->cap)
1261 {
1262 tmp = StringRep::alloc(2 * _rep->cap);
1263 tmp->size = _rep->size;
1264 _copy(tmp->data, _rep->data, _rep->size);
1265 }
1266 else
1267 {
1268 tmp = StringRep::alloc(8);
1269 tmp->size = 0;
1270 }
1271
1272 StringRep::unref(_rep);
1273 _rep = tmp;
|
1274 kumpf 1.39 }
1275
|
1276 thilo.boehm 1.128 void AssignASCII(String& s, const char* str, Uint32 n)
1277 {
1278 class StringLayout
1279 {
1280 public:
1281 StringRep* rep;
1282 };
1283
|
1284 kumpf 1.130 StringLayout* that = reinterpret_cast<StringLayout*>(&s);
|
1285 thilo.boehm 1.128
1286 _checkNullPointer(str);
1287
1288 if (n > that->rep->cap || that->rep->refs.get() != 1)
1289 {
1290 StringRep::unref(that->rep);
1291 that->rep = StringRep::alloc(n);
1292 }
1293
1294 _copy(that->rep->data, str, n);
1295 that->rep->size = n;
1296 that->rep->data[that->rep->size] = 0;
1297 }
1298
|
1299 mike 1.112 PEGASUS_NAMESPACE_END
1300
1301 /*
1302 ================================================================================
1303
1304 String optimizations:
1305
1306 1. Added mechanism allowing certain functions to be inlined only when
1307 used by internal Pegasus modules. External modules (i.e., providers)
1308 link to a non-inline version, which allows for binary compatibility.
1309
1310 2. Implemented copy-on-write with atomic increment/decrement. This
1311 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1312 for the 'ni1000' benchmark.
1313
1314 3. Employed loop unrolling in several places. For example, see:
1315
1316 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1317
1318 4. Used the "empty-rep" optimization (described in whitepaper from the
1319 GCC Developers Summit). This reduced default construction to a simple
1320 mike 1.112 pointer assignment.
1321
1322 inline String::String() : _rep(&_emptyRep) { }
1323
1324 5. Implemented Uint16 versions of toupper() and tolower() using tables.
1325 For example:
1326
1327 static const char _upper[] =
1328 {
1329 0,1,2,...255
1330 };
1331
1332 inline Uint16 _toUpper(Uint16 x)
1333 {
1334 return (x & 0xFF00) ? x : _upper[x];
1335 }
1336
|
1337 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
|
1338 mike 1.112 operation.
1339
|
1340 david.dillard 1.116 6. Implemented char* version of the following member functions to
1341 eliminate unnecessary creation of anonymous string objects
|
1342 mike 1.112 (temporaries).
1343
1344 String(const String& s1, const char* s2);
1345 String(const char* s1, const String& s2);
1346 String& String::operator=(const char* str);
1347 Uint32 String::find(const char* s) const;
1348 bool String::equal(const String& s1, const char* s2);
1349 static int String::compare(const String& s1, const char* s2);
1350 String& String::append(const char* str);
1351 String& String::append(const char* str, Uint32 size);
1352 static bool String::equalNoCase(const String& s1, const char* s2);
1353 String& operator=(const char* str)
1354 String& String::assign(const char* str)
1355 String& String::append(const char* str)
1356 Boolean operator==(const String& s1, const char* s2)
1357 Boolean operator==(const char* s1, const String& s2)
1358 Boolean operator!=(const String& s1, const char* s2)
1359 Boolean operator!=(const char* s1, const String& s2)
1360 Boolean operator<(const String& s1, const char* s2)
1361 Boolean operator<(const char* s1, const String& s2)
1362 Boolean operator>(const String& s1, const char* s2)
1363 mike 1.112 Boolean operator>(const char* s1, const String& s2)
1364 Boolean operator<=(const String& s1, const char* s2)
1365 Boolean operator<=(const char* s1, const String& s2)
1366 Boolean operator>=(const String& s1, const char* s2)
1367 Boolean operator>=(const char* s1, const String& s2)
1368 String operator+(const String& s1, const char* s2)
1369 String operator+(const char* s1, const String& s2)
1370
|
1371 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
|
1372 mike 1.112 power of two (algorithm from the book "Hacker's Delight").
1373
1374 static Uint32 _roundUpToPow2(Uint32 x)
1375 {
1376 if (x < 8)
1377 return 8;
1378
1379 x--;
1380 x |= (x >> 1);
1381 x |= (x >> 2);
1382 x |= (x >> 4);
1383 x |= (x >> 8);
1384 x |= (x >> 16);
1385 x++;
1386
1387 return x;
1388 }
1389
1390 8. Implemented "concatenating constructors" to eliminate temporaries
|
1391 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
|
1392 mike 1.112 optimization" described by Stan Lippman.
1393
1394 inline String operator+(const String& s1, const String& s2)
1395 {
1396 return String(s1, s2, 0);
1397 }
1398
1399 9. Experimented to find the optimal initial size for a short string.
1400 Eight seems to offer the best tradeoff between space and time.
1401
1402 10. Inlined all members of the Char16 class.
1403
1404 11. Used Uint16 internally in the String class. This showed no improvement
1405 since Char16 was already fully inlined and was essentially reduced to
1406 Uint16 in any case.
1407
1408 12. Implemented conditional logic (#if) allowing error checking logic to
|
1409 david.dillard 1.116 be excluded for better performance. Examples include bounds checking
|
1410 mike 1.112 and null-pointer checking.
1411
1412 13. Used memcpy() and memcmp() where possible. These are implemented using
1413 the rep family of instructions under Intel and are much faster.
1414
|
1415 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
|
1416 mike 1.112 copy routine overhead.
1417
1418 15. Added ASCII7 form of the constructor and assign().
1419
1420 String s("hello world", String::ASCII7);
1421
1422 s.assignASCII7("hello world");
1423
1424 This avoids slower UTF8 processing when not needed.
1425
1426 ================================================================================
1427 */
|