1 martin 1.133 //%LICENSE////////////////////////////////////////////////////////////////
|
2 martin 1.134 //
|
3 martin 1.133 // Licensed to The Open Group (TOG) under one or more contributor license
4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
5 // this work for additional information regarding copyright ownership.
6 // Each contributor licenses this file to you under the OpenPegasus Open
7 // Source License; you may not use this file except in compliance with the
8 // License.
|
9 martin 1.134 //
|
10 martin 1.133 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
|
16 martin 1.134 //
|
17 martin 1.133 // The above copyright notice and this permission notice shall be included
18 // in all copies or substantial portions of the Software.
|
19 martin 1.134 //
|
20 martin 1.133 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
21 martin 1.134 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22 martin 1.133 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27 martin 1.134 //
|
28 martin 1.133 //////////////////////////////////////////////////////////////////////////
|
29 mike 1.27 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
|
32 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
|
33 mike 1.113 #include <cstring>
|
34 kumpf 1.48 #include "InternalException.h"
|
35 mike 1.112 #include "MessageLoader.h"
36 #include "StringRep.h"
|
37 david 1.69
38 #ifdef PEGASUS_HAS_ICU
|
39 kumpf 1.132 # include <unicode/ures.h>
40 # include <unicode/ustring.h>
41 # include <unicode/uchar.h>
|
42 david 1.69 #endif
43
|
44 mike 1.112 PEGASUS_NAMESPACE_BEGIN
|
45 mike 1.28
|
46 mike 1.112 //==============================================================================
47 //
48 // Compile-time macros (undefined by default).
49 //
50 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
51 //
52 //==============================================================================
|
53 mike 1.27
|
54 mike 1.112 //==============================================================================
|
55 kumpf 1.39 //
|
56 mike 1.112 // File-scope definitions:
|
57 kumpf 1.54 //
|
58 mike 1.112 //==============================================================================
59
60 // Note: this table is much faster than the system toupper(). Please do not
61 // change.
|
62 kumpf 1.54
|
63 david.dillard 1.116 const Uint8 _toUpperTable[256] =
|
64 kumpf 1.54 {
|
65 mike 1.112 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
66 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
67 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
68 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
69 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
70 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
71 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
72 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
73 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
74 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
75 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
76 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
77 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
78 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
79 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
80 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
81 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
82 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
83 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
84 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
85 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
86 mike 1.112 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
87 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
88 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
89 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
90 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
91 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
92 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
93 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
94 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
95 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
96 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
97 };
98
99 // Note: this table is much faster than the system tolower(). Please do not
100 // change.
101
|
102 david.dillard 1.116 const Uint8 _toLowerTable[256] =
|
103 mike 1.112 {
104 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
105 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
106 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
107 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
108 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
109 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
110 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
111 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
112 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
113 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
114 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
115 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
116 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
117 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
118 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
119 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
120 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
121 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
122 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
123 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
124 mike 1.112 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
125 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
126 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
127 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
128 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
129 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
130 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
131 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
132 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
133 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
134 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
135 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
136 };
137
138 // Converts 16-bit characters to upper case. This routine is faster than the
139 // system toupper(). Please do not change.
140 inline Uint16 _toUpper(Uint16 x)
141 {
142 return (x & 0xFF00) ? x : _toUpperTable[x];
|
143 kumpf 1.54 }
144
|
145 mike 1.112 // Converts 16-bit characters to lower case. This routine is faster than the
146 // system toupper(). Please do not change.
147 inline Uint16 _toLower(Uint16 x)
|
148 kumpf 1.54 {
|
149 mike 1.112 return (x & 0xFF00) ? x : _toLowerTable[x];
150 }
151
152 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
153 static Uint32 _roundUpToPow2(Uint32 x)
154 {
|
155 dave.sudlik 1.120 // Check for potential overflow in x
156 PEGASUS_CHECK_CAPACITY_OVERFLOW(x);
|
157 mike 1.112
158 if (x < 8)
159 return 8;
160
161 x--;
162 x |= (x >> 1);
163 x |= (x >> 2);
164 x |= (x >> 4);
165 x |= (x >> 8);
166 x |= (x >> 16);
167 x++;
168
169 return x;
170 }
171
172 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
173 {
174 // The following employs loop unrolling for efficiency. Please do not
175 // eliminate.
176
177 while (n >= 4)
178 mike 1.112 {
179 if (s[0] == c)
180 return (Uint16*)s;
181 if (s[1] == c)
182 return (Uint16*)&s[1];
183 if (s[2] == c)
184 return (Uint16*)&s[2];
185 if (s[3] == c)
186 return (Uint16*)&s[3];
|
187 kumpf 1.82
|
188 mike 1.112 n -= 4;
189 s += 4;
190 }
191
192 if (n)
193 {
194 if (*s == c)
195 return (Uint16*)s;
196 s++;
197 n--;
198 }
199
200 if (n)
201 {
202 if (*s == c)
203 return (Uint16*)s;
204 s++;
205 n--;
206 }
207
208 if (n && *s == c)
209 mike 1.112 return (Uint16*)s;
210
211 // Not found!
212 return 0;
213 }
214
215 static int _compare(const Uint16* s1, const Uint16* s2)
216 {
217 while (*s1 && *s2)
218 {
219 int r = *s1++ - *s2++;
220
221 if (r)
222 return r;
223 }
224
225 if (*s2)
226 return -1;
227 else if (*s1)
228 return 1;
229
230 mike 1.112 return 0;
231 }
232
|
#ifdef PEGASUS_STRING_NO_UTF8
// Compares a zero-terminated 16-bit string with a zero-terminated char
// string, element by element. Returns the difference of the first pair that
// differs, or of the pair at which s1 ends.
static int _compareNoUTF8(const Uint16* s1, const char* s2)
{
    for (;;)
    {
        Uint16 lhs = *s1++;
        Uint16 rhs = *s2++;

        // Stop at the end of s1 or at the first mismatch.
        if (lhs == 0 || lhs != rhs)
            return lhs - rhs;
    }
}
#endif
|
252 mike 1.112
253 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
254 {
255 memcpy(s1, s2, n * sizeof(Uint16));
256 }
257
258 void StringThrowOutOfBounds()
259 {
260 throw IndexOutOfBoundsException();
261 }
262
263 inline void _checkNullPointer(const void* ptr)
264 {
265 if (!ptr)
266 throw NullPointer();
267 }
268
269 static void _StringThrowBadUTF8(Uint32 index)
270 {
271 MessageLoaderParms parms(
272 "Common.String.BAD_UTF8",
273 mike 1.112 "The byte sequence starting at index $0 "
274 "is not valid UTF-8 encoding.",
275 index);
276 throw Exception(parms);
277 }
278
|
279 david.dillard 1.116 // Note: dest must be at least three times src (plus an extra byte for
|
280 mike 1.112 // terminator).
281 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
282 {
283 // The following employs loop unrolling for efficiency. Please do not
284 // eliminate.
285
286 const Uint16* q = src;
287 Uint8* p = (Uint8*)dest;
288
289 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
|
290 kumpf 1.82 {
|
291 mike 1.112 p[0] = q[0];
292 p[1] = q[1];
293 p[2] = q[2];
294 p[3] = q[3];
295 p += 4;
296 q += 4;
297 n -= 4;
|
298 kumpf 1.82 }
|
299 mike 1.112
300 switch (n)
301 {
302 case 0:
303 return p - (Uint8*)dest;
304 case 1:
305 if (q[0] < 128)
306 {
307 p[0] = q[0];
308 return p + 1 - (Uint8*)dest;
309 }
310 break;
311 case 2:
312 if (q[0] < 128 && q[1] < 128)
313 {
314 p[0] = q[0];
315 p[1] = q[1];
316 return p + 2 - (Uint8*)dest;
317 }
318 break;
319 case 3:
320 mike 1.112 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
321 {
322 p[0] = q[0];
323 p[1] = q[1];
324 p[2] = q[2];
325 return p + 3 - (Uint8*)dest;
326 }
327 break;
328 }
329
330 // If this line was reached, there must be characters greater than 128.
331
332 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
333
334 return p - (Uint8*)dest;
|
335 kumpf 1.54 }
336
|
337 mike 1.112 //==============================================================================
338 //
339 // class CString
340 //
341 //==============================================================================
342
343 CString::CString(const CString& cstr) : _rep(0)
|
344 kumpf 1.54 {
|
345 mike 1.112 if (cstr._rep)
|
346 kumpf 1.82 {
|
347 mike 1.112 size_t n = strlen(cstr._rep) + 1;
348 _rep = (char*)operator new(n);
349 memcpy(_rep, cstr._rep, n);
|
350 kumpf 1.82 }
|
351 kumpf 1.54 }
352
|
353 kumpf 1.56 CString& CString::operator=(const CString& cstr)
354 {
|
355 kumpf 1.82 if (&cstr != this)
|
356 kumpf 1.81 {
|
357 kumpf 1.82 if (_rep)
358 {
|
359 mike 1.112 operator delete(_rep);
|
360 kumpf 1.82 _rep = 0;
361 }
|
362 mike 1.112
|
363 kumpf 1.82 if (cstr._rep)
364 {
|
365 mike 1.112 size_t n = strlen(cstr._rep) + 1;
366 _rep = (char*)operator new(n);
367 memcpy(_rep, cstr._rep, n);
|
368 kumpf 1.82 }
|
369 kumpf 1.81 }
|
370 mike 1.112
|
371 kumpf 1.56 return *this;
372 }
373
|
374 mike 1.112 //==============================================================================
|
375 kumpf 1.54 //
|
376 mike 1.112 // class StringRep
|
377 kumpf 1.39 //
|
378 mike 1.112 //==============================================================================
|
379 kumpf 1.39
|
380 mike 1.112 StringRep StringRep::_emptyRep;
|
381 mike 1.27
|
382 mike 1.112 inline StringRep* StringRep::alloc(size_t cap)
|
383 mike 1.27 {
|
384 dave.sudlik 1.120 // Check for potential overflow in cap
385 PEGASUS_CHECK_CAPACITY_OVERFLOW(cap);
|
386 mike 1.27
|
387 mike 1.112 StringRep* rep = (StringRep*)::operator new(
388 sizeof(StringRep) + cap * sizeof(Uint16));
389 rep->cap = cap;
390 new(&rep->refs) AtomicInt(1);
391
392 return rep;
|
393 mike 1.27 }
394
|
395 mike 1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
396 chuck 1.102 {
|
397 mike 1.114 if (cap > rep->cap || rep->refs.get() != 1)
|
398 chuck 1.102 {
|
399 mike 1.112 size_t n = _roundUpToPow2(cap);
400 StringRep* newRep = StringRep::alloc(n);
401 newRep->size = rep->size;
402 _copy(newRep->data, rep->data, rep->size + 1);
403 StringRep::unref(rep);
404 rep = newRep;
405 }
406 }
|
407 david.dillard 1.105
|
408 mike 1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
409 {
410 StringRep* rep = StringRep::alloc(size);
411 rep->size = size;
412 _copy(rep->data, data, size);
413 rep->data[size] = '\0';
414 return rep;
415 }
|
416 chuck 1.102
|
417 mike 1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
418 {
419 // Return a new copy of rep. Release rep.
|
420 chuck 1.102
|
421 mike 1.112 StringRep* newRep = StringRep::alloc(rep->size);
422 newRep->size = rep->size;
423 _copy(newRep->data, rep->data, rep->size);
424 newRep->data[newRep->size] = '\0';
425 StringRep::unref(rep);
426 return newRep;
|
427 chuck 1.102 }
428
|
429 mike 1.112 StringRep* StringRep::create(const char* data, size_t size)
|
430 kumpf 1.43 {
|
431 mike 1.112 StringRep* rep = StringRep::alloc(size);
432 size_t utf8_error_index;
433 rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
434
435 if (rep->size == size_t(-1))
436 {
437 StringRep::free(rep);
|
438 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
439 mike 1.112 }
|
440 kumpf 1.43
|
441 mike 1.112 rep->data[rep->size] = '\0';
|
442 kumpf 1.43
|
443 mike 1.112 return rep;
|
444 mike 1.27 }
445
|
446 mike 1.112 Uint32 StringRep::length(const Uint16* str)
|
447 mike 1.27 {
|
448 mike 1.112 // Note: We could unroll this but it is rarely called.
449
450 const Uint16* end = (Uint16*)str;
451
452 while (*end++)
453 ;
454
|
455 a.dunfey 1.125 return (Uint32)(end - str - 1);
|
456 kumpf 1.39 }
|
457 tony 1.66
|
458 mike 1.112 //==============================================================================
459 //
460 // class String
461 //
462 //==============================================================================
463
464 const String String::EMPTY;
|
465 mike 1.27
|
466 kumpf 1.39 String::String(const String& str, Uint32 n)
467 {
|
468 mike 1.112 _checkBounds(n, str._rep->size);
469 _rep = StringRep::create(str._rep->data, n);
|
470 kumpf 1.39 }
471
472 String::String(const Char16* str)
473 {
|
474 mike 1.112 _checkNullPointer(str);
475 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
476 mike 1.27 }
477
|
478 kumpf 1.39 String::String(const Char16* str, Uint32 n)
479 {
|
480 mike 1.112 _checkNullPointer(str);
481 _rep = StringRep::create((Uint16*)str, n);
|
482 kumpf 1.39 }
483
484 String::String(const char* str)
|
485 mike 1.27 {
|
486 mike 1.112 _checkNullPointer(str);
|
487 david.dillard 1.105
|
488 mike 1.112 // Set this just in case create() throws an exception.
489 _rep = &StringRep::_emptyRep;
490 _rep = StringRep::create(str, strlen(str));
|
491 mike 1.27 }
492
|
493 kumpf 1.39 String::String(const char* str, Uint32 n)
|
494 mike 1.27 {
|
495 mike 1.112 _checkNullPointer(str);
|
496 david.dillard 1.105
|
497 mike 1.112 // Set this just in case create() throws an exception.
498 _rep = &StringRep::_emptyRep;
499 _rep = StringRep::create(str, n);
|
500 kumpf 1.39 }
|
501 mike 1.27
|
502 mike 1.112 String::String(const String& s1, const String& s2)
|
503 kumpf 1.39 {
|
504 mike 1.112 size_t n1 = s1._rep->size;
505 size_t n2 = s2._rep->size;
506 size_t n = n1 + n2;
507 _rep = StringRep::alloc(n);
508 _copy(_rep->data, s1._rep->data, n1);
509 _copy(_rep->data + n1, s2._rep->data, n2);
510 _rep->size = n;
511 _rep->data[n] = '\0';
|
512 mike 1.27 }
513
|
514 mike 1.112 String::String(const String& s1, const char* s2)
|
515 mike 1.27 {
|
516 mike 1.112 _checkNullPointer(s2);
517 size_t n1 = s1._rep->size;
518 size_t n2 = strlen(s2);
519 _rep = StringRep::alloc(n1 + n2);
520 _copy(_rep->data, s1._rep->data, n1);
521 size_t utf8_error_index;
522 size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
523
524 if (tmp == size_t(-1))
|
525 kumpf 1.82 {
|
526 mike 1.112 StringRep::free(_rep);
527 _rep = &StringRep::_emptyRep;
|
528 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
529 kumpf 1.82 }
|
530 mike 1.112
531 _rep->size = n1 + tmp;
532 _rep->data[_rep->size] = '\0';
|
533 mike 1.27 }
534
|
535 mike 1.112 String::String(const char* s1, const String& s2)
|
536 mike 1.27 {
|
537 mike 1.112 _checkNullPointer(s1);
538 size_t n1 = strlen(s1);
539 size_t n2 = s2._rep->size;
540 _rep = StringRep::alloc(n1 + n2);
541 size_t utf8_error_index;
542 size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
543
544 if (tmp == size_t(-1))
545 {
546 StringRep::free(_rep);
547 _rep = &StringRep::_emptyRep;
|
548 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
549 mike 1.112 }
550
551 _rep->size = n2 + tmp;
552 _copy(_rep->data + n1, s2._rep->data, n2);
553 _rep->data[_rep->size] = '\0';
|
554 mike 1.27 }
555
|
556 mike 1.112 String& String::assign(const String& str)
|
557 mike 1.27 {
|
558 mike 1.112 if (_rep != str._rep)
|
559 david.dillard 1.105 {
|
560 mike 1.112 StringRep::unref(_rep);
561 StringRep::ref(_rep = str._rep);
|
562 david.dillard 1.105 }
563
|
564 mike 1.27 return *this;
565 }
566
567 String& String::assign(const Char16* str, Uint32 n)
568 {
|
569 mike 1.112 _checkNullPointer(str);
570
|
571 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
572 david.dillard 1.105 {
|
573 mike 1.112 StringRep::unref(_rep);
574 _rep = StringRep::alloc(n);
|
575 david.dillard 1.105 }
576
|
577 mike 1.112 _rep->size = n;
578 _copy(_rep->data, (Uint16*)str, n);
579 _rep->data[n] = '\0';
580
|
581 mike 1.27 return *this;
582 }
583
|
584 mike 1.112 String& String::assign(const char* str, Uint32 n)
|
585 chuck 1.102 {
|
586 mike 1.112 _checkNullPointer(str);
587
|
588 mike 1.114 if (n > _rep->cap || _rep->refs.get() != 1)
|
589 david.dillard 1.105 {
|
590 mike 1.112 StringRep::unref(_rep);
591 _rep = StringRep::alloc(n);
|
592 david.dillard 1.105 }
593
|
594 mike 1.112 size_t utf8_error_index;
595 _rep->size = _convert(_rep->data, str, n, utf8_error_index);
|
596 chuck 1.102
|
597 mike 1.112 if (_rep->size == size_t(-1))
|
598 david.dillard 1.105 {
|
599 mike 1.112 StringRep::free(_rep);
600 _rep = &StringRep::_emptyRep;
|
601 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
602 david.dillard 1.105 }
|
603 mike 1.112
604 _rep->data[_rep->size] = 0;
|
605 david.dillard 1.105
|
606 mike 1.27 return *this;
607 }
608
|
609 kumpf 1.39 void String::clear()
610 {
|
611 mike 1.112 if (_rep->size)
612 {
|
613 mike 1.114 if (_rep->refs.get() == 1)
|
614 mike 1.112 {
615 _rep->size = 0;
616 _rep->data[0] = '\0';
617 }
618 else
619 {
620 StringRep::unref(_rep);
621 _rep = &StringRep::_emptyRep;
622 }
623 }
|
624 kumpf 1.39 }
625
|
626 mike 1.112 void String::reserveCapacity(Uint32 cap)
|
627 kumpf 1.39 {
|
628 mike 1.112 _reserve(_rep, cap);
|
629 kumpf 1.39 }
630
|
631 mike 1.112 CString String::getCString() const
632 {
|
633 david.dillard 1.116 // A UTF8 string can have three times as many characters as its UTF16
634 // counterpart, so we allocate extra memory for the worst case. In the
|
635 mike 1.112 // best case, we may need only one third of the memory allocated. But
|
636 david.dillard 1.116 // downsizing the string afterwards is expensive and unecessary since
637 // CString objects are usually short-lived (disappearing after only a few
|
638 mike 1.112 // instructions). CString objects are typically created on the stack as
639 // means to obtain a char* pointer.
640
641 #ifdef PEGASUS_STRING_NO_UTF8
642 char* str = (char*)operator new(_rep->size + 1);
643 _copy(str, _rep->data, _rep->size);
644 str[_rep->size] = '\0';
645 return CString(str);
|
646 gs.keenan 1.110 #else
|
647 a.dunfey 1.125 Uint32 n = (Uint32)(3 * _rep->size);
|
648 mike 1.112 char* str = (char*)operator new(n + 1);
649 size_t size = _copyToUTF8(str, _rep->data, _rep->size);
650 str[size] = '\0';
651 return CString(str);
|
652 gs.keenan 1.110 #endif
|
653 kumpf 1.39 }
654
|
655 mike 1.112 String& String::append(const Char16* str, Uint32 n)
|
656 kumpf 1.39 {
|
657 mike 1.112 _checkNullPointer(str);
658
659 size_t oldSize = _rep->size;
660 size_t newSize = oldSize + n;
|
661 a.dunfey 1.125 _reserve(_rep, (Uint32)newSize);
|
662 mike 1.112 _copy(_rep->data + oldSize, (Uint16*)str, n);
663 _rep->size = newSize;
664 _rep->data[newSize] = '\0';
665
666 return *this;
|
667 kumpf 1.39 }
668
|
669 mike 1.112 String& String::append(const String& str)
|
670 mike 1.27 {
|
671 a.dunfey 1.125 return append((Char16*)(&(str._rep->data[0])), (Uint32)str._rep->size);
|
672 mike 1.27 }
673
|
674 mike 1.112 String& String::append(const char* str, Uint32 size)
|
675 mike 1.27 {
|
676 mike 1.112 _checkNullPointer(str);
677
678 size_t oldSize = _rep->size;
679 size_t cap = oldSize + size;
680
|
681 a.dunfey 1.125 _reserve(_rep, (Uint32)cap);
|
682 mike 1.112 size_t utf8_error_index;
683 size_t tmp = _convert(
684 (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
685
686 if (tmp == size_t(-1))
687 {
688 StringRep::free(_rep);
689 _rep = &StringRep::_emptyRep;
|
690 a.dunfey 1.125 _StringThrowBadUTF8((Uint32)utf8_error_index);
|
691 mike 1.112 }
|
692 mike 1.27
|
693 mike 1.112 _rep->size += tmp;
694 _rep->data[_rep->size] = '\0';
|
695 mike 1.27
|
696 kumpf 1.39 return *this;
697 }
698
|
699 mike 1.112 void String::remove(Uint32 index, Uint32 n)
|
700 mike 1.27 {
|
701 mike 1.112 if (n == PEG_NOT_FOUND)
|
702 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
|
703 mike 1.112
704 _checkBounds(index + n, _rep->size);
705
|
706 mike 1.114 if (_rep->refs.get() != 1)
|
707 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
|
708 mike 1.27
|
709 jim.wunderlich 1.115 PEGASUS_ASSERT(index + n <= _rep->size);
|
710 mike 1.27
|
711 mike 1.112 size_t rem = _rep->size - (index + n);
712 Uint16* data = _rep->data;
|
713 mike 1.27
|
714 mike 1.112 if (rem)
715 memmove(data + index, data + index + n, rem * sizeof(Uint16));
|
716 mike 1.27
|
717 mike 1.112 _rep->size -= n;
718 data[_rep->size] = '\0';
|
719 mike 1.27 }
720
|
721 mike 1.112 String String::subString(Uint32 index, Uint32 n) const
|
722 mike 1.27 {
|
723 mike 1.112 // Note: this implementation is very permissive but used for
724 // backwards compatibility.
725
726 if (index < _rep->size)
|
727 mike 1.27 {
|
728 mike 1.112 if (n == PEG_NOT_FOUND || n > _rep->size - index)
|
729 a.dunfey 1.125 n = (Uint32)(_rep->size - index);
|
730 mike 1.27
|
731 w.otsuka 1.121 return String((Char16*)(_rep->data + index), n);
|
732 mike 1.27 }
|
733 david.dillard 1.105
734 return String();
|
735 mike 1.27 }
736
737 Uint32 String::find(Char16 c) const
738 {
|
739 mike 1.112 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
740 mike 1.27
|
741 mike 1.112 if (p)
|
742 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
743 mike 1.27
744 return PEG_NOT_FOUND;
745 }
746
|
747 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
748 mike 1.30 {
|
749 mike 1.112 _checkBounds(index, _rep->size);
750
751 if (index >= _rep->size)
752 return PEG_NOT_FOUND;
753
754 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
|
755 mike 1.30
|
756 mike 1.112 if (p)
|
757 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
758 mike 1.30
759 return PEG_NOT_FOUND;
760 }
761
|
762 mike 1.112 Uint32 StringFindAux(
763 const StringRep* _rep, const Char16* s, Uint32 n)
|
764 mike 1.27 {
|
765 mike 1.112 _checkNullPointer(s);
|
766 mike 1.27
|
767 mike 1.112 const Uint16* data = _rep->data;
768 size_t rem = _rep->size;
769
770 while (n <= rem)
|
771 mike 1.30 {
|
772 mike 1.112 Uint16* p = (Uint16*)_find(data, rem, s[0]);
773
774 if (!p)
775 break;
|
776 mike 1.30
|
777 mike 1.112 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
|
778 david.dillard 1.116 return static_cast<Uint32>(p - _rep->data);
|
779 david.dillard 1.105
|
780 mike 1.112 p++;
781 rem -= p - data;
782 data = p;
|
783 mike 1.27 }
|
784 mike 1.112
|
785 mike 1.27 return PEG_NOT_FOUND;
786 }
787
|
788 mike 1.112 Uint32 String::find(const char* s) const
789 {
790 _checkNullPointer(s);
791
792 // Note: could optimize away creation of temporary, but this is rarely
793 // called.
794 return find(String(s));
795 }
796
|
797 mike 1.27 Uint32 String::reverseFind(Char16 c) const
798 {
|
799 mike 1.112 Uint16 x = c;
800 Uint16* p = _rep->data;
801 Uint16* q = _rep->data + _rep->size;
|
802 mike 1.27
|
803 mike 1.112 while (q != p)
|
804 mike 1.27 {
|
805 mike 1.112 if (*--q == x)
|
806 david.dillard 1.116 return static_cast<Uint32>(q - p);
|
807 mike 1.27 }
808
809 return PEG_NOT_FOUND;
810 }
811
812 void String::toLower()
813 {
|
814 david 1.69 #ifdef PEGASUS_HAS_ICU
|
815 mike 1.112
|
816 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
817 david 1.90 {
|
818 mike 1.114 if (_rep->refs.get() != 1)
|
819 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
820
|
821 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
822 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
823 // prevents optimizations where the us-ascii is converted before
|
824 mike 1.112 // calling ICU.
|
825 yi.zhou 1.108 // The string may shrink or expand after the convert.
826
|
827 mike 1.112 //// First calculate size of resulting string. u_strToLower() returns
828 //// only the size when zero is passed as the destination size argument.
829
|
830 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
831
|
832 mike 1.112 int32_t newSize = u_strToLower(
833 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
834 david.dillard 1.116
|
835 mike 1.112 err = U_ZERO_ERROR;
836
837 //// Reserve enough space for the result.
838
839 if ((Uint32)newSize > _rep->cap)
840 _reserve(_rep, newSize);
841
842 //// Perform the conversion (overlapping buffers are allowed).
|
843 chuck 1.99
|
844 mike 1.112 u_strToLower((UChar*)_rep->data, newSize,
845 (UChar*)_rep->data, _rep->size, NULL, &err);
|
846 yi.zhou 1.108
|
847 mike 1.112 _rep->size = newSize;
848 return;
|
849 david 1.90 }
|
850 mike 1.112
851 #endif /* PEGASUS_HAS_ICU */
852
|
853 mike 1.114 if (_rep->refs.get() != 1)
|
854 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
855
856 Uint16* p = _rep->data;
857 size_t n = _rep->size;
858
859 for (; n--; p++)
|
860 david 1.90 {
|
861 mike 1.112 if (!(*p & 0xFF00))
862 *p = _toLower(*p);
|
863 mike 1.27 }
|
864 kumpf 1.39 }
865
|
866 chuck 1.99 void String::toUpper()
|
867 david 1.90 {
868 #ifdef PEGASUS_HAS_ICU
|
869 mike 1.112
|
870 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
871 chuck 1.99 {
|
872 mike 1.114 if (_rep->refs.get() != 1)
|
873 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
874
|
875 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
876 david.dillard 1.116 // Since context-sensitive casing looks at adjacent chars, this
877 // prevents optimizations where the us-ascii is converted before
|
878 mike 1.112 // calling ICU.
|
879 yi.zhou 1.108 // The string may shrink or expand after the convert.
880
|
881 mike 1.112 //// First calculate size of resulting string. u_strToUpper() returns
882 //// only the size when zero is passed as the destination size argument.
883
|
884 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
885
|
886 mike 1.112 int32_t newSize = u_strToUpper(
887 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
888
889 err = U_ZERO_ERROR;
890
891 //// Reserve enough space for the result.
892
893 if ((Uint32)newSize > _rep->cap)
894 _reserve(_rep, newSize);
895
896 //// Perform the conversion (overlapping buffers are allowed).
897
898 u_strToUpper((UChar*)_rep->data, newSize,
899 (UChar*)_rep->data, _rep->size, NULL, &err);
|
900 chuck 1.99
|
901 mike 1.112 _rep->size = newSize;
|
902 yi.zhou 1.108
|
903 mike 1.112 return;
|
904 david 1.91 }
|
905 mike 1.112
906 #endif /* PEGASUS_HAS_ICU */
907
|
908 mike 1.114 if (_rep->refs.get() != 1)
|
909 mike 1.112 _rep = StringRep::copyOnWrite(_rep);
910
911 Uint16* p = _rep->data;
912 size_t n = _rep->size;
913
914 for (; n--; p++)
915 *p = _toUpper(*p);
|
916 david 1.90 }
917
|
918 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
919 kumpf 1.39 {
|
920 kumpf 1.118 const Uint16* p1 = s1._rep->data;
921 const Uint16* p2 = s2._rep->data;
|
922 mike 1.27
|
923 kumpf 1.118 while (n--)
924 {
925 int r = *p1++ - *p2++;
926 if (r)
927 {
928 return r;
929 }
930 else if (!p1[-1])
931 {
932 // We must have encountered a null terminator in both s1 and s2
933 return 0;
934 }
935 }
936 return 0;
|
937 mike 1.27 }
938
|
939 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
940 mike 1.30 {
|
941 mike 1.112 return _compare(s1._rep->data, s2._rep->data);
942 }
|
943 kumpf 1.43
|
944 mike 1.112 int String::compare(const String& s1, const char* s2)
945 {
946 _checkNullPointer(s2);
|
947 mike 1.30
|
948 mike 1.112 #ifdef PEGASUS_STRING_NO_UTF8
949 return _compareNoUTF8(s1._rep->data, s2);
950 #else
951 // ATTN: optimize this!
952 return String::compare(s1, String(s2));
953 #endif
|
954 mike 1.30 }
955
|
956 mike 1.112 int String::compareNoCase(const String& str1, const String& str2)
|
957 kumpf 1.40 {
|
958 david 1.69 #ifdef PEGASUS_HAS_ICU
|
959 mike 1.112
|
960 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
961 {
|
962 mike 1.112 return u_strcasecmp(
|
963 dave.sudlik 1.124 (const UChar*)str1._rep->data,
964 (const UChar*)str2._rep->data,
965 U_FOLD_CASE_DEFAULT
966 );
|
967 yi.zhou 1.108 }
|
968 kumpf 1.40
|
969 mike 1.112 #endif /* PEGASUS_HAS_ICU */
970
971 const Uint16* s1 = str1._rep->data;
972 const Uint16* s2 = str2._rep->data;
973
974 while (*s1 && *s2)
|
975 kumpf 1.40 {
|
976 mike 1.112 int r = _toLower(*s1++) - _toLower(*s2++);
|
977 kumpf 1.40
|
978 david.dillard 1.105 if (r)
979 return r;
|
980 kumpf 1.40 }
981
|
982 mike 1.112 if (*s2)
|
983 david.dillard 1.105 return -1;
|
984 mike 1.112 else if (*s1)
|
985 david.dillard 1.105 return 1;
|
986 kumpf 1.40
987 return 0;
988 }
989
|
990 mike 1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
|
991 mike 1.27 {
|
992 mike 1.112 #ifdef PEGASUS_HAS_ICU
993
994 return String::compareNoCase(s1, s2) == 0;
995
996 #else /* PEGASUS_HAS_ICU */
|
997 mike 1.27
|
998 mike 1.112 // The following employs loop unrolling for efficiency. Please do not
999 // eliminate.
|
1000 kumpf 1.39
|
1001 mike 1.112 Uint16* p = (Uint16*)s1.getChar16Data();
1002 Uint16* q = (Uint16*)s2.getChar16Data();
1003 Uint32 n = s2.size();
1004
1005 while (n >= 8)
1006 {
1007 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1008 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1009 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1010 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1011 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1012 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1013 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1014 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1015 {
1016 return false;
1017 }
|
1018 kumpf 1.39
|
1019 mike 1.112 n -= 8;
1020 p += 8;
1021 q += 8;
1022 }
|
1023 mike 1.27
|
1024 mike 1.112 while (n >= 4)
|
1025 kumpf 1.39 {
|
1026 mike 1.112 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1027 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1028 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1029 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
|
1030 david.dillard 1.105 {
|
1031 mike 1.112 return false;
|
1032 david.dillard 1.105 }
|
1033 mike 1.112
1034 n -= 4;
1035 p += 4;
1036 q += 4;
1037 }
1038
1039 while (n--)
1040 {
1041 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
|
1042 david.dillard 1.105 return false;
|
1043 mike 1.112
1044 p++;
1045 q++;
|
1046 kumpf 1.39 }
|
1047 mike 1.28
|
1048 kumpf 1.39 return true;
|
1049 mike 1.112
1050 #endif /* PEGASUS_HAS_ICU */
|
1051 david 1.69 }
1052
|
1053 mike 1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1054 david 1.69 {
|
1055 mike 1.112 _checkNullPointer(s2);
|
1056 david 1.69
|
1057 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1058 david 1.69
|
1059 mike 1.112 return String::equalNoCase(s1, String(s2));
|
1060 david 1.69
|
1061 mike 1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1062 david 1.69
|
1063 mike 1.112 const Uint16* p1 = (Uint16*)s1._rep->data;
1064 const char* p2 = s2;
1065 size_t n = s1._rep->size;
|
1066 david.dillard 1.105
|
1067 mike 1.112 while (n--)
1068 {
1069 if (!*p2)
1070 return false;
|
1071 david 1.71
|
1072 mike 1.112 if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1073 return false;
1074 }
|
1075 kumpf 1.42
|
1076 mike 1.112 if (*p2)
1077 return false;
|
1078 david.dillard 1.116
|
1079 mike 1.112 return true;
|
1080 karl 1.36
|
1081 mike 1.112 #else /* PEGASUS_HAS_ICU */
|
1082 david.dillard 1.105
|
1083 mike 1.112 // ATTN: optimize this!
1084 return String::equalNoCase(s1, String(s2));
|
1085 david.dillard 1.105
|
1086 mike 1.112 #endif /* PEGASUS_HAS_ICU */
1087 }
|
1088 chuck 1.78
|
1089 mike 1.112 Boolean String::equal(const String& s1, const String& s2)
|
1090 karl 1.36 {
|
1091 marek 1.131 return (s1._rep == s2._rep) ||
|
1092 marek 1.137 ((s1._rep->size == s2._rep->size) &&
1093 memcmp(s1._rep->data,
1094 s2._rep->data,
1095 s1._rep->size * sizeof(Uint16)) == 0);
|
1096 karl 1.36 }
1097
|
1098 mike 1.112 Boolean String::equal(const String& s1, const char* s2)
1099 {
1100 #ifdef PEGASUS_STRING_NO_UTF8
|
1101 kumpf 1.35
|
1102 mike 1.112 _checkNullPointer(s2);
|
1103 kumpf 1.39
|
1104 mike 1.112 const Uint16* p = (Uint16*)s1._rep->data;
1105 const char* q = s2;
|
1106 kumpf 1.39
|
1107 mike 1.112 while (*p && *q)
1108 {
1109 if (*p++ != Uint16(*q++))
1110 return false;
1111 }
|
1112 kumpf 1.39
|
1113 mike 1.112 return !(*p || *q);
|
1114 kumpf 1.39
|
1115 mike 1.112 #else /* PEGASUS_STRING_NO_UTF8 */
|
1116 kumpf 1.39
|
1117 mike 1.112 return String::equal(s1, String(s2));
|
1118 kumpf 1.39
|
1119 mike 1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1120 kumpf 1.39 }
1121
|
1122 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1123 kumpf 1.39 {
|
1124 mike 1.112 #if defined(PEGASUS_HAS_ICU)
|
1125 david 1.69
|
1126 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1127 {
|
1128 david.dillard 1.105 char *buf = NULL;
1129 const int size = str.size() * 6;
|
1130 mike 1.112 UnicodeString UniStr(
1131 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1132 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1133 buf = new char[bufsize+1];
1134 UniStr.extract(0,bufsize,buf);
1135 os << buf;
1136 os.flush();
1137 delete [] buf;
|
1138 david.dillard 1.116 return os;
|
1139 yi.zhou 1.108 }
|
1140 mike 1.112
|
1141 david.dillard 1.116 #endif // PEGASUS_HAS_ICU
|
1142 mike 1.112
1143 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1144 yi.zhou 1.108 {
|
1145 mike 1.112 Uint16 code = str[i];
|
1146 david.dillard 1.105
|
1147 mike 1.112 if (code > 0 && !(code & 0xFF00))
1148 os << char(code);
1149 else
1150 {
1151 // Print in hex format:
1152 char buffer[8];
1153 sprintf(buffer, "\\x%04X", code);
1154 os << buffer;
|
1155 david.dillard 1.105 }
|
1156 yi.zhou 1.108 }
|
1157 kumpf 1.39
1158 return os;
1159 }
1160
|
1161 mike 1.112 void StringAppendCharAux(StringRep*& _rep)
|
1162 kumpf 1.39 {
|
1163 mike 1.112 StringRep* tmp;
1164
1165 if (_rep->cap)
1166 {
1167 tmp = StringRep::alloc(2 * _rep->cap);
1168 tmp->size = _rep->size;
1169 _copy(tmp->data, _rep->data, _rep->size);
1170 }
1171 else
1172 {
1173 tmp = StringRep::alloc(8);
1174 tmp->size = 0;
1175 }
1176
1177 StringRep::unref(_rep);
1178 _rep = tmp;
|
1179 kumpf 1.39 }
1180
|
1181 thilo.boehm 1.128 void AssignASCII(String& s, const char* str, Uint32 n)
1182 {
1183 class StringLayout
1184 {
1185 public:
1186 StringRep* rep;
1187 };
1188
|
1189 kumpf 1.130 StringLayout* that = reinterpret_cast<StringLayout*>(&s);
|
1190 thilo.boehm 1.128
1191 _checkNullPointer(str);
1192
1193 if (n > that->rep->cap || that->rep->refs.get() != 1)
1194 {
1195 StringRep::unref(that->rep);
1196 that->rep = StringRep::alloc(n);
1197 }
1198
1199 _copy(that->rep->data, str, n);
1200 that->rep->size = n;
1201 that->rep->data[that->rep->size] = 0;
1202 }
1203
|
1204 mike 1.112 PEGASUS_NAMESPACE_END
1205
1206 /*
1207 ================================================================================
1208
1209 String optimizations:
1210
1211 1. Added mechanism allowing certain functions to be inlined only when
1212 used by internal Pegasus modules. External modules (i.e., providers)
1213 link to a non-inline version, which allows for binary compatibility.
1214
1215 2. Implemented copy-on-write with atomic increment/decrement. This
1216 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1217 for the 'ni1000' benchmark.
1218
1219 3. Employed loop unrolling in several places. For example, see:
1220
1221 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1222
1223 4. Used the "empty-rep" optimization (described in whitepaper from the
1224 GCC Developers Summit). This reduced default construction to a simple
1225 mike 1.112 pointer assignment.
1226
1227 inline String::String() : _rep(&_emptyRep) { }
1228
1229 5. Implemented Uint16 versions of toupper() and tolower() using tables.
1230 For example:
1231
1232 static const char _upper[] =
1233 {
1234 0,1,2,...255
1235 };
1236
1237 inline Uint16 _toUpper(Uint16 x)
1238 {
1239 return (x & 0xFF00) ? x : _upper[x];
1240 }
1241
|
1242 david.dillard 1.116 This outperforms the system implementation by avoiding an anding
|
1243 mike 1.112 operation.
1244
|
1245 david.dillard 1.116 6. Implemented char* version of the following member functions to
1246 eliminate unnecessary creation of anonymous string objects
|
1247 mike 1.112 (temporaries).
1248
1249 String(const String& s1, const char* s2);
1250 String(const char* s1, const String& s2);
1251 String& String::operator=(const char* str);
1252 Uint32 String::find(const char* s) const;
1253 bool String::equal(const String& s1, const char* s2);
1254 static int String::compare(const String& s1, const char* s2);
1255 String& String::append(const char* str);
1256 String& String::append(const char* str, Uint32 size);
1257 static bool String::equalNoCase(const String& s1, const char* s2);
1258 String& operator=(const char* str)
1259 String& String::assign(const char* str)
1260 String& String::append(const char* str)
1261 Boolean operator==(const String& s1, const char* s2)
1262 Boolean operator==(const char* s1, const String& s2)
1263 Boolean operator!=(const String& s1, const char* s2)
1264 Boolean operator!=(const char* s1, const String& s2)
1265 Boolean operator<(const String& s1, const char* s2)
1266 Boolean operator<(const char* s1, const String& s2)
1267 Boolean operator>(const String& s1, const char* s2)
1268 mike 1.112 Boolean operator>(const char* s1, const String& s2)
1269 Boolean operator<=(const String& s1, const char* s2)
1270 Boolean operator<=(const char* s1, const String& s2)
1271 Boolean operator>=(const String& s1, const char* s2)
1272 Boolean operator>=(const char* s1, const String& s2)
1273 String operator+(const String& s1, const char* s2)
1274 String operator+(const char* s1, const String& s2)
1275
|
1276 david.dillard 1.116 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
|
1277 mike 1.112 power of two (algorithm from the book "Hacker's Delight").
1278
1279 static Uint32 _roundUpToPow2(Uint32 x)
1280 {
1281 if (x < 8)
1282 return 8;
1283
1284 x--;
1285 x |= (x >> 1);
1286 x |= (x >> 2);
1287 x |= (x >> 4);
1288 x |= (x >> 8);
1289 x |= (x >> 16);
1290 x++;
1291
1292 return x;
1293 }
1294
1295 8. Implemented "concatenating constructors" to eliminate temporaries
|
1296 david.dillard 1.116 created by operator+(). This scheme employs the "return-value
|
1297 mike 1.112 optimization" described by Stan Lippman.
1298
1299 inline String operator+(const String& s1, const String& s2)
1300 {
1301 return String(s1, s2, 0);
1302 }
1303
1304 9. Experimented to find the optimal initial size for a short string.
1305 Eight seems to offer the best tradeoff between space and time.
1306
1307 10. Inlined all members of the Char16 class.
1308
1309 11. Used Uint16 internally in the String class. This showed no improvement
1310 since Char16 was already fully inlined and was essentially reduced to
1311 Uint16 in any case.
1312
1313 12. Implemented conditional logic (#if) allowing error checking logic to
|
1314 david.dillard 1.116 be excluded to better performance. Examples include bounds checking
|
1315 mike 1.112 and null-pointer checking.
1316
1317 13. Used memcpy() and memcmp() where possible. These are implemented using
1318 the rep family of instructions under Intel and are much faster.
1319
|
1320 david.dillard 1.116 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
|
1321 mike 1.112 copy routine overhead.
1322
1323 15. Added ASCII7 form of the constructor and assign().
1324
1325 String s("hello world", String::ASCII7);
1326
1327 s.assignASCII7("hello world");
1328
1329 This avoids slower UTF8 processing when not needed.
1330
1331 ================================================================================
1332 */
|