1 karl 1.98 //%2005////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 mike 1.27 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
16 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 david.dillard 1.105 //
|
19 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
20 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
22 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
25 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
30 // Author: Mike Brasher (mbrasher@bmc.com)
31 //
32 //%/////////////////////////////////////////////////////////////////////////////
33
|
34 mike 1.111.2.6 #define PEGASUS_USE_INTERNAL_INLINES
|
35 mike 1.27 #include "String.h"
|
36 mike 1.111.2.6 #include <cassert>
|
37 kumpf 1.48 #include "InternalException.h"
|
38 david 1.69 #include "CommonUTF.h"
|
39 mike 1.111.2.1 #include "CharSet.h"
|
40 david 1.69
|
41 mike 1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
|
42 chuck 1.99 #include <unicode/ustring.h>
43 #include <unicode/uchar.h>
|
44 david 1.69 #endif
45
|
46 mike 1.27 PEGASUS_NAMESPACE_BEGIN
47
|
48 mike 1.111.2.1 //==============================================================================
|
49 kumpf 1.39 //
|
50 mike 1.111.2.6 // Compile-time switches (defined macros).
51 //
52 // PEGASUS_STRING_ENABLE_ICU -- enables use of ICU package.
53 //
54 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
55 //
56 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
57 //
58 // PEGASUS_USE_INTERNAL_INLINES -- enables internal inlining feature.
59 //
60 //==============================================================================
61
62 //==============================================================================
63 //
|
64 mike 1.111.2.1 // File-scope definitions:
|
65 kumpf 1.54 //
|
66 mike 1.111.2.1 //==============================================================================
|
67 kumpf 1.54
|
68 mike 1.111.2.1 // Converts 16-bit characters to upper case.
69 inline Uint16 _to_upper(Uint16 x)
|
70 kumpf 1.54 {
|
71 mike 1.111.2.1 return (x & 0xFF00) ? x : CharSet::to_upper(x);
|
72 kumpf 1.54 }
73
|
74 mike 1.111.2.1 // Converts 16-bit characters to lower case.
75 inline Uint16 _to_lower(Uint16 x)
|
76 kumpf 1.54 {
|
77 mike 1.111.2.1 return (x & 0xFF00) ? x : CharSet::to_lower(x);
78 }
|
79 kumpf 1.82
|
80 mike 1.111.2.1 // Rounds x to the next power of two (or just returns 8 if x < 8).
81 static Uint32 _next_pow_2(Uint32 x)
82 {
83 if (x < 8)
84 return 8;
85
86 x--;
87 x |= (x >> 1);
88 x |= (x >> 2);
89 x |= (x >> 4);
90 x |= (x >> 8);
91 x |= (x >> 16);
92 x++;
93
94 return x;
95 }
96
// Copies n elements from q to p, converting each element from Q to P by
// plain assignment. The main loop is unrolled by eight, followed by at most
// one group of four and then the remaining zero to three elements.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    size_t i = 0;

    for (; n - i >= 8; i += 8)
    {
        p[i + 0] = q[i + 0];
        p[i + 1] = q[i + 1];
        p[i + 2] = q[i + 2];
        p[i + 3] = q[i + 3];
        p[i + 4] = q[i + 4];
        p[i + 5] = q[i + 5];
        p[i + 6] = q[i + 6];
        p[i + 7] = q[i + 7];
    }

    if (n - i >= 4)
    {
        p[i + 0] = q[i + 0];
        p[i + 1] = q[i + 1];
        p[i + 2] = q[i + 2];
        p[i + 3] = q[i + 3];
        i += 4;
    }

    for (; i < n; i++)
        p[i] = q[i];
}
131
|
132 mike 1.111.2.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
|
133 kumpf 1.54 {
|
134 mike 1.111.2.1 while (n >= 4)
135 {
136 if (s[0] == c)
137 return (Uint16*)s;
138 if (s[1] == c)
139 return (Uint16*)&s[1];
140 if (s[2] == c)
141 return (Uint16*)&s[2];
142 if (s[3] == c)
143 return (Uint16*)&s[3];
144
145 n -= 4;
146 s += 4;
147 }
148
149 if (n)
150 {
151 if (*s == c)
152 return (Uint16*)s;
153 s++;
154 n--;
155 mike 1.111.2.1 }
156
157 if (n)
158 {
159 if (*s == c)
160 return (Uint16*)s;
161 s++;
162 n--;
163 }
164
165 if (n && *s == c)
166 return (Uint16*)s;
167
168 // Not found!
169 return 0;
|
170 kumpf 1.54 }
171
|
172 mike 1.111.2.1 static int _compare(const Uint16* s1, const Uint16* s2)
|
173 kumpf 1.54 {
|
174 mike 1.111.2.1 while (*s1 && *s2)
|
175 kumpf 1.82 {
|
176 mike 1.111.2.1 int r = *s1++ - *s2++;
177
178 if (r)
179 return r;
|
180 kumpf 1.82 }
|
181 mike 1.111.2.1
182 if (*s2)
183 return -1;
184 else if (*s1)
185 return 1;
186
187 return 0;
|
188 kumpf 1.54 }
189
|
190 mike 1.111.2.1 static int _compare_no_utf8(const Uint16* s1, const char* s2)
|
191 kumpf 1.56 {
|
192 mike 1.111.2.1 Uint16 c1;
193 Uint16 c2;
194
195 do
|
196 kumpf 1.81 {
|
197 mike 1.111.2.1 c1 = *s1++;
198 c2 = *s2++;
199
200 if (c1 == 0)
201 return c1 - c2;
|
202 kumpf 1.81 }
|
203 mike 1.111.2.1 while (c1 == c2);
204
205 return c1 - c2;
|
206 kumpf 1.56 }
207
|
208 mike 1.111.2.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
|
209 kumpf 1.54 {
|
210 mike 1.111.2.1 // This should only be called when s1 and s2 have the same length.
211
212 while (n-- && (*s1++ - *s2++) == 0)
213 ;
214
215 return s1[-1] - s2[-1];
|
216 kumpf 1.54 }
217
|
218 mike 1.111.2.1 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
219 {
220 memcpy(s1, s2, n * sizeof(Uint16));
221 }
|
222 kumpf 1.39
|
223 mike 1.111.2.1 void String_throw_out_of_bounds()
224 {
225 throw IndexOutOfBoundsException();
226 }
227
228 #ifdef PEGASUS_STRING_NO_THROW
229 # define _check_null_pointer(ARG) /* empty */
230 #else
231 template<class T>
232 inline void _check_null_pointer(const T* ptr)
233 {
234 if (!ptr)
235 throw NullPointer();
236 }
237 #endif
|
238 mike 1.27
|
239 mike 1.111.2.1 static size_t _copy_from_utf8(Uint16* dest, const char* src, size_t n)
|
240 mike 1.27 {
|
241 mike 1.111.2.1 Uint16* p = dest;
242 const Uint8* q = (const Uint8*)src;
|
243 mike 1.111.2.2
244 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead below
245 // this loop). Use factor-four loop-unrolling.
246
247 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
248 {
249 p[0] = q[0];
250 p[1] = q[1];
251 p[2] = q[2];
252 p[3] = q[3];
253 p += 4;
254 q += 4;
255 n -= 4;
256 }
257
258 switch (n)
259 {
260 case 0:
261 return p - dest;
262 case 1:
263 if (q[0] < 128)
264 mike 1.111.2.2 {
265 p[0] = q[0];
266 return p + 1 - dest;
267 }
268 break;
269 case 2:
270 if (q[0] < 128 && q[1] < 128)
271 {
272 p[0] = q[0];
273 p[1] = q[1];
274 return p + 2 - dest;
275 }
276 break;
277 case 3:
278 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
279 {
280 p[0] = q[0];
281 p[1] = q[1];
282 p[2] = q[2];
283 return p + 3 - dest;
284 }
285 mike 1.111.2.2 break;
286 }
287
288 // Process remaining characters.
|
289 mike 1.111.2.1
290 while (n)
291 {
|
292 mike 1.111.2.2 // Optimize for 7-bit ASCII case.
|
293 mike 1.111.2.1
|
294 mike 1.111.2.2 if (*q < 128)
|
295 mike 1.111.2.1 {
296 *p++ = *q++;
297 n--;
298 }
|
299 mike 1.111.2.2 else
300 {
301 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
|
302 mike 1.111.2.1
|
303 mike 1.111.2.2 if (c > n || !isValid_U8(q, c) ||
304 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
305 {
306 throw Exception("Bad UTF8 encoding");
307 }
|
308 mike 1.111.2.1
|
309 mike 1.111.2.2 n -= c;
|
310 mike 1.111.2.1 }
311 }
|
312 mike 1.27
|
313 mike 1.111.2.1 return p - dest;
314 }
|
315 mike 1.27
|
316 mike 1.111.2.1 // Note: dest must be at least three times src (plus an extra byte for
317 // terminator).
|
318 mike 1.111.2.2 static inline size_t _copy_to_utf8(char* dest, const Uint16* src, size_t n)
|
319 mike 1.111.2.1 {
|
320 mike 1.111.2.2 const Uint16* q = src;
321 Uint8* p = (Uint8*)dest;
322
323 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
324 {
325 p[0] = q[0];
326 p[1] = q[1];
327 p[2] = q[2];
328 p[3] = q[3];
329 p += 4;
330 q += 4;
331 n -= 4;
332 }
333
334 switch (n)
335 {
336 case 0:
337 return p - (Uint8*)dest;
338 case 1:
339 if (q[0] < 128)
340 {
341 mike 1.111.2.2 p[0] = q[0];
342 return p + 1 - (Uint8*)dest;
343 }
344 break;
345 case 2:
346 if (q[0] < 128 && q[1] < 128)
347 {
348 p[0] = q[0];
349 p[1] = q[1];
350 return p + 2 - (Uint8*)dest;
351 }
352 break;
353 case 3:
354 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
355 {
356 p[0] = q[0];
357 p[1] = q[1];
358 p[2] = q[2];
359 return p + 3 - (Uint8*)dest;
360 }
361 break;
362 mike 1.111.2.2 }
363
364 // If this line was reached, there must be characters greater than 128.
365
366 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
367
368 return p - (Uint8*)dest;
|
369 mike 1.111.2.1 }
|
370 mike 1.27
|
371 mike 1.111.2.1 static inline size_t _convert(Uint16* p, const char* q, size_t n)
372 {
373 #ifdef PEGASUS_STRING_NO_UTF8
374 _copy(p, q, n);
|
375 mike 1.27 return n;
|
376 mike 1.111.2.1 #else
377 return _copy_from_utf8(p, q, n);
378 #endif
|
379 mike 1.27 }
380
|
381 mike 1.111.2.1 //==============================================================================
|
382 chuck 1.102 //
|
383 mike 1.111.2.1 // class CString
|
384 chuck 1.102 //
|
385 mike 1.111.2.1 //==============================================================================
386
387 CString::CString(const CString& cstr) : _rep(0)
|
388 chuck 1.102 {
|
389 mike 1.111.2.1 if (cstr._rep)
|
390 chuck 1.102 {
|
391 mike 1.111.2.1 size_t n = strlen(cstr._rep) + 1;
392 _rep = (char*)operator new(n);
393 memcpy(_rep, cstr._rep, n);
394 }
395 }
396
397 CString& CString::operator=(const CString& cstr)
398 {
399 if (&cstr != this)
400 {
401 if (_rep)
|
402 david.dillard 1.105 {
|
403 mike 1.111.2.1 operator delete(_rep);
404 _rep = 0;
|
405 chuck 1.102 }
|
406 mike 1.111.2.1
407 if (cstr._rep)
|
408 chuck 1.102 {
|
409 mike 1.111.2.1 size_t n = strlen(cstr._rep) + 1;
410 _rep = (char*)operator new(n);
411 memcpy(_rep, cstr._rep, n);
|
412 chuck 1.102 }
|
413 mike 1.111.2.1 }
|
414 chuck 1.102
|
415 mike 1.111.2.1 return *this;
|
416 chuck 1.102 }
417
|
418 mike 1.111.2.1 //==============================================================================
419 //
420 // class StringRep
421 //
422 //==============================================================================
|
423 kumpf 1.43
|
424 mike 1.111.2.1 StringRep StringRep::_empty_rep;
|
425 kumpf 1.43
|
426 mike 1.111.2.1 inline StringRep* StringRep::alloc(size_t cap)
|
427 mike 1.27 {
|
428 mike 1.111.2.1 StringRep* rep = (StringRep*)::operator new(
429 sizeof(StringRep) + cap * sizeof(Uint16));
430 rep->cap = cap;
431 Atomic_create(&rep->refs, 1);
432
433 return rep;
|
434 mike 1.27 }
435
|
436 mike 1.111.2.1 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
437 mike 1.27 {
|
438 mike 1.111.2.1 if (cap > rep->cap || Atomic_get(&rep->refs) != 1)
439 {
440 size_t n = _next_pow_2(cap);
441 StringRep* new_rep = StringRep::alloc(n);
442 new_rep->size = rep->size;
443 _copy(new_rep->data, rep->data, rep->size + 1);
444 StringRep::unref(rep);
445 rep = new_rep;
446 }
|
447 kumpf 1.39 }
|
448 tony 1.66
|
449 mike 1.111.2.1 StringRep* StringRep::create(const Uint16* data, size_t size)
|
450 kumpf 1.39 {
|
451 mike 1.111.2.1 StringRep* rep = StringRep::alloc(size);
452 rep->size = size;
453 _copy(rep->data, data, size);
454 rep->data[size] = '\0';
455 return rep;
|
456 kumpf 1.39 }
457
|
458 mike 1.111.2.1 StringRep* StringRep::copy_on_write(StringRep* rep)
|
459 kumpf 1.39 {
|
460 mike 1.111.2.1 // Return a new copy of rep. Release rep.
|
461 david.dillard 1.105
|
462 mike 1.111.2.1 StringRep* new_rep = StringRep::alloc(rep->size);
463 new_rep->size = rep->size;
464 _copy(new_rep->data, rep->data, rep->size);
465 new_rep->data[new_rep->size] = '\0';
466 StringRep::unref(rep);
467 return new_rep;
|
468 mike 1.27 }
469
|
470 mike 1.111.2.1 StringRep* StringRep::create(const char* data, size_t size)
|
471 kumpf 1.39 {
|
472 mike 1.111.2.1 StringRep* rep = StringRep::alloc(size);
473 rep->size = _convert((Uint16*)rep->data, data, size);
474 rep->data[rep->size] = '\0';
|
475 david.dillard 1.105
|
476 mike 1.111.2.1 return rep;
|
477 kumpf 1.39 }
478
|
479 mike 1.111.2.4 StringRep* StringRep::createASCII7(const char* data, size_t size)
|
480 mike 1.111.2.2 {
481 StringRep* rep = StringRep::alloc(size);
482 _copy((Uint16*)rep->data, data, size);
483 rep->data[rep->size = size] = '\0';
484 return rep;
485 }
486
|
487 mike 1.111.2.1 Uint32 StringRep::length(const Uint16* str)
|
488 mike 1.27 {
|
489 mike 1.111.2.5 // Note: We could unroll this but it is rarely called.
|
490 david.dillard 1.105
|
491 mike 1.111.2.1 const Uint16* end = (Uint16*)str;
492
493 while (*end++)
494 ;
495
496 return end - str - 1;
|
497 mike 1.27 }
498
|
499 mike 1.111.2.1 //==============================================================================
500 //
501 // class String
502 //
503 //==============================================================================
|
504 david.dillard 1.105
|
505 mike 1.111.2.1 const String String::EMPTY;
506
507 String::String(const String& str, Uint32 n)
508 {
509 _check_bounds(n, str._rep->size);
510 _rep = StringRep::create(str._rep->data, n);
|
511 kumpf 1.39 }
|
512 mike 1.27
|
513 mike 1.111.2.1 String::String(const Char16* str)
|
514 kumpf 1.39 {
|
515 mike 1.111.2.1 _check_null_pointer(str);
516 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
517 mike 1.27 }
518
|
519 mike 1.111.2.1 String::String(const Char16* str, Uint32 n)
|
520 mike 1.27 {
|
521 mike 1.111.2.1 _check_null_pointer(str);
522 _rep = StringRep::create((Uint16*)str, n);
|
523 mike 1.27 }
524
|
525 mike 1.111.2.1 String::String(const char* str)
|
526 mike 1.27 {
|
527 mike 1.111.2.1 _check_null_pointer(str);
528 _rep = StringRep::create(str, strlen(str));
|
529 mike 1.27 }
530
|
531 mike 1.111.2.2 String::String(const char* str, String::ASCII7Tag tag)
532 {
533 _check_null_pointer(str);
|
534 mike 1.111.2.4 _rep = StringRep::createASCII7(str, strlen(str));
|
535 mike 1.111.2.2 }
536
|
537 mike 1.111.2.1 String::String(const char* str, Uint32 n)
|
538 mike 1.27 {
|
539 mike 1.111.2.1 _check_null_pointer(str);
540 _rep = StringRep::create(str, n);
541 }
|
542 david.dillard 1.105
|
543 mike 1.111.2.2 String::String(const char* str, size_t n, String::ASCII7Tag tag)
544 {
545 _check_null_pointer(str);
|
546 mike 1.111.2.4 _rep = StringRep::createASCII7(str, n);
|
547 mike 1.111.2.2 }
548
|
549 mike 1.111.2.1 String::String(const String& s1, const String& s2)
550 {
551 size_t n1 = s1._rep->size;
552 size_t n2 = s2._rep->size;
553 size_t n = n1 + n2;
554 _rep = StringRep::alloc(n);
555 _copy(_rep->data, s1._rep->data, n1);
556 _copy(_rep->data + n1, s2._rep->data, n2);
557 _rep->size = n;
558 _rep->data[n] = '\0';
559 }
560
561 String::String(const String& s1, const char* s2)
562 {
563 _check_null_pointer(s2);
564 size_t n1 = s1._rep->size;
565 size_t n2 = strlen(s2);
566 _rep = StringRep::alloc(n1 + n2);
567 _copy(_rep->data, s1._rep->data, n1);
568 _rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2);
569 _rep->data[_rep->size] = '\0';
570 mike 1.111.2.1 }
571
572 String::String(const char* s1, const String& s2)
573 {
574 _check_null_pointer(s1);
575 size_t n1 = strlen(s1);
576 size_t n2 = s2._rep->size;
577 _rep = StringRep::alloc(n1 + n2);
578 _rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1);
579 _copy(_rep->data + n1, s2._rep->data, n2);
580 _rep->data[_rep->size] = '\0';
|
581 mike 1.27 }
582
|
583 mike 1.111.2.1 String& String::assign(const String& str)
|
584 mike 1.27 {
|
585 mike 1.111.2.1 if (_rep != str._rep)
|
586 david.dillard 1.105 {
|
587 mike 1.111.2.1 StringRep::unref(_rep);
588 StringRep::ref(_rep = str._rep);
|
589 david.dillard 1.105 }
590
|
591 mike 1.27 return *this;
592 }
593
|
594 mike 1.111.2.1 String& String::assign(const Char16* str, Uint32 n)
|
595 chuck 1.102 {
|
596 mike 1.111.2.1 _check_null_pointer(str);
597
598 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
|
599 david.dillard 1.105 {
|
600 mike 1.111.2.1 StringRep::unref(_rep);
601 _rep = StringRep::alloc(n);
|
602 david.dillard 1.105 }
603
|
604 mike 1.111.2.1 _rep->size = n;
605 _copy(_rep->data, (Uint16*)str, n);
606 _rep->data[n] = '\0';
607
|
608 chuck 1.102 return *this;
609 }
610
|
611 kumpf 1.39 String& String::assign(const char* str, Uint32 n)
|
612 mike 1.27 {
|
613 mike 1.111.2.1 _check_null_pointer(str);
614
615 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
|
616 david.dillard 1.105 {
|
617 mike 1.111.2.1 StringRep::unref(_rep);
618 _rep = StringRep::alloc(n);
|
619 david.dillard 1.105 }
620
|
621 mike 1.111.2.1 _rep->size = _convert(_rep->data, str, n);
622 _rep->data[_rep->size] = 0;
623
|
624 mike 1.27 return *this;
625 }
626
|
627 mike 1.111.2.4 String& String::assignASCII7(const char* str, Uint32 n)
|
628 mike 1.111.2.2 {
629 _check_null_pointer(str);
630
631 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
632 {
633 StringRep::unref(_rep);
634 _rep = StringRep::alloc(n);
635 }
636
637 _copy(_rep->data, str, n);
638 _rep->data[_rep->size = n] = 0;
639
640 return *this;
641 }
642
|
643 kumpf 1.39 void String::clear()
644 {
|
645 mike 1.111.2.1 if (_rep->size)
646 {
647 if (Atomic_get(&_rep->refs) == 1)
648 _rep->size = 0;
649 else
650 {
651 StringRep::unref(_rep);
652 _rep = &StringRep::_empty_rep;
653 }
654 }
|
655 kumpf 1.39 }
656
|
657 mike 1.111.2.1 void String::reserveCapacity(Uint32 cap)
|
658 kumpf 1.39 {
|
659 mike 1.111.2.1 _reserve(_rep, cap);
|
660 kumpf 1.39 }
661
|
662 mike 1.111.2.1 CString String::getCString() const
663 {
664 #ifdef PEGASUS_STRING_NO_UTF8
665 char* str = (char*)operator new(_rep->size + 1);
666 _copy(str, _rep->data, _rep->size);
667 str[_rep->size] = '\0';
668 return CString(str);
|
669 gs.keenan 1.110 #else
|
670 mike 1.111.2.1 Uint32 n = 3 * _rep->size;
671 char* str = (char*)operator new(n + 1);
|
672 mike 1.111.2.2 size_t size = _copy_to_utf8(str, _rep->data, _rep->size);
|
673 mike 1.111.2.1 str[size] = '\0';
674 return CString(str);
|
675 gs.keenan 1.110 #endif
|
676 kumpf 1.39 }
677
|
678 mike 1.111.2.1 String& String::append(const Char16* str, Uint32 n)
|
679 kumpf 1.39 {
|
680 mike 1.111.2.1 _check_null_pointer(str);
|
681 kumpf 1.39
|
682 mike 1.111.2.1 size_t old_size = _rep->size;
683 size_t new_size = old_size + n;
684 _reserve(_rep, new_size);
685 _copy(_rep->data + old_size, (Uint16*)str, n);
686 _rep->size = new_size;
687 _rep->data[new_size] = '\0';
|
688 mike 1.27
|
689 mike 1.111.2.1 return *this;
|
690 mike 1.27 }
691
|
692 mike 1.111.2.1 String& String::append(const String& str)
|
693 mike 1.27 {
|
694 mike 1.111.2.1 return append((Char16*)str._rep->data, str._rep->size);
|
695 mike 1.27 }
696
|
697 mike 1.111.2.1 String& String::append(const char* str, Uint32 size)
|
698 kumpf 1.39 {
|
699 mike 1.111.2.1 _check_null_pointer(str);
700
701 size_t old_size = _rep->size;
702 size_t cap = old_size + size;
703
704 _reserve(_rep, cap);
705 _rep->size += _convert((Uint16*)_rep->data + old_size, str, size);
706 _rep->data[_rep->size] = '\0';
|
707 kumpf 1.39
|
708 mike 1.27 return *this;
709 }
710
|
711 mike 1.111.2.1 void String::remove(Uint32 index, Uint32 n)
|
712 mike 1.27 {
|
713 mike 1.111.2.1 if (n == PEG_NOT_FOUND)
714 n = _rep->size - index;
|
715 mike 1.27
|
716 mike 1.111.2.1 _check_bounds(index + n, _rep->size);
717
718 if (Atomic_get(&_rep->refs) != 1)
719 _rep = StringRep::copy_on_write(_rep);
|
720 mike 1.27
|
721 mike 1.111.2.1 assert(index + n <= _rep->size);
|
722 mike 1.27
|
723 mike 1.111.2.1 size_t rem = _rep->size - (index + n);
724 Uint16* data = _rep->data;
725
726 if (rem)
727 memmove(data + index, data + index + n, rem * sizeof(Uint16));
728
729 _rep->size -= n;
730 data[_rep->size] = '\0';
|
731 mike 1.27 }
732
|
733 mike 1.111.2.1 String String::subString(Uint32 index, Uint32 n) const
|
734 mike 1.27 {
|
735 mike 1.111.2.5 // Note: this implementation is very permissive but used for
|
736 mike 1.111.2.1 // backwards compatibility.
737
738 if (index < _rep->size)
|
739 mike 1.27 {
|
740 mike 1.111.2.1 if (n == PEG_NOT_FOUND || n > _rep->size - index)
741 n = _rep->size - index;
|
742 mike 1.27
|
743 mike 1.111.2.1 return String((Char16*)_rep->data + index, n);
|
744 mike 1.27 }
|
745 david.dillard 1.105
746 return String();
|
747 mike 1.27 }
748
749 Uint32 String::find(Char16 c) const
750 {
|
751 mike 1.111.2.1 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
752 mike 1.27
|
753 mike 1.111.2.1 if (p)
754 return p - _rep->data;
|
755 mike 1.27
756 return PEG_NOT_FOUND;
757 }
758
|
759 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
760 mike 1.30 {
|
761 mike 1.111.2.1 _check_bounds(index, _rep->size);
|
762 mike 1.30
|
763 mike 1.111.2.1 if (index >= _rep->size)
764 return PEG_NOT_FOUND;
765
766 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
767
768 if (p)
769 return p - _rep->data;
|
770 mike 1.30
771 return PEG_NOT_FOUND;
772 }
773
|
774 mike 1.111.2.1 Uint32 String::_find_aux(const Char16* s, Uint32 n) const
|
775 mike 1.27 {
|
776 mike 1.111.2.1 _check_null_pointer(s);
|
777 mike 1.27
|
778 mike 1.111.2.1 const Uint16* data = _rep->data;
779 size_t rem = _rep->size;
|
780 mike 1.30
|
781 mike 1.111.2.1 while (n <= rem)
|
782 mike 1.27 {
|
783 mike 1.111.2.1 Uint16* p = (Uint16*)_find(data, rem, s[0]);
|
784 david.dillard 1.105
|
785 mike 1.111.2.1 if (!p)
786 break;
787
788 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
789 return p - _rep->data;
790
791 p++;
792 rem -= p - data;
793 data = p;
|
794 mike 1.27 }
|
795 mike 1.111.2.1
|
796 mike 1.27 return PEG_NOT_FOUND;
797 }
798
|
799 mike 1.111.2.1 Uint32 String::find(const char* s) const
800 {
801 _check_null_pointer(s);
802
|
803 mike 1.111.2.5 // Note: could optimize away creation of temporary, but this is rarely
804 // called.
|
805 mike 1.111.2.1 return find(String(s));
806 }
807
|
808 mike 1.27 Uint32 String::reverseFind(Char16 c) const
809 {
|
810 mike 1.111.2.1 Uint16 x = c;
811 Uint16* p = _rep->data;
812 Uint16* q = _rep->data + _rep->size;
|
813 mike 1.27
|
814 mike 1.111.2.1 while (q != p)
|
815 mike 1.27 {
|
816 mike 1.111.2.1 if (*--q == x)
817 return q - p;
|
818 mike 1.27 }
819
820 return PEG_NOT_FOUND;
821 }
822
823 void String::toLower()
824 {
|
825 mike 1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
826
|
827 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
828 david 1.90 {
|
829 mike 1.111.2.1 //// First calculate size of resulting string. u_strToLower() returns
830 //// only the size when zero is passed as the destination size argument.
831
|
832 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
833
|
834 mike 1.111.2.1 int32_t new_size = u_strToLower(
835 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
836 chuck 1.99
|
837 mike 1.111.2.1 //// Reserve enough space for the result.
838
839 if ((Uint32)new_size > _rep->cap)
840 _reserve(_rep, new_size);
841
842 //// Perform the conversion (overlapping buffers are allowed).
843
844 u_strToLower((UChar*)_rep->data, new_size,
845 (UChar*)_rep->data, _rep->size, NULL, &err);
|
846 yi.zhou 1.108
|
847 mike 1.111.2.1 _rep->size = new_size;
|
848 david 1.90 }
|
849 mike 1.111.2.1
850 #endif /* PEGASUS_STRING_ENABLE_ICU */
851
852 if (Atomic_get(&_rep->refs) != 1)
853 _rep = StringRep::copy_on_write(_rep);
854
855 Uint16* p = _rep->data;
856 size_t n = _rep->size;
857
858 for (; n--; p++)
|
859 david 1.90 {
|
860 mike 1.111.2.1 if (!(*p & 0xFF00))
861 *p = _to_lower(*p);
|
862 mike 1.27 }
|
863 kumpf 1.39 }
864
|
865 chuck 1.99 void String::toUpper()
|
866 david 1.90 {
|
867 mike 1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
868
|
869 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
870 chuck 1.99 {
|
871 mike 1.111.2.1 //// First calculate size of resulting string. u_strToUpper() returns
872 //// only the size when zero is passed as the destination size argument.
873
|
874 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
875
|
876 mike 1.111.2.1 int32_t new_size = u_strToUpper(
877 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
878 chuck 1.99
|
879 mike 1.111.2.1 //// Reserve enough space for the result.
|
880 yi.zhou 1.108
|
881 mike 1.111.2.1 if ((Uint32)new_size > _rep->cap)
882 _reserve(_rep, new_size);
|
883 david 1.90
|
884 mike 1.111.2.1 //// Perform the conversion (overlapping buffers are allowed).
|
885 kumpf 1.39
|
886 mike 1.111.2.1 u_strToUpper((UChar*)_rep->data, new_size,
887 (UChar*)_rep->data, _rep->size, NULL, &err);
|
888 mike 1.27
|
889 mike 1.111.2.1 _rep->size = new_size;
|
890 mike 1.27 }
891
|
892 mike 1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
893
894 if (Atomic_get(&_rep->refs) != 1)
895 _rep = StringRep::copy_on_write(_rep);
896
897 Uint16* p = _rep->data;
898 size_t n = _rep->size;
899
900 for (; n--; p++)
901 *p = _to_upper(*p);
|
902 mike 1.27 }
903
|
904 mike 1.111.2.1 int String::compare(const String& s1, const String& s2, Uint32 n)
|
905 mike 1.30 {
|
906 mike 1.111.2.1 assert(n <= s1._rep->size);
907 assert(n <= s2._rep->size);
|
908 kumpf 1.43
|
909 mike 1.111.2.1 // Ignoring error in which n is greater than s1.size() or s2.size()
910 return _compare(s1._rep->data, s2._rep->data, n);
911 }
|
912 mike 1.30
|
913 mike 1.111.2.1 int String::compare(const String& s1, const String& s2)
914 {
915 return _compare(s1._rep->data, s2._rep->data);
916 }
|
917 mike 1.30
|
918 mike 1.111.2.1 int String::compare(const String& s1, const char* s2)
919 {
920 _check_null_pointer(s2);
|
921 mike 1.30
|
922 mike 1.111.2.1 #ifdef PEGASUS_STRING_NO_UTF8
923 return _compare_no_utf8(s1._rep->data, s2);
924 #else
925 // ATTN: optimize this!
926 return String::compare(s1, String(s2));
927 #endif
|
928 mike 1.30 }
929
|
930 mike 1.111.2.1 int String::compareNoCase(const String& str1, const String& str2)
|
931 kumpf 1.40 {
|
932 mike 1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
933
|
934 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
935 {
|
936 mike 1.111.2.1 return u_strcasecmp(
937 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
|
938 yi.zhou 1.108 }
|
939 kumpf 1.40
|
940 mike 1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
|
941 kumpf 1.40
|
942 mike 1.111.2.1 const Uint16* s1 = str1._rep->data;
943 const Uint16* s2 = str2._rep->data;
944
945 while (*s1 && *s2)
946 {
947 int r = _to_lower(*s1++) - _to_lower(*s2++);
|
948 kumpf 1.40
|
949 david.dillard 1.105 if (r)
950 return r;
|
951 kumpf 1.40 }
952
|
953 mike 1.111.2.1 if (*s2)
|
954 david.dillard 1.105 return -1;
|
955 mike 1.111.2.1 else if (*s1)
|
956 david.dillard 1.105 return 1;
|
957 kumpf 1.40
958 return 0;
959 }
960
|
961 mike 1.111.2.1 Boolean String::equalNoCase_aux(const String& s1, const String& s2)
|
962 mike 1.27 {
|
963 mike 1.111.2.1 #ifdef PEGASUS_STRING_ENABLE_ICU
|
964 mike 1.27
|
965 mike 1.111.2.1 return String::compareNoCase(s1, s2) == 0;
|
966 kumpf 1.39
|
967 mike 1.111.2.1 #else /* PEGASUS_STRING_ENABLE_ICU */
|
968 kumpf 1.39
|
969 mike 1.111.2.1 Uint16* p = (Uint16*)s1._rep->data;
970 Uint16* q = (Uint16*)s2._rep->data;
971 Uint32 n = s2._rep->size;
972
973 while (n >= 8)
974 {
975 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||
976 ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||
977 ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||
978 ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))) ||
979 ((p[4] - q[4]) && (_to_upper(p[4]) - _to_upper(q[4]))) ||
980 ((p[5] - q[5]) && (_to_upper(p[5]) - _to_upper(q[5]))) ||
981 ((p[6] - q[6]) && (_to_upper(p[6]) - _to_upper(q[6]))) ||
982 ((p[7] - q[7]) && (_to_upper(p[7]) - _to_upper(q[7]))))
983 {
984 return false;
985 }
986
987 n -= 8;
988 p += 8;
989 q += 8;
990 mike 1.111.2.1 }
991
992 while (n >= 4)
993 {
994 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||
995 ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||
996 ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||
997 ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))))
998 {
999 return false;
1000 }
1001
1002 n -= 4;
1003 p += 4;
1004 q += 4;
1005 }
|
1006 mike 1.27
|
1007 kumpf 1.39 while (n--)
1008 {
|
1009 mike 1.111.2.1 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))))
1010 return false;
1011
1012 p++;
1013 q++;
|
1014 kumpf 1.39 }
|
1015 mike 1.28
|
1016 kumpf 1.39 return true;
|
1017 david 1.69
|
1018 mike 1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
1019 }
|
1020 mike 1.27
|
1021 mike 1.111.2.1 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1022 david 1.69 {
|
1023 mike 1.111.2.1 _check_null_pointer(s2);
|
1024 david 1.69
|
1025 mike 1.111.2.1 #if defined(PEGASUS_STRING_ENABLE_ICU)
|
1026 david.dillard 1.105
|
1027 mike 1.111.2.1 return String::equalNoCase(s1, String(s2));
|
1028 david 1.71
|
1029 mike 1.111.2.1 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1030 kumpf 1.42
|
1031 mike 1.111.2.1 const Uint16* p1 = (Uint16*)s1._rep->data;
1032 const char* p2 = s2;
1033 size_t n = s1._rep->size;
|
1034 karl 1.36
|
1035 mike 1.111.2.1 while (n--)
1036 {
1037 if (!*p2)
1038 return false;
|
1039 david.dillard 1.105
|
1040 mike 1.111.2.1 if (_to_upper(*p1++) != CharSet::to_upper(int(*p2++)))
1041 return false;
1042 }
1043
1044 return true;
|
1045 david.dillard 1.105
|
1046 mike 1.111.2.1 #else /* PEGASUS_STRING_ENABLE_ICU */
|
1047 chuck 1.78
|
1048 mike 1.111.2.1 // ATTN: optimize this!
1049 return String::equalNoCase(s1, String(s2));
|
1050 david.dillard 1.105
|
1051 mike 1.111.2.1 #endif /* PEGASUS_STRING_ENABLE_ICU */
1052 }
|
1053 karl 1.36
|
1054 mike 1.111.2.1 Boolean String::equal(const String& s1, const String& s2)
|
1055 karl 1.36 {
|
1056 mike 1.111.2.1 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
1057 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
|
1058 karl 1.36 }
1059
|
1060 mike 1.111.2.1 Boolean String::equal(const String& s1, const char* s2)
|
1061 karl 1.36 {
|
1062 mike 1.111.2.1 #ifdef PEGASUS_STRING_NO_UTF8
|
1063 kumpf 1.39
|
1064 mike 1.111.2.1 _check_null_pointer(s2);
|
1065 kumpf 1.39
|
1066 mike 1.111.2.1 const Uint16* p = (Uint16*)s1._rep->data;
1067 const char* q = s2;
|
1068 kumpf 1.39
|
1069 mike 1.111.2.1 while (*p && *q)
1070 {
1071 if (*p++ != Uint16(*q++))
1072 return false;
1073 }
|
1074 kumpf 1.39
|
1075 mike 1.111.2.1 return !(*p || *q);
|
1076 kumpf 1.39
|
1077 mike 1.111.2.1 #else /* PEGASUS_STRING_NO_UTF8 */
|
1078 kumpf 1.39
|
1079 mike 1.111.2.1 return String::equal(s1, String(s2));
1080
1081 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1082 kumpf 1.39 }
1083
|
1084 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1085 kumpf 1.39 {
|
1086 david 1.69 #if defined(PEGASUS_OS_OS400)
|
1087 mike 1.111.2.1
|
1088 david 1.93 CString cstr = str.getCString();
|
1089 david 1.69 const char* utf8str = cstr;
1090 os << utf8str;
1091
|
1092 mike 1.111.2.1 #elif defined(PEGASUS_STRING_ENABLE_ICU)
1093
|
1094 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1095 {
|
1096 david.dillard 1.105 char *buf = NULL;
1097 const int size = str.size() * 6;
|
1098 mike 1.111.2.1 UnicodeString UniStr(
1099 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1100 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1101 buf = new char[bufsize+1];
1102 UniStr.extract(0,bufsize,buf);
1103 os << buf;
1104 os.flush();
1105 delete [] buf;
|
1106 yi.zhou 1.108 }
|
1107 mike 1.111.2.1
1108 #endif /* PEGASUS_OS_OS400 */
1109
1110 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1111 yi.zhou 1.108 {
|
1112 mike 1.111.2.1 Uint16 code = str[i];
|
1113 david.dillard 1.105
|
1114 mike 1.111.2.1 if (code > 0 && !(code & 0xFF00))
1115 os << char(code);
1116 else
1117 {
1118 // Print in hex format:
1119 char buffer[8];
1120 sprintf(buffer, "\\x%04X", code);
1121 os << buffer;
1122 }
|
1123 yi.zhou 1.108 }
|
1124 kumpf 1.39
1125 return os;
1126 }
1127
|
1128 mike 1.111.2.1 void String::_append_char_aux()
|
1129 kumpf 1.39 {
|
1130 mike 1.111.2.1 StringRep* tmp;
|
1131 kumpf 1.39
|
1132 mike 1.111.2.1 if (_rep->cap)
1133 {
1134 tmp = StringRep::alloc(2 * _rep->cap);
1135 tmp->size = _rep->size;
1136 _copy(tmp->data, _rep->data, _rep->size);
1137 }
1138 else
1139 {
1140 tmp = StringRep::alloc(8);
1141 tmp->size = 0;
1142 }
|
1143 kumpf 1.39
|
1144 mike 1.111.2.1 StringRep::unref(_rep);
1145 _rep = tmp;
|
1146 kumpf 1.39 }
1147
|
1148 mike 1.111.2.1 PEGASUS_NAMESPACE_END
|
1149 kumpf 1.39
|
1150 mike 1.111.2.1 /*
1151 ================================================================================
|
1152 kumpf 1.39
|
1153 mike 1.111.2.1 String optimizations:
1154
1155 1. Added mechanism allowing certain functions to be inlined only when
1156 used by internal Pegasus modules. External modules (i.e., providers)
1157 link to a non-inline version, which allows for binary compatibility.
1158
1159 2. Implemented copy-on-write with atomic increment/decrement. This
1160 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1161 for the 'ni1000' benchmark.
1162
1163 3. Employed loop unrolling in several places. For example, see:
1164
1165 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1166
1167 4. Used the "empty-rep" optimization (described in whitepaper from the
1168 GCC Developers Summit). This reduced default construction to a simple
1169 pointer assignment.
1170
1171 inline String::String() : _rep(&_empty_rep) { }
1172
1173 5. Implemented Uint16 versions of toupper() and tolower() using tables.
1174 mike 1.111.2.1 For example:
1175
1176 static const char _upper[] =
1177 {
1178 0,1,2,...255
1179 };
1180
1181 inline Uint16 _to_upper(Uint16 x)
1182 {
1183 return (x & 0xFF00) ? x : _upper[x];
1184 }
1185
1186 This outperforms the system implementation by avoiding an anding
1187 operation.
1188
1189 6. Implemented char* version of the following member functions to
1190 eliminate unnecessary creation of anonymous string objects
1191 (temporaries).
1192
1193 String(const String& s1, const char* s2);
1194 String(const char* s1, const String& s2);
1195 mike 1.111.2.1 String& String::operator=(const char* str);
1196 Uint32 String::find(const char* s) const;
1197 bool String::equal(const String& s1, const char* s2);
1198 static int String::compare(const String& s1, const char* s2);
1199 String& String::append(const char* str);
1200 String& String::append(const char* str, Uint32 size);
1201 static bool String::equalNoCase(const String& s1, const char* s2);
1202 String& operator=(const char* str)
1203 String& String::assign(const char* str)
1204 String& String::append(const char* str)
1205 Boolean operator==(const String& s1, const char* s2)
1206 Boolean operator==(const char* s1, const String& s2)
1207 Boolean operator!=(const String& s1, const char* s2)
1208 Boolean operator!=(const char* s1, const String& s2)
1209 Boolean operator<(const String& s1, const char* s2)
1210 Boolean operator<(const char* s1, const String& s2)
1211 Boolean operator>(const String& s1, const char* s2)
1212 Boolean operator>(const char* s1, const String& s2)
1213 Boolean operator<=(const String& s1, const char* s2)
1214 Boolean operator<=(const char* s1, const String& s2)
1215 Boolean operator>=(const String& s1, const char* s2)
1216 mike 1.111.2.1 Boolean operator>=(const char* s1, const String& s2)
1217 String operator+(const String& s1, const char* s2)
1218 String operator+(const char* s1, const String& s2)
1219
1220 7. Optimized _next_pow_2(), used in rounding the capacity to the next
1221 power of two (algorithm from the book "Hacker's Delight").
1222
1223 static Uint32 _next_pow_2(Uint32 x)
1224 {
1225 if (x < 8)
1226 return 8;
1227
1228 x--;
1229 x |= (x >> 1);
1230 x |= (x >> 2);
1231 x |= (x >> 4);
1232 x |= (x >> 8);
1233 x |= (x >> 16);
1234 x++;
1235
1236 return x;
1237 mike 1.111.2.1 }
1238
1239 8. Implemented "concatenating constructors" to eliminate temporaries
1240 created by operator+(). This scheme employs the "return-value
1241 optimization" described by Stan Lippman.
1242
1243 inline String operator+(const String& s1, const String& s2)
1244 {
1245 return String(s1, s2, 0);
1246 }
1247
1248 9. Experimented to find the optimal initial size for a short string.
1249 Eight seems to offer the best tradeoff between space and time.
1250
1251 10. Inlined all members of the Char16 class.
1252
1253 11. Used Uint16 internally in the String class. This showed no improvement
1254 since Char16 was already fully inlined and was essentially reduced to
1255 Uint16 in any case.
1256
1257 12. Implemented conditional logic (#if) allowing error checking logic to
1258 mike 1.111.2.1 be excluded for better performance. Examples include bounds checking
1259 and null-pointer checking.
1260
1261 13. Used memcpy() and memcmp() where possible. These are implemented using
1262 the rep family of instructions under Intel and are much faster.
1263
|
1264 mike 1.111.2.2 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1265 copy routine overhead.
1266
1267 15. Added ASCII7 form of the constructor and assign().
1268
1269 String s("hello world", String::ASCII7);
1270
|
1271 mike 1.111.2.4 s.assignASCII7("hello world");
|
1272 mike 1.111.2.2
|
1273 mike 1.111.2.6 This avoids slower UTF8 processing when not needed.
|
1274 mike 1.111.2.2
|
1275 mike 1.111.2.1 ================================================================================
1276 */
|