1 karl 1.98 //%2005////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 mike 1.27 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
16 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 david.dillard 1.105 //
|
19 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
20 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
22 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
25 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
30 // Author: Mike Brasher (mbrasher@bmc.com)
31 //
|
32 mike 1.111.2.9 // Modified By:
33 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
34 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
35 // David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
36 // Mike Brasher (mike-brasher@austin.rr.com)
37 //
|
38 mike 1.27 //%/////////////////////////////////////////////////////////////////////////////
39
|
40 mike 1.111.2.10 #ifndef PEGASUS_USE_INTERNAL_INLINES
41 # define PEGASUS_USE_INTERNAL_INLINES
42 #endif
43
|
44 mike 1.27 #include "String.h"
|
45 mike 1.111.2.6 #include <cassert>
|
46 kumpf 1.48 #include "InternalException.h"
|
47 david 1.69 #include "CommonUTF.h"
|
48 chuck 1.111.2.12 #include "MessageLoader.h"
|
49 mike 1.111.2.1 #include "CharSet.h"
|
50 david 1.69
|
51 chuck 1.111.2.14 #ifdef PEGASUS_HAS_ICU
|
52 chuck 1.99 #include <unicode/ustring.h>
53 #include <unicode/uchar.h>
|
54 david 1.69 #endif
55
|
56 mike 1.27 PEGASUS_NAMESPACE_BEGIN
57
|
58 mike 1.111.2.1 //==============================================================================
|
59 kumpf 1.39 //
|
60 mike 1.111.2.7 // Compile-time macros (undefined by default).
|
61 mike 1.111.2.6 //
62 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
63 //
64 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
65 //
66 // PEGASUS_USE_INTERNAL_INLINES -- enables internal inlining feature.
67 //
68 //==============================================================================
69
70 //==============================================================================
71 //
|
72 mike 1.111.2.1 // File-scope definitions:
|
73 kumpf 1.54 //
|
74 mike 1.111.2.1 //==============================================================================
|
75 kumpf 1.54
|
76 mike 1.111.2.1 // Converts 16-bit characters to upper case.
77 inline Uint16 _to_upper(Uint16 x)
|
78 kumpf 1.54 {
|
79 mike 1.111.2.1 return (x & 0xFF00) ? x : CharSet::to_upper(x);
|
80 kumpf 1.54 }
81
|
82 mike 1.111.2.1 // Converts 16-bit characters to lower case.
83 inline Uint16 _to_lower(Uint16 x)
|
84 kumpf 1.54 {
|
85 mike 1.111.2.1 return (x & 0xFF00) ? x : CharSet::to_lower(x);
86 }
|
87 kumpf 1.82
|
88 mike 1.111.2.1 // Rounds x to the next power of two (or just returns 8 if x < 8).
89 static Uint32 _next_pow_2(Uint32 x)
90 {
91 if (x < 8)
92 return 8;
93
94 x--;
95 x |= (x >> 1);
96 x |= (x >> 2);
97 x |= (x >> 4);
98 x |= (x >> 8);
99 x |= (x >> 16);
100 x++;
101
102 return x;
103 }
104
// Element-wise copy of n items from q to p, converting each element from Q
// to P by plain assignment. The main loop is unrolled eight at a time.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // Bulk of the work: eight elements per iteration.
    for (; n >= 8; n -= 8, p += 8, q += 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    // Fewer than eight remain, so at most one group of four is left.
    if (n >= 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p += 4;
        q += 4;
        n -= 4;
    }

    // Final zero to three elements.
    for (; n; n--)
        *p++ = *q++;
}
139
|
140 mike 1.111.2.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
|
141 kumpf 1.54 {
|
142 mike 1.111.2.1 while (n >= 4)
143 {
144 if (s[0] == c)
145 return (Uint16*)s;
146 if (s[1] == c)
147 return (Uint16*)&s[1];
148 if (s[2] == c)
149 return (Uint16*)&s[2];
150 if (s[3] == c)
151 return (Uint16*)&s[3];
152
153 n -= 4;
154 s += 4;
155 }
156
157 if (n)
158 {
159 if (*s == c)
160 return (Uint16*)s;
161 s++;
162 n--;
163 mike 1.111.2.1 }
164
165 if (n)
166 {
167 if (*s == c)
168 return (Uint16*)s;
169 s++;
170 n--;
171 }
172
173 if (n && *s == c)
174 return (Uint16*)s;
175
176 // Not found!
177 return 0;
|
178 kumpf 1.54 }
179
|
180 mike 1.111.2.1 static int _compare(const Uint16* s1, const Uint16* s2)
|
181 kumpf 1.54 {
|
182 mike 1.111.2.1 while (*s1 && *s2)
|
183 kumpf 1.82 {
|
184 mike 1.111.2.1 int r = *s1++ - *s2++;
185
186 if (r)
187 return r;
|
188 kumpf 1.82 }
|
189 mike 1.111.2.1
190 if (*s2)
191 return -1;
192 else if (*s1)
193 return 1;
194
195 return 0;
|
196 kumpf 1.54 }
197
|
198 mike 1.111.2.1 static int _compare_no_utf8(const Uint16* s1, const char* s2)
|
199 kumpf 1.56 {
|
200 mike 1.111.2.1 Uint16 c1;
201 Uint16 c2;
202
203 do
|
204 kumpf 1.81 {
|
205 mike 1.111.2.1 c1 = *s1++;
206 c2 = *s2++;
207
208 if (c1 == 0)
209 return c1 - c2;
|
210 kumpf 1.81 }
|
211 mike 1.111.2.1 while (c1 == c2);
212
213 return c1 - c2;
|
214 kumpf 1.56 }
215
|
216 mike 1.111.2.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
|
217 kumpf 1.54 {
|
218 mike 1.111.2.1 // This should only be called when s1 and s2 have the same length.
219
220 while (n-- && (*s1++ - *s2++) == 0)
221 ;
222
|
223 mike 1.111.2.8 //
224
|
225 mike 1.111.2.1 return s1[-1] - s2[-1];
|
226 kumpf 1.54 }
227
|
228 mike 1.111.2.1 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
229 {
230 memcpy(s1, s2, n * sizeof(Uint16));
231 }
|
232 kumpf 1.39
|
233 mike 1.111.2.1 void String_throw_out_of_bounds()
234 {
235 throw IndexOutOfBoundsException();
236 }
237
238 #ifdef PEGASUS_STRING_NO_THROW
239 # define _check_null_pointer(ARG) /* empty */
240 #else
241 template<class T>
242 inline void _check_null_pointer(const T* ptr)
243 {
244 if (!ptr)
245 throw NullPointer();
246 }
247 #endif
|
248 mike 1.27
|
249 mike 1.111.2.1 static size_t _copy_from_utf8(Uint16* dest, const char* src, size_t n)
|
250 mike 1.27 {
|
251 mike 1.111.2.1 Uint16* p = dest;
252 const Uint8* q = (const Uint8*)src;
|
253 mike 1.111.2.2
|
254 mike 1.111.2.11 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
255 // Use loop-unrolling.
256
257 while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
258 {
259 p[0] = q[0];
260 p[1] = q[1];
261 p[2] = q[2];
262 p[3] = q[3];
263 p[4] = q[4];
264 p[5] = q[5];
265 p[6] = q[6];
266 p[7] = q[7];
267 p += 8;
268 q += 8;
269 n -= 8;
270 }
|
271 mike 1.111.2.2
|
272 mike 1.111.2.10 while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
|
273 mike 1.111.2.2 {
274 p[0] = q[0];
275 p[1] = q[1];
276 p[2] = q[2];
277 p[3] = q[3];
278 p += 4;
279 q += 4;
280 n -= 4;
281 }
282
283 switch (n)
284 {
285 case 0:
286 return p - dest;
287 case 1:
288 if (q[0] < 128)
289 {
290 p[0] = q[0];
291 return p + 1 - dest;
292 }
293 break;
294 mike 1.111.2.2 case 2:
|
295 mike 1.111.2.11 if (((q[0]|q[1]) & 0x80) == 0)
|
296 mike 1.111.2.2 {
297 p[0] = q[0];
298 p[1] = q[1];
299 return p + 2 - dest;
300 }
301 break;
302 case 3:
|
303 mike 1.111.2.11 if (((q[0]|q[1]|q[2]) & 0x80) == 0)
|
304 mike 1.111.2.2 {
305 p[0] = q[0];
306 p[1] = q[1];
307 p[2] = q[2];
308 return p + 3 - dest;
309 }
310 break;
311 }
312
313 // Process remaining characters.
|
314 mike 1.111.2.1
315 while (n)
316 {
|
317 mike 1.111.2.2 // Optimize for 7-bit ASCII case.
|
318 mike 1.111.2.1
|
319 mike 1.111.2.2 if (*q < 128)
|
320 mike 1.111.2.1 {
321 *p++ = *q++;
322 n--;
323 }
|
324 mike 1.111.2.2 else
325 {
326 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
|
327 mike 1.111.2.1
|
328 mike 1.111.2.2 if (c > n || !isValid_U8(q, c) ||
329 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
330 {
|
331 mike 1.111.2.13 MessageLoaderParms parms("Common.String.BAD_UTF8",
332 "The byte sequence starting at index $0 "
333 "is not valid UTF-8 encoding.",
334 q - (const Uint8*)src);
335 throw Exception(parms);
|
336 mike 1.111.2.2 }
|
337 mike 1.111.2.1
|
338 mike 1.111.2.2 n -= c;
|
339 mike 1.111.2.1 }
340 }
|
341 mike 1.27
|
342 mike 1.111.2.1 return p - dest;
343 }
|
344 mike 1.27
|
345 mike 1.111.2.1 // Note: dest must be at least three times src (plus an extra byte for
346 // terminator).
|
347 mike 1.111.2.2 static inline size_t _copy_to_utf8(char* dest, const Uint16* src, size_t n)
|
348 mike 1.111.2.1 {
|
349 mike 1.111.2.2 const Uint16* q = src;
350 Uint8* p = (Uint8*)dest;
351
352 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
353 {
354 p[0] = q[0];
355 p[1] = q[1];
356 p[2] = q[2];
357 p[3] = q[3];
358 p += 4;
359 q += 4;
360 n -= 4;
361 }
362
363 switch (n)
364 {
365 case 0:
366 return p - (Uint8*)dest;
367 case 1:
368 if (q[0] < 128)
369 {
370 mike 1.111.2.2 p[0] = q[0];
371 return p + 1 - (Uint8*)dest;
372 }
373 break;
374 case 2:
375 if (q[0] < 128 && q[1] < 128)
376 {
377 p[0] = q[0];
378 p[1] = q[1];
379 return p + 2 - (Uint8*)dest;
380 }
381 break;
382 case 3:
383 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
384 {
385 p[0] = q[0];
386 p[1] = q[1];
387 p[2] = q[2];
388 return p + 3 - (Uint8*)dest;
389 }
390 break;
391 mike 1.111.2.2 }
392
393 // If this line was reached, there must be characters greater than 128.
394
395 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
396
397 return p - (Uint8*)dest;
|
398 mike 1.111.2.1 }
|
399 mike 1.27
|
400 mike 1.111.2.1 static inline size_t _convert(Uint16* p, const char* q, size_t n)
401 {
402 #ifdef PEGASUS_STRING_NO_UTF8
403 _copy(p, q, n);
|
404 mike 1.27 return n;
|
405 mike 1.111.2.1 #else
406 return _copy_from_utf8(p, q, n);
407 #endif
|
408 mike 1.27 }
409
|
410 mike 1.111.2.1 //==============================================================================
|
411 chuck 1.102 //
|
412 mike 1.111.2.1 // class CString
|
413 chuck 1.102 //
|
414 mike 1.111.2.1 //==============================================================================
415
416 CString::CString(const CString& cstr) : _rep(0)
|
417 chuck 1.102 {
|
418 mike 1.111.2.1 if (cstr._rep)
|
419 chuck 1.102 {
|
420 mike 1.111.2.1 size_t n = strlen(cstr._rep) + 1;
421 _rep = (char*)operator new(n);
422 memcpy(_rep, cstr._rep, n);
423 }
424 }
425
426 CString& CString::operator=(const CString& cstr)
427 {
428 if (&cstr != this)
429 {
430 if (_rep)
|
431 david.dillard 1.105 {
|
432 mike 1.111.2.1 operator delete(_rep);
433 _rep = 0;
|
434 chuck 1.102 }
|
435 mike 1.111.2.1
436 if (cstr._rep)
|
437 chuck 1.102 {
|
438 mike 1.111.2.1 size_t n = strlen(cstr._rep) + 1;
439 _rep = (char*)operator new(n);
440 memcpy(_rep, cstr._rep, n);
|
441 chuck 1.102 }
|
442 mike 1.111.2.1 }
|
443 chuck 1.102
|
444 mike 1.111.2.1 return *this;
|
445 chuck 1.102 }
446
|
447 mike 1.111.2.1 //==============================================================================
448 //
449 // class StringRep
450 //
451 //==============================================================================
|
452 kumpf 1.43
|
453 mike 1.111.2.1 StringRep StringRep::_empty_rep;
|
454 kumpf 1.43
|
455 mike 1.111.2.1 inline StringRep* StringRep::alloc(size_t cap)
|
456 mike 1.27 {
|
457 mike 1.111.2.1 StringRep* rep = (StringRep*)::operator new(
458 sizeof(StringRep) + cap * sizeof(Uint16));
459 rep->cap = cap;
460 Atomic_create(&rep->refs, 1);
461
462 return rep;
|
463 mike 1.27 }
464
|
465 mike 1.111.2.1 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
466 mike 1.27 {
|
467 mike 1.111.2.1 if (cap > rep->cap || Atomic_get(&rep->refs) != 1)
468 {
469 size_t n = _next_pow_2(cap);
470 StringRep* new_rep = StringRep::alloc(n);
471 new_rep->size = rep->size;
472 _copy(new_rep->data, rep->data, rep->size + 1);
473 StringRep::unref(rep);
474 rep = new_rep;
475 }
|
476 kumpf 1.39 }
|
477 tony 1.66
|
478 mike 1.111.2.1 StringRep* StringRep::create(const Uint16* data, size_t size)
|
479 kumpf 1.39 {
|
480 mike 1.111.2.1 StringRep* rep = StringRep::alloc(size);
481 rep->size = size;
482 _copy(rep->data, data, size);
483 rep->data[size] = '\0';
484 return rep;
|
485 kumpf 1.39 }
486
|
487 mike 1.111.2.1 StringRep* StringRep::copy_on_write(StringRep* rep)
|
488 kumpf 1.39 {
|
489 mike 1.111.2.1 // Return a new copy of rep. Release rep.
|
490 david.dillard 1.105
|
491 mike 1.111.2.1 StringRep* new_rep = StringRep::alloc(rep->size);
492 new_rep->size = rep->size;
493 _copy(new_rep->data, rep->data, rep->size);
494 new_rep->data[new_rep->size] = '\0';
495 StringRep::unref(rep);
496 return new_rep;
|
497 mike 1.27 }
498
|
499 mike 1.111.2.1 StringRep* StringRep::create(const char* data, size_t size)
|
500 kumpf 1.39 {
|
501 mike 1.111.2.1 StringRep* rep = StringRep::alloc(size);
502 rep->size = _convert((Uint16*)rep->data, data, size);
503 rep->data[rep->size] = '\0';
|
504 david.dillard 1.105
|
505 mike 1.111.2.1 return rep;
|
506 kumpf 1.39 }
507
|
508 mike 1.111.2.4 StringRep* StringRep::createASCII7(const char* data, size_t size)
|
509 mike 1.111.2.2 {
510 StringRep* rep = StringRep::alloc(size);
511 _copy((Uint16*)rep->data, data, size);
512 rep->data[rep->size = size] = '\0';
513 return rep;
514 }
515
|
516 mike 1.111.2.1 Uint32 StringRep::length(const Uint16* str)
|
517 mike 1.27 {
|
518 mike 1.111.2.5 // Note: We could unroll this but it is rarely called.
|
519 david.dillard 1.105
|
520 mike 1.111.2.1 const Uint16* end = (Uint16*)str;
521
522 while (*end++)
523 ;
524
525 return end - str - 1;
|
526 mike 1.27 }
527
|
528 mike 1.111.2.1 //==============================================================================
529 //
530 // class String
531 //
532 //==============================================================================
|
533 david.dillard 1.105
|
534 mike 1.111.2.1 const String String::EMPTY;
535
536 String::String(const String& str, Uint32 n)
537 {
538 _check_bounds(n, str._rep->size);
539 _rep = StringRep::create(str._rep->data, n);
|
540 kumpf 1.39 }
|
541 mike 1.27
|
542 mike 1.111.2.1 String::String(const Char16* str)
|
543 kumpf 1.39 {
|
544 mike 1.111.2.1 _check_null_pointer(str);
545 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
546 mike 1.27 }
547
|
548 mike 1.111.2.1 String::String(const Char16* str, Uint32 n)
|
549 mike 1.27 {
|
550 mike 1.111.2.1 _check_null_pointer(str);
551 _rep = StringRep::create((Uint16*)str, n);
|
552 mike 1.27 }
553
|
554 mike 1.111.2.1 String::String(const char* str)
|
555 mike 1.27 {
|
556 mike 1.111.2.1 _check_null_pointer(str);
557 _rep = StringRep::create(str, strlen(str));
|
558 mike 1.27 }
559
|
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Constructs a string from a zero-terminated char array by widening each
// char (StringRep::createASCII7); the tag argument only selects this overload.
String::String(const char* str, String::ASCII7Tag tag)
{
    _check_null_pointer(str);

    const size_t len = strlen(str);
    _rep = StringRep::createASCII7(str, len);
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
|
567 mike 1.111.2.2
|
568 mike 1.111.2.1 String::String(const char* str, Uint32 n)
|
569 mike 1.27 {
|
570 mike 1.111.2.1 _check_null_pointer(str);
571 _rep = StringRep::create(str, n);
572 }
|
573 david.dillard 1.105
|
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Constructs a string from the first n chars of str by widening each char
// (StringRep::createASCII7); the tag argument only selects this overload.
String::String(const char* str, size_t n, String::ASCII7Tag tag)
{
    _check_null_pointer(str);

    StringRep* const rep = StringRep::createASCII7(str, n);
    _rep = rep;
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
|
581 mike 1.111.2.2
|
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Concatenating constructor: the new string is s1 followed by s2.
String::String(const String& s1, const String& s2)
{
    const size_t n1 = s1._rep->size;
    const size_t n2 = s2._rep->size;
    const size_t total = n1 + n2;

    StringRep* const rep = StringRep::alloc(total);

    _copy(rep->data, s1._rep->data, n1);
    _copy(rep->data + n1, s2._rep->data, n2);
    rep->data[total] = '\0';
    rep->size = total;

    _rep = rep;
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
|
595 mike 1.111.2.1
|
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Concatenating constructor: the new string is s1 followed by the converted
// contents of the zero-terminated char array s2. s2 is null-checked; any
// exception raised while converting s2 is propagated.
String::String(const String& s1, const char* s2)
{
    _check_null_pointer(s2);

    const size_t n1 = s1._rep->size;
    const size_t n2 = strlen(s2);

    // n2 chars never convert to more than n2 16-bit elements.
    StringRep* rep = StringRep::alloc(n1 + n2);
    _copy(rep->data, s1._rep->data, n1);

    try
    {
        rep->size = n1 + _convert((Uint16*)rep->data + n1, s2, n2);
    }
    catch (...)
    {
        // A constructor that throws never runs its destructor, so the
        // representation has to be given back here or it leaks.
        StringRep::unref(rep);
        throw;
    }

    rep->data[rep->size] = '\0';
    _rep = rep;
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
|
608 mike 1.111.2.1
|
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Concatenating constructor: the new string is the converted contents of the
// zero-terminated char array s1 followed by s2. s1 is null-checked; any
// exception raised while converting s1 is propagated.
String::String(const char* s1, const String& s2)
{
    _check_null_pointer(s1);

    const size_t n1 = strlen(s1);
    const size_t n2 = s2._rep->size;

    // n1 chars never convert to more than n1 16-bit elements.
    StringRep* rep = StringRep::alloc(n1 + n2);
    size_t converted = 0;

    try
    {
        converted = _convert((Uint16*)rep->data, s1, n1);
    }
    catch (...)
    {
        // A constructor that throws never runs its destructor, so the
        // representation has to be given back here or it leaks.
        StringRep::unref(rep);
        throw;
    }

    // s2 goes directly after the *converted* prefix. The converted length is
    // smaller than the byte count n1 whenever s1 contains multi-byte
    // sequences, so using n1 as the offset would leave a gap and truncate s2.
    _copy(rep->data + converted, s2._rep->data, n2);
    rep->size = converted + n2;
    rep->data[rep->size] = '\0';

    _rep = rep;
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
|
621 mike 1.27
|
622 mike 1.111.2.1 String& String::assign(const String& str)
|
623 mike 1.27 {
|
624 mike 1.111.2.1 if (_rep != str._rep)
|
625 david.dillard 1.105 {
|
626 mike 1.111.2.1 StringRep::unref(_rep);
627 StringRep::ref(_rep = str._rep);
|
628 david.dillard 1.105 }
629
|
630 mike 1.27 return *this;
631 }
632
|
633 mike 1.111.2.1 String& String::assign(const Char16* str, Uint32 n)
|
634 chuck 1.102 {
|
635 mike 1.111.2.1 _check_null_pointer(str);
636
637 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
|
638 david.dillard 1.105 {
|
639 mike 1.111.2.1 StringRep::unref(_rep);
640 _rep = StringRep::alloc(n);
|
641 david.dillard 1.105 }
642
|
643 mike 1.111.2.1 _rep->size = n;
644 _copy(_rep->data, (Uint16*)str, n);
645 _rep->data[n] = '\0';
646
|
647 chuck 1.102 return *this;
648 }
649
|
650 kumpf 1.39 String& String::assign(const char* str, Uint32 n)
|
651 mike 1.27 {
|
652 mike 1.111.2.1 _check_null_pointer(str);
653
654 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
|
655 david.dillard 1.105 {
|
656 mike 1.111.2.1 StringRep::unref(_rep);
657 _rep = StringRep::alloc(n);
|
658 david.dillard 1.105 }
659
|
660 mike 1.111.2.1 _rep->size = _convert(_rep->data, str, n);
661 _rep->data[_rep->size] = 0;
662
|
663 mike 1.27 return *this;
664 }
665
|
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Replaces the contents with the first n chars of str, widening each char
// one-to-one (no UTF-8 decoding). The existing buffer is reused when it is
// large enough and its reference count is 1.
String& String::assignASCII7(const char* str, Uint32 n)
{
    _check_null_pointer(str);

    const bool reusable = n <= _rep->cap && Atomic_get(&_rep->refs) == 1;

    if (!reusable)
    {
        StringRep::unref(_rep);
        _rep = StringRep::alloc(n);
    }

    _copy(_rep->data, str, n);
    _rep->size = n;
    _rep->data[n] = 0;

    return *this;
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
|
683 mike 1.111.2.2
|
684 kumpf 1.39 void String::clear()
685 {
|
686 mike 1.111.2.1 if (_rep->size)
687 {
688 if (Atomic_get(&_rep->refs) == 1)
689 _rep->size = 0;
690 else
691 {
692 StringRep::unref(_rep);
693 _rep = &StringRep::_empty_rep;
694 }
695 }
|
696 kumpf 1.39 }
697
|
698 mike 1.111.2.1 void String::reserveCapacity(Uint32 cap)
|
699 kumpf 1.39 {
|
700 mike 1.111.2.1 _reserve(_rep, cap);
|
701 kumpf 1.39 }
702
|
703 mike 1.111.2.1 CString String::getCString() const
704 {
705 #ifdef PEGASUS_STRING_NO_UTF8
706 char* str = (char*)operator new(_rep->size + 1);
707 _copy(str, _rep->data, _rep->size);
708 str[_rep->size] = '\0';
709 return CString(str);
|
710 gs.keenan 1.110 #else
|
711 mike 1.111.2.1 Uint32 n = 3 * _rep->size;
712 char* str = (char*)operator new(n + 1);
|
713 mike 1.111.2.2 size_t size = _copy_to_utf8(str, _rep->data, _rep->size);
|
714 mike 1.111.2.1 str[size] = '\0';
715 return CString(str);
|
716 gs.keenan 1.110 #endif
|
717 kumpf 1.39 }
718
|
719 mike 1.111.2.1 String& String::append(const Char16* str, Uint32 n)
|
720 kumpf 1.39 {
|
721 mike 1.111.2.1 _check_null_pointer(str);
|
722 kumpf 1.39
|
723 mike 1.111.2.1 size_t old_size = _rep->size;
724 size_t new_size = old_size + n;
725 _reserve(_rep, new_size);
726 _copy(_rep->data + old_size, (Uint16*)str, n);
727 _rep->size = new_size;
728 _rep->data[new_size] = '\0';
|
729 mike 1.27
|
730 mike 1.111.2.1 return *this;
|
731 mike 1.27 }
732
|
733 mike 1.111.2.1 String& String::append(const String& str)
|
734 mike 1.27 {
|
735 mike 1.111.2.1 return append((Char16*)str._rep->data, str._rep->size);
|
736 mike 1.27 }
737
|
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Appends the first size chars of str, converted with _convert() (UTF-8
// decoding unless PEGASUS_STRING_NO_UTF8 is defined). If the conversion
// throws, the string keeps its previous contents and the exception is
// propagated.
String& String::append(const char* str, Uint32 size)
{
    _check_null_pointer(str);

    const size_t old_size = _rep->size;
    const size_t cap = old_size + size;

    _reserve(_rep, cap);

    size_t added = 0;

    try
    {
        added = _convert((Uint16*)_rep->data + old_size, str, size);
    }
    catch (...)
    {
        // A partial conversion may have overwritten the terminator that sat
        // at old_size; put it back so the string is still well formed.
        _rep->data[old_size] = '\0';
        throw;
    }

    _rep->size = old_size + added;
    _rep->data[_rep->size] = '\0';

    return *this;
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
|
753 mike 1.27
|
754 mike 1.111.2.1 void String::remove(Uint32 index, Uint32 n)
|
755 mike 1.27 {
|
756 mike 1.111.2.1 if (n == PEG_NOT_FOUND)
757 n = _rep->size - index;
|
758 mike 1.27
|
759 mike 1.111.2.1 _check_bounds(index + n, _rep->size);
760
761 if (Atomic_get(&_rep->refs) != 1)
762 _rep = StringRep::copy_on_write(_rep);
|
763 mike 1.27
|
764 mike 1.111.2.1 assert(index + n <= _rep->size);
|
765 mike 1.27
|
766 mike 1.111.2.1 size_t rem = _rep->size - (index + n);
767 Uint16* data = _rep->data;
768
769 if (rem)
770 memmove(data + index, data + index + n, rem * sizeof(Uint16));
771
772 _rep->size -= n;
773 data[_rep->size] = '\0';
|
774 mike 1.27 }
775
|
776 mike 1.111.2.1 String String::subString(Uint32 index, Uint32 n) const
|
777 mike 1.27 {
|
778 mike 1.111.2.5 // Note: this implementation is very permissive but used for
|
779 mike 1.111.2.1 // backwards compatibility.
780
781 if (index < _rep->size)
|
782 mike 1.27 {
|
783 mike 1.111.2.1 if (n == PEG_NOT_FOUND || n > _rep->size - index)
784 n = _rep->size - index;
|
785 mike 1.27
|
786 mike 1.111.2.1 return String((Char16*)_rep->data + index, n);
|
787 mike 1.27 }
|
788 david.dillard 1.105
789 return String();
|
790 mike 1.27 }
791
792 Uint32 String::find(Char16 c) const
793 {
|
794 mike 1.111.2.1 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
795 mike 1.27
|
796 mike 1.111.2.1 if (p)
797 return p - _rep->data;
|
798 mike 1.27
799 return PEG_NOT_FOUND;
800 }
801
|
802 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
803 mike 1.30 {
|
804 mike 1.111.2.1 _check_bounds(index, _rep->size);
|
805 mike 1.30
|
806 mike 1.111.2.1 if (index >= _rep->size)
807 return PEG_NOT_FOUND;
808
809 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
810
811 if (p)
812 return p - _rep->data;
|
813 mike 1.30
814 return PEG_NOT_FOUND;
815 }
816
|
817 mike 1.111.2.1 Uint32 String::_find_aux(const Char16* s, Uint32 n) const
|
818 mike 1.27 {
|
819 mike 1.111.2.1 _check_null_pointer(s);
|
820 mike 1.27
|
821 mike 1.111.2.1 const Uint16* data = _rep->data;
822 size_t rem = _rep->size;
|
823 mike 1.30
|
824 mike 1.111.2.1 while (n <= rem)
|
825 mike 1.27 {
|
826 mike 1.111.2.1 Uint16* p = (Uint16*)_find(data, rem, s[0]);
|
827 david.dillard 1.105
|
828 mike 1.111.2.1 if (!p)
829 break;
830
831 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
832 return p - _rep->data;
833
834 p++;
835 rem -= p - data;
836 data = p;
|
837 mike 1.27 }
|
838 mike 1.111.2.1
|
839 mike 1.27 return PEG_NOT_FOUND;
840 }
841
|
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Searches for the zero-terminated char pattern s by converting it to a
// temporary String and delegating to find(const String&).
Uint32 String::find(const char* s) const
{
    _check_null_pointer(s);

    // Note: could optimize away creation of temporary, but this is rarely
    // called.
    const String pattern(s);
    return find(pattern);
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
|
852 mike 1.111.2.1
|
853 mike 1.27 Uint32 String::reverseFind(Char16 c) const
854 {
|
855 mike 1.111.2.1 Uint16 x = c;
856 Uint16* p = _rep->data;
857 Uint16* q = _rep->data + _rep->size;
|
858 mike 1.27
|
859 mike 1.111.2.1 while (q != p)
|
860 mike 1.27 {
|
861 mike 1.111.2.1 if (*--q == x)
862 return q - p;
|
863 mike 1.27 }
864
865 return PEG_NOT_FOUND;
866 }
867
868 void String::toLower()
869 {
|
870 chuck 1.111.2.14 #ifdef PEGASUS_HAS_ICU
|
871 mike 1.111.2.1
|
872 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
873 david 1.90 {
|
874 chuck 1.111.2.12 if (Atomic_get(&_rep->refs) != 1)
875 _rep = StringRep::copy_on_write(_rep);
876
877 // This will do a locale-insensitive, but context-sensitive convert.
878 // Since context-sensitive casing looks at adjacent chars, this
879 // prevents optimizations where the us-ascii is converted before
880 // calling ICU.
881 // The string may shrink or expand after the convert.
882
|
883 mike 1.111.2.1 //// First calculate size of resulting string. u_strToLower() returns
884 //// only the size when zero is passed as the destination size argument.
885
|
886 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
887
|
888 mike 1.111.2.1 int32_t new_size = u_strToLower(
889 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
890 chuck 1.111.2.12
891 err = U_ZERO_ERROR;
|
892 chuck 1.99
|
893 mike 1.111.2.1 //// Reserve enough space for the result.
894
895 if ((Uint32)new_size > _rep->cap)
896 _reserve(_rep, new_size);
897
898 //// Perform the conversion (overlapping buffers are allowed).
899
900 u_strToLower((UChar*)_rep->data, new_size,
901 (UChar*)_rep->data, _rep->size, NULL, &err);
|
902 yi.zhou 1.108
|
903 mike 1.111.2.1 _rep->size = new_size;
|
904 mike 1.111.2.13 return;
|
905 david 1.90 }
|
906 mike 1.111.2.1
|
907 chuck 1.111.2.14 #endif /* PEGASUS_HAS_ICU */
|
908 mike 1.111.2.1
909 if (Atomic_get(&_rep->refs) != 1)
910 _rep = StringRep::copy_on_write(_rep);
911
912 Uint16* p = _rep->data;
913 size_t n = _rep->size;
914
915 for (; n--; p++)
|
916 david 1.90 {
|
917 mike 1.111.2.1 if (!(*p & 0xFF00))
918 *p = _to_lower(*p);
|
919 mike 1.27 }
|
920 kumpf 1.39 }
921
|
922 chuck 1.99 void String::toUpper()
|
923 david 1.90 {
|
924 chuck 1.111.2.14 #ifdef PEGASUS_HAS_ICU
|
925 mike 1.111.2.1
|
926 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
927 chuck 1.99 {
|
928 chuck 1.111.2.12 if (Atomic_get(&_rep->refs) != 1)
|
929 mike 1.111.2.13 _rep = StringRep::copy_on_write(_rep);
|
930 chuck 1.111.2.12
931 // This will do a locale-insensitive, but context-sensitive convert.
932 // Since context-sensitive casing looks at adjacent chars, this
933 // prevents optimizations where the us-ascii is converted before
934 // calling ICU.
935 // The string may shrink or expand after the convert.
936
|
937 mike 1.111.2.1 //// First calculate size of resulting string. u_strToUpper() returns
938 //// only the size when zero is passed as the destination size argument.
939
|
940 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
941
|
942 mike 1.111.2.1 int32_t new_size = u_strToUpper(
943 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
944 chuck 1.99
|
945 chuck 1.111.2.12 err = U_ZERO_ERROR;
946
|
947 mike 1.111.2.1 //// Reserve enough space for the result.
|
948 yi.zhou 1.108
|
949 mike 1.111.2.1 if ((Uint32)new_size > _rep->cap)
950 _reserve(_rep, new_size);
|
951 david 1.90
|
952 mike 1.111.2.1 //// Perform the conversion (overlapping buffers are allowed).
|
953 kumpf 1.39
|
954 mike 1.111.2.1 u_strToUpper((UChar*)_rep->data, new_size,
955 (UChar*)_rep->data, _rep->size, NULL, &err);
|
956 mike 1.27
|
957 mike 1.111.2.1 _rep->size = new_size;
|
958 chuck 1.111.2.12
|
959 mike 1.111.2.13 return;
|
960 mike 1.27 }
961
|
962 chuck 1.111.2.14 #endif /* PEGASUS_HAS_ICU */
|
963 mike 1.111.2.1
964 if (Atomic_get(&_rep->refs) != 1)
965 _rep = StringRep::copy_on_write(_rep);
966
967 Uint16* p = _rep->data;
968 size_t n = _rep->size;
969
970 for (; n--; p++)
971 *p = _to_upper(*p);
|
972 mike 1.27 }
973
|
974 mike 1.111.2.1 int String::compare(const String& s1, const String& s2, Uint32 n)
|
975 mike 1.30 {
|
976 mike 1.111.2.1 assert(n <= s1._rep->size);
977 assert(n <= s2._rep->size);
|
978 kumpf 1.43
|
979 mike 1.111.2.1 // Ignoring error in which n is greater than s1.size() or s2.size()
980 return _compare(s1._rep->data, s2._rep->data, n);
981 }
|
982 mike 1.30
|
983 mike 1.111.2.1 int String::compare(const String& s1, const String& s2)
984 {
985 return _compare(s1._rep->data, s2._rep->data);
986 }
|
987 mike 1.30
|
#ifdef PEGASUS_USE_STRING_EXTENSIONS
// Compares s1 with the zero-terminated char array s2 (null-checked). By
// default s2 is first converted to a temporary String; with
// PEGASUS_STRING_NO_UTF8 the two are compared directly.
int String::compare(const String& s1, const char* s2)
{
    _check_null_pointer(s2);

#ifndef PEGASUS_STRING_NO_UTF8
    // ATTN: optimize this!
    const String rhs(s2);
    return String::compare(s1, rhs);
#else
    return _compare_no_utf8(s1._rep->data, s2);
#endif
}
#endif /* PEGASUS_USE_STRING_EXTENSIONS */
|
1001 mike 1.30
|
1002 mike 1.111.2.1 int String::compareNoCase(const String& str1, const String& str2)
|
1003 kumpf 1.40 {
|
1004 chuck 1.111.2.14 #ifdef PEGASUS_HAS_ICU
|
1005 mike 1.111.2.1
|
1006 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1007 {
|
1008 mike 1.111.2.1 return u_strcasecmp(
1009 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
|
1010 yi.zhou 1.108 }
|
1011 kumpf 1.40
|
1012 chuck 1.111.2.14 #endif /* PEGASUS_HAS_ICU */
|
1013 kumpf 1.40
|
1014 mike 1.111.2.1 const Uint16* s1 = str1._rep->data;
1015 const Uint16* s2 = str2._rep->data;
1016
1017 while (*s1 && *s2)
1018 {
1019 int r = _to_lower(*s1++) - _to_lower(*s2++);
|
1020 kumpf 1.40
|
1021 david.dillard 1.105 if (r)
1022 return r;
|
1023 kumpf 1.40 }
1024
|
1025 mike 1.111.2.1 if (*s2)
|
1026 david.dillard 1.105 return -1;
|
1027 mike 1.111.2.1 else if (*s1)
|
1028 david.dillard 1.105 return 1;
|
1029 kumpf 1.40
1030 return 0;
1031 }
1032
|
1033 mike 1.111.2.1 Boolean String::equalNoCase_aux(const String& s1, const String& s2)
|
1034 mike 1.27 {
|
1035 chuck 1.111.2.14 #ifdef PEGASUS_HAS_ICU
|
1036 mike 1.27
|
1037 mike 1.111.2.1 return String::compareNoCase(s1, s2) == 0;
|
1038 kumpf 1.39
|
1039 chuck 1.111.2.14 #else /* PEGASUS_HAS_ICU */
|
1040 kumpf 1.39
|
1041 mike 1.111.2.1 Uint16* p = (Uint16*)s1._rep->data;
1042 Uint16* q = (Uint16*)s2._rep->data;
1043 Uint32 n = s2._rep->size;
1044
1045 while (n >= 8)
1046 {
1047 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||
1048 ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||
1049 ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||
1050 ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))) ||
1051 ((p[4] - q[4]) && (_to_upper(p[4]) - _to_upper(q[4]))) ||
1052 ((p[5] - q[5]) && (_to_upper(p[5]) - _to_upper(q[5]))) ||
1053 ((p[6] - q[6]) && (_to_upper(p[6]) - _to_upper(q[6]))) ||
1054 ((p[7] - q[7]) && (_to_upper(p[7]) - _to_upper(q[7]))))
1055 {
1056 return false;
1057 }
1058
1059 n -= 8;
1060 p += 8;
1061 q += 8;
1062 mike 1.111.2.1 }
1063
1064 while (n >= 4)
1065 {
1066 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||
1067 ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||
1068 ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||
1069 ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))))
1070 {
1071 return false;
1072 }
1073
1074 n -= 4;
1075 p += 4;
1076 q += 4;
1077 }
|
1078 mike 1.27
|
1079 kumpf 1.39 while (n--)
1080 {
|
1081 mike 1.111.2.1 if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))))
1082 return false;
1083
1084 p++;
1085 q++;
|
1086 kumpf 1.39 }
|
1087 mike 1.28
|
1088 kumpf 1.39 return true;
|
1089 david 1.69
|
1090 chuck 1.111.2.14 #endif /* PEGASUS_HAS_ICU */
|
1091 mike 1.111.2.1 }
|
1092 mike 1.27
|
1093 mike 1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
|
1094 mike 1.111.2.1 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1095 david 1.69 {
|
1096 mike 1.111.2.1 _check_null_pointer(s2);
|
1097 david 1.69
|
1098 chuck 1.111.2.14 #if defined(PEGASUS_HAS_ICU)
|
1099 david.dillard 1.105
|
1100 mike 1.111.2.1 return String::equalNoCase(s1, String(s2));
|
1101 david 1.71
|
1102 mike 1.111.2.1 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1103 kumpf 1.42
|
1104 mike 1.111.2.1 const Uint16* p1 = (Uint16*)s1._rep->data;
1105 const char* p2 = s2;
1106 size_t n = s1._rep->size;
|
1107 karl 1.36
|
1108 mike 1.111.2.1 while (n--)
1109 {
1110 if (!*p2)
1111 return false;
|
1112 david.dillard 1.105
|
1113 mike 1.111.2.1 if (_to_upper(*p1++) != CharSet::to_upper(int(*p2++)))
1114 return false;
1115 }
1116
1117 return true;
|
1118 david.dillard 1.105
|
1119 chuck 1.111.2.14 #else /* PEGASUS_HAS_ICU */
|
1120 chuck 1.78
|
1121 mike 1.111.2.1 // ATTN: optimize this!
1122 return String::equalNoCase(s1, String(s2));
|
1123 david.dillard 1.105
|
1124 chuck 1.111.2.14 #endif /* PEGASUS_HAS_ICU */
|
1125 mike 1.111.2.1 }
|
1126 mike 1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
|
1127 karl 1.36
|
1128 mike 1.111.2.1 Boolean String::equal(const String& s1, const String& s2)
|
1129 karl 1.36 {
|
1130 mike 1.111.2.1 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
1131 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
|
1132 karl 1.36 }
1133
|
1134 mike 1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
|
1135 mike 1.111.2.1 Boolean String::equal(const String& s1, const char* s2)
|
1136 karl 1.36 {
|
1137 mike 1.111.2.1 #ifdef PEGASUS_STRING_NO_UTF8
|
1138 kumpf 1.39
|
1139 mike 1.111.2.1 _check_null_pointer(s2);
|
1140 kumpf 1.39
|
1141 mike 1.111.2.1 const Uint16* p = (Uint16*)s1._rep->data;
1142 const char* q = s2;
|
1143 kumpf 1.39
|
1144 mike 1.111.2.1 while (*p && *q)
1145 {
1146 if (*p++ != Uint16(*q++))
1147 return false;
1148 }
|
1149 kumpf 1.39
|
1150 mike 1.111.2.1 return !(*p || *q);
|
1151 kumpf 1.39
|
1152 mike 1.111.2.1 #else /* PEGASUS_STRING_NO_UTF8 */
|
1153 kumpf 1.39
|
1154 mike 1.111.2.1 return String::equal(s1, String(s2));
1155
1156 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1157 kumpf 1.39 }
|
1158 mike 1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
|
1159 kumpf 1.39
|
1160 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1161 kumpf 1.39 {
|
1162 david 1.69 #if defined(PEGASUS_OS_OS400)
|
1163 mike 1.111.2.1
|
1164 david 1.93 CString cstr = str.getCString();
|
1165 david 1.69 const char* utf8str = cstr;
1166 os << utf8str;
|
1167 chuck 1.111.2.12 return os;
1168 #else
|
1169 david 1.69
|
1170 chuck 1.111.2.14 #if defined(PEGASUS_HAS_ICU)
|
1171 mike 1.111.2.1
|
1172 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1173 {
|
1174 david.dillard 1.105 char *buf = NULL;
1175 const int size = str.size() * 6;
|
1176 mike 1.111.2.1 UnicodeString UniStr(
1177 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1178 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1179 buf = new char[bufsize+1];
1180 UniStr.extract(0,bufsize,buf);
1181 os << buf;
1182 os.flush();
1183 delete [] buf;
|
1184 chuck 1.111.2.12 return os;
|
1185 yi.zhou 1.108 }
|
1186 mike 1.111.2.1
|
1187 chuck 1.111.2.14 #endif // PEGASUS_HAS_ICU
|
1188 mike 1.111.2.1
1189 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1190 yi.zhou 1.108 {
|
1191 chuck 1.111.2.12 Uint16 code = str[i];
|
1192 david.dillard 1.105
|
1193 chuck 1.111.2.12 if (code > 0 && !(code & 0xFF00))
1194 os << char(code);
1195 else
1196 {
1197 // Print in hex format:
1198 char buffer[8];
1199 sprintf(buffer, "\\x%04X", code);
1200 os << buffer;
1201 }
|
1202 yi.zhou 1.108 }
|
1203 kumpf 1.39
1204 return os;
|
1205 chuck 1.111.2.12 #endif // PEGASUS_OS_OS400
|
1206 kumpf 1.39 }
1207
|
1208 mike 1.111.2.1 void String::_append_char_aux()
|
1209 kumpf 1.39 {
|
1210 mike 1.111.2.1 StringRep* tmp;
|
1211 kumpf 1.39
|
1212 mike 1.111.2.1 if (_rep->cap)
1213 {
1214 tmp = StringRep::alloc(2 * _rep->cap);
1215 tmp->size = _rep->size;
1216 _copy(tmp->data, _rep->data, _rep->size);
1217 }
1218 else
1219 {
1220 tmp = StringRep::alloc(8);
1221 tmp->size = 0;
1222 }
|
1223 kumpf 1.39
|
1224 mike 1.111.2.1 StringRep::unref(_rep);
1225 _rep = tmp;
|
1226 kumpf 1.39 }
1227
|
1228 mike 1.111.2.1 PEGASUS_NAMESPACE_END
|
1229 kumpf 1.39
|
1230 mike 1.111.2.1 /*
1231 ================================================================================
|
1232 kumpf 1.39
|
1233 mike 1.111.2.1 String optimizations:
1234
1235 1. Added mechanism allowing certain functions to be inlined only when
1236 used by internal Pegasus modules. External modules (i.e., providers)
1237 link to a non-inline version, which allows for binary compatibility.
1238
1239 2. Implemented copy-on-write with atomic increment/decrement. This
1240 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1241 for the 'ni1000' benchmark.
1242
1243 3. Employed loop unrolling in several places. For example, see:
1244
1245 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1246
1247 4. Used the "empty-rep" optimization (described in whitepaper from the
1248 GCC Developers Summit). This reduced default construction to a simple
1249 pointer assignment.
1250
1251 inline String::String() : _rep(&_empty_rep) { }
1252
1253 5. Implemented Uint16 versions of toupper() and tolower() using tables.
1254 mike 1.111.2.1 For example:
1255
1256 static const char _upper[] =
1257 {
1258 0,1,2,...255
1259 };
1260
1261 inline Uint16 _to_upper(Uint16 x)
1262 {
1263 return (x & 0xFF00) ? x : _upper[x];
1264 }
1265
1266 This outperforms the system implementation by avoiding an anding
1267 operation.
1268
1269 6. Implemented char* version of the following member functions to
1270 eliminate unnecessary creation of anonymous string objects
1271 (temporaries).
1272
1273 String(const String& s1, const char* s2);
1274 String(const char* s1, const String& s2);
1275 mike 1.111.2.1 String& String::operator=(const char* str);
1276 Uint32 String::find(const char* s) const;
1277 bool String::equal(const String& s1, const char* s2);
1278 static int String::compare(const String& s1, const char* s2);
1279 String& String::append(const char* str);
1280 String& String::append(const char* str, Uint32 size);
1281 static bool String::equalNoCase(const String& s1, const char* s2);
1282 String& operator=(const char* str)
1283 String& String::assign(const char* str)
1284 String& String::append(const char* str)
1285 Boolean operator==(const String& s1, const char* s2)
1286 Boolean operator==(const char* s1, const String& s2)
1287 Boolean operator!=(const String& s1, const char* s2)
1288 Boolean operator!=(const char* s1, const String& s2)
1289 Boolean operator<(const String& s1, const char* s2)
1290 Boolean operator<(const char* s1, const String& s2)
1291 Boolean operator>(const String& s1, const char* s2)
1292 Boolean operator>(const char* s1, const String& s2)
1293 Boolean operator<=(const String& s1, const char* s2)
1294 Boolean operator<=(const char* s1, const String& s2)
1295 Boolean operator>=(const String& s1, const char* s2)
1296 mike 1.111.2.1 Boolean operator>=(const char* s1, const String& s2)
1297 String operator+(const String& s1, const char* s2)
1298 String operator+(const char* s1, const String& s2)
1299
1300 7. Optimized _next_pow_2(), used in rounding the capacity to the next
1301 power of two (algorithm from the book "Hacker's Delight").
1302
1303 static Uint32 _next_pow_2(Uint32 x)
1304 {
1305 if (x < 8)
1306 return 8;
1307
1308 x--;
1309 x |= (x >> 1);
1310 x |= (x >> 2);
1311 x |= (x >> 4);
1312 x |= (x >> 8);
1313 x |= (x >> 16);
1314 x++;
1315
1316 return x;
1317 mike 1.111.2.1 }
1318
1319 8. Implemented "concatenating constructors" to eliminate temporaries
1320 created by operator+(). This scheme employs the "return-value
1321 optimization" described by Stan Lippman.
1322
1323 inline String operator+(const String& s1, const String& s2)
1324 {
1325 return String(s1, s2, 0);
1326 }
1327
1328 9. Experimented to find the optimal initial size for a short string.
|
1329 mike 1.111.2.13 Eight seems to offer the best tradeoff between space and time.
|
1330 mike 1.111.2.1
1331 10. Inlined all members of the Char16 class.
1332
1333 11. Used Uint16 internally in the String class. This showed no improvement
1334 since Char16 was already fully inlined and was essentially reduced to
1335 Uint16 in any case.
1336
1337 12. Implemented conditional logic (#if) allowing error checking logic to
1338 be excluded for better performance. Examples include bounds checking
1339 and null-pointer checking.
1340
1341 13. Used memcpy() and memcmp() where possible. These are implemented using
1342 the rep family of instructions under Intel and are much faster.
1343
|
1344 mike 1.111.2.2 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1345 copy routine overhead.
1346
1347 15. Added ASCII7 form of the constructor and assign().
1348
1349 String s("hello world", String::ASCII7);
1350
|
1351 mike 1.111.2.4 s.assignASCII7("hello world");
|
1352 mike 1.111.2.2
|
1353 mike 1.111.2.6 This avoids slower UTF8 processing when not needed.
|
1354 mike 1.111.2.2
|
1355 mike 1.111.2.1 ================================================================================
1356 */
|