1 karl 1.98 //%2005////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 mike 1.27 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
16 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 david.dillard 1.105 //
|
19 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
20 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
22 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
25 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
30 // Author: Mike Brasher (mbrasher@bmc.com)
31 //
|
32 mike 1.111.6.1 // Modified By:
33 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
34 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
35 // David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
36 // Mike Brasher (mike-brasher@austin.rr.com)
|
37 mike 1.27 //
38 //%/////////////////////////////////////////////////////////////////////////////
39
|
40 mike 1.111.6.1 #include <cassert>
|
41 kumpf 1.48 #include "InternalException.h"
|
42 david 1.69 #include "CommonUTF.h"
|
43 mike 1.111.6.1 #include "MessageLoader.h"
|
44 mike 1.111.6.2 #include "StringRep.h"
|
45 david 1.69
46 #ifdef PEGASUS_HAS_ICU
|
47 chuck 1.99 #include <unicode/ustring.h>
48 #include <unicode/uchar.h>
|
49 david 1.69 #endif
50
|
51 mike 1.27 PEGASUS_NAMESPACE_BEGIN
52
|
53 mike 1.111.6.1 //==============================================================================
54 //
55 // Compile-time macros (undefined by default).
56 //
57 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
58 //
59 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
60 //
61 //==============================================================================
62
63 //==============================================================================
|
64 kumpf 1.39 //
|
65 mike 1.111.6.1 // File-scope definitions:
|
66 kumpf 1.54 //
|
67 mike 1.111.6.1 //==============================================================================
68
|
69 mike 1.111.6.6 const Uint8 _toUpperTable[256] =
|
70 mike 1.111.6.1 {
71 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
72 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
73 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
74 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
75 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
76 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
77 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
78 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
79 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
80 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
81 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
82 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
83 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
84 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
85 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
86 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
87 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
88 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
89 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
90 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
91 mike 1.111.6.1 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
92 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
93 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
94 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
95 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
96 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
97 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
98 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
99 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
100 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
101 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
102 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
103 };
104
|
105 mike 1.111.6.6 const Uint8 _toLowerTable[256] =
|
106 mike 1.111.6.1 {
107 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
108 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
109 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
110 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
111 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
112 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
113 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
114 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
115 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
116 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
117 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
118 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
119 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
120 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
121 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
122 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
123 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
124 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
125 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
126 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
127 mike 1.111.6.1 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
128 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
129 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
130 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
131 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
132 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
133 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
134 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
135 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
136 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
137 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
138 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
139 };
|
140 kumpf 1.54
|
141 mike 1.111.6.1 // Converts 16-bit characters to upper case.
|
142 mike 1.111.6.6 inline Uint16 _toUpper(Uint16 x)
|
143 kumpf 1.54 {
|
144 mike 1.111.6.6 return (x & 0xFF00) ? x : _toUpperTable[x];
|
145 kumpf 1.54 }
146
|
147 mike 1.111.6.1 // Converts 16-bit characters to lower case.
|
148 mike 1.111.6.6 inline Uint16 _toLower(Uint16 x)
|
149 kumpf 1.54 {
|
150 mike 1.111.6.6 return (x & 0xFF00) ? x : _toLowerTable[x];
|
151 mike 1.111.6.1 }
|
152 kumpf 1.82
|
153 mike 1.111.6.6 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
|
154 mike 1.111.6.5 static Uint32 _roundUpToPow2(Uint32 x)
|
155 mike 1.111.6.1 {
|
156 mike 1.111.6.6 if (x > 0x0FFFFFFF)
157 throw PEGASUS_STD(bad_alloc)();
158
|
159 mike 1.111.6.1 if (x < 8)
160 return 8;
161
162 x--;
163 x |= (x >> 1);
164 x |= (x >> 2);
165 x |= (x >> 4);
166 x |= (x >> 8);
167 x |= (x >> 16);
168 x++;
169
170 return x;
171 }
172
// Element-wise copy of n items from q to p, converting each element from Q
// to P by assignment. The main loop is unrolled eight-wide, followed by an
// optional four-wide step and a scalar tail.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    for (size_t blocks = n / 8; blocks != 0; --blocks)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
        p += 8;
        q += 8;
    }

    size_t left = n % 8;

    if (left >= 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p += 4;
        q += 4;
        left -= 4;
    }

    for (; left != 0; --left)
        *p++ = *q++;
}
207
|
208 mike 1.111.6.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
|
209 kumpf 1.54 {
|
210 mike 1.111.6.1 while (n >= 4)
211 {
212 if (s[0] == c)
213 return (Uint16*)s;
214 if (s[1] == c)
215 return (Uint16*)&s[1];
216 if (s[2] == c)
217 return (Uint16*)&s[2];
218 if (s[3] == c)
219 return (Uint16*)&s[3];
220
221 n -= 4;
222 s += 4;
223 }
224
225 if (n)
226 {
227 if (*s == c)
228 return (Uint16*)s;
229 s++;
230 n--;
231 mike 1.111.6.1 }
232
233 if (n)
234 {
235 if (*s == c)
236 return (Uint16*)s;
237 s++;
238 n--;
239 }
240
241 if (n && *s == c)
242 return (Uint16*)s;
243
244 // Not found!
245 return 0;
|
246 kumpf 1.54 }
247
|
248 mike 1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2)
|
249 kumpf 1.54 {
|
250 mike 1.111.6.1 while (*s1 && *s2)
|
251 kumpf 1.82 {
|
252 mike 1.111.6.1 int r = *s1++ - *s2++;
253
254 if (r)
255 return r;
|
256 kumpf 1.82 }
|
257 mike 1.111.6.1
258 if (*s2)
259 return -1;
260 else if (*s1)
261 return 1;
262
263 return 0;
|
264 kumpf 1.54 }
265
|
266 mike 1.111.6.6 static int _compareNoUTF8(const Uint16* s1, const char* s2)
|
267 kumpf 1.56 {
|
268 mike 1.111.6.1 Uint16 c1;
269 Uint16 c2;
270
271 do
|
272 kumpf 1.81 {
|
273 mike 1.111.6.1 c1 = *s1++;
274 c2 = *s2++;
275
276 if (c1 == 0)
277 return c1 - c2;
|
278 kumpf 1.81 }
|
279 mike 1.111.6.1 while (c1 == c2);
280
281 return c1 - c2;
|
282 kumpf 1.56 }
283
|
284 mike 1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
|
285 kumpf 1.54 {
|
286 mike 1.111.6.1 // This should only be called when s1 and s2 have the same length.
287
288 while (n-- && (*s1++ - *s2++) == 0)
289 ;
290
291 //
292
293 return s1[-1] - s2[-1];
|
294 kumpf 1.54 }
295
|
296 mike 1.111.6.1 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
297 {
298 memcpy(s1, s2, n * sizeof(Uint16));
299 }
|
300 kumpf 1.39
|
301 mike 1.111.6.6 void StrinThrowOutOfBounds()
|
302 mike 1.111.6.1 {
303 throw IndexOutOfBoundsException();
304 }
|
305 mike 1.27
|
306 mike 1.111.6.1 #ifdef PEGASUS_STRING_NO_THROW
|
307 mike 1.111.6.6 # define _checkNullPointer(ARG) /* empty */
|
308 mike 1.111.6.1 #else
309 template<class T>
|
310 mike 1.111.6.6 inline void _checkNullPointer(const T* ptr)
|
311 mike 1.27 {
|
312 mike 1.111.6.1 if (!ptr)
313 throw NullPointer();
314 }
315 #endif
316
|
317 mike 1.111.6.6 static size_t _copyFromUTF8(Uint16* dest, const char* src, size_t n)
|
318 mike 1.111.6.1 {
319 Uint16* p = dest;
320 const Uint8* q = (const Uint8*)src;
|
321 mike 1.27
|
322 mike 1.111.6.1 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
323 // Use loop-unrolling.
|
324 mike 1.27
|
325 mike 1.111.6.1 while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
326 {
327 p[0] = q[0];
328 p[1] = q[1];
329 p[2] = q[2];
330 p[3] = q[3];
331 p[4] = q[4];
332 p[5] = q[5];
333 p[6] = q[6];
334 p[7] = q[7];
335 p += 8;
336 q += 8;
337 n -= 8;
338 }
339
340 while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
341 {
342 p[0] = q[0];
343 p[1] = q[1];
344 p[2] = q[2];
345 p[3] = q[3];
346 mike 1.111.6.1 p += 4;
347 q += 4;
348 n -= 4;
349 }
350
351 switch (n)
352 {
353 case 0:
354 return p - dest;
355 case 1:
356 if (q[0] < 128)
357 {
358 p[0] = q[0];
359 return p + 1 - dest;
360 }
361 break;
362 case 2:
363 if (((q[0]|q[1]) & 0x80) == 0)
364 {
365 p[0] = q[0];
366 p[1] = q[1];
367 mike 1.111.6.1 return p + 2 - dest;
368 }
369 break;
370 case 3:
371 if (((q[0]|q[1]|q[2]) & 0x80) == 0)
372 {
373 p[0] = q[0];
374 p[1] = q[1];
375 p[2] = q[2];
376 return p + 3 - dest;
377 }
378 break;
379 }
380
381 // Process remaining characters.
382
383 while (n)
384 {
385 // Optimize for 7-bit ASCII case.
386
387 if (*q < 128)
388 mike 1.111.6.1 {
389 *p++ = *q++;
390 n--;
391 }
392 else
393 {
394 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
395
396 if (c > n || !isValid_U8(q, c) ||
397 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
398 {
399 MessageLoaderParms parms("Common.String.BAD_UTF8",
400 "The byte sequence starting at index $0 "
401 "is not valid UTF-8 encoding.",
402 q - (const Uint8*)src);
403 throw Exception(parms);
404 }
405
406 n -= c;
407 }
408 }
409 mike 1.111.6.1
410 return p - dest;
411 }
412
413 // Note: dest must be at least three times src (plus an extra byte for
414 // terminator).
|
415 mike 1.111.6.6 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
|
416 mike 1.111.6.1 {
417 const Uint16* q = src;
418 Uint8* p = (Uint8*)dest;
419
420 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
421 {
422 p[0] = q[0];
423 p[1] = q[1];
424 p[2] = q[2];
425 p[3] = q[3];
426 p += 4;
427 q += 4;
428 n -= 4;
429 }
430
431 switch (n)
432 {
433 case 0:
434 return p - (Uint8*)dest;
435 case 1:
436 if (q[0] < 128)
437 mike 1.111.6.1 {
438 p[0] = q[0];
439 return p + 1 - (Uint8*)dest;
440 }
441 break;
442 case 2:
443 if (q[0] < 128 && q[1] < 128)
444 {
445 p[0] = q[0];
446 p[1] = q[1];
447 return p + 2 - (Uint8*)dest;
448 }
449 break;
450 case 3:
451 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
452 {
453 p[0] = q[0];
454 p[1] = q[1];
455 p[2] = q[2];
456 return p + 3 - (Uint8*)dest;
457 }
458 mike 1.111.6.1 break;
459 }
460
461 // If this line was reached, there must be characters greater than 128.
462
463 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
|
464 mike 1.27
|
465 mike 1.111.6.1 return p - (Uint8*)dest;
466 }
467
468 static inline size_t _convert(Uint16* p, const char* q, size_t n)
469 {
470 #ifdef PEGASUS_STRING_NO_UTF8
471 _copy(p, q, n);
|
472 mike 1.27 return n;
|
473 mike 1.111.6.1 #else
|
474 mike 1.111.6.6 return _copyFromUTF8(p, q, n);
|
475 mike 1.111.6.1 #endif
|
476 mike 1.27 }
477
|
478 mike 1.111.6.1 //==============================================================================
|
479 chuck 1.102 //
|
480 mike 1.111.6.1 // class CString
|
481 chuck 1.102 //
|
482 mike 1.111.6.1 //==============================================================================
483
484 CString::CString(const CString& cstr) : _rep(0)
|
485 chuck 1.102 {
|
486 mike 1.111.6.1 if (cstr._rep)
|
487 chuck 1.102 {
|
488 mike 1.111.6.1 size_t n = strlen(cstr._rep) + 1;
489 _rep = (char*)operator new(n);
490 memcpy(_rep, cstr._rep, n);
491 }
492 }
493
494 CString& CString::operator=(const CString& cstr)
495 {
496 if (&cstr != this)
497 {
498 if (_rep)
|
499 david.dillard 1.105 {
|
500 mike 1.111.6.1 operator delete(_rep);
501 _rep = 0;
|
502 chuck 1.102 }
|
503 mike 1.111.6.1
504 if (cstr._rep)
|
505 chuck 1.102 {
|
506 mike 1.111.6.1 size_t n = strlen(cstr._rep) + 1;
507 _rep = (char*)operator new(n);
508 memcpy(_rep, cstr._rep, n);
|
509 chuck 1.102 }
|
510 mike 1.111.6.1 }
|
511 chuck 1.102
|
512 mike 1.111.6.1 return *this;
513 }
514
515 //==============================================================================
516 //
517 // class StringRep
518 //
519 //==============================================================================
520
|
521 mike 1.111.6.6 StringRep StringRep::_emptyRep;
|
522 chuck 1.102
|
523 mike 1.111.6.1 inline StringRep* StringRep::alloc(size_t cap)
524 {
|
525 mike 1.111.6.6 // Any string bigger than this is seriously suspect.
526 if (cap > 0x0FFFFFFF)
527 throw PEGASUS_STD(bad_alloc)();
528
|
529 mike 1.111.6.1 StringRep* rep = (StringRep*)::operator new(
530 sizeof(StringRep) + cap * sizeof(Uint16));
531 rep->cap = cap;
532 Atomic_create(&rep->refs, 1);
533
534 return rep;
|
535 chuck 1.102 }
536
|
537 mike 1.111.6.1 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
538 kumpf 1.43 {
|
539 mike 1.111.6.1 if (cap > rep->cap || Atomic_get(&rep->refs) != 1)
540 {
|
541 mike 1.111.6.5 size_t n = _roundUpToPow2(cap);
|
542 mike 1.111.6.6 StringRep* newRep = StringRep::alloc(n);
543 newRep->size = rep->size;
544 _copy(newRep->data, rep->data, rep->size + 1);
|
545 mike 1.111.6.1 StringRep::unref(rep);
|
546 mike 1.111.6.6 rep = newRep;
|
547 mike 1.111.6.1 }
548 }
|
549 kumpf 1.43
|
550 mike 1.111.6.1 StringRep* StringRep::create(const Uint16* data, size_t size)
551 {
552 StringRep* rep = StringRep::alloc(size);
553 rep->size = size;
554 _copy(rep->data, data, size);
555 rep->data[size] = '\0';
556 return rep;
557 }
558
|
559 mike 1.111.6.6 StringRep* StringRep::copyOnWrite(StringRep* rep)
|
560 mike 1.111.6.1 {
561 // Return a new copy of rep. Release rep.
562
|
563 mike 1.111.6.6 StringRep* newRep = StringRep::alloc(rep->size);
564 newRep->size = rep->size;
565 _copy(newRep->data, rep->data, rep->size);
566 newRep->data[newRep->size] = '\0';
|
567 mike 1.111.6.1 StringRep::unref(rep);
|
568 mike 1.111.6.6 return newRep;
|
569 mike 1.111.6.1 }
|
570 kumpf 1.43
|
571 mike 1.111.6.1 StringRep* StringRep::create(const char* data, size_t size)
|
572 mike 1.27 {
|
573 mike 1.111.6.1 StringRep* rep = StringRep::alloc(size);
574 rep->size = _convert((Uint16*)rep->data, data, size);
575 rep->data[rep->size] = '\0';
576
577 return rep;
|
578 mike 1.27 }
579
|
580 mike 1.111.6.1 StringRep* StringRep::createASCII7(const char* data, size_t size)
|
581 mike 1.27 {
|
582 mike 1.111.6.1 StringRep* rep = StringRep::alloc(size);
583 _copy((Uint16*)rep->data, data, size);
584 rep->data[rep->size = size] = '\0';
585 return rep;
|
586 kumpf 1.39 }
|
587 tony 1.66
|
588 mike 1.111.6.1 Uint32 StringRep::length(const Uint16* str)
589 {
590 // Note: We could unroll this but it is rarely called.
591
592 const Uint16* end = (Uint16*)str;
593
594 while (*end++)
595 ;
596
597 return end - str - 1;
598 }
599
600 //==============================================================================
601 //
602 // class String
603 //
604 //==============================================================================
605
606 const String String::EMPTY;
|
607 mike 1.27
|
608 kumpf 1.39 String::String(const String& str, Uint32 n)
609 {
|
610 mike 1.111.6.6 _checkBounds(n, str._rep->size);
|
611 mike 1.111.6.1 _rep = StringRep::create(str._rep->data, n);
|
612 kumpf 1.39 }
613
614 String::String(const Char16* str)
615 {
|
616 mike 1.111.6.6 _checkNullPointer(str);
|
617 mike 1.111.6.1 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
618 mike 1.27 }
619
|
620 kumpf 1.39 String::String(const Char16* str, Uint32 n)
621 {
|
622 mike 1.111.6.6 _checkNullPointer(str);
|
623 mike 1.111.6.1 _rep = StringRep::create((Uint16*)str, n);
|
624 kumpf 1.39 }
625
626 String::String(const char* str)
|
627 mike 1.27 {
|
628 mike 1.111.6.6 _checkNullPointer(str);
|
629 mike 1.111.6.1 _rep = StringRep::create(str, strlen(str));
630 }
|
631 david.dillard 1.105
|
632 mike 1.111.6.1 String::String(const char* str, String::ASCII7Tag tag)
633 {
|
634 mike 1.111.6.6 _checkNullPointer(str);
|
635 mike 1.111.6.1 _rep = StringRep::createASCII7(str, strlen(str));
|
636 mike 1.27 }
637
|
638 kumpf 1.39 String::String(const char* str, Uint32 n)
|
639 mike 1.27 {
|
640 mike 1.111.6.6 _checkNullPointer(str);
|
641 mike 1.111.6.1 _rep = StringRep::create(str, n);
642 }
|
643 david.dillard 1.105
|
644 mike 1.111.6.1 String::String(const char* str, size_t n, String::ASCII7Tag tag)
645 {
|
646 mike 1.111.6.6 _checkNullPointer(str);
|
647 mike 1.111.6.1 _rep = StringRep::createASCII7(str, n);
|
648 kumpf 1.39 }
|
649 mike 1.27
|
650 mike 1.111.6.1 String::String(const String& s1, const String& s2)
|
651 kumpf 1.39 {
|
652 mike 1.111.6.1 size_t n1 = s1._rep->size;
653 size_t n2 = s2._rep->size;
654 size_t n = n1 + n2;
655 _rep = StringRep::alloc(n);
656 _copy(_rep->data, s1._rep->data, n1);
657 _copy(_rep->data + n1, s2._rep->data, n2);
658 _rep->size = n;
659 _rep->data[n] = '\0';
|
660 mike 1.27 }
661
|
662 mike 1.111.6.1 String::String(const String& s1, const char* s2)
|
663 mike 1.27 {
|
664 mike 1.111.6.6 _checkNullPointer(s2);
|
665 mike 1.111.6.1 size_t n1 = s1._rep->size;
666 size_t n2 = strlen(s2);
667 _rep = StringRep::alloc(n1 + n2);
668 _copy(_rep->data, s1._rep->data, n1);
669 _rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2);
670 _rep->data[_rep->size] = '\0';
|
671 mike 1.27 }
672
|
673 mike 1.111.6.1 String::String(const char* s1, const String& s2)
|
674 mike 1.27 {
|
675 mike 1.111.6.6 _checkNullPointer(s1);
|
676 mike 1.111.6.1 size_t n1 = strlen(s1);
677 size_t n2 = s2._rep->size;
678 _rep = StringRep::alloc(n1 + n2);
679 _rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1);
680 _copy(_rep->data + n1, s2._rep->data, n2);
681 _rep->data[_rep->size] = '\0';
|
682 mike 1.27 }
683
|
684 mike 1.111.6.1 String& String::assign(const String& str)
|
685 mike 1.27 {
|
686 mike 1.111.6.1 if (_rep != str._rep)
|
687 david.dillard 1.105 {
|
688 mike 1.111.6.1 StringRep::unref(_rep);
689 StringRep::ref(_rep = str._rep);
|
690 david.dillard 1.105 }
691
|
692 mike 1.27 return *this;
693 }
694
695 String& String::assign(const Char16* str, Uint32 n)
696 {
|
697 mike 1.111.6.6 _checkNullPointer(str);
|
698 mike 1.111.6.1
699 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
|
700 david.dillard 1.105 {
|
701 mike 1.111.6.1 StringRep::unref(_rep);
702 _rep = StringRep::alloc(n);
|
703 david.dillard 1.105 }
704
|
705 mike 1.111.6.1 _rep->size = n;
706 _copy(_rep->data, (Uint16*)str, n);
707 _rep->data[n] = '\0';
708
|
709 mike 1.27 return *this;
710 }
711
|
712 mike 1.111.6.1 String& String::assign(const char* str, Uint32 n)
|
713 chuck 1.102 {
|
714 mike 1.111.6.6 _checkNullPointer(str);
|
715 mike 1.111.6.1
716 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
|
717 david.dillard 1.105 {
|
718 mike 1.111.6.1 StringRep::unref(_rep);
719 _rep = StringRep::alloc(n);
|
720 david.dillard 1.105 }
721
|
722 mike 1.111.6.1 _rep->size = _convert(_rep->data, str, n);
723 _rep->data[_rep->size] = 0;
724
|
725 chuck 1.102 return *this;
726 }
727
|
728 mike 1.111.6.1 String& String::assignASCII7(const char* str, Uint32 n)
|
729 mike 1.27 {
|
730 mike 1.111.6.6 _checkNullPointer(str);
|
731 mike 1.111.6.1
732 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
|
733 david.dillard 1.105 {
|
734 mike 1.111.6.1 StringRep::unref(_rep);
735 _rep = StringRep::alloc(n);
|
736 david.dillard 1.105 }
737
|
738 mike 1.111.6.1 _copy(_rep->data, str, n);
739 _rep->data[_rep->size = n] = 0;
740
|
741 mike 1.27 return *this;
742 }
743
|
744 kumpf 1.39 void String::clear()
745 {
|
746 mike 1.111.6.1 if (_rep->size)
747 {
748 if (Atomic_get(&_rep->refs) == 1)
|
749 mike 1.111.6.4 {
|
750 mike 1.111.6.1 _rep->size = 0;
|
751 mike 1.111.6.4 _rep->data[0] = '\0';
752 }
|
753 mike 1.111.6.1 else
754 {
755 StringRep::unref(_rep);
|
756 mike 1.111.6.6 _rep = &StringRep::_emptyRep;
|
757 mike 1.111.6.1 }
758 }
|
759 kumpf 1.39 }
760
|
761 mike 1.111.6.1 void String::reserveCapacity(Uint32 cap)
|
762 kumpf 1.39 {
|
763 mike 1.111.6.1 _reserve(_rep, cap);
|
764 kumpf 1.39 }
765
|
766 mike 1.111.6.1 CString String::getCString() const
767 {
768 #ifdef PEGASUS_STRING_NO_UTF8
769 char* str = (char*)operator new(_rep->size + 1);
770 _copy(str, _rep->data, _rep->size);
771 str[_rep->size] = '\0';
772 return CString(str);
|
773 gs.keenan 1.110 #else
|
774 mike 1.111.6.1 Uint32 n = 3 * _rep->size;
775 char* str = (char*)operator new(n + 1);
|
776 mike 1.111.6.6 size_t size = _copyToUTF8(str, _rep->data, _rep->size);
|
777 mike 1.111.6.1 str[size] = '\0';
778 return CString(str);
|
779 gs.keenan 1.110 #endif
|
780 kumpf 1.39 }
781
|
782 mike 1.111.6.1 String& String::append(const Char16* str, Uint32 n)
|
783 kumpf 1.39 {
|
784 mike 1.111.6.6 _checkNullPointer(str);
|
785 kumpf 1.39
|
786 mike 1.111.6.6 size_t oldSize = _rep->size;
787 size_t newSize = oldSize + n;
788 _reserve(_rep, newSize);
789 _copy(_rep->data + oldSize, (Uint16*)str, n);
790 _rep->size = newSize;
791 _rep->data[newSize] = '\0';
|
792 mike 1.27
|
793 mike 1.111.6.1 return *this;
|
794 mike 1.27 }
795
|
796 mike 1.111.6.1 String& String::append(const String& str)
|
797 mike 1.27 {
|
798 mike 1.111.6.1 return append((Char16*)str._rep->data, str._rep->size);
|
799 mike 1.27 }
800
|
801 mike 1.111.6.1 String& String::append(const char* str, Uint32 size)
|
802 kumpf 1.39 {
|
803 mike 1.111.6.6 _checkNullPointer(str);
|
804 mike 1.111.6.1
|
805 mike 1.111.6.6 size_t oldSize = _rep->size;
806 size_t cap = oldSize + size;
|
807 mike 1.111.6.1
808 _reserve(_rep, cap);
|
809 mike 1.111.6.6 _rep->size += _convert((Uint16*)_rep->data + oldSize, str, size);
|
810 mike 1.111.6.1 _rep->data[_rep->size] = '\0';
|
811 kumpf 1.39
|
812 mike 1.27 return *this;
813 }
814
|
815 mike 1.111.6.1 void String::remove(Uint32 index, Uint32 n)
|
816 mike 1.27 {
|
817 mike 1.111.6.1 if (n == PEG_NOT_FOUND)
818 n = _rep->size - index;
|
819 mike 1.27
|
820 mike 1.111.6.6 _checkBounds(index + n, _rep->size);
|
821 mike 1.27
|
822 mike 1.111.6.1 if (Atomic_get(&_rep->refs) != 1)
|
823 mike 1.111.6.6 _rep = StringRep::copyOnWrite(_rep);
|
824 mike 1.27
|
825 mike 1.111.6.1 assert(index + n <= _rep->size);
826
827 size_t rem = _rep->size - (index + n);
828 Uint16* data = _rep->data;
829
830 if (rem)
831 memmove(data + index, data + index + n, rem * sizeof(Uint16));
832
833 _rep->size -= n;
834 data[_rep->size] = '\0';
|
835 mike 1.27 }
836
|
837 mike 1.111.6.1 String String::subString(Uint32 index, Uint32 n) const
|
838 mike 1.27 {
|
839 mike 1.111.6.1 // Note: this implementation is very permissive but used for
840 // backwards compatibility.
841
842 if (index < _rep->size)
|
843 mike 1.27 {
|
844 mike 1.111.6.1 if (n == PEG_NOT_FOUND || n > _rep->size - index)
845 n = _rep->size - index;
|
846 mike 1.27
|
847 mike 1.111.6.1 return String((Char16*)_rep->data + index, n);
|
848 mike 1.27 }
|
849 david.dillard 1.105
850 return String();
|
851 mike 1.27 }
852
853 Uint32 String::find(Char16 c) const
854 {
|
855 mike 1.111.6.1 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
856 mike 1.27
|
857 mike 1.111.6.1 if (p)
858 return p - _rep->data;
|
859 mike 1.27
860 return PEG_NOT_FOUND;
861 }
862
|
863 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
864 mike 1.30 {
|
865 mike 1.111.6.6 _checkBounds(index, _rep->size);
|
866 mike 1.30
|
867 mike 1.111.6.1 if (index >= _rep->size)
868 return PEG_NOT_FOUND;
869
870 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
871
872 if (p)
873 return p - _rep->data;
|
874 mike 1.30
875 return PEG_NOT_FOUND;
876 }
877
|
878 mike 1.111.6.6 Uint32 StringFindAux(
|
879 mike 1.111.6.5 const StringRep* _rep, const Char16* s, Uint32 n)
|
880 mike 1.27 {
|
881 mike 1.111.6.6 _checkNullPointer(s);
|
882 mike 1.27
|
883 mike 1.111.6.1 const Uint16* data = _rep->data;
884 size_t rem = _rep->size;
|
885 mike 1.30
|
886 mike 1.111.6.1 while (n <= rem)
|
887 mike 1.27 {
|
888 mike 1.111.6.1 Uint16* p = (Uint16*)_find(data, rem, s[0]);
|
889 david.dillard 1.105
|
890 mike 1.111.6.1 if (!p)
891 break;
892
893 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
894 return p - _rep->data;
895
896 p++;
897 rem -= p - data;
898 data = p;
|
899 mike 1.27 }
|
900 mike 1.111.6.1
|
901 mike 1.27 return PEG_NOT_FOUND;
902 }
903
|
904 mike 1.111.6.1 Uint32 String::find(const char* s) const
905 {
|
906 mike 1.111.6.6 _checkNullPointer(s);
|
907 mike 1.111.6.1
908 // Note: could optimize away creation of temporary, but this is rarely
909 // called.
910 return find(String(s));
911 }
912
|
913 mike 1.27 Uint32 String::reverseFind(Char16 c) const
914 {
|
915 mike 1.111.6.1 Uint16 x = c;
916 Uint16* p = _rep->data;
917 Uint16* q = _rep->data + _rep->size;
|
918 mike 1.27
|
919 mike 1.111.6.1 while (q != p)
|
920 mike 1.27 {
|
921 mike 1.111.6.1 if (*--q == x)
922 return q - p;
|
923 mike 1.27 }
924
925 return PEG_NOT_FOUND;
926 }
927
928 void String::toLower()
929 {
|
930 david 1.69 #ifdef PEGASUS_HAS_ICU
|
931 mike 1.111.6.1
|
932 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
933 david 1.90 {
|
934 mike 1.111.6.1 if (Atomic_get(&_rep->refs) != 1)
|
935 mike 1.111.6.6 _rep = StringRep::copyOnWrite(_rep);
|
936 mike 1.111.6.1
|
937 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
938 mike 1.111.6.1 // Since context-sensitive casing looks at adjacent chars, this
939 // prevents optimizations where the us-ascii is converted before
940 // calling ICU.
|
941 yi.zhou 1.108 // The string may shrink or expand after the convert.
942
|
943 mike 1.111.6.1 //// First calculate size of resulting string. u_strToLower() returns
944 //// only the size when zero is passed as the destination size argument.
945
|
946 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
947
|
948 mike 1.111.6.6 int32_t newSize = u_strToLower(
|
949 mike 1.111.6.1 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
950
951 err = U_ZERO_ERROR;
|
952 chuck 1.99
|
953 mike 1.111.6.1 //// Reserve enough space for the result.
954
|
955 mike 1.111.6.6 if ((Uint32)newSize > _rep->cap)
956 _reserve(_rep, newSize);
|
957 mike 1.111.6.1
958 //// Perform the conversion (overlapping buffers are allowed).
|
959 yi.zhou 1.108
|
960 mike 1.111.6.6 u_strToLower((UChar*)_rep->data, newSize,
|
961 mike 1.111.6.1 (UChar*)_rep->data, _rep->size, NULL, &err);
962
|
963 mike 1.111.6.6 _rep->size = newSize;
|
964 mike 1.111.6.1 return;
|
965 david 1.90 }
|
966 mike 1.111.6.1
967 #endif /* PEGASUS_HAS_ICU */
968
969 if (Atomic_get(&_rep->refs) != 1)
|
970 mike 1.111.6.6 _rep = StringRep::copyOnWrite(_rep);
|
971 mike 1.111.6.1
972 Uint16* p = _rep->data;
973 size_t n = _rep->size;
974
975 for (; n--; p++)
|
976 david 1.90 {
|
977 mike 1.111.6.1 if (!(*p & 0xFF00))
|
978 mike 1.111.6.6 *p = _toLower(*p);
|
979 mike 1.27 }
|
980 kumpf 1.39 }
981
|
982 chuck 1.99 void String::toUpper()
|
983 david 1.90 {
984 #ifdef PEGASUS_HAS_ICU
|
985 mike 1.111.6.1
|
986 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
987 chuck 1.99 {
|
988 mike 1.111.6.1 if (Atomic_get(&_rep->refs) != 1)
|
989 mike 1.111.6.6 _rep = StringRep::copyOnWrite(_rep);
|
990 mike 1.111.6.1
|
991 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
992 mike 1.111.6.1 // Since context-sensitive casing looks at adjacent chars, this
993 // prevents optimizations where the us-ascii is converted before
994 // calling ICU.
|
995 yi.zhou 1.108 // The string may shrink or expand after the convert.
996
|
997 mike 1.111.6.1 //// First calculate size of resulting string. u_strToUpper() returns
998 //// only the size when zero is passed as the destination size argument.
999
|
1000 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1001
|
1002 mike 1.111.6.6 int32_t newSize = u_strToUpper(
|
1003 mike 1.111.6.1 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
1004 chuck 1.99
|
1005 mike 1.111.6.1 err = U_ZERO_ERROR;
|
1006 yi.zhou 1.108
|
1007 mike 1.111.6.1 //// Reserve enough space for the result.
|
1008 david 1.90
|
1009 mike 1.111.6.6 if ((Uint32)newSize > _rep->cap)
1010 _reserve(_rep, newSize);
|
1011 kumpf 1.39
|
1012 mike 1.111.6.1 //// Perform the conversion (overlapping buffers are allowed).
|
1013 mike 1.27
|
1014 mike 1.111.6.6 u_strToUpper((UChar*)_rep->data, newSize,
|
1015 mike 1.111.6.1 (UChar*)_rep->data, _rep->size, NULL, &err);
1016
|
1017 mike 1.111.6.6 _rep->size = newSize;
|
1018 mike 1.111.6.1
1019 return;
|
1020 mike 1.27 }
1021
|
1022 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1023
1024 if (Atomic_get(&_rep->refs) != 1)
|
1025 mike 1.111.6.6 _rep = StringRep::copyOnWrite(_rep);
|
1026 mike 1.111.6.1
1027 Uint16* p = _rep->data;
1028 size_t n = _rep->size;
1029
1030 for (; n--; p++)
|
1031 mike 1.111.6.6 *p = _toUpper(*p);
|
1032 mike 1.27 }
1033
|
1034 mike 1.111.6.1 int String::compare(const String& s1, const String& s2, Uint32 n)
|
1035 mike 1.30 {
|
1036 mike 1.111.6.1 assert(n <= s1._rep->size);
1037 assert(n <= s2._rep->size);
|
1038 kumpf 1.43
|
1039 mike 1.111.6.1 // Ignoring error in which n is greater than s1.size() or s2.size()
1040 return _compare(s1._rep->data, s2._rep->data, n);
1041 }
|
1042 mike 1.30
|
1043 mike 1.111.6.1 int String::compare(const String& s1, const String& s2)
1044 {
1045 return _compare(s1._rep->data, s2._rep->data);
1046 }
|
1047 mike 1.30
|
1048 mike 1.111.6.1 int String::compare(const String& s1, const char* s2)
1049 {
|
1050 mike 1.111.6.6 _checkNullPointer(s2);
|
1051 mike 1.30
|
1052 mike 1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
|
1053 mike 1.111.6.6 return _compareNoUTF8(s1._rep->data, s2);
|
1054 mike 1.111.6.1 #else
1055 // ATTN: optimize this!
1056 return String::compare(s1, String(s2));
1057 #endif
|
1058 mike 1.30 }
1059
|
1060 mike 1.111.6.1 int String::compareNoCase(const String& str1, const String& str2)
|
1061 kumpf 1.40 {
|
1062 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1063 mike 1.111.6.1
|
1064 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1065 {
|
1066 mike 1.111.6.1 return u_strcasecmp(
1067 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
|
1068 yi.zhou 1.108 }
|
1069 kumpf 1.40
|
1070 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */
|
1071 kumpf 1.40
|
1072 mike 1.111.6.1 const Uint16* s1 = str1._rep->data;
1073 const Uint16* s2 = str2._rep->data;
1074
1075 while (*s1 && *s2)
1076 {
|
1077 mike 1.111.6.6 int r = _toLower(*s1++) - _toLower(*s2++);
|
1078 kumpf 1.40
|
1079 david.dillard 1.105 if (r)
1080 return r;
|
1081 kumpf 1.40 }
1082
|
1083 mike 1.111.6.1 if (*s2)
|
1084 david.dillard 1.105 return -1;
|
1085 mike 1.111.6.1 else if (*s1)
|
1086 david.dillard 1.105 return 1;
|
1087 kumpf 1.40
1088 return 0;
1089 }
1090
|
1091 mike 1.111.6.6 Boolean StringEqualNoCase(const String& s1, const String& s2)
|
1092 mike 1.27 {
|
1093 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1094 kumpf 1.39
|
1095 mike 1.111.6.1 return String::compareNoCase(s1, s2) == 0;
|
1096 kumpf 1.39
|
1097 mike 1.111.6.1 #else /* PEGASUS_HAS_ICU */
1098
|
1099 mike 1.111.6.5 Uint16* p = (Uint16*)s1.getChar16Data();
1100 Uint16* q = (Uint16*)s2.getChar16Data();
1101 Uint32 n = s2.size();
|
1102 mike 1.111.6.1
1103 while (n >= 8)
1104 {
|
1105 mike 1.111.6.6 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1106 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1107 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1108 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1109 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1110 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1111 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1112 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
|
1113 mike 1.111.6.1 {
1114 return false;
1115 }
1116
1117 n -= 8;
1118 p += 8;
1119 q += 8;
1120 }
1121
1122 while (n >= 4)
1123 {
|
1124 mike 1.111.6.6 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1125 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1126 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1127 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
|
1128 mike 1.111.6.1 {
1129 return false;
1130 }
1131
1132 n -= 4;
1133 p += 4;
1134 q += 4;
1135 }
|
1136 mike 1.27
|
1137 kumpf 1.39 while (n--)
1138 {
|
1139 mike 1.111.6.6 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
|
1140 mike 1.111.6.1 return false;
1141
1142 p++;
1143 q++;
|
1144 kumpf 1.39 }
|
1145 mike 1.28
|
1146 kumpf 1.39 return true;
|
1147 david 1.69
|
1148 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1149 }
|
1150 mike 1.27
|
1151 mike 1.111.6.1 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1152 david 1.69 {
|
1153 mike 1.111.6.6 _checkNullPointer(s2);
|
1154 david 1.69
|
1155 mike 1.111.6.1 #if defined(PEGASUS_HAS_ICU)
|
1156 david 1.69
|
1157 mike 1.111.6.1 return String::equalNoCase(s1, String(s2));
|
1158 david.dillard 1.105
|
1159 mike 1.111.6.1 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1160 david 1.71
|
1161 mike 1.111.6.1 const Uint16* p1 = (Uint16*)s1._rep->data;
1162 const char* p2 = s2;
1163 size_t n = s1._rep->size;
|
1164 kumpf 1.42
|
1165 mike 1.111.6.1 while (n--)
1166 {
1167 if (!*p2)
1168 return false;
|
1169 karl 1.36
|
1170 mike 1.111.6.6 if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
|
1171 mike 1.111.6.1 return false;
1172 }
|
1173 david.dillard 1.105
|
1174 mike 1.111.6.1 if (*p2)
1175 return false;
1176
1177 return true;
|
1178 david.dillard 1.105
|
1179 mike 1.111.6.1 #else /* PEGASUS_HAS_ICU */
|
1180 chuck 1.78
|
1181 mike 1.111.6.1 // ATTN: optimize this!
1182 return String::equalNoCase(s1, String(s2));
|
1183 david.dillard 1.105
|
1184 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1185 }
|
1186 karl 1.36
|
1187 mike 1.111.6.1 Boolean String::equal(const String& s1, const String& s2)
|
1188 karl 1.36 {
|
1189 mike 1.111.6.1 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
1190 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
|
1191 karl 1.36 }
1192
|
1193 mike 1.111.6.1 Boolean String::equal(const String& s1, const char* s2)
|
1194 karl 1.36 {
|
1195 mike 1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
|
1196 kumpf 1.39
|
1197 mike 1.111.6.6 _checkNullPointer(s2);
|
1198 kumpf 1.39
|
1199 mike 1.111.6.1 const Uint16* p = (Uint16*)s1._rep->data;
1200 const char* q = s2;
|
1201 kumpf 1.39
|
1202 mike 1.111.6.1 while (*p && *q)
1203 {
1204 if (*p++ != Uint16(*q++))
1205 return false;
1206 }
|
1207 kumpf 1.39
|
1208 mike 1.111.6.1 return !(*p || *q);
|
1209 kumpf 1.39
|
1210 mike 1.111.6.1 #else /* PEGASUS_STRING_NO_UTF8 */
|
1211 kumpf 1.39
|
1212 mike 1.111.6.1 return String::equal(s1, String(s2));
1213
1214 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1215 kumpf 1.39 }
1216
|
1217 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1218 kumpf 1.39 {
|
1219 david 1.69 #if defined(PEGASUS_OS_OS400)
|
1220 mike 1.111.6.1
|
1221 david 1.93 CString cstr = str.getCString();
|
1222 david 1.69 const char* utf8str = cstr;
1223 os << utf8str;
|
1224 mike 1.111.6.1 return os;
1225 #else
1226
1227 #if defined(PEGASUS_HAS_ICU)
|
1228 david 1.69
|
1229 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1230 {
|
1231 david.dillard 1.105 char *buf = NULL;
1232 const int size = str.size() * 6;
|
1233 mike 1.111.6.1 UnicodeString UniStr(
1234 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1235 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1236 buf = new char[bufsize+1];
1237 UniStr.extract(0,bufsize,buf);
1238 os << buf;
1239 os.flush();
1240 delete [] buf;
|
1241 mike 1.111.6.1 return os;
|
1242 yi.zhou 1.108 }
|
1243 mike 1.111.6.1
1244 #endif // PEGASUS_HAS_ICU
1245
1246 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1247 yi.zhou 1.108 {
|
1248 mike 1.111.6.1 Uint16 code = str[i];
|
1249 david.dillard 1.105
|
1250 mike 1.111.6.1 if (code > 0 && !(code & 0xFF00))
1251 os << char(code);
1252 else
1253 {
1254 // Print in hex format:
1255 char buffer[8];
1256 sprintf(buffer, "\\x%04X", code);
1257 os << buffer;
|
1258 david.dillard 1.105 }
|
1259 yi.zhou 1.108 }
|
1260 kumpf 1.39
1261 return os;
|
1262 mike 1.111.6.1 #endif // PEGASUS_OS_OS400
|
1263 kumpf 1.39 }
1264
|
1265 mike 1.111.6.6 void StringAppendCharAux(StringRep*& _rep)
|
1266 kumpf 1.39 {
|
1267 mike 1.111.6.1 StringRep* tmp;
|
1268 kumpf 1.39
|
1269 mike 1.111.6.1 if (_rep->cap)
1270 {
1271 tmp = StringRep::alloc(2 * _rep->cap);
1272 tmp->size = _rep->size;
1273 _copy(tmp->data, _rep->data, _rep->size);
1274 }
1275 else
1276 {
1277 tmp = StringRep::alloc(8);
1278 tmp->size = 0;
1279 }
|
1280 kumpf 1.39
|
1281 mike 1.111.6.1 StringRep::unref(_rep);
1282 _rep = tmp;
|
1283 kumpf 1.39 }
1284
|
1285 mike 1.111.6.1 PEGASUS_NAMESPACE_END
|
1286 kumpf 1.39
|
1287 mike 1.111.6.1 /*
1288 ================================================================================
|
1289 kumpf 1.39
|
1290 mike 1.111.6.1 String optimizations:
1291
1292 1. Added mechanism allowing certain functions to be inlined only when
1293 used by internal Pegasus modules. External modules (i.e., providers)
1294 link to a non-inline version, which allows for binary compatibility.
1295
1296 2. Implemented copy-on-write with atomic increment/decrement. This
1297 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1298 for the 'ni1000' benchmark.
1299
1300 3. Employed loop unrolling in several places. For example, see:
1301
1302 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1303
1304 4. Used the "empty-rep" optimization (described in whitepaper from the
1305 GCC Developers Summit). This reduced default construction to a simple
1306 pointer assignment.
1307
|
1308 mike 1.111.6.6 inline String::String() : _rep(&_emptyRep) { }
|
1309 mike 1.111.6.1
1310 5. Implemented Uint16 versions of toupper() and tolower() using tables.
1311 For example:
1312
1313 static const char _upper[] =
1314 {
1315 0,1,2,...255
1316 };
1317
|
1318 mike 1.111.6.6 inline Uint16 _toUpper(Uint16 x)
|
1319 mike 1.111.6.1 {
1320 return (x & 0xFF00) ? x : _upper[x];
1321 }
1322
1323 This outperforms the system implementation by avoiding an anding
1324 operation.
1325
1326 6. Implemented char* version of the following member functions to
1327 eliminate unnecessary creation of anonymous string objects
1328 (temporaries).
1329
1330 String(const String& s1, const char* s2);
1331 String(const char* s1, const String& s2);
1332 String& String::operator=(const char* str);
1333 Uint32 String::find(const char* s) const;
1334 bool String::equal(const String& s1, const char* s2);
1335 static int String::compare(const String& s1, const char* s2);
1336 String& String::append(const char* str);
1337 String& String::append(const char* str, Uint32 size);
1338 static bool String::equalNoCase(const String& s1, const char* s2);
1339 String& operator=(const char* str)
1340 mike 1.111.6.1 String& String::assign(const char* str)
1341 String& String::append(const char* str)
1342 Boolean operator==(const String& s1, const char* s2)
1343 Boolean operator==(const char* s1, const String& s2)
1344 Boolean operator!=(const String& s1, const char* s2)
1345 Boolean operator!=(const char* s1, const String& s2)
1346 Boolean operator<(const String& s1, const char* s2)
1347 Boolean operator<(const char* s1, const String& s2)
1348 Boolean operator>(const String& s1, const char* s2)
1349 Boolean operator>(const char* s1, const String& s2)
1350 Boolean operator<=(const String& s1, const char* s2)
1351 Boolean operator<=(const char* s1, const String& s2)
1352 Boolean operator>=(const String& s1, const char* s2)
1353 Boolean operator>=(const char* s1, const String& s2)
1354 String operator+(const String& s1, const char* s2)
1355 String operator+(const char* s1, const String& s2)
1356
|
1357 mike 1.111.6.5 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
|
1358 mike 1.111.6.1 power of two (algorithm from the book "Hacker's Delight").
1359
|
1360 mike 1.111.6.5 static Uint32 _roundUpToPow2(Uint32 x)
|
1361 mike 1.111.6.1 {
1362 if (x < 8)
1363 return 8;
1364
1365 x--;
1366 x |= (x >> 1);
1367 x |= (x >> 2);
1368 x |= (x >> 4);
1369 x |= (x >> 8);
1370 x |= (x >> 16);
1371 x++;
1372
1373 return x;
1374 }
1375
1376 8. Implemented "concatenating constructors" to eliminate temporaries
1377 created by operator+(). This scheme employs the "return-value
1378 optimization" described by Stan Lippman.
1379
1380 inline String operator+(const String& s1, const String& s2)
1381 {
1382 mike 1.111.6.1 return String(s1, s2, 0);
1383 }
1384
1385 9. Experimented to find the optimal initial size for a short string.
1386 Eight seems to offer the best tradeoff between space and time.
1387
1388 10. Inlined all members of the Char16 class.
1389
1390 11. Used Uint16 internally in the String class. This showed no improvement
1391 since Char16 was already fully inlined and was essentially reduced to
1392 Uint16 in any case.
1393
1394 12. Implemented conditional logic (#if) allowing error checking logic to
1395 be excluded to better performance. Examples include bounds checking
1396 and null-pointer checking.
1397
1398 13. Used memcpy() and memcmp() where possible. These are implemented using
1399 the rep family of instructions under Intel and are much faster.
1400
1401 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1402 copy routine overhead.
1403 mike 1.111.6.1
1404 15. Added ASCII7 form of the constructor and assign().
1405
1406 String s("hello world", String::ASCII7);
1407
1408 s.assignASCII7("hello world");
1409
1410 This avoids slower UTF8 processing when not needed.
1411
|
1412 mike 1.111.6.5 ================================================================================
1413
1414 TO-DO:
1415
1416 (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
1417
1418 (+) [DONE] Submit BUG-2754 (Windows buffer limit).
1419
1420 (+) [DONE] Eliminate char versions of find() and append().
1421
1422 (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1423
1424 (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1425
1426 (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1427
1428 (+) [DONE] Comment StringRep allocation layout.
1429
1430 (+) [DONE] Conceal private inline functions.
|
1431 mike 1.111.6.1
|
1432 mike 1.111.6.5 (+) [DONE] Shorten inclusion of StringInline.h in String.h.
|
1433 mike 1.111.6.1
|
1434 mike 1.111.6.5 (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
1435 rid of altogether.
|
1436 mike 1.111.6.1
|
1437 mike 1.111.6.6 (+) [DONE] useCamelNotationOnAllFunctionNames.
1438
|
1439 mike 1.111.6.5 -----------
|
1440 mike 1.111.6.1
|
1441 mike 1.111.6.5 (+) Check for overflow condition in StringRep::alloc().
|
1442 mike 1.111.6.1
|
1443 mike 1.111.6.5 (+) Fix throw-related memory leak.
|
1444 mike 1.111.6.1
|
1445 mike 1.111.6.5 (+) DOC++ String.h
|
1446 mike 1.111.6.1
|
1447 mike 1.111.6.5 (+) Look at PEP223 for coding security guidelines.
|
1448 mike 1.111.6.1
|
1449 mike 1.111.6.6 (+) Replace AtomicInt with new Atomic implementation.
|
1450 mike 1.111.6.1
|
1451 mike 1.111.6.5 (+) Implement Atomic operations for HP.
|
1452 mike 1.111.6.3
|
1453 mike 1.111.6.5 (+) Remove tabs.
|
1454 mike 1.111.6.1
1455 ================================================================================
1456 */
|