1 karl 1.98 //%2005////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 mike 1.27 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
16 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 david.dillard 1.105 //
|
19 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
20 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
22 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
25 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
30 // Author: Mike Brasher (mbrasher@bmc.com)
31 //
|
32 mike 1.111.6.1 // Modified By:
33 // Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
34 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
35 // David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
36 // Mike Brasher (mike-brasher@austin.rr.com)
|
37 mike 1.27 //
38 //%/////////////////////////////////////////////////////////////////////////////
39
|
40 mike 1.111.6.1 #include <cassert>
|
41 kumpf 1.48 #include "InternalException.h"
|
42 david 1.69 #include "CommonUTF.h"
|
43 mike 1.111.6.1 #include "MessageLoader.h"
|
44 mike 1.111.6.2 #include "StringRep.h"
|
45 david 1.69
46 #ifdef PEGASUS_HAS_ICU
|
47 chuck 1.99 #include <unicode/ustring.h>
48 #include <unicode/uchar.h>
|
49 david 1.69 #endif
50
|
51 mike 1.27 PEGASUS_NAMESPACE_BEGIN
52
|
53 mike 1.111.6.1 //==============================================================================
54 //
55 // Compile-time macros (undefined by default).
56 //
57 // PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
58 //
59 // PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
60 //
61 //==============================================================================
62
63 //==============================================================================
|
64 kumpf 1.39 //
|
65 mike 1.111.6.1 // File-scope definitions:
|
66 kumpf 1.54 //
|
67 mike 1.111.6.1 //==============================================================================
68
|
69 mike 1.111.6.6 const Uint8 _toUpperTable[256] =
|
70 mike 1.111.6.1 {
71 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
72 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
73 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
74 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
75 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
76 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
77 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
78 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
79 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
80 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
81 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
82 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
83 0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
84 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
85 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
86 0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
87 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
88 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
89 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
90 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
91 mike 1.111.6.1 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
92 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
93 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
94 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
95 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
96 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
97 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
98 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
99 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
100 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
101 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
102 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
103 };
104
|
105 mike 1.111.6.6 const Uint8 _toLowerTable[256] =
|
106 mike 1.111.6.1 {
107 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
108 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
109 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
110 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
111 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
112 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
113 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
114 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
115 0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
116 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
117 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
118 0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
119 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
120 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
121 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
122 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
123 0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
124 0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
125 0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
126 0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
127 mike 1.111.6.1 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
128 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
129 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
130 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
131 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
132 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
133 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
134 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
135 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
136 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
137 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
138 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
139 };
|
140 kumpf 1.54
|
141 mike 1.111.6.1 // Converts 16-bit characters to upper case.
|
142 mike 1.111.6.6 inline Uint16 _toUpper(Uint16 x)
|
143 kumpf 1.54 {
|
144 mike 1.111.6.6 return (x & 0xFF00) ? x : _toUpperTable[x];
|
145 kumpf 1.54 }
146
|
147 mike 1.111.6.1 // Converts 16-bit characters to lower case.
|
148 mike 1.111.6.6 inline Uint16 _toLower(Uint16 x)
|
149 kumpf 1.54 {
|
150 mike 1.111.6.6 return (x & 0xFF00) ? x : _toLowerTable[x];
|
151 mike 1.111.6.1 }
|
152 kumpf 1.82
|
153 mike 1.111.6.6 // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
|
154 mike 1.111.6.5 static Uint32 _roundUpToPow2(Uint32 x)
|
155 mike 1.111.6.1 {
|
156 mike 1.111.6.7 #ifndef PEGASUS_STRING_NO_THROW
157
|
158 mike 1.111.6.6 if (x > 0x0FFFFFFF)
|
159 mike 1.111.6.8 throw PEGASUS_STD(bad_alloc)();
|
160 mike 1.111.6.6
|
161 mike 1.111.6.7 #endif
162
|
163 mike 1.111.6.1 if (x < 8)
|
164 mike 1.111.6.8 return 8;
|
165 mike 1.111.6.1
166 x--;
167 x |= (x >> 1);
168 x |= (x >> 2);
169 x |= (x >> 4);
170 x |= (x >> 8);
171 x |= (x >> 16);
172 x++;
173
174 return x;
175 }
176
// Element-wise copy of n items from q to p, converting each element from Q
// to P by assignment. The loop is manually unrolled (8, then 4, then 1).
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // Groups of eight.
    for (; n >= 8; n -= 8, p += 8, q += 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    // At most one group of four can remain at this point.
    if (n >= 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p += 4;
        q += 4;
        n -= 4;
    }

    // Tail of zero to three elements.
    for (size_t i = 0; i < n; i++)
        p[i] = q[i];
}
211
|
212 mike 1.111.6.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
|
213 kumpf 1.54 {
|
214 mike 1.111.6.1 while (n >= 4)
215 {
|
216 mike 1.111.6.8 if (s[0] == c)
217 return (Uint16*)s;
218 if (s[1] == c)
219 return (Uint16*)&s[1];
220 if (s[2] == c)
221 return (Uint16*)&s[2];
222 if (s[3] == c)
223 return (Uint16*)&s[3];
|
224 mike 1.111.6.1
|
225 mike 1.111.6.8 n -= 4;
226 s += 4;
|
227 mike 1.111.6.1 }
228
229 if (n)
230 {
|
231 mike 1.111.6.8 if (*s == c)
232 return (Uint16*)s;
233 s++;
234 n--;
|
235 mike 1.111.6.1 }
236
237 if (n)
238 {
|
239 mike 1.111.6.8 if (*s == c)
240 return (Uint16*)s;
241 s++;
242 n--;
|
243 mike 1.111.6.1 }
244
245 if (n && *s == c)
|
246 mike 1.111.6.8 return (Uint16*)s;
|
247 mike 1.111.6.1
248 // Not found!
249 return 0;
|
250 kumpf 1.54 }
251
|
252 mike 1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2)
|
253 kumpf 1.54 {
|
254 mike 1.111.6.1 while (*s1 && *s2)
|
255 kumpf 1.82 {
|
256 mike 1.111.6.1 int r = *s1++ - *s2++;
257
258 if (r)
259 return r;
|
260 kumpf 1.82 }
|
261 mike 1.111.6.1
262 if (*s2)
263 return -1;
264 else if (*s1)
265 return 1;
266
267 return 0;
|
268 kumpf 1.54 }
269
|
270 mike 1.111.6.6 static int _compareNoUTF8(const Uint16* s1, const char* s2)
|
271 kumpf 1.56 {
|
272 mike 1.111.6.1 Uint16 c1;
273 Uint16 c2;
274
275 do
|
276 kumpf 1.81 {
|
277 mike 1.111.6.8 c1 = *s1++;
278 c2 = *s2++;
|
279 mike 1.111.6.1
|
280 mike 1.111.6.8 if (c1 == 0)
281 return c1 - c2;
|
282 kumpf 1.81 }
|
283 mike 1.111.6.1 while (c1 == c2);
284
285 return c1 - c2;
|
286 kumpf 1.56 }
287
|
288 mike 1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
|
289 kumpf 1.54 {
|
290 mike 1.111.6.1 // This should only be called when s1 and s2 have the same length.
291
292 while (n-- && (*s1++ - *s2++) == 0)
|
293 mike 1.111.6.8 ;
|
294 mike 1.111.6.1
295 //
296
297 return s1[-1] - s2[-1];
|
298 kumpf 1.54 }
299
|
300 mike 1.111.6.1 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
301 {
302 memcpy(s1, s2, n * sizeof(Uint16));
303 }
|
304 kumpf 1.39
|
305 mike 1.111.6.6 void StrinThrowOutOfBounds()
|
306 mike 1.111.6.1 {
307 throw IndexOutOfBoundsException();
308 }
|
309 mike 1.27
|
310 mike 1.111.6.1 #ifdef PEGASUS_STRING_NO_THROW
|
311 mike 1.111.6.6 # define _checkNullPointer(ARG) /* empty */
|
312 mike 1.111.6.1 #else
313 template<class T>
|
314 mike 1.111.6.6 inline void _checkNullPointer(const T* ptr)
|
315 mike 1.27 {
|
316 mike 1.111.6.1 if (!ptr)
|
317 mike 1.111.6.8 throw NullPointer();
|
318 mike 1.111.6.1 }
319 #endif
320
|
321 mike 1.111.6.6 static size_t _copyFromUTF8(Uint16* dest, const char* src, size_t n)
|
322 mike 1.111.6.1 {
323 Uint16* p = dest;
324 const Uint8* q = (const Uint8*)src;
|
325 mike 1.27
|
326 mike 1.111.6.1 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
327 // Use loop-unrolling.
|
328 mike 1.27
|
329 mike 1.111.6.1 while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
330 {
|
331 mike 1.111.6.8 p[0] = q[0];
332 p[1] = q[1];
333 p[2] = q[2];
334 p[3] = q[3];
335 p[4] = q[4];
336 p[5] = q[5];
337 p[6] = q[6];
338 p[7] = q[7];
339 p += 8;
340 q += 8;
341 n -= 8;
|
342 mike 1.111.6.1 }
343
344 while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
345 {
|
346 mike 1.111.6.8 p[0] = q[0];
347 p[1] = q[1];
348 p[2] = q[2];
349 p[3] = q[3];
350 p += 4;
351 q += 4;
352 n -= 4;
|
353 mike 1.111.6.1 }
354
355 switch (n)
356 {
|
357 mike 1.111.6.8 case 0:
358 return p - dest;
359 case 1:
360 if (q[0] < 128)
361 {
362 p[0] = q[0];
363 return p + 1 - dest;
364 }
365 break;
366 case 2:
367 if (((q[0]|q[1]) & 0x80) == 0)
368 {
369 p[0] = q[0];
370 p[1] = q[1];
371 return p + 2 - dest;
372 }
373 break;
374 case 3:
375 if (((q[0]|q[1]|q[2]) & 0x80) == 0)
376 {
377 p[0] = q[0];
378 mike 1.111.6.8 p[1] = q[1];
379 p[2] = q[2];
380 return p + 3 - dest;
381 }
382 break;
|
383 mike 1.111.6.1 }
384
385 // Process remaining characters.
386
387 while (n)
388 {
|
389 mike 1.111.6.8 // Optimize for 7-bit ASCII case.
|
390 mike 1.111.6.1
|
391 mike 1.111.6.8 if (*q < 128)
392 {
393 *p++ = *q++;
394 n--;
395 }
396 else
397 {
398 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
399
400 if (c > n || !isValid_U8(q, c) ||
401 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
402 {
403 MessageLoaderParms parms("Common.String.BAD_UTF8",
404 "The byte sequence starting at index $0 "
405 "is not valid UTF-8 encoding.",
406 q - (const Uint8*)src);
407 throw Exception(parms);
408 }
|
409 mike 1.111.6.1
|
410 mike 1.111.6.8 n -= c;
411 }
|
412 mike 1.111.6.1 }
413
414 return p - dest;
415 }
416
417 // Note: dest must be at least three times src (plus an extra byte for
418 // terminator).
|
419 mike 1.111.6.6 static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
|
420 mike 1.111.6.1 {
421 const Uint16* q = src;
422 Uint8* p = (Uint8*)dest;
423
424 while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
425 {
|
426 mike 1.111.6.8 p[0] = q[0];
427 p[1] = q[1];
428 p[2] = q[2];
429 p[3] = q[3];
430 p += 4;
431 q += 4;
432 n -= 4;
|
433 mike 1.111.6.1 }
434
435 switch (n)
436 {
|
437 mike 1.111.6.8 case 0:
438 return p - (Uint8*)dest;
439 case 1:
440 if (q[0] < 128)
441 {
442 p[0] = q[0];
443 return p + 1 - (Uint8*)dest;
444 }
445 break;
446 case 2:
447 if (q[0] < 128 && q[1] < 128)
448 {
449 p[0] = q[0];
450 p[1] = q[1];
451 return p + 2 - (Uint8*)dest;
452 }
453 break;
454 case 3:
455 if (q[0] < 128 && q[1] < 128 && q[2] < 128)
456 {
457 p[0] = q[0];
458 mike 1.111.6.8 p[1] = q[1];
459 p[2] = q[2];
460 return p + 3 - (Uint8*)dest;
461 }
462 break;
|
463 mike 1.111.6.1 }
464
465 // If this line was reached, there must be characters greater than 128.
466
467 UTF16toUTF8(&q, q + n, &p, p + 3 * n);
|
468 mike 1.27
|
469 mike 1.111.6.1 return p - (Uint8*)dest;
470 }
471
472 static inline size_t _convert(Uint16* p, const char* q, size_t n)
473 {
474 #ifdef PEGASUS_STRING_NO_UTF8
475 _copy(p, q, n);
|
476 mike 1.27 return n;
|
477 mike 1.111.6.1 #else
|
478 mike 1.111.6.6 return _copyFromUTF8(p, q, n);
|
479 mike 1.111.6.1 #endif
|
480 mike 1.27 }
481
|
482 mike 1.111.6.1 //==============================================================================
|
483 chuck 1.102 //
|
484 mike 1.111.6.1 // class CString
|
485 chuck 1.102 //
|
486 mike 1.111.6.1 //==============================================================================
487
488 CString::CString(const CString& cstr) : _rep(0)
|
489 chuck 1.102 {
|
490 mike 1.111.6.1 if (cstr._rep)
|
491 chuck 1.102 {
|
492 mike 1.111.6.8 size_t n = strlen(cstr._rep) + 1;
|
493 mike 1.111.6.1 _rep = (char*)operator new(n);
|
494 mike 1.111.6.8 memcpy(_rep, cstr._rep, n);
|
495 mike 1.111.6.1 }
496 }
497
498 CString& CString::operator=(const CString& cstr)
499 {
500 if (&cstr != this)
501 {
502 if (_rep)
|
503 david.dillard 1.105 {
|
504 mike 1.111.6.1 operator delete(_rep);
505 _rep = 0;
|
506 chuck 1.102 }
|
507 mike 1.111.6.1
508 if (cstr._rep)
|
509 chuck 1.102 {
|
510 mike 1.111.6.8 size_t n = strlen(cstr._rep) + 1;
|
511 mike 1.111.6.1 _rep = (char*)operator new(n);
|
512 mike 1.111.6.8 memcpy(_rep, cstr._rep, n);
|
513 chuck 1.102 }
|
514 mike 1.111.6.1 }
|
515 chuck 1.102
|
516 mike 1.111.6.1 return *this;
517 }
518
519 //==============================================================================
520 //
521 // class StringRep
522 //
523 //==============================================================================
524
|
525 mike 1.111.6.6 StringRep StringRep::_emptyRep;
|
526 chuck 1.102
|
527 mike 1.111.6.1 inline StringRep* StringRep::alloc(size_t cap)
528 {
|
529 mike 1.111.6.7 #ifndef PEGASUS_STRING_NO_THROW
530
|
531 mike 1.111.6.6 // Any string bigger than this is seriously suspect.
532 if (cap > 0x0FFFFFFF)
|
533 mike 1.111.6.8 throw PEGASUS_STD(bad_alloc)();
|
534 mike 1.111.6.6
|
535 mike 1.111.6.7 #endif
536
|
537 mike 1.111.6.1 StringRep* rep = (StringRep*)::operator new(
|
538 mike 1.111.6.8 sizeof(StringRep) + cap * sizeof(Uint16));
|
539 mike 1.111.6.1 rep->cap = cap;
540 Atomic_create(&rep->refs, 1);
541
542 return rep;
|
543 chuck 1.102 }
544
|
545 mike 1.111.6.1 static inline void _reserve(StringRep*& rep, Uint32 cap)
|
546 kumpf 1.43 {
|
547 mike 1.111.6.1 if (cap > rep->cap || Atomic_get(&rep->refs) != 1)
548 {
|
549 mike 1.111.6.8 size_t n = _roundUpToPow2(cap);
550 StringRep* newRep = StringRep::alloc(n);
551 newRep->size = rep->size;
552 _copy(newRep->data, rep->data, rep->size + 1);
553 StringRep::unref(rep);
554 rep = newRep;
|
555 mike 1.111.6.1 }
556 }
|
557 kumpf 1.43
|
558 mike 1.111.6.1 StringRep* StringRep::create(const Uint16* data, size_t size)
559 {
560 StringRep* rep = StringRep::alloc(size);
561 rep->size = size;
562 _copy(rep->data, data, size);
563 rep->data[size] = '\0';
564 return rep;
565 }
566
|
567 mike 1.111.6.6 StringRep* StringRep::copyOnWrite(StringRep* rep)
|
568 mike 1.111.6.1 {
569 // Return a new copy of rep. Release rep.
570
|
571 mike 1.111.6.6 StringRep* newRep = StringRep::alloc(rep->size);
572 newRep->size = rep->size;
573 _copy(newRep->data, rep->data, rep->size);
574 newRep->data[newRep->size] = '\0';
|
575 mike 1.111.6.1 StringRep::unref(rep);
|
576 mike 1.111.6.6 return newRep;
|
577 mike 1.111.6.1 }
|
578 kumpf 1.43
|
579 mike 1.111.6.1 StringRep* StringRep::create(const char* data, size_t size)
|
580 mike 1.27 {
|
581 mike 1.111.6.1 StringRep* rep = StringRep::alloc(size);
582 rep->size = _convert((Uint16*)rep->data, data, size);
583 rep->data[rep->size] = '\0';
584
585 return rep;
|
586 mike 1.27 }
587
|
588 mike 1.111.6.1 StringRep* StringRep::createASCII7(const char* data, size_t size)
|
589 mike 1.27 {
|
590 mike 1.111.6.1 StringRep* rep = StringRep::alloc(size);
591 _copy((Uint16*)rep->data, data, size);
592 rep->data[rep->size = size] = '\0';
593 return rep;
|
594 kumpf 1.39 }
|
595 tony 1.66
|
596 mike 1.111.6.1 Uint32 StringRep::length(const Uint16* str)
597 {
598 // Note: We could unroll this but it is rarely called.
599
600 const Uint16* end = (Uint16*)str;
601
602 while (*end++)
|
603 mike 1.111.6.8 ;
|
604 mike 1.111.6.1
605 return end - str - 1;
606 }
607
608 //==============================================================================
609 //
610 // class String
611 //
612 //==============================================================================
613
614 const String String::EMPTY;
|
615 mike 1.27
|
616 kumpf 1.39 String::String(const String& str, Uint32 n)
617 {
|
618 mike 1.111.6.6 _checkBounds(n, str._rep->size);
|
619 mike 1.111.6.1 _rep = StringRep::create(str._rep->data, n);
|
620 kumpf 1.39 }
621
622 String::String(const Char16* str)
623 {
|
624 mike 1.111.6.6 _checkNullPointer(str);
|
625 mike 1.111.6.1 _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
|
626 mike 1.27 }
627
|
628 kumpf 1.39 String::String(const Char16* str, Uint32 n)
629 {
|
630 mike 1.111.6.6 _checkNullPointer(str);
|
631 mike 1.111.6.1 _rep = StringRep::create((Uint16*)str, n);
|
632 kumpf 1.39 }
633
634 String::String(const char* str)
|
635 mike 1.27 {
|
636 mike 1.111.6.6 _checkNullPointer(str);
|
637 mike 1.111.6.1 _rep = StringRep::create(str, strlen(str));
638 }
|
639 david.dillard 1.105
|
640 mike 1.111.6.1 String::String(const char* str, String::ASCII7Tag tag)
641 {
|
642 mike 1.111.6.6 _checkNullPointer(str);
|
643 mike 1.111.6.1 _rep = StringRep::createASCII7(str, strlen(str));
|
644 mike 1.27 }
645
|
646 kumpf 1.39 String::String(const char* str, Uint32 n)
|
647 mike 1.27 {
|
648 mike 1.111.6.6 _checkNullPointer(str);
|
649 mike 1.111.6.1 _rep = StringRep::create(str, n);
650 }
|
651 david.dillard 1.105
|
652 mike 1.111.6.1 String::String(const char* str, size_t n, String::ASCII7Tag tag)
653 {
|
654 mike 1.111.6.6 _checkNullPointer(str);
|
655 mike 1.111.6.1 _rep = StringRep::createASCII7(str, n);
|
656 kumpf 1.39 }
|
657 mike 1.27
|
658 mike 1.111.6.1 String::String(const String& s1, const String& s2)
|
659 kumpf 1.39 {
|
660 mike 1.111.6.1 size_t n1 = s1._rep->size;
661 size_t n2 = s2._rep->size;
662 size_t n = n1 + n2;
663 _rep = StringRep::alloc(n);
664 _copy(_rep->data, s1._rep->data, n1);
665 _copy(_rep->data + n1, s2._rep->data, n2);
666 _rep->size = n;
667 _rep->data[n] = '\0';
|
668 mike 1.27 }
669
|
670 mike 1.111.6.1 String::String(const String& s1, const char* s2)
|
671 mike 1.27 {
|
672 mike 1.111.6.6 _checkNullPointer(s2);
|
673 mike 1.111.6.1 size_t n1 = s1._rep->size;
674 size_t n2 = strlen(s2);
675 _rep = StringRep::alloc(n1 + n2);
676 _copy(_rep->data, s1._rep->data, n1);
677 _rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2);
678 _rep->data[_rep->size] = '\0';
|
679 mike 1.27 }
680
|
681 mike 1.111.6.1 String::String(const char* s1, const String& s2)
|
682 mike 1.27 {
|
683 mike 1.111.6.6 _checkNullPointer(s1);
|
684 mike 1.111.6.1 size_t n1 = strlen(s1);
685 size_t n2 = s2._rep->size;
686 _rep = StringRep::alloc(n1 + n2);
687 _rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1);
688 _copy(_rep->data + n1, s2._rep->data, n2);
689 _rep->data[_rep->size] = '\0';
|
690 mike 1.27 }
691
|
692 mike 1.111.6.1 String& String::assign(const String& str)
|
693 mike 1.27 {
|
694 mike 1.111.6.1 if (_rep != str._rep)
|
695 david.dillard 1.105 {
|
696 mike 1.111.6.8 StringRep::unref(_rep);
697 StringRep::ref(_rep = str._rep);
|
698 david.dillard 1.105 }
699
|
700 mike 1.27 return *this;
701 }
702
703 String& String::assign(const Char16* str, Uint32 n)
704 {
|
705 mike 1.111.6.6 _checkNullPointer(str);
|
706 mike 1.111.6.1
707 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
|
708 david.dillard 1.105 {
|
709 mike 1.111.6.8 StringRep::unref(_rep);
710 _rep = StringRep::alloc(n);
|
711 david.dillard 1.105 }
712
|
713 mike 1.111.6.1 _rep->size = n;
714 _copy(_rep->data, (Uint16*)str, n);
715 _rep->data[n] = '\0';
716
|
717 mike 1.27 return *this;
718 }
719
|
720 mike 1.111.6.1 String& String::assign(const char* str, Uint32 n)
|
721 chuck 1.102 {
|
722 mike 1.111.6.6 _checkNullPointer(str);
|
723 mike 1.111.6.1
724 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
|
725 david.dillard 1.105 {
|
726 mike 1.111.6.8 StringRep::unref(_rep);
727 _rep = StringRep::alloc(n);
|
728 david.dillard 1.105 }
729
|
730 mike 1.111.6.1 _rep->size = _convert(_rep->data, str, n);
731 _rep->data[_rep->size] = 0;
732
|
733 chuck 1.102 return *this;
734 }
735
|
736 mike 1.111.6.1 String& String::assignASCII7(const char* str, Uint32 n)
|
737 mike 1.27 {
|
738 mike 1.111.6.6 _checkNullPointer(str);
|
739 mike 1.111.6.1
740 if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
|
741 david.dillard 1.105 {
|
742 mike 1.111.6.8 StringRep::unref(_rep);
743 _rep = StringRep::alloc(n);
|
744 david.dillard 1.105 }
745
|
746 mike 1.111.6.1 _copy(_rep->data, str, n);
747 _rep->data[_rep->size = n] = 0;
748
|
749 mike 1.27 return *this;
750 }
751
|
752 kumpf 1.39 void String::clear()
753 {
|
754 mike 1.111.6.1 if (_rep->size)
755 {
|
756 mike 1.111.6.8 if (Atomic_get(&_rep->refs) == 1)
757 {
758 _rep->size = 0;
759 _rep->data[0] = '\0';
760 }
761 else
762 {
763 StringRep::unref(_rep);
764 _rep = &StringRep::_emptyRep;
765 }
|
766 mike 1.111.6.1 }
|
767 kumpf 1.39 }
768
|
769 mike 1.111.6.1 void String::reserveCapacity(Uint32 cap)
|
770 kumpf 1.39 {
|
771 mike 1.111.6.1 _reserve(_rep, cap);
|
772 kumpf 1.39 }
773
|
774 mike 1.111.6.1 CString String::getCString() const
775 {
776 #ifdef PEGASUS_STRING_NO_UTF8
777 char* str = (char*)operator new(_rep->size + 1);
778 _copy(str, _rep->data, _rep->size);
779 str[_rep->size] = '\0';
780 return CString(str);
|
781 gs.keenan 1.110 #else
|
782 mike 1.111.6.1 Uint32 n = 3 * _rep->size;
783 char* str = (char*)operator new(n + 1);
|
784 mike 1.111.6.6 size_t size = _copyToUTF8(str, _rep->data, _rep->size);
|
785 mike 1.111.6.1 str[size] = '\0';
786 return CString(str);
|
787 gs.keenan 1.110 #endif
|
788 kumpf 1.39 }
789
|
790 mike 1.111.6.1 String& String::append(const Char16* str, Uint32 n)
|
791 kumpf 1.39 {
|
792 mike 1.111.6.6 _checkNullPointer(str);
|
793 kumpf 1.39
|
794 mike 1.111.6.6 size_t oldSize = _rep->size;
795 size_t newSize = oldSize + n;
796 _reserve(_rep, newSize);
797 _copy(_rep->data + oldSize, (Uint16*)str, n);
798 _rep->size = newSize;
799 _rep->data[newSize] = '\0';
|
800 mike 1.27
|
801 mike 1.111.6.1 return *this;
|
802 mike 1.27 }
803
|
804 mike 1.111.6.1 String& String::append(const String& str)
|
805 mike 1.27 {
|
806 mike 1.111.6.1 return append((Char16*)str._rep->data, str._rep->size);
|
807 mike 1.27 }
808
|
809 mike 1.111.6.1 String& String::append(const char* str, Uint32 size)
|
810 kumpf 1.39 {
|
811 mike 1.111.6.6 _checkNullPointer(str);
|
812 mike 1.111.6.1
|
813 mike 1.111.6.6 size_t oldSize = _rep->size;
814 size_t cap = oldSize + size;
|
815 mike 1.111.6.1
816 _reserve(_rep, cap);
|
817 mike 1.111.6.6 _rep->size += _convert((Uint16*)_rep->data + oldSize, str, size);
|
818 mike 1.111.6.1 _rep->data[_rep->size] = '\0';
|
819 kumpf 1.39
|
820 mike 1.27 return *this;
821 }
822
|
823 mike 1.111.6.1 void String::remove(Uint32 index, Uint32 n)
|
824 mike 1.27 {
|
825 mike 1.111.6.1 if (n == PEG_NOT_FOUND)
826 n = _rep->size - index;
|
827 mike 1.27
|
828 mike 1.111.6.6 _checkBounds(index + n, _rep->size);
|
829 mike 1.27
|
830 mike 1.111.6.1 if (Atomic_get(&_rep->refs) != 1)
|
831 mike 1.111.6.8 _rep = StringRep::copyOnWrite(_rep);
|
832 mike 1.27
|
833 mike 1.111.6.1 assert(index + n <= _rep->size);
834
835 size_t rem = _rep->size - (index + n);
836 Uint16* data = _rep->data;
837
838 if (rem)
839 memmove(data + index, data + index + n, rem * sizeof(Uint16));
840
841 _rep->size -= n;
842 data[_rep->size] = '\0';
|
843 mike 1.27 }
844
|
845 mike 1.111.6.1 String String::subString(Uint32 index, Uint32 n) const
|
846 mike 1.27 {
|
847 mike 1.111.6.1 // Note: this implementation is very permissive but used for
848 // backwards compatibility.
849
850 if (index < _rep->size)
|
851 mike 1.27 {
|
852 mike 1.111.6.8 if (n == PEG_NOT_FOUND || n > _rep->size - index)
853 n = _rep->size - index;
|
854 mike 1.27
|
855 mike 1.111.6.8 return String((Char16*)_rep->data + index, n);
|
856 mike 1.27 }
|
857 david.dillard 1.105
858 return String();
|
859 mike 1.27 }
860
861 Uint32 String::find(Char16 c) const
862 {
|
863 mike 1.111.6.1 Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
|
864 mike 1.27
|
865 mike 1.111.6.1 if (p)
|
866 mike 1.111.6.8 return p - _rep->data;
|
867 mike 1.27
868 return PEG_NOT_FOUND;
869 }
870
|
871 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
872 mike 1.30 {
|
873 mike 1.111.6.6 _checkBounds(index, _rep->size);
|
874 mike 1.30
|
875 mike 1.111.6.1 if (index >= _rep->size)
|
876 mike 1.111.6.8 return PEG_NOT_FOUND;
|
877 mike 1.111.6.1
878 Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
879
880 if (p)
|
881 mike 1.111.6.8 return p - _rep->data;
|
882 mike 1.30
883 return PEG_NOT_FOUND;
884 }
885
|
886 mike 1.111.6.6 Uint32 StringFindAux(
|
887 mike 1.111.6.5 const StringRep* _rep, const Char16* s, Uint32 n)
|
888 mike 1.27 {
|
889 mike 1.111.6.6 _checkNullPointer(s);
|
890 mike 1.27
|
891 mike 1.111.6.1 const Uint16* data = _rep->data;
892 size_t rem = _rep->size;
|
893 mike 1.30
|
894 mike 1.111.6.1 while (n <= rem)
|
895 mike 1.27 {
|
896 mike 1.111.6.8 Uint16* p = (Uint16*)_find(data, rem, s[0]);
|
897 david.dillard 1.105
|
898 mike 1.111.6.8 if (!p)
899 break;
|
900 mike 1.111.6.1
|
901 mike 1.111.6.8 if (memcmp(p, s, n * sizeof(Uint16)) == 0)
902 return p - _rep->data;
|
903 mike 1.111.6.1
|
904 mike 1.111.6.8 p++;
905 rem -= p - data;
906 data = p;
|
907 mike 1.27 }
|
908 mike 1.111.6.1
|
909 mike 1.27 return PEG_NOT_FOUND;
910 }
911
|
912 mike 1.111.6.1 Uint32 String::find(const char* s) const
913 {
|
914 mike 1.111.6.6 _checkNullPointer(s);
|
915 mike 1.111.6.1
916 // Note: could optimize away creation of temporary, but this is rarely
917 // called.
918 return find(String(s));
919 }
920
|
921 mike 1.27 Uint32 String::reverseFind(Char16 c) const
922 {
|
923 mike 1.111.6.1 Uint16 x = c;
924 Uint16* p = _rep->data;
925 Uint16* q = _rep->data + _rep->size;
|
926 mike 1.27
|
927 mike 1.111.6.1 while (q != p)
|
928 mike 1.27 {
|
929 mike 1.111.6.8 if (*--q == x)
930 return q - p;
|
931 mike 1.27 }
932
933 return PEG_NOT_FOUND;
934 }
935
936 void String::toLower()
937 {
|
938 david 1.69 #ifdef PEGASUS_HAS_ICU
|
939 mike 1.111.6.1
|
940 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
941 david 1.90 {
|
942 mike 1.111.6.1 if (Atomic_get(&_rep->refs) != 1)
|
943 mike 1.111.6.8 _rep = StringRep::copyOnWrite(_rep);
|
944 mike 1.111.6.1
|
945 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
946 mike 1.111.6.1 // Since context-sensitive casing looks at adjacent chars, this
947 // prevents optimizations where the us-ascii is converted before
948 // calling ICU.
|
949 yi.zhou 1.108 // The string may shrink or expand after the convert.
950
|
951 mike 1.111.6.8 //// First calculate size of resulting string. u_strToLower() returns
952 //// only the size when zero is passed as the destination size argument.
|
953 mike 1.111.6.1
|
954 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
955
|
956 mike 1.111.6.6 int32_t newSize = u_strToLower(
|
957 mike 1.111.6.8 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
958 mike 1.111.6.1
959 err = U_ZERO_ERROR;
|
960 chuck 1.99
|
961 mike 1.111.6.8 //// Reserve enough space for the result.
|
962 mike 1.111.6.1
|
963 mike 1.111.6.8 if ((Uint32)newSize > _rep->cap)
964 _reserve(_rep, newSize);
|
965 mike 1.111.6.1
|
966 mike 1.111.6.8 //// Perform the conversion (overlapping buffers are allowed).
|
967 yi.zhou 1.108
|
968 mike 1.111.6.6 u_strToLower((UChar*)_rep->data, newSize,
|
969 mike 1.111.6.8 (UChar*)_rep->data, _rep->size, NULL, &err);
|
970 mike 1.111.6.1
|
971 mike 1.111.6.8 _rep->size = newSize;
972 return;
|
973 david 1.90 }
|
974 mike 1.111.6.1
975 #endif /* PEGASUS_HAS_ICU */
976
977 if (Atomic_get(&_rep->refs) != 1)
|
978 mike 1.111.6.8 _rep = StringRep::copyOnWrite(_rep);
|
979 mike 1.111.6.1
980 Uint16* p = _rep->data;
981 size_t n = _rep->size;
982
983 for (; n--; p++)
|
984 david 1.90 {
|
985 mike 1.111.6.8 if (!(*p & 0xFF00))
986 *p = _toLower(*p);
|
987 mike 1.27 }
|
988 kumpf 1.39 }
989
|
990 chuck 1.99 void String::toUpper()
|
991 david 1.90 {
992 #ifdef PEGASUS_HAS_ICU
|
993 mike 1.111.6.1
|
994 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
995 chuck 1.99 {
|
996 mike 1.111.6.1 if (Atomic_get(&_rep->refs) != 1)
|
997 mike 1.111.6.8 _rep = StringRep::copyOnWrite(_rep);
|
998 mike 1.111.6.1
|
999 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
|
1000 mike 1.111.6.1 // Since context-sensitive casing looks at adjacent chars, this
1001 // prevents optimizations where the us-ascii is converted before
1002 // calling ICU.
|
1003 yi.zhou 1.108 // The string may shrink or expand after the convert.
1004
|
1005 mike 1.111.6.8 //// First calculate size of resulting string. u_strToUpper() returns
1006 //// only the size when zero is passed as the destination size argument.
|
1007 mike 1.111.6.1
|
1008 yi.zhou 1.108 UErrorCode err = U_ZERO_ERROR;
1009
|
1010 mike 1.111.6.6 int32_t newSize = u_strToUpper(
|
1011 mike 1.111.6.8 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
|
1012 chuck 1.99
|
1013 mike 1.111.6.1 err = U_ZERO_ERROR;
|
1014 yi.zhou 1.108
|
1015 mike 1.111.6.8 //// Reserve enough space for the result.
|
1016 david 1.90
|
1017 mike 1.111.6.8 if ((Uint32)newSize > _rep->cap)
1018 _reserve(_rep, newSize);
|
1019 kumpf 1.39
|
1020 mike 1.111.6.8 //// Perform the conversion (overlapping buffers are allowed).
|
1021 mike 1.27
|
1022 mike 1.111.6.6 u_strToUpper((UChar*)_rep->data, newSize,
|
1023 mike 1.111.6.8 (UChar*)_rep->data, _rep->size, NULL, &err);
|
1024 mike 1.111.6.1
|
1025 mike 1.111.6.8 _rep->size = newSize;
|
1026 mike 1.111.6.1
|
1027 mike 1.111.6.8 return;
|
1028 mike 1.27 }
1029
|
1030 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1031
1032 if (Atomic_get(&_rep->refs) != 1)
|
1033 mike 1.111.6.8 _rep = StringRep::copyOnWrite(_rep);
|
1034 mike 1.111.6.1
1035 Uint16* p = _rep->data;
1036 size_t n = _rep->size;
1037
1038 for (; n--; p++)
|
1039 mike 1.111.6.8 *p = _toUpper(*p);
|
1040 mike 1.27 }
1041
|
1042 mike 1.111.6.1 int String::compare(const String& s1, const String& s2, Uint32 n)
|
1043 mike 1.30 {
|
1044 mike 1.111.6.1 assert(n <= s1._rep->size);
1045 assert(n <= s2._rep->size);
|
1046 kumpf 1.43
|
1047 mike 1.111.6.1 // Ignoring error in which n is greater than s1.size() or s2.size()
1048 return _compare(s1._rep->data, s2._rep->data, n);
1049 }
|
1050 mike 1.30
|
1051 mike 1.111.6.1 int String::compare(const String& s1, const String& s2)
1052 {
1053 return _compare(s1._rep->data, s2._rep->data);
1054 }
|
1055 mike 1.30
|
1056 mike 1.111.6.1 int String::compare(const String& s1, const char* s2)
1057 {
|
1058 mike 1.111.6.6 _checkNullPointer(s2);
|
1059 mike 1.30
|
1060 mike 1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
|
1061 mike 1.111.6.6 return _compareNoUTF8(s1._rep->data, s2);
|
1062 mike 1.111.6.1 #else
1063 // ATTN: optimize this!
1064 return String::compare(s1, String(s2));
1065 #endif
|
1066 mike 1.30 }
1067
|
1068 mike 1.111.6.1 int String::compareNoCase(const String& str1, const String& str2)
|
1069 kumpf 1.40 {
|
1070 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1071 mike 1.111.6.1
|
1072 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1073 {
|
1074 mike 1.111.6.1 return u_strcasecmp(
|
1075 mike 1.111.6.8 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
|
1076 yi.zhou 1.108 }
|
1077 kumpf 1.40
|
1078 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */
|
1079 kumpf 1.40
|
1080 mike 1.111.6.1 const Uint16* s1 = str1._rep->data;
1081 const Uint16* s2 = str2._rep->data;
1082
1083 while (*s1 && *s2)
1084 {
|
1085 mike 1.111.6.6 int r = _toLower(*s1++) - _toLower(*s2++);
|
1086 kumpf 1.40
|
1087 david.dillard 1.105 if (r)
1088 return r;
|
1089 kumpf 1.40 }
1090
|
1091 mike 1.111.6.1 if (*s2)
|
1092 david.dillard 1.105 return -1;
|
1093 mike 1.111.6.1 else if (*s1)
|
1094 david.dillard 1.105 return 1;
|
1095 kumpf 1.40
1096 return 0;
1097 }
1098
|
1099 mike 1.111.6.6 Boolean StringEqualNoCase(const String& s1, const String& s2)
|
1100 mike 1.27 {
|
1101 david 1.69 #ifdef PEGASUS_HAS_ICU
|
1102 kumpf 1.39
|
1103 mike 1.111.6.1 return String::compareNoCase(s1, s2) == 0;
|
1104 kumpf 1.39
|
1105 mike 1.111.6.1 #else /* PEGASUS_HAS_ICU */
1106
|
1107 mike 1.111.6.5 Uint16* p = (Uint16*)s1.getChar16Data();
1108 Uint16* q = (Uint16*)s2.getChar16Data();
1109 Uint32 n = s2.size();
|
1110 mike 1.111.6.1
1111 while (n >= 8)
1112 {
|
1113 mike 1.111.6.8 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1114 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1115 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1116 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1117 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1118 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1119 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1120 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1121 {
1122 return false;
1123 }
1124
1125 n -= 8;
1126 p += 8;
1127 q += 8;
|
1128 mike 1.111.6.1 }
1129
1130 while (n >= 4)
1131 {
|
1132 mike 1.111.6.8 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1133 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1134 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1135 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1136 {
1137 return false;
1138 }
1139
1140 n -= 4;
1141 p += 4;
1142 q += 4;
|
1143 mike 1.111.6.1 }
|
1144 mike 1.27
|
1145 kumpf 1.39 while (n--)
1146 {
|
1147 mike 1.111.6.8 if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1148 return false;
|
1149 mike 1.111.6.1
|
1150 mike 1.111.6.8 p++;
1151 q++;
|
1152 kumpf 1.39 }
|
1153 mike 1.28
|
1154 kumpf 1.39 return true;
|
1155 david 1.69
|
1156 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1157 }
|
1158 mike 1.27
|
1159 mike 1.111.6.1 Boolean String::equalNoCase(const String& s1, const char* s2)
|
1160 david 1.69 {
|
1161 mike 1.111.6.6 _checkNullPointer(s2);
|
1162 david 1.69
|
1163 mike 1.111.6.1 #if defined(PEGASUS_HAS_ICU)
|
1164 david 1.69
|
1165 mike 1.111.6.1 return String::equalNoCase(s1, String(s2));
|
1166 david.dillard 1.105
|
1167 mike 1.111.6.1 #elif defined(PEGASUS_STRING_NO_UTF8)
|
1168 david 1.71
|
1169 mike 1.111.6.1 const Uint16* p1 = (Uint16*)s1._rep->data;
1170 const char* p2 = s2;
1171 size_t n = s1._rep->size;
|
1172 kumpf 1.42
|
1173 mike 1.111.6.1 while (n--)
1174 {
|
1175 mike 1.111.6.8 if (!*p2)
1176 return false;
|
1177 karl 1.36
|
1178 mike 1.111.6.8 if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1179 return false;
|
1180 mike 1.111.6.1 }
|
1181 david.dillard 1.105
|
1182 mike 1.111.6.1 if (*p2)
|
1183 mike 1.111.6.8 return false;
|
1184 mike 1.111.6.1
1185 return true;
|
1186 david.dillard 1.105
|
1187 mike 1.111.6.1 #else /* PEGASUS_HAS_ICU */
|
1188 chuck 1.78
|
1189 mike 1.111.6.1 // ATTN: optimize this!
1190 return String::equalNoCase(s1, String(s2));
|
1191 david.dillard 1.105
|
1192 mike 1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1193 }
|
1194 karl 1.36
|
1195 mike 1.111.6.1 Boolean String::equal(const String& s1, const String& s2)
|
1196 karl 1.36 {
|
1197 mike 1.111.6.1 return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
|
1198 mike 1.111.6.8 s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
|
1199 karl 1.36 }
1200
|
1201 mike 1.111.6.1 Boolean String::equal(const String& s1, const char* s2)
|
1202 karl 1.36 {
|
1203 mike 1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
|
1204 kumpf 1.39
|
1205 mike 1.111.6.6 _checkNullPointer(s2);
|
1206 kumpf 1.39
|
1207 mike 1.111.6.1 const Uint16* p = (Uint16*)s1._rep->data;
1208 const char* q = s2;
|
1209 kumpf 1.39
|
1210 mike 1.111.6.1 while (*p && *q)
1211 {
|
1212 mike 1.111.6.8 if (*p++ != Uint16(*q++))
1213 return false;
|
1214 mike 1.111.6.1 }
|
1215 kumpf 1.39
|
1216 mike 1.111.6.1 return !(*p || *q);
|
1217 kumpf 1.39
|
1218 mike 1.111.6.1 #else /* PEGASUS_STRING_NO_UTF8 */
|
1219 kumpf 1.39
|
1220 mike 1.111.6.1 return String::equal(s1, String(s2));
1221
1222 #endif /* PEGASUS_STRING_NO_UTF8 */
|
1223 kumpf 1.39 }
1224
|
1225 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
1226 kumpf 1.39 {
|
1227 david 1.69 #if defined(PEGASUS_OS_OS400)
|
1228 mike 1.111.6.1
|
1229 david 1.93 CString cstr = str.getCString();
|
1230 david 1.69 const char* utf8str = cstr;
1231 os << utf8str;
|
1232 mike 1.111.6.1 return os;
1233 #else
1234
1235 #if defined(PEGASUS_HAS_ICU)
|
1236 david 1.69
|
1237 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
1238 {
|
1239 david.dillard 1.105 char *buf = NULL;
1240 const int size = str.size() * 6;
|
1241 mike 1.111.6.1 UnicodeString UniStr(
|
1242 mike 1.111.6.8 (const UChar *)str.getChar16Data(), (int32_t)str.size());
|
1243 david.dillard 1.105 Uint32 bufsize = UniStr.extract(0,size,buf);
1244 buf = new char[bufsize+1];
1245 UniStr.extract(0,bufsize,buf);
1246 os << buf;
1247 os.flush();
1248 delete [] buf;
|
1249 mike 1.111.6.1 return os;
|
1250 yi.zhou 1.108 }
|
1251 mike 1.111.6.1
1252 #endif // PEGASUS_HAS_ICU
1253
1254 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
1255 yi.zhou 1.108 {
|
1256 mike 1.111.6.1 Uint16 code = str[i];
|
1257 david.dillard 1.105
|
1258 mike 1.111.6.8 if (code > 0 && !(code & 0xFF00))
1259 os << char(code);
|
1260 mike 1.111.6.1 else
|
1261 mike 1.111.6.8 {
|
1262 mike 1.111.6.1 // Print in hex format:
1263 char buffer[8];
1264 sprintf(buffer, "\\x%04X", code);
1265 os << buffer;
|
1266 david.dillard 1.105 }
|
1267 yi.zhou 1.108 }
|
1268 kumpf 1.39
1269 return os;
|
1270 mike 1.111.6.1 #endif // PEGASUS_OS_OS400
|
1271 kumpf 1.39 }
1272
|
1273 mike 1.111.6.6 void StringAppendCharAux(StringRep*& _rep)
|
1274 kumpf 1.39 {
|
1275 mike 1.111.6.1 StringRep* tmp;
|
1276 kumpf 1.39
|
1277 mike 1.111.6.1 if (_rep->cap)
1278 {
|
1279 mike 1.111.6.8 tmp = StringRep::alloc(2 * _rep->cap);
1280 tmp->size = _rep->size;
1281 _copy(tmp->data, _rep->data, _rep->size);
|
1282 mike 1.111.6.1 }
1283 else
1284 {
|
1285 mike 1.111.6.8 tmp = StringRep::alloc(8);
1286 tmp->size = 0;
|
1287 mike 1.111.6.1 }
|
1288 kumpf 1.39
|
1289 mike 1.111.6.1 StringRep::unref(_rep);
1290 _rep = tmp;
|
1291 kumpf 1.39 }
1292
|
1293 mike 1.111.6.1 PEGASUS_NAMESPACE_END
|
1294 kumpf 1.39
|
1295 mike 1.111.6.1 /*
1296 ================================================================================
|
1297 kumpf 1.39
|
1298 mike 1.111.6.1 String optimizations:
1299
1300 1. Added mechanism allowing certain functions to be inlined only when
|
1301 mike 1.111.6.8 used by internal Pegasus modules. External modules (i.e., providers)
1302 link to a non-inline version, which allows for binary compatibility.
|
1303 mike 1.111.6.1
1304 2. Implemented copy-on-write with atomic increment/decrement. This
|
1305 mike 1.111.6.8 yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1306 for the 'ni1000' benchmark.
|
1307 mike 1.111.6.1
|
1308 mike 1.111.6.8 3. Employed loop unrolling in several places. For example, see:
|
1309 mike 1.111.6.1
|
1310 mike 1.111.6.8 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
|
1311 mike 1.111.6.1
1312 4. Used the "empty-rep" optimization (described in whitepaper from the
|
1313 mike 1.111.6.8 GCC Developers Summit). This reduced default construction to a simple
1314 pointer assignment.
|
1315 mike 1.111.6.1
|
1316 mike 1.111.6.8 inline String::String() : _rep(&_emptyRep) { }
|
1317 mike 1.111.6.1
1318 5. Implemented Uint16 versions of toupper() and tolower() using tables.
|
1319 mike 1.111.6.8 For example:
|
1320 mike 1.111.6.1
|
1321 mike 1.111.6.8 static const char _upper[] =
1322 {
1323 0,1,2,...255
1324 };
1325
1326 inline Uint16 _toUpper(Uint16 x)
1327 {
1328 return (x & 0xFF00) ? x : _upper[x];
1329 }
|
1330 mike 1.111.6.1
|
1331 mike 1.111.6.8 This outperforms the system implementation by avoiding an anding
1332 operation.
|
1333 mike 1.111.6.1
1334 6. Implemented char* version of the following member functions to
|
1335 mike 1.111.6.8 eliminate unnecessary creation of anonymous string objects
1336 (temporaries).
|
1337 mike 1.111.6.1
|
1338 mike 1.111.6.8 String(const String& s1, const char* s2);
1339 String(const char* s1, const String& s2);
1340 String& String::operator=(const char* str);
1341 Uint32 String::find(const char* s) const;
1342 bool String::equal(const String& s1, const char* s2);
1343 static int String::compare(const String& s1, const char* s2);
1344 String& String::append(const char* str);
1345 String& String::append(const char* str, Uint32 size);
1346 static bool String::equalNoCase(const String& s1, const char* s2);
1347 String& operator=(const char* str)
1348 String& String::assign(const char* str)
1349 String& String::append(const char* str)
1350 Boolean operator==(const String& s1, const char* s2)
1351 Boolean operator==(const char* s1, const String& s2)
1352 Boolean operator!=(const String& s1, const char* s2)
1353 Boolean operator!=(const char* s1, const String& s2)
1354 Boolean operator<(const String& s1, const char* s2)
1355 Boolean operator<(const char* s1, const String& s2)
1356 Boolean operator>(const String& s1, const char* s2)
1357 Boolean operator>(const char* s1, const String& s2)
1358 Boolean operator<=(const String& s1, const char* s2)
1359 mike 1.111.6.8 Boolean operator<=(const char* s1, const String& s2)
1360 Boolean operator>=(const String& s1, const char* s2)
1361 Boolean operator>=(const char* s1, const String& s2)
1362 String operator+(const String& s1, const char* s2)
1363 String operator+(const char* s1, const String& s2)
|
1364 mike 1.111.6.1
|
1365 mike 1.111.6.5 7. Optimized _roundUpToPow2(), used in rounding the capacity to the next
|
1366 mike 1.111.6.1 power of two (algorithm from the book "Hacker's Delight").
1367
|
1368 mike 1.111.6.8 static Uint32 _roundUpToPow2(Uint32 x)
1369 {
1370 if (x < 8)
1371 return 8;
1372
1373 x--;
1374 x |= (x >> 1);
1375 x |= (x >> 2);
1376 x |= (x >> 4);
1377 x |= (x >> 8);
1378 x |= (x >> 16);
1379 x++;
|
1380 mike 1.111.6.1
|
1381 mike 1.111.6.8 return x;
1382 }
|
1383 mike 1.111.6.1
1384 8. Implemented "concatenating constructors" to eliminate temporaries
|
1385 mike 1.111.6.8 created by operator+(). This scheme employs the "return-value
1386 optimization" described by Stan Lippman.
|
1387 mike 1.111.6.1
|
1388 mike 1.111.6.8 inline String operator+(const String& s1, const String& s2)
1389 {
1390 return String(s1, s2, 0);
1391 }
|
1392 mike 1.111.6.1
1393 9. Experimented to find the optimal initial size for a short string.
|
1394 mike 1.111.6.8 Eight seems to offer the best tradeoff between space and time.
|
1395 mike 1.111.6.1
1396 10. Inlined all members of the Char16 class.
1397
1398 11. Used Uint16 internally in the String class. This showed no improvement
|
1399 mike 1.111.6.8 since Char16 was already fully inlined and was essentially reduced to
1400 Uint16 in any case.
|
1401 mike 1.111.6.1
1402 12. Implemented conditional logic (#if) allowing error checking logic to
|
1403 mike 1.111.6.8 be excluded to improve performance. Examples include bounds checking
1404 and null-pointer checking.
|
1405 mike 1.111.6.1
1406 13. Used memcpy() and memcmp() where possible. These are implemented using
|
1407 mike 1.111.6.8 the rep family of instructions under Intel and are much faster.
|
1408 mike 1.111.6.1
1409 14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
|
1410 mike 1.111.6.8 copy routine overhead.
|
1411 mike 1.111.6.1
1412 15. Added ASCII7 form of the constructor and assign().
1413
|
1414 mike 1.111.6.8 String s("hello world", String::ASCII7);
|
1415 mike 1.111.6.1
|
1416 mike 1.111.6.8 s.assignASCII7("hello world");
|
1417 mike 1.111.6.1
|
1418 mike 1.111.6.8 This avoids slower UTF8 processing when not needed.
|
1419 mike 1.111.6.1
|
1420 mike 1.111.6.5 ================================================================================
1421
1422 TO-DO:
1423
|
1424 mike 1.111.6.8 (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
|
1425 mike 1.111.6.5
|
1426 mike 1.111.6.8 (+) [DONE] Submit BUG-2754 (Windows buffer limit).
|
1427 mike 1.111.6.5
1428 (+) [DONE] Eliminate char versions of find() and append().
1429
1430 (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1431
1432 (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1433
1434 (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1435
1436 (+) [DONE] Comment StringRep allocation layout.
1437
1438 (+) [DONE] Conceal private inline functions.
|
1439 mike 1.111.6.1
|
1440 mike 1.111.6.5 (+) [DONE] Shorten inclusion of StringInline.h in String.h.
|
1441 mike 1.111.6.1
|
1442 mike 1.111.6.5 (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
|
1443 mike 1.111.6.8 rid of altogether.
|
1444 mike 1.111.6.1
|
1445 mike 1.111.6.6 (+) [DONE] useCamelNotationOnAllFunctionNames.
1446
|
1447 mike 1.111.6.7 (+) [DONE] Check for overflow condition in StringRep::alloc().
|
1448 mike 1.111.6.1
|
1449 mike 1.111.6.9 (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
1450
|
1451 mike 1.111.6.7 -----------
|
1452 mike 1.111.6.1
|
1453 mike 1.111.6.5 (+) Fix throw-related memory leak.
|
1454 mike 1.111.6.1
|
1455 mike 1.111.6.8 (+) DOC++ String.h
1456
|
1457 mike 1.111.6.5 (+) Look at PEP223 for coding security guidelines.
|
1458 mike 1.111.6.1
|
1459 mike 1.111.6.6 (+) Replace AtomicInt with new Atomic implementation.
|
1460 mike 1.111.6.1
|
1461 mike 1.111.6.5 (+) Implement Atomic operations for HP.
|
1462 mike 1.111.6.3
|
1463 mike 1.111.6.1 ================================================================================
1464 */
|