1 karl 1.98 //%2005////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 mike 1.27 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
16 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 david.dillard 1.105 //
|
19 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
20 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
22 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
25 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
30 // Author: Mike Brasher (mbrasher@bmc.com)
31 //
|
32 kumpf 1.39 // Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
|
33 joyce.j 1.101 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
|
34 mike 1.27 //
35 //%/////////////////////////////////////////////////////////////////////////////
36
37
38 #include <cctype>
|
39 kumpf 1.64 #include <cstring>
|
40 mike 1.27 #include "String.h"
|
41 kumpf 1.43 #include "Array.h"
|
42 chuck 1.103 #include "AutoPtr.h"
|
43 kumpf 1.48 #include "InternalException.h"
|
44 mike 1.27 #include <iostream>
|
45 kumpf 1.63 #include <fstream>
|
46 mike 1.27
|
47 david 1.69 #include "CommonUTF.h"
48
49 #ifdef PEGASUS_HAS_ICU
|
50 chuck 1.99 #include <unicode/ustring.h>
51 #include <unicode/uchar.h>
|
52 david 1.69 #endif
53
|
54 mike 1.28 PEGASUS_USING_STD;
55
|
56 mike 1.27 PEGASUS_NAMESPACE_BEGIN
57
|
58 kumpf 1.39 ///////////////////////////////////////////////////////////////////////////////
59 //
|
60 kumpf 1.54 // CString
61 //
62 ///////////////////////////////////////////////////////////////////////////////
63
64 CString::CString()
65 : _rep(0)
66 {
67 }
68
69 CString::CString(const CString& cstr)
70 {
|
71 kumpf 1.82 _rep = 0;
72
73 if (cstr._rep)
74 {
75 _rep = (void*)new char[strlen((char*)cstr._rep)+1];
76 strcpy((char*)_rep, (char*)cstr._rep);
77 }
|
78 kumpf 1.54 }
79
80 CString::CString(char* cstr)
81 : _rep(cstr)
82 {
83 }
84
85 CString::~CString()
86 {
87 if (_rep)
|
88 kumpf 1.82 {
|
89 kumpf 1.59 delete [] (char*)_rep;
|
90 kumpf 1.82 }
|
91 kumpf 1.54 }
92
|
93 kumpf 1.56 CString& CString::operator=(const CString& cstr)
94 {
|
95 kumpf 1.82 if (&cstr != this)
|
96 kumpf 1.81 {
|
97 kumpf 1.82 if (_rep)
98 {
99 delete [] (char*)_rep;
100 _rep = 0;
101 }
102 if (cstr._rep)
103 {
104 _rep = (char*)new char[strlen((char*)cstr._rep)+1];
105 strcpy((char*)_rep, (char*)cstr._rep);
106 }
|
107 kumpf 1.81 }
|
108 kumpf 1.56 return *this;
109 }
110
|
111 kumpf 1.54 CString::operator const char*() const
112 {
|
113 kumpf 1.59 return (char*)_rep;
|
114 kumpf 1.54 }
115
116 ///////////////////////////////////////////////////////////////////////////////
117 //
|
118 kumpf 1.39 // String
119 //
120 ///////////////////////////////////////////////////////////////////////////////
121
|
122 kumpf 1.37 const String String::EMPTY = String();
|
123 mike 1.27
|
124 kumpf 1.39 inline Uint32 _StrLen(const Char16* str)
|
125 mike 1.27 {
126 if (!str)
|
127 david.dillard 1.105 throw NullPointer();
|
128 mike 1.27
129 Uint32 n = 0;
130
131 while (*str++)
|
132 david.dillard 1.105 n++;
|
133 mike 1.27
134 return n;
135 }
136
|
137 chuck 1.102 //
138 // Converts a utf-8 char buffer to utf-16 and appends the utf-16 to the Array.
139 // n is the length of the input char *, if stopAtTerm is 0
140 // A terminator character is appended to the end.
141 // Note that each input char is converted individually, which gives
142 // the fastest performance.
143 //
144 void _convertAndAppend(const char* str, Array<Char16>& c16a, Uint32 n, Uint8 stopAtTerm)
145 {
146 Uint32 i = 0;
147 while ((stopAtTerm && *str) || (!stopAtTerm && i < n))
148 {
149 if (*(Uint8*)str <= 0x7f)
|
150 david.dillard 1.105 {
|
151 chuck 1.102 // Current byte sequence is in the us-ascii range.
152 c16a.append(Uint8(*str++));
153 }
154 else
155 {
156 //
157 // Current byte sequence is not in the us-ascii range.
158 //
159
160 // Check if the byte sequence is valid utf-8, and if so,
161 // call the converter to utf-16
162 Uint16 tgt[3];
163 tgt[1] = 0;
164 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*str);
165 if ( (!stopAtTerm && i + c >= n) ||
166 (!isValid_U8((const Uint8 *)str, c+1)) )
167 {
168 // Note about error conditions.
|
169 david.dillard 1.105 // It is possible that the last utf-8 char before the
|
170 chuck 1.102 // end of input string extends past the end of the input string.
171 // This is caught in both cases -
172 // If counting up to n, then the test above catches it.
173 // If converting until terminator found, then a terminator
174 // in the middle of a multi-byte utf-8 char is invalid.
175 MessageLoaderParms parms("Common.String.BAD_UTF8",
176 "The byte sequence starting at index $0 is not valid UTF-8 encoding.",
177 i);
178 throw Exception(parms);
179 }
180 else
181 {
182 // str is incremented by this call to the start of the next char
183 Uint16 * tgtBuf = tgt;
|
184 david.dillard 1.105 UTF8toUTF16((const Uint8 **)&str, (Uint8 *)&str[c+1], &tgtBuf, &tgtBuf[2]);
|
185 chuck 1.102 c16a.append(tgt[0]);
186 if (tgt[1])
|
187 david.dillard 1.105 {
|
188 chuck 1.102 // Its a utf-16 surrogate pair (uses 2 Char16's)
189 c16a.append(tgt[1]);
190 }
|
191 david.dillard 1.105
|
192 chuck 1.102 // bump by the trailing byte count
193 i += c;
194 }
195 }
196
197 i++;
198 } // end while
199
200 c16a.append('\0');
201 }
202
|
203 kumpf 1.43 class StringRep
204 {
205 public:
206 StringRep()
207 {}
208 StringRep(const StringRep& r)
209 : c16a(r.c16a)
210 {}
211 StringRep(const Char16* str)
212 : c16a(str, _StrLen(str) + 1)
213 {}
214
215 Array<Char16> c16a;
216 };
217
|
218 mike 1.27 String::String()
219 {
|
220 kumpf 1.43 _rep = new StringRep;
221 _rep->c16a.append('\0');
|
222 mike 1.27 }
223
|
224 kumpf 1.39 String::String(const String& str)
|
225 mike 1.27 {
|
226 tony 1.66 if (str._rep != NULL)
227 {
|
228 kumpf 1.43 _rep = new StringRep(*str._rep);
|
229 tony 1.66 }
230 else
231 {
232 _rep = new StringRep();
233 }
|
234 kumpf 1.39 }
|
235 tony 1.66
|
236 mike 1.27
|
237 kumpf 1.39 String::String(const String& str, Uint32 n)
238 {
|
239 kumpf 1.43 _rep = new StringRep;
|
240 kumpf 1.55 assign(str.getChar16Data(), n);
|
241 kumpf 1.39 }
242
243 String::String(const Char16* str)
244 {
|
245 david.dillard 1.105 if ( str == 0 )
246 {
247 throw NullPointer();
248 }
249
|
250 kumpf 1.43 _rep = new StringRep(str);
|
251 mike 1.27 }
252
|
253 kumpf 1.39 String::String(const Char16* str, Uint32 n)
254 {
|
255 david.dillard 1.105 if ( str == 0 )
256 {
257 throw NullPointer();
258 }
259
|
260 kumpf 1.43 _rep = new StringRep;
|
261 kumpf 1.39 assign(str, n);
262 }
263
264 String::String(const char* str)
|
265 mike 1.27 {
|
266 david.dillard 1.105 if ( str == 0 )
267 {
268 throw NullPointer();
269 }
270
|
271 kumpf 1.43 _rep = new StringRep;
|
272 chuck 1.103 AutoPtr<StringRep> tempRep(_rep);
273 // An exception can be thrown, so use a temp AutoPtr.
|
274 chuck 1.102 _convertAndAppend(str, _rep->c16a, 0, 1);
|
275 chuck 1.103 tempRep.release();
|
276 mike 1.27 }
277
|
278 kumpf 1.39 String::String(const char* str, Uint32 n)
|
279 mike 1.27 {
|
280 david.dillard 1.105 if ( str == 0 )
281 {
282 throw NullPointer();
283 }
284
|
285 kumpf 1.43 _rep = new StringRep;
|
286 chuck 1.103 AutoPtr<StringRep> tempRep(_rep);
287 // An exception can be thrown, so use a temp AutoPtr.
|
288 chuck 1.102 _convertAndAppend(str, _rep->c16a, n, 0);
|
289 chuck 1.103 tempRep.release();
|
290 kumpf 1.39 }
|
291 mike 1.27
|
292 kumpf 1.39 String::~String()
293 {
|
294 kumpf 1.43 delete _rep;
|
295 mike 1.27 }
296
|
297 kumpf 1.39 String& String::operator=(const String& str)
|
298 mike 1.27 {
|
299 kumpf 1.82 if (&str != this)
300 {
301 assign(str);
302 }
303 return *this;
|
304 mike 1.27 }
305
|
306 kumpf 1.39 String& String::assign(const String& str)
|
307 mike 1.27 {
|
308 kumpf 1.43 _rep->c16a = str._rep->c16a;
|
309 kumpf 1.39 return *this;
|
310 mike 1.27 }
311
|
312 kumpf 1.39 String& String::assign(const Char16* str)
|
313 mike 1.27 {
|
314 david.dillard 1.105 if ( str == 0 )
315 {
316 throw NullPointer();
317 }
318
|
319 kumpf 1.43 _rep->c16a.clear();
320 _rep->c16a.append(str, _StrLen(str) + 1);
|
321 mike 1.27 return *this;
322 }
323
324 String& String::assign(const Char16* str, Uint32 n)
325 {
|
326 david.dillard 1.105 if ( str == 0 )
327 {
328 throw NullPointer();
329 }
330
|
331 kumpf 1.43 _rep->c16a.clear();
|
332 joyce.j 1.101 _rep->c16a.append(str, n);
|
333 kumpf 1.43 _rep->c16a.append('\0');
|
334 mike 1.27 return *this;
335 }
336
|
337 chuck 1.102 String& String::assign(const char* str)
338 {
|
339 david.dillard 1.105 if ( str == 0 )
340 {
341 throw NullPointer();
342 }
343
|
344 chuck 1.102 _rep->c16a.clear();
345 _convertAndAppend(str, _rep->c16a, 0, 1);
346 return *this;
347 }
348
|
349 kumpf 1.39 String& String::assign(const char* str, Uint32 n)
|
350 mike 1.27 {
|
351 david.dillard 1.105 if ( str == 0 )
352 {
353 throw NullPointer();
354 }
355
|
356 chuck 1.102 _rep->c16a.clear();
357 _convertAndAppend(str, _rep->c16a, n, 0);
|
358 mike 1.27 return *this;
359 }
360
|
361 kumpf 1.39 void String::clear()
362 {
|
363 kumpf 1.43 _rep->c16a.clear();
364 _rep->c16a.append('\0');
|
365 kumpf 1.39 }
366
|
367 kumpf 1.43 void String::reserveCapacity(Uint32 capacity)
|
368 kumpf 1.39 {
|
369 kumpf 1.45 _rep->c16a.reserveCapacity(capacity + 1);
|
370 kumpf 1.39 }
371
372 Uint32 String::size() const
373 {
|
374 gs.keenan 1.106 #if defined (PEGASUS_OS_VMS)
375 //
376 // This prevents returning a minus number.
377 //
378 // Seems as though the first time through
379 // the XML parser something doesn't get
380 // initialized and there is no check for
381 // a negative number in the parser!
382 //
383 Uint32 foo;
384 foo = _rep->c16a.size();
|
385 gs.keenan 1.107 if (foo == 0)
|
386 gs.keenan 1.106 {
387 return 0;
388 }
389 else
390 {
391 return (foo -1);
392 }
393 #else
|
394 kumpf 1.43 return _rep->c16a.size() - 1;
|
395 gs.keenan 1.106 #endif
|
396 kumpf 1.39 }
397
|
398 kumpf 1.55 const Char16* String::getChar16Data() const
|
399 kumpf 1.39 {
|
400 kumpf 1.43 return _rep->c16a.getData();
|
401 kumpf 1.39 }
402
|
403 kumpf 1.53 Char16& String::operator[](Uint32 index)
|
404 mike 1.27 {
|
405 kumpf 1.53 if (index > size())
|
406 david.dillard 1.105 throw IndexOutOfBoundsException();
|
407 mike 1.27
|
408 kumpf 1.53 return _rep->c16a[index];
|
409 mike 1.27 }
410
|
411 kumpf 1.53 const Char16 String::operator[](Uint32 index) const
|
412 mike 1.27 {
|
413 kumpf 1.53 if (index > size())
|
414 david.dillard 1.105 throw IndexOutOfBoundsException();
|
415 mike 1.27
|
416 kumpf 1.53 return _rep->c16a[index];
|
417 mike 1.27 }
418
|
419 kumpf 1.39 String& String::append(const Char16& c)
420 {
|
421 kumpf 1.43 _rep->c16a.insert(_rep->c16a.size() - 1, c);
|
422 kumpf 1.39 return *this;
423 }
424
|
425 mike 1.27 String& String::append(const Char16* str, Uint32 n)
426 {
|
427 david.dillard 1.105 if (str == 0)
|
428 joyce.j 1.101 {
429 throw NullPointer();
430 }
|
431 david.dillard 1.105
|
432 joyce.j 1.101 _rep->c16a.reserveCapacity(_rep->c16a.size() + n);
|
433 kumpf 1.43 _rep->c16a.remove(_rep->c16a.size() - 1);
|
434 joyce.j 1.101 _rep->c16a.append(str, n);
|
435 kumpf 1.43 _rep->c16a.append('\0');
|
436 mike 1.27 return *this;
437 }
438
|
439 kumpf 1.39 String& String::append(const String& str)
|
440 mike 1.27 {
|
441 kumpf 1.55 return append(str.getChar16Data(), str.size());
|
442 mike 1.27 }
443
|
444 kumpf 1.53 void String::remove(Uint32 index, Uint32 size)
|
445 mike 1.27 {
|
446 kumpf 1.39 if (size == PEG_NOT_FOUND)
|
447 david.dillard 1.105 size = this->size() - index;
|
448 mike 1.27
|
449 kumpf 1.53 if (index + size > this->size())
|
450 david.dillard 1.105 throw IndexOutOfBoundsException();
|
451 mike 1.27
|
452 kumpf 1.39 if (size)
|
453 david.dillard 1.105 _rep->c16a.remove(index, size);
|
454 mike 1.27 }
455
|
456 kumpf 1.53 String String::subString(Uint32 index, Uint32 length) const
|
457 mike 1.27 {
|
458 kumpf 1.53 if (index < size())
|
459 mike 1.27 {
|
460 david.dillard 1.105 if ((length == PEG_NOT_FOUND) || (length > size() - index))
461 length = size() - index;
|
462 mike 1.27
|
463 david.dillard 1.105 return String(getChar16Data() + index, length);
|
464 mike 1.27 }
|
465 david.dillard 1.105
466 return String();
|
467 mike 1.27 }
468
469 Uint32 String::find(Char16 c) const
470 {
|
471 kumpf 1.55 const Char16* first = getChar16Data();
|
472 mike 1.27
473 for (const Char16* p = first; *p; p++)
474 {
|
475 david.dillard 1.105 if (*p == c)
476 return p - first;
|
477 mike 1.27 }
478
479 return PEG_NOT_FOUND;
480 }
481
|
482 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
483 mike 1.30 {
|
484 kumpf 1.55 const Char16* data = getChar16Data();
|
485 mike 1.30
|
486 kumpf 1.53 for (Uint32 i = index, n = size(); i < n; i++)
|
487 mike 1.30 {
|
488 david.dillard 1.105 if (data[i] == c)
489 return i;
|
490 mike 1.30 }
491
492 return PEG_NOT_FOUND;
493 }
494
|
495 mike 1.27 Uint32 String::find(const String& s) const
496 {
|
497 kumpf 1.55 const Char16* pSubStr = s.getChar16Data();
498 const Char16* pStr = getChar16Data();
|
499 mike 1.27 Uint32 subStrLen = s.size();
500 Uint32 strLen = size();
501
|
502 mike 1.30 if (subStrLen > strLen)
503 {
504 return PEG_NOT_FOUND;
505 }
506
|
507 mike 1.27 // loop to find first char match
508 Uint32 loc = 0;
509 for( ; loc <= (strLen-subStrLen); loc++)
510 {
|
511 david.dillard 1.105 if (*pStr++ == *pSubStr) // match first char
512 {
513 // point to substr 2nd char
514 const Char16* p = pSubStr + 1;
515
516 // Test remaining chars for equal
517 Uint32 i = 1;
518 for (; i < subStrLen; i++)
519 if (*pStr++ != *p++ )
520 {pStr-=i; break;} // break from loop
521 if (i == subStrLen)
522 return loc;
523 }
|
524 mike 1.27 }
525 return PEG_NOT_FOUND;
526 }
527
528 Uint32 String::reverseFind(Char16 c) const
529 {
|
530 kumpf 1.55 const Char16* first = getChar16Data();
531 const Char16* last = getChar16Data() + size();
|
532 mike 1.27
533 while (last != first)
534 {
|
535 david.dillard 1.105 if (*--last == c)
536 return last - first;
|
537 mike 1.27 }
538
539 return PEG_NOT_FOUND;
540 }
541
542 void String::toLower()
543 {
|
544 david 1.69 #ifdef PEGASUS_HAS_ICU
|
545 chuck 1.99 // This will do a locale-insensitive, but context-sensitive convert.
|
546 david.dillard 1.105 // Context-sensitive prevents any optimizations that try to
|
547 chuck 1.99 // convert just the ascii before calling ICU.
548 // The string may shrink or expand after the convert.
549
550 int32_t sz = size();
551 UChar* destbuf = new UChar[sz + 1];
552 const UChar* srcbuf = (const UChar *)getChar16Data();
553 UErrorCode err = U_ZERO_ERROR;
554
555 int32_t needed = u_strToLower(destbuf, sz + 1 , srcbuf, sz, NULL, &err);
556 if (err == U_BUFFER_OVERFLOW_ERROR)
557 {
558 delete [] destbuf;
559 destbuf = new UChar[needed + 1];
560 err = U_ZERO_ERROR;
561 u_strToLower(destbuf, needed + 1 , srcbuf, sz, NULL, &err);
562 }
563 if (U_FAILURE(err))
|
564 david 1.90 {
|
565 chuck 1.99 delete [] destbuf;
|
566 david.dillard 1.105 throw Exception(u_errorName(err));
|
567 chuck 1.99 }
568
569 if (needed == sz)
570 {
571 Char16* from = (Char16*)destbuf;
572 for (Char16* to = &_rep->c16a[0]; *to; to++, from++)
573 {
574 *to = *from;
575 }
|
576 david 1.90 }
577 else
578 {
|
579 chuck 1.99 assign((Char16 *)destbuf, needed);
|
580 david 1.90 }
|
581 chuck 1.99
582 delete [] destbuf;
|
583 david 1.69 #else
|
584 kumpf 1.43 for (Char16* p = &_rep->c16a[0]; *p; p++)
|
585 mike 1.27 {
|
586 chuck 1.99 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
|
587 david.dillard 1.105 *p = tolower(*p);
|
588 mike 1.27 }
|
589 david 1.69 #endif
|
590 kumpf 1.39 }
591
|
592 chuck 1.99 void String::toUpper()
|
593 david 1.90 {
594 #ifdef PEGASUS_HAS_ICU
|
595 chuck 1.99 // This will do a locale-insensitive, but context-sensitive convert.
|
596 david.dillard 1.105 // Context-sensitive prevents any optimizations that try to
|
597 chuck 1.99 // convert just the ascii before calling ICU.
598 // The string may shrink or expand after the convert.
599
600 int32_t sz = size();
601 UChar* destbuf = new UChar[sz + 1];
602 const UChar* srcbuf = (const UChar *)getChar16Data();
603 UErrorCode err = U_ZERO_ERROR;
604
605 int32_t needed = u_strToUpper(destbuf, sz + 1 , srcbuf, sz, NULL, &err);
606 if (err == U_BUFFER_OVERFLOW_ERROR)
|
607 david 1.90 {
|
608 chuck 1.99 delete [] destbuf;
609 destbuf = new UChar[needed + 1];
610 err = U_ZERO_ERROR;
611 u_strToUpper(destbuf, needed + 1 , srcbuf, sz, NULL, &err);
612 }
613 if (U_FAILURE(err))
614 {
615 delete [] destbuf;
|
616 david.dillard 1.105 throw Exception(u_errorName(err));
|
617 chuck 1.99 }
618
619 if (needed == sz)
620 {
621 Char16* from = (Char16*)destbuf;
622 for (Char16* to = &_rep->c16a[0]; *to; to++, from++)
623 {
624 *to = *from;
625 }
|
626 david 1.90 }
627 else
628 {
|
629 chuck 1.99 assign((Char16 *)destbuf, needed);
|
630 david 1.90 }
631
|
632 chuck 1.99 delete [] destbuf;
|
633 david 1.91 #else
634 for (Char16* p = &_rep->c16a[0]; *p; p++)
635 {
|
636 david.dillard 1.105 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
637 *p = toupper(*p);
|
638 david 1.91 }
|
639 david 1.90 #endif
640 }
641
|
642 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
643 kumpf 1.39 {
|
644 kumpf 1.55 const Char16* s1c16 = s1.getChar16Data();
645 const Char16* s2c16 = s2.getChar16Data();
|
646 kumpf 1.39
647 while (n--)
|
648 mike 1.27 {
|
649 david.dillard 1.105 int r = *s1c16++ - *s2c16++;
|
650 mike 1.27
|
651 david.dillard 1.105 if (r)
652 return r;
|
653 mike 1.27 }
654
655 return 0;
656 }
657
|
658 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
659 mike 1.30 {
|
660 kumpf 1.55 const Char16* s1c16 = s1.getChar16Data();
661 const Char16* s2c16 = s2.getChar16Data();
|
662 kumpf 1.43
663 while (*s1c16 && *s2c16)
|
664 mike 1.30 {
|
665 david.dillard 1.105 int r = *s1c16++ - *s2c16++;
|
666 mike 1.30
|
667 david.dillard 1.105 if (r)
668 return r;
|
669 mike 1.30 }
670
|
671 kumpf 1.43 if (*s2c16)
|
672 david.dillard 1.105 return -1;
|
673 kumpf 1.43 else if (*s1c16)
|
674 david.dillard 1.105 return 1;
|
675 mike 1.30
676 return 0;
677 }
678
|
679 kumpf 1.40 int String::compareNoCase(const String& s1, const String& s2)
680 {
|
681 david 1.69 #ifdef PEGASUS_HAS_ICU
|
682 chuck 1.99 return u_strcasecmp((const UChar*)s1.getChar16Data(),
683 (const UChar*)s2.getChar16Data(),
684 U_FOLD_CASE_DEFAULT);
|
685 david 1.69 #else
|
686 kumpf 1.55 const Char16* _s1 = s1.getChar16Data();
687 const Char16* _s2 = s2.getChar16Data();
|
688 kumpf 1.40
689 while (*_s1 && *_s2)
690 {
691 int r;
692
|
693 kumpf 1.46 if (*_s1 <= PEGASUS_MAX_PRINTABLE_CHAR &&
694 *_s2 <= PEGASUS_MAX_PRINTABLE_CHAR)
|
695 kumpf 1.40 {
696 r = tolower(*_s1++) - tolower(*_s2++);
697 }
698 else
699 {
700 r = *_s1++ - *_s2++;
701 }
702
|
703 david.dillard 1.105 if (r)
704 return r;
|
705 kumpf 1.40 }
706
707 if (*_s2)
|
708 david.dillard 1.105 return -1;
|
709 kumpf 1.40 else if (*_s1)
|
710 david.dillard 1.105 return 1;
|
711 kumpf 1.40
712 return 0;
|
713 david 1.69 #endif
|
714 kumpf 1.40 }
715
|
716 kumpf 1.39 Boolean String::equal(const String& str1, const String& str2)
|
717 mike 1.27 {
|
718 kumpf 1.43 return String::compare(str1, str2) == 0;
|
719 mike 1.27 }
720
|
721 kumpf 1.39 Boolean String::equalNoCase(const String& str1, const String& str2)
|
722 mike 1.27 {
|
723 david 1.69 #ifdef PEGASUS_HAS_ICU
|
724 chuck 1.99 return compareNoCase(str1, str2) == 0;
|
725 david 1.69 #else
|
726 kumpf 1.39 if (str1.size() != str2.size())
|
727 david.dillard 1.105 return false;
|
728 kumpf 1.39
|
729 kumpf 1.55 const Char16* p = str1.getChar16Data();
730 const Char16* q = str2.getChar16Data();
|
731 kumpf 1.39
732 Uint32 n = str1.size();
|
733 mike 1.27
|
734 kumpf 1.39 while (n--)
735 {
|
736 david.dillard 1.105 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR &&
|
737 kumpf 1.46 *q <= PEGASUS_MAX_PRINTABLE_CHAR)
|
738 david.dillard 1.105 {
739 if (tolower(*p++) != tolower(*q++))
740 return false;
741 }
742 else if (*p++ != *q++)
743 return false;
|
744 kumpf 1.39 }
|
745 mike 1.28
|
746 kumpf 1.39 return true;
|
747 david 1.69 #endif
748 }
749
|
750 mike 1.27
|
751 david 1.90 CString String::getCString() const
|
752 david 1.69 {
|
753 david 1.79 Uint32 n = 3*size() + 1;
|
754 david 1.69 char* str = new char[n];
755
756 const Char16* msg16 = getChar16Data();
757
758 const Uint16 *strsrc = (Uint16 *)msg16;
|
759 david 1.71 Uint16 *endsrc = (Uint16 *)&msg16[size()+1];
|
760 david 1.69
761 Uint8 *strtgt = (Uint8 *)str;
762 Uint8 *endtgt = (Uint8 *)&str[n];
763
764 UTF16toUTF8 (&strsrc,
|
765 david.dillard 1.105 endsrc,
766 &strtgt,
767 endtgt);
768
769 char* str1 = new char[strlen(str)+1];
770 strcpy(str1,str);
771 delete [] str;
|
772 david 1.71
773 return CString(str1);
|
774 david 1.69 }
|
775 kumpf 1.42
|
776 kumpf 1.65 #if 0
|
777 kumpf 1.42 // ATTN-RK-P3-20020603: This code is not completely correct
|
778 karl 1.36 // Wildcard String matching function that may be useful in the future
779 // The following code was provided by Bob Blair.
780
781 /* _StringMatch Match input MatchString against a GLOB style pattern
782 Note that MatchChar is the char type so that this source
783 in portable to different string types. This is an internal function
|
784 david.dillard 1.105
|
785 karl 1.36 Results: The return value is 1 if string matches pattern, and
|
786 david.dillard 1.105 0 otherwise. The matching operation permits the following
787 special characters in the pattern: *?\[] (see the manual
788 entry for details on what these mean).
789
|
790 chuck 1.78
|
791 karl 1.36 Side effects: None.
792 */
|
793 david.dillard 1.105
|
794 karl 1.36 /* MatchChar defined as a separate entity because this function source used
|
795 david.dillard 1.105 elsewhere was an unsigned char *. Here we use Uint16 to maintain 16 bit
|
796 karl 1.36 size.
797 */
798 typedef Uint16 MatchChar;
799
800 inline Uint16 _ToLower(Uint16 ch)
801 {
|
802 david 1.69 // ICU_TODO: If ICU is available we should do this the correct way.
|
803 kumpf 1.46 return ch <= PEGASUS_MAX_PRINTABLE_CHAR ? tolower(char(ch)) : ch;
|
804 karl 1.36 }
805
806 inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase)
807 {
|
808 david 1.69 // ICU_TODO: If ICU is available we should do this the correct way.
|
809 karl 1.36 if (nocase)
|
810 david.dillard 1.105 return _ToLower(ch1) == _ToLower(ch2);
811
812 return ch1 == ch2;
|
813 karl 1.36 }
|
814 mike 1.28
|
815 kumpf 1.35
|
816 karl 1.36 static const MatchChar *
817 _matchrange(const MatchChar *range, MatchChar c, int nocase)
818 {
819 const MatchChar *p = range;
820 const MatchChar *rstart = range + 1;
821 const MatchChar *rend = 0;
822 MatchChar compchar;
823
|
824 kumpf 1.35 for (rend = rstart; *rend && *rend != ']'; rend++);
|
825 karl 1.36 if (*rend == ']') { // if there is an end to this pattern
|
826 kumpf 1.35 for (compchar = *rstart; rstart != rend; rstart++) {
|
827 karl 1.36 if (_Equal(*rstart, c, nocase))
|
828 kumpf 1.35 return ++rend;
829 if (*rstart == '-') {
830 rstart++;
831 if (c >= compchar && c <= *rstart)
832 return ++rend;
833 }
834 }
835 }
|
836 karl 1.36 return (const MatchChar *)0;
|
837 kumpf 1.35 }
838
839 static int
|
840 david.dillard 1.105 _StringMatch(
841 const MatchChar *testString,
|
842 karl 1.36 const MatchChar *pattern,
|
843 david.dillard 1.105 int nocase ) /* Ignore case if this is true */
|
844 karl 1.36 {
845 const MatchChar *pat = pattern;
846 const MatchChar *str = testString;
|
847 kumpf 1.35 unsigned int done = 0;
848 unsigned int res = 0; // the result: 1 == match
849
850 while (!done) { // main loop walks through pattern and test string
851 //cerr << "Comparing <" << *pat << "> and <" << *str << ">" << endl;
852 if (!*pat) { //end of pattern
853 done = 1; // we're done
854 if (!*str) //end of test, too?
855 res = 1; // then we matched
856 } else { //Not end of pattern
857 if (!*str) { // but end of test
858 done = 1; // We're done
859 if (*pat == '*') // If pattern openends
860 res = 1; // then we matched
861 } else { //Not end of test
862 if (*pat == '*') { //Ambiguuity found
863 if (!*++pat) { //and it ends pattern
864 done = 1; // then we're done
865 res = 1; // and match
866 } else { //if it doesn't end
867 while (!done) { // until we're done
|
868 karl 1.36 if (_StringMatch(str, pat, nocase)) { // we recurse
|
869 kumpf 1.35 done = 1; //if it recurses true
870 res = 1; // we done and match
871 } else { //it recurses false
872 if (!*str) // see if test is done
873 done = 1; // yes: we done
874 else // not done:
875 str++; // keep testing
876 } // end test on recursive call
877 } // end looping on recursive calls
878 } // end logic when pattern is ambiguous
879 } else { //pattern not ambiguus
880 if (*pat == '?') { //pattern is 'any'
881 pat++, str++; // so move along
882 } else if (*pat == '[') { //see if it's a range
|
883 karl 1.36 pat = _matchrange(pat, *str, nocase); // and is a match
|
884 kumpf 1.35 if (!pat) { //It is not a match
885 done = 1; // we're done
|
886 kumpf 1.42 res = 0; // no match
|
887 kumpf 1.35 } else { //Range matches
888 str++, pat++; // keep going
889 }
890 } else { // only case left is individual characters
|
891 karl 1.36 if (!_Equal(*pat++, *str++, nocase)) // if they don't match
|
892 kumpf 1.35 done = 1; // bail.
893 }
894 } // end ("pattern is not ambiguous (*)" logic
895 } // end logic when pattern and string still have data
896 } // end logic when pattern still has data
897 } // end main loop
898 return res;
899 }
900
|
901 kumpf 1.39
|
902 kumpf 1.65 /** match matches a string against a GLOB style pattern.
903 Returns true if the String parameter matches the pattern. C-Shell style
|
904 david.dillard 1.105 glob matching is used.
|
905 kumpf 1.65 @param str String to be matched against the pattern
906 @param pattern Pattern to use in the match
907 @return Boolean true if str matches pattern
908 The pattern definition is as follows:
909 <pre>
910 * Matches any number of any characters
911 ? Match exactly one character
912 [chars] Match any character in chars
913 [chara-charb] Match any character in the range between chara and charb
914 </pre>
915 The literal characters *, ?, [, ] can be included in a string by
916 escaping them with backslash "\". Ranges of characters can be concatenated.
917 <pre>
918 examples:
919 Boolean result = String::match("This is a test", "*is*");
920 Boolean works = String::match("abcdef123", "*[0-9]");
921 </pre>
922 */
|
923 karl 1.36 Boolean String::match(const String& str, const String& pattern)
924 {
925 return _StringMatch(
|
926 david.dillard 1.105 (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 0) != 0;
|
927 karl 1.36 }
928
|
929 kumpf 1.65 /** matchNoCase Matches a String against a GLOB style pattern independent
|
930 david.dillard 1.105 of case.
|
931 kumpf 1.65 Returns true if the str parameter matches the pattern. C-Shell style
|
932 david.dillard 1.104 glob matching is used. Ignore case in all comparisons. Case is
|
933 kumpf 1.65 ignored in the match.
934 @param str String containing the string to be matched
935 @param pattern GLOB style pattern to use in the match.
936 @return Boolean true if str matches pattern
|
937 david.dillard 1.104 @see match
|
938 kumpf 1.65 */
|
939 karl 1.36 Boolean String::matchNoCase(const String& str, const String& pattern)
940 {
941 return _StringMatch(
|
942 david.dillard 1.105 (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 1) != 0;
|
943 kumpf 1.39 }
|
944 kumpf 1.65 #endif
|
945 kumpf 1.39
946
947 ///////////////////////////////////////////////////////////////////////////////
948 //
949 // String-related functions
950 //
951 ///////////////////////////////////////////////////////////////////////////////
952
953 Boolean operator==(const String& str1, const String& str2)
954 {
955 return String::equal(str1, str2);
956 }
957
958 Boolean operator==(const String& str1, const char* str2)
959 {
960 return String::equal(str1, str2);
961 }
962
963 Boolean operator==(const char* str1, const String& str2)
964 {
965 return String::equal(str1, str2);
966 kumpf 1.39 }
967
968 Boolean operator!=(const String& str1, const String& str2)
969 {
970 return !String::equal(str1, str2);
971 }
972
|
973 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
974 kumpf 1.39 {
|
975 david 1.72
|
976 david 1.69 #if defined(PEGASUS_OS_OS400)
|
977 david 1.93 CString cstr = str.getCString();
|
978 david 1.69 const char* utf8str = cstr;
979
980 os << utf8str;
981
|
982 humberto 1.76 #elif defined(PEGASUS_HAS_ICU)
|
983 david.dillard 1.105 char *buf = NULL;
984 const int size = str.size() * 6;
985 UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size());
986 Uint32 bufsize = UniStr.extract(0,size,buf);
987
988 buf = new char[bufsize+1];
989 UniStr.extract(0,bufsize,buf);
990 os << buf;
991 os.flush();
992 delete [] buf;
|
993 david 1.69 #else
|
994 david.dillard 1.105 for (Uint32 i = 0, n = str.size(); i < n; i++)
995 {
996 Uint16 code = str[i];
997
998 if (code > 0 && code <= PEGASUS_MAX_PRINTABLE_CHAR)
999 {
1000 os << char(code);
1001 }
1002 else
1003 {
1004 // Print in hex format:
1005 char buffer[8];
1006 sprintf(buffer, "\\x%04X", code);
1007 os << buffer;
1008 }
1009 }
|
1010 david 1.69 #endif // End of PEGASUS_HAS_ICU #else leg.
|
1011 kumpf 1.39
1012 return os;
1013 }
1014
1015 String operator+(const String& str1, const String& str2)
1016 {
1017 return String(str1).append(str2);
1018 }
1019
1020 Boolean operator<(const String& str1, const String& str2)
1021 {
|
1022 kumpf 1.43 return String::compare(str1, str2) < 0;
|
1023 kumpf 1.39 }
1024
1025 Boolean operator<=(const String& str1, const String& str2)
1026 {
|
1027 kumpf 1.43 return String::compare(str1, str2) <= 0;
|
1028 kumpf 1.39 }
1029
1030 Boolean operator>(const String& str1, const String& str2)
1031 {
|
1032 kumpf 1.43 return String::compare(str1, str2) > 0;
|
1033 kumpf 1.39 }
1034
1035 Boolean operator>=(const String& str1, const String& str2)
1036 {
|
1037 kumpf 1.43 return String::compare(str1, str2) >= 0;
|
1038 kumpf 1.39 }
1039
|
1040 mike 1.27 PEGASUS_NAMESPACE_END
|