1 karl 1.98 //%2005////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
|
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
|
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
|
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
|
11 mike 1.27 //
12 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
14 // deal in the Software without restriction, including without limitation the
15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
16 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
17 // furnished to do so, subject to the following conditions:
|
18 david.dillard 1.105 //
|
19 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
20 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
22 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
25 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //==============================================================================
29 //
30 // Author: Mike Brasher (mbrasher@bmc.com)
31 //
|
32 kumpf 1.39 // Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
|
33 joyce.j 1.101 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
|
34 david.dillard 1.111 // David Dillard, VERITAS Software Corp.
35 // (david.dillard@veritas.com)
|
36 mike 1.27 //
37 //%/////////////////////////////////////////////////////////////////////////////
38
39
|
40 kumpf 1.64 #include <cstring>
|
41 mike 1.27 #include "String.h"
|
42 kumpf 1.43 #include "Array.h"
|
43 chuck 1.103 #include "AutoPtr.h"
|
44 kumpf 1.48 #include "InternalException.h"
|
45 yi.zhou 1.108 #include <Pegasus/Common/CommonUTF.h>
|
46 mike 1.27
|
47 david 1.69 #include "CommonUTF.h"
48
49 #ifdef PEGASUS_HAS_ICU
|
50 chuck 1.99 #include <unicode/ustring.h>
51 #include <unicode/uchar.h>
|
52 david 1.69 #endif
53
|
54 mike 1.28 PEGASUS_USING_STD;
55
|
56 mike 1.27 PEGASUS_NAMESPACE_BEGIN
57
|
58 kumpf 1.39 ///////////////////////////////////////////////////////////////////////////////
59 //
|
60 kumpf 1.54 // CString
61 //
62 ///////////////////////////////////////////////////////////////////////////////
63
64 CString::CString()
65 : _rep(0)
66 {
67 }
68
69 CString::CString(const CString& cstr)
70 {
|
71 kumpf 1.82 _rep = 0;
72
73 if (cstr._rep)
74 {
75 _rep = (void*)new char[strlen((char*)cstr._rep)+1];
76 strcpy((char*)_rep, (char*)cstr._rep);
77 }
|
78 kumpf 1.54 }
79
80 CString::CString(char* cstr)
81 : _rep(cstr)
82 {
83 }
84
85 CString::~CString()
86 {
87 if (_rep)
|
88 kumpf 1.82 {
|
89 kumpf 1.59 delete [] (char*)_rep;
|
90 kumpf 1.82 }
|
91 kumpf 1.54 }
92
|
93 kumpf 1.56 CString& CString::operator=(const CString& cstr)
94 {
|
95 kumpf 1.82 if (&cstr != this)
|
96 kumpf 1.81 {
|
97 kumpf 1.82 if (_rep)
98 {
99 delete [] (char*)_rep;
100 _rep = 0;
101 }
102 if (cstr._rep)
103 {
104 _rep = (char*)new char[strlen((char*)cstr._rep)+1];
105 strcpy((char*)_rep, (char*)cstr._rep);
106 }
|
107 kumpf 1.81 }
|
108 kumpf 1.56 return *this;
109 }
110
|
111 kumpf 1.54 CString::operator const char*() const
112 {
|
113 kumpf 1.59 return (char*)_rep;
|
114 kumpf 1.54 }
115
116 ///////////////////////////////////////////////////////////////////////////////
117 //
|
118 kumpf 1.39 // String
119 //
120 ///////////////////////////////////////////////////////////////////////////////
121
|
122 kumpf 1.37 const String String::EMPTY = String();
|
123 mike 1.27
|
124 kumpf 1.39 inline Uint32 _StrLen(const Char16* str)
|
125 mike 1.27 {
126 if (!str)
|
127 david.dillard 1.105 throw NullPointer();
|
128 mike 1.27
129 Uint32 n = 0;
130
131 while (*str++)
|
132 david.dillard 1.105 n++;
|
133 mike 1.27
134 return n;
135 }
136
|
137 chuck 1.102 //
138 // Converts a utf-8 char buffer to utf-16 and appends the utf-16 to the Array.
139 // n is the length of the input char *, if stopAtTerm is 0
140 // A terminator character is appended to the end.
141 // Note that each input char is converted individually, which gives
142 // the fastest performance.
143 //
144 void _convertAndAppend(const char* str, Array<Char16>& c16a, Uint32 n, Uint8 stopAtTerm)
145 {
146 Uint32 i = 0;
147 while ((stopAtTerm && *str) || (!stopAtTerm && i < n))
148 {
149 if (*(Uint8*)str <= 0x7f)
|
150 david.dillard 1.105 {
|
151 chuck 1.102 // Current byte sequence is in the us-ascii range.
152 c16a.append(Uint8(*str++));
153 }
154 else
155 {
156 //
157 // Current byte sequence is not in the us-ascii range.
158 //
159
160 // Check if the byte sequence is valid utf-8, and if so,
161 // call the converter to utf-16
162 Uint16 tgt[3];
163 tgt[1] = 0;
164 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*str);
165 if ( (!stopAtTerm && i + c >= n) ||
166 (!isValid_U8((const Uint8 *)str, c+1)) )
167 {
168 // Note about error conditions.
|
169 david.dillard 1.105 // It is possible that the last utf-8 char before the
|
170 chuck 1.102 // end of input string extends past the end of the input string.
171 // This is caught in both cases -
172 // If counting up to n, then the test above catches it.
173 // If converting until terminator found, then a terminator
174 // in the middle of a multi-byte utf-8 char is invalid.
175 MessageLoaderParms parms("Common.String.BAD_UTF8",
176 "The byte sequence starting at index $0 is not valid UTF-8 encoding.",
177 i);
178 throw Exception(parms);
179 }
180 else
181 {
182 // str is incremented by this call to the start of the next char
183 Uint16 * tgtBuf = tgt;
|
184 david.dillard 1.105 UTF8toUTF16((const Uint8 **)&str, (Uint8 *)&str[c+1], &tgtBuf, &tgtBuf[2]);
|
185 chuck 1.102 c16a.append(tgt[0]);
186 if (tgt[1])
|
187 david.dillard 1.105 {
|
188 chuck 1.102 // Its a utf-16 surrogate pair (uses 2 Char16's)
189 c16a.append(tgt[1]);
190 }
|
191 david.dillard 1.105
|
192 chuck 1.102 // bump by the trailing byte count
193 i += c;
194 }
195 }
196
197 i++;
198 } // end while
199
200 c16a.append('\0');
201 }
202
|
203 kumpf 1.43 class StringRep
204 {
205 public:
206 StringRep()
207 {}
208 StringRep(const StringRep& r)
209 : c16a(r.c16a)
210 {}
211 StringRep(const Char16* str)
212 : c16a(str, _StrLen(str) + 1)
213 {}
214
215 Array<Char16> c16a;
216 };
217
|
218 mike 1.27 String::String()
219 {
|
220 kumpf 1.43 _rep = new StringRep;
221 _rep->c16a.append('\0');
|
222 mike 1.27 }
223
|
224 kumpf 1.39 String::String(const String& str)
|
225 mike 1.27 {
|
226 tony 1.66 if (str._rep != NULL)
227 {
|
228 kumpf 1.43 _rep = new StringRep(*str._rep);
|
229 tony 1.66 }
230 else
231 {
232 _rep = new StringRep();
233 }
|
234 kumpf 1.39 }
|
235 tony 1.66
|
236 mike 1.27
|
237 kumpf 1.39 String::String(const String& str, Uint32 n)
238 {
|
239 kumpf 1.43 _rep = new StringRep;
|
240 kumpf 1.55 assign(str.getChar16Data(), n);
|
241 kumpf 1.39 }
242
243 String::String(const Char16* str)
244 {
|
245 david.dillard 1.105 if ( str == 0 )
246 {
247 throw NullPointer();
248 }
249
|
250 kumpf 1.43 _rep = new StringRep(str);
|
251 mike 1.27 }
252
|
253 kumpf 1.39 String::String(const Char16* str, Uint32 n)
254 {
|
255 david.dillard 1.105 if ( str == 0 )
256 {
257 throw NullPointer();
258 }
259
|
260 kumpf 1.43 _rep = new StringRep;
|
261 kumpf 1.39 assign(str, n);
262 }
263
264 String::String(const char* str)
|
265 mike 1.27 {
|
266 david.dillard 1.105 if ( str == 0 )
267 {
268 throw NullPointer();
269 }
270
|
271 kumpf 1.43 _rep = new StringRep;
|
272 chuck 1.103 AutoPtr<StringRep> tempRep(_rep);
273 // An exception can be thrown, so use a temp AutoPtr.
|
274 chuck 1.102 _convertAndAppend(str, _rep->c16a, 0, 1);
|
275 chuck 1.103 tempRep.release();
|
276 mike 1.27 }
277
|
278 kumpf 1.39 String::String(const char* str, Uint32 n)
|
279 mike 1.27 {
|
280 david.dillard 1.105 if ( str == 0 )
281 {
282 throw NullPointer();
283 }
284
|
285 kumpf 1.43 _rep = new StringRep;
|
286 chuck 1.103 AutoPtr<StringRep> tempRep(_rep);
287 // An exception can be thrown, so use a temp AutoPtr.
|
288 chuck 1.102 _convertAndAppend(str, _rep->c16a, n, 0);
|
289 chuck 1.103 tempRep.release();
|
290 kumpf 1.39 }
|
291 mike 1.27
|
292 kumpf 1.39 String::~String()
293 {
|
294 kumpf 1.43 delete _rep;
|
295 mike 1.27 }
296
|
297 kumpf 1.39 String& String::operator=(const String& str)
|
298 mike 1.27 {
|
299 kumpf 1.82 if (&str != this)
300 {
301 assign(str);
302 }
303 return *this;
|
304 mike 1.27 }
305
|
306 kumpf 1.39 String& String::assign(const String& str)
|
307 mike 1.27 {
|
308 kumpf 1.43 _rep->c16a = str._rep->c16a;
|
309 kumpf 1.39 return *this;
|
310 mike 1.27 }
311
|
312 kumpf 1.39 String& String::assign(const Char16* str)
|
313 mike 1.27 {
|
314 david.dillard 1.105 if ( str == 0 )
315 {
316 throw NullPointer();
317 }
318
|
319 kumpf 1.43 _rep->c16a.clear();
320 _rep->c16a.append(str, _StrLen(str) + 1);
|
321 mike 1.27 return *this;
322 }
323
324 String& String::assign(const Char16* str, Uint32 n)
325 {
|
326 david.dillard 1.105 if ( str == 0 )
327 {
328 throw NullPointer();
329 }
330
|
331 kumpf 1.43 _rep->c16a.clear();
|
332 joyce.j 1.101 _rep->c16a.append(str, n);
|
333 kumpf 1.43 _rep->c16a.append('\0');
|
334 mike 1.27 return *this;
335 }
336
|
337 chuck 1.102 String& String::assign(const char* str)
338 {
|
339 david.dillard 1.105 if ( str == 0 )
340 {
341 throw NullPointer();
342 }
343
|
344 chuck 1.102 _rep->c16a.clear();
345 _convertAndAppend(str, _rep->c16a, 0, 1);
346 return *this;
347 }
348
|
349 kumpf 1.39 String& String::assign(const char* str, Uint32 n)
|
350 mike 1.27 {
|
351 david.dillard 1.105 if ( str == 0 )
352 {
353 throw NullPointer();
354 }
355
|
356 chuck 1.102 _rep->c16a.clear();
357 _convertAndAppend(str, _rep->c16a, n, 0);
|
358 mike 1.27 return *this;
359 }
360
|
361 kumpf 1.39 void String::clear()
362 {
|
363 kumpf 1.43 _rep->c16a.clear();
364 _rep->c16a.append('\0');
|
365 kumpf 1.39 }
366
|
367 kumpf 1.43 void String::reserveCapacity(Uint32 capacity)
|
368 kumpf 1.39 {
|
369 kumpf 1.45 _rep->c16a.reserveCapacity(capacity + 1);
|
370 kumpf 1.39 }
371
372 Uint32 String::size() const
373 {
|
374 gs.keenan 1.110 #if defined (PEGASUS_OS_VMS)
|
375 gs.keenan 1.106 //
376 // This prevents returning a minus number.
377 //
378 // Seems as though the first time through
379 // the XML parser something doesn't get
380 // initialized and there is no check for
381 // a negative number in the parser!
382 //
|
383 gs.keenan 1.110 Uint32 foo;
384 foo = _rep->c16a.size();
385 if (foo == 0)
386 {
387 return 0;
388 }
389 else
390 {
391 return (foo -1);
392 }
393 #else
|
394 kumpf 1.43 return _rep->c16a.size() - 1;
|
395 gs.keenan 1.110 #endif
|
396 kumpf 1.39 }
397
|
398 kumpf 1.55 const Char16* String::getChar16Data() const
|
399 kumpf 1.39 {
|
400 kumpf 1.43 return _rep->c16a.getData();
|
401 kumpf 1.39 }
402
|
403 kumpf 1.53 Char16& String::operator[](Uint32 index)
|
404 mike 1.27 {
|
405 kumpf 1.53 if (index > size())
|
406 david.dillard 1.105 throw IndexOutOfBoundsException();
|
407 mike 1.27
|
408 kumpf 1.53 return _rep->c16a[index];
|
409 mike 1.27 }
410
|
411 kumpf 1.53 const Char16 String::operator[](Uint32 index) const
|
412 mike 1.27 {
|
413 kumpf 1.53 if (index > size())
|
414 david.dillard 1.105 throw IndexOutOfBoundsException();
|
415 mike 1.27
|
416 kumpf 1.53 return _rep->c16a[index];
|
417 mike 1.27 }
418
|
419 kumpf 1.39 String& String::append(const Char16& c)
420 {
|
421 kumpf 1.43 _rep->c16a.insert(_rep->c16a.size() - 1, c);
|
422 kumpf 1.39 return *this;
423 }
424
|
425 mike 1.27 String& String::append(const Char16* str, Uint32 n)
426 {
|
427 david.dillard 1.105 if (str == 0)
|
428 joyce.j 1.101 {
429 throw NullPointer();
430 }
|
431 david.dillard 1.105
|
432 joyce.j 1.101 _rep->c16a.reserveCapacity(_rep->c16a.size() + n);
|
433 kumpf 1.43 _rep->c16a.remove(_rep->c16a.size() - 1);
|
434 joyce.j 1.101 _rep->c16a.append(str, n);
|
435 kumpf 1.43 _rep->c16a.append('\0');
|
436 mike 1.27 return *this;
437 }
438
|
439 kumpf 1.39 String& String::append(const String& str)
|
440 mike 1.27 {
|
441 kumpf 1.55 return append(str.getChar16Data(), str.size());
|
442 mike 1.27 }
443
|
444 kumpf 1.53 void String::remove(Uint32 index, Uint32 size)
|
445 mike 1.27 {
|
446 kumpf 1.39 if (size == PEG_NOT_FOUND)
|
447 david.dillard 1.105 size = this->size() - index;
|
448 mike 1.27
|
449 kumpf 1.53 if (index + size > this->size())
|
450 david.dillard 1.105 throw IndexOutOfBoundsException();
|
451 mike 1.27
|
452 kumpf 1.39 if (size)
|
453 david.dillard 1.105 _rep->c16a.remove(index, size);
|
454 mike 1.27 }
455
|
456 kumpf 1.53 String String::subString(Uint32 index, Uint32 length) const
|
457 mike 1.27 {
|
458 kumpf 1.53 if (index < size())
|
459 mike 1.27 {
|
460 david.dillard 1.105 if ((length == PEG_NOT_FOUND) || (length > size() - index))
461 length = size() - index;
|
462 mike 1.27
|
463 david.dillard 1.105 return String(getChar16Data() + index, length);
|
464 mike 1.27 }
|
465 david.dillard 1.105
466 return String();
|
467 mike 1.27 }
468
469 Uint32 String::find(Char16 c) const
470 {
|
471 kumpf 1.55 const Char16* first = getChar16Data();
|
472 mike 1.27
473 for (const Char16* p = first; *p; p++)
474 {
|
475 david.dillard 1.105 if (*p == c)
476 return p - first;
|
477 mike 1.27 }
478
479 return PEG_NOT_FOUND;
480 }
481
|
482 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
483 mike 1.30 {
|
484 kumpf 1.55 const Char16* data = getChar16Data();
|
485 mike 1.30
|
486 kumpf 1.53 for (Uint32 i = index, n = size(); i < n; i++)
|
487 mike 1.30 {
|
488 david.dillard 1.105 if (data[i] == c)
489 return i;
|
490 mike 1.30 }
491
492 return PEG_NOT_FOUND;
493 }
494
|
495 mike 1.27 Uint32 String::find(const String& s) const
496 {
|
497 kumpf 1.55 const Char16* pSubStr = s.getChar16Data();
498 const Char16* pStr = getChar16Data();
|
499 mike 1.27 Uint32 subStrLen = s.size();
500 Uint32 strLen = size();
501
|
502 mike 1.30 if (subStrLen > strLen)
503 {
504 return PEG_NOT_FOUND;
505 }
506
|
507 mike 1.27 // loop to find first char match
508 Uint32 loc = 0;
509 for( ; loc <= (strLen-subStrLen); loc++)
510 {
|
511 david.dillard 1.105 if (*pStr++ == *pSubStr) // match first char
512 {
513 // point to substr 2nd char
514 const Char16* p = pSubStr + 1;
515
516 // Test remaining chars for equal
517 Uint32 i = 1;
518 for (; i < subStrLen; i++)
519 if (*pStr++ != *p++ )
520 {pStr-=i; break;} // break from loop
521 if (i == subStrLen)
522 return loc;
523 }
|
524 mike 1.27 }
525 return PEG_NOT_FOUND;
526 }
527
528 Uint32 String::reverseFind(Char16 c) const
529 {
|
530 kumpf 1.55 const Char16* first = getChar16Data();
531 const Char16* last = getChar16Data() + size();
|
532 mike 1.27
533 while (last != first)
534 {
|
535 david.dillard 1.105 if (*--last == c)
536 return last - first;
|
537 mike 1.27 }
538
539 return PEG_NOT_FOUND;
540 }
541
542 void String::toLower()
543 {
|
544 david 1.69 #ifdef PEGASUS_HAS_ICU
|
545 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
546 david 1.90 {
|
547 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
548 // Context-sensitive prevents any optimizations that try to
549 // convert just the ascii before calling ICU.
550 // The string may shrink or expand after the convert.
551
552 int32_t sz = size();
553 UChar* destbuf = new UChar[sz + 1];
554 const UChar* srcbuf = (const UChar *)getChar16Data();
555 UErrorCode err = U_ZERO_ERROR;
556
557 int32_t needed = u_strToLower(destbuf, sz + 1 , srcbuf, sz, NULL, &err);
558 if (err == U_BUFFER_OVERFLOW_ERROR)
559 {
560 delete [] destbuf;
561 destbuf = new UChar[needed + 1];
562 err = U_ZERO_ERROR;
563 u_strToLower(destbuf, needed + 1 , srcbuf, sz, NULL, &err);
564 }
565 if (U_FAILURE(err))
566 {
567 delete [] destbuf;
568 yi.zhou 1.108 throw Exception(u_errorName(err));
569 }
|
570 chuck 1.99
|
571 yi.zhou 1.108 if (needed == sz)
572 {
573 Char16* from = (Char16*)destbuf;
574 for (Char16* to = &_rep->c16a[0]; *to; to++, from++)
575 {
576 *to = *from;
577 }
578 }
579 else
|
580 chuck 1.99 {
|
581 yi.zhou 1.108 assign((Char16 *)destbuf, needed);
|
582 chuck 1.99 }
|
583 yi.zhou 1.108
584 delete [] destbuf;
|
585 david 1.90 }
586 else
|
587 yi.zhou 1.108 #endif
|
588 david 1.90 {
|
589 yi.zhou 1.108 for (Char16* p = &_rep->c16a[0]; *p; p++)
590 {
591 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
|
592 david.dillard 1.105 *p = tolower(*p);
|
593 yi.zhou 1.108 }
|
594 mike 1.27 }
|
595 kumpf 1.39 }
596
|
597 chuck 1.99 void String::toUpper()
|
598 david 1.90 {
599 #ifdef PEGASUS_HAS_ICU
|
600 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
|
601 chuck 1.99 {
|
602 yi.zhou 1.108 // This will do a locale-insensitive, but context-sensitive convert.
603 // Context-sensitive prevents any optimizations that try to
604 // convert just the ascii before calling ICU.
605 // The string may shrink or expand after the convert.
606
607 int32_t sz = size();
608 UChar* destbuf = new UChar[sz + 1];
609 const UChar* srcbuf = (const UChar *)getChar16Data();
610 UErrorCode err = U_ZERO_ERROR;
611
612 int32_t needed = u_strToUpper(destbuf, sz + 1 , srcbuf, sz, NULL, &err);
613 if (err == U_BUFFER_OVERFLOW_ERROR)
614 {
615 delete [] destbuf;
616 destbuf = new UChar[needed + 1];
617 err = U_ZERO_ERROR;
618 u_strToUpper(destbuf, needed + 1 , srcbuf, sz, NULL, &err);
619 }
620 if (U_FAILURE(err))
621 {
622 delete [] destbuf;
623 yi.zhou 1.108 throw Exception(u_errorName(err));
624 }
|
625 chuck 1.99
|
626 yi.zhou 1.108 if (needed == sz)
627 {
628 Char16* from = (Char16*)destbuf;
629 for (Char16* to = &_rep->c16a[0]; *to; to++, from++)
630 {
631 *to = *from;
632 }
633 }
634 else
|
635 chuck 1.99 {
|
636 yi.zhou 1.108 assign((Char16 *)destbuf, needed);
|
637 chuck 1.99 }
|
638 yi.zhou 1.108
639 delete [] destbuf;
|
640 david 1.90 }
641 else
|
642 yi.zhou 1.108 #endif
|
643 david 1.90 {
|
644 yi.zhou 1.108 for (Char16* p = &_rep->c16a[0]; *p; p++)
645 {
|
646 david.dillard 1.105 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
647 *p = toupper(*p);
|
648 yi.zhou 1.108 }
|
649 david 1.91 }
|
650 david 1.90 }
651
|
652 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
653 kumpf 1.39 {
|
654 kumpf 1.55 const Char16* s1c16 = s1.getChar16Data();
655 const Char16* s2c16 = s2.getChar16Data();
|
656 kumpf 1.39
657 while (n--)
|
658 mike 1.27 {
|
659 david.dillard 1.105 int r = *s1c16++ - *s2c16++;
|
660 mike 1.27
|
661 david.dillard 1.105 if (r)
662 return r;
|
663 mike 1.27 }
664
665 return 0;
666 }
667
|
668 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
669 mike 1.30 {
|
670 kumpf 1.55 const Char16* s1c16 = s1.getChar16Data();
671 const Char16* s2c16 = s2.getChar16Data();
|
672 kumpf 1.43
673 while (*s1c16 && *s2c16)
|
674 mike 1.30 {
|
675 david.dillard 1.105 int r = *s1c16++ - *s2c16++;
|
676 mike 1.30
|
677 david.dillard 1.105 if (r)
678 return r;
|
679 mike 1.30 }
680
|
681 kumpf 1.43 if (*s2c16)
|
682 david.dillard 1.105 return -1;
|
683 kumpf 1.43 else if (*s1c16)
|
684 david.dillard 1.105 return 1;
|
685 mike 1.30
686 return 0;
687 }
688
|
689 kumpf 1.40 int String::compareNoCase(const String& s1, const String& s2)
690 {
|
691 david 1.69 #ifdef PEGASUS_HAS_ICU
|
692 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
693 {
694 return u_strcasecmp((const UChar*)s1.getChar16Data(),
695 (const UChar*)s2.getChar16Data(),
696 U_FOLD_CASE_DEFAULT);
697 }
698 #endif
|
699 kumpf 1.55 const Char16* _s1 = s1.getChar16Data();
700 const Char16* _s2 = s2.getChar16Data();
|
701 kumpf 1.40
702 while (*_s1 && *_s2)
703 {
704 int r;
705
|
706 kumpf 1.46 if (*_s1 <= PEGASUS_MAX_PRINTABLE_CHAR &&
707 *_s2 <= PEGASUS_MAX_PRINTABLE_CHAR)
|
708 kumpf 1.40 {
709 r = tolower(*_s1++) - tolower(*_s2++);
710 }
711 else
712 {
713 r = *_s1++ - *_s2++;
714 }
715
|
716 david.dillard 1.105 if (r)
717 return r;
|
718 kumpf 1.40 }
719
720 if (*_s2)
|
721 david.dillard 1.105 return -1;
|
722 kumpf 1.40 else if (*_s1)
|
723 david.dillard 1.105 return 1;
|
724 kumpf 1.40
725 return 0;
726 }
727
|
728 kumpf 1.39 Boolean String::equal(const String& str1, const String& str2)
|
729 mike 1.27 {
|
730 kumpf 1.43 return String::compare(str1, str2) == 0;
|
731 mike 1.27 }
732
|
733 kumpf 1.39 Boolean String::equalNoCase(const String& str1, const String& str2)
|
734 mike 1.27 {
|
735 david 1.69 #ifdef PEGASUS_HAS_ICU
|
736 chuck 1.99 return compareNoCase(str1, str2) == 0;
|
737 david 1.69 #else
|
738 kumpf 1.39 if (str1.size() != str2.size())
|
739 david.dillard 1.105 return false;
|
740 kumpf 1.39
|
741 kumpf 1.55 const Char16* p = str1.getChar16Data();
742 const Char16* q = str2.getChar16Data();
|
743 kumpf 1.39
744 Uint32 n = str1.size();
|
745 mike 1.27
|
746 kumpf 1.39 while (n--)
747 {
|
748 david.dillard 1.105 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR &&
|
749 kumpf 1.46 *q <= PEGASUS_MAX_PRINTABLE_CHAR)
|
750 david.dillard 1.105 {
751 if (tolower(*p++) != tolower(*q++))
752 return false;
753 }
754 else if (*p++ != *q++)
755 return false;
|
756 kumpf 1.39 }
|
757 mike 1.28
|
758 kumpf 1.39 return true;
|
759 david 1.69 #endif
760 }
761
|
762 mike 1.27
|
763 david 1.90 CString String::getCString() const
|
764 david 1.69 {
|
765 david 1.79 Uint32 n = 3*size() + 1;
|
766 david 1.69 char* str = new char[n];
767
768 const Char16* msg16 = getChar16Data();
769
770 const Uint16 *strsrc = (Uint16 *)msg16;
|
771 david 1.71 Uint16 *endsrc = (Uint16 *)&msg16[size()+1];
|
772 david 1.69
773 Uint8 *strtgt = (Uint8 *)str;
774 Uint8 *endtgt = (Uint8 *)&str[n];
775
776 UTF16toUTF8 (&strsrc,
|
777 david.dillard 1.105 endsrc,
778 &strtgt,
779 endtgt);
780
781 char* str1 = new char[strlen(str)+1];
782 strcpy(str1,str);
783 delete [] str;
|
784 david 1.71
785 return CString(str1);
|
786 david 1.69 }
|
787 kumpf 1.42
|
788 kumpf 1.65 #if 0
|
789 kumpf 1.42 // ATTN-RK-P3-20020603: This code is not completely correct
|
790 karl 1.36 // Wildcard String matching function that may be useful in the future
791 // The following code was provided by Bob Blair.
792
793 /* _StringMatch Match input MatchString against a GLOB style pattern
794 Note that MatchChar is the char type so that this source
795 is portable to different string types. This is an internal function
|
796 david.dillard 1.105
|
797 karl 1.36 Results: The return value is 1 if string matches pattern, and
|
798 david.dillard 1.105 0 otherwise. The matching operation permits the following
799 special characters in the pattern: *?\[] (see the manual
800 entry for details on what these mean).
801
|
802 chuck 1.78
|
803 karl 1.36 Side effects: None.
804 */
|
805 david.dillard 1.105
|
806 karl 1.36 /* MatchChar defined as a separate entity because this function source used
|
807 david.dillard 1.105 elsewhere was an unsigned char *. Here we use Uint16 to maintain 16 bit
|
808 karl 1.36 size.
809 */
810 typedef Uint16 MatchChar;
811
812 inline Uint16 _ToLower(Uint16 ch)
813 {
|
814 david 1.69 // ICU_TODO: If ICU is available we should do this the correct way.
|
815 kumpf 1.46 return ch <= PEGASUS_MAX_PRINTABLE_CHAR ? tolower(char(ch)) : ch;
|
816 karl 1.36 }
817
818 inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase)
819 {
|
820 david 1.69 // ICU_TODO: If ICU is available we should do this the correct way.
|
821 karl 1.36 if (nocase)
|
822 david.dillard 1.105 return _ToLower(ch1) == _ToLower(ch2);
823
824 return ch1 == ch2;
|
825 karl 1.36 }
|
826 mike 1.28
|
827 kumpf 1.35
|
828 karl 1.36 static const MatchChar *
829 _matchrange(const MatchChar *range, MatchChar c, int nocase)
830 {
831 const MatchChar *p = range;
832 const MatchChar *rstart = range + 1;
833 const MatchChar *rend = 0;
834 MatchChar compchar;
835
|
836 kumpf 1.35 for (rend = rstart; *rend && *rend != ']'; rend++);
|
837 karl 1.36 if (*rend == ']') { // if there is an end to this pattern
|
838 kumpf 1.35 for (compchar = *rstart; rstart != rend; rstart++) {
|
839 karl 1.36 if (_Equal(*rstart, c, nocase))
|
840 kumpf 1.35 return ++rend;
841 if (*rstart == '-') {
842 rstart++;
843 if (c >= compchar && c <= *rstart)
844 return ++rend;
845 }
846 }
847 }
|
848 karl 1.36 return (const MatchChar *)0;
|
849 kumpf 1.35 }
850
851 static int
|
852 david.dillard 1.105 _StringMatch(
853 const MatchChar *testString,
|
854 karl 1.36 const MatchChar *pattern,
|
855 david.dillard 1.105 int nocase ) /* Ignore case if this is true */
|
856 karl 1.36 {
857 const MatchChar *pat = pattern;
858 const MatchChar *str = testString;
|
859 kumpf 1.35 unsigned int done = 0;
860 unsigned int res = 0; // the result: 1 == match
861
862 while (!done) { // main loop walks through pattern and test string
863 //cerr << "Comparing <" << *pat << "> and <" << *str << ">" << endl;
864 if (!*pat) { //end of pattern
865 done = 1; // we're done
866 if (!*str) //end of test, too?
867 res = 1; // then we matched
868 } else { //Not end of pattern
869 if (!*str) { // but end of test
870 done = 1; // We're done
871 if (*pat == '*') // If pattern openends
872 res = 1; // then we matched
873 } else { //Not end of test
874 if (*pat == '*') { //Ambiguuity found
875 if (!*++pat) { //and it ends pattern
876 done = 1; // then we're done
877 res = 1; // and match
878 } else { //if it doesn't end
879 while (!done) { // until we're done
|
880 karl 1.36 if (_StringMatch(str, pat, nocase)) { // we recurse
|
881 kumpf 1.35 done = 1; //if it recurses true
882 res = 1; // we done and match
883 } else { //it recurses false
884 if (!*str) // see if test is done
885 done = 1; // yes: we done
886 else // not done:
887 str++; // keep testing
888 } // end test on recursive call
889 } // end looping on recursive calls
890 } // end logic when pattern is ambiguous
891 } else { //pattern not ambiguus
892 if (*pat == '?') { //pattern is 'any'
893 pat++, str++; // so move along
894 } else if (*pat == '[') { //see if it's a range
|
895 karl 1.36 pat = _matchrange(pat, *str, nocase); // and is a match
|
896 kumpf 1.35 if (!pat) { //It is not a match
897 done = 1; // we're done
|
898 kumpf 1.42 res = 0; // no match
|
899 kumpf 1.35 } else { //Range matches
900 str++, pat++; // keep going
901 }
902 } else { // only case left is individual characters
|
903 karl 1.36 if (!_Equal(*pat++, *str++, nocase)) // if they don't match
|
904 kumpf 1.35 done = 1; // bail.
905 }
906 } // end ("pattern is not ambiguous (*)" logic
907 } // end logic when pattern and string still have data
908 } // end logic when pattern still has data
909 } // end main loop
910 return res;
911 }
912
|
913 kumpf 1.39
|
914 kumpf 1.65 /** match matches a string against a GLOB style pattern.
915 Return trues if the String parameter matches the pattern. C-Shell style
|
916 david.dillard 1.105 glob matching is used.
|
917 kumpf 1.65 @param str String to be matched against the pattern
918 @param pattern Pattern to use in the match
919 @return Boolean true if str matches pattern
920 The pattern definition is as follows:
921 <pre>
922 * Matches any number of any characters
923 ? Match exactly one character
924 [chars] Match any character in chars
925 [chara-charb] Match any character in the range between chara and charb
926 </pre>
927 The literal characters *, ?, [, ] can be included in a string by
928 escaping them with backslash "\". Ranges of characters can be concatenated.
929 <pre>
930 examples:
931 Boolean result = String::match("This is a test", "*is*");
932 Boolean works = String::match("abcdef123", "*[0-9]");
933 </pre>
934 */
|
935 karl 1.36 Boolean String::match(const String& str, const String& pattern)
936 {
937 return _StringMatch(
|
938 david.dillard 1.105 (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 0) != 0;
|
939 karl 1.36 }
940
|
941 kumpf 1.65 /** matchNoCase Matches a String against a GLOB style pattern independent
|
942 david.dillard 1.105 of case.
|
943 kumpf 1.65 Returns true if the str parameter matches the pattern. C-Shell style
|
944 david.dillard 1.104 glob matching is used. Ignore case in all comparisons. Case is
|
945 kumpf 1.65 ignored in the match.
946 @parm str String containing the string to be matched\
947 @parm pattern GLOB style patterh to use in the match.
948 @return Boolean true if str matches patterh
|
949 david.dillard 1.104 @see match
|
950 kumpf 1.65 */
|
951 karl 1.36 Boolean String::matchNoCase(const String& str, const String& pattern)
952 {
953 return _StringMatch(
|
954 david.dillard 1.105 (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 1) != 0;
|
955 kumpf 1.39 }
|
956 kumpf 1.65 #endif
|
957 kumpf 1.39
958
959 ///////////////////////////////////////////////////////////////////////////////
960 //
961 // String-related functions
962 //
963 ///////////////////////////////////////////////////////////////////////////////
964
965 Boolean operator==(const String& str1, const String& str2)
966 {
967 return String::equal(str1, str2);
968 }
969
970 Boolean operator==(const String& str1, const char* str2)
971 {
972 return String::equal(str1, str2);
973 }
974
975 Boolean operator==(const char* str1, const String& str2)
976 {
977 return String::equal(str1, str2);
978 kumpf 1.39 }
979
980 Boolean operator!=(const String& str1, const String& str2)
981 {
982 return !String::equal(str1, str2);
983 }
984
|
985 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
986 kumpf 1.39 {
|
987 david 1.72
|
988 david 1.69 #if defined(PEGASUS_OS_OS400)
|
989 david 1.93 CString cstr = str.getCString();
|
990 david 1.69 const char* utf8str = cstr;
991
992 os << utf8str;
993
|
994 humberto 1.76 #elif defined(PEGASUS_HAS_ICU)
|
995 yi.zhou 1.108 if (InitializeICU::initICUSuccessful())
996 {
|
997 david.dillard 1.105 char *buf = NULL;
998 const int size = str.size() * 6;
999 UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size());
1000 Uint32 bufsize = UniStr.extract(0,size,buf);
1001
1002 buf = new char[bufsize+1];
1003 UniStr.extract(0,bufsize,buf);
1004 os << buf;
1005 os.flush();
1006 delete [] buf;
|
1007 yi.zhou 1.108 }
1008 else
1009 #endif // End of PEGASUS_HAS_ICU #else leg.
1010 {
|
1011 david.dillard 1.105 for (Uint32 i = 0, n = str.size(); i < n; i++)
1012 {
1013 Uint16 code = str[i];
1014
1015 if (code > 0 && code <= PEGASUS_MAX_PRINTABLE_CHAR)
1016 {
1017 os << char(code);
1018 }
1019 else
1020 {
1021 // Print in hex format:
1022 char buffer[8];
1023 sprintf(buffer, "\\x%04X", code);
1024 os << buffer;
1025 }
1026 }
|
1027 yi.zhou 1.108 }
|
1028 kumpf 1.39
1029 return os;
1030 }
1031
1032 String operator+(const String& str1, const String& str2)
1033 {
1034 return String(str1).append(str2);
1035 }
1036
1037 Boolean operator<(const String& str1, const String& str2)
1038 {
|
1039 kumpf 1.43 return String::compare(str1, str2) < 0;
|
1040 kumpf 1.39 }
1041
1042 Boolean operator<=(const String& str1, const String& str2)
1043 {
|
1044 kumpf 1.43 return String::compare(str1, str2) <= 0;
|
1045 kumpf 1.39 }
1046
1047 Boolean operator>(const String& str1, const String& str2)
1048 {
|
1049 kumpf 1.43 return String::compare(str1, str2) > 0;
|
1050 kumpf 1.39 }
1051
1052 Boolean operator>=(const String& str1, const String& str2)
1053 {
|
1054 kumpf 1.43 return String::compare(str1, str2) >= 0;
|
1055 kumpf 1.39 }
1056
|
1057 mike 1.27 PEGASUS_NAMESPACE_END
|