1 karl 1.85 //%2003////////////////////////////////////////////////////////////////////////
|
2 mike 1.27 //
|
3 karl 1.85 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Development
4 // Company, L. P., IBM Corp., The Open Group, Tivoli Systems.
5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L. P.;
6 // IBM Corp.; EMC Corporation, The Open Group.
|
7 mike 1.27 //
8 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
9 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
10 // deal in the Software without restriction, including without limitation the
11 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
12 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
13 // furnished to do so, subject to the following conditions:
14 //
|
15 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
16 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
17 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
18 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
19 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
21 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 //
24 //==============================================================================
25 //
26 // Author: Mike Brasher (mbrasher@bmc.com)
27 //
|
28 kumpf 1.39 // Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
|
29 mike 1.27 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
32
33 #include <cctype>
|
34 kumpf 1.64 #include <cstring>
|
35 mike 1.27 #include "String.h"
|
36 kumpf 1.43 #include "Array.h"
|
37 kumpf 1.48 #include "InternalException.h"
|
38 mike 1.27 #include <iostream>
|
39 kumpf 1.63 #include <fstream>
|
40 kumpf 1.86 #ifdef PEGASUS_USE_DEPRECATED_INTERFACES
|
41 kumpf 1.60 #include "System.h"
42 #endif
|
43 mike 1.27
|
44 david 1.69 #include "CommonUTF.h"
45
46 #ifdef PEGASUS_HAS_ICU
|
47 chuck 1.74 #include <unicode/unistr.h>
|
48 david 1.69 #endif
49
|
50 mike 1.28 PEGASUS_USING_STD;
51
|
52 mike 1.27 PEGASUS_NAMESPACE_BEGIN
53
|
54 kumpf 1.39 ///////////////////////////////////////////////////////////////////////////////
55 //
|
56 kumpf 1.54 // CString
57 //
58 ///////////////////////////////////////////////////////////////////////////////
59
60 CString::CString()
61 : _rep(0)
62 {
63 }
64
65 CString::CString(const CString& cstr)
66 {
|
67 kumpf 1.82 _rep = 0;
68
69 if (cstr._rep)
70 {
71 _rep = (void*)new char[strlen((char*)cstr._rep)+1];
72 strcpy((char*)_rep, (char*)cstr._rep);
73 }
|
74 kumpf 1.54 }
75
76 CString::CString(char* cstr)
77 : _rep(cstr)
78 {
79 }
80
81 CString::~CString()
82 {
83 if (_rep)
|
84 kumpf 1.82 {
|
85 kumpf 1.59 delete [] (char*)_rep;
|
86 kumpf 1.82 }
|
87 kumpf 1.54 }
88
|
89 kumpf 1.56 CString& CString::operator=(const CString& cstr)
90 {
|
91 kumpf 1.82 if (&cstr != this)
|
92 kumpf 1.81 {
|
93 kumpf 1.82 if (_rep)
94 {
95 delete [] (char*)_rep;
96 _rep = 0;
97 }
98 if (cstr._rep)
99 {
100 _rep = (char*)new char[strlen((char*)cstr._rep)+1];
101 strcpy((char*)_rep, (char*)cstr._rep);
102 }
|
103 kumpf 1.81 }
|
104 kumpf 1.56 return *this;
105 }
106
|
107 kumpf 1.54 CString::operator const char*() const
108 {
|
109 kumpf 1.59 return (char*)_rep;
|
110 kumpf 1.54 }
111
112 ///////////////////////////////////////////////////////////////////////////////
113 //
|
114 kumpf 1.39 // String
115 //
116 ///////////////////////////////////////////////////////////////////////////////
117
|
118 kumpf 1.37 const String String::EMPTY = String();
|
119 mike 1.27
|
120 kumpf 1.38 Uint32 _strnlen(const char* str, Uint32 n)
121 {
122 if (!str)
123 throw NullPointer();
124
125 for (Uint32 i=0; i<n; i++)
126 {
127 if (!*str)
128 {
129 return i;
130 }
131 }
132
133 return n;
134 }
135
136 Uint32 _strnlen(const Char16* str, Uint32 n)
137 {
138 if (!str)
139 throw NullPointer();
140
141 kumpf 1.38 for (Uint32 i=0; i<n; i++)
142 {
143 if (!*str)
144 {
145 return i;
146 }
147 }
148
149 return n;
150 }
151
|
152 kumpf 1.39 inline Uint32 _StrLen(const char* str)
|
153 mike 1.27 {
154 if (!str)
155 throw NullPointer();
156
157 return strlen(str);
158 }
159
|
160 kumpf 1.39 inline Uint32 _StrLen(const Char16* str)
|
161 mike 1.27 {
162 if (!str)
163 throw NullPointer();
164
165 Uint32 n = 0;
166
167 while (*str++)
168 n++;
169
170 return n;
171 }
172
|
173 kumpf 1.43 class StringRep
174 {
175 public:
176 StringRep()
177 {}
178 StringRep(const StringRep& r)
179 : c16a(r.c16a)
180 {}
181 StringRep(const Char16* str)
182 : c16a(str, _StrLen(str) + 1)
183 {}
184
185 Array<Char16> c16a;
186 };
187
|
188 mike 1.27 String::String()
189 {
|
190 kumpf 1.43 _rep = new StringRep;
191 _rep->c16a.append('\0');
|
192 mike 1.27 }
193
|
194 kumpf 1.39 String::String(const String& str)
|
195 mike 1.27 {
|
196 tony 1.66 if (str._rep != NULL)
197 {
|
198 kumpf 1.43 _rep = new StringRep(*str._rep);
|
199 tony 1.66 }
200 else
201 {
202 _rep = new StringRep();
203 }
|
204 kumpf 1.39 }
|
205 tony 1.66
|
206 mike 1.27
|
207 kumpf 1.39 String::String(const String& str, Uint32 n)
208 {
|
209 kumpf 1.43 _rep = new StringRep;
|
210 kumpf 1.55 assign(str.getChar16Data(), n);
|
211 kumpf 1.39 }
212
213 String::String(const Char16* str)
214 {
|
215 kumpf 1.43 _rep = new StringRep(str);
|
216 mike 1.27 }
217
|
218 kumpf 1.39 String::String(const Char16* str, Uint32 n)
219 {
|
220 kumpf 1.43 _rep = new StringRep;
|
221 kumpf 1.39 assign(str, n);
222 }
223
224 String::String(const char* str)
|
225 mike 1.27 {
|
226 kumpf 1.43 _rep = new StringRep;
|
227 kumpf 1.39 assign(str);
|
228 mike 1.27 }
229
|
230 david 1.69 String::String(const char* str, const char* utfFlag)
231 {
232 _rep = new StringRep;
233
234 if(!memcmp(utfFlag,STRING_FLAG_UTF8,sizeof(STRING_FLAG_UTF8)))
235 {
236 assignUTF8(str);
237 }
238 else
239 {
240 assign(str);
241 }
242 }
243
|
244 kumpf 1.39 String::String(const char* str, Uint32 n)
|
245 mike 1.27 {
|
246 kumpf 1.43 _rep = new StringRep;
|
247 kumpf 1.39 assign(str, n);
248 }
|
249 mike 1.27
|
250 kumpf 1.39 String::~String()
251 {
|
252 kumpf 1.43 delete _rep;
|
253 mike 1.27 }
254
|
255 kumpf 1.39 String& String::operator=(const String& str)
|
256 mike 1.27 {
|
257 kumpf 1.82 if (&str != this)
258 {
259 assign(str);
260 }
261 return *this;
|
262 mike 1.27 }
263
|
264 kumpf 1.39 String& String::assign(const String& str)
|
265 mike 1.27 {
|
266 kumpf 1.43 _rep->c16a = str._rep->c16a;
|
267 kumpf 1.39 return *this;
|
268 mike 1.27 }
269
|
270 kumpf 1.39 String& String::assign(const Char16* str)
|
271 mike 1.27 {
|
272 kumpf 1.43 _rep->c16a.clear();
273 _rep->c16a.append(str, _StrLen(str) + 1);
|
274 mike 1.27 return *this;
275 }
276
277 String& String::assign(const Char16* str, Uint32 n)
278 {
|
279 kumpf 1.43 _rep->c16a.clear();
|
280 kumpf 1.38 Uint32 m = _strnlen(str, n);
|
281 kumpf 1.43 _rep->c16a.append(str, m);
282 _rep->c16a.append('\0');
|
283 mike 1.27 return *this;
284 }
285
|
286 kumpf 1.39 String& String::assign(const char* str)
|
287 mike 1.27 {
|
288 kumpf 1.43 _rep->c16a.clear();
|
289 kumpf 1.38
|
290 kumpf 1.39 Uint32 n = strlen(str) + 1;
|
291 kumpf 1.45 _rep->c16a.reserveCapacity(n);
|
292 mike 1.27
293 while (n--)
|
294 kumpf 1.67 _rep->c16a.append(Uint8(*str++));
|
295 mike 1.27
296 return *this;
297 }
298
|
299 kumpf 1.39 String& String::assign(const char* str, Uint32 n)
|
300 mike 1.27 {
|
301 kumpf 1.43 _rep->c16a.clear();
|
302 mike 1.27
|
303 kumpf 1.39 Uint32 _n = _strnlen(str, n);
|
304 kumpf 1.45 _rep->c16a.reserveCapacity(_n + 1);
|
305 mike 1.27
|
306 kumpf 1.39 while (_n--)
|
307 kumpf 1.67 _rep->c16a.append(Uint8(*str++));
|
308 mike 1.27
|
309 kumpf 1.43 _rep->c16a.append('\0');
|
310 mike 1.27
311 return *this;
312 }
313
|
314 kumpf 1.39 void String::clear()
315 {
|
316 kumpf 1.43 _rep->c16a.clear();
317 _rep->c16a.append('\0');
|
318 kumpf 1.39 }
319
|
320 kumpf 1.43 void String::reserveCapacity(Uint32 capacity)
|
321 kumpf 1.39 {
|
322 kumpf 1.45 _rep->c16a.reserveCapacity(capacity + 1);
|
323 kumpf 1.39 }
324
325 Uint32 String::size() const
326 {
|
327 kumpf 1.43 return _rep->c16a.size() - 1;
|
328 kumpf 1.39 }
329
|
330 kumpf 1.55 const Char16* String::getChar16Data() const
|
331 kumpf 1.39 {
|
332 kumpf 1.43 return _rep->c16a.getData();
|
333 kumpf 1.39 }
334
|
335 kumpf 1.54 CString String::getCString() const
|
336 mike 1.27 {
337 Uint32 n = size() + 1;
|
338 kumpf 1.54 char* str = new char[n];
|
339 mike 1.27 char* p = str;
|
340 kumpf 1.55 const Char16* q = getChar16Data();
|
341 mike 1.27
342 for (Uint32 i = 0; i < n; i++)
343 {
344 Uint16 c = *q++;
345 *p++ = char(c);
346
|
347 kumpf 1.54 //if (c & 0xff00)
348 // truncatedCharacters = true;
|
349 mike 1.27 }
350
|
351 kumpf 1.54 return CString(str);
|
352 kumpf 1.49 }
353
|
354 kumpf 1.53 Char16& String::operator[](Uint32 index)
|
355 mike 1.27 {
|
356 kumpf 1.53 if (index > size())
|
357 kumpf 1.49 throw IndexOutOfBoundsException();
|
358 mike 1.27
|
359 kumpf 1.53 return _rep->c16a[index];
|
360 mike 1.27 }
361
|
362 kumpf 1.53 const Char16 String::operator[](Uint32 index) const
|
363 mike 1.27 {
|
364 kumpf 1.53 if (index > size())
|
365 kumpf 1.49 throw IndexOutOfBoundsException();
|
366 mike 1.27
|
367 kumpf 1.53 return _rep->c16a[index];
|
368 mike 1.27 }
369
|
370 kumpf 1.39 String& String::append(const Char16& c)
371 {
|
372 kumpf 1.43 _rep->c16a.insert(_rep->c16a.size() - 1, c);
|
373 kumpf 1.39 return *this;
374 }
375
|
376 mike 1.27 String& String::append(const Char16* str, Uint32 n)
377 {
|
378 kumpf 1.38 Uint32 m = _strnlen(str, n);
|
379 kumpf 1.45 _rep->c16a.reserveCapacity(_rep->c16a.size() + m);
|
380 kumpf 1.43 _rep->c16a.remove(_rep->c16a.size() - 1);
381 _rep->c16a.append(str, m);
382 _rep->c16a.append('\0');
|
383 mike 1.27 return *this;
384 }
385
|
386 kumpf 1.39 String& String::append(const String& str)
|
387 mike 1.27 {
|
388 kumpf 1.55 return append(str.getChar16Data(), str.size());
|
389 mike 1.27 }
390
|
391 kumpf 1.53 void String::remove(Uint32 index, Uint32 size)
|
392 mike 1.27 {
|
393 kumpf 1.39 if (size == PEG_NOT_FOUND)
|
394 kumpf 1.53 size = this->size() - index;
|
395 mike 1.27
|
396 kumpf 1.53 if (index + size > this->size())
|
397 kumpf 1.49 throw IndexOutOfBoundsException();
|
398 mike 1.27
|
399 kumpf 1.39 if (size)
|
400 kumpf 1.53 _rep->c16a.remove(index, size);
|
401 mike 1.27 }
402
|
403 kumpf 1.53 String String::subString(Uint32 index, Uint32 length) const
|
404 mike 1.27 {
|
405 kumpf 1.53 if (index < size())
|
406 mike 1.27 {
|
407 kumpf 1.57 if ((length == PEG_NOT_FOUND) || (length > size() - index))
|
408 kumpf 1.53 length = size() - index;
|
409 mike 1.27
|
410 kumpf 1.55 return String(getChar16Data() + index, length);
|
411 mike 1.27 }
412 else
413 return String();
414 }
415
416 Uint32 String::find(Char16 c) const
417 {
|
418 kumpf 1.55 const Char16* first = getChar16Data();
|
419 mike 1.27
420 for (const Char16* p = first; *p; p++)
421 {
422 if (*p == c)
423 return p - first;
424 }
425
426 return PEG_NOT_FOUND;
427 }
428
|
429 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
430 mike 1.30 {
|
431 kumpf 1.55 const Char16* data = getChar16Data();
|
432 mike 1.30
|
433 kumpf 1.53 for (Uint32 i = index, n = size(); i < n; i++)
|
434 mike 1.30 {
435 if (data[i] == c)
436 return i;
437 }
438
439 return PEG_NOT_FOUND;
440 }
441
|
442 mike 1.27 Uint32 String::find(const String& s) const
443 {
|
444 kumpf 1.55 const Char16* pSubStr = s.getChar16Data();
445 const Char16* pStr = getChar16Data();
|
446 mike 1.27 Uint32 subStrLen = s.size();
447 Uint32 strLen = size();
448
|
449 mike 1.30 if (subStrLen > strLen)
450 {
451 return PEG_NOT_FOUND;
452 }
453
|
454 mike 1.27 // loop to find first char match
455 Uint32 loc = 0;
456 for( ; loc <= (strLen-subStrLen); loc++)
457 {
458 if (*pStr++ == *pSubStr) // match first char
459 {
460 // point to substr 2nd char
461 const Char16* p = pSubStr + 1;
462
463 // Test remaining chars for equal
464 Uint32 i = 1;
465 for (; i < subStrLen; i++)
466 if (*pStr++ != *p++ )
467 {pStr--; break;} // break from loop
468 if (i == subStrLen)
469 return loc;
470 }
471 }
472 return PEG_NOT_FOUND;
473 }
474
475 mike 1.27 Uint32 String::reverseFind(Char16 c) const
476 {
|
477 kumpf 1.55 const Char16* first = getChar16Data();
478 const Char16* last = getChar16Data() + size();
|
479 mike 1.27
480 while (last != first)
481 {
482 if (*--last == c)
483 return last - first;
484 }
485
486 return PEG_NOT_FOUND;
487 }
488
|
489 kumpf 1.62 // ATTN-RK-P3-20020509: Define case-sensitivity for non-English characters
|
490 chuck 1.78 // ATTN-CEC-20030913: ICU code added, but uses the server's locale. Look at adding
491 // a toLower( ) with Locale parameter - like ICU's toLower( )
|
492 mike 1.27 void String::toLower()
493 {
|
494 david 1.69 #ifdef PEGASUS_HAS_ICU
|
495 chuck 1.78 UnicodeString UniStr((const UChar *)_rep->c16a.getData());
496 UniStr.toLower();
|
497 david 1.80 UniStr.append((UChar)'\0');
498
499 assign((Char16*)UniStr.getBuffer());
|
500 david 1.69 #else
|
501 kumpf 1.43 for (Char16* p = &_rep->c16a[0]; *p; p++)
|
502 mike 1.27 {
|
503 kumpf 1.46 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
|
504 mike 1.27 *p = tolower(*p);
505 }
|
506 david 1.69 #endif
|
507 kumpf 1.39 }
508
|
509 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
510 kumpf 1.39 {
|
511 kumpf 1.55 const Char16* s1c16 = s1.getChar16Data();
512 const Char16* s2c16 = s2.getChar16Data();
|
513 kumpf 1.39
514 while (n--)
|
515 mike 1.27 {
|
516 kumpf 1.43 int r = *s1c16++ - *s2c16++;
|
517 mike 1.27
518 if (r)
519 return r;
520 }
521
522 return 0;
523 }
524
|
525 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
526 mike 1.30 {
|
527 kumpf 1.55 const Char16* s1c16 = s1.getChar16Data();
528 const Char16* s2c16 = s2.getChar16Data();
|
529 kumpf 1.43
530 while (*s1c16 && *s2c16)
|
531 mike 1.30 {
|
532 kumpf 1.43 int r = *s1c16++ - *s2c16++;
|
533 mike 1.30
534 if (r)
535 return r;
536 }
537
|
538 kumpf 1.43 if (*s2c16)
|
539 mike 1.30 return -1;
|
540 kumpf 1.43 else if (*s1c16)
|
541 mike 1.30 return 1;
542
543 return 0;
544 }
545
|
546 kumpf 1.40 int String::compareNoCase(const String& s1, const String& s2)
547 {
|
548 david 1.69 #ifdef PEGASUS_HAS_ICU
549 UnicodeString UniStr1((const UChar *)s1.getChar16Data(), (int32_t)s1.size());
550 UnicodeString UniStr2((const UChar *)s2.getChar16Data(), (int32_t)s2.size());
|
551 chuck 1.78 UniStr1.toLower();
552 UniStr2.toLower();
|
553 david 1.69 return (UniStr2.compare(UniStr1));
554 #else
|
555 kumpf 1.55 const Char16* _s1 = s1.getChar16Data();
556 const Char16* _s2 = s2.getChar16Data();
|
557 kumpf 1.40
558 while (*_s1 && *_s2)
559 {
560 int r;
561
|
562 kumpf 1.46 if (*_s1 <= PEGASUS_MAX_PRINTABLE_CHAR &&
563 *_s2 <= PEGASUS_MAX_PRINTABLE_CHAR)
|
564 kumpf 1.40 {
565 r = tolower(*_s1++) - tolower(*_s2++);
566 }
567 else
568 {
569 r = *_s1++ - *_s2++;
570 }
571
572 if (r)
573 return r;
574 }
575
576 if (*_s2)
577 return -1;
578 else if (*_s1)
579 return 1;
580
581 return 0;
|
582 david 1.69 #endif
|
583 kumpf 1.40 }
584
|
585 kumpf 1.39 Boolean String::equal(const String& str1, const String& str2)
|
586 mike 1.27 {
|
587 kumpf 1.43 return String::compare(str1, str2) == 0;
|
588 mike 1.27 }
589
|
590 kumpf 1.39 Boolean String::equalNoCase(const String& str1, const String& str2)
|
591 mike 1.27 {
|
592 david 1.69 #ifdef PEGASUS_HAS_ICU
593 UnicodeString UniStr1((const UChar *)str1.getChar16Data(), (int32_t)str1.size());
594 UnicodeString UniStr2((const UChar *)str2.getChar16Data(), (int32_t)str2.size());
|
595 chuck 1.78 UniStr1.toLower();
596 UniStr2.toLower();
|
597 david 1.69 return (UniStr1 == UniStr2);
598 #else
|
599 kumpf 1.39 if (str1.size() != str2.size())
600 return false;
601
|
602 kumpf 1.55 const Char16* p = str1.getChar16Data();
603 const Char16* q = str2.getChar16Data();
|
604 kumpf 1.39
605 Uint32 n = str1.size();
|
606 mike 1.27
|
607 kumpf 1.39 while (n--)
608 {
|
609 kumpf 1.46 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR &&
610 *q <= PEGASUS_MAX_PRINTABLE_CHAR)
|
611 kumpf 1.39 {
612 if (tolower(*p++) != tolower(*q++))
613 return false;
614 }
615 else if (*p++ != *q++)
616 return false;
617 }
|
618 mike 1.28
|
619 kumpf 1.39 return true;
|
620 david 1.69 #endif
621 }
622
623 // UTF8 specific code:
624 String& String::assignUTF8(const char* str)
625 {
626 _rep->c16a.clear();
627 Uint32 n = strlen(str) + 1;
628
629 const Uint8 *strsrc = (Uint8 *)str;
630 Uint8 *endsrc = (Uint8 *)&str[n-1];
631
632 Char16 *msg16 = new Char16[n];
633 Uint16 *strtgt = (Uint16 *)msg16;
634 Uint16 *endtgt = (Uint16 *)&msg16[n];
635
636 UTF8toUTF16(&strsrc,
637 endsrc,
638 &strtgt,
639 endtgt);
640
641 david 1.69 Uint32 count;
642
|
643 s.hills 1.87 for(count = 0; ((msg16[count]) != Char16(0x00)) && (count < (n - 1)); ++count);
|
644 david 1.69
645 _rep->c16a.append(msg16, count);
646
647 _rep->c16a.append('\0');
648
649 delete [] msg16;
650
651 return *this;
|
652 mike 1.27 }
653
|
654 david 1.69 CString String::getCStringUTF8() const
655 {
|
656 david 1.79 Uint32 n = 3*size() + 1;
|
657 david 1.69 char* str = new char[n];
658
659 const Char16* msg16 = getChar16Data();
660
661 const Uint16 *strsrc = (Uint16 *)msg16;
|
662 david 1.71 Uint16 *endsrc = (Uint16 *)&msg16[size()+1];
|
663 david 1.69
664 Uint8 *strtgt = (Uint8 *)str;
665 Uint8 *endtgt = (Uint8 *)&str[n];
666
667 UTF16toUTF8 (&strsrc,
668 endsrc,
669 &strtgt,
670 endtgt);
671
|
672 david 1.71 char* str1 = new char[strlen(str)+1];
673 strcpy(str1,str);
|
674 david 1.72 delete [] str;
|
675 david 1.71
676 return CString(str1);
|
677 david 1.69 }
678
679 Boolean String::isUTF8(const char *legal)
680 {
|
681 kumpf 1.73 char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
682
683 // Validate that the string is long enough to hold all the expected bytes.
684 // Note that if legal[0] == 0, numBytes will be 1.
|
685 chuck 1.74 for (char i=1; i<numBytes; i++)
|
686 kumpf 1.73 {
687 if (legal[i] == 0)
688 {
689 return false;
690 }
691 }
692
693 return (isValid_U8((const Uint8 *)legal, numBytes));
|
694 david 1.69 }
|
695 kumpf 1.42
|
696 kumpf 1.65 #if 0
|
697 kumpf 1.42 // ATTN-RK-P3-20020603: This code is not completely correct
|
698 karl 1.36 // Wildcard String matching function that may be useful in the future
699 // The following code was provided by Bob Blair.
700
/* _StringMatch Match input MatchString against a GLOB style pattern
   Note that MatchChar is the char type so that this source
   is portable to different string types. This is an internal function.

   Results: The return value is 1 if string matches pattern, and
   0 otherwise. The matching operation permits the following
   special characters in the pattern: *?\[] (see the manual
   entry for details on what these mean).

   Side effects: None.
*/
|
713 chuck 1.78
|
714 karl 1.36 /* MatchChar defined as a separate entity because this function source used
715 elsewhere was an unsigned char *. Here we use Uint16 to maintain 16 bit
716 size.
717 */
718 typedef Uint16 MatchChar;
719
720 inline Uint16 _ToLower(Uint16 ch)
721 {
|
722 david 1.69 // ICU_TODO: If ICU is available we should do this the correct way.
|
723 kumpf 1.46 return ch <= PEGASUS_MAX_PRINTABLE_CHAR ? tolower(char(ch)) : ch;
|
724 karl 1.36 }
725
726 inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase)
727 {
|
728 david 1.69 // ICU_TODO: If ICU is available we should do this the correct way.
|
729 karl 1.36 if (nocase)
730 return _ToLower(ch1) == _ToLower(ch2);
731 else
732 return ch1 == ch2;
733 }
|
734 mike 1.28
|
735 kumpf 1.35
|
736 karl 1.36 static const MatchChar *
737 _matchrange(const MatchChar *range, MatchChar c, int nocase)
738 {
739 const MatchChar *p = range;
740 const MatchChar *rstart = range + 1;
741 const MatchChar *rend = 0;
742 MatchChar compchar;
743
|
744 kumpf 1.35 for (rend = rstart; *rend && *rend != ']'; rend++);
|
745 karl 1.36 if (*rend == ']') { // if there is an end to this pattern
|
746 kumpf 1.35 for (compchar = *rstart; rstart != rend; rstart++) {
|
747 karl 1.36 if (_Equal(*rstart, c, nocase))
|
748 kumpf 1.35 return ++rend;
749 if (*rstart == '-') {
750 rstart++;
751 if (c >= compchar && c <= *rstart)
752 return ++rend;
753 }
754 }
755 }
|
756 karl 1.36 return (const MatchChar *)0;
|
757 kumpf 1.35 }
758
759 static int
|
760 karl 1.36 _StringMatch(
761 const MatchChar *testString,
762 const MatchChar *pattern,
763 int nocase ) /* Ignore case if this is true */
764 {
765 const MatchChar *pat = pattern;
766 const MatchChar *str = testString;
|
767 kumpf 1.35 unsigned int done = 0;
768 unsigned int res = 0; // the result: 1 == match
769
770 while (!done) { // main loop walks through pattern and test string
771 //cerr << "Comparing <" << *pat << "> and <" << *str << ">" << endl;
772 if (!*pat) { //end of pattern
773 done = 1; // we're done
774 if (!*str) //end of test, too?
775 res = 1; // then we matched
776 } else { //Not end of pattern
777 if (!*str) { // but end of test
778 done = 1; // We're done
779 if (*pat == '*') // If pattern openends
780 res = 1; // then we matched
781 } else { //Not end of test
782 if (*pat == '*') { //Ambiguuity found
783 if (!*++pat) { //and it ends pattern
784 done = 1; // then we're done
785 res = 1; // and match
786 } else { //if it doesn't end
787 while (!done) { // until we're done
|
788 karl 1.36 if (_StringMatch(str, pat, nocase)) { // we recurse
|
789 kumpf 1.35 done = 1; //if it recurses true
790 res = 1; // we done and match
791 } else { //it recurses false
792 if (!*str) // see if test is done
793 done = 1; // yes: we done
794 else // not done:
795 str++; // keep testing
796 } // end test on recursive call
797 } // end looping on recursive calls
798 } // end logic when pattern is ambiguous
799 } else { //pattern not ambiguus
800 if (*pat == '?') { //pattern is 'any'
801 pat++, str++; // so move along
802 } else if (*pat == '[') { //see if it's a range
|
803 karl 1.36 pat = _matchrange(pat, *str, nocase); // and is a match
|
804 kumpf 1.35 if (!pat) { //It is not a match
805 done = 1; // we're done
|
806 kumpf 1.42 res = 0; // no match
|
807 kumpf 1.35 } else { //Range matches
808 str++, pat++; // keep going
809 }
810 } else { // only case left is individual characters
|
811 karl 1.36 if (!_Equal(*pat++, *str++, nocase)) // if they don't match
|
812 kumpf 1.35 done = 1; // bail.
813 }
814 } // end ("pattern is not ambiguous (*)" logic
815 } // end logic when pattern and string still have data
816 } // end logic when pattern still has data
817 } // end main loop
818 return res;
819 }
820
|
821 kumpf 1.39
|
822 kumpf 1.65 /** match matches a string against a GLOB style pattern.
823 Return trues if the String parameter matches the pattern. C-Shell style
824 glob matching is used.
825 @param str String to be matched against the pattern
826 @param pattern Pattern to use in the match
827 @return Boolean true if str matches pattern
828 The pattern definition is as follows:
829 <pre>
830 * Matches any number of any characters
831 ? Match exactly one character
832 [chars] Match any character in chars
833 [chara-charb] Match any character in the range between chara and charb
834 </pre>
835 The literal characters *, ?, [, ] can be included in a string by
836 escaping them with backslash "\". Ranges of characters can be concatenated.
837 <pre>
838 examples:
839 Boolean result = String::match("This is a test", "*is*");
840 Boolean works = String::match("abcdef123", "*[0-9]");
841 </pre>
842 */
|
843 karl 1.36 Boolean String::match(const String& str, const String& pattern)
844 {
845 return _StringMatch(
|
846 kumpf 1.55 (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 0) != 0;
|
847 karl 1.36 }
848
|
849 kumpf 1.65 /** matchNoCase Matches a String against a GLOB style pattern independent
850 of case.
851 Returns true if the str parameter matches the pattern. C-Shell style
852 glob matching is used. Ignore case in all comparisons. Case is
853 ignored in the match.
854 @parm str String containing the string to be matched\
855 @parm pattern GLOB style patterh to use in the match.
856 @return Boolean true if str matches patterh
857 @SeeAlso match
858 */
|
859 karl 1.36 Boolean String::matchNoCase(const String& str, const String& pattern)
860 {
861 return _StringMatch(
|
862 kumpf 1.55 (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 1) != 0;
|
863 kumpf 1.39 }
|
864 kumpf 1.65 #endif
|
865 kumpf 1.39
866
867 ///////////////////////////////////////////////////////////////////////////////
868 //
869 // String-related functions
870 //
871 ///////////////////////////////////////////////////////////////////////////////
872
873 Boolean operator==(const String& str1, const String& str2)
874 {
875 return String::equal(str1, str2);
876 }
877
878 Boolean operator==(const String& str1, const char* str2)
879 {
880 return String::equal(str1, str2);
881 }
882
883 Boolean operator==(const char* str1, const String& str2)
884 {
885 return String::equal(str1, str2);
886 kumpf 1.39 }
887
888 Boolean operator!=(const String& str1, const String& str2)
889 {
890 return !String::equal(str1, str2);
891 }
892
|
893 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
894 kumpf 1.39 {
|
895 david 1.72
|
896 david 1.69 #if defined(PEGASUS_OS_OS400)
897 CString cstr = str.getCStringUTF8();
898 const char* utf8str = cstr;
899
900 os << utf8str;
901
|
902 humberto 1.76 #elif defined(PEGASUS_HAS_ICU)
|
903 humberto 1.83 if(os == cout || os == cerr){
904 char *buf = NULL;
905 const int size = str.size() * 6;
906 UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size());
907 Uint32 bufsize = UniStr.extract(0,size,buf);
908
909 buf = new char[bufsize+1];
910 UniStr.extract(0,bufsize,buf);
911 os << buf;
912 os.flush();
913 delete [] buf;
914 }else{
915 CString cstr = str.getCStringUTF8();
916 const char* utf8str = cstr;
917 os << utf8str;
918 }
|
919 humberto 1.76
|
920 david 1.69 #else
|
921 humberto 1.83 for (Uint32 i = 0, n = str.size(); i < n; i++)
922 {
923 Uint16 code = str[i];
|
924 david 1.69
|
925 humberto 1.83 if (code > 0 && code <= PEGASUS_MAX_PRINTABLE_CHAR)
926 {
927 os << char(code);
928 }
929 else
930 {
931 // Print in hex format:
932 char buffer[8];
933 sprintf(buffer, "\\x%04X", code);
934 os << buffer;
935 }
936 }
|
937 david 1.69 #endif // End of PEGASUS_HAS_ICU #else leg.
|
938 kumpf 1.39
939 return os;
940 }
941
942 String operator+(const String& str1, const String& str2)
943 {
944 return String(str1).append(str2);
945 }
946
947 Boolean operator<(const String& str1, const String& str2)
948 {
|
949 kumpf 1.43 return String::compare(str1, str2) < 0;
|
950 kumpf 1.39 }
951
952 Boolean operator<=(const String& str1, const String& str2)
953 {
|
954 kumpf 1.43 return String::compare(str1, str2) <= 0;
|
955 kumpf 1.39 }
956
957 Boolean operator>(const String& str1, const String& str2)
958 {
|
959 kumpf 1.43 return String::compare(str1, str2) > 0;
|
960 kumpf 1.39 }
961
962 Boolean operator>=(const String& str1, const String& str2)
963 {
|
964 kumpf 1.43 return String::compare(str1, str2) >= 0;
|
965 kumpf 1.39 }
966
|
967 kumpf 1.86 #ifdef PEGASUS_USE_DEPRECATED_INTERFACES
|
968 kumpf 1.39 int CompareNoCase(const char* s1, const char* s2)
969 {
|
970 kumpf 1.60 return System::strcasecmp(s1, s2);
|
971 kumpf 1.39 }
|
972 kumpf 1.60 #endif
|
973 kumpf 1.39
|
974 mike 1.27 PEGASUS_NAMESPACE_END
|