1 mike 1.27 //%/////////////////////////////////////////////////////////////////////////////
2 //
|
3 kumpf 1.41 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
4 // The Open Group, Tivoli Systems
|
5 mike 1.27 //
6 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
7 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
8 // deal in the Software without restriction, including without limitation the
9 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
10 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
11 // furnished to do so, subject to the following conditions:
12 //
|
13 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
14 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
15 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
16 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
17 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
19 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 //
22 //==============================================================================
23 //
24 // Author: Mike Brasher (mbrasher@bmc.com)
25 //
|
26 kumpf 1.39 // Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
|
27 mike 1.27 //
28 //%/////////////////////////////////////////////////////////////////////////////
29
30
31 #include <cctype>
|
32 kumpf 1.64 #include <cstring>
|
33 mike 1.27 #include "String.h"
|
34 kumpf 1.43 #include "Array.h"
|
35 kumpf 1.48 #include "InternalException.h"
|
36 mike 1.27 #include <iostream>
|
37 kumpf 1.63 #include <fstream>
|
38 kumpf 1.61 #ifndef PEGASUS_REMOVE_DEPRECATED
|
39 kumpf 1.60 #include "System.h"
40 #endif
|
41 mike 1.27
|
42 david 1.69 #include "CommonUTF.h"
43
44 #ifdef PEGASUS_HAS_ICU
|
45 chuck 1.74 #include <unicode/unistr.h>
|
46 david 1.69 #endif
47
|
48 mike 1.28 PEGASUS_USING_STD;
49
|
50 mike 1.27 PEGASUS_NAMESPACE_BEGIN
51
|
52 kumpf 1.39 ///////////////////////////////////////////////////////////////////////////////
53 //
|
54 kumpf 1.54 // CString
55 //
56 ///////////////////////////////////////////////////////////////////////////////
57
58 CString::CString()
59 : _rep(0)
60 {
61 }
62
63 CString::CString(const CString& cstr)
64 {
|
65 kumpf 1.59 _rep = (void*)new char[strlen((char*)cstr._rep)+1];
66 strcpy((char*)_rep, (char*)cstr._rep);
|
67 kumpf 1.54 }
68
69 CString::CString(char* cstr)
70 : _rep(cstr)
71 {
72 }
73
74 CString::~CString()
75 {
76 if (_rep)
|
77 kumpf 1.59 delete [] (char*)_rep;
|
78 kumpf 1.54 }
79
|
80 kumpf 1.56 CString& CString::operator=(const CString& cstr)
81 {
|
82 kumpf 1.59 _rep = (char*)new char[strlen((char*)cstr._rep)+1];
83 strcpy((char*)_rep, (char*)cstr._rep);
|
84 kumpf 1.56 return *this;
85 }
86
|
87 kumpf 1.54 CString::operator const char*() const
88 {
|
89 kumpf 1.59 return (char*)_rep;
|
90 kumpf 1.54 }
91
92 ///////////////////////////////////////////////////////////////////////////////
93 //
|
94 kumpf 1.39 // String
95 //
96 ///////////////////////////////////////////////////////////////////////////////
97
|
98 kumpf 1.37 const String String::EMPTY = String();
|
99 mike 1.27
|
100 kumpf 1.38 Uint32 _strnlen(const char* str, Uint32 n)
101 {
102 if (!str)
103 throw NullPointer();
104
105 for (Uint32 i=0; i<n; i++)
106 {
107 if (!*str)
108 {
109 return i;
110 }
111 }
112
113 return n;
114 }
115
116 Uint32 _strnlen(const Char16* str, Uint32 n)
117 {
118 if (!str)
119 throw NullPointer();
120
121 kumpf 1.38 for (Uint32 i=0; i<n; i++)
122 {
123 if (!*str)
124 {
125 return i;
126 }
127 }
128
129 return n;
130 }
131
|
132 kumpf 1.39 inline Uint32 _StrLen(const char* str)
|
133 mike 1.27 {
134 if (!str)
135 throw NullPointer();
136
137 return strlen(str);
138 }
139
|
140 kumpf 1.39 inline Uint32 _StrLen(const Char16* str)
|
141 mike 1.27 {
142 if (!str)
143 throw NullPointer();
144
145 Uint32 n = 0;
146
147 while (*str++)
148 n++;
149
150 return n;
151 }
152
|
153 kumpf 1.43 class StringRep
154 {
155 public:
156 StringRep()
157 {}
158 StringRep(const StringRep& r)
159 : c16a(r.c16a)
160 {}
161 StringRep(const Char16* str)
162 : c16a(str, _StrLen(str) + 1)
163 {}
164
165 Array<Char16> c16a;
166 };
167
|
168 mike 1.27 String::String()
169 {
|
170 kumpf 1.43 _rep = new StringRep;
171 _rep->c16a.append('\0');
|
172 mike 1.27 }
173
|
174 kumpf 1.39 String::String(const String& str)
|
175 mike 1.27 {
|
176 tony 1.66 if (str._rep != NULL)
177 {
|
178 kumpf 1.43 _rep = new StringRep(*str._rep);
|
179 tony 1.66 }
180 else
181 {
182 _rep = new StringRep();
183 }
|
184 kumpf 1.39 }
|
185 tony 1.66
|
186 mike 1.27
|
187 kumpf 1.39 String::String(const String& str, Uint32 n)
188 {
|
189 kumpf 1.43 _rep = new StringRep;
|
190 kumpf 1.55 assign(str.getChar16Data(), n);
|
191 kumpf 1.39 }
192
193 String::String(const Char16* str)
194 {
|
195 kumpf 1.43 _rep = new StringRep(str);
|
196 mike 1.27 }
197
|
198 kumpf 1.39 String::String(const Char16* str, Uint32 n)
199 {
|
200 kumpf 1.43 _rep = new StringRep;
|
201 kumpf 1.39 assign(str, n);
202 }
203
204 String::String(const char* str)
|
205 mike 1.27 {
|
206 kumpf 1.43 _rep = new StringRep;
|
207 kumpf 1.39 assign(str);
|
208 mike 1.27 }
209
|
210 david 1.69 String::String(const char* str, const char* utfFlag)
211 {
212 _rep = new StringRep;
213
214 if(!memcmp(utfFlag,STRING_FLAG_UTF8,sizeof(STRING_FLAG_UTF8)))
215 {
216 assignUTF8(str);
217 }
218 else
219 {
220 assign(str);
221 }
222 }
223
|
224 kumpf 1.39 String::String(const char* str, Uint32 n)
|
225 mike 1.27 {
|
226 kumpf 1.43 _rep = new StringRep;
|
227 kumpf 1.39 assign(str, n);
228 }
|
229 mike 1.27
|
230 kumpf 1.39 String::~String()
231 {
|
232 kumpf 1.43 delete _rep;
|
233 mike 1.27 }
234
|
235 kumpf 1.39 String& String::operator=(const String& str)
|
236 mike 1.27 {
|
237 kumpf 1.39 return assign(str);
|
238 mike 1.27 }
239
|
240 kumpf 1.39 String& String::assign(const String& str)
|
241 mike 1.27 {
|
242 kumpf 1.43 _rep->c16a = str._rep->c16a;
|
243 kumpf 1.39 return *this;
|
244 mike 1.27 }
245
|
246 kumpf 1.39 String& String::assign(const Char16* str)
|
247 mike 1.27 {
|
248 kumpf 1.43 _rep->c16a.clear();
249 _rep->c16a.append(str, _StrLen(str) + 1);
|
250 mike 1.27 return *this;
251 }
252
253 String& String::assign(const Char16* str, Uint32 n)
254 {
|
255 kumpf 1.43 _rep->c16a.clear();
|
256 kumpf 1.38 Uint32 m = _strnlen(str, n);
|
257 kumpf 1.43 _rep->c16a.append(str, m);
258 _rep->c16a.append('\0');
|
259 mike 1.27 return *this;
260 }
261
|
262 kumpf 1.39 String& String::assign(const char* str)
|
263 mike 1.27 {
|
264 kumpf 1.43 _rep->c16a.clear();
|
265 kumpf 1.38
|
266 kumpf 1.39 Uint32 n = strlen(str) + 1;
|
267 kumpf 1.45 _rep->c16a.reserveCapacity(n);
|
268 mike 1.27
269 while (n--)
|
270 kumpf 1.67 _rep->c16a.append(Uint8(*str++));
|
271 mike 1.27
272 return *this;
273 }
274
|
275 kumpf 1.39 String& String::assign(const char* str, Uint32 n)
|
276 mike 1.27 {
|
277 kumpf 1.43 _rep->c16a.clear();
|
278 mike 1.27
|
279 kumpf 1.39 Uint32 _n = _strnlen(str, n);
|
280 kumpf 1.45 _rep->c16a.reserveCapacity(_n + 1);
|
281 mike 1.27
|
282 kumpf 1.39 while (_n--)
|
283 kumpf 1.67 _rep->c16a.append(Uint8(*str++));
|
284 mike 1.27
|
285 kumpf 1.43 _rep->c16a.append('\0');
|
286 mike 1.27
287 return *this;
288 }
289
|
290 kumpf 1.39 void String::clear()
291 {
|
292 kumpf 1.43 _rep->c16a.clear();
293 _rep->c16a.append('\0');
|
294 kumpf 1.39 }
295
|
296 kumpf 1.43 void String::reserveCapacity(Uint32 capacity)
|
297 kumpf 1.39 {
|
298 kumpf 1.45 _rep->c16a.reserveCapacity(capacity + 1);
|
299 kumpf 1.39 }
300
301 Uint32 String::size() const
302 {
|
303 kumpf 1.43 return _rep->c16a.size() - 1;
|
304 kumpf 1.39 }
305
|
306 kumpf 1.55 const Char16* String::getChar16Data() const
|
307 kumpf 1.39 {
|
308 kumpf 1.43 return _rep->c16a.getData();
|
309 kumpf 1.39 }
310
|
311 kumpf 1.54 CString String::getCString() const
|
312 mike 1.27 {
313 Uint32 n = size() + 1;
|
314 kumpf 1.54 char* str = new char[n];
|
315 mike 1.27 char* p = str;
|
316 kumpf 1.55 const Char16* q = getChar16Data();
|
317 mike 1.27
318 for (Uint32 i = 0; i < n; i++)
319 {
320 Uint16 c = *q++;
321 *p++ = char(c);
322
|
323 kumpf 1.54 //if (c & 0xff00)
324 // truncatedCharacters = true;
|
325 mike 1.27 }
326
|
327 kumpf 1.54 return CString(str);
|
328 kumpf 1.49 }
329
|
330 kumpf 1.53 Char16& String::operator[](Uint32 index)
|
331 mike 1.27 {
|
332 kumpf 1.53 if (index > size())
|
333 kumpf 1.49 throw IndexOutOfBoundsException();
|
334 mike 1.27
|
335 kumpf 1.53 return _rep->c16a[index];
|
336 mike 1.27 }
337
|
338 kumpf 1.53 const Char16 String::operator[](Uint32 index) const
|
339 mike 1.27 {
|
340 kumpf 1.53 if (index > size())
|
341 kumpf 1.49 throw IndexOutOfBoundsException();
|
342 mike 1.27
|
343 kumpf 1.53 return _rep->c16a[index];
|
344 mike 1.27 }
345
|
346 kumpf 1.39 String& String::append(const Char16& c)
347 {
|
348 kumpf 1.43 _rep->c16a.insert(_rep->c16a.size() - 1, c);
|
349 kumpf 1.39 return *this;
350 }
351
|
352 mike 1.27 String& String::append(const Char16* str, Uint32 n)
353 {
|
354 kumpf 1.38 Uint32 m = _strnlen(str, n);
|
355 kumpf 1.45 _rep->c16a.reserveCapacity(_rep->c16a.size() + m);
|
356 kumpf 1.43 _rep->c16a.remove(_rep->c16a.size() - 1);
357 _rep->c16a.append(str, m);
358 _rep->c16a.append('\0');
|
359 mike 1.27 return *this;
360 }
361
|
362 kumpf 1.39 String& String::append(const String& str)
|
363 mike 1.27 {
|
364 kumpf 1.55 return append(str.getChar16Data(), str.size());
|
365 mike 1.27 }
366
|
367 kumpf 1.53 void String::remove(Uint32 index, Uint32 size)
|
368 mike 1.27 {
|
369 kumpf 1.39 if (size == PEG_NOT_FOUND)
|
370 kumpf 1.53 size = this->size() - index;
|
371 mike 1.27
|
372 kumpf 1.53 if (index + size > this->size())
|
373 kumpf 1.49 throw IndexOutOfBoundsException();
|
374 mike 1.27
|
375 kumpf 1.39 if (size)
|
376 kumpf 1.53 _rep->c16a.remove(index, size);
|
377 mike 1.27 }
378
|
379 kumpf 1.53 String String::subString(Uint32 index, Uint32 length) const
|
380 mike 1.27 {
|
381 kumpf 1.53 if (index < size())
|
382 mike 1.27 {
|
383 kumpf 1.57 if ((length == PEG_NOT_FOUND) || (length > size() - index))
|
384 kumpf 1.53 length = size() - index;
|
385 mike 1.27
|
386 kumpf 1.55 return String(getChar16Data() + index, length);
|
387 mike 1.27 }
388 else
389 return String();
390 }
391
392 Uint32 String::find(Char16 c) const
393 {
|
394 kumpf 1.55 const Char16* first = getChar16Data();
|
395 mike 1.27
396 for (const Char16* p = first; *p; p++)
397 {
398 if (*p == c)
399 return p - first;
400 }
401
402 return PEG_NOT_FOUND;
403 }
404
|
405 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
406 mike 1.30 {
|
407 kumpf 1.55 const Char16* data = getChar16Data();
|
408 mike 1.30
|
409 kumpf 1.53 for (Uint32 i = index, n = size(); i < n; i++)
|
410 mike 1.30 {
411 if (data[i] == c)
412 return i;
413 }
414
415 return PEG_NOT_FOUND;
416 }
417
|
418 mike 1.27 Uint32 String::find(const String& s) const
419 {
|
420 kumpf 1.55 const Char16* pSubStr = s.getChar16Data();
421 const Char16* pStr = getChar16Data();
|
422 mike 1.27 Uint32 subStrLen = s.size();
423 Uint32 strLen = size();
424
|
425 mike 1.30 if (subStrLen > strLen)
426 {
427 return PEG_NOT_FOUND;
428 }
429
|
430 mike 1.27 // loop to find first char match
431 Uint32 loc = 0;
432 for( ; loc <= (strLen-subStrLen); loc++)
433 {
434 if (*pStr++ == *pSubStr) // match first char
435 {
436 // point to substr 2nd char
437 const Char16* p = pSubStr + 1;
438
439 // Test remaining chars for equal
440 Uint32 i = 1;
441 for (; i < subStrLen; i++)
442 if (*pStr++ != *p++ )
443 {pStr--; break;} // break from loop
444 if (i == subStrLen)
445 return loc;
446 }
447 }
448 return PEG_NOT_FOUND;
449 }
450
451 mike 1.27 Uint32 String::reverseFind(Char16 c) const
452 {
|
453 kumpf 1.55 const Char16* first = getChar16Data();
454 const Char16* last = getChar16Data() + size();
|
455 mike 1.27
456 while (last != first)
457 {
458 if (*--last == c)
459 return last - first;
460 }
461
462 return PEG_NOT_FOUND;
463 }
464
|
465 kumpf 1.62 // ATTN-RK-P3-20020509: Define case-sensitivity for non-English characters
|
466 mike 1.27 void String::toLower()
467 {
|
468 david 1.69 #ifdef PEGASUS_HAS_ICU
469 Char16* utf16str;
470 UnicodeString UniStr((const UChar *)_rep->c16a.getData(), (int32_t)size());
|
471 david 1.77 UniStr.append((UChar)'\0');
|
472 david 1.69 UniStr = UniStr.toLower();
473 utf16str = (Char16 *)UniStr.getTerminatedBuffer();
474 assign(utf16str);
|
475 humberto 1.76 // DEVELOPER NOTE: do not delete utf16str, this is handled by ICU
476
|
477 david 1.69 #else
|
478 kumpf 1.43 for (Char16* p = &_rep->c16a[0]; *p; p++)
|
479 mike 1.27 {
|
480 kumpf 1.46 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
|
481 mike 1.27 *p = tolower(*p);
482 }
|
483 david 1.69 #endif
|
484 kumpf 1.39 }
485
|
486 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
487 kumpf 1.39 {
|
488 kumpf 1.55 const Char16* s1c16 = s1.getChar16Data();
489 const Char16* s2c16 = s2.getChar16Data();
|
490 kumpf 1.39
491 while (n--)
|
492 mike 1.27 {
|
493 kumpf 1.43 int r = *s1c16++ - *s2c16++;
|
494 mike 1.27
495 if (r)
496 return r;
497 }
498
499 return 0;
500 }
501
|
502 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
503 mike 1.30 {
|
504 kumpf 1.55 const Char16* s1c16 = s1.getChar16Data();
505 const Char16* s2c16 = s2.getChar16Data();
|
506 kumpf 1.43
507 while (*s1c16 && *s2c16)
|
508 mike 1.30 {
|
509 kumpf 1.43 int r = *s1c16++ - *s2c16++;
|
510 mike 1.30
511 if (r)
512 return r;
513 }
514
|
515 kumpf 1.43 if (*s2c16)
|
516 mike 1.30 return -1;
|
517 kumpf 1.43 else if (*s1c16)
|
518 mike 1.30 return 1;
519
520 return 0;
521 }
522
|
523 kumpf 1.40 int String::compareNoCase(const String& s1, const String& s2)
524 {
|
525 david 1.69 #ifdef PEGASUS_HAS_ICU
526 UnicodeString UniStr1((const UChar *)s1.getChar16Data(), (int32_t)s1.size());
527 UnicodeString UniStr2((const UChar *)s2.getChar16Data(), (int32_t)s2.size());
528 UniStr1 = UniStr1.toLower();
529 UniStr2 = UniStr2.toLower();
530 return (UniStr2.compare(UniStr1));
531 #else
|
532 kumpf 1.55 const Char16* _s1 = s1.getChar16Data();
533 const Char16* _s2 = s2.getChar16Data();
|
534 kumpf 1.40
535 while (*_s1 && *_s2)
536 {
537 int r;
538
|
539 kumpf 1.46 if (*_s1 <= PEGASUS_MAX_PRINTABLE_CHAR &&
540 *_s2 <= PEGASUS_MAX_PRINTABLE_CHAR)
|
541 kumpf 1.40 {
542 r = tolower(*_s1++) - tolower(*_s2++);
543 }
544 else
545 {
546 r = *_s1++ - *_s2++;
547 }
548
549 if (r)
550 return r;
551 }
552
553 if (*_s2)
554 return -1;
555 else if (*_s1)
556 return 1;
557
558 return 0;
|
559 david 1.69 #endif
|
560 kumpf 1.40 }
561
|
562 kumpf 1.39 Boolean String::equal(const String& str1, const String& str2)
|
563 mike 1.27 {
|
564 kumpf 1.43 return String::compare(str1, str2) == 0;
|
565 mike 1.27 }
566
|
567 kumpf 1.39 Boolean String::equalNoCase(const String& str1, const String& str2)
|
568 mike 1.27 {
|
569 david 1.69 #ifdef PEGASUS_HAS_ICU
570 UnicodeString UniStr1((const UChar *)str1.getChar16Data(), (int32_t)str1.size());
571 UnicodeString UniStr2((const UChar *)str2.getChar16Data(), (int32_t)str2.size());
572 UniStr1 = UniStr1.toLower();
573 UniStr2 = UniStr2.toLower();
574 return (UniStr1 == UniStr2);
575 #else
|
576 kumpf 1.39 if (str1.size() != str2.size())
577 return false;
578
|
579 kumpf 1.55 const Char16* p = str1.getChar16Data();
580 const Char16* q = str2.getChar16Data();
|
581 kumpf 1.39
582 Uint32 n = str1.size();
|
583 mike 1.27
|
584 kumpf 1.39 while (n--)
585 {
|
586 kumpf 1.46 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR &&
587 *q <= PEGASUS_MAX_PRINTABLE_CHAR)
|
588 kumpf 1.39 {
589 if (tolower(*p++) != tolower(*q++))
590 return false;
591 }
592 else if (*p++ != *q++)
593 return false;
594 }
|
595 mike 1.28
|
596 kumpf 1.39 return true;
|
597 david 1.69 #endif
598 }
599
600 // UTF8 specific code:
601 String& String::assignUTF8(const char* str)
602 {
603 _rep->c16a.clear();
604 Uint32 n = strlen(str) + 1;
605
606 const Uint8 *strsrc = (Uint8 *)str;
607 Uint8 *endsrc = (Uint8 *)&str[n-1];
608
609 Char16 *msg16 = new Char16[n];
610 Uint16 *strtgt = (Uint16 *)msg16;
611 Uint16 *endtgt = (Uint16 *)&msg16[n];
612
613 UTF8toUTF16(&strsrc,
614 endsrc,
615 &strtgt,
616 endtgt);
617
618 david 1.69 Uint32 count;
619
620 for(count = 0; ((msg16[count]) != Char16(0x00)) && (count <= n); ++count);
621
622 _rep->c16a.append(msg16, count);
623
624 _rep->c16a.append('\0');
625
626 delete [] msg16;
627
628 return *this;
|
629 mike 1.27 }
630
|
631 david 1.69 CString String::getCStringUTF8() const
632 {
|
633 david 1.71 Uint32 n = 3*size();
|
634 david 1.69 char* str = new char[n];
635
636 const Char16* msg16 = getChar16Data();
637
638 const Uint16 *strsrc = (Uint16 *)msg16;
|
639 david 1.71 Uint16 *endsrc = (Uint16 *)&msg16[size()+1];
|
640 david 1.69
641 Uint8 *strtgt = (Uint8 *)str;
642 Uint8 *endtgt = (Uint8 *)&str[n];
643
644 UTF16toUTF8 (&strsrc,
645 endsrc,
646 &strtgt,
647 endtgt);
648
|
649 david 1.71 char* str1 = new char[strlen(str)+1];
650 strcpy(str1,str);
|
651 david 1.72 delete [] str;
|
652 david 1.71
653 return CString(str1);
|
654 david 1.69 }
655
656 Boolean String::isUTF8(const char *legal)
657 {
|
658 kumpf 1.73 char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
659
660 // Validate that the string is long enough to hold all the expected bytes.
661 // Note that if legal[0] == 0, numBytes will be 1.
|
662 chuck 1.74 for (char i=1; i<numBytes; i++)
|
663 kumpf 1.73 {
664 if (legal[i] == 0)
665 {
666 return false;
667 }
668 }
669
670 return (isValid_U8((const Uint8 *)legal, numBytes));
|
671 david 1.69 }
|
672 kumpf 1.42
|
673 kumpf 1.65 #if 0
|
674 kumpf 1.42 // ATTN-RK-P3-20020603: This code is not completely correct
|
675 karl 1.36 // Wildcard String matching function that may be useful in the future
676 // The following code was provided by Bob Blair.
677
678 /* _StringMatch Match input MatchString against a GLOB style pattern
679 Note that MatchChar is the char type so that this source
680 in portable to different string types. This is an internal function
681
682 Results: The return value is 1 if string matches pattern, and
683 0 otherwise. The matching operation permits the following
684 special characters in the pattern: *?\[] (see the manual
685 entry for details on what these mean).
686
687 Side effects: None.
688 */
689
690 /* MatchChar defined as a separate entity because this function source used
691 elsewhere was an unsigned char *. Here we use Uint16 to maintain 16 bit
692 size.
693 */
694 typedef Uint16 MatchChar;
695
696 karl 1.36 inline Uint16 _ToLower(Uint16 ch)
697 {
|
698 david 1.69 // ICU_TODO: If ICU is available we should do this the correct way.
|
699 kumpf 1.46 return ch <= PEGASUS_MAX_PRINTABLE_CHAR ? tolower(char(ch)) : ch;
|
700 karl 1.36 }
701
702 inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase)
703 {
|
704 david 1.69 // ICU_TODO: If ICU is available we should do this the correct way.
|
705 karl 1.36 if (nocase)
706 return _ToLower(ch1) == _ToLower(ch2);
707 else
708 return ch1 == ch2;
709 }
|
710 mike 1.28
|
711 kumpf 1.35
|
// Tests character c against a bracket expression. range points at the
// opening '['; the characters up to the next ']' are examined.
// Returns a pointer to the element just past the closing ']' when c matches
// (either directly via _Equal(), or by falling between compchar and the
// character following a '-'), and a null pointer when there is no match or
// no closing ']' is found.
// NOTE(review): compchar is assigned once, from the first character of the
// expression, and is not updated as the scan advances, so every '-' range
// uses that first character as its lower bound - confirm whether intended.
// NOTE(review): the local p is initialized but not otherwise used here.
712 karl 1.36 static const MatchChar *
713 _matchrange(const MatchChar *range, MatchChar c, int nocase)
714 {
715 const MatchChar *p = range;
716 const MatchChar *rstart = range + 1;
717 const MatchChar *rend = 0;
718 MatchChar compchar;
719
|
// Locate the closing ']' (or the end of the pattern).
720 kumpf 1.35 for (rend = rstart; *rend && *rend != ']'; rend++);
|
721 karl 1.36 if (*rend == ']') { // if there is an end to this pattern
|
722 kumpf 1.35 for (compchar = *rstart; rstart != rend; rstart++) {
|
723 karl 1.36 if (_Equal(*rstart, c, nocase))
|
724 kumpf 1.35 return ++rend;
725 if (*rstart == '-') {
726 rstart++;
727 if (c >= compchar && c <= *rstart)
728 return ++rend;
729 }
730 }
731 }
|
732 karl 1.36 return (const MatchChar *)0;
|
733 kumpf 1.35 }
734
// Matches testString against a glob-style pattern and returns 1 on a match,
// 0 otherwise. The pattern characters handled below are '*' (tried by
// recursing on successive suffixes of the test string), '?' (consumes one
// character) and '[' (delegated to _matchrange()); every other pattern
// character is compared with _Equal(). When nocase is non-zero the
// character comparisons ignore case.
735 static int
|
736 karl 1.36 _StringMatch(
737 const MatchChar *testString,
738 const MatchChar *pattern,
739 int nocase ) /* Ignore case if this is true */
740 {
741 const MatchChar *pat = pattern;
742 const MatchChar *str = testString;
|
743 kumpf 1.35 unsigned int done = 0;
744 unsigned int res = 0; // the result: 1 == match
745
746 while (!done) { // main loop walks through pattern and test string
747 //cerr << "Comparing <" << *pat << "> and <" << *str << ">" << endl;
748 if (!*pat) { //end of pattern
749 done = 1; // we're done
750 if (!*str) //end of test, too?
751 res = 1; // then we matched
752 } else { //Not end of pattern
753 if (!*str) { // but end of test
754 done = 1; // We're done
755 if (*pat == '*') // If pattern openends
756 res = 1; // then we matched
757 } else { //Not end of test
758 if (*pat == '*') { //Ambiguuity found
759 if (!*++pat) { //and it ends pattern
760 done = 1; // then we're done
761 res = 1; // and match
762 } else { //if it doesn't end
763 while (!done) { // until we're done
|
764 karl 1.36 if (_StringMatch(str, pat, nocase)) { // we recurse
|
765 kumpf 1.35 done = 1; //if it recurses true
766 res = 1; // we done and match
767 } else { //it recurses false
768 if (!*str) // see if test is done
769 done = 1; // yes: we done
770 else // not done:
771 str++; // keep testing
772 } // end test on recursive call
773 } // end looping on recursive calls
774 } // end logic when pattern is ambiguous
775 } else { //pattern not ambiguus
776 if (*pat == '?') { //pattern is 'any'
777 pat++, str++; // so move along
778 } else if (*pat == '[') { //see if it's a range
|
779 karl 1.36 pat = _matchrange(pat, *str, nocase); // and is a match
|
780 kumpf 1.35 if (!pat) { //It is not a match
781 done = 1; // we're done
|
782 kumpf 1.42 res = 0; // no match
|
783 kumpf 1.35 } else { //Range matches
// NOTE(review): _matchrange() returns a pointer already past the closing
// ']', so the pat++ below advances one further pattern character - confirm
// whether intended.
784 str++, pat++; // keep going
785 }
786 } else { // only case left is individual characters
|
787 karl 1.36 if (!_Equal(*pat++, *str++, nocase)) // if they don't match
|
788 kumpf 1.35 done = 1; // bail.
789 }
790 } // end ("pattern is not ambiguous (*)" logic
791 } // end logic when pattern and string still have data
792 } // end logic when pattern still has data
793 } // end main loop
794 return res;
795 }
796
|
797 kumpf 1.39
|
/** match matches a string against a GLOB style pattern.
    Returns true if the String parameter matches the pattern. C-Shell style
    glob matching is used.
    @param str String to be matched against the pattern
    @param pattern Pattern to use in the match
    @return Boolean true if str matches pattern
    The pattern definition is as follows:
    <pre>
    *             Matches any number of any characters
    ?             Match exactly one character
    [chars]       Match any character in chars
    [chara-charb] Match any character in the range between chara and charb
    </pre>
    The literal characters *, ?, [, ] can be included in a string by
    escaping them with backslash "\". Ranges of characters can be concatenated.
    <pre>
    examples:
    Boolean result = String::match("This is a test", "*is*");
    Boolean works = String::match("abcdef123", "*[0-9]");
    </pre>
*/
|
819 karl 1.36 Boolean String::match(const String& str, const String& pattern)
820 {
821 return _StringMatch(
|
822 kumpf 1.55 (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 0) != 0;
|
823 karl 1.36 }
824
|
/** matchNoCase matches a String against a GLOB style pattern independent
    of case.
    Returns true if the str parameter matches the pattern. C-Shell style
    glob matching is used. Case is ignored in all comparisons.
    @param str String containing the string to be matched
    @param pattern GLOB style pattern to use in the match.
    @return Boolean true if str matches pattern
    @see match
*/
|
835 karl 1.36 Boolean String::matchNoCase(const String& str, const String& pattern)
836 {
837 return _StringMatch(
|
838 kumpf 1.55 (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 1) != 0;
|
839 kumpf 1.39 }
|
840 kumpf 1.65 #endif
|
841 kumpf 1.39
842
843 ///////////////////////////////////////////////////////////////////////////////
844 //
845 // String-related functions
846 //
847 ///////////////////////////////////////////////////////////////////////////////
848
849 Boolean operator==(const String& str1, const String& str2)
850 {
851 return String::equal(str1, str2);
852 }
853
854 Boolean operator==(const String& str1, const char* str2)
855 {
856 return String::equal(str1, str2);
857 }
858
859 Boolean operator==(const char* str1, const String& str2)
860 {
861 return String::equal(str1, str2);
862 kumpf 1.39 }
863
864 Boolean operator!=(const String& str1, const String& str2)
865 {
866 return !String::equal(str1, str2);
867 }
868
|
869 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
870 kumpf 1.39 {
|
871 david 1.72
|
872 david 1.69 #if defined(PEGASUS_OS_OS400)
873 CString cstr = str.getCStringUTF8();
874 const char* utf8str = cstr;
875
876 os << utf8str;
877
|
878 humberto 1.76 #elif defined(PEGASUS_HAS_ICU)
879
|
880 david 1.69 char *buf = NULL;
|
881 humberto 1.76 const int size = str.size() * 6;
|
882 david 1.69 UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size());
|
883 humberto 1.76 Uint32 bufsize = UniStr.extract(0,size,buf);
884
|
885 david 1.69 buf = new char[bufsize+1];
886 UniStr.extract(0,bufsize,buf);
887 os << buf;
|
888 humberto 1.75 os.flush();
|
889 david 1.72 delete [] buf;
|
890 humberto 1.76
|
891 david 1.69 #else
892
|
893 gerarda 1.68
|
894 kumpf 1.47 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
895 kumpf 1.50 {
896 Uint16 code = str[i];
897
898 if (code > 0 && code <= PEGASUS_MAX_PRINTABLE_CHAR)
899 {
900 os << char(code);
901 }
902 else
903 {
904 // Print in hex format:
905 char buffer[8];
906 sprintf(buffer, "\\x%04X", code);
907 os << buffer;
908 }
909 }
|
910 david 1.69 #endif // End of PEGASUS_HAS_ICU #else leg.
|
911 kumpf 1.39
912 return os;
913 }
914
915 String operator+(const String& str1, const String& str2)
916 {
917 return String(str1).append(str2);
918 }
919
920 Boolean operator<(const String& str1, const String& str2)
921 {
|
922 kumpf 1.43 return String::compare(str1, str2) < 0;
|
923 kumpf 1.39 }
924
925 Boolean operator<=(const String& str1, const String& str2)
926 {
|
927 kumpf 1.43 return String::compare(str1, str2) <= 0;
|
928 kumpf 1.39 }
929
930 Boolean operator>(const String& str1, const String& str2)
931 {
|
932 kumpf 1.43 return String::compare(str1, str2) > 0;
|
933 kumpf 1.39 }
934
935 Boolean operator>=(const String& str1, const String& str2)
936 {
|
937 kumpf 1.43 return String::compare(str1, str2) >= 0;
|
938 kumpf 1.39 }
939
|
940 kumpf 1.61 #ifndef PEGASUS_REMOVE_DEPRECATED
|
941 kumpf 1.39 int CompareNoCase(const char* s1, const char* s2)
942 {
|
943 kumpf 1.60 return System::strcasecmp(s1, s2);
|
944 kumpf 1.39 }
|
945 kumpf 1.60 #endif
|
946 kumpf 1.39
|
947 mike 1.27 PEGASUS_NAMESPACE_END
|