1 mike 1.27 //%/////////////////////////////////////////////////////////////////////////////
2 //
|
3 kumpf 1.41 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
4 // The Open Group, Tivoli Systems
|
5 mike 1.27 //
6 // Permission is hereby granted, free of charge, to any person obtaining a copy
|
7 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
8 // deal in the Software without restriction, including without limitation the
9 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
10 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is
11 // furnished to do so, subject to the following conditions:
12 //
|
13 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
|
14 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
15 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
|
16 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
17 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
18 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
19 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 //
22 //==============================================================================
23 //
24 // Author: Mike Brasher (mbrasher@bmc.com)
25 //
|
26 kumpf 1.39 // Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
|
27 mike 1.27 //
28 //%/////////////////////////////////////////////////////////////////////////////
29
30
31 #include <cctype>
|
32 kumpf 1.64 #include <cstring>
|
33 mike 1.27 #include "String.h"
|
34 kumpf 1.43 #include "Array.h"
|
35 kumpf 1.48 #include "InternalException.h"
|
36 mike 1.27 #include <iostream>
|
37 kumpf 1.63 #include <fstream>
|
38 kumpf 1.61 #ifndef PEGASUS_REMOVE_DEPRECATED
|
39 kumpf 1.60 #include "System.h"
40 #endif
|
41 mike 1.27
|
42 david 1.69 #include "CommonUTF.h"
43
44 #ifdef PEGASUS_HAS_ICU
|
45 chuck 1.74 #include <unicode/unistr.h>
|
46 david 1.69 #endif
47
|
48 mike 1.28 PEGASUS_USING_STD;
49
|
50 mike 1.27 PEGASUS_NAMESPACE_BEGIN
51
|
52 kumpf 1.39 ///////////////////////////////////////////////////////////////////////////////
53 //
|
54 kumpf 1.54 // CString
55 //
56 ///////////////////////////////////////////////////////////////////////////////
57
58 CString::CString()
59 : _rep(0)
60 {
61 }
62
63 CString::CString(const CString& cstr)
64 {
|
65 kumpf 1.59 _rep = (void*)new char[strlen((char*)cstr._rep)+1];
66 strcpy((char*)_rep, (char*)cstr._rep);
|
67 kumpf 1.54 }
68
69 CString::CString(char* cstr)
70 : _rep(cstr)
71 {
72 }
73
74 CString::~CString()
75 {
76 if (_rep)
|
77 kumpf 1.59 delete [] (char*)_rep;
|
78 kumpf 1.54 }
79
|
80 kumpf 1.56 CString& CString::operator=(const CString& cstr)
81 {
|
82 kumpf 1.59 _rep = (char*)new char[strlen((char*)cstr._rep)+1];
83 strcpy((char*)_rep, (char*)cstr._rep);
|
84 kumpf 1.56 return *this;
85 }
86
|
87 kumpf 1.54 CString::operator const char*() const
88 {
|
89 kumpf 1.59 return (char*)_rep;
|
90 kumpf 1.54 }
91
92 ///////////////////////////////////////////////////////////////////////////////
93 //
|
94 kumpf 1.39 // String
95 //
96 ///////////////////////////////////////////////////////////////////////////////
97
|
98 kumpf 1.37 const String String::EMPTY = String();
|
99 mike 1.27
|
100 kumpf 1.38 Uint32 _strnlen(const char* str, Uint32 n)
101 {
102 if (!str)
103 throw NullPointer();
104
105 for (Uint32 i=0; i<n; i++)
106 {
107 if (!*str)
108 {
109 return i;
110 }
111 }
112
113 return n;
114 }
115
116 Uint32 _strnlen(const Char16* str, Uint32 n)
117 {
118 if (!str)
119 throw NullPointer();
120
121 kumpf 1.38 for (Uint32 i=0; i<n; i++)
122 {
123 if (!*str)
124 {
125 return i;
126 }
127 }
128
129 return n;
130 }
131
|
132 kumpf 1.39 inline Uint32 _StrLen(const char* str)
|
133 mike 1.27 {
134 if (!str)
135 throw NullPointer();
136
137 return strlen(str);
138 }
139
|
140 kumpf 1.39 inline Uint32 _StrLen(const Char16* str)
|
141 mike 1.27 {
142 if (!str)
143 throw NullPointer();
144
145 Uint32 n = 0;
146
147 while (*str++)
148 n++;
149
150 return n;
151 }
152
|
153 kumpf 1.43 class StringRep
154 {
155 public:
156 StringRep()
157 {}
158 StringRep(const StringRep& r)
159 : c16a(r.c16a)
160 {}
161 StringRep(const Char16* str)
162 : c16a(str, _StrLen(str) + 1)
163 {}
164
165 Array<Char16> c16a;
166 };
167
|
168 mike 1.27 String::String()
169 {
|
170 kumpf 1.43 _rep = new StringRep;
171 _rep->c16a.append('\0');
|
172 mike 1.27 }
173
|
174 kumpf 1.39 String::String(const String& str)
|
175 mike 1.27 {
|
176 tony 1.66 if (str._rep != NULL)
177 {
|
178 kumpf 1.43 _rep = new StringRep(*str._rep);
|
179 tony 1.66 }
180 else
181 {
182 _rep = new StringRep();
183 }
|
184 kumpf 1.39 }
|
185 tony 1.66
|
186 mike 1.27
|
187 kumpf 1.39 String::String(const String& str, Uint32 n)
188 {
|
189 kumpf 1.43 _rep = new StringRep;
|
190 kumpf 1.55 assign(str.getChar16Data(), n);
|
191 kumpf 1.39 }
192
193 String::String(const Char16* str)
194 {
|
195 kumpf 1.43 _rep = new StringRep(str);
|
196 mike 1.27 }
197
|
198 kumpf 1.39 String::String(const Char16* str, Uint32 n)
199 {
|
200 kumpf 1.43 _rep = new StringRep;
|
201 kumpf 1.39 assign(str, n);
202 }
203
204 String::String(const char* str)
|
205 mike 1.27 {
|
206 kumpf 1.43 _rep = new StringRep;
|
207 kumpf 1.39 assign(str);
|
208 mike 1.27 }
209
|
210 david 1.69 String::String(const char* str, const char* utfFlag)
211 {
212 _rep = new StringRep;
213
214 if(!memcmp(utfFlag,STRING_FLAG_UTF8,sizeof(STRING_FLAG_UTF8)))
215 {
216 assignUTF8(str);
217 }
218 else
219 {
220 assign(str);
221 }
222 }
223
|
224 kumpf 1.39 String::String(const char* str, Uint32 n)
|
225 mike 1.27 {
|
226 kumpf 1.43 _rep = new StringRep;
|
227 kumpf 1.39 assign(str, n);
228 }
|
229 mike 1.27
|
230 kumpf 1.39 String::~String()
231 {
|
232 kumpf 1.43 delete _rep;
|
233 mike 1.27 }
234
|
235 kumpf 1.39 String& String::operator=(const String& str)
|
236 mike 1.27 {
|
237 kumpf 1.39 return assign(str);
|
238 mike 1.27 }
239
|
240 kumpf 1.39 String& String::assign(const String& str)
|
241 mike 1.27 {
|
242 kumpf 1.43 _rep->c16a = str._rep->c16a;
|
243 kumpf 1.39 return *this;
|
244 mike 1.27 }
245
|
246 kumpf 1.39 String& String::assign(const Char16* str)
|
247 mike 1.27 {
|
248 kumpf 1.43 _rep->c16a.clear();
249 _rep->c16a.append(str, _StrLen(str) + 1);
|
250 mike 1.27 return *this;
251 }
252
253 String& String::assign(const Char16* str, Uint32 n)
254 {
|
255 kumpf 1.43 _rep->c16a.clear();
|
256 kumpf 1.38 Uint32 m = _strnlen(str, n);
|
257 kumpf 1.43 _rep->c16a.append(str, m);
258 _rep->c16a.append('\0');
|
259 mike 1.27 return *this;
260 }
261
|
262 kumpf 1.39 String& String::assign(const char* str)
|
263 mike 1.27 {
|
264 kumpf 1.43 _rep->c16a.clear();
|
265 kumpf 1.38
|
266 kumpf 1.39 Uint32 n = strlen(str) + 1;
|
267 kumpf 1.45 _rep->c16a.reserveCapacity(n);
|
268 mike 1.27
269 while (n--)
|
270 kumpf 1.67 _rep->c16a.append(Uint8(*str++));
|
271 mike 1.27
272 return *this;
273 }
274
|
275 kumpf 1.39 String& String::assign(const char* str, Uint32 n)
|
276 mike 1.27 {
|
277 kumpf 1.43 _rep->c16a.clear();
|
278 mike 1.27
|
279 kumpf 1.39 Uint32 _n = _strnlen(str, n);
|
280 kumpf 1.45 _rep->c16a.reserveCapacity(_n + 1);
|
281 mike 1.27
|
282 kumpf 1.39 while (_n--)
|
283 kumpf 1.67 _rep->c16a.append(Uint8(*str++));
|
284 mike 1.27
|
285 kumpf 1.43 _rep->c16a.append('\0');
|
286 mike 1.27
287 return *this;
288 }
289
|
290 kumpf 1.39 void String::clear()
291 {
|
292 kumpf 1.43 _rep->c16a.clear();
293 _rep->c16a.append('\0');
|
294 kumpf 1.39 }
295
|
296 kumpf 1.43 void String::reserveCapacity(Uint32 capacity)
|
297 kumpf 1.39 {
|
298 kumpf 1.45 _rep->c16a.reserveCapacity(capacity + 1);
|
299 kumpf 1.39 }
300
301 Uint32 String::size() const
302 {
|
303 kumpf 1.43 return _rep->c16a.size() - 1;
|
304 kumpf 1.39 }
305
|
306 kumpf 1.55 const Char16* String::getChar16Data() const
|
307 kumpf 1.39 {
|
308 kumpf 1.43 return _rep->c16a.getData();
|
309 kumpf 1.39 }
310
|
311 kumpf 1.54 CString String::getCString() const
|
312 mike 1.27 {
313 Uint32 n = size() + 1;
|
314 kumpf 1.54 char* str = new char[n];
|
315 mike 1.27 char* p = str;
|
316 kumpf 1.55 const Char16* q = getChar16Data();
|
317 mike 1.27
318 for (Uint32 i = 0; i < n; i++)
319 {
320 Uint16 c = *q++;
321 *p++ = char(c);
322
|
323 kumpf 1.54 //if (c & 0xff00)
324 // truncatedCharacters = true;
|
325 mike 1.27 }
326
|
327 kumpf 1.54 return CString(str);
|
328 kumpf 1.49 }
329
|
330 kumpf 1.53 Char16& String::operator[](Uint32 index)
|
331 mike 1.27 {
|
332 kumpf 1.53 if (index > size())
|
333 kumpf 1.49 throw IndexOutOfBoundsException();
|
334 mike 1.27
|
335 kumpf 1.53 return _rep->c16a[index];
|
336 mike 1.27 }
337
|
338 kumpf 1.53 const Char16 String::operator[](Uint32 index) const
|
339 mike 1.27 {
|
340 kumpf 1.53 if (index > size())
|
341 kumpf 1.49 throw IndexOutOfBoundsException();
|
342 mike 1.27
|
343 kumpf 1.53 return _rep->c16a[index];
|
344 mike 1.27 }
345
|
346 kumpf 1.39 String& String::append(const Char16& c)
347 {
|
348 kumpf 1.43 _rep->c16a.insert(_rep->c16a.size() - 1, c);
|
349 kumpf 1.39 return *this;
350 }
351
|
352 mike 1.27 String& String::append(const Char16* str, Uint32 n)
353 {
|
354 kumpf 1.38 Uint32 m = _strnlen(str, n);
|
355 kumpf 1.45 _rep->c16a.reserveCapacity(_rep->c16a.size() + m);
|
356 kumpf 1.43 _rep->c16a.remove(_rep->c16a.size() - 1);
357 _rep->c16a.append(str, m);
358 _rep->c16a.append('\0');
|
359 mike 1.27 return *this;
360 }
361
|
362 kumpf 1.39 String& String::append(const String& str)
|
363 mike 1.27 {
|
364 kumpf 1.55 return append(str.getChar16Data(), str.size());
|
365 mike 1.27 }
366
|
367 kumpf 1.53 void String::remove(Uint32 index, Uint32 size)
|
368 mike 1.27 {
|
369 kumpf 1.39 if (size == PEG_NOT_FOUND)
|
370 kumpf 1.53 size = this->size() - index;
|
371 mike 1.27
|
372 kumpf 1.53 if (index + size > this->size())
|
373 kumpf 1.49 throw IndexOutOfBoundsException();
|
374 mike 1.27
|
375 kumpf 1.39 if (size)
|
376 kumpf 1.53 _rep->c16a.remove(index, size);
|
377 mike 1.27 }
378
|
379 kumpf 1.53 String String::subString(Uint32 index, Uint32 length) const
|
380 mike 1.27 {
|
381 kumpf 1.53 if (index < size())
|
382 mike 1.27 {
|
383 kumpf 1.57 if ((length == PEG_NOT_FOUND) || (length > size() - index))
|
384 kumpf 1.53 length = size() - index;
|
385 mike 1.27
|
386 kumpf 1.55 return String(getChar16Data() + index, length);
|
387 mike 1.27 }
388 else
389 return String();
390 }
391
392 Uint32 String::find(Char16 c) const
393 {
|
394 kumpf 1.55 const Char16* first = getChar16Data();
|
395 mike 1.27
396 for (const Char16* p = first; *p; p++)
397 {
398 if (*p == c)
399 return p - first;
400 }
401
402 return PEG_NOT_FOUND;
403 }
404
|
405 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
|
406 mike 1.30 {
|
407 kumpf 1.55 const Char16* data = getChar16Data();
|
408 mike 1.30
|
409 kumpf 1.53 for (Uint32 i = index, n = size(); i < n; i++)
|
410 mike 1.30 {
411 if (data[i] == c)
412 return i;
413 }
414
415 return PEG_NOT_FOUND;
416 }
417
|
418 mike 1.27 Uint32 String::find(const String& s) const
419 {
|
420 kumpf 1.55 const Char16* pSubStr = s.getChar16Data();
421 const Char16* pStr = getChar16Data();
|
422 mike 1.27 Uint32 subStrLen = s.size();
423 Uint32 strLen = size();
424
|
425 mike 1.30 if (subStrLen > strLen)
426 {
427 return PEG_NOT_FOUND;
428 }
429
|
430 mike 1.27 // loop to find first char match
431 Uint32 loc = 0;
432 for( ; loc <= (strLen-subStrLen); loc++)
433 {
434 if (*pStr++ == *pSubStr) // match first char
435 {
436 // point to substr 2nd char
437 const Char16* p = pSubStr + 1;
438
439 // Test remaining chars for equal
440 Uint32 i = 1;
441 for (; i < subStrLen; i++)
442 if (*pStr++ != *p++ )
443 {pStr--; break;} // break from loop
444 if (i == subStrLen)
445 return loc;
446 }
447 }
448 return PEG_NOT_FOUND;
449 }
450
451 mike 1.27 Uint32 String::reverseFind(Char16 c) const
452 {
|
453 kumpf 1.55 const Char16* first = getChar16Data();
454 const Char16* last = getChar16Data() + size();
|
455 mike 1.27
456 while (last != first)
457 {
458 if (*--last == c)
459 return last - first;
460 }
461
462 return PEG_NOT_FOUND;
463 }
464
|
465 kumpf 1.62 // ATTN-RK-P3-20020509: Define case-sensitivity for non-English characters
|
466 mike 1.27 void String::toLower()
467 {
|
468 david 1.69 #ifdef PEGASUS_HAS_ICU
469 Char16* utf16str;
470 UnicodeString UniStr((const UChar *)_rep->c16a.getData(), (int32_t)size());
471 UniStr = UniStr.toLower();
472 utf16str = (Char16 *)UniStr.getTerminatedBuffer();
473 assign(utf16str);
|
474 humberto 1.75 //delete utf16str;
|
475 david 1.69 #else
|
476 kumpf 1.43 for (Char16* p = &_rep->c16a[0]; *p; p++)
|
477 mike 1.27 {
|
478 kumpf 1.46 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
|
479 mike 1.27 *p = tolower(*p);
480 }
|
481 david 1.69 #endif
|
482 kumpf 1.39 }
483
|
484 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
|
485 kumpf 1.39 {
|
486 kumpf 1.55 const Char16* s1c16 = s1.getChar16Data();
487 const Char16* s2c16 = s2.getChar16Data();
|
488 kumpf 1.39
489 while (n--)
|
490 mike 1.27 {
|
491 kumpf 1.43 int r = *s1c16++ - *s2c16++;
|
492 mike 1.27
493 if (r)
494 return r;
495 }
496
497 return 0;
498 }
499
|
500 kumpf 1.43 int String::compare(const String& s1, const String& s2)
|
501 mike 1.30 {
|
502 kumpf 1.55 const Char16* s1c16 = s1.getChar16Data();
503 const Char16* s2c16 = s2.getChar16Data();
|
504 kumpf 1.43
505 while (*s1c16 && *s2c16)
|
506 mike 1.30 {
|
507 kumpf 1.43 int r = *s1c16++ - *s2c16++;
|
508 mike 1.30
509 if (r)
510 return r;
511 }
512
|
513 kumpf 1.43 if (*s2c16)
|
514 mike 1.30 return -1;
|
515 kumpf 1.43 else if (*s1c16)
|
516 mike 1.30 return 1;
517
518 return 0;
519 }
520
|
521 kumpf 1.40 int String::compareNoCase(const String& s1, const String& s2)
522 {
|
523 david 1.69 #ifdef PEGASUS_HAS_ICU
524 UnicodeString UniStr1((const UChar *)s1.getChar16Data(), (int32_t)s1.size());
525 UnicodeString UniStr2((const UChar *)s2.getChar16Data(), (int32_t)s2.size());
526 UniStr1 = UniStr1.toLower();
527 UniStr2 = UniStr2.toLower();
528 return (UniStr2.compare(UniStr1));
529 #else
|
530 kumpf 1.55 const Char16* _s1 = s1.getChar16Data();
531 const Char16* _s2 = s2.getChar16Data();
|
532 kumpf 1.40
533 while (*_s1 && *_s2)
534 {
535 int r;
536
|
537 kumpf 1.46 if (*_s1 <= PEGASUS_MAX_PRINTABLE_CHAR &&
538 *_s2 <= PEGASUS_MAX_PRINTABLE_CHAR)
|
539 kumpf 1.40 {
540 r = tolower(*_s1++) - tolower(*_s2++);
541 }
542 else
543 {
544 r = *_s1++ - *_s2++;
545 }
546
547 if (r)
548 return r;
549 }
550
551 if (*_s2)
552 return -1;
553 else if (*_s1)
554 return 1;
555
556 return 0;
|
557 david 1.69 #endif
|
558 kumpf 1.40 }
559
|
560 kumpf 1.39 Boolean String::equal(const String& str1, const String& str2)
|
561 mike 1.27 {
|
562 kumpf 1.43 return String::compare(str1, str2) == 0;
|
563 mike 1.27 }
564
|
565 kumpf 1.39 Boolean String::equalNoCase(const String& str1, const String& str2)
|
566 mike 1.27 {
|
567 david 1.69 #ifdef PEGASUS_HAS_ICU
568 UnicodeString UniStr1((const UChar *)str1.getChar16Data(), (int32_t)str1.size());
569 UnicodeString UniStr2((const UChar *)str2.getChar16Data(), (int32_t)str2.size());
570 UniStr1 = UniStr1.toLower();
571 UniStr2 = UniStr2.toLower();
572 return (UniStr1 == UniStr2);
573 #else
|
574 kumpf 1.39 if (str1.size() != str2.size())
575 return false;
576
|
577 kumpf 1.55 const Char16* p = str1.getChar16Data();
578 const Char16* q = str2.getChar16Data();
|
579 kumpf 1.39
580 Uint32 n = str1.size();
|
581 mike 1.27
|
582 kumpf 1.39 while (n--)
583 {
|
584 kumpf 1.46 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR &&
585 *q <= PEGASUS_MAX_PRINTABLE_CHAR)
|
586 kumpf 1.39 {
587 if (tolower(*p++) != tolower(*q++))
588 return false;
589 }
590 else if (*p++ != *q++)
591 return false;
592 }
|
593 mike 1.28
|
594 kumpf 1.39 return true;
|
595 david 1.69 #endif
596 }
597
598 // UTF8 specific code:
599 String& String::assignUTF8(const char* str)
600 {
601 _rep->c16a.clear();
602 Uint32 n = strlen(str) + 1;
603
604 const Uint8 *strsrc = (Uint8 *)str;
605 Uint8 *endsrc = (Uint8 *)&str[n-1];
606
607 Char16 *msg16 = new Char16[n];
608 Uint16 *strtgt = (Uint16 *)msg16;
609 Uint16 *endtgt = (Uint16 *)&msg16[n];
610
611 UTF8toUTF16(&strsrc,
612 endsrc,
613 &strtgt,
614 endtgt);
615
616 david 1.69 Uint32 count;
617
618 for(count = 0; ((msg16[count]) != Char16(0x00)) && (count <= n); ++count);
619
620 _rep->c16a.append(msg16, count);
621
622 _rep->c16a.append('\0');
623
624 delete [] msg16;
625
626 return *this;
|
627 mike 1.27 }
628
|
629 david 1.69 CString String::getCStringUTF8() const
630 {
|
631 david 1.71 Uint32 n = 3*size();
|
632 david 1.69 char* str = new char[n];
633
634 const Char16* msg16 = getChar16Data();
635
636 const Uint16 *strsrc = (Uint16 *)msg16;
|
637 david 1.71 Uint16 *endsrc = (Uint16 *)&msg16[size()+1];
|
638 david 1.69
639 Uint8 *strtgt = (Uint8 *)str;
640 Uint8 *endtgt = (Uint8 *)&str[n];
641
642 UTF16toUTF8 (&strsrc,
643 endsrc,
644 &strtgt,
645 endtgt);
646
|
647 david 1.71 char* str1 = new char[strlen(str)+1];
648 strcpy(str1,str);
|
649 david 1.72 delete [] str;
|
650 david 1.71
651 return CString(str1);
|
652 david 1.69 }
653
654 Boolean String::isUTF8(const char *legal)
655 {
|
656 kumpf 1.73 char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
657
658 // Validate that the string is long enough to hold all the expected bytes.
659 // Note that if legal[0] == 0, numBytes will be 1.
|
660 chuck 1.74 for (char i=1; i<numBytes; i++)
|
661 kumpf 1.73 {
662 if (legal[i] == 0)
663 {
664 return false;
665 }
666 }
667
668 return (isValid_U8((const Uint8 *)legal, numBytes));
|
669 david 1.69 }
|
670 kumpf 1.42
|
671 kumpf 1.65 #if 0
|
672 kumpf 1.42 // ATTN-RK-P3-20020603: This code is not completely correct
|
673 karl 1.36 // Wildcard String matching function that may be useful in the future
674 // The following code was provided by Bob Blair.
675
676 /* _StringMatch Match input MatchString against a GLOB style pattern
    Note that MatchChar is the char type so that this source
    is portable to different string types. This is an internal function.
679
680 Results: The return value is 1 if string matches pattern, and
681 0 otherwise. The matching operation permits the following
682 special characters in the pattern: *?\[] (see the manual
683 entry for details on what these mean).
684
685 Side effects: None.
686 */
687
688 /* MatchChar defined as a separate entity because this function source used
689 elsewhere was an unsigned char *. Here we use Uint16 to maintain 16 bit
690 size.
691 */
692 typedef Uint16 MatchChar;
693
694 karl 1.36 inline Uint16 _ToLower(Uint16 ch)
695 {
|
696 david 1.69 // ICU_TODO: If ICU is available we should do this the correct way.
|
697 kumpf 1.46 return ch <= PEGASUS_MAX_PRINTABLE_CHAR ? tolower(char(ch)) : ch;
|
698 karl 1.36 }
699
700 inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase)
701 {
|
702 david 1.69 // ICU_TODO: If ICU is available we should do this the correct way.
|
703 karl 1.36 if (nocase)
704 return _ToLower(ch1) == _ToLower(ch2);
705 else
706 return ch1 == ch2;
707 }
|
708 mike 1.28
|
709 kumpf 1.35
|
// Tests character c against a bracket expression. `range` points at the
// opening '[' (the scan starts at range + 1).
//
// Returns a pointer to the element just past the closing ']' when c matches,
// or a null pointer when c does not match or no closing ']' is found.
//
// NOTE(review): compchar is set once, to the first element inside the
// brackets, and never updated -- so the lower bound of every "x-y" range is
// that first element rather than the element before the '-'. Confirm intent.
// NOTE(review): the range comparison does not consult nocase.
// NOTE(review): if '-' is the last element before ']', the inner rstart++
// lands on rend and the loop's own increment then steps past it, so the
// `rstart != rend` test no longer stops the loop. Looks like an overrun.
// NOTE(review): local `p` is never used.
710 karl 1.36 static const MatchChar *
711 _matchrange(const MatchChar *range, MatchChar c, int nocase)
712 {
713 const MatchChar *p = range;
714 const MatchChar *rstart = range + 1;
715 const MatchChar *rend = 0;
716 MatchChar compchar;
717
|
// Locate the closing ']' (or the end of the pattern).
718 kumpf 1.35 for (rend = rstart; *rend && *rend != ']'; rend++);
|
719 karl 1.36 if (*rend == ']') { // if there is an end to this pattern
|
720 kumpf 1.35 for (compchar = *rstart; rstart != rend; rstart++) {
|
// Direct match against a listed element.
721 karl 1.36 if (_Equal(*rstart, c, nocase))
|
722 kumpf 1.35 return ++rend;
// '-' introduces a range whose upper bound is the next element.
723 if (*rstart == '-') {
724 rstart++;
725 if (c >= compchar && c <= *rstart)
726 return ++rend;
727 }
728 }
729 }
|
730 karl 1.36 return (const MatchChar *)0;
|
731 kumpf 1.35 }
732
// Matches testString against a GLOB-style pattern and returns 1 on a match,
// 0 otherwise. When nocase is non-zero, single-character comparisons go
// through _Equal() with case ignored.
//
// Pattern handling visible in the code below:
//   '*'  at the end of the pattern matches the rest of the test string;
//        elsewhere the function recurses on the remaining pattern at each
//        successive position of the test string until one attempt matches
//        or the test string is exhausted.
//   '?'  consumes exactly one test character.
//   '['  delegates to _matchrange().
//   anything else is compared with _Equal().
//
// NOTE(review): when the test string is exhausted and the current pattern
// element is '*', a match is reported without examining what follows the
// '*' in the pattern. Confirm intent.
// NOTE(review): _matchrange() returns a pointer already past the closing
// ']', and the success path here then does pat++ again, which appears to
// skip one further pattern element. Confirm intent.
733 static int
|
734 karl 1.36 _StringMatch(
735 const MatchChar *testString,
736 const MatchChar *pattern,
737 int nocase ) /* Ignore case if this is true */
738 {
739 const MatchChar *pat = pattern;
740 const MatchChar *str = testString;
|
741 kumpf 1.35 unsigned int done = 0;
742 unsigned int res = 0; // the result: 1 == match
743
744 while (!done) { // main loop walks through pattern and test string
745 //cerr << "Comparing <" << *pat << "> and <" << *str << ">" << endl;
746 if (!*pat) { //end of pattern
747 done = 1; // we're done
748 if (!*str) //end of test, too?
749 res = 1; // then we matched
750 } else { //Not end of pattern
751 if (!*str) { // but end of test
752 done = 1; // We're done
753 if (*pat == '*') // If pattern is open-ended
754 res = 1; // then we matched
755 } else { //Not end of test
756 if (*pat == '*') { //Ambiguity found
757 if (!*++pat) { //and it ends pattern
758 done = 1; // then we're done
759 res = 1; // and match
760 } else { //if it doesn't end
761 while (!done) { // until we're done
|
762 karl 1.36 if (_StringMatch(str, pat, nocase)) { // we recurse
|
763 kumpf 1.35 done = 1; //if it recurses true
764 res = 1; // we are done and match
765 } else { //it recurses false
766 if (!*str) // see if test is done
767 done = 1; // yes: we are done
768 else // not done:
769 str++; // keep testing
770 } // end test on recursive call
771 } // end looping on recursive calls
772 } // end logic when pattern is ambiguous
773 } else { //pattern not ambiguous
774 if (*pat == '?') { //pattern is 'any'
775 pat++, str++; // so move along
776 } else if (*pat == '[') { //see if it's a range
|
777 karl 1.36 pat = _matchrange(pat, *str, nocase); // and is a match
|
778 kumpf 1.35 if (!pat) { //It is not a match
779 done = 1; // we're done
|
780 kumpf 1.42 res = 0; // no match
|
781 kumpf 1.35 } else { //Range matches
782 str++, pat++; // keep going
783 }
784 } else { // only case left is individual characters
|
785 karl 1.36 if (!_Equal(*pat++, *str++, nocase)) // if they don't match
|
786 kumpf 1.35 done = 1; // bail (res is still 0).
787 }
788 } // end ("pattern is not ambiguous (*)" logic
789 } // end logic when pattern and string still have data
790 } // end logic when pattern still has data
791 } // end main loop
792 return res;
793 }
794
|
795 kumpf 1.39
|
796 kumpf 1.65 /** match matches a string against a GLOB style pattern.
    Returns true if the String parameter matches the pattern. C-Shell style
798 glob matching is used.
799 @param str String to be matched against the pattern
800 @param pattern Pattern to use in the match
801 @return Boolean true if str matches pattern
802 The pattern definition is as follows:
803 <pre>
804 * Matches any number of any characters
805 ? Match exactly one character
806 [chars] Match any character in chars
807 [chara-charb] Match any character in the range between chara and charb
808 </pre>
809 The literal characters *, ?, [, ] can be included in a string by
810 escaping them with backslash "\". Ranges of characters can be concatenated.
811 <pre>
812 examples:
813 Boolean result = String::match("This is a test", "*is*");
814 Boolean works = String::match("abcdef123", "*[0-9]");
815 </pre>
816 */
|
817 karl 1.36 Boolean String::match(const String& str, const String& pattern)
818 {
819 return _StringMatch(
|
820 kumpf 1.55 (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 0) != 0;
|
821 karl 1.36 }
822
|
823 kumpf 1.65 /** matchNoCase Matches a String against a GLOB style pattern independent
824 of case.
825 Returns true if the str parameter matches the pattern. C-Shell style
    glob matching is used. Case is ignored in all comparisons.
    @param str String containing the string to be matched
    @param pattern GLOB style pattern to use in the match.
    @return Boolean true if str matches pattern
    @see match
832 */
|
833 karl 1.36 Boolean String::matchNoCase(const String& str, const String& pattern)
834 {
835 return _StringMatch(
|
836 kumpf 1.55 (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 1) != 0;
|
837 kumpf 1.39 }
|
838 kumpf 1.65 #endif
|
839 kumpf 1.39
840
841 ///////////////////////////////////////////////////////////////////////////////
842 //
843 // String-related functions
844 //
845 ///////////////////////////////////////////////////////////////////////////////
846
847 Boolean operator==(const String& str1, const String& str2)
848 {
849 return String::equal(str1, str2);
850 }
851
852 Boolean operator==(const String& str1, const char* str2)
853 {
854 return String::equal(str1, str2);
855 }
856
857 Boolean operator==(const char* str1, const String& str2)
858 {
859 return String::equal(str1, str2);
860 kumpf 1.39 }
861
862 Boolean operator!=(const String& str1, const String& str2)
863 {
864 return !String::equal(str1, str2);
865 }
866
|
867 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
|
868 kumpf 1.39 {
|
869 david 1.72
|
870 david 1.69 #if defined(PEGASUS_OS_OS400)
871 CString cstr = str.getCStringUTF8();
872 const char* utf8str = cstr;
873
874 os << utf8str;
875
|
876 humberto 1.75 /*#elif defined(PEGASUS_HAS_ICU)
877
|
878 david 1.69 char *buf = NULL;
879 UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size());
880
881 Uint32 bufsize = UniStr.extract(0,0,buf);
882 buf = new char[bufsize+1];
883 UniStr.extract(0,bufsize,buf);
|
884 humberto 1.75 cout << "strlen(buf) == " << strlen(buf) << endl;
|
885 david 1.69 os << buf;
|
886 humberto 1.75 os.flush();
|
887 david 1.72 delete [] buf;
|
888 humberto 1.75 */
|
889 david 1.69 #else
890
|
891 gerarda 1.68
|
892 kumpf 1.47 for (Uint32 i = 0, n = str.size(); i < n; i++)
|
893 kumpf 1.50 {
894 Uint16 code = str[i];
895
896 if (code > 0 && code <= PEGASUS_MAX_PRINTABLE_CHAR)
897 {
898 os << char(code);
899 }
900 else
901 {
902 // Print in hex format:
903 char buffer[8];
904 sprintf(buffer, "\\x%04X", code);
905 os << buffer;
906 }
907 }
|
908 david 1.69 #endif // End of PEGASUS_HAS_ICU #else leg.
|
909 kumpf 1.39
910 return os;
911 }
912
913 String operator+(const String& str1, const String& str2)
914 {
915 return String(str1).append(str2);
916 }
917
918 Boolean operator<(const String& str1, const String& str2)
919 {
|
920 kumpf 1.43 return String::compare(str1, str2) < 0;
|
921 kumpf 1.39 }
922
923 Boolean operator<=(const String& str1, const String& str2)
924 {
|
925 kumpf 1.43 return String::compare(str1, str2) <= 0;
|
926 kumpf 1.39 }
927
928 Boolean operator>(const String& str1, const String& str2)
929 {
|
930 kumpf 1.43 return String::compare(str1, str2) > 0;
|
931 kumpf 1.39 }
932
933 Boolean operator>=(const String& str1, const String& str2)
934 {
|
935 kumpf 1.43 return String::compare(str1, str2) >= 0;
|
936 kumpf 1.39 }
937
|
938 kumpf 1.61 #ifndef PEGASUS_REMOVE_DEPRECATED
|
939 kumpf 1.39 int CompareNoCase(const char* s1, const char* s2)
940 {
|
941 kumpf 1.60 return System::strcasecmp(s1, s2);
|
942 kumpf 1.39 }
|
943 kumpf 1.60 #endif
|
944 kumpf 1.39
|
945 mike 1.27 PEGASUS_NAMESPACE_END
|