pegasus/src/Pegasus/Common/String.cpp - annotate

Return to String.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

1 karl 1.98 //%2005////////////////////////////////////////////////////////////////////////
2 mike 1.27 //
3 karl 1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development 4 // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems. 5 // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
6 karl 1.85 // IBM Corp.; EMC Corporation, The Open Group.
7 karl 1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.; 8 // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
9 karl 1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.; 10 // EMC Corporation; VERITAS Software Corporation; The Open Group.
11 mike 1.27 // 12 // Permission is hereby granted, free of charge, to any person obtaining a copy
13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to 14 // deal in the Software without restriction, including without limitation the 15 // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
16 mike 1.27 // sell copies of the Software, and to permit persons to whom the Software is 17 // furnished to do so, subject to the following conditions:
18 david.dillard 1.105 //
19 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
20 mike 1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED 21 // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
22 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR 23 // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 24 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25 mike 1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 26 // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 27 // 28 //============================================================================== 29 // 30 // Author: Mike Brasher (mbrasher@bmc.com) 31 //
32 kumpf 1.39 // Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
33 joyce.j 1.101 // Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
34 mike 1.27 // 35 //%///////////////////////////////////////////////////////////////////////////// 36 37 38 #include <cctype>
39 kumpf 1.64 #include <cstring>
40 mike 1.27 #include "String.h"
41 kumpf 1.43 #include "Array.h"
42 chuck 1.103 #include "AutoPtr.h"
43 kumpf 1.48 #include "InternalException.h"
44 mike 1.27 #include <iostream>
45 kumpf 1.63 #include <fstream>
46 mike 1.27
47 david 1.69 #include "CommonUTF.h" 48 49 #ifdef PEGASUS_HAS_ICU
50 chuck 1.99 #include <unicode/ustring.h> 51 #include <unicode/uchar.h>
52 david 1.69 #endif 53
54 mike 1.28 PEGASUS_USING_STD; 55
56 mike 1.27 PEGASUS_NAMESPACE_BEGIN 57
58 kumpf 1.39 /////////////////////////////////////////////////////////////////////////////// 59 //
60 kumpf 1.54 // CString 61 // 62 /////////////////////////////////////////////////////////////////////////////// 63 64 CString::CString() 65 : _rep(0) 66 { 67 } 68 69 CString::CString(const CString& cstr) 70 {
71 kumpf 1.82 _rep = 0; 72 73 if (cstr._rep) 74 { 75 _rep = (void)new char[strlen((char)cstr._rep)+1]; 76 strcpy((char)_rep, (char)cstr._rep); 77 }
78 kumpf 1.54 } 79 80 CString::CString(char* cstr) 81 : _rep(cstr) 82 { 83 } 84 85 CString::~CString() 86 { 87 if (_rep)
88 kumpf 1.82 {
89 kumpf 1.59 delete [] (char*)_rep;
90 kumpf 1.82 }
91 kumpf 1.54 } 92
93 kumpf 1.56 CString& CString::operator=(const CString& cstr) 94 {
95 kumpf 1.82 if (&cstr != this)
96 kumpf 1.81 {
97 kumpf 1.82 if (_rep) 98 { 99 delete [] (char)_rep; 100 _rep = 0; 101 } 102 if (cstr._rep) 103 { 104 _rep = (char)new char[strlen((char)cstr._rep)+1]; 105 strcpy((char)_rep, (char*)cstr._rep); 106 }
107 kumpf 1.81 }
108 kumpf 1.56 return *this; 109 } 110
111 kumpf 1.54 CString::operator const char*() const 112 {
113 kumpf 1.59 return (char*)_rep;
114 kumpf 1.54 } 115 116 /////////////////////////////////////////////////////////////////////////////// 117 //
118 kumpf 1.39 // String 119 // 120 /////////////////////////////////////////////////////////////////////////////// 121
122 kumpf 1.37 const String String::EMPTY = String();
123 mike 1.27
124 kumpf 1.39 inline Uint32 _StrLen(const Char16* str)
125 mike 1.27 { 126 if (!str)
127 david.dillard 1.105 throw NullPointer();
128 mike 1.27 129 Uint32 n = 0; 130 131 while (*str++)
132 david.dillard 1.105 n++;
133 mike 1.27 134 return n; 135 } 136
137 chuck 1.102 // 138 // Converts a utf-8 char buffer to utf-16 and appends the utf-16 to the Array. 139 // n is the length of the input char , if stopAtTerm is 0 140 // A terminator character is appended to the end. 141 // Note that each input char is converted individually, which gives 142 // the fastest performance. 143 // 144 void _convertAndAppend(const char str, Array<Char16>& c16a, Uint32 n, Uint8 stopAtTerm) 145 { 146 Uint32 i = 0; 147 while ((stopAtTerm && str) \|\| (!stopAtTerm && i < n)) 148 { 149 if ((Uint8*)str <= 0x7f)
150 david.dillard 1.105 {
151 chuck 1.102 // Current byte sequence is in the us-ascii range. 152 c16a.append(Uint8(str++)); 153 } 154 else 155 { 156 // 157 // Current byte sequence is not in the us-ascii range. 158 // 159 160 // Check if the byte sequence is valid utf-8, and if so, 161 // call the converter to utf-16 162 Uint16 tgt[3]; 163 tgt[1] = 0; 164 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(str); 165 if ( (!stopAtTerm && i + c >= n) \|\| 166 (!isValid_U8((const Uint8 *)str, c+1)) ) 167 { 168 // Note about error conditions.
169 david.dillard 1.105 // It is possible that the last utf-8 char before the
170 chuck 1.102 // end of input string extends past the end of the input string. 171 // This is caught in both cases - 172 // If counting up to n, then the test above catches it. 173 // If converting until terminator found, then a terminator 174 // in the middle of a multi-byte utf-8 char is invalid. 175 MessageLoaderParms parms("Common.String.BAD_UTF8", 176 "The byte sequence starting at index $0 is not valid UTF-8 encoding.", 177 i); 178 throw Exception(parms); 179 } 180 else 181 { 182 // str is incremented by this call to the start of the next char 183 Uint16 * tgtBuf = tgt;
184 david.dillard 1.105 UTF8toUTF16((const Uint8 *)&str, (Uint8 )&str[c+1], &tgtBuf, &tgtBuf[2]);
185 chuck 1.102 c16a.append(tgt[0]); 186 if (tgt[1])
187 david.dillard 1.105 {
188 chuck 1.102 // Its a utf-16 surrogate pair (uses 2 Char16's) 189 c16a.append(tgt[1]); 190 }
191 david.dillard 1.105
192 chuck 1.102 // bump by the trailing byte count 193 i += c; 194 } 195 } 196 197 i++; 198 } // end while 199 200 c16a.append('\0'); 201 } 202
203 kumpf 1.43 class StringRep 204 { 205 public: 206 StringRep() 207 {} 208 StringRep(const StringRep& r) 209 : c16a(r.c16a) 210 {} 211 StringRep(const Char16* str) 212 : c16a(str, _StrLen(str) + 1) 213 {} 214 215 Array<Char16> c16a; 216 }; 217
218 mike 1.27 String::String() 219 {
220 kumpf 1.43 _rep = new StringRep; 221 _rep->c16a.append('\0');
222 mike 1.27 } 223
224 kumpf 1.39 String::String(const String& str)
225 mike 1.27 {
226 tony 1.66 if (str._rep != NULL) 227 {
228 kumpf 1.43 _rep = new StringRep(*str._rep);
229 tony 1.66 } 230 else 231 { 232 _rep = new StringRep(); 233 }
234 kumpf 1.39 }
235 tony 1.66
236 mike 1.27
237 kumpf 1.39 String::String(const String& str, Uint32 n) 238 {
239 kumpf 1.43 _rep = new StringRep;
240 kumpf 1.55 assign(str.getChar16Data(), n);
241 kumpf 1.39 } 242 243 String::String(const Char16* str) 244 {
245 david.dillard 1.105 if ( str == 0 ) 246 { 247 throw NullPointer(); 248 } 249
250 kumpf 1.43 _rep = new StringRep(str);
251 mike 1.27 } 252
253 kumpf 1.39 String::String(const Char16* str, Uint32 n) 254 {
255 david.dillard 1.105 if ( str == 0 ) 256 { 257 throw NullPointer(); 258 } 259
260 kumpf 1.43 _rep = new StringRep;
261 kumpf 1.39 assign(str, n); 262 } 263 264 String::String(const char* str)
265 mike 1.27 {
266 david.dillard 1.105 if ( str == 0 ) 267 { 268 throw NullPointer(); 269 } 270
271 kumpf 1.43 _rep = new StringRep;
272 chuck 1.103 AutoPtr<StringRep> tempRep(_rep); 273 // An exception can be thrown, so use a temp AutoPtr.
274 chuck 1.102 _convertAndAppend(str, _rep->c16a, 0, 1);
275 chuck 1.103 tempRep.release();
276 mike 1.27 } 277
278 kumpf 1.39 String::String(const char* str, Uint32 n)
279 mike 1.27 {
280 david.dillard 1.105 if ( str == 0 ) 281 { 282 throw NullPointer(); 283 } 284
285 kumpf 1.43 _rep = new StringRep;
286 chuck 1.103 AutoPtr<StringRep> tempRep(_rep); 287 // An exception can be thrown, so use a temp AutoPtr.
288 chuck 1.102 _convertAndAppend(str, _rep->c16a, n, 0);
289 chuck 1.103 tempRep.release();
290 kumpf 1.39 }
291 mike 1.27
292 kumpf 1.39 String::~String() 293 {
294 kumpf 1.43 delete _rep;
295 mike 1.27 } 296
297 kumpf 1.39 String& String::operator=(const String& str)
298 mike 1.27 {
299 kumpf 1.82 if (&str != this) 300 { 301 assign(str); 302 } 303 return *this;
304 mike 1.27 } 305
306 kumpf 1.39 String& String::assign(const String& str)
307 mike 1.27 {
308 kumpf 1.43 _rep->c16a = str._rep->c16a;
309 kumpf 1.39 return *this;
310 mike 1.27 } 311
312 kumpf 1.39 String& String::assign(const Char16* str)
313 mike 1.27 {
314 david.dillard 1.105 if ( str == 0 ) 315 { 316 throw NullPointer(); 317 } 318
319 kumpf 1.43 _rep->c16a.clear(); 320 _rep->c16a.append(str, _StrLen(str) + 1);
321 mike 1.27 return this; 322 } 323 324 String& String::assign(const Char16 str, Uint32 n) 325 {
326 david.dillard 1.105 if ( str == 0 ) 327 { 328 throw NullPointer(); 329 } 330
331 kumpf 1.43 _rep->c16a.clear();
332 joyce.j 1.101 _rep->c16a.append(str, n);
333 kumpf 1.43 _rep->c16a.append('\0');
334 mike 1.27 return *this; 335 } 336
337 chuck 1.102 String& String::assign(const char* str) 338 {
339 david.dillard 1.105 if ( str == 0 ) 340 { 341 throw NullPointer(); 342 } 343
344 chuck 1.102 _rep->c16a.clear(); 345 _convertAndAppend(str, _rep->c16a, 0, 1); 346 return *this; 347 } 348
349 kumpf 1.39 String& String::assign(const char* str, Uint32 n)
350 mike 1.27 {
351 david.dillard 1.105 if ( str == 0 ) 352 { 353 throw NullPointer(); 354 } 355
356 chuck 1.102 _rep->c16a.clear(); 357 _convertAndAppend(str, _rep->c16a, n, 0);
358 mike 1.27 return *this; 359 } 360
361 kumpf 1.39 void String::clear() 362 {
363 kumpf 1.43 _rep->c16a.clear(); 364 _rep->c16a.append('\0');
365 kumpf 1.39 } 366
367 kumpf 1.43 void String::reserveCapacity(Uint32 capacity)
368 kumpf 1.39 {
369 kumpf 1.45 _rep->c16a.reserveCapacity(capacity + 1);
370 kumpf 1.39 } 371 372 Uint32 String::size() const 373 {
374 gs.keenan 1.106 #if defined (PEGASUS_OS_VMS) 375 // 376 // This prevents returning a minus number. 377 // 378 // Seems as though the first time through 379 // the XML parser something doesn't get 380 // initialized and there is no check for 381 // a negative number in the parser! 382 // 383 Uint32 foo; 384 foo = _rep->c16a.size();
385 gs.keenan 1.107 if (foo == 0)
386 gs.keenan 1.106 { 387 return 0; 388 } 389 else 390 { 391 return (foo -1); 392 } 393 #else
394 kumpf 1.43 return _rep->c16a.size() - 1;
395 gs.keenan 1.106 #endif
396 kumpf 1.39 } 397
398 kumpf 1.55 const Char16* String::getChar16Data() const
399 kumpf 1.39 {
400 kumpf 1.43 return _rep->c16a.getData();
401 kumpf 1.39 } 402
403 kumpf 1.53 Char16& String::operator[](Uint32 index)
404 mike 1.27 {
405 kumpf 1.53 if (index > size())
406 david.dillard 1.105 throw IndexOutOfBoundsException();
407 mike 1.27
408 kumpf 1.53 return _rep->c16a[index];
409 mike 1.27 } 410
411 kumpf 1.53 const Char16 String::operator[](Uint32 index) const
412 mike 1.27 {
413 kumpf 1.53 if (index > size())
414 david.dillard 1.105 throw IndexOutOfBoundsException();
415 mike 1.27
416 kumpf 1.53 return _rep->c16a[index];
417 mike 1.27 } 418
419 kumpf 1.39 String& String::append(const Char16& c) 420 {
421 kumpf 1.43 _rep->c16a.insert(_rep->c16a.size() - 1, c);
422 kumpf 1.39 return *this; 423 } 424
425 mike 1.27 String& String::append(const Char16* str, Uint32 n) 426 {
427 david.dillard 1.105 if (str == 0)
428 joyce.j 1.101 { 429 throw NullPointer(); 430 }
431 david.dillard 1.105
432 joyce.j 1.101 _rep->c16a.reserveCapacity(_rep->c16a.size() + n);
433 kumpf 1.43 _rep->c16a.remove(_rep->c16a.size() - 1);
434 joyce.j 1.101 _rep->c16a.append(str, n);
435 kumpf 1.43 _rep->c16a.append('\0');
436 mike 1.27 return *this; 437 } 438
439 kumpf 1.39 String& String::append(const String& str)
440 mike 1.27 {
441 kumpf 1.55 return append(str.getChar16Data(), str.size());
442 mike 1.27 } 443
444 kumpf 1.53 void String::remove(Uint32 index, Uint32 size)
445 mike 1.27 {
446 kumpf 1.39 if (size == PEG_NOT_FOUND)
447 david.dillard 1.105 size = this->size() - index;
448 mike 1.27
449 kumpf 1.53 if (index + size > this->size())
450 david.dillard 1.105 throw IndexOutOfBoundsException();
451 mike 1.27
452 kumpf 1.39 if (size)
453 david.dillard 1.105 _rep->c16a.remove(index, size);
454 mike 1.27 } 455
456 kumpf 1.53 String String::subString(Uint32 index, Uint32 length) const
457 mike 1.27 {
458 kumpf 1.53 if (index < size())
459 mike 1.27 {
460 david.dillard 1.105 if ((length == PEG_NOT_FOUND) \|\| (length > size() - index)) 461 length = size() - index;
462 mike 1.27
463 david.dillard 1.105 return String(getChar16Data() + index, length);
464 mike 1.27 }
465 david.dillard 1.105 466 return String();
467 mike 1.27 } 468 469 Uint32 String::find(Char16 c) const 470 {
471 kumpf 1.55 const Char16* first = getChar16Data();
472 mike 1.27 473 for (const Char16* p = first; *p; p++) 474 {
475 david.dillard 1.105 if (*p == c) 476 return p - first;
477 mike 1.27 } 478 479 return PEG_NOT_FOUND; 480 } 481
482 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
483 mike 1.30 {
484 kumpf 1.55 const Char16* data = getChar16Data();
485 mike 1.30
486 kumpf 1.53 for (Uint32 i = index, n = size(); i < n; i++)
487 mike 1.30 {
488 david.dillard 1.105 if (data[i] == c) 489 return i;
490 mike 1.30 } 491 492 return PEG_NOT_FOUND; 493 } 494
495 mike 1.27 Uint32 String::find(const String& s) const 496 {
497 kumpf 1.55 const Char16* pSubStr = s.getChar16Data(); 498 const Char16* pStr = getChar16Data();
499 mike 1.27 Uint32 subStrLen = s.size(); 500 Uint32 strLen = size(); 501
502 mike 1.30 if (subStrLen > strLen) 503 { 504 return PEG_NOT_FOUND; 505 } 506
507 mike 1.27 // loop to find first char match 508 Uint32 loc = 0; 509 for( ; loc <= (strLen-subStrLen); loc++) 510 {
511 david.dillard 1.105 if (pStr++ == pSubStr) // match first char 512 { 513 // point to substr 2nd char 514 const Char16* p = pSubStr + 1; 515 516 // Test remaining chars for equal 517 Uint32 i = 1; 518 for (; i < subStrLen; i++) 519 if (pStr++ != p++ ) 520 {pStr-=i; break;} // break from loop 521 if (i == subStrLen) 522 return loc; 523 }
524 mike 1.27 } 525 return PEG_NOT_FOUND; 526 } 527 528 Uint32 String::reverseFind(Char16 c) const 529 {
530 kumpf 1.55 const Char16* first = getChar16Data(); 531 const Char16* last = getChar16Data() + size();
532 mike 1.27 533 while (last != first) 534 {
535 david.dillard 1.105 if (*--last == c) 536 return last - first;
537 mike 1.27 } 538 539 return PEG_NOT_FOUND; 540 } 541 542 void String::toLower() 543 {
544 david 1.69 #ifdef PEGASUS_HAS_ICU
545 chuck 1.99 // This will do a locale-insensitive, but context-sensitive convert.
546 david.dillard 1.105 // Context-sensitive prevents any optimizations that try to
547 chuck 1.99 // convert just the ascii before calling ICU. 548 // The string may shrink or expand after the convert. 549 550 int32_t sz = size(); 551 UChar* destbuf = new UChar[sz + 1]; 552 const UChar* srcbuf = (const UChar *)getChar16Data(); 553 UErrorCode err = U_ZERO_ERROR; 554 555 int32_t needed = u_strToLower(destbuf, sz + 1 , srcbuf, sz, NULL, &err); 556 if (err == U_BUFFER_OVERFLOW_ERROR) 557 { 558 delete [] destbuf; 559 destbuf = new UChar[needed + 1]; 560 err = U_ZERO_ERROR; 561 u_strToLower(destbuf, needed + 1 , srcbuf, sz, NULL, &err); 562 } 563 if (U_FAILURE(err))
564 david 1.90 {
565 chuck 1.99 delete [] destbuf;
566 david.dillard 1.105 throw Exception(u_errorName(err));
567 chuck 1.99 } 568 569 if (needed == sz) 570 { 571 Char16* from = (Char16)destbuf; 572 for (Char16 to = &_rep->c16a[0]; to; to++, from++) 573 { 574 to = *from; 575 }
576 david 1.90 } 577 else 578 {
579 chuck 1.99 assign((Char16 *)destbuf, needed);
580 david 1.90 }
581 chuck 1.99 582 delete [] destbuf;
583 david 1.69 #else
584 kumpf 1.43 for (Char16* p = &_rep->c16a[0]; *p; p++)
585 mike 1.27 {
586 chuck 1.99 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
587 david.dillard 1.105 p = tolower(p);
588 mike 1.27 }
589 david 1.69 #endif
590 kumpf 1.39 } 591
592 chuck 1.99 void String::toUpper()
593 david 1.90 { 594 #ifdef PEGASUS_HAS_ICU
595 chuck 1.99 // This will do a locale-insensitive, but context-sensitive convert.
596 david.dillard 1.105 // Context-sensitive prevents any optimizations that try to
597 chuck 1.99 // convert just the ascii before calling ICU. 598 // The string may shrink or expand after the convert. 599 600 int32_t sz = size(); 601 UChar* destbuf = new UChar[sz + 1]; 602 const UChar* srcbuf = (const UChar *)getChar16Data(); 603 UErrorCode err = U_ZERO_ERROR; 604 605 int32_t needed = u_strToUpper(destbuf, sz + 1 , srcbuf, sz, NULL, &err); 606 if (err == U_BUFFER_OVERFLOW_ERROR)
607 david 1.90 {
608 chuck 1.99 delete [] destbuf; 609 destbuf = new UChar[needed + 1]; 610 err = U_ZERO_ERROR; 611 u_strToUpper(destbuf, needed + 1 , srcbuf, sz, NULL, &err); 612 } 613 if (U_FAILURE(err)) 614 { 615 delete [] destbuf;
616 david.dillard 1.105 throw Exception(u_errorName(err));
617 chuck 1.99 } 618 619 if (needed == sz) 620 { 621 Char16* from = (Char16)destbuf; 622 for (Char16 to = &_rep->c16a[0]; to; to++, from++) 623 { 624 to = *from; 625 }
626 david 1.90 } 627 else 628 {
629 chuck 1.99 assign((Char16 *)destbuf, needed);
630 david 1.90 } 631
632 chuck 1.99 delete [] destbuf;
633 david 1.91 #else 634 for (Char16* p = &_rep->c16a[0]; *p; p++) 635 {
636 david.dillard 1.105 if (p <= PEGASUS_MAX_PRINTABLE_CHAR) 637 p = toupper(*p);
638 david 1.91 }
639 david 1.90 #endif 640 } 641
642 kumpf 1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
643 kumpf 1.39 {
644 kumpf 1.55 const Char16* s1c16 = s1.getChar16Data(); 645 const Char16* s2c16 = s2.getChar16Data();
646 kumpf 1.39 647 while (n--)
648 mike 1.27 {
649 david.dillard 1.105 int r = s1c16++ - s2c16++;
650 mike 1.27
651 david.dillard 1.105 if (r) 652 return r;
653 mike 1.27 } 654 655 return 0; 656 } 657
658 kumpf 1.43 int String::compare(const String& s1, const String& s2)
659 mike 1.30 {
660 kumpf 1.55 const Char16* s1c16 = s1.getChar16Data(); 661 const Char16* s2c16 = s2.getChar16Data();
662 kumpf 1.43 663 while (s1c16 && s2c16)
664 mike 1.30 {
665 david.dillard 1.105 int r = s1c16++ - s2c16++;
666 mike 1.30
667 david.dillard 1.105 if (r) 668 return r;
669 mike 1.30 } 670
671 kumpf 1.43 if (*s2c16)
672 david.dillard 1.105 return -1;
673 kumpf 1.43 else if (*s1c16)
674 david.dillard 1.105 return 1;
675 mike 1.30 676 return 0; 677 } 678
679 kumpf 1.40 int String::compareNoCase(const String& s1, const String& s2) 680 {
681 david 1.69 #ifdef PEGASUS_HAS_ICU
682 chuck 1.99 return u_strcasecmp((const UChar)s1.getChar16Data(), 683 (const UChar)s2.getChar16Data(), 684 U_FOLD_CASE_DEFAULT);
685 david 1.69 #else
686 kumpf 1.55 const Char16* _s1 = s1.getChar16Data(); 687 const Char16* _s2 = s2.getChar16Data();
688 kumpf 1.40 689 while (_s1 && _s2) 690 { 691 int r; 692
693 kumpf 1.46 if (_s1 <= PEGASUS_MAX_PRINTABLE_CHAR && 694 _s2 <= PEGASUS_MAX_PRINTABLE_CHAR)
695 kumpf 1.40 { 696 r = tolower(_s1++) - tolower(_s2++); 697 } 698 else 699 { 700 r = _s1++ - _s2++; 701 } 702
703 david.dillard 1.105 if (r) 704 return r;
705 kumpf 1.40 } 706 707 if (*_s2)
708 david.dillard 1.105 return -1;
709 kumpf 1.40 else if (*_s1)
710 david.dillard 1.105 return 1;
711 kumpf 1.40 712 return 0;
713 david 1.69 #endif
714 kumpf 1.40 } 715
716 kumpf 1.39 Boolean String::equal(const String& str1, const String& str2)
717 mike 1.27 {
718 kumpf 1.43 return String::compare(str1, str2) == 0;
719 mike 1.27 } 720
721 kumpf 1.39 Boolean String::equalNoCase(const String& str1, const String& str2)
722 mike 1.27 {
723 david 1.69 #ifdef PEGASUS_HAS_ICU
724 chuck 1.99 return compareNoCase(str1, str2) == 0;
725 david 1.69 #else
726 kumpf 1.39 if (str1.size() != str2.size())
727 david.dillard 1.105 return false;
728 kumpf 1.39
729 kumpf 1.55 const Char16* p = str1.getChar16Data(); 730 const Char16* q = str2.getChar16Data();
731 kumpf 1.39 732 Uint32 n = str1.size();
733 mike 1.27
734 kumpf 1.39 while (n--) 735 {
736 david.dillard 1.105 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR &&
737 kumpf 1.46 *q <= PEGASUS_MAX_PRINTABLE_CHAR)
738 david.dillard 1.105 { 739 if (tolower(p++) != tolower(q++)) 740 return false; 741 } 742 else if (p++ != q++) 743 return false;
744 kumpf 1.39 }
745 mike 1.28
746 kumpf 1.39 return true;
747 david 1.69 #endif 748 } 749
750 mike 1.27
751 david 1.90 CString String::getCString() const
752 david 1.69 {
753 david 1.79 Uint32 n = 3*size() + 1;
754 david 1.69 char* str = new char[n]; 755 756 const Char16* msg16 = getChar16Data(); 757 758 const Uint16 strsrc = (Uint16 )msg16;
759 david 1.71 Uint16 endsrc = (Uint16 )&msg16[size()+1];
760 david 1.69 761 Uint8 strtgt = (Uint8 )str; 762 Uint8 endtgt = (Uint8 )&str[n]; 763 764 UTF16toUTF8 (&strsrc,
765 david.dillard 1.105 endsrc, 766 &strtgt, 767 endtgt); 768 769 char* str1 = new char[strlen(str)+1]; 770 strcpy(str1,str); 771 delete [] str;
772 david 1.71 773 return CString(str1);
774 david 1.69 }
775 kumpf 1.42
#if 0
// ATTN-RK-P3-20020603: This code is not completely correct
// Wildcard String matching function that may be useful in the future
// The following code was provided by Bob Blair.

/* _StringMatch Match input MatchString against a GLOB style pattern
       Note that MatchChar is the char type so that this source
       is portable to different string types. This is an internal function

  Results: The return value is 1 if string matches pattern, and
        0 otherwise.  The matching operation permits the following
        special characters in the pattern: *?\[] (see the manual
        entry for details on what these mean).


  Side effects: None.
 */

/* MatchChar defined as a separate entity because this function source used
    elsewhere was an unsigned char *. Here we use Uint16 to maintain 16 bit
    size.
*/
typedef Uint16 MatchChar;

inline Uint16 _ToLower(Uint16 ch)
{
    // ICU_TODO:  If ICU is available we should do this the correct way.
    return ch <= PEGASUS_MAX_PRINTABLE_CHAR ? tolower(char(ch)) : ch;
}

inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase)
{
    // ICU_TODO:  If ICU is available we should do this the correct way.
    if (nocase)
        return _ToLower(ch1) == _ToLower(ch2);

    return ch1 == ch2;
}


/* Matches c against the bracketed set that starts at range (which points at
   the '['). Returns a pointer just past the closing ']' on a match, and a
   null pointer otherwise.
*/
static const MatchChar *
_matchrange(const MatchChar *range, MatchChar c, int nocase)
{
  const MatchChar *p = range;
  const MatchChar *rstart = range + 1;
  const MatchChar *rend = 0;
  MatchChar compchar;

  for (rend = rstart; *rend && *rend != ']'; rend++);
  if (*rend == ']') {  // if there is an end to this pattern
    for (compchar = *rstart; rstart != rend; rstart++) {
      if (_Equal(*rstart, c, nocase))
        return ++rend;
      if (*rstart == '-') {
        rstart++;
        if (c >= compchar && c <= *rstart)
          return ++rend;
      }
    }
  }
  return (const MatchChar *)0;
}

static int
_StringMatch(
    const MatchChar *testString,
    const MatchChar *pattern,
    int nocase )                /* Ignore case if this is true */
{
  const MatchChar *pat = pattern;
  const MatchChar *str = testString;
  unsigned int done = 0;
  unsigned int res = 0;  // the result: 1 == match

  while (!done) { // main loop walks through pattern and test string
    //cerr << "Comparing <" << pat << "> and <" << str << ">" << endl;
    if (!*pat) {                                         //end of pattern
      done = 1;                                          // we're done
      if (!*str)                                         //end of test, too?
        res = 1;                                         // then we matched
    } else {                                             //Not end of pattern
      if (!*str) {                                       // but end of test
        done = 1;                                        // We're done
        if (*pat == '*')                                 // If pattern openends
          res = 1;                                       //  then we matched
      } else {                                           //Not end of test
        if (*pat == '*') {                               //Ambiguity found
          if (!*++pat) {                                 //and it ends pattern
            done = 1;                                    //  then we're done
            res = 1;                                     //  and match
          } else {                                       //if it doesn't end
            while (!done) {                              //  until we're done
              if (_StringMatch(str, pat, nocase)) {      //  we recurse
                done = 1;                                //if it recurses true
                res = 1;                                 //  we done and match
              } else {                                   //it recurses false
                if (!*str)                               // see if test is done
                  done = 1;                              //  yes: we done
                else                                     // not done:
                  str++;                                 //   keep testing
              } // end test on recursive call
            } // end looping on recursive calls
          } // end logic when pattern is ambiguous
        } else {                                         //pattern not ambiguous
          if (*pat == '?') {                             //pattern is 'any'
            pat++, str++;                                //  so move along
          } else if (*pat == '[') {                      //see if it's a range
            pat = _matchrange(pat, *str, nocase);        // and is a match
            if (!pat) {                                  //It is not a match
              done = 1;                                  //  we're done
              res = 0;                                   //  no match
            } else {                                     //Range matches
              str++, pat++;                              //  keep going
            }
          } else {               // only case left is individual characters
            if (!_Equal(*pat++, *str++, nocase))         // if they don't match
              done = 1;                                  //   bail.
          }
        }  // end ("pattern is not ambiguous (*)" logic
      } // end logic when pattern and string still have data
    } // end logic when pattern still has data
  } // end main loop
  return res;
}


    /** match matches a string against a GLOB style pattern.
        Returns true if the String parameter matches the pattern. C-Shell style
        glob matching is used.
        @param str String to be matched against the pattern
        @param pattern Pattern to use in the match
        @return Boolean true if str matches pattern
        The pattern definition is as follows:
        <pre>
        *             Matches any number of any characters
        ?             Match exactly one character
        [chars]       Match any character in chars
        [chara-charb] Match any character in the range between chara and charb
        </pre>
        The literal characters *, ?, [, ] can be included in a string by
        escaping them with backslash "\".  Ranges of characters can be concatenated.
        <pre>
        examples:
        Boolean result = String::match("This is a test", "*is*");
        Boolean works =  String::match("abcdef123", "*[0-9]");
        </pre>
    */
Boolean String::match(const String& str, const String& pattern)
{
    return _StringMatch(
        (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 0) != 0;
}

    /** matchNoCase Matches a String against a GLOB style pattern independent
        of case.
        Returns true if the str parameter matches the pattern. C-Shell style
        glob matching is used. Case is ignored in all comparisons.
        @param str String containing the string to be matched
        @param pattern GLOB style pattern to use in the match.
        @return Boolean true if str matches pattern
        @see match
    */
Boolean String::matchNoCase(const String& str, const String& pattern)
{
    return _StringMatch(
        (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 1) != 0;
}
#endif
945 kumpf 1.39 946 947 /////////////////////////////////////////////////////////////////////////////// 948 // 949 // String-related functions 950 // 951 /////////////////////////////////////////////////////////////////////////////// 952 953 Boolean operator==(const String& str1, const String& str2) 954 { 955 return String::equal(str1, str2); 956 } 957 958 Boolean operator==(const String& str1, const char* str2) 959 { 960 return String::equal(str1, str2); 961 } 962 963 Boolean operator==(const char* str1, const String& str2) 964 { 965 return String::equal(str1, str2); 966 kumpf 1.39 } 967 968 Boolean operator!=(const String& str1, const String& str2) 969 { 970 return !String::equal(str1, str2); 971 } 972
973 kumpf 1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
974 kumpf 1.39 {
975 david 1.72
976 david 1.69 #if defined(PEGASUS_OS_OS400)
977 david 1.93 CString cstr = str.getCString();
978 david 1.69 const char* utf8str = cstr; 979 980 os << utf8str; 981
982 humberto 1.76 #elif defined(PEGASUS_HAS_ICU)
983 david.dillard 1.105 char buf = NULL; 984 const int size = str.size() 6; 985 UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size()); 986 Uint32 bufsize = UniStr.extract(0,size,buf); 987 988 buf = new char[bufsize+1]; 989 UniStr.extract(0,bufsize,buf); 990 os << buf; 991 os.flush(); 992 delete [] buf;
993 david 1.69 #else
994 david.dillard 1.105 for (Uint32 i = 0, n = str.size(); i < n; i++) 995 { 996 Uint16 code = str[i]; 997 998 if (code > 0 && code <= PEGASUS_MAX_PRINTABLE_CHAR) 999 { 1000 os << char(code); 1001 } 1002 else 1003 { 1004 // Print in hex format: 1005 char buffer[8]; 1006 sprintf(buffer, "\\x%04X", code); 1007 os << buffer; 1008 } 1009 }
1010 david 1.69 #endif // End of PEGASUS_HAS_ICU #else leg.
1011 kumpf 1.39 1012 return os; 1013 } 1014 1015 String operator+(const String& str1, const String& str2) 1016 { 1017 return String(str1).append(str2); 1018 } 1019 1020 Boolean operator<(const String& str1, const String& str2) 1021 {
1022 kumpf 1.43 return String::compare(str1, str2) < 0;
1023 kumpf 1.39 } 1024 1025 Boolean operator<=(const String& str1, const String& str2) 1026 {
1027 kumpf 1.43 return String::compare(str1, str2) <= 0;
1028 kumpf 1.39 } 1029 1030 Boolean operator>(const String& str1, const String& str2) 1031 {
1032 kumpf 1.43 return String::compare(str1, str2) > 0;
1033 kumpf 1.39 } 1034 1035 Boolean operator>=(const String& str1, const String& str2) 1036 {
1037 kumpf 1.43 return String::compare(str1, str2) >= 0;
1038 kumpf 1.39 } 1039
1040 mike 1.27 PEGASUS_NAMESPACE_END

No CVS admin address has been configured