(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.98 //%2005////////////////////////////////////////////////////////////////////////
   2 mike  1.27 //
   3 karl  1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 mike  1.27 //
  12            // Permission is hereby granted, free of charge, to any person obtaining a copy
  13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
  14            // deal in the Software without restriction, including without limitation the
  15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16 mike  1.27 // sell copies of the Software, and to permit persons to whom the Software is
  17            // furnished to do so, subject to the following conditions:
  18 david.dillard 1.105 //
  19 kumpf         1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  20 mike          1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  21                     // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  22 kumpf         1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  23                     // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  24                     // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 mike          1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26                     // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27                     //
  28                     //==============================================================================
  29                     //
  30                     // Author: Mike Brasher (mbrasher@bmc.com)
  31                     //
  32 kumpf         1.39  // Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
  33 joyce.j       1.101 //              Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
  34 mike          1.27  //
  35                     //%/////////////////////////////////////////////////////////////////////////////
  36                     
  37                     
  38                     #include <cctype>
  39 kumpf         1.64  #include <cstring>
  40 mike          1.27  #include "String.h"
  41 kumpf         1.43  #include "Array.h"
  42 chuck         1.103 #include "AutoPtr.h"
  43 kumpf         1.48  #include "InternalException.h"
  44 mike          1.27  #include <iostream>
  45 kumpf         1.63  #include <fstream>
  46 mike          1.27  
  47 david         1.69  #include "CommonUTF.h"
  48                     
  49                     #ifdef PEGASUS_HAS_ICU
  50 chuck         1.99  #include <unicode/ustring.h>
  51                     #include <unicode/uchar.h>
  52 david         1.69  #endif
  53                     
  54 mike          1.28  PEGASUS_USING_STD;
  55                     
  56 mike          1.27  PEGASUS_NAMESPACE_BEGIN
  57                     
  58 kumpf         1.39  ///////////////////////////////////////////////////////////////////////////////
  59                     //
  60 kumpf         1.54  // CString
  61                     //
  62                     ///////////////////////////////////////////////////////////////////////////////
  63                     
  64                     CString::CString()
  65                         : _rep(0)
  66                     {
  67                     }
  68                     
  69                     CString::CString(const CString& cstr)
  70                     {
  71 kumpf         1.82      _rep = 0;
  72                     
  73                         if (cstr._rep)
  74                         {
  75                             _rep = (void*)new char[strlen((char*)cstr._rep)+1];
  76                             strcpy((char*)_rep, (char*)cstr._rep);
  77                         }
  78 kumpf         1.54  }
  79                     
  80                     CString::CString(char* cstr)
  81                         : _rep(cstr)
  82                     {
  83                     }
  84                     
  85                     CString::~CString()
  86                     {
  87                         if (_rep)
  88 kumpf         1.82      {
  89 kumpf         1.59          delete [] (char*)_rep;
  90 kumpf         1.82      }
  91 kumpf         1.54  }
  92                     
  93 kumpf         1.56  CString& CString::operator=(const CString& cstr)
  94                     {
  95 kumpf         1.82      if (&cstr != this)
  96 kumpf         1.81      {
  97 kumpf         1.82          if (_rep)
  98                             {
  99                                 delete [] (char*)_rep;
 100                                 _rep = 0;
 101                             }
 102                             if (cstr._rep)
 103                             {
 104                                 _rep = (char*)new char[strlen((char*)cstr._rep)+1];
 105                                 strcpy((char*)_rep, (char*)cstr._rep);
 106                             }
 107 kumpf         1.81      }
 108 kumpf         1.56      return *this;
 109                     }
 110                     
 111 kumpf         1.54  CString::operator const char*() const
 112                     {
 113 kumpf         1.59      return (char*)_rep;
 114 kumpf         1.54  }
 115                     
 116                     ///////////////////////////////////////////////////////////////////////////////
 117                     //
 118 kumpf         1.39  // String
 119                     //
 120                     ///////////////////////////////////////////////////////////////////////////////
 121                     
 122 kumpf         1.37  const String String::EMPTY = String();
 123 mike          1.27  
 124 kumpf         1.39  inline Uint32 _StrLen(const Char16* str)
 125 mike          1.27  {
 126                         if (!str)
 127 david.dillard 1.105         throw NullPointer();
 128 mike          1.27  
 129                         Uint32 n = 0;
 130                     
 131                         while (*str++)
 132 david.dillard 1.105         n++;
 133 mike          1.27  
 134                         return n;
 135                     }
 136                     
 137 chuck         1.102 //
 138                     // Converts a utf-8 char buffer to utf-16 and appends the utf-16 to the Array.
 139                     // n is the length of the input char *, if stopAtTerm is 0
 140                     // A terminator character is appended to the end.
 141                     // Note that each input char is converted individually, which gives
 142                     // the fastest performance.
 143                     //
 144                     void _convertAndAppend(const char* str, Array<Char16>& c16a, Uint32 n, Uint8 stopAtTerm)
 145                     {
 146                         Uint32 i = 0;
 147                         while ((stopAtTerm && *str) || (!stopAtTerm && i < n))
 148                         {
 149                             if (*(Uint8*)str <= 0x7f)
 150 david.dillard 1.105         {
 151 chuck         1.102             // Current byte sequence is in the us-ascii range.
 152                                 c16a.append(Uint8(*str++));
 153                             }
 154                             else
 155                             {
 156                                 //
 157                                 // Current byte sequence is not in the us-ascii range.
 158                                 //
 159                     
 160                                 // Check if the byte sequence is valid utf-8, and if so,
 161                                 // call the converter to utf-16
 162                                 Uint16 tgt[3];
 163                                 tgt[1] = 0;
 164                                 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*str);
 165                                 if ( (!stopAtTerm && i + c >= n) ||
 166                                      (!isValid_U8((const Uint8 *)str, c+1)) )
 167                                 {
 168                                     // Note about error conditions.
 169 david.dillard 1.105                 // It is possible that the last utf-8 char before the
 170 chuck         1.102                 // end of input string extends past the end of the input string.
 171                                     // This is caught in both cases -
 172                                     // If counting up to n, then the test above catches it.
 173                                     // If converting until terminator found, then a terminator
 174                                     // in the middle of a multi-byte utf-8 char is invalid.
 175                                     MessageLoaderParms parms("Common.String.BAD_UTF8",
 176                                       "The byte sequence starting at index $0 is not valid UTF-8 encoding.",
 177                                       i);
 178                                     throw Exception(parms);
 179                                 }
 180                                 else
 181                                 {
 182                                     //  str is incremented by this call to the start of the next char
 183                                     Uint16 * tgtBuf = tgt;
 184 david.dillard 1.105                 UTF8toUTF16((const Uint8 **)&str, (Uint8 *)&str[c+1], &tgtBuf,  &tgtBuf[2]);
 185 chuck         1.102                 c16a.append(tgt[0]);
 186                                     if (tgt[1])
 187 david.dillard 1.105                 {
 188 chuck         1.102                     // Its a utf-16 surrogate pair (uses 2 Char16's)
 189                                         c16a.append(tgt[1]);
 190                                     }
 191 david.dillard 1.105 
 192 chuck         1.102                 // bump by the trailing byte count
 193                                     i += c;
 194                                 }
 195                             }
 196                     
 197                             i++;
 198                         }  // end while
 199                     
 200                         c16a.append('\0');
 201                     }
 202                     
 203 kumpf         1.43  class StringRep
 204                     {
 205                     public:
 206                         StringRep()
 207                         {}
 208                         StringRep(const StringRep& r)
 209                             : c16a(r.c16a)
 210                         {}
 211                         StringRep(const Char16* str)
 212                             : c16a(str, _StrLen(str) + 1)
 213                         {}
 214                     
 215                         Array<Char16> c16a;
 216                     };
 217                     
 218 mike          1.27  String::String()
 219                     {
 220 kumpf         1.43      _rep = new StringRep;
 221                         _rep->c16a.append('\0');
 222 mike          1.27  }
 223                     
 224 kumpf         1.39  String::String(const String& str)
 225 mike          1.27  {
 226 tony          1.66    if (str._rep != NULL)
 227                       {
 228 kumpf         1.43      _rep = new StringRep(*str._rep);
 229 tony          1.66    }
 230                       else
 231                       {
 232                         _rep = new StringRep();
 233                       }
 234 kumpf         1.39  }
 235 tony          1.66  
 236 mike          1.27  
 237 kumpf         1.39  String::String(const String& str, Uint32 n)
 238                     {
 239 kumpf         1.43      _rep = new StringRep;
 240 kumpf         1.55      assign(str.getChar16Data(), n);
 241 kumpf         1.39  }
 242                     
 243                     String::String(const Char16* str)
 244                     {
 245 david.dillard 1.105     if ( str == 0 )
 246                         {
 247                             throw NullPointer();
 248                         }
 249                     
 250 kumpf         1.43      _rep = new StringRep(str);
 251 mike          1.27  }
 252                     
 253 kumpf         1.39  String::String(const Char16* str, Uint32 n)
 254                     {
 255 david.dillard 1.105     if ( str == 0 )
 256                         {
 257                             throw NullPointer();
 258                         }
 259                     
 260 kumpf         1.43      _rep = new StringRep;
 261 kumpf         1.39      assign(str, n);
 262                     }
 263                     
 264                     String::String(const char* str)
 265 mike          1.27  {
 266 david.dillard 1.105     if ( str == 0 )
 267                         {
 268                             throw NullPointer();
 269                         }
 270                     
 271 kumpf         1.43      _rep = new StringRep;
 272 chuck         1.103     AutoPtr<StringRep> tempRep(_rep);
 273                         // An exception can be thrown, so use a temp AutoPtr.
 274 chuck         1.102     _convertAndAppend(str, _rep->c16a, 0, 1);
 275 chuck         1.103     tempRep.release();
 276 mike          1.27  }
 277                     
 278 kumpf         1.39  String::String(const char* str, Uint32 n)
 279 mike          1.27  {
 280 david.dillard 1.105     if ( str == 0 )
 281                         {
 282                             throw NullPointer();
 283                         }
 284                     
 285 kumpf         1.43      _rep = new StringRep;
 286 chuck         1.103     AutoPtr<StringRep> tempRep(_rep);
 287                         // An exception can be thrown, so use a temp AutoPtr.
 288 chuck         1.102     _convertAndAppend(str, _rep->c16a, n, 0);
 289 chuck         1.103     tempRep.release();
 290 kumpf         1.39  }
 291 mike          1.27  
 292 kumpf         1.39  String::~String()
 293                     {
 294 kumpf         1.43      delete _rep;
 295 mike          1.27  }
 296                     
 297 kumpf         1.39  String& String::operator=(const String& str)
 298 mike          1.27  {
 299 kumpf         1.82      if (&str != this)
 300                         {
 301                             assign(str);
 302                         }
 303                         return *this;
 304 mike          1.27  }
 305                     
 306 kumpf         1.39  String& String::assign(const String& str)
 307 mike          1.27  {
 308 kumpf         1.43      _rep->c16a = str._rep->c16a;
 309 kumpf         1.39      return *this;
 310 mike          1.27  }
 311                     
 312 kumpf         1.39  String& String::assign(const Char16* str)
 313 mike          1.27  {
 314 david.dillard 1.105     if ( str == 0 )
 315                         {
 316                             throw NullPointer();
 317                         }
 318                     
 319 kumpf         1.43      _rep->c16a.clear();
 320                         _rep->c16a.append(str, _StrLen(str) + 1);
 321 mike          1.27      return *this;
 322                     }
 323                     
 324                     String& String::assign(const Char16* str, Uint32 n)
 325                     {
 326 david.dillard 1.105     if ( str == 0 )
 327                         {
 328                             throw NullPointer();
 329                         }
 330                     
 331 kumpf         1.43      _rep->c16a.clear();
 332 joyce.j       1.101     _rep->c16a.append(str, n);
 333 kumpf         1.43      _rep->c16a.append('\0');
 334 mike          1.27      return *this;
 335                     }
 336                     
 337 chuck         1.102 String& String::assign(const char* str)
 338                     {
 339 david.dillard 1.105     if ( str == 0 )
 340                         {
 341                             throw NullPointer();
 342                         }
 343                     
 344 chuck         1.102     _rep->c16a.clear();
 345                         _convertAndAppend(str, _rep->c16a, 0, 1);
 346                         return *this;
 347                     }
 348                     
 349 kumpf         1.39  String& String::assign(const char* str, Uint32 n)
 350 mike          1.27  {
 351 david.dillard 1.105     if ( str == 0 )
 352                         {
 353                             throw NullPointer();
 354                         }
 355                     
 356 chuck         1.102     _rep->c16a.clear();
 357                         _convertAndAppend(str, _rep->c16a, n, 0);
 358 mike          1.27      return *this;
 359                     }
 360                     
 361 kumpf         1.39  void String::clear()
 362                     {
 363 kumpf         1.43      _rep->c16a.clear();
 364                         _rep->c16a.append('\0');
 365 kumpf         1.39  }
 366                     
 367 kumpf         1.43  void String::reserveCapacity(Uint32 capacity)
 368 kumpf         1.39  {
 369 kumpf         1.45      _rep->c16a.reserveCapacity(capacity + 1);
 370 kumpf         1.39  }
 371                     
 372                     Uint32 String::size() const
 373                     {
 374 gs.keenan     1.106 #if defined (PEGASUS_OS_VMS)
 375                       //
 376                       // This prevents returning a minus number.
 377                       //
 378                       // Seems as though the first time through
 379                       //  the XML parser something doesn't get
 380                       //  initialized and there is no check for
 381                       //  a negative number in the parser!
 382                       //
 383                       Uint32 foo;
 384                       foo = _rep->c16a.size();
 385 gs.keenan     1.107   if (foo == 0)
 386 gs.keenan     1.106   {
 387                         return 0;
 388                       }
 389                       else
 390                       {
 391                         return (foo -1);
 392                       }
 393                     #else
 394 kumpf         1.43      return _rep->c16a.size() - 1;
 395 gs.keenan     1.106 #endif
 396 kumpf         1.39  }
 397                     
 398 kumpf         1.55  const Char16* String::getChar16Data() const
 399 kumpf         1.39  {
 400 kumpf         1.43      return _rep->c16a.getData();
 401 kumpf         1.39  }
 402                     
 403 kumpf         1.53  Char16& String::operator[](Uint32 index)
 404 mike          1.27  {
 405 kumpf         1.53      if (index > size())
 406 david.dillard 1.105         throw IndexOutOfBoundsException();
 407 mike          1.27  
 408 kumpf         1.53      return _rep->c16a[index];
 409 mike          1.27  }
 410                     
 411 kumpf         1.53  const Char16 String::operator[](Uint32 index) const
 412 mike          1.27  {
 413 kumpf         1.53      if (index > size())
 414 david.dillard 1.105         throw IndexOutOfBoundsException();
 415 mike          1.27  
 416 kumpf         1.53      return _rep->c16a[index];
 417 mike          1.27  }
 418                     
 419 kumpf         1.39  String& String::append(const Char16& c)
 420                     {
 421 kumpf         1.43      _rep->c16a.insert(_rep->c16a.size() - 1, c);
 422 kumpf         1.39      return *this;
 423                     }
 424                     
 425 mike          1.27  String& String::append(const Char16* str, Uint32 n)
 426                     {
 427 david.dillard 1.105      if (str == 0)
 428 joyce.j       1.101      {
 429                              throw NullPointer();
 430                          }
 431 david.dillard 1.105 
 432 joyce.j       1.101     _rep->c16a.reserveCapacity(_rep->c16a.size() + n);
 433 kumpf         1.43      _rep->c16a.remove(_rep->c16a.size() - 1);
 434 joyce.j       1.101     _rep->c16a.append(str, n);
 435 kumpf         1.43      _rep->c16a.append('\0');
 436 mike          1.27      return *this;
 437                     }
 438                     
 439 kumpf         1.39  String& String::append(const String& str)
 440 mike          1.27  {
 441 kumpf         1.55      return append(str.getChar16Data(), str.size());
 442 mike          1.27  }
 443                     
 444 kumpf         1.53  void String::remove(Uint32 index, Uint32 size)
 445 mike          1.27  {
 446 kumpf         1.39      if (size == PEG_NOT_FOUND)
 447 david.dillard 1.105         size = this->size() - index;
 448 mike          1.27  
 449 kumpf         1.53      if (index + size > this->size())
 450 david.dillard 1.105         throw IndexOutOfBoundsException();
 451 mike          1.27  
 452 kumpf         1.39      if (size)
 453 david.dillard 1.105         _rep->c16a.remove(index, size);
 454 mike          1.27  }
 455                     
 456 kumpf         1.53  String String::subString(Uint32 index, Uint32 length) const
 457 mike          1.27  {
 458 kumpf         1.53      if (index < size())
 459 mike          1.27      {
 460 david.dillard 1.105         if ((length == PEG_NOT_FOUND) || (length > size() - index))
 461                                 length = size() - index;
 462 mike          1.27  
 463 david.dillard 1.105         return String(getChar16Data() + index, length);
 464 mike          1.27      }
 465 david.dillard 1.105 
 466                         return String();
 467 mike          1.27  }
 468                     
 469                     Uint32 String::find(Char16 c) const
 470                     {
 471 kumpf         1.55      const Char16* first = getChar16Data();
 472 mike          1.27  
 473                         for (const Char16* p = first; *p; p++)
 474                         {
 475 david.dillard 1.105         if (*p == c)
 476                                 return  p - first;
 477 mike          1.27      }
 478                     
 479                         return PEG_NOT_FOUND;
 480                     }
 481                     
 482 kumpf         1.53  Uint32 String::find(Uint32 index, Char16 c) const
 483 mike          1.30  {
 484 kumpf         1.55      const Char16* data = getChar16Data();
 485 mike          1.30  
 486 kumpf         1.53      for (Uint32 i = index, n = size(); i < n; i++)
 487 mike          1.30      {
 488 david.dillard 1.105         if (data[i] == c)
 489                                 return i;
 490 mike          1.30      }
 491                     
 492                         return PEG_NOT_FOUND;
 493                     }
 494                     
 495 mike          1.27  Uint32 String::find(const String& s) const
 496                     {
 497 kumpf         1.55      const Char16* pSubStr = s.getChar16Data();
 498                         const Char16* pStr = getChar16Data();
 499 mike          1.27      Uint32 subStrLen = s.size();
 500                         Uint32 strLen = size();
 501                     
 502 mike          1.30      if (subStrLen > strLen)
 503                         {
 504                             return PEG_NOT_FOUND;
 505                         }
 506                     
 507 mike          1.27      // loop to find first char match
 508                         Uint32 loc = 0;
 509                         for( ; loc <= (strLen-subStrLen); loc++)
 510                         {
 511 david.dillard 1.105         if (*pStr++ == *pSubStr)  // match first char
 512                             {
 513                                 // point to substr 2nd char
 514                                 const Char16* p = pSubStr + 1;
 515                     
 516                                 // Test remaining chars for equal
 517                                 Uint32 i = 1;
 518                                 for (; i < subStrLen; i++)
 519                                     if (*pStr++ != *p++ )
 520                                         {pStr-=i; break;} // break from loop
 521                                 if (i == subStrLen)
 522                                     return loc;
 523                             }
 524 mike          1.27      }
 525                         return PEG_NOT_FOUND;
 526                     }
 527                     
 528                     Uint32 String::reverseFind(Char16 c) const
 529                     {
 530 kumpf         1.55      const Char16* first = getChar16Data();
 531                         const Char16* last = getChar16Data() + size();
 532 mike          1.27  
 533                         while (last != first)
 534                         {
 535 david.dillard 1.105         if (*--last == c)
 536                                 return last - first;
 537 mike          1.27      }
 538                     
 539                         return PEG_NOT_FOUND;
 540                     }
 541                     
 542                     void String::toLower()
 543                     {
 544 david         1.69  #ifdef PEGASUS_HAS_ICU
 545 chuck         1.99      // This will do a locale-insensitive, but context-sensitive convert.
 546 david.dillard 1.105     // Context-sensitive prevents any optimizations that try to
 547 chuck         1.99      // convert just the ascii before calling ICU.
 548                         // The string may shrink or expand after the convert.
 549                     
 550                         int32_t sz = size();
 551                         UChar* destbuf = new UChar[sz + 1];
 552                         const UChar* srcbuf = (const UChar *)getChar16Data();
 553                         UErrorCode err = U_ZERO_ERROR;
 554                     
 555                         int32_t needed = u_strToLower(destbuf, sz + 1 , srcbuf, sz, NULL, &err);
 556                         if (err == U_BUFFER_OVERFLOW_ERROR)
 557                         {
 558                           delete [] destbuf;
 559                           destbuf = new UChar[needed + 1];
 560                           err = U_ZERO_ERROR;
 561                           u_strToLower(destbuf, needed + 1 , srcbuf, sz, NULL, &err);
 562                         }
 563                         if (U_FAILURE(err))
 564 david         1.90      {
 565 chuck         1.99          delete [] destbuf;
 566 david.dillard 1.105         throw Exception(u_errorName(err));
 567 chuck         1.99      }
 568                     
 569                         if (needed == sz)
 570                         {
 571                             Char16* from = (Char16*)destbuf;
 572                             for (Char16* to = &_rep->c16a[0]; *to; to++, from++)
 573                             {
 574                               *to = *from;
 575                             }
 576 david         1.90      }
 577                         else
 578                         {
 579 chuck         1.99          assign((Char16 *)destbuf, needed);
 580 david         1.90      }
 581 chuck         1.99  
 582                         delete [] destbuf;
 583 david         1.69  #else
 584 kumpf         1.43      for (Char16* p = &_rep->c16a[0]; *p; p++)
 585 mike          1.27      {
 586 chuck         1.99          if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
 587 david.dillard 1.105                 *p = tolower(*p);
 588 mike          1.27      }
 589 david         1.69  #endif
 590 kumpf         1.39  }
 591                     
 592 chuck         1.99  void String::toUpper()
 593 david         1.90  {
 594                     #ifdef PEGASUS_HAS_ICU
 595 chuck         1.99      // This will do a locale-insensitive, but context-sensitive convert.
 596 david.dillard 1.105     // Context-sensitive prevents any optimizations that try to
 597 chuck         1.99      // convert just the ascii before calling ICU.
 598                         // The string may shrink or expand after the convert.
 599                     
 600                         int32_t sz = size();
 601                         UChar* destbuf = new UChar[sz + 1];
 602                         const UChar* srcbuf = (const UChar *)getChar16Data();
 603                         UErrorCode err = U_ZERO_ERROR;
 604                     
 605                         int32_t needed = u_strToUpper(destbuf, sz + 1 , srcbuf, sz, NULL, &err);
 606                         if (err == U_BUFFER_OVERFLOW_ERROR)
 607 david         1.90      {
 608 chuck         1.99        delete [] destbuf;
 609                           destbuf = new UChar[needed + 1];
 610                           err = U_ZERO_ERROR;
 611                           u_strToUpper(destbuf, needed + 1 , srcbuf, sz, NULL, &err);
 612                         }
 613                         if (U_FAILURE(err))
 614                         {
 615                             delete [] destbuf;
 616 david.dillard 1.105         throw Exception(u_errorName(err));
 617 chuck         1.99      }
 618                     
 619                         if (needed == sz)
 620                         {
 621                             Char16* from = (Char16*)destbuf;
 622                             for (Char16* to = &_rep->c16a[0]; *to; to++, from++)
 623                             {
 624                               *to = *from;
 625                             }
 626 david         1.90      }
 627                         else
 628                         {
 629 chuck         1.99          assign((Char16 *)destbuf, needed);
 630 david         1.90      }
 631                     
 632 chuck         1.99      delete [] destbuf;
 633 david         1.91  #else
 634                         for (Char16* p = &_rep->c16a[0]; *p; p++)
 635                         {
 636 david.dillard 1.105             if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
 637                                     *p = toupper(*p);
 638 david         1.91      }
 639 david         1.90  #endif
 640                     }
 641                     
 642 kumpf         1.43  int String::compare(const String& s1, const String& s2, Uint32 n)
 643 kumpf         1.39  {
 644 kumpf         1.55      const Char16* s1c16 = s1.getChar16Data();
 645                         const Char16* s2c16 = s2.getChar16Data();
 646 kumpf         1.39  
 647                         while (n--)
 648 mike          1.27      {
 649 david.dillard 1.105         int r = *s1c16++ - *s2c16++;
 650 mike          1.27  
 651 david.dillard 1.105         if (r)
 652                                 return r;
 653 mike          1.27      }
 654                     
 655                         return 0;
 656                     }
 657                     
 658 kumpf         1.43  int String::compare(const String& s1, const String& s2)
 659 mike          1.30  {
 660 kumpf         1.55      const Char16* s1c16 = s1.getChar16Data();
 661                         const Char16* s2c16 = s2.getChar16Data();
 662 kumpf         1.43  
 663                         while (*s1c16 && *s2c16)
 664 mike          1.30      {
 665 david.dillard 1.105         int r = *s1c16++ - *s2c16++;
 666 mike          1.30  
 667 david.dillard 1.105         if (r)
 668                                 return r;
 669 mike          1.30      }
 670                     
 671 kumpf         1.43      if (*s2c16)
 672 david.dillard 1.105         return -1;
 673 kumpf         1.43      else if (*s1c16)
 674 david.dillard 1.105         return 1;
 675 mike          1.30  
 676                         return 0;
 677                     }
 678                     
 679 kumpf         1.40  int String::compareNoCase(const String& s1, const String& s2)
 680                     {
 681 david         1.69  #ifdef PEGASUS_HAS_ICU
 682 chuck         1.99      return  u_strcasecmp((const UChar*)s1.getChar16Data(),
 683                                               (const UChar*)s2.getChar16Data(),
 684                                               U_FOLD_CASE_DEFAULT);
 685 david         1.69  #else
 686 kumpf         1.55      const Char16* _s1 = s1.getChar16Data();
 687                         const Char16* _s2 = s2.getChar16Data();
 688 kumpf         1.40  
 689                         while (*_s1 && *_s2)
 690                         {
 691                             int r;
 692                     
 693 kumpf         1.46          if (*_s1 <= PEGASUS_MAX_PRINTABLE_CHAR &&
 694                                 *_s2 <= PEGASUS_MAX_PRINTABLE_CHAR)
 695 kumpf         1.40          {
 696                                 r = tolower(*_s1++) - tolower(*_s2++);
 697                             }
 698                             else
 699                             {
 700                                 r = *_s1++ - *_s2++;
 701                             }
 702                     
 703 david.dillard 1.105         if (r)
 704                                 return r;
 705 kumpf         1.40      }
 706                     
 707                         if (*_s2)
 708 david.dillard 1.105         return -1;
 709 kumpf         1.40      else if (*_s1)
 710 david.dillard 1.105         return 1;
 711 kumpf         1.40  
 712                         return 0;
 713 david         1.69  #endif
 714 kumpf         1.40  }
 715                     
 716 kumpf         1.39  Boolean String::equal(const String& str1, const String& str2)
 717 mike          1.27  {
 718 kumpf         1.43      return String::compare(str1, str2) == 0;
 719 mike          1.27  }
 720                     
 721 kumpf         1.39  Boolean String::equalNoCase(const String& str1, const String& str2)
 722 mike          1.27  {
 723 david         1.69  #ifdef PEGASUS_HAS_ICU
 724 chuck         1.99      return  compareNoCase(str1, str2) == 0;
 725 david         1.69  #else
 726 kumpf         1.39      if (str1.size() != str2.size())
 727 david.dillard 1.105         return false;
 728 kumpf         1.39  
 729 kumpf         1.55      const Char16* p = str1.getChar16Data();
 730                         const Char16* q = str2.getChar16Data();
 731 kumpf         1.39  
 732                         Uint32 n = str1.size();
 733 mike          1.27  
 734 kumpf         1.39      while (n--)
 735                         {
 736 david.dillard 1.105         if (*p <= PEGASUS_MAX_PRINTABLE_CHAR &&
 737 kumpf         1.46              *q <= PEGASUS_MAX_PRINTABLE_CHAR)
 738 david.dillard 1.105         {
 739                                 if (tolower(*p++) != tolower(*q++))
 740                                     return false;
 741                             }
 742                             else if (*p++ != *q++)
 743                                 return false;
 744 kumpf         1.39      }
 745 mike          1.28  
 746 kumpf         1.39      return true;
 747 david         1.69  #endif
 748                     }
 749                     
 750 mike          1.27  
 751 david         1.90  CString String::getCString() const
 752 david         1.69  {
 753 david         1.79      Uint32 n = 3*size() + 1;
 754 david         1.69      char* str = new char[n];
 755                     
 756                         const Char16* msg16 = getChar16Data();
 757                     
 758                         const Uint16 *strsrc = (Uint16 *)msg16;
 759 david         1.71      Uint16 *endsrc = (Uint16 *)&msg16[size()+1];
 760 david         1.69  
 761                         Uint8 *strtgt = (Uint8 *)str;
 762                         Uint8 *endtgt = (Uint8 *)&str[n];
 763                     
 764                         UTF16toUTF8 (&strsrc,
 765 david.dillard 1.105                  endsrc,
 766                                      &strtgt,
 767                                      endtgt);
 768                     
 769                             char* str1 = new char[strlen(str)+1];
 770                             strcpy(str1,str);
 771                             delete [] str;
 772 david         1.71  
 773                         return CString(str1);
 774 david         1.69  }
 775 kumpf         1.42  
 776 kumpf         1.65  #if 0
 777 kumpf         1.42  // ATTN-RK-P3-20020603: This code is not completely correct
 778 karl          1.36   // Wildcard String matching function that may be useful in the future
 779                     // The following code was provided by Bob Blair.
 780                     
/* _StringMatch Match input MatchString against a GLOB style pattern
       Note that MatchChar is the char type so that this source
       is portable to different string types. This is an internal function.

  Results: The return value is 1 if string matches pattern, and
        0 otherwise.  The matching operation permits the following
        special characters in the pattern: *?\[] (see the manual
        entry for details on what these mean).


  Side effects: None.
 */
 793 david.dillard 1.105 
/* MatchChar is defined as a separate type because the original source of
    this function, used elsewhere, operated on unsigned char *. Here we use
    Uint16 to maintain a 16 bit character size.
*/
 798                     typedef Uint16 MatchChar;
 799                     
 800                     inline Uint16 _ToLower(Uint16 ch)
 801                     {
 802 david         1.69      // ICU_TODO:  If ICU is available we should do this the correct way.
 803 kumpf         1.46      return ch <= PEGASUS_MAX_PRINTABLE_CHAR ? tolower(char(ch)) : ch;
 804 karl          1.36  }
 805                     
 806                     inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase)
 807                     {
 808 david         1.69      // ICU_TODO:  If ICU is available we should do this the correct way.
 809 karl          1.36      if (nocase)
 810 david.dillard 1.105         return _ToLower(ch1) == _ToLower(ch2);
 811                     
 812                         return ch1 == ch2;
 813 karl          1.36  }
 814 mike          1.28  
 815 kumpf         1.35  
 816 karl          1.36  static const MatchChar *
 817                     _matchrange(const MatchChar *range, MatchChar c, int nocase)
 818                     {
 819                       const MatchChar *p = range;
 820                       const MatchChar *rstart = range + 1;
 821                       const MatchChar *rend = 0;
 822                       MatchChar compchar;
 823                     
 824 kumpf         1.35    for (rend = rstart; *rend && *rend != ']'; rend++);
 825 karl          1.36    if (*rend == ']') {  // if there is an end to this pattern
 826 kumpf         1.35      for (compchar = *rstart; rstart != rend; rstart++) {
 827 karl          1.36        if (_Equal(*rstart, c, nocase))
 828 kumpf         1.35          return ++rend;
 829                           if (*rstart == '-') {
 830                             rstart++;
 831                             if (c >= compchar && c <= *rstart)
 832                               return ++rend;
 833                           }
 834                         }
 835                       }
 836 karl          1.36    return (const MatchChar *)0;
 837 kumpf         1.35  }
 838                     
 839                     static int
 840 david.dillard 1.105 _StringMatch(
 841                         const MatchChar *testString,
 842 karl          1.36      const MatchChar *pattern,
 843 david.dillard 1.105     int nocase )                /* Ignore case if this is true */
 844 karl          1.36  {
 845                       const MatchChar *pat = pattern;
 846                       const MatchChar *str = testString;
 847 kumpf         1.35    unsigned int done = 0;
 848                       unsigned int res = 0;  // the result: 1 == match
 849                     
 850                       while (!done) { // main loop walks through pattern and test string
 851                         //cerr << "Comparing <" << *pat << "> and <" << *str << ">" << endl;
 852                         if (!*pat) {                                         //end of pattern
 853                           done = 1;                                          // we're done
 854                           if (!*str)                                         //end of test, too?
 855                             res = 1;                                         // then we matched
 856                         } else {                                             //Not end of pattern
 857                           if (!*str) {                                       // but end of test
 858                             done = 1;                                        // We're done
 859                             if (*pat == '*')                                 // If pattern openends
 860                               res = 1;                                       //  then we matched
 861                           } else {                                           //Not end of test
 862                             if (*pat == '*') {                               //Ambiguuity found
 863                               if (!*++pat) {                                 //and it ends pattern
 864                                 done = 1;                                    //  then we're done
 865                                 res = 1;                                     //  and match
 866                               } else {                                       //if it doesn't end
 867                                 while (!done) {                              //  until we're done
 868 karl          1.36                if (_StringMatch(str, pat, nocase)) {      //  we recurse
 869 kumpf         1.35                  done = 1;                                //if it recurses true
 870                                     res = 1;                                 //  we done and match
 871                                   } else {                                   //it recurses false
 872                                     if (!*str)                               // see if test is done
 873                                       done = 1;                              //  yes: we done
 874                                     else                                     // not done:
 875                                       str++;                                 //   keep testing
 876                                   } // end test on recursive call
 877                                 } // end looping on recursive calls
 878                               } // end logic when pattern is ambiguous
 879                             } else {                                         //pattern not ambiguus
 880                               if (*pat == '?') {                             //pattern is 'any'
 881                                 pat++, str++;                                //  so move along
 882                               } else if (*pat == '[') {                      //see if it's a range
 883 karl          1.36              pat = _matchrange(pat, *str, nocase);         // and is a match
 884 kumpf         1.35              if (!pat) {                                  //It is not a match
 885                                   done = 1;                                  //  we're done
 886 kumpf         1.42                res = 0;                                   //  no match
 887 kumpf         1.35              } else {                                     //Range matches
 888                                   str++, pat++;                              //  keep going
 889                                 }
 890                               } else {               // only case left is individual characters
 891 karl          1.36              if (!_Equal(*pat++, *str++, nocase))         // if they don't match
 892 kumpf         1.35                done = 1;                                  //   bail.
 893                               }
 894                             }  // end ("pattern is not ambiguous (*)" logic
 895                           } // end logic when pattern and string still have data
 896                         } // end logic when pattern still has data
 897                       } // end main loop
 898                       return res;
 899                     }
 900                     
 901 kumpf         1.39  
 902 kumpf         1.65      /** match matches a string against a GLOB style pattern.
 903                             Return trues if the String parameter matches the pattern. C-Shell style
 904 david.dillard 1.105         glob matching is used.
 905 kumpf         1.65          @param str String to be matched against the pattern
 906                             @param pattern Pattern to use in the match
 907                             @return Boolean true if str matches pattern
 908                             The pattern definition is as follows:
 909                             <pre>
 910                             *             Matches any number of any characters
 911                             ?             Match exactly one character
 912                             [chars]       Match any character in chars
 913                             [chara-charb] Match any character in the range between chara and charb
 914                             </pre>
 915                             The literal characters *, ?, [, ] can be included in a string by
 916                             escaping them with backslash "\".  Ranges of characters can be concatenated.
 917                             <pre>
 918                             examples:
 919                             Boolean result = String::match("This is a test", "*is*");
 920                             Boolean works =  String::match("abcdef123", "*[0-9]");
 921                             </pre>
 922                         */
 923 karl          1.36  Boolean String::match(const String& str, const String& pattern)
 924                     {
 925                         return _StringMatch(
 926 david.dillard 1.105         (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 0) != 0;
 927 karl          1.36  }
 928                     
 929 kumpf         1.65      /** matchNoCase Matches a String against a GLOB style pattern independent
 930 david.dillard 1.105         of case.
 931 kumpf         1.65          Returns true if the str parameter matches the pattern. C-Shell style
 932 david.dillard 1.104         glob matching is used. Ignore case in all comparisons. Case is
 933 kumpf         1.65          ignored in the match.
 934                             @parm str String containing the string to be matched\
 935                             @parm pattern GLOB style patterh to use in the match.
 936                             @return Boolean true if str matches patterh
 937 david.dillard 1.104         @see match
 938 kumpf         1.65      */
 939 karl          1.36  Boolean String::matchNoCase(const String& str, const String& pattern)
 940                     {
 941                         return _StringMatch(
 942 david.dillard 1.105         (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 1) != 0;
 943 kumpf         1.39  }
 944 kumpf         1.65  #endif
 945 kumpf         1.39  
 946                     
 947                     ///////////////////////////////////////////////////////////////////////////////
 948                     //
 949                     // String-related functions
 950                     //
 951                     ///////////////////////////////////////////////////////////////////////////////
 952                     
 953                     Boolean operator==(const String& str1, const String& str2)
 954                     {
 955                         return String::equal(str1, str2);
 956                     }
 957                     
 958                     Boolean operator==(const String& str1, const char* str2)
 959                     {
 960                         return String::equal(str1, str2);
 961                     }
 962                     
 963                     Boolean operator==(const char* str1, const String& str2)
 964                     {
 965                         return String::equal(str1, str2);
 966 kumpf         1.39  }
 967                     
 968                     Boolean operator!=(const String& str1, const String& str2)
 969                     {
 970                         return !String::equal(str1, str2);
 971                     }
 972                     
 973 kumpf         1.47  PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
 974 kumpf         1.39  {
 975 david         1.72  
 976 david         1.69  #if defined(PEGASUS_OS_OS400)
 977 david         1.93      CString cstr = str.getCString();
 978 david         1.69      const char* utf8str = cstr;
 979                     
 980                         os << utf8str;
 981                     
 982 humberto      1.76  #elif defined(PEGASUS_HAS_ICU)
 983 david.dillard 1.105         char *buf = NULL;
 984                             const int size = str.size() * 6;
 985                             UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size());
 986                             Uint32 bufsize = UniStr.extract(0,size,buf);
 987                     
 988                             buf = new char[bufsize+1];
 989                             UniStr.extract(0,bufsize,buf);
 990                             os << buf;
 991                             os.flush();
 992                             delete [] buf;
 993 david         1.69  #else
 994 david.dillard 1.105         for (Uint32 i = 0, n = str.size(); i < n; i++)
 995                             {
 996                                     Uint16 code = str[i];
 997                     
 998                                     if (code > 0 && code <= PEGASUS_MAX_PRINTABLE_CHAR)
 999                                     {
1000                                      os << char(code);
1001                                     }
1002                                     else
1003                                     {
1004                                     // Print in hex format:
1005                                     char buffer[8];
1006                                     sprintf(buffer, "\\x%04X", code);
1007                                     os << buffer;
1008                                     }
1009                             }
1010 david         1.69  #endif // End of PEGASUS_HAS_ICU #else leg.
1011 kumpf         1.39  
1012                         return os;
1013                     }
1014                     
1015                     String operator+(const String& str1, const String& str2)
1016                     {
1017                         return String(str1).append(str2);
1018                     }
1019                     
1020                     Boolean operator<(const String& str1, const String& str2)
1021                     {
1022 kumpf         1.43      return String::compare(str1, str2) < 0;
1023 kumpf         1.39  }
1024                     
1025                     Boolean operator<=(const String& str1, const String& str2)
1026                     {
1027 kumpf         1.43      return String::compare(str1, str2) <= 0;
1028 kumpf         1.39  }
1029                     
1030                     Boolean operator>(const String& str1, const String& str2)
1031                     {
1032 kumpf         1.43      return String::compare(str1, str2) > 0;
1033 kumpf         1.39  }
1034                     
1035                     Boolean operator>=(const String& str1, const String& str2)
1036                     {
1037 kumpf         1.43      return String::compare(str1, str2) >= 0;
1038 kumpf         1.39  }
1039                     
1040 mike          1.27  PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2