(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.98 //%2005////////////////////////////////////////////////////////////////////////
   2 mike  1.27 //
   3 karl  1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 mike  1.27 //
  12            // Permission is hereby granted, free of charge, to any person obtaining a copy
  13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
  14            // deal in the Software without restriction, including without limitation the
  15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16 mike  1.27 // sell copies of the Software, and to permit persons to whom the Software is
  17            // furnished to do so, subject to the following conditions:
  18 david.dillard 1.105 //
  19 kumpf         1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  20 mike          1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  21                     // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  22 kumpf         1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  23                     // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  24                     // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 mike          1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26                     // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27                     //
  28                     //==============================================================================
  29                     //
  30                     // Author: Mike Brasher (mbrasher@bmc.com)
  31                     //
  32 kumpf         1.39  // Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
  33 joyce.j       1.101 //              Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
  34 david.dillard 1.111 //              David Dillard, VERITAS Software Corp.
  35                     //                  (david.dillard@veritas.com)
  36 mike          1.27  //
  37                     //%/////////////////////////////////////////////////////////////////////////////
  38                     
  39                     
  40 kumpf         1.64  #include <cstring>
  41 mike          1.27  #include "String.h"
  42 kumpf         1.43  #include "Array.h"
  43 chuck         1.103 #include "AutoPtr.h"
  44 kumpf         1.48  #include "InternalException.h"
  45 yi.zhou       1.108 #include <Pegasus/Common/CommonUTF.h>
  46 mike          1.27  
  47 david         1.69  #include "CommonUTF.h"
  48                     
  49                     #ifdef PEGASUS_HAS_ICU
  50 chuck         1.99  #include <unicode/ustring.h>
  51                     #include <unicode/uchar.h>
  52 david         1.69  #endif
  53                     
  54 mike          1.28  PEGASUS_USING_STD;
  55                     
  56 mike          1.27  PEGASUS_NAMESPACE_BEGIN
  57                     
  58 kumpf         1.39  ///////////////////////////////////////////////////////////////////////////////
  59                     //
  60 kumpf         1.54  // CString
  61                     //
  62                     ///////////////////////////////////////////////////////////////////////////////
  63                     
  64                     CString::CString()
  65                         : _rep(0)
  66                     {
  67                     }
  68                     
  69                     CString::CString(const CString& cstr)
  70                     {
  71 kumpf         1.82      _rep = 0;
  72                     
  73                         if (cstr._rep)
  74                         {
  75                             _rep = (void*)new char[strlen((char*)cstr._rep)+1];
  76                             strcpy((char*)_rep, (char*)cstr._rep);
  77                         }
  78 kumpf         1.54  }
  79                     
  80                     CString::CString(char* cstr)
  81                         : _rep(cstr)
  82                     {
  83                     }
  84                     
  85                     CString::~CString()
  86                     {
  87                         if (_rep)
  88 kumpf         1.82      {
  89 kumpf         1.59          delete [] (char*)_rep;
  90 kumpf         1.82      }
  91 kumpf         1.54  }
  92                     
  93 kumpf         1.56  CString& CString::operator=(const CString& cstr)
  94                     {
  95 kumpf         1.82      if (&cstr != this)
  96 kumpf         1.81      {
  97 kumpf         1.82          if (_rep)
  98                             {
  99                                 delete [] (char*)_rep;
 100                                 _rep = 0;
 101                             }
 102                             if (cstr._rep)
 103                             {
 104                                 _rep = (char*)new char[strlen((char*)cstr._rep)+1];
 105                                 strcpy((char*)_rep, (char*)cstr._rep);
 106                             }
 107 kumpf         1.81      }
 108 kumpf         1.56      return *this;
 109                     }
 110                     
 111 kumpf         1.54  CString::operator const char*() const
 112                     {
 113 kumpf         1.59      return (char*)_rep;
 114 kumpf         1.54  }
 115                     
 116                     ///////////////////////////////////////////////////////////////////////////////
 117                     //
 118 kumpf         1.39  // String
 119                     //
 120                     ///////////////////////////////////////////////////////////////////////////////
 121                     
 122 kumpf         1.37  const String String::EMPTY = String();
 123 mike          1.27  
 124 kumpf         1.39  inline Uint32 _StrLen(const Char16* str)
 125 mike          1.27  {
 126                         if (!str)
 127 david.dillard 1.105         throw NullPointer();
 128 mike          1.27  
 129                         Uint32 n = 0;
 130                     
 131                         while (*str++)
 132 david.dillard 1.105         n++;
 133 mike          1.27  
 134                         return n;
 135                     }
 136                     
 137 chuck         1.102 //
 138                     // Converts a utf-8 char buffer to utf-16 and appends the utf-16 to the Array.
 139                     // n is the length of the input char *, if stopAtTerm is 0
 140                     // A terminator character is appended to the end.
 141                     // Note that each input char is converted individually, which gives
 142                     // the fastest performance.
 143                     //
 144                     void _convertAndAppend(const char* str, Array<Char16>& c16a, Uint32 n, Uint8 stopAtTerm)
 145                     {
 146                         Uint32 i = 0;
 147                         while ((stopAtTerm && *str) || (!stopAtTerm && i < n))
 148                         {
 149                             if (*(Uint8*)str <= 0x7f)
 150 david.dillard 1.105         {
 151 chuck         1.102             // Current byte sequence is in the us-ascii range.
 152                                 c16a.append(Uint8(*str++));
 153                             }
 154                             else
 155                             {
 156                                 //
 157                                 // Current byte sequence is not in the us-ascii range.
 158                                 //
 159                     
 160                                 // Check if the byte sequence is valid utf-8, and if so,
 161                                 // call the converter to utf-16
 162                                 Uint16 tgt[3];
 163                                 tgt[1] = 0;
 164                                 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*str);
 165                                 if ( (!stopAtTerm && i + c >= n) ||
 166                                      (!isValid_U8((const Uint8 *)str, c+1)) )
 167                                 {
 168                                     // Note about error conditions.
 169 david.dillard 1.105                 // It is possible that the last utf-8 char before the
 170 chuck         1.102                 // end of input string extends past the end of the input string.
 171                                     // This is caught in both cases -
 172                                     // If counting up to n, then the test above catches it.
 173                                     // If converting until terminator found, then a terminator
 174                                     // in the middle of a multi-byte utf-8 char is invalid.
 175                                     MessageLoaderParms parms("Common.String.BAD_UTF8",
 176                                       "The byte sequence starting at index $0 is not valid UTF-8 encoding.",
 177                                       i);
 178                                     throw Exception(parms);
 179                                 }
 180                                 else
 181                                 {
 182                                     //  str is incremented by this call to the start of the next char
 183                                     Uint16 * tgtBuf = tgt;
 184 david.dillard 1.105                 UTF8toUTF16((const Uint8 **)&str, (Uint8 *)&str[c+1], &tgtBuf,  &tgtBuf[2]);
 185 chuck         1.102                 c16a.append(tgt[0]);
 186                                     if (tgt[1])
 187 david.dillard 1.105                 {
 188 chuck         1.102                     // Its a utf-16 surrogate pair (uses 2 Char16's)
 189                                         c16a.append(tgt[1]);
 190                                     }
 191 david.dillard 1.105 
 192 chuck         1.102                 // bump by the trailing byte count
 193                                     i += c;
 194                                 }
 195                             }
 196                     
 197                             i++;
 198                         }  // end while
 199                     
 200                         c16a.append('\0');
 201                     }
 202                     
 203 kumpf         1.43  class StringRep
 204                     {
 205                     public:
 206                         StringRep()
 207                         {}
 208                         StringRep(const StringRep& r)
 209                             : c16a(r.c16a)
 210                         {}
 211                         StringRep(const Char16* str)
 212                             : c16a(str, _StrLen(str) + 1)
 213                         {}
 214                     
 215                         Array<Char16> c16a;
 216                     };
 217                     
 218 mike          1.27  String::String()
 219                     {
 220 kumpf         1.43      _rep = new StringRep;
 221                         _rep->c16a.append('\0');
 222 mike          1.27  }
 223                     
 224 kumpf         1.39  String::String(const String& str)
 225 mike          1.27  {
 226 tony          1.66    if (str._rep != NULL)
 227                       {
 228 kumpf         1.43      _rep = new StringRep(*str._rep);
 229 tony          1.66    }
 230                       else
 231                       {
 232                         _rep = new StringRep();
 233                       }
 234 kumpf         1.39  }
 235 tony          1.66  
 236 mike          1.27  
 237 kumpf         1.39  String::String(const String& str, Uint32 n)
 238                     {
 239 kumpf         1.43      _rep = new StringRep;
 240 kumpf         1.55      assign(str.getChar16Data(), n);
 241 kumpf         1.39  }
 242                     
 243                     String::String(const Char16* str)
 244                     {
 245 david.dillard 1.105     if ( str == 0 )
 246                         {
 247                             throw NullPointer();
 248                         }
 249                     
 250 kumpf         1.43      _rep = new StringRep(str);
 251 mike          1.27  }
 252                     
 253 kumpf         1.39  String::String(const Char16* str, Uint32 n)
 254                     {
 255 david.dillard 1.105     if ( str == 0 )
 256                         {
 257                             throw NullPointer();
 258                         }
 259                     
 260 kumpf         1.43      _rep = new StringRep;
 261 kumpf         1.39      assign(str, n);
 262                     }
 263                     
 264                     String::String(const char* str)
 265 mike          1.27  {
 266 david.dillard 1.105     if ( str == 0 )
 267                         {
 268                             throw NullPointer();
 269                         }
 270                     
 271 kumpf         1.43      _rep = new StringRep;
 272 chuck         1.103     AutoPtr<StringRep> tempRep(_rep);
 273                         // An exception can be thrown, so use a temp AutoPtr.
 274 chuck         1.102     _convertAndAppend(str, _rep->c16a, 0, 1);
 275 chuck         1.103     tempRep.release();
 276 mike          1.27  }
 277                     
 278 kumpf         1.39  String::String(const char* str, Uint32 n)
 279 mike          1.27  {
 280 david.dillard 1.105     if ( str == 0 )
 281                         {
 282                             throw NullPointer();
 283                         }
 284                     
 285 kumpf         1.43      _rep = new StringRep;
 286 chuck         1.103     AutoPtr<StringRep> tempRep(_rep);
 287                         // An exception can be thrown, so use a temp AutoPtr.
 288 chuck         1.102     _convertAndAppend(str, _rep->c16a, n, 0);
 289 chuck         1.103     tempRep.release();
 290 kumpf         1.39  }
 291 mike          1.27  
 292 kumpf         1.39  String::~String()
 293                     {
 294 kumpf         1.43      delete _rep;
 295 mike          1.27  }
 296                     
 297 kumpf         1.39  String& String::operator=(const String& str)
 298 mike          1.27  {
 299 kumpf         1.82      if (&str != this)
 300                         {
 301                             assign(str);
 302                         }
 303                         return *this;
 304 mike          1.27  }
 305                     
 306 kumpf         1.39  String& String::assign(const String& str)
 307 mike          1.27  {
 308 kumpf         1.43      _rep->c16a = str._rep->c16a;
 309 kumpf         1.39      return *this;
 310 mike          1.27  }
 311                     
 312 kumpf         1.39  String& String::assign(const Char16* str)
 313 mike          1.27  {
 314 david.dillard 1.105     if ( str == 0 )
 315                         {
 316                             throw NullPointer();
 317                         }
 318                     
 319 kumpf         1.43      _rep->c16a.clear();
 320                         _rep->c16a.append(str, _StrLen(str) + 1);
 321 mike          1.27      return *this;
 322                     }
 323                     
 324                     String& String::assign(const Char16* str, Uint32 n)
 325                     {
 326 david.dillard 1.105     if ( str == 0 )
 327                         {
 328                             throw NullPointer();
 329                         }
 330                     
 331 kumpf         1.43      _rep->c16a.clear();
 332 joyce.j       1.101     _rep->c16a.append(str, n);
 333 kumpf         1.43      _rep->c16a.append('\0');
 334 mike          1.27      return *this;
 335                     }
 336                     
 337 chuck         1.102 String& String::assign(const char* str)
 338                     {
 339 david.dillard 1.105     if ( str == 0 )
 340                         {
 341                             throw NullPointer();
 342                         }
 343                     
 344 chuck         1.102     _rep->c16a.clear();
 345                         _convertAndAppend(str, _rep->c16a, 0, 1);
 346                         return *this;
 347                     }
 348                     
 349 kumpf         1.39  String& String::assign(const char* str, Uint32 n)
 350 mike          1.27  {
 351 david.dillard 1.105     if ( str == 0 )
 352                         {
 353                             throw NullPointer();
 354                         }
 355                     
 356 chuck         1.102     _rep->c16a.clear();
 357                         _convertAndAppend(str, _rep->c16a, n, 0);
 358 mike          1.27      return *this;
 359                     }
 360                     
 361 kumpf         1.39  void String::clear()
 362                     {
 363 kumpf         1.43      _rep->c16a.clear();
 364                         _rep->c16a.append('\0');
 365 kumpf         1.39  }
 366                     
 367 kumpf         1.43  void String::reserveCapacity(Uint32 capacity)
 368 kumpf         1.39  {
 369 kumpf         1.45      _rep->c16a.reserveCapacity(capacity + 1);
 370 kumpf         1.39  }
 371                     
 372                     Uint32 String::size() const
 373                     {
 374 gs.keenan     1.110 #if defined (PEGASUS_OS_VMS)
 375 gs.keenan     1.106   //
 376                       // This prevents returning a minus number.
 377                       //
 378                       // Seems as though the first time through
 379                       //  the XML parser something doesn't get
 380                       //  initialized and there is no check for
 381                       //  a negative number in the parser!
 382                       //
 383 gs.keenan     1.110   Uint32 foo;
 384                       foo = _rep->c16a.size();
 385                       if (foo == 0)
 386                       {
 387                         return 0;
 388                       }
 389                       else
 390                       {
 391                         return (foo -1);
 392                       }
 393                     #else
 394 kumpf         1.43      return _rep->c16a.size() - 1;
 395 gs.keenan     1.110 #endif
 396 kumpf         1.39  }
 397                     
 398 kumpf         1.55  const Char16* String::getChar16Data() const
 399 kumpf         1.39  {
 400 kumpf         1.43      return _rep->c16a.getData();
 401 kumpf         1.39  }
 402                     
 403 kumpf         1.53  Char16& String::operator[](Uint32 index)
 404 mike          1.27  {
 405 kumpf         1.53      if (index > size())
 406 david.dillard 1.105         throw IndexOutOfBoundsException();
 407 mike          1.27  
 408 kumpf         1.53      return _rep->c16a[index];
 409 mike          1.27  }
 410                     
 411 kumpf         1.53  const Char16 String::operator[](Uint32 index) const
 412 mike          1.27  {
 413 kumpf         1.53      if (index > size())
 414 david.dillard 1.105         throw IndexOutOfBoundsException();
 415 mike          1.27  
 416 kumpf         1.53      return _rep->c16a[index];
 417 mike          1.27  }
 418                     
 419 kumpf         1.39  String& String::append(const Char16& c)
 420                     {
 421 kumpf         1.43      _rep->c16a.insert(_rep->c16a.size() - 1, c);
 422 kumpf         1.39      return *this;
 423                     }
 424                     
 425 mike          1.27  String& String::append(const Char16* str, Uint32 n)
 426                     {
 427 david.dillard 1.105      if (str == 0)
 428 joyce.j       1.101      {
 429                              throw NullPointer();
 430                          }
 431 david.dillard 1.105 
 432 joyce.j       1.101     _rep->c16a.reserveCapacity(_rep->c16a.size() + n);
 433 kumpf         1.43      _rep->c16a.remove(_rep->c16a.size() - 1);
 434 joyce.j       1.101     _rep->c16a.append(str, n);
 435 kumpf         1.43      _rep->c16a.append('\0');
 436 mike          1.27      return *this;
 437                     }
 438                     
 439 kumpf         1.39  String& String::append(const String& str)
 440 mike          1.27  {
 441 kumpf         1.55      return append(str.getChar16Data(), str.size());
 442 mike          1.27  }
 443                     
 444 kumpf         1.53  void String::remove(Uint32 index, Uint32 size)
 445 mike          1.27  {
 446 kumpf         1.39      if (size == PEG_NOT_FOUND)
 447 david.dillard 1.105         size = this->size() - index;
 448 mike          1.27  
 449 kumpf         1.53      if (index + size > this->size())
 450 david.dillard 1.105         throw IndexOutOfBoundsException();
 451 mike          1.27  
 452 kumpf         1.39      if (size)
 453 david.dillard 1.105         _rep->c16a.remove(index, size);
 454 mike          1.27  }
 455                     
 456 kumpf         1.53  String String::subString(Uint32 index, Uint32 length) const
 457 mike          1.27  {
 458 kumpf         1.53      if (index < size())
 459 mike          1.27      {
 460 david.dillard 1.105         if ((length == PEG_NOT_FOUND) || (length > size() - index))
 461                                 length = size() - index;
 462 mike          1.27  
 463 david.dillard 1.105         return String(getChar16Data() + index, length);
 464 mike          1.27      }
 465 david.dillard 1.105 
 466                         return String();
 467 mike          1.27  }
 468                     
 469                     Uint32 String::find(Char16 c) const
 470                     {
 471 kumpf         1.55      const Char16* first = getChar16Data();
 472 mike          1.27  
 473                         for (const Char16* p = first; *p; p++)
 474                         {
 475 david.dillard 1.105         if (*p == c)
 476                                 return  p - first;
 477 mike          1.27      }
 478                     
 479                         return PEG_NOT_FOUND;
 480                     }
 481                     
 482 kumpf         1.53  Uint32 String::find(Uint32 index, Char16 c) const
 483 mike          1.30  {
 484 kumpf         1.55      const Char16* data = getChar16Data();
 485 mike          1.30  
 486 kumpf         1.53      for (Uint32 i = index, n = size(); i < n; i++)
 487 mike          1.30      {
 488 david.dillard 1.105         if (data[i] == c)
 489                                 return i;
 490 mike          1.30      }
 491                     
 492                         return PEG_NOT_FOUND;
 493                     }
 494                     
 495 mike          1.27  Uint32 String::find(const String& s) const
 496                     {
 497 kumpf         1.55      const Char16* pSubStr = s.getChar16Data();
 498                         const Char16* pStr = getChar16Data();
 499 mike          1.27      Uint32 subStrLen = s.size();
 500                         Uint32 strLen = size();
 501                     
 502 mike          1.30      if (subStrLen > strLen)
 503                         {
 504                             return PEG_NOT_FOUND;
 505                         }
 506                     
 507 mike          1.27      // loop to find first char match
 508                         Uint32 loc = 0;
 509                         for( ; loc <= (strLen-subStrLen); loc++)
 510                         {
 511 david.dillard 1.105         if (*pStr++ == *pSubStr)  // match first char
 512                             {
 513                                 // point to substr 2nd char
 514                                 const Char16* p = pSubStr + 1;
 515                     
 516                                 // Test remaining chars for equal
 517                                 Uint32 i = 1;
 518                                 for (; i < subStrLen; i++)
 519                                     if (*pStr++ != *p++ )
 520                                         {pStr-=i; break;} // break from loop
 521                                 if (i == subStrLen)
 522                                     return loc;
 523                             }
 524 mike          1.27      }
 525                         return PEG_NOT_FOUND;
 526                     }
 527                     
 528                     Uint32 String::reverseFind(Char16 c) const
 529                     {
 530 kumpf         1.55      const Char16* first = getChar16Data();
 531                         const Char16* last = getChar16Data() + size();
 532 mike          1.27  
 533                         while (last != first)
 534                         {
 535 david.dillard 1.105         if (*--last == c)
 536                                 return last - first;
 537 mike          1.27      }
 538                     
 539                         return PEG_NOT_FOUND;
 540                     }
 541                     
 542                     void String::toLower()
 543                     {
 544 david         1.69  #ifdef PEGASUS_HAS_ICU
 545 yi.zhou       1.108     if (InitializeICU::initICUSuccessful())
 546 david         1.90      {
 547 yi.zhou       1.108         // This will do a locale-insensitive, but context-sensitive convert.
 548                             // Context-sensitive prevents any optimizations that try to
 549                             // convert just the ascii before calling ICU.
 550                             // The string may shrink or expand after the convert.
 551                     
 552                             int32_t sz = size();
 553                             UChar* destbuf = new UChar[sz + 1];
 554                             const UChar* srcbuf = (const UChar *)getChar16Data();
 555                             UErrorCode err = U_ZERO_ERROR;
 556                     
 557                             int32_t needed = u_strToLower(destbuf, sz + 1 , srcbuf, sz, NULL, &err);
 558                             if (err == U_BUFFER_OVERFLOW_ERROR)
 559                             {
 560                               delete [] destbuf;
 561                               destbuf = new UChar[needed + 1];
 562                               err = U_ZERO_ERROR;
 563                               u_strToLower(destbuf, needed + 1 , srcbuf, sz, NULL, &err);
 564                             }
 565                             if (U_FAILURE(err))
 566                             {
 567                                 delete [] destbuf;
 568 yi.zhou       1.108             throw Exception(u_errorName(err));
 569                             }
 570 chuck         1.99  
 571 yi.zhou       1.108         if (needed == sz)
 572                             {
 573                                 Char16* from = (Char16*)destbuf;
 574                                 for (Char16* to = &_rep->c16a[0]; *to; to++, from++)
 575                                 {
 576                                   *to = *from;
 577                                 }
 578                             }
 579                             else
 580 chuck         1.99          {
 581 yi.zhou       1.108             assign((Char16 *)destbuf, needed);
 582 chuck         1.99          }
 583 yi.zhou       1.108 
 584                             delete [] destbuf;
 585 david         1.90      }
 586                         else
 587 yi.zhou       1.108 #endif
 588 david         1.90      {
 589 yi.zhou       1.108         for (Char16* p = &_rep->c16a[0]; *p; p++)
 590                             {
 591                                 if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
 592 david.dillard 1.105                 *p = tolower(*p);
 593 yi.zhou       1.108         }
 594 mike          1.27      }
 595 kumpf         1.39  }
 596                     
 597 chuck         1.99  void String::toUpper()
 598 david         1.90  {
 599                     #ifdef PEGASUS_HAS_ICU
 600 yi.zhou       1.108     if (InitializeICU::initICUSuccessful())
 601 chuck         1.99      {
 602 yi.zhou       1.108         // This will do a locale-insensitive, but context-sensitive convert.
 603                             // Context-sensitive prevents any optimizations that try to
 604                             // convert just the ascii before calling ICU.
 605                             // The string may shrink or expand after the convert.
 606                     
 607                             int32_t sz = size();
 608                             UChar* destbuf = new UChar[sz + 1];
 609                             const UChar* srcbuf = (const UChar *)getChar16Data();
 610                             UErrorCode err = U_ZERO_ERROR;
 611                     
 612                             int32_t needed = u_strToUpper(destbuf, sz + 1 , srcbuf, sz, NULL, &err);
 613                             if (err == U_BUFFER_OVERFLOW_ERROR)
 614                             {
 615                               delete [] destbuf;
 616                               destbuf = new UChar[needed + 1];
 617                               err = U_ZERO_ERROR;
 618                               u_strToUpper(destbuf, needed + 1 , srcbuf, sz, NULL, &err);
 619                             }
 620                             if (U_FAILURE(err))
 621                             {
 622                                 delete [] destbuf;
 623 yi.zhou       1.108             throw Exception(u_errorName(err));
 624                             }
 625 chuck         1.99  
 626 yi.zhou       1.108         if (needed == sz)
 627                             {
 628                                 Char16* from = (Char16*)destbuf;
 629                                 for (Char16* to = &_rep->c16a[0]; *to; to++, from++)
 630                                 {
 631                                   *to = *from;
 632                                 }
 633                             }
 634                             else
 635 chuck         1.99          {
 636 yi.zhou       1.108             assign((Char16 *)destbuf, needed);
 637 chuck         1.99          }
 638 yi.zhou       1.108 
 639                             delete [] destbuf;
 640 david         1.90      }
 641                         else
 642 yi.zhou       1.108 #endif
 643 david         1.90      {
 644 yi.zhou       1.108         for (Char16* p = &_rep->c16a[0]; *p; p++)
 645                             {
 646 david.dillard 1.105             if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
 647                                     *p = toupper(*p);
 648 yi.zhou       1.108         }
 649 david         1.91      }
 650 david         1.90  }
 651                     
 652 kumpf         1.43  int String::compare(const String& s1, const String& s2, Uint32 n)
 653 kumpf         1.39  {
 654 kumpf         1.55      const Char16* s1c16 = s1.getChar16Data();
 655                         const Char16* s2c16 = s2.getChar16Data();
 656 kumpf         1.39  
 657                         while (n--)
 658 mike          1.27      {
 659 david.dillard 1.105         int r = *s1c16++ - *s2c16++;
 660 mike          1.27  
 661 david.dillard 1.105         if (r)
 662                                 return r;
 663 mike          1.27      }
 664                     
 665                         return 0;
 666                     }
 667                     
 668 kumpf         1.43  int String::compare(const String& s1, const String& s2)
 669 mike          1.30  {
 670 kumpf         1.55      const Char16* s1c16 = s1.getChar16Data();
 671                         const Char16* s2c16 = s2.getChar16Data();
 672 kumpf         1.43  
 673                         while (*s1c16 && *s2c16)
 674 mike          1.30      {
 675 david.dillard 1.105         int r = *s1c16++ - *s2c16++;
 676 mike          1.30  
 677 david.dillard 1.105         if (r)
 678                                 return r;
 679 mike          1.30      }
 680                     
 681 kumpf         1.43      if (*s2c16)
 682 david.dillard 1.105         return -1;
 683 kumpf         1.43      else if (*s1c16)
 684 david.dillard 1.105         return 1;
 685 mike          1.30  
 686                         return 0;
 687                     }
 688                     
 689 kumpf         1.40  int String::compareNoCase(const String& s1, const String& s2)
 690                     {
 691 david         1.69  #ifdef PEGASUS_HAS_ICU
 692 yi.zhou       1.108     if (InitializeICU::initICUSuccessful())
 693                         {
 694                             return  u_strcasecmp((const UChar*)s1.getChar16Data(),
 695                                                  (const UChar*)s2.getChar16Data(),
 696                                                  U_FOLD_CASE_DEFAULT);
 697                         }
 698                     #endif
 699 kumpf         1.55      const Char16* _s1 = s1.getChar16Data();
 700                         const Char16* _s2 = s2.getChar16Data();
 701 kumpf         1.40  
 702                         while (*_s1 && *_s2)
 703                         {
 704                             int r;
 705                     
 706 kumpf         1.46          if (*_s1 <= PEGASUS_MAX_PRINTABLE_CHAR &&
 707                                 *_s2 <= PEGASUS_MAX_PRINTABLE_CHAR)
 708 kumpf         1.40          {
 709                                 r = tolower(*_s1++) - tolower(*_s2++);
 710                             }
 711                             else
 712                             {
 713                                 r = *_s1++ - *_s2++;
 714                             }
 715                     
 716 david.dillard 1.105         if (r)
 717                                 return r;
 718 kumpf         1.40      }
 719                     
 720                         if (*_s2)
 721 david.dillard 1.105         return -1;
 722 kumpf         1.40      else if (*_s1)
 723 david.dillard 1.105         return 1;
 724 kumpf         1.40  
 725                         return 0;
 726                     }
 727                     
 728 kumpf         1.39  Boolean String::equal(const String& str1, const String& str2)
 729 mike          1.27  {
 730 kumpf         1.43      return String::compare(str1, str2) == 0;
 731 mike          1.27  }
 732                     
 733 kumpf         1.39  Boolean String::equalNoCase(const String& str1, const String& str2)
 734 mike          1.27  {
 735 david         1.69  #ifdef PEGASUS_HAS_ICU
 736 chuck         1.99      return  compareNoCase(str1, str2) == 0;
 737 david         1.69  #else
 738 kumpf         1.39      if (str1.size() != str2.size())
 739 david.dillard 1.105         return false;
 740 kumpf         1.39  
 741 kumpf         1.55      const Char16* p = str1.getChar16Data();
 742                         const Char16* q = str2.getChar16Data();
 743 kumpf         1.39  
 744                         Uint32 n = str1.size();
 745 mike          1.27  
 746 kumpf         1.39      while (n--)
 747                         {
 748 david.dillard 1.105         if (*p <= PEGASUS_MAX_PRINTABLE_CHAR &&
 749 kumpf         1.46              *q <= PEGASUS_MAX_PRINTABLE_CHAR)
 750 david.dillard 1.105         {
 751                                 if (tolower(*p++) != tolower(*q++))
 752                                     return false;
 753                             }
 754                             else if (*p++ != *q++)
 755                                 return false;
 756 kumpf         1.39      }
 757 mike          1.28  
 758 kumpf         1.39      return true;
 759 david         1.69  #endif
 760                     }
 761                     
 762 mike          1.27  
 763 david         1.90  CString String::getCString() const
 764 david         1.69  {
 765 david         1.79      Uint32 n = 3*size() + 1;
 766 david         1.69      char* str = new char[n];
 767                     
 768                         const Char16* msg16 = getChar16Data();
 769                     
 770                         const Uint16 *strsrc = (Uint16 *)msg16;
 771 david         1.71      Uint16 *endsrc = (Uint16 *)&msg16[size()+1];
 772 david         1.69  
 773                         Uint8 *strtgt = (Uint8 *)str;
 774                         Uint8 *endtgt = (Uint8 *)&str[n];
 775                     
 776                         UTF16toUTF8 (&strsrc,
 777 david.dillard 1.105                  endsrc,
 778                                      &strtgt,
 779                                      endtgt);
 780                     
 781                             char* str1 = new char[strlen(str)+1];
 782                             strcpy(str1,str);
 783                             delete [] str;
 784 david         1.71  
 785                         return CString(str1);
 786 david         1.69  }
 787 kumpf         1.42  
 788 kumpf         1.65  #if 0
 789 kumpf         1.42  // ATTN-RK-P3-20020603: This code is not completely correct
 790 karl          1.36   // Wildcard String matching function that may be useful in the future
 791                     // The following code was provided by Bob Blair.
 792                     
/* _StringMatch: match an input string against a GLOB-style pattern.
       Note that MatchChar is a separate character type so that this source
       is portable to different string types. This is an internal function.

   Results: The return value is 1 if the string matches the pattern, and
        0 otherwise.  The matching operation permits the following
        special characters in the pattern: *?\[] (see the manual
        entry for details on what these mean).

   Side effects: None.
 */
 805 david.dillard 1.105 
/* MatchChar is defined as a separate type because the source this function
    was adapted from operated on unsigned char *. Here Uint16 is used to
    maintain a 16-bit character size.
*/
typedef Uint16 MatchChar;
 811                     
 812                     inline Uint16 _ToLower(Uint16 ch)
 813                     {
 814 david         1.69      // ICU_TODO:  If ICU is available we should do this the correct way.
 815 kumpf         1.46      return ch <= PEGASUS_MAX_PRINTABLE_CHAR ? tolower(char(ch)) : ch;
 816 karl          1.36  }
 817                     
 818                     inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase)
 819                     {
 820 david         1.69      // ICU_TODO:  If ICU is available we should do this the correct way.
 821 karl          1.36      if (nocase)
 822 david.dillard 1.105         return _ToLower(ch1) == _ToLower(ch2);
 823                     
 824                         return ch1 == ch2;
 825 karl          1.36  }
 826 mike          1.28  
 827 kumpf         1.35  
 828 karl          1.36  static const MatchChar *
 829                     _matchrange(const MatchChar *range, MatchChar c, int nocase)
 830                     {
 831                       const MatchChar *p = range;
 832                       const MatchChar *rstart = range + 1;
 833                       const MatchChar *rend = 0;
 834                       MatchChar compchar;
 835                     
 836 kumpf         1.35    for (rend = rstart; *rend && *rend != ']'; rend++);
 837 karl          1.36    if (*rend == ']') {  // if there is an end to this pattern
 838 kumpf         1.35      for (compchar = *rstart; rstart != rend; rstart++) {
 839 karl          1.36        if (_Equal(*rstart, c, nocase))
 840 kumpf         1.35          return ++rend;
 841                           if (*rstart == '-') {
 842                             rstart++;
 843                             if (c >= compchar && c <= *rstart)
 844                               return ++rend;
 845                           }
 846                         }
 847                       }
 848 karl          1.36    return (const MatchChar *)0;
 849 kumpf         1.35  }
 850                     
 851                     static int
 852 david.dillard 1.105 _StringMatch(
 853                         const MatchChar *testString,
 854 karl          1.36      const MatchChar *pattern,
 855 david.dillard 1.105     int nocase )                /* Ignore case if this is true */
 856 karl          1.36  {
 857                       const MatchChar *pat = pattern;
 858                       const MatchChar *str = testString;
 859 kumpf         1.35    unsigned int done = 0;
 860                       unsigned int res = 0;  // the result: 1 == match
 861                     
 862                       while (!done) { // main loop walks through pattern and test string
 863                         //cerr << "Comparing <" << *pat << "> and <" << *str << ">" << endl;
 864                         if (!*pat) {                                         //end of pattern
 865                           done = 1;                                          // we're done
 866                           if (!*str)                                         //end of test, too?
 867                             res = 1;                                         // then we matched
 868                         } else {                                             //Not end of pattern
 869                           if (!*str) {                                       // but end of test
 870                             done = 1;                                        // We're done
 871                             if (*pat == '*')                                 // If pattern openends
 872                               res = 1;                                       //  then we matched
 873                           } else {                                           //Not end of test
 874                             if (*pat == '*') {                               //Ambiguuity found
 875                               if (!*++pat) {                                 //and it ends pattern
 876                                 done = 1;                                    //  then we're done
 877                                 res = 1;                                     //  and match
 878                               } else {                                       //if it doesn't end
 879                                 while (!done) {                              //  until we're done
 880 karl          1.36                if (_StringMatch(str, pat, nocase)) {      //  we recurse
 881 kumpf         1.35                  done = 1;                                //if it recurses true
 882                                     res = 1;                                 //  we done and match
 883                                   } else {                                   //it recurses false
 884                                     if (!*str)                               // see if test is done
 885                                       done = 1;                              //  yes: we done
 886                                     else                                     // not done:
 887                                       str++;                                 //   keep testing
 888                                   } // end test on recursive call
 889                                 } // end looping on recursive calls
 890                               } // end logic when pattern is ambiguous
 891                             } else {                                         //pattern not ambiguus
 892                               if (*pat == '?') {                             //pattern is 'any'
 893                                 pat++, str++;                                //  so move along
 894                               } else if (*pat == '[') {                      //see if it's a range
 895 karl          1.36              pat = _matchrange(pat, *str, nocase);         // and is a match
 896 kumpf         1.35              if (!pat) {                                  //It is not a match
 897                                   done = 1;                                  //  we're done
 898 kumpf         1.42                res = 0;                                   //  no match
 899 kumpf         1.35              } else {                                     //Range matches
 900                                   str++, pat++;                              //  keep going
 901                                 }
 902                               } else {               // only case left is individual characters
 903 karl          1.36              if (!_Equal(*pat++, *str++, nocase))         // if they don't match
 904 kumpf         1.35                done = 1;                                  //   bail.
 905                               }
 906                             }  // end ("pattern is not ambiguous (*)" logic
 907                           } // end logic when pattern and string still have data
 908                         } // end logic when pattern still has data
 909                       } // end main loop
 910                       return res;
 911                     }
 912                     
 913 kumpf         1.39  
 914 kumpf         1.65      /** match matches a string against a GLOB style pattern.
 915                             Return trues if the String parameter matches the pattern. C-Shell style
 916 david.dillard 1.105         glob matching is used.
 917 kumpf         1.65          @param str String to be matched against the pattern
 918                             @param pattern Pattern to use in the match
 919                             @return Boolean true if str matches pattern
 920                             The pattern definition is as follows:
 921                             <pre>
 922                             *             Matches any number of any characters
 923                             ?             Match exactly one character
 924                             [chars]       Match any character in chars
 925                             [chara-charb] Match any character in the range between chara and charb
 926                             </pre>
 927                             The literal characters *, ?, [, ] can be included in a string by
 928                             escaping them with backslash "\".  Ranges of characters can be concatenated.
 929                             <pre>
 930                             examples:
 931                             Boolean result = String::match("This is a test", "*is*");
 932                             Boolean works =  String::match("abcdef123", "*[0-9]");
 933                             </pre>
 934                         */
 935 karl          1.36  Boolean String::match(const String& str, const String& pattern)
 936                     {
 937                         return _StringMatch(
 938 david.dillard 1.105         (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 0) != 0;
 939 karl          1.36  }
 940                     
 941 kumpf         1.65      /** matchNoCase Matches a String against a GLOB style pattern independent
 942 david.dillard 1.105         of case.
 943 kumpf         1.65          Returns true if the str parameter matches the pattern. C-Shell style
 944 david.dillard 1.104         glob matching is used. Ignore case in all comparisons. Case is
 945 kumpf         1.65          ignored in the match.
 946                             @parm str String containing the string to be matched\
 947                             @parm pattern GLOB style patterh to use in the match.
 948                             @return Boolean true if str matches patterh
 949 david.dillard 1.104         @see match
 950 kumpf         1.65      */
 951 karl          1.36  Boolean String::matchNoCase(const String& str, const String& pattern)
 952                     {
 953                         return _StringMatch(
 954 david.dillard 1.105         (Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 1) != 0;
 955 kumpf         1.39  }
 956 kumpf         1.65  #endif
 957 kumpf         1.39  
 958                     
 959                     ///////////////////////////////////////////////////////////////////////////////
 960                     //
 961                     // String-related functions
 962                     //
 963                     ///////////////////////////////////////////////////////////////////////////////
 964                     
 965                     Boolean operator==(const String& str1, const String& str2)
 966                     {
 967                         return String::equal(str1, str2);
 968                     }
 969                     
 970                     Boolean operator==(const String& str1, const char* str2)
 971                     {
 972                         return String::equal(str1, str2);
 973                     }
 974                     
 975                     Boolean operator==(const char* str1, const String& str2)
 976                     {
 977                         return String::equal(str1, str2);
 978 kumpf         1.39  }
 979                     
 980                     Boolean operator!=(const String& str1, const String& str2)
 981                     {
 982                         return !String::equal(str1, str2);
 983                     }
 984                     
 985 kumpf         1.47  PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
 986 kumpf         1.39  {
 987 david         1.72  
 988 david         1.69  #if defined(PEGASUS_OS_OS400)
 989 david         1.93      CString cstr = str.getCString();
 990 david         1.69      const char* utf8str = cstr;
 991                     
 992                         os << utf8str;
 993                     
 994 humberto      1.76  #elif defined(PEGASUS_HAS_ICU)
 995 yi.zhou       1.108     if (InitializeICU::initICUSuccessful())
 996                         {
 997 david.dillard 1.105         char *buf = NULL;
 998                             const int size = str.size() * 6;
 999                             UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size());
1000                             Uint32 bufsize = UniStr.extract(0,size,buf);
1001                     
1002                             buf = new char[bufsize+1];
1003                             UniStr.extract(0,bufsize,buf);
1004                             os << buf;
1005                             os.flush();
1006                             delete [] buf;
1007 yi.zhou       1.108     }
1008                         else
1009                     #endif // End of PEGASUS_HAS_ICU #else leg.
1010                         {
1011 david.dillard 1.105         for (Uint32 i = 0, n = str.size(); i < n; i++)
1012                             {
1013                                     Uint16 code = str[i];
1014                     
1015                                     if (code > 0 && code <= PEGASUS_MAX_PRINTABLE_CHAR)
1016                                     {
1017                                      os << char(code);
1018                                     }
1019                                     else
1020                                     {
1021                                     // Print in hex format:
1022                                     char buffer[8];
1023                                     sprintf(buffer, "\\x%04X", code);
1024                                     os << buffer;
1025                                     }
1026                             }
1027 yi.zhou       1.108     }
1028 kumpf         1.39  
1029                         return os;
1030                     }
1031                     
1032                     String operator+(const String& str1, const String& str2)
1033                     {
1034                         return String(str1).append(str2);
1035                     }
1036                     
1037                     Boolean operator<(const String& str1, const String& str2)
1038                     {
1039 kumpf         1.43      return String::compare(str1, str2) < 0;
1040 kumpf         1.39  }
1041                     
1042                     Boolean operator<=(const String& str1, const String& str2)
1043                     {
1044 kumpf         1.43      return String::compare(str1, str2) <= 0;
1045 kumpf         1.39  }
1046                     
1047                     Boolean operator>(const String& str1, const String& str2)
1048                     {
1049 kumpf         1.43      return String::compare(str1, str2) > 0;
1050 kumpf         1.39  }
1051                     
1052                     Boolean operator>=(const String& str1, const String& str2)
1053                     {
1054 kumpf         1.43      return String::compare(str1, str2) >= 0;
1055 kumpf         1.39  }
1056                     
1057 mike          1.27  PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2