(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.98 //%2005////////////////////////////////////////////////////////////////////////
   2 mike  1.27 //
   3 karl  1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 mike  1.27 //
  12            // Permission is hereby granted, free of charge, to any person obtaining a copy
  13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
  14            // deal in the Software without restriction, including without limitation the
  15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16 mike  1.27 // sell copies of the Software, and to permit persons to whom the Software is
  17            // furnished to do so, subject to the following conditions:
  18 david.dillard 1.105 //
  19 kumpf         1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  20 mike          1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  21                     // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  22 kumpf         1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  23                     // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  24                     // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 mike          1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26                     // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27                     //
  28                     //==============================================================================
  29                     //
  30 mike          1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
  31 mike          1.27  //
  32 david.dillard 1.116 // Modified By:
  33 mike          1.112 //     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
  34                     //     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
  35 david.dillard 1.116 //     David Dillard, Symantec Corp. (david_dillard@symantec.com)
  36 mike          1.112 //     Mike Brasher (mike-brasher@austin.rr.com)
  37 mike          1.27  //
  38                     //%/////////////////////////////////////////////////////////////////////////////
  39                     
  40 jim.wunderlich 1.115 #include <Pegasus/Common/PegasusAssert.h>
  41 mike           1.113 #include <cstring>
  42 kumpf          1.48  #include "InternalException.h"
  43 david          1.69  #include "CommonUTF.h"
  44 mike           1.112 #include "MessageLoader.h"
  45                      #include "StringRep.h"
  46 david          1.69  
  47                      #ifdef PEGASUS_HAS_ICU
  48 chuck          1.99  #include <unicode/ustring.h>
  49                      #include <unicode/uchar.h>
  50 david          1.69  #endif
  51                      
  52 mike           1.112 PEGASUS_NAMESPACE_BEGIN
  53 mike           1.28  
  54 mike           1.112 //==============================================================================
  55                      //
  56                      // Compile-time macros (undefined by default).
  57                      //
  58                      //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
  59 david.dillard  1.116 //
  60 mike           1.112 //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  61                      //
  62                      //==============================================================================
  63 mike           1.27  
  64 mike           1.112 //==============================================================================
  65 kumpf          1.39  //
  66 mike           1.112 // File-scope definitions:
  67 kumpf          1.54  //
  68 mike           1.112 //==============================================================================
  69                      
  70                      // Note: this table is much faster than the system toupper(). Please do not
  71                      // change.
  72 kumpf          1.54  
   73 david.dillard  1.116 const Uint8 _toUpperTable[256] =
   74 kumpf          1.54  {
                               // Upper-case mapping indexed by an 8-bit character code. Entries
                               // 0x61-0x7A ('a'-'z') hold 0x41-0x5A ('A'-'Z'); every other entry
                               // is equal to its own index.
   75 mike           1.112     0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
   76                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
   77                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
   78                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
   79                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
   80                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
   81                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
   82                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
   83                          0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
   84                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
   85                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
   86                          0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
   87                          0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
   88                          0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
   89                          0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
   90                          0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
   91                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
   92                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
   93                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
   94                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
   95                          0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
   96 mike           1.112     0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
   97                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
   98                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
   99                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
  100                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  101                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
  102                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
  103                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
  104                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
  105                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
  106                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
  107                      };
 108                      
  109                      // Note: this table is much faster than the system tolower(). Please do not
  110                      // change.
 111                      
  112 david.dillard  1.116 const Uint8 _toLowerTable[256] =
  113 mike           1.112 {
                               // Lower-case mapping indexed by an 8-bit character code. Entries
                               // 0x41-0x5A ('A'-'Z') hold 0x61-0x7A ('a'-'z'); every other entry
                               // is equal to its own index.
  114                          0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  115                          0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  116                          0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  117                          0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  118                          0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  119                          0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  120                          0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  121                          0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  122                          0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
  123                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
  124                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
  125                          0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
  126                          0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
  127                          0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
  128                          0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
  129                          0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
  130                          0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
  131                          0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
  132                          0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
  133                          0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
  134 mike           1.112     0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  135                          0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  136                          0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
  137                          0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  138                          0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
  139                          0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  140                          0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
  141                          0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
  142                          0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
  143                          0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
  144                          0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
  145                          0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
  146                      };
 147                      
 148                      // Converts 16-bit characters to upper case. This routine is faster than the
 149                      // system toupper(). Please do not change.
 150                      inline Uint16 _toUpper(Uint16 x)
 151                      {
 152                          return (x & 0xFF00) ? x : _toUpperTable[x];
 153 kumpf          1.54  }
 154                      
 155 mike           1.112 // Converts 16-bit characters to lower case. This routine is faster than the
 156                      // system toupper(). Please do not change.
 157                      inline Uint16 _toLower(Uint16 x)
 158 kumpf          1.54  {
 159 mike           1.112     return (x & 0xFF00) ? x : _toLowerTable[x];
 160                      }
 161                      
 162                      // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 163                      static Uint32 _roundUpToPow2(Uint32 x)
 164                      {
 165                      #ifndef PEGASUS_STRING_NO_THROW
 166                      
 167                          if (x > 0x0FFFFFFF)
 168                              throw PEGASUS_STD(bad_alloc)();
 169                      
 170                      #endif
 171                      
 172                          if (x < 8)
 173                              return 8;
 174                      
 175                          x--;
 176                          x |= (x >> 1);
 177                          x |= (x >> 2);
 178                          x |= (x >> 4);
 179                          x |= (x >> 8);
 180 mike           1.112     x |= (x >> 16);
 181                          x++;
 182                      
 183                          return x;
 184                      }
 185                      
 186                      template<class P, class Q>
 187                      static void _copy(P* p, const Q* q, size_t n)
 188                      {
 189                          // The following employs loop unrolling for efficiency. Please do not
 190                          // eliminate.
 191                      
 192                          while (n >= 8)
 193                          {
 194                              p[0] = q[0];
 195                              p[1] = q[1];
 196                              p[2] = q[2];
 197                              p[3] = q[3];
 198                              p[4] = q[4];
 199                              p[5] = q[5];
 200                              p[6] = q[6];
 201 mike           1.112         p[7] = q[7];
 202                              p += 8;
 203                              q += 8;
 204                              n -= 8;
 205                          }
 206                      
 207                          while (n >= 4)
 208                          {
 209                              p[0] = q[0];
 210                              p[1] = q[1];
 211                              p[2] = q[2];
 212                              p[3] = q[3];
 213                              p += 4;
 214                              q += 4;
 215                              n -= 4;
 216                          }
 217                      
 218                          while (n--)
 219                              *p++ = *q++;
 220                      }
 221                      
 222 mike           1.112 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 223                      {
 224                          // The following employs loop unrolling for efficiency. Please do not
 225                          // eliminate.
 226                      
 227                          while (n >= 4)
 228                          {
 229                              if (s[0] == c)
 230                                  return (Uint16*)s;
 231                              if (s[1] == c)
 232                                  return (Uint16*)&s[1];
 233                              if (s[2] == c)
 234                                  return (Uint16*)&s[2];
 235                              if (s[3] == c)
 236                                  return (Uint16*)&s[3];
 237 kumpf          1.82  
 238 mike           1.112         n -= 4;
 239                              s += 4;
 240                          }
 241                      
 242                          if (n)
 243                          {
 244                              if (*s == c)
 245                                  return (Uint16*)s;
 246                              s++;
 247                              n--;
 248                          }
 249                      
 250                          if (n)
 251                          {
 252                              if (*s == c)
 253                                  return (Uint16*)s;
 254                              s++;
 255                              n--;
 256                          }
 257                      
 258                          if (n && *s == c)
 259 mike           1.112         return (Uint16*)s;
 260                      
 261                          // Not found!
 262                          return 0;
 263                      }
 264                      
 265                      static int _compare(const Uint16* s1, const Uint16* s2)
 266                      {
 267                          while (*s1 && *s2)
 268                          {
 269                              int r = *s1++ - *s2++;
 270                      
 271                              if (r)
 272                                  return r;
 273                          }
 274                      
 275                          if (*s2)
 276                              return -1;
 277                          else if (*s1)
 278                              return 1;
 279                      
 280 mike           1.112     return 0;
 281                      }
 282                      
 283                      static int _compareNoUTF8(const Uint16* s1, const char* s2)
 284                      {
 285                          Uint16 c1;
 286                          Uint16 c2;
 287                      
 288                          do
 289                          {
 290                              c1 = *s1++;
 291                              c2 = *s2++;
 292                      
 293                              if (c1 == 0)
 294                                  return c1 - c2;
 295                          }
 296                          while (c1 == c2);
 297                      
 298                          return c1 - c2;
 299                      }
 300                      
 301 mike           1.112 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
 302                      {
 303                          // This should only be called when s1 and s2 have the same length.
 304                      
 305                          while (n-- && (*s1++ - *s2++) == 0)
 306                              ;
 307                      
 308                          return s1[-1] - s2[-1];
 309                      }
 310                      
 311                      static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 312                      {
 313                          memcpy(s1, s2, n * sizeof(Uint16));
 314                      }
 315                      
  316                      void StringThrowOutOfBounds()
  317                      {
                               // Non-inline helper that unconditionally throws
                               // IndexOutOfBoundsException; it never returns normally.
  318                          throw IndexOutOfBoundsException();
  319                      }
 320                      
 321                      inline void _checkNullPointer(const void* ptr)
 322 mike           1.112 {
 323                      #ifdef PEGASUS_STRING_NO_THROW
 324                      
 325                          if (!ptr)
 326                              throw NullPointer();
 327                      
 328                      #endif
 329                      }
 330                      
 331                      static void _StringThrowBadUTF8(Uint32 index)
 332                      {
 333                          MessageLoaderParms parms(
 334                              "Common.String.BAD_UTF8",
 335                              "The byte sequence starting at index $0 "
 336                              "is not valid UTF-8 encoding.",
 337                              index);
 338                          throw Exception(parms);
 339                      }
 340                      
 341                      static size_t _copyFromUTF8(
 342 david.dillard  1.116     Uint16* dest,
 343                          const char* src,
 344 mike           1.112     size_t n,
 345                          size_t& utf8_error_index)
 346                      {
 347                          Uint16* p = dest;
 348                          const Uint8* q = (const Uint8*)src;
 349                      
 350                          // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
 351                          // Use loop-unrolling.
 352                      
 353                          while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
 354                          {
 355                              p[0] = q[0];
 356                              p[1] = q[1];
 357                              p[2] = q[2];
 358                              p[3] = q[3];
 359                              p[4] = q[4];
 360                              p[5] = q[5];
 361                              p[6] = q[6];
 362                              p[7] = q[7];
 363                              p += 8;
 364                              q += 8;
 365 mike           1.112         n -= 8;
 366                          }
 367                      
 368                          while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
 369                          {
 370                              p[0] = q[0];
 371                              p[1] = q[1];
 372                              p[2] = q[2];
 373                              p[3] = q[3];
 374                              p += 4;
 375                              q += 4;
 376                              n -= 4;
 377                          }
 378                      
 379                          switch (n)
 380                          {
 381                              case 0:
 382                                  return p - dest;
 383                              case 1:
 384                                  if (q[0] < 128)
 385                                  {
 386 mike           1.112                 p[0] = q[0];
 387                                      return p + 1 - dest;
 388                                  }
 389                                  break;
 390                              case 2:
 391                                  if (((q[0]|q[1]) & 0x80) == 0)
 392                                  {
 393                                      p[0] = q[0];
 394                                      p[1] = q[1];
 395                                      return p + 2 - dest;
 396                                  }
 397                                  break;
 398                              case 3:
 399                                  if (((q[0]|q[1]|q[2]) & 0x80) == 0)
 400                                  {
 401                                      p[0] = q[0];
 402                                      p[1] = q[1];
 403                                      p[2] = q[2];
 404                                      return p + 3 - dest;
 405                                  }
 406                                  break;
 407 mike           1.112     }
 408                      
 409                          // Process remaining characters.
 410                      
 411                          while (n)
 412                          {
 413                              // Optimize for 7-bit ASCII case.
 414                      
 415                              if (*q < 128)
 416                              {
 417                                  *p++ = *q++;
 418                                  n--;
 419                              }
 420                              else
 421                              {
 422                                  Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
 423                      
 424                                  if (c > n || !isValid_U8(q, c) ||
 425                                      UTF8toUTF16(&q, q + c, &p, p + n) != 0)
 426                                  {
 427                                      utf8_error_index = q - (const Uint8*)src;
 428 mike           1.112                 return size_t(-1);
 429                                  }
 430                      
 431                                  n -= c;
 432                              }
 433                          }
 434                      
 435                          return p - dest;
 436                      }
 437                      
 438 david.dillard  1.116 // Note: dest must be at least three times src (plus an extra byte for
 439 mike           1.112 // terminator).
 440                      static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 441                      {
 442                          // The following employs loop unrolling for efficiency. Please do not
 443                          // eliminate.
 444                      
 445                          const Uint16* q = src;
 446                          Uint8* p = (Uint8*)dest;
 447                      
 448                          while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 449 kumpf          1.82      {
 450 mike           1.112         p[0] = q[0];
 451                              p[1] = q[1];
 452                              p[2] = q[2];
 453                              p[3] = q[3];
 454                              p += 4;
 455                              q += 4;
 456                              n -= 4;
 457 kumpf          1.82      }
 458 mike           1.112 
 459                          switch (n)
 460                          {
 461                              case 0:
 462                                  return p - (Uint8*)dest;
 463                              case 1:
 464                                  if (q[0] < 128)
 465                                  {
 466                                      p[0] = q[0];
 467                                      return p + 1 - (Uint8*)dest;
 468                                  }
 469                                  break;
 470                              case 2:
 471                                  if (q[0] < 128 && q[1] < 128)
 472                                  {
 473                                      p[0] = q[0];
 474                                      p[1] = q[1];
 475                                      return p + 2 - (Uint8*)dest;
 476                                  }
 477                                  break;
 478                              case 3:
 479 mike           1.112             if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 480                                  {
 481                                      p[0] = q[0];
 482                                      p[1] = q[1];
 483                                      p[2] = q[2];
 484                                      return p + 3 - (Uint8*)dest;
 485                                  }
 486                                  break;
 487                          }
 488                      
 489                          // If this line was reached, there must be characters greater than 128.
 490                      
 491                          UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 492                      
 493                          return p - (Uint8*)dest;
 494 kumpf          1.54  }
 495                      
 496 mike           1.112 static inline size_t _convert(
 497                          Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
 498 kumpf          1.54  {
 499 mike           1.112 #ifdef PEGASUS_STRING_NO_UTF8
 500                          _copy(p, q, n);
 501                          return n;
 502                      #else
 503                          return _copyFromUTF8(p, q, n, utf8_error_index);
 504                      #endif
 505 kumpf          1.54  }
 506                      
 507 mike           1.112 //==============================================================================
 508                      //
 509                      // class CString
 510                      //
 511                      //==============================================================================
 512                      
 513                      CString::CString(const CString& cstr) : _rep(0)
 514 kumpf          1.54  {
 515 mike           1.112     if (cstr._rep)
 516 kumpf          1.82      {
 517 mike           1.112         size_t n = strlen(cstr._rep) + 1;
 518                              _rep = (char*)operator new(n);
 519                              memcpy(_rep, cstr._rep, n);
 520 kumpf          1.82      }
 521 kumpf          1.54  }
 522                      
 523 kumpf          1.56  CString& CString::operator=(const CString& cstr)
 524                      {
 525 kumpf          1.82      if (&cstr != this)
 526 kumpf          1.81      {
 527 kumpf          1.82          if (_rep)
 528                              {
 529 mike           1.112             operator delete(_rep);
 530 kumpf          1.82              _rep = 0;
 531                              }
 532 mike           1.112 
 533 kumpf          1.82          if (cstr._rep)
 534                              {
 535 mike           1.112             size_t n = strlen(cstr._rep) + 1;
 536                                  _rep = (char*)operator new(n);
 537                                  memcpy(_rep, cstr._rep, n);
 538 kumpf          1.82          }
 539 kumpf          1.81      }
 540 mike           1.112 
 541 kumpf          1.56      return *this;
 542                      }
 543                      
 544 mike           1.112 //==============================================================================
 545 kumpf          1.54  //
 546 mike           1.112 // class StringRep
 547 kumpf          1.39  //
 548 mike           1.112 //==============================================================================
 549 kumpf          1.39  
  550 mike           1.112 StringRep StringRep::_emptyRep;
                           // Out-of-class definition of the static member StringRep::_emptyRep.
                           // NOTE(review): presumably the shared representation used for empty
                           // strings -- confirm against StringRep.h.
 551 mike           1.27  
 552 mike           1.112 inline StringRep* StringRep::alloc(size_t cap)
 553 mike           1.27  {
 554 mike           1.112 #ifndef PEGASUS_STRING_NO_THROW
 555 mike           1.27  
 556 mike           1.112     // Any string bigger than this is seriously suspect.
 557                          if (cap > 0x0FFFFFFF)
 558                              throw PEGASUS_STD(bad_alloc)();
 559 mike           1.27  
 560 mike           1.112 #endif
 561 mike           1.27  
 562 mike           1.112     StringRep* rep = (StringRep*)::operator new(
 563                              sizeof(StringRep) + cap * sizeof(Uint16));
 564                          rep->cap = cap;
 565                          new(&rep->refs) AtomicInt(1);
 566                      
 567                          return rep;
 568 mike           1.27  }
 569                      
 570 mike           1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
 571 chuck          1.102 {
 572 mike           1.114     if (cap > rep->cap || rep->refs.get() != 1)
 573 chuck          1.102     {
 574 mike           1.112         size_t n = _roundUpToPow2(cap);
 575                              StringRep* newRep = StringRep::alloc(n);
 576                              newRep->size = rep->size;
 577                              _copy(newRep->data, rep->data, rep->size + 1);
 578                              StringRep::unref(rep);
 579                              rep = newRep;
 580                          }
 581                      }
 582 david.dillard  1.105 
 583 mike           1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
 584                      {
 585                          StringRep* rep = StringRep::alloc(size);
 586                          rep->size = size;
 587                          _copy(rep->data, data, size);
 588                          rep->data[size] = '\0';
 589                          return rep;
 590                      }
 591 chuck          1.102 
 592 mike           1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
 593                      {
 594                          // Return a new copy of rep. Release rep.
 595 chuck          1.102 
 596 mike           1.112     StringRep* newRep = StringRep::alloc(rep->size);
 597                          newRep->size = rep->size;
 598                          _copy(newRep->data, rep->data, rep->size);
 599                          newRep->data[newRep->size] = '\0';
 600                          StringRep::unref(rep);
 601                          return newRep;
 602 chuck          1.102 }
 603                      
 604 mike           1.112 StringRep* StringRep::create(const char* data, size_t size)
 605 kumpf          1.43  {
 606 mike           1.112     StringRep* rep = StringRep::alloc(size);
 607                          size_t utf8_error_index;
 608                          rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
 609                      
 610                      #ifndef PEGASUS_STRING_NO_THROW
 611                          if (rep->size == size_t(-1))
 612                          {
 613                              StringRep::free(rep);
 614                              _StringThrowBadUTF8(utf8_error_index);
 615                          }
 616                      #endif
 617 kumpf          1.43  
 618 mike           1.112     rep->data[rep->size] = '\0';
 619 kumpf          1.43  
 620 mike           1.112     return rep;
 621 mike           1.27  }
 622                      
 623 mike           1.112 Uint32 StringRep::length(const Uint16* str)
 624 mike           1.27  {
 625 mike           1.112     // Note: We could unroll this but it is rarely called.
 626                      
 627                          const Uint16* end = (Uint16*)str;
 628                      
 629                          while (*end++)
 630                              ;
 631                      
 632                          return end - str - 1;
 633 kumpf          1.39  }
 634 tony           1.66  
 635 mike           1.112 //==============================================================================
 636                      //
 637                      // class String
 638                      //
 639                      //==============================================================================
 640                      
                          // Definition of the String::EMPTY constant (default-constructed).
 641                      const String String::EMPTY;
 642 mike           1.27  
 643 kumpf          1.39  String::String(const String& str, Uint32 n)
 644                      {
 645 mike           1.112     _checkBounds(n, str._rep->size);
 646                          _rep = StringRep::create(str._rep->data, n);
 647 kumpf          1.39  }
 648                      
 649                      String::String(const Char16* str)
 650                      {
 651 mike           1.112     _checkNullPointer(str);
 652                          _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 653 mike           1.27  }
 654                      
 655 kumpf          1.39  String::String(const Char16* str, Uint32 n)
 656                      {
 657 mike           1.112     _checkNullPointer(str);
 658                          _rep = StringRep::create((Uint16*)str, n);
 659 kumpf          1.39  }
 660                      
 661                      String::String(const char* str)
 662 mike           1.27  {
 663 mike           1.112     _checkNullPointer(str);
 664 david.dillard  1.105 
 665 mike           1.112     // Set this just in case create() throws an exception.
 666                          _rep = &StringRep::_emptyRep;
 667                          _rep = StringRep::create(str, strlen(str));
 668 mike           1.27  }
 669                      
 670 kumpf          1.39  String::String(const char* str, Uint32 n)
 671 mike           1.27  {
 672 mike           1.112     _checkNullPointer(str);
 673 david.dillard  1.105 
 674 mike           1.112     // Set this just in case create() throws an exception.
 675                          _rep = &StringRep::_emptyRep;
 676                          _rep = StringRep::create(str, n);
 677 kumpf          1.39  }
 678 mike           1.27  
 679 mike           1.112 String::String(const String& s1, const String& s2)
 680 kumpf          1.39  {
 681 mike           1.112     size_t n1 = s1._rep->size;
 682                          size_t n2 = s2._rep->size;
 683                          size_t n = n1 + n2;
 684                          _rep = StringRep::alloc(n);
 685                          _copy(_rep->data, s1._rep->data, n1);
 686                          _copy(_rep->data + n1, s2._rep->data, n2);
 687                          _rep->size = n;
 688                          _rep->data[n] = '\0';
 689 mike           1.27  }
 690                      
 691 mike           1.112 String::String(const String& s1, const char* s2)
 692 mike           1.27  {
 693 mike           1.112     _checkNullPointer(s2);
 694                          size_t n1 = s1._rep->size;
 695                          size_t n2 = strlen(s2);
 696                          _rep = StringRep::alloc(n1 + n2);
 697                          _copy(_rep->data, s1._rep->data, n1);
 698                          size_t utf8_error_index;
 699                          size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
 700                      
 701                      #ifndef PEGASUS_STRING_NO_THROW
 702                          if (tmp == size_t(-1))
 703 kumpf          1.82      {
 704 mike           1.112         StringRep::free(_rep);
 705                              _rep = &StringRep::_emptyRep;
 706                              _StringThrowBadUTF8(utf8_error_index);
 707 kumpf          1.82      }
 708 mike           1.112 #endif
 709                      
 710                          _rep->size = n1 + tmp;
 711                          _rep->data[_rep->size] = '\0';
 712 mike           1.27  }
 713                      
 714 mike           1.112 String::String(const char* s1, const String& s2)
 715 mike           1.27  {
 716 mike           1.112     _checkNullPointer(s1);
 717                          size_t n1 = strlen(s1);
 718                          size_t n2 = s2._rep->size;
 719                          _rep = StringRep::alloc(n1 + n2);
 720                          size_t utf8_error_index;
 721                          size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
 722                      
 723                      #ifndef PEGASUS_STRING_NO_THROW
 724                          if (tmp ==  size_t(-1))
 725                          {
 726                              StringRep::free(_rep);
 727                              _rep = &StringRep::_emptyRep;
 728                              _StringThrowBadUTF8(utf8_error_index);
 729                          }
 730                      #endif
 731                      
 732                          _rep->size = n2 + tmp;
 733                          _copy(_rep->data + n1, s2._rep->data, n2);
 734                          _rep->data[_rep->size] = '\0';
 735 mike           1.27  }
 736                      
 737 mike           1.112 String& String::assign(const String& str)
 738 mike           1.27  {
 739 mike           1.112     if (_rep != str._rep)
 740 david.dillard  1.105     {
 741 mike           1.112         StringRep::unref(_rep);
 742                              StringRep::ref(_rep = str._rep);
 743 david.dillard  1.105     }
 744                      
 745 mike           1.27      return *this;
 746                      }
 747                      
 748                      String& String::assign(const Char16* str, Uint32 n)
 749                      {
 750 mike           1.112     _checkNullPointer(str);
 751                      
 752 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 753 david.dillard  1.105     {
 754 mike           1.112         StringRep::unref(_rep);
 755                              _rep = StringRep::alloc(n);
 756 david.dillard  1.105     }
 757                      
 758 mike           1.112     _rep->size = n;
 759                          _copy(_rep->data, (Uint16*)str, n);
 760                          _rep->data[n] = '\0';
 761                      
 762 mike           1.27      return *this;
 763                      }
 764                      
 765 mike           1.112 String& String::assign(const char* str, Uint32 n)
 766 chuck          1.102 {
 767 mike           1.112     _checkNullPointer(str);
 768                      
 769 mike           1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 770 david.dillard  1.105     {
 771 mike           1.112         StringRep::unref(_rep);
 772                              _rep = StringRep::alloc(n);
 773 david.dillard  1.105     }
 774                      
 775 mike           1.112     size_t utf8_error_index;
 776                          _rep->size = _convert(_rep->data, str, n, utf8_error_index);
 777 chuck          1.102 
 778 mike           1.112 #ifndef PEGASUS_STRING_NO_THROW
 779                          if (_rep->size ==  size_t(-1))
 780 david.dillard  1.105     {
 781 mike           1.112         StringRep::free(_rep);
 782                              _rep = &StringRep::_emptyRep;
 783                              _StringThrowBadUTF8(utf8_error_index);
 784 david.dillard  1.105     }
 785 mike           1.112 #endif
 786                      
 787                          _rep->data[_rep->size] = 0;
 788 david.dillard  1.105 
 789 mike           1.27      return *this;
 790                      }
 791                      
 792 kumpf          1.39  void String::clear()
 793                      {
 794 mike           1.112     if (_rep->size)
 795                          {
 796 mike           1.114         if (_rep->refs.get() == 1)
 797 mike           1.112         {
 798                                  _rep->size = 0;
 799                                  _rep->data[0] = '\0';
 800                              }
 801                              else
 802                              {
 803                                  StringRep::unref(_rep);
 804                                  _rep = &StringRep::_emptyRep;
 805                              }
 806                          }
 807 kumpf          1.39  }
 808                      
 809 mike           1.112 void String::reserveCapacity(Uint32 cap)
 810 kumpf          1.39  {
 811 mike           1.112     _reserve(_rep, cap);
 812 kumpf          1.39  }
 813                      
 814 mike           1.112 CString String::getCString() const
 815                      {
 816 david.dillard  1.116     // A UTF8 string can have three times as many characters as its UTF16
 817                          // counterpart, so we allocate extra memory for the worst case. In the
 818 mike           1.112     // best case, we may need only one third of the memory allocated. But
 819 david.dillard  1.116     // downsizing the string afterwards is expensive and unecessary since
 820                          // CString objects are usually short-lived (disappearing after only a few
 821 mike           1.112     // instructions). CString objects are typically created on the stack as
 822                          // means to obtain a char* pointer.
 823                      
 824                      #ifdef PEGASUS_STRING_NO_UTF8
 825                          char* str = (char*)operator new(_rep->size + 1);
 826                          _copy(str, _rep->data, _rep->size);
 827                          str[_rep->size] = '\0';
 828                          return CString(str);
 829 gs.keenan      1.110 #else
 830 mike           1.112     Uint32 n = 3 * _rep->size;
 831                          char* str = (char*)operator new(n + 1);
 832                          size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 833                          str[size] = '\0';
 834                          return CString(str);
 835 gs.keenan      1.110 #endif
 836 kumpf          1.39  }
 837                      
 838 mike           1.112 String& String::append(const Char16* str, Uint32 n)
 839 kumpf          1.39  {
 840 mike           1.112     _checkNullPointer(str);
 841                      
 842                          size_t oldSize = _rep->size;
 843                          size_t newSize = oldSize + n;
 844                          _reserve(_rep, newSize);
 845                          _copy(_rep->data + oldSize, (Uint16*)str, n);
 846                          _rep->size = newSize;
 847                          _rep->data[newSize] = '\0';
 848                      
 849                          return *this;
 850 kumpf          1.39  }
 851                      
 852 mike           1.112 String& String::append(const String& str)
 853 mike           1.27  {
 854 mike           1.112     return append((Char16*)str._rep->data, str._rep->size);
 855 mike           1.27  }
 856                      
 857 mike           1.112 String& String::append(const char* str, Uint32 size)
 858 mike           1.27  {
 859 mike           1.112     _checkNullPointer(str);
 860                      
 861                          size_t oldSize = _rep->size;
 862                          size_t cap = oldSize + size;
 863                      
 864                          _reserve(_rep, cap);
 865                          size_t utf8_error_index;
 866                          size_t tmp = _convert(
 867                              (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
 868                      
 869                      #ifndef PEGASUS_STRING_NO_THROW
 870                          if (tmp ==  size_t(-1))
 871                          {
 872                              StringRep::free(_rep);
 873                              _rep = &StringRep::_emptyRep;
 874                              _StringThrowBadUTF8(utf8_error_index);
 875                          }
 876                      #endif
 877 mike           1.27  
 878 mike           1.112     _rep->size += tmp;
 879                          _rep->data[_rep->size] = '\0';
 880 mike           1.27  
 881 kumpf          1.39      return *this;
 882                      }
 883                      
 884 mike           1.112 void String::remove(Uint32 index, Uint32 n)
 885 mike           1.27  {
 886 mike           1.112     if (n == PEG_NOT_FOUND)
 887                              n = _rep->size - index;
 888                      
 889                          _checkBounds(index + n, _rep->size);
 890                      
 891 mike           1.114     if (_rep->refs.get() != 1)
 892 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
 893 mike           1.27  
 894 jim.wunderlich 1.115     PEGASUS_ASSERT(index + n <= _rep->size);
 895 mike           1.27  
 896 mike           1.112     size_t rem = _rep->size - (index + n);
 897                          Uint16* data = _rep->data;
 898 mike           1.27  
 899 mike           1.112     if (rem)
 900                              memmove(data + index, data + index + n, rem * sizeof(Uint16));
 901 mike           1.27  
 902 mike           1.112     _rep->size -= n;
 903                          data[_rep->size] = '\0';
 904 mike           1.27  }
 905                      
 906 mike           1.112 String String::subString(Uint32 index, Uint32 n) const
 907 mike           1.27  {
 908 mike           1.112     // Note: this implementation is very permissive but used for
 909                          // backwards compatibility.
 910                      
 911                          if (index < _rep->size)
 912 mike           1.27      {
 913 mike           1.112         if (n == PEG_NOT_FOUND || n > _rep->size - index)
 914                                  n = _rep->size - index;
 915 mike           1.27  
 916 mike           1.112         return String((Char16*)_rep->data + index, n);
 917 mike           1.27      }
 918 david.dillard  1.105 
 919                          return String();
 920 mike           1.27  }
 921                      
 922                      Uint32 String::find(Char16 c) const
 923                      {
 924 mike           1.112     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 925 mike           1.27  
 926 mike           1.112     if (p)
 927 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 928 mike           1.27  
 929                          return PEG_NOT_FOUND;
 930                      }
 931                      
 932 kumpf          1.53  Uint32 String::find(Uint32 index, Char16 c) const
 933 mike           1.30  {
 934 mike           1.112     _checkBounds(index, _rep->size);
 935                      
 936                          if (index >= _rep->size)
 937                              return PEG_NOT_FOUND;
 938                      
 939                          Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 940 mike           1.30  
 941 mike           1.112     if (p)
 942 david.dillard  1.116         return static_cast<Uint32>(p - _rep->data);
 943 mike           1.30  
 944                          return PEG_NOT_FOUND;
 945                      }
 946                      
 947 mike           1.112 Uint32 StringFindAux(
 948                          const StringRep* _rep, const Char16* s, Uint32 n)
 949 mike           1.27  {
 950 mike           1.112     _checkNullPointer(s);
 951 mike           1.27  
 952 mike           1.112     const Uint16* data = _rep->data;
 953                          size_t rem = _rep->size;
 954                      
 955                          while (n <= rem)
 956 mike           1.30      {
 957 mike           1.112         Uint16* p = (Uint16*)_find(data, rem, s[0]);
 958                      
 959                              if (!p)
 960                                  break;
 961 mike           1.30  
 962 mike           1.112         if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 963 david.dillard  1.116             return static_cast<Uint32>(p - _rep->data);
 964 david.dillard  1.105 
 965 mike           1.112         p++;
 966                              rem -= p - data;
 967                              data = p;
 968 mike           1.27      }
 969 mike           1.112 
 970 mike           1.27      return PEG_NOT_FOUND;
 971                      }
 972                      
 973 mike           1.112 Uint32 String::find(const char* s) const
 974                      {
 975                          _checkNullPointer(s);
 976                      
 977                          // Note: could optimize away creation of temporary, but this is rarely
 978                          // called.
 979                          return find(String(s));
 980                      }
 981                      
 982 mike           1.27  Uint32 String::reverseFind(Char16 c) const
 983                      {
 984 mike           1.112     Uint16 x = c;
 985                          Uint16* p = _rep->data;
 986                          Uint16* q = _rep->data + _rep->size;
 987 mike           1.27  
 988 mike           1.112     while (q != p)
 989 mike           1.27      {
 990 mike           1.112         if (*--q == x)
 991 david.dillard  1.116             return static_cast<Uint32>(q - p);
 992 mike           1.27      }
 993                      
 994                          return PEG_NOT_FOUND;
 995                      }
 996                      
 997                      void String::toLower()
 998                      {
 999 david          1.69  #ifdef PEGASUS_HAS_ICU
1000 mike           1.112 
1001 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1002 david          1.90      {
1003 mike           1.114         if (_rep->refs.get() != 1)
1004 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
1005                      
1006 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
1007 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
1008                              // prevents optimizations where the us-ascii is converted before
1009 mike           1.112         // calling ICU.
1010 yi.zhou        1.108         // The string may shrink or expand after the convert.
1011                      
1012 mike           1.112         //// First calculate size of resulting string. u_strToLower() returns
1013                              //// only the size when zero is passed as the destination size argument.
1014                      
1015 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
1016                      
1017 mike           1.112         int32_t newSize = u_strToLower(
1018                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1019 david.dillard  1.116 
1020 mike           1.112         err = U_ZERO_ERROR;
1021                      
1022                              //// Reserve enough space for the result.
1023                      
1024                              if ((Uint32)newSize > _rep->cap)
1025                                  _reserve(_rep, newSize);
1026                      
1027                              //// Perform the conversion (overlapping buffers are allowed).
1028 chuck          1.99  
1029 mike           1.112         u_strToLower((UChar*)_rep->data, newSize,
1030                                  (UChar*)_rep->data, _rep->size, NULL, &err);
1031 yi.zhou        1.108 
1032 mike           1.112         _rep->size = newSize;
1033                              return;
1034 david          1.90      }
1035 mike           1.112 
1036                      #endif /* PEGASUS_HAS_ICU */
1037                      
1038 mike           1.114     if (_rep->refs.get() != 1)
1039 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
1040                      
1041                          Uint16* p = _rep->data;
1042                          size_t n = _rep->size;
1043                      
1044                          for (; n--; p++)
1045 david          1.90      {
1046 mike           1.112         if (!(*p & 0xFF00))
1047                                  *p = _toLower(*p);
1048 mike           1.27      }
1049 kumpf          1.39  }
1050                      
1051 chuck          1.99  void String::toUpper()
1052 david          1.90  {
1053                      #ifdef PEGASUS_HAS_ICU
1054 mike           1.112 
1055 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1056 chuck          1.99      {
1057 mike           1.114         if (_rep->refs.get() != 1)
1058 mike           1.112             _rep = StringRep::copyOnWrite(_rep);
1059                      
1060 yi.zhou        1.108         // This will do a locale-insensitive, but context-sensitive convert.
1061 david.dillard  1.116         // Since context-sensitive casing looks at adjacent chars, this
1062                              // prevents optimizations where the us-ascii is converted before
1063 mike           1.112         // calling ICU.
1064 yi.zhou        1.108         // The string may shrink or expand after the convert.
1065                      
1066 mike           1.112         //// First calculate size of resulting string. u_strToUpper() returns
1067                              //// only the size when zero is passed as the destination size argument.
1068                      
1069 yi.zhou        1.108         UErrorCode err = U_ZERO_ERROR;
1070                      
1071 mike           1.112         int32_t newSize = u_strToUpper(
1072                                  NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1073                      
1074                              err = U_ZERO_ERROR;
1075                      
1076                              //// Reserve enough space for the result.
1077                      
1078                              if ((Uint32)newSize > _rep->cap)
1079                                  _reserve(_rep, newSize);
1080                      
1081                              //// Perform the conversion (overlapping buffers are allowed).
1082                      
1083                              u_strToUpper((UChar*)_rep->data, newSize,
1084                                  (UChar*)_rep->data, _rep->size, NULL, &err);
1085 chuck          1.99  
1086 mike           1.112         _rep->size = newSize;
1087 yi.zhou        1.108 
1088 mike           1.112         return;
1089 david          1.91      }
1090 mike           1.112 
1091                      #endif /* PEGASUS_HAS_ICU */
1092                      
1093 mike           1.114     if (_rep->refs.get() != 1)
1094 mike           1.112         _rep = StringRep::copyOnWrite(_rep);
1095                      
1096                          Uint16* p = _rep->data;
1097                          size_t n = _rep->size;
1098                      
1099                          for (; n--; p++)
1100                              *p = _toUpper(*p);
1101 david          1.90  }
1102                      
1103 kumpf          1.43  int String::compare(const String& s1, const String& s2, Uint32 n)
1104 kumpf          1.39  {
1105 jim.wunderlich 1.115     PEGASUS_ASSERT(n <= s1._rep->size);
1106                          PEGASUS_ASSERT(n <= s2._rep->size);
1107 mike           1.27  
1108 mike           1.112     // Ignoring error in which n is greater than s1.size() or s2.size()
1109                          return _compare(s1._rep->data, s2._rep->data, n);
1110 mike           1.27  }
1111                      
1112 kumpf          1.43  int String::compare(const String& s1, const String& s2)
1113 mike           1.30  {
1114 mike           1.112     return _compare(s1._rep->data, s2._rep->data);
1115                      }
1116 kumpf          1.43  
1117 mike           1.112 int String::compare(const String& s1, const char* s2)
1118                      {
1119                          _checkNullPointer(s2);
1120 mike           1.30  
1121 mike           1.112 #ifdef PEGASUS_STRING_NO_UTF8
1122                          return _compareNoUTF8(s1._rep->data, s2);
1123                      #else
1124                          // ATTN: optimize this!
1125                          return String::compare(s1, String(s2));
1126                      #endif
1127 mike           1.30  }
1128                      
1129 mike           1.112 int String::compareNoCase(const String& str1, const String& str2)
1130 kumpf          1.40  {
1131 david          1.69  #ifdef PEGASUS_HAS_ICU
1132 mike           1.112 
1133 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1134                          {
1135 mike           1.112         return  u_strcasecmp(
1136                                  str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
1137 yi.zhou        1.108     }
1138 kumpf          1.40  
1139 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1140                      
1141                          const Uint16* s1 = str1._rep->data;
1142                          const Uint16* s2 = str2._rep->data;
1143                      
1144                          while (*s1 && *s2)
1145 kumpf          1.40      {
1146 mike           1.112         int r = _toLower(*s1++) - _toLower(*s2++);
1147 kumpf          1.40  
1148 david.dillard  1.105         if (r)
1149                                  return r;
1150 kumpf          1.40      }
1151                      
1152 mike           1.112     if (*s2)
1153 david.dillard  1.105         return -1;
1154 mike           1.112     else if (*s1)
1155 david.dillard  1.105         return 1;
1156 kumpf          1.40  
1157                          return 0;
1158                      }
1159                      
1160 mike           1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1161 mike           1.27  {
1162 mike           1.112 #ifdef PEGASUS_HAS_ICU
1163                      
1164                          return String::compareNoCase(s1, s2) == 0;
1165                      
1166                      #else /* PEGASUS_HAS_ICU */
1167 mike           1.27  
1168 mike           1.112     // The following employs loop unrolling for efficiency. Please do not
1169                          // eliminate.
1170 kumpf          1.39  
1171 mike           1.112     Uint16* p = (Uint16*)s1.getChar16Data();
1172                          Uint16* q = (Uint16*)s2.getChar16Data();
1173                          Uint32 n = s2.size();
1174                      
1175                          while (n >= 8)
1176                          {
1177                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1178                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1179                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1180                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1181                                  ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1182                                  ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1183                                  ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1184                                  ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1185                              {
1186                                  return false;
1187                              }
1188 kumpf          1.39  
1189 mike           1.112         n -= 8;
1190                              p += 8;
1191                              q += 8;
1192                          }
1193 mike           1.27  
1194 mike           1.112     while (n >= 4)
1195 kumpf          1.39      {
1196 mike           1.112         if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1197                                  ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1198                                  ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1199                                  ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1200 david.dillard  1.105         {
1201 mike           1.112             return false;
1202 david.dillard  1.105         }
1203 mike           1.112 
1204                              n -= 4;
1205                              p += 4;
1206                              q += 4;
1207                          }
1208                      
1209                          while (n--)
1210                          {
1211                              if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1212 david.dillard  1.105             return false;
1213 mike           1.112 
1214                              p++;
1215                              q++;
1216 kumpf          1.39      }
1217 mike           1.28  
1218 kumpf          1.39      return true;
1219 mike           1.112 
1220                      #endif /* PEGASUS_HAS_ICU */
1221 david          1.69  }
1222                      
1223 mike           1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1224 david          1.69  {
1225 mike           1.112     _checkNullPointer(s2);
1226 david          1.69  
1227 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1228 david          1.69  
1229 mike           1.112     return String::equalNoCase(s1, String(s2));
1230 david          1.69  
1231 mike           1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1232 david          1.69  
1233 mike           1.112     const Uint16* p1 = (Uint16*)s1._rep->data;
1234                          const char* p2 = s2;
1235                          size_t n = s1._rep->size;
1236 david.dillard  1.105 
1237 mike           1.112     while (n--)
1238                          {
1239                              if (!*p2)
1240                                  return false;
1241 david          1.71  
1242 mike           1.112         if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1243                                  return false;
1244                          }
1245 kumpf          1.42  
1246 mike           1.112     if (*p2)
1247                              return false;
1248 david.dillard  1.116 
1249 mike           1.112     return true;
1250 karl           1.36  
1251 mike           1.112 #else /* PEGASUS_HAS_ICU */
1252 david.dillard  1.105 
1253 mike           1.112     // ATTN: optimize this!
1254                          return String::equalNoCase(s1, String(s2));
1255 david.dillard  1.105 
1256 mike           1.112 #endif /* PEGASUS_HAS_ICU */
1257                      }
1258 chuck          1.78  
1259 mike           1.112 Boolean String::equal(const String& s1, const String& s2)
1260 karl           1.36  {
1261 david.dillard  1.116     return s1._rep->size == s2._rep->size && memcmp(s1._rep->data,
1262 mike           1.112         s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1263 karl           1.36  }
1264                      
1265 mike           1.112 Boolean String::equal(const String& s1, const char* s2)
1266                      {
1267                      #ifdef PEGASUS_STRING_NO_UTF8
1268 kumpf          1.35  
1269 mike           1.112     _checkNullPointer(s2);
1270 kumpf          1.39  
1271 mike           1.112     const Uint16* p = (Uint16*)s1._rep->data;
1272                          const char* q = s2;
1273 kumpf          1.39  
1274 mike           1.112     while (*p && *q)
1275                          {
1276                              if (*p++ != Uint16(*q++))
1277                                  return false;
1278                          }
1279 kumpf          1.39  
1280 mike           1.112     return !(*p || *q);
1281 kumpf          1.39  
1282 mike           1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1283 kumpf          1.39  
1284 mike           1.112     return String::equal(s1, String(s2));
1285 kumpf          1.39  
1286 mike           1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1287 kumpf          1.39  }
1288                      
1289 kumpf          1.47  PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1290 kumpf          1.39  {
1291 mike           1.112 #if defined(PEGASUS_OS_OS400)
1292 david          1.72  
1293 david          1.93      CString cstr = str.getCString();
1294 david          1.69      const char* utf8str = cstr;
1295 mike           1.112     os << utf8str;
1296                          return os;
1297 david.dillard  1.116 #else
1298 david          1.69  
1299 mike           1.112 #if defined(PEGASUS_HAS_ICU)
1300 david          1.69  
1301 yi.zhou        1.108     if (InitializeICU::initICUSuccessful())
1302                          {
1303 david.dillard  1.105         char *buf = NULL;
1304                              const int size = str.size() * 6;
1305 mike           1.112         UnicodeString UniStr(
1306                                  (const UChar *)str.getChar16Data(), (int32_t)str.size());
1307 david.dillard  1.105         Uint32 bufsize = UniStr.extract(0,size,buf);
1308                              buf = new char[bufsize+1];
1309                              UniStr.extract(0,bufsize,buf);
1310                              os << buf;
1311                              os.flush();
1312                              delete [] buf;
1313 david.dillard  1.116         return os;
1314 yi.zhou        1.108     }
1315 mike           1.112 
1316 david.dillard  1.116 #endif  // PEGASUS_HAS_ICU
1317 mike           1.112 
1318                          for (Uint32 i = 0, n = str.size(); i < n; i++)
1319 yi.zhou        1.108     {
1320 mike           1.112         Uint16 code = str[i];
1321 david.dillard  1.105 
1322 mike           1.112         if (code > 0 && !(code & 0xFF00))
1323                                      os << char(code);
1324                              else
1325                                  {
1326                                  // Print in hex format:
1327                                  char buffer[8];
1328                                  sprintf(buffer, "\\x%04X", code);
1329                                  os << buffer;
1330 david.dillard  1.105         }
1331 yi.zhou        1.108     }
1332 kumpf          1.39  
1333                          return os;
1334 mike           1.112 #endif // PEGASUS_OS_OS400
1335 kumpf          1.39  }
1336                      
1337 mike           1.112 void StringAppendCharAux(StringRep*& _rep)
1338 kumpf          1.39  {
1339 mike           1.112     StringRep* tmp;
1340                      
1341                          if (_rep->cap)
1342                          {
1343                              tmp = StringRep::alloc(2 * _rep->cap);
1344                              tmp->size = _rep->size;
1345                              _copy(tmp->data, _rep->data, _rep->size);
1346                          }
1347                          else
1348                          {
1349                              tmp = StringRep::alloc(8);
1350                              tmp->size = 0;
1351                          }
1352                      
1353                          StringRep::unref(_rep);
1354                          _rep = tmp;
1355 kumpf          1.39  }
1356                      
1357 mike           1.112 PEGASUS_NAMESPACE_END
1358                      
1359                      /*
1360                      ================================================================================
1361                      
1362                      String optimizations:
1363                      
1364                          1.  Added mechanism allowing certain functions to be inlined only when
1365                              used by internal Pegasus modules. External modules (i.e., providers)
1366                              link to a non-inline version, which allows for binary compatibility.
1367                      
1368                          2.  Implemented copy-on-write with atomic increment/decrement. This
1369                              yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1370                              for the 'ni1000' benchmark.
1371                      
1372                          3.  Employed loop unrolling in several places. For example, see:
1373                      
1374                                  static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1375                      
1376                          4.  Used the "empty-rep" optimization (described in whitepaper from the
1377                              GCC Developers Summit). This reduced default construction to a simple
1378 mike           1.112         pointer assignment.
1379                      
1380                                  inline String::String() : _rep(&_emptyRep) { }
1381                      
1382                          5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1383                              For example:
1384                      
1385                                  static const char _upper[] =
1386                                  {
1387                                      0,1,2,...255
1388                                  };
1389                      
1390                                  inline Uint16 _toUpper(Uint16 x)
1391                                  {
1392                                      return (x & 0xFF00) ? x : _upper[x];
1393                                  }
1394                      
1395 david.dillard  1.116         This outperforms the system implementation by avoiding an anding
1396 mike           1.112         operation.
1397                      
1398 david.dillard  1.116     6.  Implemented char* version of the following member functions to
1399                              eliminate unnecessary creation of anonymous string objects
1400 mike           1.112         (temporaries).
1401                      
1402                                  String(const String& s1, const char* s2);
1403                                  String(const char* s1, const String& s2);
1404                                  String& String::operator=(const char* str);
1405                                  Uint32 String::find(const char* s) const;
1406                                  bool String::equal(const String& s1, const char* s2);
1407                                  static int String::compare(const String& s1, const char* s2);
1408                                  String& String::append(const char* str);
1409                                  String& String::append(const char* str, Uint32 size);
1410                                  static bool String::equalNoCase(const String& s1, const char* s2);
1411                                  String& operator=(const char* str)
1412                                  String& String::assign(const char* str)
1413                                  String& String::append(const char* str)
1414                                  Boolean operator==(const String& s1, const char* s2)
1415                                  Boolean operator==(const char* s1, const String& s2)
1416                                  Boolean operator!=(const String& s1, const char* s2)
1417                                  Boolean operator!=(const char* s1, const String& s2)
1418                                  Boolean operator<(const String& s1, const char* s2)
1419                                  Boolean operator<(const char* s1, const String& s2)
1420                                  Boolean operator>(const String& s1, const char* s2)
1421 mike           1.112             Boolean operator>(const char* s1, const String& s2)
1422                                  Boolean operator<=(const String& s1, const char* s2)
1423                                  Boolean operator<=(const char* s1, const String& s2)
1424                                  Boolean operator>=(const String& s1, const char* s2)
1425                                  Boolean operator>=(const char* s1, const String& s2)
1426                                  String operator+(const String& s1, const char* s2)
1427                                  String operator+(const char* s1, const String& s2)
1428                      
1429 david.dillard  1.116     7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next
1430 mike           1.112         power of two (algorithm from the book "Hacker's Delight").
1431                      
1432                                  static Uint32 _roundUpToPow2(Uint32 x)
1433                                  {
1434                                      if (x < 8)
1435                                          return 8;
1436                      
1437                                      x--;
1438                                      x |= (x >> 1);
1439                                      x |= (x >> 2);
1440                                      x |= (x >> 4);
1441                                      x |= (x >> 8);
1442                                      x |= (x >> 16);
1443                                      x++;
1444                      
1445                                      return x;
1446                                  }
1447                      
1448                          8.  Implemented "concatenating constructors" to eliminate temporaries
1449 david.dillard  1.116         created by operator+(). This scheme employs the "return-value
1450 mike           1.112         optimization" described by Stan Lippman.
1451                      
1452                                  inline String operator+(const String& s1, const String& s2)
1453                                  {
1454                                      return String(s1, s2, 0);
1455                                  }
1456                      
1457                          9.  Experimented to find the optimal initial size for a short string.
1458                              Eight seems to offer the best tradeoff between space and time.
1459                      
1460                          10. Inlined all members of the Char16 class.
1461                      
1462                          11. Used Uint16 internally in the String class. This showed no improvement
1463                              since Char16 was already fully inlined and was essentially reduced to
1464                              Uint16 in any case.
1465                      
1466                          12. Implemented conditional logic (#if) allowing error checking logic to
1467 david.dillard  1.116         be excluded to improve performance. Examples include bounds checking
1468 mike           1.112         and null-pointer checking.
1469                      
1470                          13. Used memcpy() and memcmp() where possible. These are implemented using
1471                              the rep family of instructions under Intel and are much faster.
1472                      
1473 david.dillard  1.116     14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8
1474 mike           1.112         copy routine overhead.
1475                      
1476                          15. Added ASCII7 form of the constructor and assign().
1477                      
1478                                  String s("hello world", String::ASCII7);
1479                      
1480                                  s.assignASCII7("hello world");
1481                      
1482                              This avoids slower UTF8 processing when not needed.
1483                      
1484                      ================================================================================
1485                      
1486                      TO-DO:
1487                      
1488                          (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
1489                      
1490                          (+) [DONE] Submit BUG-2754 (Windows buffer limit).
1491                      
1492                          (+) [DONE] Eliminate char versions of find() and append().
1493                      
1494                          (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1495 mike           1.112 
1496                          (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1497                      
1498                          (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1499                      
1500                          (+) [DONE] Comment StringRep allocation layout.
1501                      
1502                          (+) [DONE] Conceal private inline functions.
1503                      
1504                          (+) [DONE] Shorten inclusion of StringInline.h in String.h.
1505                      
1506                          (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
1507                              rid of altogether.
1508                      
1509                          (+) [DONE] useCamelNotationOnAllFunctionNames.
1510                      
1511                          (+) [DONE] Check for overflow condition in StringRep::alloc().
1512                      
1513                          (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
1514                      
1515                          (+) [DONE] Fix throw-related memory leak.
1516 mike           1.112 
1517                          (+) [DONE] Look at PEP223 for coding security guidelines.
1518                      
1519                          (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).
1520 kumpf          1.39  
1521 mike           1.112     (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
1522 kumpf          1.39  
1523 mike           1.112     (+) DOC++ String.h - will open new bug?
1524 kumpf          1.39  
1525 mike           1.112     (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression
1526                      	on certain platforms).
1527 kumpf          1.39  
1528 mike           1.112 ================================================================================
1529                      */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2