(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.98 //%2005////////////////////////////////////////////////////////////////////////
   2 mike  1.27 //
   3 karl  1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 mike  1.27 //
  12            // Permission is hereby granted, free of charge, to any person obtaining a copy
  13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
  14            // deal in the Software without restriction, including without limitation the
  15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16 mike  1.27 // sell copies of the Software, and to permit persons to whom the Software is
  17            // furnished to do so, subject to the following conditions:
  18 david.dillard 1.105 //
  19 kumpf         1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  20 mike          1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  21                     // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  22 kumpf         1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  23                     // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  24                     // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 mike          1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26                     // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27                     //
  28                     //==============================================================================
  29                     //
  30 mike          1.112 // Author: Mike Brasher (mbrasher@austin.rr.com)
  31 mike          1.27  //
  32 mike          1.112 // Modified By: 
  33                     //     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
  34                     //     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
  35                     //     David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
  36                     //     Mike Brasher (mike-brasher@austin.rr.com)
  37 mike          1.27  //
  38                     //%/////////////////////////////////////////////////////////////////////////////
  39                     
  40 mike          1.112 #include <cassert>
  41 mike          1.113 #include <cstring>
  42 kumpf         1.48  #include "InternalException.h"
  43 david         1.69  #include "CommonUTF.h"
  44 mike          1.112 #include "MessageLoader.h"
  45                     #include "StringRep.h"
  46 david         1.69  
  47                     #ifdef PEGASUS_HAS_ICU
  48 chuck         1.99  #include <unicode/ustring.h>
  49                     #include <unicode/uchar.h>
  50 david         1.69  #endif
  51                     
  52 mike          1.112 PEGASUS_NAMESPACE_BEGIN
  53 mike          1.28  
  54 mike          1.112 //==============================================================================
  55                     //
  56                     // Compile-time macros (undefined by default).
  57                     //
  58                     //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
  59                     //      
  60                     //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  61                     //
  62                     //==============================================================================
  63 mike          1.27  
  64 mike          1.112 //==============================================================================
  65 kumpf         1.39  //
  66 mike          1.112 // File-scope definitions:
  67 kumpf         1.54  //
  68 mike          1.112 //==============================================================================
  69                     
  70                     // Note: this table is much faster than the system toupper(). Please do not
  71                     // change.
  72 kumpf         1.54  
   // ASCII upper-casing lookup table, indexed by an 8-bit character code.
   // Entries 0x61-0x7A ('a'-'z') hold 0x41-0x5A ('A'-'Z'); every other entry
   // holds its own index, so all remaining codes map to themselves.
   const Uint8 _toUpperTable[256] =
   {
       0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
       0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
       0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
       0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
       0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
       0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
       0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
       0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
       0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
       0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
       0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
       // 0x60-0x7F: the lower-case letters are folded onto 0x41-0x5A here.
       0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
       0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
       0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
       0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
       // 0x80-0xFF: identity mapping.
       0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
       0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
       0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
       0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
       0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
       0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
       0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
       0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
       0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
       0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
       0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
       0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
       0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
       0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
       0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
       0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
   };
 108                     
  109                     // Note: this table is much faster than the system tolower(). Please do not
 110                     // change.
 111                     
  // ASCII lower-casing lookup table, indexed by an 8-bit character code.
  // Entries 0x41-0x5A ('A'-'Z') hold 0x61-0x7A ('a'-'z'); every other entry
  // holds its own index, so all remaining codes map to themselves.
  const Uint8 _toLowerTable[256] =
  {
      0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
      0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
      0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
      0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
      0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
      0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
      0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
      0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
      // 0x40-0x5F: the upper-case letters are folded onto 0x61-0x7A here.
      0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
      0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
      0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
      0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
      0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
      0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
      0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
      0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
      // 0x80-0xFF: identity mapping.
      0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
      0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
      0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
      0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
      0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
      0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
      0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
      0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
      0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
      0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
      0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
      0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
      0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
      0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
      0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
      0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
  };
 147                     
 148                     // Converts 16-bit characters to upper case. This routine is faster than the
 149                     // system toupper(). Please do not change.
 150                     inline Uint16 _toUpper(Uint16 x)
 151                     {
 152                         return (x & 0xFF00) ? x : _toUpperTable[x];
 153 kumpf         1.54  }
 154                     
 155 mike          1.112 // Converts 16-bit characters to lower case. This routine is faster than the
 156                     // system toupper(). Please do not change.
 157                     inline Uint16 _toLower(Uint16 x)
 158 kumpf         1.54  {
 159 mike          1.112     return (x & 0xFF00) ? x : _toLowerTable[x];
 160                     }
 161                     
 162                     // Rounds x up to the nearest power of two (or just returns 8 if x < 8).
 163                     static Uint32 _roundUpToPow2(Uint32 x)
 164                     {
 165                     #ifndef PEGASUS_STRING_NO_THROW
 166                     
 167                         if (x > 0x0FFFFFFF)
 168                             throw PEGASUS_STD(bad_alloc)();
 169                     
 170                     #endif
 171                     
 172                         if (x < 8)
 173                             return 8;
 174                     
 175                         x--;
 176                         x |= (x >> 1);
 177                         x |= (x >> 2);
 178                         x |= (x >> 4);
 179                         x |= (x >> 8);
 180 mike          1.112     x |= (x >> 16);
 181                         x++;
 182                     
 183                         return x;
 184                     }
 185                     
 186                     template<class P, class Q>
 187                     static void _copy(P* p, const Q* q, size_t n)
 188                     {
 189                         // The following employs loop unrolling for efficiency. Please do not
 190                         // eliminate.
 191                     
 192                         while (n >= 8)
 193                         {
 194                             p[0] = q[0];
 195                             p[1] = q[1];
 196                             p[2] = q[2];
 197                             p[3] = q[3];
 198                             p[4] = q[4];
 199                             p[5] = q[5];
 200                             p[6] = q[6];
 201 mike          1.112         p[7] = q[7];
 202                             p += 8;
 203                             q += 8;
 204                             n -= 8;
 205                         }
 206                     
 207                         while (n >= 4)
 208                         {
 209                             p[0] = q[0];
 210                             p[1] = q[1];
 211                             p[2] = q[2];
 212                             p[3] = q[3];
 213                             p += 4;
 214                             q += 4;
 215                             n -= 4;
 216                         }
 217                     
 218                         while (n--)
 219                             *p++ = *q++;
 220                     }
 221                     
 222 mike          1.112 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 223                     {
 224                         // The following employs loop unrolling for efficiency. Please do not
 225                         // eliminate.
 226                     
 227                         while (n >= 4)
 228                         {
 229                             if (s[0] == c)
 230                                 return (Uint16*)s;
 231                             if (s[1] == c)
 232                                 return (Uint16*)&s[1];
 233                             if (s[2] == c)
 234                                 return (Uint16*)&s[2];
 235                             if (s[3] == c)
 236                                 return (Uint16*)&s[3];
 237 kumpf         1.82  
 238 mike          1.112         n -= 4;
 239                             s += 4;
 240                         }
 241                     
 242                         if (n)
 243                         {
 244                             if (*s == c)
 245                                 return (Uint16*)s;
 246                             s++;
 247                             n--;
 248                         }
 249                     
 250                         if (n)
 251                         {
 252                             if (*s == c)
 253                                 return (Uint16*)s;
 254                             s++;
 255                             n--;
 256                         }
 257                     
 258                         if (n && *s == c)
 259 mike          1.112         return (Uint16*)s;
 260                     
 261                         // Not found!
 262                         return 0;
 263                     }
 264                     
 265                     static int _compare(const Uint16* s1, const Uint16* s2)
 266                     {
 267                         while (*s1 && *s2)
 268                         {
 269                             int r = *s1++ - *s2++;
 270                     
 271                             if (r)
 272                                 return r;
 273                         }
 274                     
 275                         if (*s2)
 276                             return -1;
 277                         else if (*s1)
 278                             return 1;
 279                     
 280 mike          1.112     return 0;
 281                     }
 282                     
 283                     static int _compareNoUTF8(const Uint16* s1, const char* s2)
 284                     {
 285                         Uint16 c1;
 286                         Uint16 c2;
 287                     
 288                         do
 289                         {
 290                             c1 = *s1++;
 291                             c2 = *s2++;
 292                     
 293                             if (c1 == 0)
 294                                 return c1 - c2;
 295                         }
 296                         while (c1 == c2);
 297                     
 298                         return c1 - c2;
 299                     }
 300                     
 301 mike          1.112 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
 302                     {
 303                         // This should only be called when s1 and s2 have the same length.
 304                     
 305                         while (n-- && (*s1++ - *s2++) == 0)
 306                             ;
 307                     
 308                         return s1[-1] - s2[-1];
 309                     }
 310                     
 311                     static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 312                     {
 313                         memcpy(s1, s2, n * sizeof(Uint16));
 314                     }
 315                     
  // Unconditionally throws IndexOutOfBoundsException. Has external linkage
  // (not static), so code outside this file can call it.
  void StringThrowOutOfBounds()
  {
      throw IndexOutOfBoundsException();
  }
 320                     
 321                     inline void _checkNullPointer(const void* ptr)
 322 mike          1.112 {
 323                     #ifdef PEGASUS_STRING_NO_THROW
 324                     
 325                         if (!ptr)
 326                             throw NullPointer();
 327                     
 328                     #endif
 329                     }
 330                     
  // Throws an Exception reporting invalid UTF-8 input.
  // index is substituted for $0 in the message text; callers in this file pass
  // the byte offset at which conversion failed.
  static void _StringThrowBadUTF8(Uint32 index)
  {
      // Message key first, then the default (English) text.
      MessageLoaderParms parms(
          "Common.String.BAD_UTF8",
          "The byte sequence starting at index $0 "
          "is not valid UTF-8 encoding.",
          index);
      throw Exception(parms);
  }
 340                     
  // Converts n bytes of UTF-8 text at src into 16-bit code units at dest.
  //
  // Returns the number of Uint16 elements written to dest. On failure it
  // stores a byte offset into src in utf8_error_index and returns size_t(-1);
  // utf8_error_index is left untouched on success.
  //
  // No terminator is written. NOTE(review): dest presumably needs room for at
  // least n elements -- confirm against callers.
  static size_t _copyFromUTF8(
      Uint16* dest,
      const char* src,
      size_t n,
      size_t& utf8_error_index)
  {
      Uint16* p = dest;
      const Uint8* q = (const Uint8*)src;

      // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
      // Use loop-unrolling.

      // OR-ing the bytes together and testing bit 7 checks all eight at once.
      while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
      {
          p[0] = q[0];
          p[1] = q[1];
          p[2] = q[2];
          p[3] = q[3];
          p[4] = q[4];
          p[5] = q[5];
          p[6] = q[6];
          p[7] = q[7];
          p += 8;
          q += 8;
          n -= 8;
      }

      while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
      {
          p[0] = q[0];
          p[1] = q[1];
          p[2] = q[2];
          p[3] = q[3];
          p += 4;
          q += 4;
          n -= 4;
      }

      // Fast exit when at most three bytes are left and all of them are
      // ASCII. Any other situation breaks out to the general loop below.
      switch (n)
      {
          case 0:
              return p - dest;
          case 1:
              if (q[0] < 128)
              {
                  p[0] = q[0];
                  return p + 1 - dest;
              }
              break;
          case 2:
              if (((q[0]|q[1]) & 0x80) == 0)
              {
                  p[0] = q[0];
                  p[1] = q[1];
                  return p + 2 - dest;
              }
              break;
          case 3:
              if (((q[0]|q[1]|q[2]) & 0x80) == 0)
              {
                  p[0] = q[0];
                  p[1] = q[1];
                  p[2] = q[2];
                  return p + 3 - dest;
              }
              break;
      }

      // Process remaining characters.

      while (n)
      {
          // Optimize for 7-bit ASCII case.

          if (*q < 128)
          {
              *p++ = *q++;
              n--;
          }
          else
          {
              // c = length in bytes of the sequence introduced by this lead
              // byte (trail-byte count plus the lead byte itself).
              Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;

              // Fail if the sequence would run past the input, is rejected by
              // isValid_U8(), or UTF8toUTF16() reports a non-zero status.
              // q and p are passed by address; presumably UTF8toUTF16()
              // advances both past what it consumed/produced -- the loop
              // relies on that, since only n is adjusted here.
              if (c > n || !isValid_U8(q, c) ||
                  UTF8toUTF16(&q, q + c, &p, p + n) != 0)
              {
                  utf8_error_index = q - (const Uint8*)src;
                  return size_t(-1);
              }

              n -= c;
          }
      }

      return p - dest;
  }
 437                     
 438                     // Note: dest must be at least three times src (plus an extra byte for 
 439                     // terminator).
 440                     static inline size_t _copyToUTF8(char* dest, const Uint16* src, size_t n)
 441                     {
 442                         // The following employs loop unrolling for efficiency. Please do not
 443                         // eliminate.
 444                     
 445                         const Uint16* q = src;
 446                         Uint8* p = (Uint8*)dest;
 447                     
 448 mike          1.112     while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 449 kumpf         1.82      {
 450 mike          1.112         p[0] = q[0];
 451                             p[1] = q[1];
 452                             p[2] = q[2];
 453                             p[3] = q[3];
 454                             p += 4;
 455                             q += 4;
 456                             n -= 4;
 457 kumpf         1.82      }
 458 mike          1.112 
 459                         switch (n)
 460                         {
 461                             case 0:
 462                                 return p - (Uint8*)dest;
 463                             case 1:
 464                                 if (q[0] < 128)
 465                                 {
 466                                     p[0] = q[0];
 467                                     return p + 1 - (Uint8*)dest;
 468                                 }
 469                                 break;
 470                             case 2:
 471                                 if (q[0] < 128 && q[1] < 128)
 472                                 {
 473                                     p[0] = q[0];
 474                                     p[1] = q[1];
 475                                     return p + 2 - (Uint8*)dest;
 476                                 }
 477                                 break;
 478                             case 3:
 479 mike          1.112             if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 480                                 {
 481                                     p[0] = q[0];
 482                                     p[1] = q[1];
 483                                     p[2] = q[2];
 484                                     return p + 3 - (Uint8*)dest;
 485                                 }
 486                                 break;
 487                         }
 488                     
 489                         // If this line was reached, there must be characters greater than 128.
 490                     
 491                         UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 492                     
 493                         return p - (Uint8*)dest;
 494 kumpf         1.54  }
 495                     
 496 mike          1.112 static inline size_t _convert(
 497                         Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
 498 kumpf         1.54  {
 499 mike          1.112 #ifdef PEGASUS_STRING_NO_UTF8
 500                         _copy(p, q, n);
 501                         return n;
 502                     #else
 503                         return _copyFromUTF8(p, q, n, utf8_error_index);
 504                     #endif
 505 kumpf         1.54  }
 506                     
 507 mike          1.112 //==============================================================================
 508                     //
 509                     // class CString
 510                     //
 511                     //==============================================================================
 512                     
 513                     CString::CString(const CString& cstr) : _rep(0)
 514 kumpf         1.54  {
 515 mike          1.112     if (cstr._rep)
 516 kumpf         1.82      {
 517 mike          1.112         size_t n = strlen(cstr._rep) + 1;
 518                             _rep = (char*)operator new(n);
 519                             memcpy(_rep, cstr._rep, n);
 520 kumpf         1.82      }
 521 kumpf         1.54  }
 522                     
 523 kumpf         1.56  CString& CString::operator=(const CString& cstr)
 524                     {
 525 kumpf         1.82      if (&cstr != this)
 526 kumpf         1.81      {
 527 kumpf         1.82          if (_rep)
 528                             {
 529 mike          1.112             operator delete(_rep);
 530 kumpf         1.82              _rep = 0;
 531                             }
 532 mike          1.112 
 533 kumpf         1.82          if (cstr._rep)
 534                             {
 535 mike          1.112             size_t n = strlen(cstr._rep) + 1;
 536                                 _rep = (char*)operator new(n);
 537                                 memcpy(_rep, cstr._rep, n);
 538 kumpf         1.82          }
 539 kumpf         1.81      }
 540 mike          1.112 
 541 kumpf         1.56      return *this;
 542                     }
 543                     
 544 mike          1.112 //==============================================================================
 545 kumpf         1.54  //
 546 mike          1.112 // class StringRep
 547 kumpf         1.39  //
 548 mike          1.112 //==============================================================================
 549 kumpf         1.39  
  // Definition of the static StringRep::_emptyRep member. In this file the
  // String(const char*) constructors point _rep at it before calling
  // StringRep::create(), so _rep is valid even if create() throws.
  StringRep StringRep::_emptyRep;
 551 mike          1.27  
 552 mike          1.112 inline StringRep* StringRep::alloc(size_t cap)
 553 mike          1.27  {
 554 mike          1.112 #ifndef PEGASUS_STRING_NO_THROW
 555 mike          1.27  
 556 mike          1.112     // Any string bigger than this is seriously suspect.
 557                         if (cap > 0x0FFFFFFF)
 558                             throw PEGASUS_STD(bad_alloc)();
 559 mike          1.27  
 560 mike          1.112 #endif
 561 mike          1.27  
 562 mike          1.112     StringRep* rep = (StringRep*)::operator new(
 563                             sizeof(StringRep) + cap * sizeof(Uint16));
 564                         rep->cap = cap;
 565                         new(&rep->refs) AtomicInt(1);
 566                     
 567                         return rep;
 568 mike          1.27  }
 569                     
 570 mike          1.112 static inline void _reserve(StringRep*& rep, Uint32 cap)
 571 chuck         1.102 {
 572 mike          1.114     if (cap > rep->cap || rep->refs.get() != 1)
 573 chuck         1.102     {
 574 mike          1.112         size_t n = _roundUpToPow2(cap);
 575                             StringRep* newRep = StringRep::alloc(n);
 576                             newRep->size = rep->size;
 577                             _copy(newRep->data, rep->data, rep->size + 1);
 578                             StringRep::unref(rep);
 579                             rep = newRep;
 580                         }
 581                     }
 582 david.dillard 1.105 
 583 mike          1.112 StringRep* StringRep::create(const Uint16* data, size_t size)
 584                     {
 585                         StringRep* rep = StringRep::alloc(size);
 586                         rep->size = size;
 587                         _copy(rep->data, data, size);
 588                         rep->data[size] = '\0';
 589                         return rep;
 590                     }
 591 chuck         1.102 
 592 mike          1.112 StringRep* StringRep::copyOnWrite(StringRep* rep)
 593                     {
 594                         // Return a new copy of rep. Release rep.
 595 chuck         1.102 
 596 mike          1.112     StringRep* newRep = StringRep::alloc(rep->size);
 597                         newRep->size = rep->size;
 598                         _copy(newRep->data, rep->data, rep->size);
 599                         newRep->data[newRep->size] = '\0';
 600                         StringRep::unref(rep);
 601                         return newRep;
 602 chuck         1.102 }
 603                     
 604 mike          1.112 StringRep* StringRep::create(const char* data, size_t size)
 605 kumpf         1.43  {
 606 mike          1.112     StringRep* rep = StringRep::alloc(size);
 607                         size_t utf8_error_index;
 608                         rep->size = _convert((Uint16*)rep->data, data, size, utf8_error_index);
 609                     
 610                     #ifndef PEGASUS_STRING_NO_THROW
 611                         if (rep->size == size_t(-1))
 612                         {
 613                             StringRep::free(rep);
 614                             _StringThrowBadUTF8(utf8_error_index);
 615                         }
 616                     #endif
 617 kumpf         1.43  
 618 mike          1.112     rep->data[rep->size] = '\0';
 619 kumpf         1.43  
 620 mike          1.112     return rep;
 621 mike          1.27  }
 622                     
 623 mike          1.112 Uint32 StringRep::length(const Uint16* str)
 624 mike          1.27  {
 625 mike          1.112     // Note: We could unroll this but it is rarely called.
 626                     
 627                         const Uint16* end = (Uint16*)str;
 628                     
 629                         while (*end++)
 630                             ;
 631                     
 632                         return end - str - 1;
 633 kumpf         1.39  }
 634 tony          1.66  
 635 mike          1.112 //==============================================================================
 636                     //
 637                     // class String
 638                     //
 639                     //==============================================================================
 640                     
 // Definition of the static member String::EMPTY (default-constructed).
 const String String::EMPTY;
 642 mike          1.27  
 643 kumpf         1.39  String::String(const String& str, Uint32 n)
 644                     {
 645 mike          1.112     _checkBounds(n, str._rep->size);
 646                         _rep = StringRep::create(str._rep->data, n);
 647 kumpf         1.39  }
 648                     
 649                     String::String(const Char16* str)
 650                     {
 651 mike          1.112     _checkNullPointer(str);
 652                         _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 653 mike          1.27  }
 654                     
 655 kumpf         1.39  String::String(const Char16* str, Uint32 n)
 656                     {
 657 mike          1.112     _checkNullPointer(str);
 658                         _rep = StringRep::create((Uint16*)str, n);
 659 kumpf         1.39  }
 660                     
 661                     String::String(const char* str)
 662 mike          1.27  {
 663 mike          1.112     _checkNullPointer(str);
 664 david.dillard 1.105 
 665 mike          1.112     // Set this just in case create() throws an exception.
 666                         _rep = &StringRep::_emptyRep;
 667                         _rep = StringRep::create(str, strlen(str));
 668 mike          1.27  }
 669                     
 670 kumpf         1.39  String::String(const char* str, Uint32 n)
 671 mike          1.27  {
 672 mike          1.112     _checkNullPointer(str);
 673 david.dillard 1.105 
 674 mike          1.112     // Set this just in case create() throws an exception.
 675                         _rep = &StringRep::_emptyRep;
 676                         _rep = StringRep::create(str, n);
 677 kumpf         1.39  }
 678 mike          1.27  
 679 mike          1.112 String::String(const String& s1, const String& s2)
 680 kumpf         1.39  {
 681 mike          1.112     size_t n1 = s1._rep->size;
 682                         size_t n2 = s2._rep->size;
 683                         size_t n = n1 + n2;
 684                         _rep = StringRep::alloc(n);
 685                         _copy(_rep->data, s1._rep->data, n1);
 686                         _copy(_rep->data + n1, s2._rep->data, n2);
 687                         _rep->size = n;
 688                         _rep->data[n] = '\0';
 689 mike          1.27  }
 690                     
 691 mike          1.112 String::String(const String& s1, const char* s2)
 692 mike          1.27  {
 693 mike          1.112     _checkNullPointer(s2);
 694                         size_t n1 = s1._rep->size;
 695                         size_t n2 = strlen(s2);
 696                         _rep = StringRep::alloc(n1 + n2);
 697                         _copy(_rep->data, s1._rep->data, n1);
 698                         size_t utf8_error_index;
 699                         size_t tmp = _convert((Uint16*)_rep->data + n1, s2, n2, utf8_error_index);
 700                     
 701                     #ifndef PEGASUS_STRING_NO_THROW
 702                         if (tmp == size_t(-1))
 703 kumpf         1.82      {
 704 mike          1.112         StringRep::free(_rep);
 705                             _rep = &StringRep::_emptyRep;
 706                             _StringThrowBadUTF8(utf8_error_index);
 707 kumpf         1.82      }
 708 mike          1.112 #endif
 709                     
 710                         _rep->size = n1 + tmp;
 711                         _rep->data[_rep->size] = '\0';
 712 mike          1.27  }
 713                     
 714 mike          1.112 String::String(const char* s1, const String& s2)
 715 mike          1.27  {
 716 mike          1.112     _checkNullPointer(s1);
 717                         size_t n1 = strlen(s1);
 718                         size_t n2 = s2._rep->size;
 719                         _rep = StringRep::alloc(n1 + n2);
 720                         size_t utf8_error_index;
 721                         size_t tmp = _convert((Uint16*)_rep->data, s1, n1, utf8_error_index);
 722                     
 723                     #ifndef PEGASUS_STRING_NO_THROW
 724                         if (tmp ==  size_t(-1))
 725                         {
 726                             StringRep::free(_rep);
 727                             _rep = &StringRep::_emptyRep;
 728                             _StringThrowBadUTF8(utf8_error_index);
 729                         }
 730                     #endif
 731                     
 732                         _rep->size = n2 + tmp;
 733                         _copy(_rep->data + n1, s2._rep->data, n2);
 734                         _rep->data[_rep->size] = '\0';
 735 mike          1.27  }
 736                     
 737 mike          1.112 String& String::assign(const String& str)
 738 mike          1.27  {
 739 mike          1.112     if (_rep != str._rep)
 740 david.dillard 1.105     {
 741 mike          1.112         StringRep::unref(_rep);
 742                             StringRep::ref(_rep = str._rep);
 743 david.dillard 1.105     }
 744                     
 745 mike          1.27      return *this;
 746                     }
 747                     
 748                     String& String::assign(const Char16* str, Uint32 n)
 749                     {
 750 mike          1.112     _checkNullPointer(str);
 751                     
 752 mike          1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 753 david.dillard 1.105     {
 754 mike          1.112         StringRep::unref(_rep);
 755                             _rep = StringRep::alloc(n);
 756 david.dillard 1.105     }
 757                     
 758 mike          1.112     _rep->size = n;
 759                         _copy(_rep->data, (Uint16*)str, n);
 760                         _rep->data[n] = '\0';
 761                     
 762 mike          1.27      return *this;
 763                     }
 764                     
 765 mike          1.112 String& String::assign(const char* str, Uint32 n)
 766 chuck         1.102 {
 767 mike          1.112     _checkNullPointer(str);
 768                     
 769 mike          1.114     if (n > _rep->cap || _rep->refs.get() != 1)
 770 david.dillard 1.105     {
 771 mike          1.112         StringRep::unref(_rep);
 772                             _rep = StringRep::alloc(n);
 773 david.dillard 1.105     }
 774                     
 775 mike          1.112     size_t utf8_error_index;
 776                         _rep->size = _convert(_rep->data, str, n, utf8_error_index);
 777 chuck         1.102 
 778 mike          1.112 #ifndef PEGASUS_STRING_NO_THROW
 779                         if (_rep->size ==  size_t(-1))
 780 david.dillard 1.105     {
 781 mike          1.112         StringRep::free(_rep);
 782                             _rep = &StringRep::_emptyRep;
 783                             _StringThrowBadUTF8(utf8_error_index);
 784 david.dillard 1.105     }
 785 mike          1.112 #endif
 786                     
 787                         _rep->data[_rep->size] = 0;
 788 david.dillard 1.105 
 789 mike          1.27      return *this;
 790                     }
 791                     
 792 kumpf         1.39  void String::clear()
 793                     {
 794 mike          1.112     if (_rep->size)
 795                         {
 796 mike          1.114         if (_rep->refs.get() == 1)
 797 mike          1.112         {
 798                                 _rep->size = 0;
 799                                 _rep->data[0] = '\0';
 800                             }
 801                             else
 802                             {
 803                                 StringRep::unref(_rep);
 804                                 _rep = &StringRep::_emptyRep;
 805                             }
 806                         }
 807 kumpf         1.39  }
 808                     
 // Forwards the requested capacity `cap` to _reserve() for this string's
 // representation.
 void String::reserveCapacity(Uint32 cap)
 {
     _reserve(_rep, cap);
 }
 813                     
 814 mike          1.112 CString String::getCString() const
 815                     {
 816                         // A UTF8 string can have three times as many characters as its UTF16 
 817                         // counterpart, so we allocate extra memory for the worst case. In the 
 818                         // best case, we may need only one third of the memory allocated. But
 819                         // downsizing the string afterwards is expensive and unecessary since 
 820                         // CString objects are usually short-lived (disappearing after only a few 
 821                         // instructions). CString objects are typically created on the stack as
 822                         // means to obtain a char* pointer.
 823                     
 824                     #ifdef PEGASUS_STRING_NO_UTF8
 825                         char* str = (char*)operator new(_rep->size + 1);
 826                         _copy(str, _rep->data, _rep->size);
 827                         str[_rep->size] = '\0';
 828                         return CString(str);
 829 gs.keenan     1.110 #else
 830 mike          1.112     Uint32 n = 3 * _rep->size;
 831                         char* str = (char*)operator new(n + 1);
 832                         size_t size = _copyToUTF8(str, _rep->data, _rep->size);
 833                         str[size] = '\0';
 834                         return CString(str);
 835 gs.keenan     1.110 #endif
 836 kumpf         1.39  }
 837                     
 838 mike          1.112 String& String::append(const Char16* str, Uint32 n)
 839 kumpf         1.39  {
 840 mike          1.112     _checkNullPointer(str);
 841                     
 842                         size_t oldSize = _rep->size;
 843                         size_t newSize = oldSize + n;
 844                         _reserve(_rep, newSize);
 845                         _copy(_rep->data + oldSize, (Uint16*)str, n);
 846                         _rep->size = newSize;
 847                         _rep->data[newSize] = '\0';
 848                     
 849                         return *this;
 850 kumpf         1.39  }
 851                     
 // Appends the contents of `str` by delegating to the Char16 overload with
 // str's data pointer and size.
 // NOTE(review): when `str` is this same object, the data pointer passed
 // here belongs to the rep that _reserve() may replace in the callee --
 // confirm that _reserve() keeps the old buffer alive for the copy.
 String& String::append(const String& str)
 {
     return append((Char16*)str._rep->data, str._rep->size);
 }
 856                     
 857 mike          1.112 String& String::append(const char* str, Uint32 size)
 858 mike          1.27  {
 859 mike          1.112     _checkNullPointer(str);
 860                     
 861                         size_t oldSize = _rep->size;
 862                         size_t cap = oldSize + size;
 863                     
 864                         _reserve(_rep, cap);
 865                         size_t utf8_error_index;
 866                         size_t tmp = _convert(
 867                             (Uint16*)_rep->data + oldSize, str, size, utf8_error_index);
 868                     
 869                     #ifndef PEGASUS_STRING_NO_THROW
 870                         if (tmp ==  size_t(-1))
 871                         {
 872                             StringRep::free(_rep);
 873                             _rep = &StringRep::_emptyRep;
 874                             _StringThrowBadUTF8(utf8_error_index);
 875                         }
 876                     #endif
 877 mike          1.27  
 878 mike          1.112     _rep->size += tmp;
 879                         _rep->data[_rep->size] = '\0';
 880 mike          1.27  
 881 kumpf         1.39      return *this;
 882                     }
 883                     
 884 mike          1.112 void String::remove(Uint32 index, Uint32 n)
 885 mike          1.27  {
 886 mike          1.112     if (n == PEG_NOT_FOUND)
 887                             n = _rep->size - index;
 888                     
 889                         _checkBounds(index + n, _rep->size);
 890                     
 891 mike          1.114     if (_rep->refs.get() != 1)
 892 mike          1.112         _rep = StringRep::copyOnWrite(_rep);
 893 mike          1.27  
 894 mike          1.112     assert(index + n <= _rep->size);
 895 mike          1.27  
 896 mike          1.112     size_t rem = _rep->size - (index + n);
 897                         Uint16* data = _rep->data;
 898 mike          1.27  
 899 mike          1.112     if (rem)
 900                             memmove(data + index, data + index + n, rem * sizeof(Uint16));
 901 mike          1.27  
 902 mike          1.112     _rep->size -= n;
 903                         data[_rep->size] = '\0';
 904 mike          1.27  }
 905                     
 906 mike          1.112 String String::subString(Uint32 index, Uint32 n) const
 907 mike          1.27  {
 908 mike          1.112     // Note: this implementation is very permissive but used for
 909                         // backwards compatibility.
 910                     
 911                         if (index < _rep->size)
 912 mike          1.27      {
 913 mike          1.112         if (n == PEG_NOT_FOUND || n > _rep->size - index)
 914                                 n = _rep->size - index;
 915 mike          1.27  
 916 mike          1.112         return String((Char16*)_rep->data + index, n);
 917 mike          1.27      }
 918 david.dillard 1.105 
 919                         return String();
 920 mike          1.27  }
 921                     
 922                     Uint32 String::find(Char16 c) const
 923                     {
 924 mike          1.112     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 925 mike          1.27  
 926 mike          1.112     if (p)
 927                             return p - _rep->data;
 928 mike          1.27  
 929                         return PEG_NOT_FOUND;
 930                     }
 931                     
 932 kumpf         1.53  Uint32 String::find(Uint32 index, Char16 c) const
 933 mike          1.30  {
 934 mike          1.112     _checkBounds(index, _rep->size);
 935                     
 936                         if (index >= _rep->size)
 937                             return PEG_NOT_FOUND;
 938                     
 939                         Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 940 mike          1.30  
 941 mike          1.112     if (p)
 942                             return p - _rep->data;
 943 mike          1.30  
 944                         return PEG_NOT_FOUND;
 945                     }
 946                     
 947 mike          1.112 Uint32 StringFindAux(
 948                         const StringRep* _rep, const Char16* s, Uint32 n)
 949 mike          1.27  {
 950 mike          1.112     _checkNullPointer(s);
 951 mike          1.27  
 952 mike          1.112     const Uint16* data = _rep->data;
 953                         size_t rem = _rep->size;
 954                     
 955                         while (n <= rem)
 956 mike          1.30      {
 957 mike          1.112         Uint16* p = (Uint16*)_find(data, rem, s[0]);
 958                     
 959                             if (!p)
 960                                 break;
 961 mike          1.30  
 962 mike          1.112         if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 963                                 return p - _rep->data;
 964 david.dillard 1.105 
 965 mike          1.112         p++;
 966                             rem -= p - data;
 967                             data = p;
 968 mike          1.27      }
 969 mike          1.112 
 970 mike          1.27      return PEG_NOT_FOUND;
 971                     }
 972                     
 973 mike          1.112 Uint32 String::find(const char* s) const
 974                     {
 975                         _checkNullPointer(s);
 976                     
 977                         // Note: could optimize away creation of temporary, but this is rarely
 978                         // called.
 979                         return find(String(s));
 980                     }
 981                     
 982 mike          1.27  Uint32 String::reverseFind(Char16 c) const
 983                     {
 984 mike          1.112     Uint16 x = c;
 985                         Uint16* p = _rep->data;
 986                         Uint16* q = _rep->data + _rep->size;
 987 mike          1.27  
 988 mike          1.112     while (q != p)
 989 mike          1.27      {
 990 mike          1.112         if (*--q == x)
 991                                 return q - p;
 992 mike          1.27      }
 993                     
 994                         return PEG_NOT_FOUND;
 995                     }
 996                     
 997                     void String::toLower()
 998                     {
 999 david         1.69  #ifdef PEGASUS_HAS_ICU
1000 mike          1.112 
1001 yi.zhou       1.108     if (InitializeICU::initICUSuccessful())
1002 david         1.90      {
1003 mike          1.114         if (_rep->refs.get() != 1)
1004 mike          1.112             _rep = StringRep::copyOnWrite(_rep);
1005                     
1006 yi.zhou       1.108         // This will do a locale-insensitive, but context-sensitive convert.
1007 mike          1.112         // Since context-sensitive casing looks at adjacent chars, this 
1008                             // prevents optimizations where the us-ascii is converted before 
1009                             // calling ICU.
1010 yi.zhou       1.108         // The string may shrink or expand after the convert.
1011                     
1012 mike          1.112         //// First calculate size of resulting string. u_strToLower() returns
1013                             //// only the size when zero is passed as the destination size argument.
1014                     
1015 yi.zhou       1.108         UErrorCode err = U_ZERO_ERROR;
1016                     
1017 mike          1.112         int32_t newSize = u_strToLower(
1018                                 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1019                             
1020                             err = U_ZERO_ERROR;
1021                     
1022                             //// Reserve enough space for the result.
1023                     
1024                             if ((Uint32)newSize > _rep->cap)
1025                                 _reserve(_rep, newSize);
1026                     
1027                             //// Perform the conversion (overlapping buffers are allowed).
1028 chuck         1.99  
1029 mike          1.112         u_strToLower((UChar*)_rep->data, newSize,
1030                                 (UChar*)_rep->data, _rep->size, NULL, &err);
1031 yi.zhou       1.108 
1032 mike          1.112         _rep->size = newSize;
1033                             return;
1034 david         1.90      }
1035 mike          1.112 
1036                     #endif /* PEGASUS_HAS_ICU */
1037                     
1038 mike          1.114     if (_rep->refs.get() != 1)
1039 mike          1.112         _rep = StringRep::copyOnWrite(_rep);
1040                     
1041                         Uint16* p = _rep->data;
1042                         size_t n = _rep->size;
1043                     
1044                         for (; n--; p++)
1045 david         1.90      {
1046 mike          1.112         if (!(*p & 0xFF00))
1047                                 *p = _toLower(*p);
1048 mike          1.27      }
1049 kumpf         1.39  }
1050                     
1051 chuck         1.99  void String::toUpper()
1052 david         1.90  {
1053                     #ifdef PEGASUS_HAS_ICU
1054 mike          1.112 
1055 yi.zhou       1.108     if (InitializeICU::initICUSuccessful())
1056 chuck         1.99      {
1057 mike          1.114         if (_rep->refs.get() != 1)
1058 mike          1.112             _rep = StringRep::copyOnWrite(_rep);
1059                     
1060 yi.zhou       1.108         // This will do a locale-insensitive, but context-sensitive convert.
1061 mike          1.112         // Since context-sensitive casing looks at adjacent chars, this 
1062                             // prevents optimizations where the us-ascii is converted before 
1063                             // calling ICU.
1064 yi.zhou       1.108         // The string may shrink or expand after the convert.
1065                     
1066 mike          1.112         //// First calculate size of resulting string. u_strToUpper() returns
1067                             //// only the size when zero is passed as the destination size argument.
1068                     
1069 yi.zhou       1.108         UErrorCode err = U_ZERO_ERROR;
1070                     
1071 mike          1.112         int32_t newSize = u_strToUpper(
1072                                 NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
1073                     
1074                             err = U_ZERO_ERROR;
1075                     
1076                             //// Reserve enough space for the result.
1077                     
1078                             if ((Uint32)newSize > _rep->cap)
1079                                 _reserve(_rep, newSize);
1080                     
1081                             //// Perform the conversion (overlapping buffers are allowed).
1082                     
1083                             u_strToUpper((UChar*)_rep->data, newSize,
1084                                 (UChar*)_rep->data, _rep->size, NULL, &err);
1085 chuck         1.99  
1086 mike          1.112         _rep->size = newSize;
1087 yi.zhou       1.108 
1088 mike          1.112         return;
1089 david         1.91      }
1090 mike          1.112 
1091                     #endif /* PEGASUS_HAS_ICU */
1092                     
1093 mike          1.114     if (_rep->refs.get() != 1)
1094 mike          1.112         _rep = StringRep::copyOnWrite(_rep);
1095                     
1096                         Uint16* p = _rep->data;
1097                         size_t n = _rep->size;
1098                     
1099                         for (; n--; p++)
1100                             *p = _toUpper(*p);
1101 david         1.90  }
1102                     
1103 kumpf         1.43  int String::compare(const String& s1, const String& s2, Uint32 n)
1104 kumpf         1.39  {
1105 mike          1.112     assert(n <= s1._rep->size);
1106                         assert(n <= s2._rep->size);
1107 mike          1.27  
1108 mike          1.112     // Ignoring error in which n is greater than s1.size() or s2.size()
1109                         return _compare(s1._rep->data, s2._rep->data, n);
1110 mike          1.27  }
1111                     
// Compares `s1` and `s2` by handing both data pointers to _compare() and
// returning its result.
int String::compare(const String& s1, const String& s2)
{
    return _compare(s1._rep->data, s2._rep->data);
}
1116 kumpf         1.43  
1117 mike          1.112 int String::compare(const String& s1, const char* s2)
1118                     {
1119                         _checkNullPointer(s2);
1120 mike          1.30  
1121 mike          1.112 #ifdef PEGASUS_STRING_NO_UTF8
1122                         return _compareNoUTF8(s1._rep->data, s2);
1123                     #else
1124                         // ATTN: optimize this!
1125                         return String::compare(s1, String(s2));
1126                     #endif
1127 mike          1.30  }
1128                     
1129 mike          1.112 int String::compareNoCase(const String& str1, const String& str2)
1130 kumpf         1.40  {
1131 david         1.69  #ifdef PEGASUS_HAS_ICU
1132 mike          1.112 
1133 yi.zhou       1.108     if (InitializeICU::initICUSuccessful())
1134                         {
1135 mike          1.112         return  u_strcasecmp(
1136                                 str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
1137 yi.zhou       1.108     }
1138 kumpf         1.40  
1139 mike          1.112 #endif /* PEGASUS_HAS_ICU */
1140                     
1141                         const Uint16* s1 = str1._rep->data;
1142                         const Uint16* s2 = str2._rep->data;
1143                     
1144                         while (*s1 && *s2)
1145 kumpf         1.40      {
1146 mike          1.112         int r = _toLower(*s1++) - _toLower(*s2++);
1147 kumpf         1.40  
1148 david.dillard 1.105         if (r)
1149                                 return r;
1150 kumpf         1.40      }
1151                     
1152 mike          1.112     if (*s2)
1153 david.dillard 1.105         return -1;
1154 mike          1.112     else if (*s1)
1155 david.dillard 1.105         return 1;
1156 kumpf         1.40  
1157                         return 0;
1158                     }
1159                     
1160 mike          1.112 Boolean StringEqualNoCase(const String& s1, const String& s2)
1161 mike          1.27  {
1162 mike          1.112 #ifdef PEGASUS_HAS_ICU
1163                     
1164                         return String::compareNoCase(s1, s2) == 0;
1165                     
1166                     #else /* PEGASUS_HAS_ICU */
1167 mike          1.27  
1168 mike          1.112     // The following employs loop unrolling for efficiency. Please do not
1169                         // eliminate.
1170 kumpf         1.39  
1171 mike          1.112     Uint16* p = (Uint16*)s1.getChar16Data();
1172                         Uint16* q = (Uint16*)s2.getChar16Data();
1173                         Uint32 n = s2.size();
1174                     
1175                         while (n >= 8)
1176                         {
1177                             if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1178                                 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1179                                 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1180                                 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))) ||
1181                                 ((p[4] - q[4]) && (_toUpper(p[4]) - _toUpper(q[4]))) ||
1182                                 ((p[5] - q[5]) && (_toUpper(p[5]) - _toUpper(q[5]))) ||
1183                                 ((p[6] - q[6]) && (_toUpper(p[6]) - _toUpper(q[6]))) ||
1184                                 ((p[7] - q[7]) && (_toUpper(p[7]) - _toUpper(q[7]))))
1185                             {
1186                                 return false;
1187                             }
1188 kumpf         1.39  
1189 mike          1.112         n -= 8;
1190                             p += 8;
1191                             q += 8;
1192                         }
1193 mike          1.27  
1194 mike          1.112     while (n >= 4)
1195 kumpf         1.39      {
1196 mike          1.112         if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))) ||
1197                                 ((p[1] - q[1]) && (_toUpper(p[1]) - _toUpper(q[1]))) ||
1198                                 ((p[2] - q[2]) && (_toUpper(p[2]) - _toUpper(q[2]))) ||
1199                                 ((p[3] - q[3]) && (_toUpper(p[3]) - _toUpper(q[3]))))
1200 david.dillard 1.105         {
1201 mike          1.112             return false;
1202 david.dillard 1.105         }
1203 mike          1.112 
1204                             n -= 4;
1205                             p += 4;
1206                             q += 4;
1207                         }
1208                     
1209                         while (n--)
1210                         {
1211                             if (((p[0] - q[0]) && (_toUpper(p[0]) - _toUpper(q[0]))))
1212 david.dillard 1.105             return false;
1213 mike          1.112 
1214                             p++;
1215                             q++;
1216 kumpf         1.39      }
1217 mike          1.28  
1218 kumpf         1.39      return true;
1219 mike          1.112 
1220                     #endif /* PEGASUS_HAS_ICU */
1221 david         1.69  }
1222                     
1223 mike          1.112 Boolean String::equalNoCase(const String& s1, const char* s2)
1224 david         1.69  {
1225 mike          1.112     _checkNullPointer(s2);
1226 david         1.69  
1227 mike          1.112 #if defined(PEGASUS_HAS_ICU)
1228 david         1.69  
1229 mike          1.112     return String::equalNoCase(s1, String(s2));
1230 david         1.69  
1231 mike          1.112 #elif defined(PEGASUS_STRING_NO_UTF8)
1232 david         1.69  
1233 mike          1.112     const Uint16* p1 = (Uint16*)s1._rep->data;
1234                         const char* p2 = s2;
1235                         size_t n = s1._rep->size;
1236 david.dillard 1.105 
1237 mike          1.112     while (n--)
1238                         {
1239                             if (!*p2)
1240                                 return false;
1241 david         1.71  
1242 mike          1.112         if (_toUpper(*p1++) != _toUpperTable[int(*p2++)])
1243                                 return false;
1244                         }
1245 kumpf         1.42  
1246 mike          1.112     if (*p2)
1247                             return false;
1248                         
1249                         return true;
1250 karl          1.36  
1251 mike          1.112 #else /* PEGASUS_HAS_ICU */
1252 david.dillard 1.105 
1253 mike          1.112     // ATTN: optimize this!
1254                         return String::equalNoCase(s1, String(s2));
1255 david.dillard 1.105 
1256 mike          1.112 #endif /* PEGASUS_HAS_ICU */
1257                     }
1258 chuck         1.78  
1259 mike          1.112 Boolean String::equal(const String& s1, const String& s2)
1260 karl          1.36  {
1261 mike          1.112     return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, 
1262                             s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1263 karl          1.36  }
1264                     
1265 mike          1.112 Boolean String::equal(const String& s1, const char* s2)
1266                     {
1267                     #ifdef PEGASUS_STRING_NO_UTF8
1268 kumpf         1.35  
1269 mike          1.112     _checkNullPointer(s2);
1270 kumpf         1.39  
1271 mike          1.112     const Uint16* p = (Uint16*)s1._rep->data;
1272                         const char* q = s2;
1273 kumpf         1.39  
1274 mike          1.112     while (*p && *q)
1275                         {
1276                             if (*p++ != Uint16(*q++))
1277                                 return false;
1278                         }
1279 kumpf         1.39  
1280 mike          1.112     return !(*p || *q);
1281 kumpf         1.39  
1282 mike          1.112 #else /* PEGASUS_STRING_NO_UTF8 */
1283 kumpf         1.39  
1284 mike          1.112     return String::equal(s1, String(s2));
1285 kumpf         1.39  
1286 mike          1.112 #endif /* PEGASUS_STRING_NO_UTF8 */
1287 kumpf         1.39  }
1288                     
1289 kumpf         1.47  PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1290 kumpf         1.39  {
1291 mike          1.112 #if defined(PEGASUS_OS_OS400)
1292 david         1.72  
1293 david         1.93      CString cstr = str.getCString();
1294 david         1.69      const char* utf8str = cstr;
1295 mike          1.112     os << utf8str;
1296                         return os;
1297                     #else    
1298 david         1.69  
1299 mike          1.112 #if defined(PEGASUS_HAS_ICU)
1300 david         1.69  
1301 yi.zhou       1.108     if (InitializeICU::initICUSuccessful())
1302                         {
1303 david.dillard 1.105         char *buf = NULL;
1304                             const int size = str.size() * 6;
1305 mike          1.112         UnicodeString UniStr(
1306                                 (const UChar *)str.getChar16Data(), (int32_t)str.size());
1307 david.dillard 1.105         Uint32 bufsize = UniStr.extract(0,size,buf);
1308                             buf = new char[bufsize+1];
1309                             UniStr.extract(0,bufsize,buf);
1310                             os << buf;
1311                             os.flush();
1312                             delete [] buf;
1313 mike          1.112         return os;       
1314 yi.zhou       1.108     }
1315 mike          1.112 
1316                     #endif  // PEGASUS_HAS_ICU 
1317                     
1318                         for (Uint32 i = 0, n = str.size(); i < n; i++)
1319 yi.zhou       1.108     {
1320 mike          1.112         Uint16 code = str[i];
1321 david.dillard 1.105 
1322 mike          1.112         if (code > 0 && !(code & 0xFF00))
1323                                     os << char(code);
1324                             else
1325                                 {
1326                                 // Print in hex format:
1327                                 char buffer[8];
1328                                 sprintf(buffer, "\\x%04X", code);
1329                                 os << buffer;
1330 david.dillard 1.105         }
1331 yi.zhou       1.108     }
1332 kumpf         1.39  
1333                         return os;
1334 mike          1.112 #endif // PEGASUS_OS_OS400
1335 kumpf         1.39  }
1336                     
1337 mike          1.112 void StringAppendCharAux(StringRep*& _rep)
1338 kumpf         1.39  {
1339 mike          1.112     StringRep* tmp;
1340                     
1341                         if (_rep->cap)
1342                         {
1343                             tmp = StringRep::alloc(2 * _rep->cap);
1344                             tmp->size = _rep->size;
1345                             _copy(tmp->data, _rep->data, _rep->size);
1346                         }
1347                         else
1348                         {
1349                             tmp = StringRep::alloc(8);
1350                             tmp->size = 0;
1351                         }
1352                     
1353                         StringRep::unref(_rep);
1354                         _rep = tmp;
1355 kumpf         1.39  }
1356                     
1357 mike          1.112 PEGASUS_NAMESPACE_END
1358                     
1359                     /*
1360                     ================================================================================
1361                     
1362                     String optimizations:
1363                     
1364                         1.  Added mechanism allowing certain functions to be inlined only when
1365                             used by internal Pegasus modules. External modules (i.e., providers)
1366                             link to a non-inline version, which allows for binary compatibility.
1367                     
1368                         2.  Implemented copy-on-write with atomic increment/decrement. This
1369                             yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1370                             for the 'ni1000' benchmark.
1371                     
1372                         3.  Employed loop unrolling in several places. For example, see:
1373                     
1374                                 static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1375                     
1376                         4.  Used the "empty-rep" optimization (described in whitepaper from the
1377                             GCC Developers Summit). This reduced default construction to a simple
1378 mike          1.112         pointer assignment.
1379                     
1380                                 inline String::String() : _rep(&_emptyRep) { }
1381                     
1382                         5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1383                             For example:
1384                     
1385                                 static const char _upper[] =
1386                                 {
1387                                     0,1,2,...255
1388                                 };
1389                     
1390                                 inline Uint16 _toUpper(Uint16 x)
1391                                 {
1392                                     return (x & 0xFF00) ? x : _upper[x];
1393                                 }
1394                     
1395                             This outperforms the system implementation by avoiding an anding 
1396                             operation.
1397                     
1398                         6.  Implemented char* version of the following member functions to 
1399 mike          1.112         eliminate unnecessary creation of anonymous string objects
1400                             (temporaries).
1401                     
1402                                 String(const String& s1, const char* s2);
1403                                 String(const char* s1, const String& s2);
1404                                 String& String::operator=(const char* str);
1405                                 Uint32 String::find(const char* s) const;
1406                                 bool String::equal(const String& s1, const char* s2);
1407                                 static int String::compare(const String& s1, const char* s2);
1408                                 String& String::append(const char* str);
1409                                 String& String::append(const char* str, Uint32 size);
1410                                 static bool String::equalNoCase(const String& s1, const char* s2);
1411                                 String& operator=(const char* str)
1412                                 String& String::assign(const char* str)
1413                                 String& String::append(const char* str)
1414                                 Boolean operator==(const String& s1, const char* s2)
1415                                 Boolean operator==(const char* s1, const String& s2)
1416                                 Boolean operator!=(const String& s1, const char* s2)
1417                                 Boolean operator!=(const char* s1, const String& s2)
1418                                 Boolean operator<(const String& s1, const char* s2)
1419                                 Boolean operator<(const char* s1, const String& s2)
1420 mike          1.112             Boolean operator>(const String& s1, const char* s2)
1421                                 Boolean operator>(const char* s1, const String& s2)
1422                                 Boolean operator<=(const String& s1, const char* s2)
1423                                 Boolean operator<=(const char* s1, const String& s2)
1424                                 Boolean operator>=(const String& s1, const char* s2)
1425                                 Boolean operator>=(const char* s1, const String& s2)
1426                                 String operator+(const String& s1, const char* s2)
1427                                 String operator+(const char* s1, const String& s2)
1428                     
1429                         7.  Optimized _roundUpToPow2(), used in rounding the capacity to the next 
1430                             power of two (algorithm from the book "Hacker's Delight").
1431                     
1432                                 static Uint32 _roundUpToPow2(Uint32 x)
1433                                 {
1434                                     if (x < 8)
1435                                         return 8;
1436                     
1437                                     x--;
1438                                     x |= (x >> 1);
1439                                     x |= (x >> 2);
1440                                     x |= (x >> 4);
1441 mike          1.112                 x |= (x >> 8);
1442                                     x |= (x >> 16);
1443                                     x++;
1444                     
1445                                     return x;
1446                                 }
1447                     
1448                         8.  Implemented "concatenating constructors" to eliminate temporaries
1449                             created by operator+(). This scheme employs the "return-value 
1450                             optimization" described by Stan Lippman.
1451                     
1452                                 inline String operator+(const String& s1, const String& s2)
1453                                 {
1454                                     return String(s1, s2, 0);
1455                                 }
1456                     
1457                         9.  Experimented to find the optimal initial size for a short string.
1458                             Eight seems to offer the best tradeoff between space and time.
1459                     
1460                         10. Inlined all members of the Char16 class.
1461                     
1462 mike          1.112     11. Used Uint16 internally in the String class. This showed no improvement
1463                             since Char16 was already fully inlined and was essentially reduced to
1464                             Uint16 in any case.
1465                     
1466                         12. Implemented conditional logic (#if) allowing error checking logic to
1467                             be excluded to improve performance. Examples include bounds checking
1468                             and null-pointer checking.
1469                     
1470                         13. Used memcpy() and memcmp() where possible. These are implemented using
1471                             the rep family of instructions under Intel and are much faster.
1472                     
1473                         14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8 
1474                             copy routine overhead.
1475                     
1476                         15. Added ASCII7 form of the constructor and assign().
1477                     
1478                                 String s("hello world", String::ASCII7);
1479                     
1480                                 s.assignASCII7("hello world");
1481                     
1482                             This avoids slower UTF8 processing when not needed.
1483 mike          1.112 
1484                     ================================================================================
1485                     
1486                     TO-DO:
1487                     
1488                         (+) [DONE] Use PEGASUS_USE_EXPERIMENTAL_INTERFACES
1489                     
1490                         (+) [DONE] Submit BUG-2754 (Windows buffer limit).
1491                     
1492                         (+) [DONE] Eliminate char versions of find() and append().
1493                     
1494                         (+) [DONE] Remove PEGASUS_MAX_PRINTABLE_CHARACTER from Config.h
1495                     
1496                         (+) [DONE] Change _next_pow_2() to _roundUpToPow2().
1497                     
1498                         (+) [DONE] Change '99' to '2' in StringRep constructor (comment as well).
1499                     
1500                         (+) [DONE] Comment StringRep allocation layout.
1501                     
1502                         (+) [DONE] Conceal private inline functions.
1503                     
1504 mike          1.112     (+) [DONE] Shorten inclusion of StringInline.h in String.h.
1505                     
1506                         (+) [DONE] Change USE_INTERNAL_INLINE TO DISABLE_INTERNAL_INLINE or get
1507                             rid of altogether.
1508                     
1509                         (+) [DONE] useCamelNotationOnAllFunctionNames.
1510                     
1511                         (+) [DONE] Check for overflow condition in StringRep::alloc().
1512                     
1513                         (+) [DONE] Remove tabs (used vim ":set expandtab" and ":retab").
1514                     
1515                         (+) [DONE] Fix throw-related memory leak.
1516                     
1517                         (+) [DONE] Look at PEP223 for coding security guidelines.
1518                     
1519                         (+) [DONE] Use old AtomicInt for now (new AtomicInt part of bug #4250).
1520 kumpf         1.39  
1521 mike          1.112     (+) [DONE] Removed appendASCII() and the ASCII form of the constructor.
1522 kumpf         1.39  
1523 mike          1.112     (+) DOC++ String.h - will open new bug?
1524 kumpf         1.39  
1525 mike          1.112     (+) Added PEGASUS_DISABLE_INTERNAL_INLINES macro (to permit suppression
1526                     	on certain platforms).
1527 kumpf         1.39  
1528 mike          1.112 ================================================================================
1529                     */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2