(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.98 //%2005////////////////////////////////////////////////////////////////////////
   2 mike  1.27 //
   3 karl  1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 mike  1.27 //
  12            // Permission is hereby granted, free of charge, to any person obtaining a copy
  13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
  14            // deal in the Software without restriction, including without limitation the
  15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16 mike  1.27 // sell copies of the Software, and to permit persons to whom the Software is
  17            // furnished to do so, subject to the following conditions:
  18 david.dillard 1.105 //
  19 kumpf         1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  20 mike          1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  21                     // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  22 kumpf         1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  23                     // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  24                     // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 mike          1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26                     // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27                     //
  28                     //==============================================================================
  29                     //
  30                     // Author: Mike Brasher (mbrasher@bmc.com)
  31                     //
  32 mike          1.111.6.1 // Modified By: 
  33                         //     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
  34                         //     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
  35                         //     David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
  36                         //     Mike Brasher (mike-brasher@austin.rr.com)
  37 mike          1.27      //
  38                         //%/////////////////////////////////////////////////////////////////////////////
  39                         
  40 mike          1.111.6.1 #include <cassert>
  41 kumpf         1.48      #include "InternalException.h"
  42 david         1.69      #include "CommonUTF.h"
  43 mike          1.111.6.1 #include "MessageLoader.h"
  44 mike          1.111.6.2 #include "StringRep.h"
  45 david         1.69      
  46                         #ifdef PEGASUS_HAS_ICU
  47 chuck         1.99      #include <unicode/ustring.h>
  48                         #include <unicode/uchar.h>
  49 david         1.69      #endif
  50                         
  51 mike          1.27      PEGASUS_NAMESPACE_BEGIN
  52                         
  53 mike          1.111.6.1 //==============================================================================
  54                         //
  55                         // Compile-time macros (undefined by default).
  56                         //
  57                         //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
  58                         //      
  59                         //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  60                         //
  61                         //     PEGASUS_USE_INTERNAL_INLINES -- enables internal inlining feature.
  62                         // 
  63                         //==============================================================================
  64                         
  65                         //==============================================================================
  66 kumpf         1.39      //
  67 mike          1.111.6.1 // File-scope definitions:
  68 kumpf         1.54      //
  69 mike          1.111.6.1 //==============================================================================
  70                         
  71                         const Uint8 _to_upper_tbl[256] = 
  72                         {
  73                             0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  74                             0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  75                             0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  76                             0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  77                             0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  78                             0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  79                             0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  80                             0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  81                             0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  82                             0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  83                             0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  84                             0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  85                             0x60,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  86                             0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  87                             0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  88                             0x58,0x59,0x5A,0x7B,0x7C,0x7D,0x7E,0x7F,
  89                             0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
  90 mike          1.111.6.1     0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
  91                             0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
  92                             0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
  93                             0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  94                             0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  95                             0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
  96                             0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  97                             0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
  98                             0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  99                             0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 100                             0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 101                             0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 102                             0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 103                             0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 104                             0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 105                         };
 106                         
 107                         const Uint8 _to_lower_tbl[256] = 
 108                         {
 109                             0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
 110                             0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
 111 mike          1.111.6.1     0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
 112                             0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
 113                             0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
 114                             0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
 115                             0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
 116                             0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
 117                             0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 118                             0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 119                             0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 120                             0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,
 121                             0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
 122                             0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
 123                             0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
 124                             0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
 125                             0x80,0x81,0x82,0x83,0x84,0x85,0x86,0x87,
 126                             0x88,0x89,0x8A,0x8B,0x8C,0x8D,0x8E,0x8F,
 127                             0x90,0x91,0x92,0x93,0x94,0x95,0x96,0x97,
 128                             0x98,0x99,0x9A,0x9B,0x9C,0x9D,0x9E,0x9F,
 129                             0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
 130                             0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
 131                             0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
 132 mike          1.111.6.1     0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
 133                             0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
 134                             0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
 135                             0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
 136                             0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
 137                             0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
 138                             0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
 139                             0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
 140                             0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF,
 141                         };
 142 kumpf         1.54      
 143 mike          1.111.6.1 // Converts 16-bit characters to upper case.
 144                         inline Uint16 _to_upper(Uint16 x)
 145 kumpf         1.54      {
 146 mike          1.111.6.1     return (x & 0xFF00) ? x : _to_upper_tbl[x];
 147 kumpf         1.54      }
 148                         
 149 mike          1.111.6.1 // Converts 16-bit characters to lower case.
 150                         inline Uint16 _to_lower(Uint16 x)
 151 kumpf         1.54      {
 152 mike          1.111.6.1     return (x & 0xFF00) ? x : _to_lower_tbl[x];
 153                         }
 154 kumpf         1.82      
 155 mike          1.111.6.1 // Rounds x to the next power of two (or just returns 8 if x < 8).
 156                         static Uint32 _next_pow_2(Uint32 x)
 157                         {
 158                             if (x < 8)
 159                         	return 8;
 160                         
 161                             x--;
 162                             x |= (x >> 1);
 163                             x |= (x >> 2);
 164                             x |= (x >> 4);
 165                             x |= (x >> 8);
 166                             x |= (x >> 16);
 167                             x++;
 168                         
 169                             return x;
 170                         }
 171                         
 172                         template<class P, class Q>
 173                         static void _copy(P* p, const Q* q, size_t n)
 174                         {
 175                             // Use loop unrolling.
 176 mike          1.111.6.1 
 177                             while (n >= 8)
 178 kumpf         1.82          {
 179 mike          1.111.6.1 	p[0] = q[0];
 180                         	p[1] = q[1];
 181                         	p[2] = q[2];
 182                         	p[3] = q[3];
 183                         	p[4] = q[4];
 184                         	p[5] = q[5];
 185                         	p[6] = q[6];
 186                         	p[7] = q[7];
 187                         	p += 8;
 188                         	q += 8;
 189                         	n -= 8;
 190 kumpf         1.82          }
 191 mike          1.111.6.1 
 192                             while (n >= 4)
 193                             {
 194                         	p[0] = q[0];
 195                         	p[1] = q[1];
 196                         	p[2] = q[2];
 197                         	p[3] = q[3];
 198                         	p += 4;
 199                         	q += 4;
 200                         	n -= 4;
 201                             }
 202                         
 203                             while (n--)
 204                         	*p++ = *q++;
 205 kumpf         1.54      }
 206                         
 207 mike          1.111.6.1 static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 208 kumpf         1.54      {
 209 mike          1.111.6.1     while (n >= 4)
 210                             {
 211                         	if (s[0] == c)
 212                         	    return (Uint16*)s;
 213                         	if (s[1] == c)
 214                         	    return (Uint16*)&s[1];
 215                         	if (s[2] == c)
 216                         	    return (Uint16*)&s[2];
 217                         	if (s[3] == c)
 218                         	    return (Uint16*)&s[3];
 219                         
 220                         	n -= 4;
 221                         	s += 4;
 222                             }
 223                         
 224                             if (n)
 225                             {
 226                         	if (*s == c)
 227                         	    return (Uint16*)s;
 228                         	s++;
 229                         	n--;
 230 mike          1.111.6.1     }
 231                         
 232                             if (n)
 233                             {
 234                         	if (*s == c)
 235                         	    return (Uint16*)s;
 236                         	s++;
 237                         	n--;
 238                             }
 239                         
 240                             if (n && *s == c)
 241                         	return (Uint16*)s;
 242                         
 243                             // Not found!
 244                             return 0;
 245 kumpf         1.54      }
 246                         
 247 mike          1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2)
 248 kumpf         1.54      {
 249 mike          1.111.6.1     while (*s1 && *s2)
 250 kumpf         1.82          {
 251 mike          1.111.6.1         int r = *s1++ - *s2++;
 252                         
 253                                 if (r)
 254                                     return r;
 255 kumpf         1.82          }
 256 mike          1.111.6.1 
 257                             if (*s2)
 258                                 return -1;
 259                             else if (*s1)
 260                                 return 1;
 261                         
 262                             return 0;
 263 kumpf         1.54      }
 264                         
 265 mike          1.111.6.1 static int _compare_no_utf8(const Uint16* s1, const char* s2)
 266 kumpf         1.56      {
 267 mike          1.111.6.1     Uint16 c1;
 268                             Uint16 c2;
 269                         
 270                             do
 271 kumpf         1.81          {
 272 mike          1.111.6.1 	c1 = *s1++;
 273                         	c2 = *s2++;
 274                         
 275                         	if (c1 == 0)
 276                         	    return c1 - c2;
 277 kumpf         1.81          }
 278 mike          1.111.6.1     while (c1 == c2);
 279                         
 280                             return c1 - c2;
 281 kumpf         1.56      }
 282                         
 283 mike          1.111.6.1 static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
 284 kumpf         1.54      {
 285 mike          1.111.6.1     // This should only be called when s1 and s2 have the same length.
 286                         
 287                             while (n-- && (*s1++ - *s2++) == 0)
 288                         	;
 289                         
 290                             // 
 291                         
 292                             return s1[-1] - s2[-1];
 293 kumpf         1.54      }
 294                         
 295 mike          1.111.6.1 static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 296                         {
 297                             memcpy(s1, s2, n * sizeof(Uint16));
 298                         }
 299 kumpf         1.39      
 300 mike          1.111.6.1 void String_throw_out_of_bounds()
 301                         {
 302                             throw IndexOutOfBoundsException();
 303                         }
 304 mike          1.27      
 305 mike          1.111.6.1 #ifdef PEGASUS_STRING_NO_THROW
 306                         # define _check_null_pointer(ARG) /* empty */
 307                         #else
 308                         template<class T>
 309                         inline void _check_null_pointer(const T* ptr)
 310 mike          1.27      {
 311 mike          1.111.6.1     if (!ptr)
 312                         	throw NullPointer();
 313                         }
 314                         #endif
 315                         
 316                         static size_t _copy_from_utf8(Uint16* dest, const char* src, size_t n)
 317                         {
 318                             Uint16* p = dest;
 319                             const Uint8* q = (const Uint8*)src;
 320 mike          1.27      
 321 mike          1.111.6.1     // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
 322                             // Use loop-unrolling.
 323 mike          1.27      
 324 mike          1.111.6.1     while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
 325                             {
 326                         	p[0] = q[0];
 327                         	p[1] = q[1];
 328                         	p[2] = q[2];
 329                         	p[3] = q[3];
 330                         	p[4] = q[4];
 331                         	p[5] = q[5];
 332                         	p[6] = q[6];
 333                         	p[7] = q[7];
 334                         	p += 8;
 335                         	q += 8;
 336                         	n -= 8;
 337                             }
 338                         
 339                             while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
 340                             {
 341                         	p[0] = q[0];
 342                         	p[1] = q[1];
 343                         	p[2] = q[2];
 344                         	p[3] = q[3];
 345 mike          1.111.6.1 	p += 4;
 346                         	q += 4;
 347                         	n -= 4;
 348                             }
 349                         
 350                             switch (n)
 351                             {
 352                         	case 0:
 353                         	    return p - dest;
 354                         	case 1:
 355                         	    if (q[0] < 128)
 356                         	    {
 357                         		p[0] = q[0];
 358                         		return p + 1 - dest;
 359                         	    }
 360                         	    break;
 361                         	case 2:
 362                         	    if (((q[0]|q[1]) & 0x80) == 0)
 363                         	    {
 364                         		p[0] = q[0];
 365                         		p[1] = q[1];
 366 mike          1.111.6.1 		return p + 2 - dest;
 367                         	    }
 368                         	    break;
 369                         	case 3:
 370                         	    if (((q[0]|q[1]|q[2]) & 0x80) == 0)
 371                         	    {
 372                         		p[0] = q[0];
 373                         		p[1] = q[1];
 374                         		p[2] = q[2];
 375                         		return p + 3 - dest;
 376                         	    }
 377                         	    break;
 378                             }
 379                         
 380                             // Process remaining characters.
 381                         
 382                             while (n)
 383                             {
 384                         	// Optimize for 7-bit ASCII case.
 385                         
 386                         	if (*q < 128)
 387 mike          1.111.6.1 	{
 388                         	    *p++ = *q++;
 389                         	    n--;
 390                         	}
 391                         	else
 392                         	{
 393                         	    Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
 394                         
 395                         	    if (c > n || !isValid_U8(q, c) ||
 396                         		UTF8toUTF16(&q, q + c, &p, p + n) != 0)
 397                         	    {
 398                         		MessageLoaderParms parms("Common.String.BAD_UTF8",
 399                         		    "The byte sequence starting at index $0 "
 400                         		    "is not valid UTF-8 encoding.",
 401                         		     q - (const Uint8*)src);
 402                         		throw Exception(parms);
 403                         	    }
 404                         
 405                         	    n -= c;
 406                         	}
 407                             }
 408 mike          1.111.6.1 
 409                             return p - dest;
 410                         }
 411                         
 412                         // Note: dest must be at least three times src (plus an extra byte for 
 413                         // terminator).
 414                         static inline size_t _copy_to_utf8(char* dest, const Uint16* src, size_t n)
 415                         {
 416                             const Uint16* q = src;
 417                             Uint8* p = (Uint8*)dest;
 418                         
 419                             while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 420                             {
 421                         	p[0] = q[0];
 422                         	p[1] = q[1];
 423                         	p[2] = q[2];
 424                         	p[3] = q[3];
 425                         	p += 4;
 426                         	q += 4;
 427                         	n -= 4;
 428                             }
 429 mike          1.111.6.1 
 430                             switch (n)
 431                             {
 432                         	case 0:
 433                         	    return p - (Uint8*)dest;
 434                         	case 1:
 435                         	    if (q[0] < 128)
 436                         	    {
 437                         		p[0] = q[0];
 438                         		return p + 1 - (Uint8*)dest;
 439                         	    }
 440                         	    break;
 441                         	case 2:
 442                         	    if (q[0] < 128 && q[1] < 128)
 443                         	    {
 444                         		p[0] = q[0];
 445                         		p[1] = q[1];
 446                         		return p + 2 - (Uint8*)dest;
 447                         	    }
 448                         	    break;
 449                         	case 3:
 450 mike          1.111.6.1 	    if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 451                         	    {
 452                         		p[0] = q[0];
 453                         		p[1] = q[1];
 454                         		p[2] = q[2];
 455                         		return p + 3 - (Uint8*)dest;
 456                         	    }
 457                         	    break;
 458                             }
 459                         
 460                             // If this line was reached, there must be characters greater than 128.
 461                         
 462                             UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 463 mike          1.27      
 464 mike          1.111.6.1     return p - (Uint8*)dest;
 465                         }
 466                         
 467                         static inline size_t _convert(Uint16* p, const char* q, size_t n)
 468                         {
 469                         #ifdef PEGASUS_STRING_NO_UTF8
 470                             _copy(p, q, n);
 471 mike          1.27          return n;
 472 mike          1.111.6.1 #else
 473                             return _copy_from_utf8(p, q, n);
 474                         #endif
 475 mike          1.27      }
 476                         
 477 mike          1.111.6.1 //==============================================================================
 478 chuck         1.102     //
 479 mike          1.111.6.1 // class CString
 480 chuck         1.102     //
 481 mike          1.111.6.1 //==============================================================================
 482                         
 483                         CString::CString(const CString& cstr) : _rep(0)
 484 chuck         1.102     {
 485 mike          1.111.6.1     if (cstr._rep)
 486 chuck         1.102         {
 487 mike          1.111.6.1 	size_t n = strlen(cstr._rep) + 1;
 488                                 _rep = (char*)operator new(n);
 489                         	memcpy(_rep, cstr._rep, n);
 490                             }
 491                         }
 492                         
 493                         CString& CString::operator=(const CString& cstr)
 494                         {
 495                             if (&cstr != this)
 496                             {
 497                                 if (_rep)
 498 david.dillard 1.105             {
 499 mike          1.111.6.1             operator delete(_rep);
 500                                     _rep = 0;
 501 chuck         1.102             }
 502 mike          1.111.6.1 
 503                                 if (cstr._rep)
 504 chuck         1.102             {
 505 mike          1.111.6.1 	    size_t n = strlen(cstr._rep) + 1;
 506                                     _rep = (char*)operator new(n);
 507                         	    memcpy(_rep, cstr._rep, n);
 508 chuck         1.102             }
 509 mike          1.111.6.1     }
 510 chuck         1.102     
 511 mike          1.111.6.1     return *this;
 512                         }
 513                         
 514                         //==============================================================================
 515                         //
 516                         // class StringRep
 517                         //
 518                         //==============================================================================
 519                         
 520                         StringRep StringRep::_empty_rep;
 521 chuck         1.102     
 522 mike          1.111.6.1 inline StringRep* StringRep::alloc(size_t cap)
 523                         {
 524                             StringRep* rep = (StringRep*)::operator new(
 525                         	sizeof(StringRep) + cap * sizeof(Uint16));
 526                             rep->cap = cap;
 527                             Atomic_create(&rep->refs, 1);
 528                         
 529                             return rep;
 530 chuck         1.102     }
 531                         
 532 mike          1.111.6.1 static inline void _reserve(StringRep*& rep, Uint32 cap)
 533 kumpf         1.43      {
 534 mike          1.111.6.1     if (cap > rep->cap || Atomic_get(&rep->refs) != 1)
 535                             {
 536                         	size_t n = _next_pow_2(cap);
 537                         	StringRep* new_rep = StringRep::alloc(n);
 538                         	new_rep->size = rep->size;
 539                         	_copy(new_rep->data, rep->data, rep->size + 1);
 540                         	StringRep::unref(rep);
 541                         	rep = new_rep;
 542                             }
 543                         }
 544 kumpf         1.43      
 545 mike          1.111.6.1 StringRep* StringRep::create(const Uint16* data, size_t size)
 546                         {
 547                             StringRep* rep = StringRep::alloc(size);
 548                             rep->size = size;
 549                             _copy(rep->data, data, size);
 550                             rep->data[size] = '\0';
 551                             return rep;
 552                         }
 553                         
 554                         StringRep* StringRep::copy_on_write(StringRep* rep)
 555                         {
 556                             // Return a new copy of rep. Release rep.
 557                         
 558                             StringRep* new_rep = StringRep::alloc(rep->size);
 559                             new_rep->size = rep->size;
 560                             _copy(new_rep->data, rep->data, rep->size);
 561                             new_rep->data[new_rep->size] = '\0';
 562                             StringRep::unref(rep);
 563                             return new_rep;
 564                         }
 565 kumpf         1.43      
 566 mike          1.111.6.1 StringRep* StringRep::create(const char* data, size_t size)
 567 mike          1.27      {
 568 mike          1.111.6.1     StringRep* rep = StringRep::alloc(size);
 569                             rep->size = _convert((Uint16*)rep->data, data, size);
 570                             rep->data[rep->size] = '\0';
 571                         
 572                             return rep;
 573 mike          1.27      }
 574                         
 575 mike          1.111.6.1 StringRep* StringRep::createASCII7(const char* data, size_t size)
 576 mike          1.27      {
 577 mike          1.111.6.1     StringRep* rep = StringRep::alloc(size);
 578                             _copy((Uint16*)rep->data, data, size);
 579                             rep->data[rep->size = size] = '\0';
 580                             return rep;
 581 kumpf         1.39      }
 582 tony          1.66      
 583 mike          1.111.6.1 Uint32 StringRep::length(const Uint16* str)
 584                         {
 585                             // Note: We could unroll this but it is rarely called.
 586                         
 587                             const Uint16* end = (Uint16*)str;
 588                         
 589                             while (*end++)
 590                         	;
 591                         
 592                             return end - str - 1;
 593                         }
 594                         
 595                         //==============================================================================
 596                         //
 597                         // class String
 598                         //
 599                         //==============================================================================
 600                         
 601                         const String String::EMPTY;
 602 mike          1.27      
 603 kumpf         1.39      String::String(const String& str, Uint32 n)
 604                         {
 605 mike          1.111.6.1     _check_bounds(n, str._rep->size);
 606                             _rep = StringRep::create(str._rep->data, n);
 607 kumpf         1.39      }
 608                         
 609                         String::String(const Char16* str)
 610                         {
 611 mike          1.111.6.1     _check_null_pointer(str);
 612                             _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 613 mike          1.27      }
 614                         
 615 kumpf         1.39      String::String(const Char16* str, Uint32 n)
 616                         {
 617 mike          1.111.6.1     _check_null_pointer(str);
 618                             _rep = StringRep::create((Uint16*)str, n);
 619 kumpf         1.39      }
 620                         
 621                         String::String(const char* str)
 622 mike          1.27      {
 623 mike          1.111.6.1     _check_null_pointer(str);
 624                             _rep = StringRep::create(str, strlen(str));
 625                         }
 626 david.dillard 1.105     
 627 mike          1.111.6.1 String::String(const char* str, String::ASCII7Tag tag)
 628                         {
 629                             _check_null_pointer(str);
 630                             _rep = StringRep::createASCII7(str, strlen(str));
 631 mike          1.27      }
 632                         
 633 kumpf         1.39      String::String(const char* str, Uint32 n)
 634 mike          1.27      {
 635 mike          1.111.6.1     _check_null_pointer(str);
 636                             _rep = StringRep::create(str, n);
 637                         }
 638 david.dillard 1.105     
 639 mike          1.111.6.1 String::String(const char* str, size_t n, String::ASCII7Tag tag)
 640                         {
 641                             _check_null_pointer(str);
 642                             _rep = StringRep::createASCII7(str, n);
 643 kumpf         1.39      }
 644 mike          1.27      
 645 mike          1.111.6.1 String::String(const String& s1, const String& s2)
 646 kumpf         1.39      {
 647 mike          1.111.6.1     size_t n1 = s1._rep->size;
 648                             size_t n2 = s2._rep->size;
 649                             size_t n = n1 + n2;
 650                             _rep = StringRep::alloc(n);
 651                             _copy(_rep->data, s1._rep->data, n1);
 652                             _copy(_rep->data + n1, s2._rep->data, n2);
 653                             _rep->size = n;
 654                             _rep->data[n] = '\0';
 655 mike          1.27      }
 656                         
 657 mike          1.111.6.1 String::String(const String& s1, const char* s2)
 658 mike          1.27      {
 659 mike          1.111.6.1     _check_null_pointer(s2);
 660                             size_t n1 = s1._rep->size;
 661                             size_t n2 = strlen(s2);
 662                             _rep = StringRep::alloc(n1 + n2);
 663                             _copy(_rep->data, s1._rep->data, n1);
 664                             _rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2);
 665                             _rep->data[_rep->size] = '\0';
 666 mike          1.27      }
 667                         
 668 mike          1.111.6.1 String::String(const char* s1, const String& s2)
 669 mike          1.27      {
 670 mike          1.111.6.1     _check_null_pointer(s1);
 671                             size_t n1 = strlen(s1);
 672                             size_t n2 = s2._rep->size;
 673                             _rep = StringRep::alloc(n1 + n2);
 674                             _rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1);
 675                             _copy(_rep->data + n1, s2._rep->data, n2);
 676                             _rep->data[_rep->size] = '\0';
 677 mike          1.27      }
 678                         
 679 mike          1.111.6.1 String& String::assign(const String& str)
 680 mike          1.27      {
 681 mike          1.111.6.1     if (_rep != str._rep)
 682 david.dillard 1.105         {
 683 mike          1.111.6.1 	StringRep::unref(_rep);
 684                         	StringRep::ref(_rep = str._rep);
 685 david.dillard 1.105         }
 686                         
 687 mike          1.27          return *this;
 688                         }
 689                         
 690                         String& String::assign(const Char16* str, Uint32 n)
 691                         {
 692 mike          1.111.6.1     _check_null_pointer(str);
 693                         
 694                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 695 david.dillard 1.105         {
 696 mike          1.111.6.1 	StringRep::unref(_rep);
 697                         	_rep = StringRep::alloc(n);
 698 david.dillard 1.105         }
 699                         
 700 mike          1.111.6.1     _rep->size = n;
 701                             _copy(_rep->data, (Uint16*)str, n);
 702                             _rep->data[n] = '\0';
 703                         
 704 mike          1.27          return *this;
 705                         }
 706                         
 707 mike          1.111.6.1 String& String::assign(const char* str, Uint32 n)
 708 chuck         1.102     {
 709 mike          1.111.6.1     _check_null_pointer(str);
 710                         
 711                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 712 david.dillard 1.105         {
 713 mike          1.111.6.1 	StringRep::unref(_rep);
 714                         	_rep = StringRep::alloc(n);
 715 david.dillard 1.105         }
 716                         
 717 mike          1.111.6.1     _rep->size = _convert(_rep->data, str, n);
 718                             _rep->data[_rep->size] = 0;
 719                         
 720 chuck         1.102         return *this;
 721                         }
 722                         
 723 mike          1.111.6.1 String& String::assignASCII7(const char* str, Uint32 n)
 724 mike          1.27      {
 725 mike          1.111.6.1     _check_null_pointer(str);
 726                         
 727                             if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 728 david.dillard 1.105         {
 729 mike          1.111.6.1 	StringRep::unref(_rep);
 730                         	_rep = StringRep::alloc(n);
 731 david.dillard 1.105         }
 732                         
 733 mike          1.111.6.1     _copy(_rep->data, str, n);
 734                             _rep->data[_rep->size = n] = 0;
 735                         
 736 mike          1.27          return *this;
 737                         }
 738                         
 739 kumpf         1.39      void String::clear()
 740                         {
 741 mike          1.111.6.1     if (_rep->size)
 742                             {
 743                         	if (Atomic_get(&_rep->refs) == 1)
 744                         	    _rep->size = 0;
 745                         	else
 746                         	{
 747                         	    StringRep::unref(_rep);
 748                         	    _rep = &StringRep::_empty_rep;
 749                         	}
 750                             }
 751 kumpf         1.39      }
 752                         
 753 mike          1.111.6.1 void String::reserveCapacity(Uint32 cap)
 754 kumpf         1.39      {
 755 mike          1.111.6.1     _reserve(_rep, cap);
 756 kumpf         1.39      }
 757                         
 758 mike          1.111.6.1 CString String::getCString() const
 759                         {
 760                         #ifdef PEGASUS_STRING_NO_UTF8
 761                             char* str = (char*)operator new(_rep->size + 1);
 762                             _copy(str, _rep->data, _rep->size);
 763                             str[_rep->size] = '\0';
 764                             return CString(str);
 765 gs.keenan     1.110     #else
 766 mike          1.111.6.1     Uint32 n = 3 * _rep->size;
 767                             char* str = (char*)operator new(n + 1);
 768                             size_t size = _copy_to_utf8(str, _rep->data, _rep->size);
 769                             str[size] = '\0';
 770                             return CString(str);
 771 gs.keenan     1.110     #endif
 772 kumpf         1.39      }
 773                         
 774 mike          1.111.6.1 String& String::append(const Char16* str, Uint32 n)
 775 kumpf         1.39      {
 776 mike          1.111.6.1     _check_null_pointer(str);
 777 kumpf         1.39      
 778 mike          1.111.6.1     size_t old_size = _rep->size;
 779                             size_t new_size = old_size + n;
 780                             _reserve(_rep, new_size);
 781                             _copy(_rep->data + old_size, (Uint16*)str, n);
 782                             _rep->size = new_size;
 783                             _rep->data[new_size] = '\0';
 784 mike          1.27      
 785 mike          1.111.6.1     return *this;
 786 mike          1.27      }
 787                         
 788 mike          1.111.6.1 String& String::append(const String& str)
 789 mike          1.27      {
 790 mike          1.111.6.1     return append((Char16*)str._rep->data, str._rep->size);
 791 mike          1.27      }
 792                         
 793 mike          1.111.6.1 String& String::append(const char* str, Uint32 size)
 794 kumpf         1.39      {
 795 mike          1.111.6.1     _check_null_pointer(str);
 796                         
 797                             size_t old_size = _rep->size;
 798                             size_t cap = old_size + size;
 799                         
 800                             _reserve(_rep, cap);
 801                             _rep->size += _convert((Uint16*)_rep->data + old_size, str, size);
 802                             _rep->data[_rep->size] = '\0';
 803 kumpf         1.39      
 804 mike          1.27          return *this;
 805                         }
 806                         
 807 mike          1.111.6.1 void String::remove(Uint32 index, Uint32 n)
 808 mike          1.27      {
 809 mike          1.111.6.1     if (n == PEG_NOT_FOUND)
 810                                 n = _rep->size - index;
 811 mike          1.27      
 812 mike          1.111.6.1     _check_bounds(index + n, _rep->size);
 813 mike          1.27      
 814 mike          1.111.6.1     if (Atomic_get(&_rep->refs) != 1)
 815                         	_rep = StringRep::copy_on_write(_rep);
 816 mike          1.27      
 817 mike          1.111.6.1     assert(index + n <= _rep->size);
 818                         
 819                             size_t rem = _rep->size - (index + n);
 820                             Uint16* data = _rep->data;
 821                         
 822                             if (rem)
 823                                 memmove(data + index, data + index + n, rem * sizeof(Uint16));
 824                         
 825                             _rep->size -= n;
 826                             data[_rep->size] = '\0';
 827 mike          1.27      }
 828                         
 829 mike          1.111.6.1 String String::subString(Uint32 index, Uint32 n) const
 830 mike          1.27      {
 831 mike          1.111.6.1     // Note: this implementation is very permissive but used for
 832                             // backwards compatibility.
 833                         
 834                             if (index < _rep->size)
 835 mike          1.27          {
 836 mike          1.111.6.1 	if (n == PEG_NOT_FOUND || n > _rep->size - index)
 837                         	    n = _rep->size - index;
 838 mike          1.27      
 839 mike          1.111.6.1 	return String((Char16*)_rep->data + index, n);
 840 mike          1.27          }
 841 david.dillard 1.105     
 842                             return String();
 843 mike          1.27      }
 844                         
 845                         Uint32 String::find(Char16 c) const
 846                         {
 847 mike          1.111.6.1     Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 848 mike          1.27      
 849 mike          1.111.6.1     if (p)
 850                         	return p - _rep->data;
 851 mike          1.27      
 852                             return PEG_NOT_FOUND;
 853                         }
 854                         
 855 kumpf         1.53      Uint32 String::find(Uint32 index, Char16 c) const
 856 mike          1.30      {
 857 mike          1.111.6.1     _check_bounds(index, _rep->size);
 858 mike          1.30      
 859 mike          1.111.6.1     if (index >= _rep->size)
 860                         	return PEG_NOT_FOUND;
 861                         
 862                             Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 863                         
 864                             if (p)
 865                         	return p - _rep->data;
 866 mike          1.30      
 867                             return PEG_NOT_FOUND;
 868                         }
 869                         
 870 mike          1.111.6.1 Uint32 String::_find_aux(const Char16* s, Uint32 n) const
 871 mike          1.27      {
 872 mike          1.111.6.1     _check_null_pointer(s);
 873 mike          1.27      
 874 mike          1.111.6.1     const Uint16* data = _rep->data;
 875                             size_t rem = _rep->size;
 876 mike          1.30      
 877 mike          1.111.6.1     while (n <= rem)
 878 mike          1.27          {
 879 mike          1.111.6.1 	Uint16* p = (Uint16*)_find(data, rem, s[0]);
 880 david.dillard 1.105     
 881 mike          1.111.6.1 	if (!p)
 882                         	    break;
 883                         
 884                         	if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 885                         	    return p - _rep->data;
 886                         
 887                         	p++;
 888                         	rem -= p - data;
 889                         	data = p;
 890 mike          1.27          }
 891 mike          1.111.6.1 
 892 mike          1.27          return PEG_NOT_FOUND;
 893                         }
 894                         
 895 mike          1.111.6.1 Uint32 String::find(const char* s) const
 896                         {
 897                             _check_null_pointer(s);
 898                         
 899                             // Note: could optimize away creation of temporary, but this is rarely
 900                             // called.
 901                             return find(String(s));
 902                         }
 903                         
 904 mike          1.27      Uint32 String::reverseFind(Char16 c) const
 905                         {
 906 mike          1.111.6.1     Uint16 x = c;
 907                             Uint16* p = _rep->data;
 908                             Uint16* q = _rep->data + _rep->size;
 909 mike          1.27      
 910 mike          1.111.6.1     while (q != p)
 911 mike          1.27          {
 912 mike          1.111.6.1 	if (*--q == x)
 913                         	    return q - p;
 914 mike          1.27          }
 915                         
 916                             return PEG_NOT_FOUND;
 917                         }
 918                         
 919                         void String::toLower()
 920                         {
 921 david         1.69      #ifdef PEGASUS_HAS_ICU
 922 mike          1.111.6.1 
 923 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
 924 david         1.90          {
 925 mike          1.111.6.1         if (Atomic_get(&_rep->refs) != 1)
 926                         	        _rep = StringRep::copy_on_write(_rep);
 927                         
 928 yi.zhou       1.108             // This will do a locale-insensitive, but context-sensitive convert.
 929 mike          1.111.6.1         // Since context-sensitive casing looks at adjacent chars, this 
 930                                 // prevents optimizations where the us-ascii is converted before 
 931                                 // calling ICU.
 932 yi.zhou       1.108             // The string may shrink or expand after the convert.
 933                         
 934 mike          1.111.6.1 	//// First calculate size of resulting string. u_strToLower() returns
 935                         	//// only the size when zero is passed as the destination size argument.
 936                         
 937 yi.zhou       1.108             UErrorCode err = U_ZERO_ERROR;
 938                         
 939 mike          1.111.6.1         int32_t new_size = u_strToLower(
 940                         	    NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 941                                 
 942                                 err = U_ZERO_ERROR;
 943 chuck         1.99      
 944 mike          1.111.6.1 	//// Reserve enough space for the result.
 945                         
 946                         	if ((Uint32)new_size > _rep->cap)
 947                         	    _reserve(_rep, new_size);
 948                         
 949                         	//// Perform the conversion (overlapping buffers are allowed).
 950 yi.zhou       1.108     
 951 mike          1.111.6.1         u_strToLower((UChar*)_rep->data, new_size,
 952                         	    (UChar*)_rep->data, _rep->size, NULL, &err);
 953                         
 954                         	_rep->size = new_size;
 955                         	return;
 956 david         1.90          }
 957 mike          1.111.6.1 
 958                         #endif /* PEGASUS_HAS_ICU */
 959                         
 960                             if (Atomic_get(&_rep->refs) != 1)
 961                         	_rep = StringRep::copy_on_write(_rep);
 962                         
 963                             Uint16* p = _rep->data;
 964                             size_t n = _rep->size;
 965                         
 966                             for (; n--; p++)
 967 david         1.90          {
 968 mike          1.111.6.1 	if (!(*p & 0xFF00))
 969                         	    *p = _to_lower(*p);
 970 mike          1.27          }
 971 kumpf         1.39      }
 972                         
 973 chuck         1.99      void String::toUpper()
 974 david         1.90      {
 975                         #ifdef PEGASUS_HAS_ICU
 976 mike          1.111.6.1 
 977 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
 978 chuck         1.99          {
 979 mike          1.111.6.1         if (Atomic_get(&_rep->refs) != 1)
 980                         	    _rep = StringRep::copy_on_write(_rep);
 981                         
 982 yi.zhou       1.108             // This will do a locale-insensitive, but context-sensitive convert.
 983 mike          1.111.6.1         // Since context-sensitive casing looks at adjacent chars, this 
 984                                 // prevents optimizations where the us-ascii is converted before 
 985                                 // calling ICU.
 986 yi.zhou       1.108             // The string may shrink or expand after the convert.
 987                         
 988 mike          1.111.6.1 	//// First calculate size of resulting string. u_strToUpper() returns
 989                         	//// only the size when zero is passed as the destination size argument.
 990                         
 991 yi.zhou       1.108             UErrorCode err = U_ZERO_ERROR;
 992                         
 993 mike          1.111.6.1         int32_t new_size = u_strToUpper(
 994                         	    NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 995 chuck         1.99      
 996 mike          1.111.6.1         err = U_ZERO_ERROR;
 997 yi.zhou       1.108     
 998 mike          1.111.6.1 	//// Reserve enough space for the result.
 999 david         1.90      
1000 mike          1.111.6.1 	if ((Uint32)new_size > _rep->cap)
1001                         	    _reserve(_rep, new_size);
1002 kumpf         1.39      
1003 mike          1.111.6.1 	//// Perform the conversion (overlapping buffers are allowed).
1004 mike          1.27      
1005 mike          1.111.6.1         u_strToUpper((UChar*)_rep->data, new_size,
1006                         	    (UChar*)_rep->data, _rep->size, NULL, &err);
1007                         
1008                         	_rep->size = new_size;
1009                         
1010                         	return;
1011 mike          1.27          }
1012                         
1013 mike          1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1014                         
1015                             if (Atomic_get(&_rep->refs) != 1)
1016                         	_rep = StringRep::copy_on_write(_rep);
1017                         
1018                             Uint16* p = _rep->data;
1019                             size_t n = _rep->size;
1020                         
1021                             for (; n--; p++)
1022                         	*p = _to_upper(*p);
1023 mike          1.27      }
1024                         
1025 mike          1.111.6.1 int String::compare(const String& s1, const String& s2, Uint32 n)
1026 mike          1.30      {
1027 mike          1.111.6.1     assert(n <= s1._rep->size);
1028                             assert(n <= s2._rep->size);
1029 kumpf         1.43      
1030 mike          1.111.6.1     // Ignoring error in which n is greater than s1.size() or s2.size()
1031                             return _compare(s1._rep->data, s2._rep->data, n);
1032                         }
1033 mike          1.30      
1034 mike          1.111.6.1 int String::compare(const String& s1, const String& s2)
1035                         {
1036                             return _compare(s1._rep->data, s2._rep->data);
1037                         }
1038 mike          1.30      
1039 mike          1.111.6.1 int String::compare(const String& s1, const char* s2)
1040                         {
1041                             _check_null_pointer(s2);
1042 mike          1.30      
1043 mike          1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
1044                             return _compare_no_utf8(s1._rep->data, s2);
1045                         #else
1046                             // ATTN: optimize this!
1047                             return String::compare(s1, String(s2));
1048                         #endif
1049 mike          1.30      }
1050                         
1051 mike          1.111.6.1 int String::compareNoCase(const String& str1, const String& str2)
1052 kumpf         1.40      {
1053 david         1.69      #ifdef PEGASUS_HAS_ICU
1054 mike          1.111.6.1 
1055 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
1056                             {
1057 mike          1.111.6.1         return  u_strcasecmp(
1058                         	    str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
1059 yi.zhou       1.108         }
1060 kumpf         1.40      
1061 mike          1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1062 kumpf         1.40      
1063 mike          1.111.6.1     const Uint16* s1 = str1._rep->data;
1064                             const Uint16* s2 = str2._rep->data;
1065                         
1066                             while (*s1 && *s2)
1067                             {
1068                                 int r = _to_lower(*s1++) - _to_lower(*s2++);
1069 kumpf         1.40      
1070 david.dillard 1.105             if (r)
1071                                     return r;
1072 kumpf         1.40          }
1073                         
1074 mike          1.111.6.1     if (*s2)
1075 david.dillard 1.105             return -1;
1076 mike          1.111.6.1     else if (*s1)
1077 david.dillard 1.105             return 1;
1078 kumpf         1.40      
1079                             return 0;
1080                         }
1081                         
1082 mike          1.111.6.1 Boolean String::equalNoCase_aux(const String& s1, const String& s2)
1083 mike          1.27      {
1084 david         1.69      #ifdef PEGASUS_HAS_ICU
1085 kumpf         1.39      
1086 mike          1.111.6.1     return String::compareNoCase(s1, s2) == 0;
1087 kumpf         1.39      
1088 mike          1.111.6.1 #else /* PEGASUS_HAS_ICU */
1089                         
1090                             Uint16* p = (Uint16*)s1._rep->data;
1091                             Uint16* q = (Uint16*)s2._rep->data;
1092                             Uint32 n = s2._rep->size;
1093                         
1094                             while (n >= 8)
1095                             {
1096                         	if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||
1097                         	    ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||
1098                         	    ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||
1099                         	    ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))) ||
1100                         	    ((p[4] - q[4]) && (_to_upper(p[4]) - _to_upper(q[4]))) ||
1101                         	    ((p[5] - q[5]) && (_to_upper(p[5]) - _to_upper(q[5]))) ||
1102                         	    ((p[6] - q[6]) && (_to_upper(p[6]) - _to_upper(q[6]))) ||
1103                         	    ((p[7] - q[7]) && (_to_upper(p[7]) - _to_upper(q[7]))))
1104                         	{
1105                         	    return false;
1106                         	}
1107                         
1108                         	n -= 8;
1109 mike          1.111.6.1 	p += 8;
1110                         	q += 8;
1111                             }
1112                         
1113                             while (n >= 4)
1114                             {
1115                         	if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||
1116                         	    ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||
1117                         	    ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||
1118                         	    ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))))
1119                         	{
1120                         	    return false;
1121                         	}
1122                         
1123                         	n -= 4;
1124                         	p += 4;
1125                         	q += 4;
1126                             }
1127 mike          1.27      
1128 kumpf         1.39          while (n--)
1129                             {
1130 mike          1.111.6.1 	if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))))
1131                         	    return false;
1132                         
1133                         	p++;
1134                         	q++;
1135 kumpf         1.39          }
1136 mike          1.28      
1137 kumpf         1.39          return true;
1138 david         1.69      
1139 mike          1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1140                         }
1141 mike          1.27      
1142 mike          1.111.6.1 Boolean String::equalNoCase(const String& s1, const char* s2)
1143 david         1.69      {
1144 mike          1.111.6.1     _check_null_pointer(s2);
1145 david         1.69      
1146 mike          1.111.6.1 #if defined(PEGASUS_HAS_ICU)
1147 david         1.69      
1148 mike          1.111.6.1     return String::equalNoCase(s1, String(s2));
1149 david.dillard 1.105     
1150 mike          1.111.6.1 #elif defined(PEGASUS_STRING_NO_UTF8)
1151 david         1.71      
1152 mike          1.111.6.1     const Uint16* p1 = (Uint16*)s1._rep->data;
1153                             const char* p2 = s2;
1154                             size_t n = s1._rep->size;
1155 kumpf         1.42      
1156 mike          1.111.6.1     while (n--)
1157                             {
1158                         	if (!*p2)
1159                         	    return false;
1160 karl          1.36      
1161 mike          1.111.6.1 	if (_to_upper(*p1++) != _to_upper_tbl[int(*p2++)])
1162                         	    return false;
1163                             }
1164 david.dillard 1.105     
1165 mike          1.111.6.1     if (*p2)
1166                         	return false;
1167                             
1168                             return true;
1169 david.dillard 1.105     
1170 mike          1.111.6.1 #else /* PEGASUS_HAS_ICU */
1171 chuck         1.78      
1172 mike          1.111.6.1     // ATTN: optimize this!
1173                             return String::equalNoCase(s1, String(s2));
1174 david.dillard 1.105     
1175 mike          1.111.6.1 #endif /* PEGASUS_HAS_ICU */
1176                         }
1177 karl          1.36      
1178 mike          1.111.6.1 Boolean String::equal(const String& s1, const String& s2)
1179 karl          1.36      {
1180 mike          1.111.6.1     return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, 
1181                         	s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1182 karl          1.36      }
1183                         
1184 mike          1.111.6.1 Boolean String::equal(const String& s1, const char* s2)
1185 karl          1.36      {
1186 mike          1.111.6.1 #ifdef PEGASUS_STRING_NO_UTF8
1187 kumpf         1.39      
1188 mike          1.111.6.1     _check_null_pointer(s2);
1189 kumpf         1.39      
1190 mike          1.111.6.1     const Uint16* p = (Uint16*)s1._rep->data;
1191                             const char* q = s2;
1192 kumpf         1.39      
1193 mike          1.111.6.1     while (*p && *q)
1194                             {
1195                         	if (*p++ != Uint16(*q++))
1196                         	    return false;
1197                             }
1198 kumpf         1.39      
1199 mike          1.111.6.1     return !(*p || *q);
1200 kumpf         1.39      
1201 mike          1.111.6.1 #else /* PEGASUS_STRING_NO_UTF8 */
1202 kumpf         1.39      
1203 mike          1.111.6.1     return String::equal(s1, String(s2));
1204                         
1205                         #endif /* PEGASUS_STRING_NO_UTF8 */
1206 kumpf         1.39      }
1207                         
1208 kumpf         1.47      PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1209 kumpf         1.39      {
1210 david         1.69      #if defined(PEGASUS_OS_OS400)
1211 mike          1.111.6.1 
1212 david         1.93          CString cstr = str.getCString();
1213 david         1.69          const char* utf8str = cstr;
1214                             os << utf8str;
1215 mike          1.111.6.1     return os;
1216                         #else    
1217                         
1218                         #if defined(PEGASUS_HAS_ICU)
1219 david         1.69      
1220 yi.zhou       1.108         if (InitializeICU::initICUSuccessful())
1221                             {
1222 david.dillard 1.105             char *buf = NULL;
1223                                 const int size = str.size() * 6;
1224 mike          1.111.6.1         UnicodeString UniStr(
1225                         	    (const UChar *)str.getChar16Data(), (int32_t)str.size());
1226 david.dillard 1.105             Uint32 bufsize = UniStr.extract(0,size,buf);
1227                                 buf = new char[bufsize+1];
1228                                 UniStr.extract(0,bufsize,buf);
1229                                 os << buf;
1230                                 os.flush();
1231                                 delete [] buf;
1232 mike          1.111.6.1         return os;       
1233 yi.zhou       1.108         }
1234 mike          1.111.6.1 
1235                         #endif  // PEGASUS_HAS_ICU 
1236                         
1237                             for (Uint32 i = 0, n = str.size(); i < n; i++)
1238 yi.zhou       1.108         {
1239 mike          1.111.6.1         Uint16 code = str[i];
1240 david.dillard 1.105     
1241 mike          1.111.6.1        	if (code > 0 && !(code & 0xFF00))
1242                            	        os << char(code);
1243                                 else
1244                            	    {
1245                                     // Print in hex format:
1246                                     char buffer[8];
1247                                     sprintf(buffer, "\\x%04X", code);
1248                                     os << buffer;
1249 david.dillard 1.105             }
1250 yi.zhou       1.108         }
1251 kumpf         1.39      
1252                             return os;
1253 mike          1.111.6.1 #endif // PEGASUS_OS_OS400
1254 kumpf         1.39      }
1255                         
1256 mike          1.111.6.1 void String::_append_char_aux()
1257 kumpf         1.39      {
1258 mike          1.111.6.1     StringRep* tmp;
1259 kumpf         1.39      
1260 mike          1.111.6.1     if (_rep->cap)
1261                             {
1262                         	tmp = StringRep::alloc(2 * _rep->cap);
1263                         	tmp->size = _rep->size;
1264                         	_copy(tmp->data, _rep->data, _rep->size);
1265                             }
1266                             else
1267                             {
1268                         	tmp = StringRep::alloc(8);
1269                         	tmp->size = 0;
1270                             }
1271 kumpf         1.39      
1272 mike          1.111.6.1     StringRep::unref(_rep);
1273                             _rep = tmp;
1274 kumpf         1.39      }
1275                         
1276 mike          1.111.6.1 PEGASUS_NAMESPACE_END
1277 kumpf         1.39      
1278 mike          1.111.6.1 /*
1279                         ================================================================================
1280 kumpf         1.39      
1281 mike          1.111.6.1 String optimizations:
1282                         
1283                             1.  Added mechanism allowing certain functions to be inlined only when
1284                         	used by internal Pegasus modules. External modules (i.e., providers)
1285                         	link to a non-inline version, which allows for binary compatibility.
1286                         
1287                             2.  Implemented copy-on-write with atomic increment/decrement. This
1288                         	yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1289                         	for the 'ni1000' benchmark.
1290                         
1291                             3.	Employed loop unrolling in several places. For example, see:
1292                         
1293                         	    static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1294                         
1295                             4.  Used the "empty-rep" optimization (described in whitepaper from the
1296                         	GCC Developers Summit). This reduced default construction to a simple
1297                         	pointer assignment.
1298                         
1299                         	    inline String::String() : _rep(&_empty_rep) { }
1300                         
1301                             5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1302 mike          1.111.6.1 	For example:
1303                         
1304                         	    static const char _upper[] =
1305                         	    {
1306                         		0,1,2,...255
1307                         	    };
1308                         
1309                         	    inline Uint16 _to_upper(Uint16 x)
1310                         	    {
1311                         		return (x & 0xFF00) ? x : _upper[x];
1312                         	    }
1313                         
1314                         	This outperforms the system implementation by avoiding an anding 
1315                         	operation.
1316                         
1317                             6.  Implemented char* version of the following member functions to 
1318                         	eliminate unnecessary creation of anonymous string objects 
1319                         	(temporaries).
1320                         
1321                         	    String(const String& s1, const char* s2);
1322                         	    String(const char* s1, const String& s2);
1323 mike          1.111.6.1 	    String& String::operator=(const char* str);
1324                         	    Uint32 String::find(const char* s) const;
1325                         	    bool String::equal(const String& s1, const char* s2);
1326                         	    static int String::compare(const String& s1, const char* s2);
1327                         	    String& String::append(const char* str);
1328                         	    String& String::append(const char* str, Uint32 size);
1329                         	    static bool String::equalNoCase(const String& s1, const char* s2);
1330                         	    String& operator=(const char* str)
1331                         	    String& String::assign(const char* str)
1332                         	    String& String::append(const char* str)
1333                         	    Boolean operator==(const String& s1, const char* s2)
1334                         	    Boolean operator==(const char* s1, const String& s2)
1335                         	    Boolean operator!=(const String& s1, const char* s2)
1336                         	    Boolean operator!=(const char* s1, const String& s2)
1337                         	    Boolean operator<(const String& s1, const char* s2)
1338                         	    Boolean operator<(const char* s1, const String& s2)
1339                         	    Boolean operator>(const String& s1, const char* s2)
1340                         	    Boolean operator>(const char* s1, const String& s2)
1341                         	    Boolean operator<=(const String& s1, const char* s2)
1342                         	    Boolean operator<=(const char* s1, const String& s2)
1343                         	    Boolean operator>=(const String& s1, const char* s2)
1344 mike          1.111.6.1 	    Boolean operator>=(const char* s1, const String& s2)
1345                         	    String operator+(const String& s1, const char* s2)
1346                         	    String operator+(const char* s1, const String& s2)
1347                         
1348                             7.  Optimized _next_pow_2(), used in rounding the capacity to the next 
1349                                 power of two (algorithm from the book "Hacker's Delight").
1350                         
1351                         	    static Uint32 _next_pow_2(Uint32 x)
1352                         	    {
1353                         		if (x < 8)
1354                         		    return 8;
1355                         
1356                         		x--;
1357                         		x |= (x >> 1);
1358                         		x |= (x >> 2);
1359                         		x |= (x >> 4);
1360                         		x |= (x >> 8);
1361                         		x |= (x >> 16);
1362                         		x++;
1363                         
1364                         		return x;
1365 mike          1.111.6.1 	    }
1366                         
1367                             8.  Implemented "concatenating constructors" to eliminate temporaries
1368                         	created by operator+(). This scheme employs the "return-value 
1369                         	optimization" described by Stan Lippman.
1370                         
1371                         	    inline String operator+(const String& s1, const String& s2)
1372                         	    {
1373                         		return String(s1, s2, 0);
1374                         	    }
1375                         
1376                             9.  Experimented to find the optimal initial size for a short string.
1377                         	Eight seems to offer the best tradeoff between space and time.
1378                         
1379                             10. Inlined all members of the Char16 class.
1380                         
1381                             11. Used Uint16 internally in the String class. This showed no improvement
1382                         	since Char16 was already fully inlined and was essentially reduced to
1383                         	Uint16 in any case.
1384                         
1385                             12. Implemented conditional logic (#if) allowing error checking logic to
1386 mike          1.111.6.1 	be excluded for better performance. Examples include bounds checking 
1387                         	and null-pointer checking.
1388                         
1389                             13. Used memcpy() and memcmp() where possible. These are implemented using
1390                         	the rep family of instructions under Intel and are much faster.
1391                         
1392                             14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8 
1393                         	copy routine overhead.
1394                         
1395                             15. Added ASCII7 form of the constructor and assign().
1396                         
1397                         	    String s("hello world", String::ASCII7);
1398                         
1399                         	    s.assignASCII7("hello world");
1400                         
1401                         	This avoids slower UTF8 processing when not needed.
1402                         
1403                         BUG-4200 Review actions:
1404                         
1405                             1. 	Use PEGASUS_USE_EXPERIMENTAL_INTERFACES instead of 
1406                         	PEGASUS_STRING_EXTENSIONS.
1407 mike          1.111.6.1 
1408                         	Status: done
1409                         
1410                             2.  Doc++ String.h
1411                         
1412                         	Status: pending
1413                         
1414                             3.  Look at PEP223 for security coding guidelines for strings.
1415                         
1416                         	Status: pending
1417                         
1418                             4.	Increasing the number of objects may break Windows 2000 build
1419                                 (limit of 2048 bytes for command line). See BUG-2754
1420                         	
1421                         	Status: looking into the use of auto-generated linker files.
1422                         
1423                             5.  Concerns about whether generating inlines and non-inline versions
1424                         	of functions will work with all compilers.
1425                         
1426                         	Status: confident it will work on platforms except maybe Windows.
1427                         
1428 mike          1.111.6.1 ================================================================================
1429                         */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2