(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

   1 karl  1.98 //%2005////////////////////////////////////////////////////////////////////////
   2 mike  1.27 //
   3 karl  1.97 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
   4            // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
   5            // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
   6 karl  1.85 // IBM Corp.; EMC Corporation, The Open Group.
   7 karl  1.97 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
   8            // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
   9 karl  1.98 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
  10            // EMC Corporation; VERITAS Software Corporation; The Open Group.
  11 mike  1.27 //
  12            // Permission is hereby granted, free of charge, to any person obtaining a copy
  13 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
  14            // deal in the Software without restriction, including without limitation the
  15            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
  16 mike  1.27 // sell copies of the Software, and to permit persons to whom the Software is
  17            // furnished to do so, subject to the following conditions:
  18 david.dillard 1.105 //
  19 kumpf         1.41  // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
  20 mike          1.27  // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
  21                     // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
  22 kumpf         1.41  // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
  23                     // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  24                     // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  25 mike          1.27  // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  26                     // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27                     //
  28                     //==============================================================================
  29                     //
  30                     // Author: Mike Brasher (mbrasher@bmc.com)
  31                     //
  32 mike          1.111.2.9 // Modified By: 
  33                         //     Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
  34                         //     Josephine Eskaline Joyce, IBM (jojustin@in.ibm.com) for Bug#3297
  35                         //     David Dillard, VERITAS Software Corp. (david.dillard@veritas.com)
  36                         //     Mike Brasher (mike-brasher@austin.rr.com)
  37                         //
  38 mike          1.27      //%/////////////////////////////////////////////////////////////////////////////
  39                         
  40 mike          1.111.2.10 #ifndef PEGASUS_USE_INTERNAL_INLINES
  41                          # define PEGASUS_USE_INTERNAL_INLINES
  42                          #endif
  43                          
  44 mike          1.27       #include "String.h"
  45 mike          1.111.2.6  #include <cassert>
  46 kumpf         1.48       #include "InternalException.h"
  47 david         1.69       #include "CommonUTF.h"
  48 chuck         1.111.2.12 #include "MessageLoader.h"
  49 mike          1.111.2.1  #include "CharSet.h"
  50 david         1.69       
  51 chuck         1.111.2.14 #ifdef PEGASUS_HAS_ICU
  52 chuck         1.99       #include <unicode/ustring.h>
  53                          #include <unicode/uchar.h>
  54 david         1.69       #endif
  55                          
  56 mike          1.27       PEGASUS_NAMESPACE_BEGIN
  57                          
  58 mike          1.111.2.1  //==============================================================================
  59 kumpf         1.39       //
  60 mike          1.111.2.7  // Compile-time macros (undefined by default).
  61 mike          1.111.2.6  //
  62                          //     PEGASUS_STRING_NO_THROW -- suppresses throwing of exceptions
  63                          //      
  64                          //     PEGASUS_STRING_NO_UTF8 -- don't generate slower UTF8 code.
  65                          //
  66                          //     PEGASUS_USE_INTERNAL_INLINES -- enables internal inlining feature.
  67                          //
  68                          //==============================================================================
  69                          
  70                          //==============================================================================
  71                          //
  72 mike          1.111.2.1  // File-scope definitions:
  73 kumpf         1.54       //
  74 mike          1.111.2.1  //==============================================================================
  75 kumpf         1.54       
  76 mike          1.111.2.1  // Converts 16-bit characters to upper case.
  77                          inline Uint16 _to_upper(Uint16 x)
  78 kumpf         1.54       {
  79 mike          1.111.2.1      return (x & 0xFF00) ? x : CharSet::to_upper(x);
  80 kumpf         1.54       }
  81                          
  82 mike          1.111.2.1  // Converts 16-bit characters to lower case.
  83                          inline Uint16 _to_lower(Uint16 x)
  84 kumpf         1.54       {
  85 mike          1.111.2.1      return (x & 0xFF00) ? x : CharSet::to_lower(x);
  86                          }
  87 kumpf         1.82       
  88 mike          1.111.2.1  // Rounds x to the next power of two (or just returns 8 if x < 8).
  89                          static Uint32 _next_pow_2(Uint32 x)
  90                          {
  91                              if (x < 8)
  92                          	return 8;
  93                          
  94                              x--;
  95                              x |= (x >> 1);
  96                              x |= (x >> 2);
  97                              x |= (x >> 4);
  98                              x |= (x >> 8);
  99                              x |= (x >> 16);
 100                              x++;
 101                          
 102                              return x;
 103                          }
 104                          
 105                          template<class P, class Q>
 106                          static void _copy(P* p, const Q* q, size_t n)
 107                          {
 108                              // Use loop unrolling.
 109 mike          1.111.2.1  
 110                              while (n >= 8)
 111                              {
 112                          	p[0] = q[0];
 113                          	p[1] = q[1];
 114                          	p[2] = q[2];
 115                          	p[3] = q[3];
 116                          	p[4] = q[4];
 117                          	p[5] = q[5];
 118                          	p[6] = q[6];
 119                          	p[7] = q[7];
 120                          	p += 8;
 121                          	q += 8;
 122                          	n -= 8;
 123                              }
 124                          
 125                              while (n >= 4)
 126 kumpf         1.82           {
 127 mike          1.111.2.1  	p[0] = q[0];
 128                          	p[1] = q[1];
 129                          	p[2] = q[2];
 130                          	p[3] = q[3];
 131                          	p += 4;
 132                          	q += 4;
 133                          	n -= 4;
 134 kumpf         1.82           }
 135 mike          1.111.2.1  
 136                              while (n--)
 137                          	*p++ = *q++;
 138 kumpf         1.54       }
 139                          
 140 mike          1.111.2.1  static Uint16* _find(const Uint16* s, size_t n, Uint16 c)
 141 kumpf         1.54       {
 142 mike          1.111.2.1      while (n >= 4)
 143                              {
 144                          	if (s[0] == c)
 145                          	    return (Uint16*)s;
 146                          	if (s[1] == c)
 147                          	    return (Uint16*)&s[1];
 148                          	if (s[2] == c)
 149                          	    return (Uint16*)&s[2];
 150                          	if (s[3] == c)
 151                          	    return (Uint16*)&s[3];
 152                          
 153                          	n -= 4;
 154                          	s += 4;
 155                              }
 156                          
 157                              if (n)
 158                              {
 159                          	if (*s == c)
 160                          	    return (Uint16*)s;
 161                          	s++;
 162                          	n--;
 163 mike          1.111.2.1      }
 164                          
 165                              if (n)
 166                              {
 167                          	if (*s == c)
 168                          	    return (Uint16*)s;
 169                          	s++;
 170                          	n--;
 171                              }
 172                          
 173                              if (n && *s == c)
 174                          	return (Uint16*)s;
 175                          
 176                              // Not found!
 177                              return 0;
 178 kumpf         1.54       }
 179                          
 180 mike          1.111.2.1  static int _compare(const Uint16* s1, const Uint16* s2)
 181 kumpf         1.54       {
 182 mike          1.111.2.1      while (*s1 && *s2)
 183 kumpf         1.82           {
 184 mike          1.111.2.1          int r = *s1++ - *s2++;
 185                          
 186                                  if (r)
 187                                      return r;
 188 kumpf         1.82           }
 189 mike          1.111.2.1  
 190                              if (*s2)
 191                                  return -1;
 192                              else if (*s1)
 193                                  return 1;
 194                          
 195                              return 0;
 196 kumpf         1.54       }
 197                          
 198 mike          1.111.2.1  static int _compare_no_utf8(const Uint16* s1, const char* s2)
 199 kumpf         1.56       {
 200 mike          1.111.2.1      Uint16 c1;
 201                              Uint16 c2;
 202                          
 203                              do
 204 kumpf         1.81           {
 205 mike          1.111.2.1  	c1 = *s1++;
 206                          	c2 = *s2++;
 207                          
 208                          	if (c1 == 0)
 209                          	    return c1 - c2;
 210 kumpf         1.81           }
 211 mike          1.111.2.1      while (c1 == c2);
 212                          
 213                              return c1 - c2;
 214 kumpf         1.56       }
 215                          
 216 mike          1.111.2.1  static int _compare(const Uint16* s1, const Uint16* s2, size_t n)
 217 kumpf         1.54       {
 218 mike          1.111.2.1      // This should only be called when s1 and s2 have the same length.
 219                          
 220                              while (n-- && (*s1++ - *s2++) == 0)
 221                          	;
 222                          
 223 mike          1.111.2.8      // 
 224                          
 225 mike          1.111.2.1      return s1[-1] - s2[-1];
 226 kumpf         1.54       }
 227                          
 228 mike          1.111.2.1  static inline void _copy(Uint16* s1, const Uint16* s2, size_t n)
 229                          {
 230                              memcpy(s1, s2, n * sizeof(Uint16));
 231                          }
 232 kumpf         1.39       
 233 mike          1.111.2.1  void String_throw_out_of_bounds()
 234                          {
 235                              throw IndexOutOfBoundsException();
 236                          }
 237                          
 238                          #ifdef PEGASUS_STRING_NO_THROW
 239                          # define _check_null_pointer(ARG) /* empty */
 240                          #else
 241                          template<class T>
 242                          inline void _check_null_pointer(const T* ptr)
 243                          {
 244                              if (!ptr)
 245                          	throw NullPointer();
 246                          }
 247                          #endif
 248 mike          1.27       
 249 mike          1.111.2.1  static size_t _copy_from_utf8(Uint16* dest, const char* src, size_t n)
 250 mike          1.27       {
 251 mike          1.111.2.1      Uint16* p = dest;
 252                              const Uint8* q = (const Uint8*)src;
 253 mike          1.111.2.2  
 254 mike          1.111.2.11     // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
 255                              // Use loop-unrolling.
 256                          
 257                              while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
 258                              {
 259                          	p[0] = q[0];
 260                          	p[1] = q[1];
 261                          	p[2] = q[2];
 262                          	p[3] = q[3];
 263                          	p[4] = q[4];
 264                          	p[5] = q[5];
 265                          	p[6] = q[6];
 266                          	p[7] = q[7];
 267                          	p += 8;
 268                          	q += 8;
 269                          	n -= 8;
 270                              }
 271 mike          1.111.2.2  
 272 mike          1.111.2.10     while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
 273 mike          1.111.2.2      {
 274                          	p[0] = q[0];
 275                          	p[1] = q[1];
 276                          	p[2] = q[2];
 277                          	p[3] = q[3];
 278                          	p += 4;
 279                          	q += 4;
 280                          	n -= 4;
 281                              }
 282                          
 283                              switch (n)
 284                              {
 285                          	case 0:
 286                          	    return p - dest;
 287                          	case 1:
 288                          	    if (q[0] < 128)
 289                          	    {
 290                          		p[0] = q[0];
 291                          		return p + 1 - dest;
 292                          	    }
 293                          	    break;
 294 mike          1.111.2.2  	case 2:
 295 mike          1.111.2.11 	    if (((q[0]|q[1]) & 0x80) == 0)
 296 mike          1.111.2.2  	    {
 297                          		p[0] = q[0];
 298                          		p[1] = q[1];
 299                          		return p + 2 - dest;
 300                          	    }
 301                          	    break;
 302                          	case 3:
 303 mike          1.111.2.11 	    if (((q[0]|q[1]|q[2]) & 0x80) == 0)
 304 mike          1.111.2.2  	    {
 305                          		p[0] = q[0];
 306                          		p[1] = q[1];
 307                          		p[2] = q[2];
 308                          		return p + 3 - dest;
 309                          	    }
 310                          	    break;
 311                              }
 312                          
 313                              // Process remaining characters.
 314 mike          1.111.2.1  
 315                              while (n)
 316                              {
 317 mike          1.111.2.2  	// Optimize for 7-bit ASCII case.
 318 mike          1.111.2.1  
 319 mike          1.111.2.2  	if (*q < 128)
 320 mike          1.111.2.1  	{
 321                          	    *p++ = *q++;
 322                          	    n--;
 323                          	}
 324 mike          1.111.2.2  	else
 325                          	{
 326                          	    Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
 327 mike          1.111.2.1  
 328 mike          1.111.2.2  	    if (c > n || !isValid_U8(q, c) ||
 329                          		UTF8toUTF16(&q, q + c, &p, p + n) != 0)
 330                          	    {
 331 mike          1.111.2.13 		MessageLoaderParms parms("Common.String.BAD_UTF8",
 332                          		    "The byte sequence starting at index $0 "
 333                          		    "is not valid UTF-8 encoding.",
 334                          		     q - (const Uint8*)src);
 335                          		throw Exception(parms);
 336 mike          1.111.2.2  	    }
 337 mike          1.111.2.1  
 338 mike          1.111.2.2  	    n -= c;
 339 mike          1.111.2.1  	}
 340                              }
 341 mike          1.27       
 342 mike          1.111.2.1      return p - dest;
 343                          }
 344 mike          1.27       
 345 mike          1.111.2.1  // Note: dest must be at least three times src (plus an extra byte for 
 346                          // terminator).
 347 mike          1.111.2.2  static inline size_t _copy_to_utf8(char* dest, const Uint16* src, size_t n)
 348 mike          1.111.2.1  {
 349 mike          1.111.2.2      const Uint16* q = src;
 350                              Uint8* p = (Uint8*)dest;
 351                          
 352                              while (n >= 4 && q[0] < 128 && q[1] < 128 && q[2] < 128 && q[3] < 128)
 353                              {
 354                          	p[0] = q[0];
 355                          	p[1] = q[1];
 356                          	p[2] = q[2];
 357                          	p[3] = q[3];
 358                          	p += 4;
 359                          	q += 4;
 360                          	n -= 4;
 361                              }
 362                          
 363                              switch (n)
 364                              {
 365                          	case 0:
 366                          	    return p - (Uint8*)dest;
 367                          	case 1:
 368                          	    if (q[0] < 128)
 369                          	    {
 370 mike          1.111.2.2  		p[0] = q[0];
 371                          		return p + 1 - (Uint8*)dest;
 372                          	    }
 373                          	    break;
 374                          	case 2:
 375                          	    if (q[0] < 128 && q[1] < 128)
 376                          	    {
 377                          		p[0] = q[0];
 378                          		p[1] = q[1];
 379                          		return p + 2 - (Uint8*)dest;
 380                          	    }
 381                          	    break;
 382                          	case 3:
 383                          	    if (q[0] < 128 && q[1] < 128 && q[2] < 128)
 384                          	    {
 385                          		p[0] = q[0];
 386                          		p[1] = q[1];
 387                          		p[2] = q[2];
 388                          		return p + 3 - (Uint8*)dest;
 389                          	    }
 390                          	    break;
 391 mike          1.111.2.2      }
 392                          
 393                              // If this line was reached, there must be characters greater than 128.
 394                          
 395                              UTF16toUTF8(&q, q + n, &p, p + 3 * n);
 396                          
 397                              return p - (Uint8*)dest;
 398 mike          1.111.2.1  }
 399 mike          1.27       
 400 mike          1.111.2.1  static inline size_t _convert(Uint16* p, const char* q, size_t n)
 401                          {
 402                          #ifdef PEGASUS_STRING_NO_UTF8
 403                              _copy(p, q, n);
 404 mike          1.27           return n;
 405 mike          1.111.2.1  #else
 406                              return _copy_from_utf8(p, q, n);
 407                          #endif
 408 mike          1.27       }
 409                          
 410 mike          1.111.2.1  //==============================================================================
 411 chuck         1.102      //
 412 mike          1.111.2.1  // class CString
 413 chuck         1.102      //
 414 mike          1.111.2.1  //==============================================================================
 415                          
 416                          CString::CString(const CString& cstr) : _rep(0)
 417 chuck         1.102      {
 418 mike          1.111.2.1      if (cstr._rep)
 419 chuck         1.102          {
 420 mike          1.111.2.1  	size_t n = strlen(cstr._rep) + 1;
 421                                  _rep = (char*)operator new(n);
 422                          	memcpy(_rep, cstr._rep, n);
 423                              }
 424                          }
 425                          
 426                          CString& CString::operator=(const CString& cstr)
 427                          {
 428                              if (&cstr != this)
 429                              {
 430                                  if (_rep)
 431 david.dillard 1.105              {
 432 mike          1.111.2.1              operator delete(_rep);
 433                                      _rep = 0;
 434 chuck         1.102              }
 435 mike          1.111.2.1  
 436                                  if (cstr._rep)
 437 chuck         1.102              {
 438 mike          1.111.2.1  	    size_t n = strlen(cstr._rep) + 1;
 439                                      _rep = (char*)operator new(n);
 440                          	    memcpy(_rep, cstr._rep, n);
 441 chuck         1.102              }
 442 mike          1.111.2.1      }
 443 chuck         1.102      
 444 mike          1.111.2.1      return *this;
 445 chuck         1.102      }
 446                          
 447 mike          1.111.2.1  //==============================================================================
 448                          //
 449                          // class StringRep
 450                          //
 451                          //==============================================================================
 452 kumpf         1.43       
 453 mike          1.111.2.1  StringRep StringRep::_empty_rep;
 454 kumpf         1.43       
 455 mike          1.111.2.1  inline StringRep* StringRep::alloc(size_t cap)
 456 mike          1.27       {
 457 mike          1.111.2.1      StringRep* rep = (StringRep*)::operator new(
 458                          	sizeof(StringRep) + cap * sizeof(Uint16));
 459                              rep->cap = cap;
 460                              Atomic_create(&rep->refs, 1);
 461                          
 462                              return rep;
 463 mike          1.27       }
 464                          
 465 mike          1.111.2.1  static inline void _reserve(StringRep*& rep, Uint32 cap)
 466 mike          1.27       {
 467 mike          1.111.2.1      if (cap > rep->cap || Atomic_get(&rep->refs) != 1)
 468                              {
 469                          	size_t n = _next_pow_2(cap);
 470                          	StringRep* new_rep = StringRep::alloc(n);
 471                          	new_rep->size = rep->size;
 472                          	_copy(new_rep->data, rep->data, rep->size + 1);
 473                          	StringRep::unref(rep);
 474                          	rep = new_rep;
 475                              }
 476 kumpf         1.39       }
 477 tony          1.66       
 478 mike          1.111.2.1  StringRep* StringRep::create(const Uint16* data, size_t size)
 479 kumpf         1.39       {
 480 mike          1.111.2.1      StringRep* rep = StringRep::alloc(size);
 481                              rep->size = size;
 482                              _copy(rep->data, data, size);
 483                              rep->data[size] = '\0';
 484                              return rep;
 485 kumpf         1.39       }
 486                          
 487 mike          1.111.2.1  StringRep* StringRep::copy_on_write(StringRep* rep)
 488 kumpf         1.39       {
 489 mike          1.111.2.1      // Return a new copy of rep. Release rep.
 490 david.dillard 1.105      
 491 mike          1.111.2.1      StringRep* new_rep = StringRep::alloc(rep->size);
 492                              new_rep->size = rep->size;
 493                              _copy(new_rep->data, rep->data, rep->size);
 494                              new_rep->data[new_rep->size] = '\0';
 495                              StringRep::unref(rep);
 496                              return new_rep;
 497 mike          1.27       }
 498                          
 499 mike          1.111.2.1  StringRep* StringRep::create(const char* data, size_t size)
 500 kumpf         1.39       {
 501 mike          1.111.2.1      StringRep* rep = StringRep::alloc(size);
 502                              rep->size = _convert((Uint16*)rep->data, data, size);
 503                              rep->data[rep->size] = '\0';
 504 david.dillard 1.105      
 505 mike          1.111.2.1      return rep;
 506 kumpf         1.39       }
 507                          
 508 mike          1.111.2.4  StringRep* StringRep::createASCII7(const char* data, size_t size)
 509 mike          1.111.2.2  {
 510                              StringRep* rep = StringRep::alloc(size);
 511                              _copy((Uint16*)rep->data, data, size);
 512                              rep->data[rep->size = size] = '\0';
 513                              return rep;
 514                          }
 515                          
 516 mike          1.111.2.1  Uint32 StringRep::length(const Uint16* str)
 517 mike          1.27       {
 518 mike          1.111.2.5      // Note: We could unroll this but it is rarely called.
 519 david.dillard 1.105      
 520 mike          1.111.2.1      const Uint16* end = (Uint16*)str;
 521                          
 522                              while (*end++)
 523                          	;
 524                          
 525                              return end - str - 1;
 526 mike          1.27       }
 527                          
 528 mike          1.111.2.1  //==============================================================================
 529                          //
 530                          // class String
 531                          //
 532                          //==============================================================================
 533 david.dillard 1.105      
 534 mike          1.111.2.1  const String String::EMPTY;
 535                          
 536                          String::String(const String& str, Uint32 n)
 537                          {
 538                              _check_bounds(n, str._rep->size);
 539                              _rep = StringRep::create(str._rep->data, n);
 540 kumpf         1.39       }
 541 mike          1.27       
 542 mike          1.111.2.1  String::String(const Char16* str)
 543 kumpf         1.39       {
 544 mike          1.111.2.1      _check_null_pointer(str);
 545                              _rep = StringRep::create((Uint16*)str, StringRep::length((Uint16*)str));
 546 mike          1.27       }
 547                          
 548 mike          1.111.2.1  String::String(const Char16* str, Uint32 n)
 549 mike          1.27       {
 550 mike          1.111.2.1      _check_null_pointer(str);
 551                              _rep = StringRep::create((Uint16*)str, n);
 552 mike          1.27       }
 553                          
 554 mike          1.111.2.1  String::String(const char* str)
 555 mike          1.27       {
 556 mike          1.111.2.1      _check_null_pointer(str);
 557                              _rep = StringRep::create(str, strlen(str));
 558 mike          1.27       }
 559                          
 560 mike          1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
 561 mike          1.111.2.2  String::String(const char* str, String::ASCII7Tag tag)
 562                          {
 563                              _check_null_pointer(str);
 564 mike          1.111.2.4      _rep = StringRep::createASCII7(str, strlen(str));
 565 mike          1.111.2.2  }
 566 mike          1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
 567 mike          1.111.2.2  
 568 mike          1.111.2.1  String::String(const char* str, Uint32 n)
 569 mike          1.27       {
 570 mike          1.111.2.1      _check_null_pointer(str);
 571                              _rep = StringRep::create(str, n);
 572                          }
 573 david.dillard 1.105      
 574 mike          1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
 575 mike          1.111.2.2  String::String(const char* str, size_t n, String::ASCII7Tag tag)
 576                          {
 577                              _check_null_pointer(str);
 578 mike          1.111.2.4      _rep = StringRep::createASCII7(str, n);
 579 mike          1.111.2.2  }
 580 mike          1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
 581 mike          1.111.2.2  
 582 mike          1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
 583 mike          1.111.2.1  String::String(const String& s1, const String& s2)
 584                          {
 585                              size_t n1 = s1._rep->size;
 586                              size_t n2 = s2._rep->size;
 587                              size_t n = n1 + n2;
 588                              _rep = StringRep::alloc(n);
 589                              _copy(_rep->data, s1._rep->data, n1);
 590                              _copy(_rep->data + n1, s2._rep->data, n2);
 591                              _rep->size = n;
 592                              _rep->data[n] = '\0';
 593                          }
 594 mike          1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
 595 mike          1.111.2.1  
 596 mike          1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
 597 mike          1.111.2.1  String::String(const String& s1, const char* s2)
 598                          {
 599                              _check_null_pointer(s2);
 600                              size_t n1 = s1._rep->size;
 601                              size_t n2 = strlen(s2);
 602                              _rep = StringRep::alloc(n1 + n2);
 603                              _copy(_rep->data, s1._rep->data, n1);
 604                              _rep->size = n1 + _convert((Uint16*)_rep->data + n1, s2, n2);
 605                              _rep->data[_rep->size] = '\0';
 606                          }
 607 mike          1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
 608 mike          1.111.2.1  
 609 mike          1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
 610 mike          1.111.2.1  String::String(const char* s1, const String& s2)
 611                          {
 612                              _check_null_pointer(s1);
 613                              size_t n1 = strlen(s1);
 614                              size_t n2 = s2._rep->size;
 615                              _rep = StringRep::alloc(n1 + n2);
 616                              _rep->size = n2 + _convert((Uint16*)_rep->data, s1, n1);
 617                              _copy(_rep->data + n1, s2._rep->data, n2);
 618                              _rep->data[_rep->size] = '\0';
 619 mike          1.27       }
 620 mike          1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
 621 mike          1.27       
 622 mike          1.111.2.1  String& String::assign(const String& str)
 623 mike          1.27       {
 624 mike          1.111.2.1      if (_rep != str._rep)
 625 david.dillard 1.105          {
 626 mike          1.111.2.1  	StringRep::unref(_rep);
 627                          	StringRep::ref(_rep = str._rep);
 628 david.dillard 1.105          }
 629                          
 630 mike          1.27           return *this;
 631                          }
 632                          
 633 mike          1.111.2.1  String& String::assign(const Char16* str, Uint32 n)
 634 chuck         1.102      {
 635 mike          1.111.2.1      _check_null_pointer(str);
 636                          
 637                              if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 638 david.dillard 1.105          {
 639 mike          1.111.2.1  	StringRep::unref(_rep);
 640                          	_rep = StringRep::alloc(n);
 641 david.dillard 1.105          }
 642                          
 643 mike          1.111.2.1      _rep->size = n;
 644                              _copy(_rep->data, (Uint16*)str, n);
 645                              _rep->data[n] = '\0';
 646                          
 647 chuck         1.102          return *this;
 648                          }
 649                          
 650 kumpf         1.39       String& String::assign(const char* str, Uint32 n)
 651 mike          1.27       {
 652 mike          1.111.2.1      _check_null_pointer(str);
 653                          
 654                              if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 655 david.dillard 1.105          {
 656 mike          1.111.2.1  	StringRep::unref(_rep);
 657                          	_rep = StringRep::alloc(n);
 658 david.dillard 1.105          }
 659                          
 660 mike          1.111.2.1      _rep->size = _convert(_rep->data, str, n);
 661                              _rep->data[_rep->size] = 0;
 662                          
 663 mike          1.27           return *this;
 664                          }
 665                          
 666 mike          1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
 667 mike          1.111.2.4  String& String::assignASCII7(const char* str, Uint32 n)
 668 mike          1.111.2.2  {
 669                              _check_null_pointer(str);
 670                          
 671                              if (n > _rep->cap || Atomic_get(&_rep->refs) != 1)
 672                              {
 673                          	StringRep::unref(_rep);
 674                          	_rep = StringRep::alloc(n);
 675                              }
 676                          
 677                              _copy(_rep->data, str, n);
 678                              _rep->data[_rep->size = n] = 0;
 679                          
 680                              return *this;
 681                          }
 682 mike          1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
 683 mike          1.111.2.2  
 684 kumpf         1.39       void String::clear()
 685                          {
 686 mike          1.111.2.1      if (_rep->size)
 687                              {
 688                          	if (Atomic_get(&_rep->refs) == 1)
 689                          	    _rep->size = 0;
 690                          	else
 691                          	{
 692                          	    StringRep::unref(_rep);
 693                          	    _rep = &StringRep::_empty_rep;
 694                          	}
 695                              }
 696 kumpf         1.39       }
 697                          
 698 mike          1.111.2.1  void String::reserveCapacity(Uint32 cap)
 699 kumpf         1.39       {
 700 mike          1.111.2.1      _reserve(_rep, cap);
 701 kumpf         1.39       }
 702                          
 703 mike          1.111.2.1  CString String::getCString() const
 704                          {
 705                          #ifdef PEGASUS_STRING_NO_UTF8
 706                              char* str = (char*)operator new(_rep->size + 1);
 707                              _copy(str, _rep->data, _rep->size);
 708                              str[_rep->size] = '\0';
 709                              return CString(str);
 710 gs.keenan     1.110      #else
 711 mike          1.111.2.1      Uint32 n = 3 * _rep->size;
 712                              char* str = (char*)operator new(n + 1);
 713 mike          1.111.2.2      size_t size = _copy_to_utf8(str, _rep->data, _rep->size);
 714 mike          1.111.2.1      str[size] = '\0';
 715                              return CString(str);
 716 gs.keenan     1.110      #endif
 717 kumpf         1.39       }
 718                          
 719 mike          1.111.2.1  String& String::append(const Char16* str, Uint32 n)
 720 kumpf         1.39       {
 721 mike          1.111.2.1      _check_null_pointer(str);
 722 kumpf         1.39       
 723 mike          1.111.2.1      size_t old_size = _rep->size;
 724                              size_t new_size = old_size + n;
 725                              _reserve(_rep, new_size);
 726                              _copy(_rep->data + old_size, (Uint16*)str, n);
 727                              _rep->size = new_size;
 728                              _rep->data[new_size] = '\0';
 729 mike          1.27       
 730 mike          1.111.2.1      return *this;
 731 mike          1.27       }
 732                          
 733 mike          1.111.2.1  String& String::append(const String& str)
 734 mike          1.27       {
 735 mike          1.111.2.1      return append((Char16*)str._rep->data, str._rep->size);
 736 mike          1.27       }
 737                          
 738 mike          1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
 739 mike          1.111.2.1  String& String::append(const char* str, Uint32 size)
 740 kumpf         1.39       {
 741 mike          1.111.2.1      _check_null_pointer(str);
 742                          
 743                              size_t old_size = _rep->size;
 744                              size_t cap = old_size + size;
 745                          
 746                              _reserve(_rep, cap);
 747                              _rep->size += _convert((Uint16*)_rep->data + old_size, str, size);
 748                              _rep->data[_rep->size] = '\0';
 749 kumpf         1.39       
 750 mike          1.27           return *this;
 751                          }
 752 mike          1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
 753 mike          1.27       
 754 mike          1.111.2.1  void String::remove(Uint32 index, Uint32 n)
 755 mike          1.27       {
 756 mike          1.111.2.1      if (n == PEG_NOT_FOUND)
 757                                  n = _rep->size - index;
 758 mike          1.27       
 759 mike          1.111.2.1      _check_bounds(index + n, _rep->size);
 760                          
 761                              if (Atomic_get(&_rep->refs) != 1)
 762                          	_rep = StringRep::copy_on_write(_rep);
 763 mike          1.27       
 764 mike          1.111.2.1      assert(index + n <= _rep->size);
 765 mike          1.27       
 766 mike          1.111.2.1      size_t rem = _rep->size - (index + n);
 767                              Uint16* data = _rep->data;
 768                          
 769                              if (rem)
 770                                  memmove(data + index, data + index + n, rem * sizeof(Uint16));
 771                          
 772                              _rep->size -= n;
 773                              data[_rep->size] = '\0';
 774 mike          1.27       }
 775                          
 776 mike          1.111.2.1  String String::subString(Uint32 index, Uint32 n) const
 777 mike          1.27       {
 778 mike          1.111.2.5      // Note: this implementation is very permissive but used for
 779 mike          1.111.2.1      // backwards compatibility.
 780                          
 781                              if (index < _rep->size)
 782 mike          1.27           {
 783 mike          1.111.2.1  	if (n == PEG_NOT_FOUND || n > _rep->size - index)
 784                          	    n = _rep->size - index;
 785 mike          1.27       
 786 mike          1.111.2.1  	return String((Char16*)_rep->data + index, n);
 787 mike          1.27           }
 788 david.dillard 1.105      
 789                              return String();
 790 mike          1.27       }
 791                          
 792                          Uint32 String::find(Char16 c) const
 793                          {
 794 mike          1.111.2.1      Uint16* p = (Uint16*)_find(_rep->data, _rep->size, c);
 795 mike          1.27       
 796 mike          1.111.2.1      if (p)
 797                          	return p - _rep->data;
 798 mike          1.27       
 799                              return PEG_NOT_FOUND;
 800                          }
 801                          
 802 kumpf         1.53       Uint32 String::find(Uint32 index, Char16 c) const
 803 mike          1.30       {
 804 mike          1.111.2.1      _check_bounds(index, _rep->size);
 805 mike          1.30       
 806 mike          1.111.2.1      if (index >= _rep->size)
 807                          	return PEG_NOT_FOUND;
 808                          
 809                              Uint16* p = (Uint16*)_find(_rep->data + index, _rep->size - index, c);
 810                          
 811                              if (p)
 812                          	return p - _rep->data;
 813 mike          1.30       
 814                              return PEG_NOT_FOUND;
 815                          }
 816                          
 817 mike          1.111.2.1  Uint32 String::_find_aux(const Char16* s, Uint32 n) const
 818 mike          1.27       {
 819 mike          1.111.2.1      _check_null_pointer(s);
 820 mike          1.27       
 821 mike          1.111.2.1      const Uint16* data = _rep->data;
 822                              size_t rem = _rep->size;
 823 mike          1.30       
 824 mike          1.111.2.1      while (n <= rem)
 825 mike          1.27           {
 826 mike          1.111.2.1  	Uint16* p = (Uint16*)_find(data, rem, s[0]);
 827 david.dillard 1.105      
 828 mike          1.111.2.1  	if (!p)
 829                          	    break;
 830                          
 831                          	if (memcmp(p, s, n * sizeof(Uint16)) == 0)
 832                          	    return p - _rep->data;
 833                          
 834                          	p++;
 835                          	rem -= p - data;
 836                          	data = p;
 837 mike          1.27           }
 838 mike          1.111.2.1  
 839 mike          1.27           return PEG_NOT_FOUND;
 840                          }
 841                          
 842 mike          1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
 843 mike          1.111.2.1  Uint32 String::find(const char* s) const
 844                          {
 845                              _check_null_pointer(s);
 846                          
 847 mike          1.111.2.5      // Note: could optimize away creation of temporary, but this is rarely
 848                              // called.
 849 mike          1.111.2.1      return find(String(s));
 850                          }
 851 mike          1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
 852 mike          1.111.2.1  
 853 mike          1.27       Uint32 String::reverseFind(Char16 c) const
 854                          {
 855 mike          1.111.2.1      Uint16 x = c;
 856                              Uint16* p = _rep->data;
 857                              Uint16* q = _rep->data + _rep->size;
 858 mike          1.27       
 859 mike          1.111.2.1      while (q != p)
 860 mike          1.27           {
 861 mike          1.111.2.1  	if (*--q == x)
 862                          	    return q - p;
 863 mike          1.27           }
 864                          
 865                              return PEG_NOT_FOUND;
 866                          }
 867                          
 868                          void String::toLower()
 869                          {
 870 chuck         1.111.2.14 #ifdef PEGASUS_HAS_ICU
 871 mike          1.111.2.1  
 872 yi.zhou       1.108          if (InitializeICU::initICUSuccessful())
 873 david         1.90           {
 874 chuck         1.111.2.12         if (Atomic_get(&_rep->refs) != 1)
 875                          	        _rep = StringRep::copy_on_write(_rep);
 876                          
 877                                  // This will do a locale-insensitive, but context-sensitive convert.
 878                                  // Since context-sensitive casing looks at adjacent chars, this 
 879                                  // prevents optimizations where the us-ascii is converted before 
 880                                  // calling ICU.
 881                                  // The string may shrink or expand after the convert.
 882                          
 883 mike          1.111.2.1  	//// First calculate size of resulting string. u_strToLower() returns
 884                          	//// only the size when zero is passed as the destination size argument.
 885                          
 886 yi.zhou       1.108              UErrorCode err = U_ZERO_ERROR;
 887                          
 888 mike          1.111.2.1          int32_t new_size = u_strToLower(
 889                          	    NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 890 chuck         1.111.2.12         
 891                                  err = U_ZERO_ERROR;
 892 chuck         1.99       
 893 mike          1.111.2.1  	//// Reserve enough space for the result.
 894                          
 895                          	if ((Uint32)new_size > _rep->cap)
 896                          	    _reserve(_rep, new_size);
 897                          
 898                          	//// Perform the conversion (overlapping buffers are allowed).
 899                          
 900                                  u_strToLower((UChar*)_rep->data, new_size,
 901                          	    (UChar*)_rep->data, _rep->size, NULL, &err);
 902 yi.zhou       1.108      
 903 mike          1.111.2.1  	_rep->size = new_size;
 904 mike          1.111.2.13 	return;
 905 david         1.90           }
 906 mike          1.111.2.1  
 907 chuck         1.111.2.14 #endif /* PEGASUS_HAS_ICU */
 908 mike          1.111.2.1  
 909                              if (Atomic_get(&_rep->refs) != 1)
 910                          	_rep = StringRep::copy_on_write(_rep);
 911                          
 912                              Uint16* p = _rep->data;
 913                              size_t n = _rep->size;
 914                          
 915                              for (; n--; p++)
 916 david         1.90           {
 917 mike          1.111.2.1  	if (!(*p & 0xFF00))
 918                          	    *p = _to_lower(*p);
 919 mike          1.27           }
 920 kumpf         1.39       }
 921                          
 922 chuck         1.99       void String::toUpper()
 923 david         1.90       {
 924 chuck         1.111.2.14 #ifdef PEGASUS_HAS_ICU
 925 mike          1.111.2.1  
 926 yi.zhou       1.108          if (InitializeICU::initICUSuccessful())
 927 chuck         1.99           {
 928 chuck         1.111.2.12         if (Atomic_get(&_rep->refs) != 1)
 929 mike          1.111.2.13 	    _rep = StringRep::copy_on_write(_rep);
 930 chuck         1.111.2.12 
 931                                  // This will do a locale-insensitive, but context-sensitive convert.
 932                                  // Since context-sensitive casing looks at adjacent chars, this 
 933                                  // prevents optimizations where the us-ascii is converted before 
 934                                  // calling ICU.
 935                                  // The string may shrink or expand after the convert.
 936                          
 937 mike          1.111.2.1  	//// First calculate size of resulting string. u_strToUpper() returns
 938                          	//// only the size when zero is passed as the destination size argument.
 939                          
 940 yi.zhou       1.108              UErrorCode err = U_ZERO_ERROR;
 941                          
 942 mike          1.111.2.1          int32_t new_size = u_strToUpper(
 943                          	    NULL, 0, (UChar*)_rep->data, _rep->size, NULL, &err);
 944 chuck         1.99       
 945 chuck         1.111.2.12         err = U_ZERO_ERROR;
 946                          
 947 mike          1.111.2.1  	//// Reserve enough space for the result.
 948 yi.zhou       1.108      
 949 mike          1.111.2.1  	if ((Uint32)new_size > _rep->cap)
 950                          	    _reserve(_rep, new_size);
 951 david         1.90       
 952 mike          1.111.2.1  	//// Perform the conversion (overlapping buffers are allowed).
 953 kumpf         1.39       
 954 mike          1.111.2.1          u_strToUpper((UChar*)_rep->data, new_size,
 955                          	    (UChar*)_rep->data, _rep->size, NULL, &err);
 956 mike          1.27       
 957 mike          1.111.2.1  	_rep->size = new_size;
 958 chuck         1.111.2.12 
 959 mike          1.111.2.13 	return;
 960 mike          1.27           }
 961                          
 962 chuck         1.111.2.14 #endif /* PEGASUS_HAS_ICU */
 963 mike          1.111.2.1  
 964                              if (Atomic_get(&_rep->refs) != 1)
 965                          	_rep = StringRep::copy_on_write(_rep);
 966                          
 967                              Uint16* p = _rep->data;
 968                              size_t n = _rep->size;
 969                          
 970                              for (; n--; p++)
 971                          	*p = _to_upper(*p);
 972 mike          1.27       }
 973                          
 974 mike          1.111.2.1  int String::compare(const String& s1, const String& s2, Uint32 n)
 975 mike          1.30       {
 976 mike          1.111.2.1      assert(n <= s1._rep->size);
 977                              assert(n <= s2._rep->size);
 978 kumpf         1.43       
 979 mike          1.111.2.1      // Ignoring error in which n is greater than s1.size() or s2.size()
 980                              return _compare(s1._rep->data, s2._rep->data, n);
 981                          }
 982 mike          1.30       
 983 mike          1.111.2.1  int String::compare(const String& s1, const String& s2)
 984                          {
 985                              return _compare(s1._rep->data, s2._rep->data);
 986                          }
 987 mike          1.30       
 988 mike          1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
 989 mike          1.111.2.1  int String::compare(const String& s1, const char* s2)
 990                          {
 991                              _check_null_pointer(s2);
 992 mike          1.30       
 993 mike          1.111.2.1  #ifdef PEGASUS_STRING_NO_UTF8
 994                              return _compare_no_utf8(s1._rep->data, s2);
 995                          #else
 996                              // ATTN: optimize this!
 997                              return String::compare(s1, String(s2));
 998                          #endif
 999 mike          1.30       }
1000 mike          1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
1001 mike          1.30       
1002 mike          1.111.2.1  int String::compareNoCase(const String& str1, const String& str2)
1003 kumpf         1.40       {
1004 chuck         1.111.2.14 #ifdef PEGASUS_HAS_ICU
1005 mike          1.111.2.1  
1006 yi.zhou       1.108          if (InitializeICU::initICUSuccessful())
1007                              {
1008 mike          1.111.2.1          return  u_strcasecmp(
1009                          	    str1._rep->data, str2._rep->data, U_FOLD_CASE_DEFAULT);
1010 yi.zhou       1.108          }
1011 kumpf         1.40       
1012 chuck         1.111.2.14 #endif /* PEGASUS_HAS_ICU */
1013 kumpf         1.40       
1014 mike          1.111.2.1      const Uint16* s1 = str1._rep->data;
1015                              const Uint16* s2 = str2._rep->data;
1016                          
1017                              while (*s1 && *s2)
1018                              {
1019                                  int r = _to_lower(*s1++) - _to_lower(*s2++);
1020 kumpf         1.40       
1021 david.dillard 1.105              if (r)
1022                                      return r;
1023 kumpf         1.40           }
1024                          
1025 mike          1.111.2.1      if (*s2)
1026 david.dillard 1.105              return -1;
1027 mike          1.111.2.1      else if (*s1)
1028 david.dillard 1.105              return 1;
1029 kumpf         1.40       
1030                              return 0;
1031                          }
1032                          
1033 mike          1.111.2.1  Boolean String::equalNoCase_aux(const String& s1, const String& s2)
1034 mike          1.27       {
1035 chuck         1.111.2.14 #ifdef PEGASUS_HAS_ICU
1036 mike          1.27       
1037 mike          1.111.2.1      return String::compareNoCase(s1, s2) == 0;
1038 kumpf         1.39       
1039 chuck         1.111.2.14 #else /* PEGASUS_HAS_ICU */
1040 kumpf         1.39       
1041 mike          1.111.2.1      Uint16* p = (Uint16*)s1._rep->data;
1042                              Uint16* q = (Uint16*)s2._rep->data;
1043                              Uint32 n = s2._rep->size;
1044                          
1045                              while (n >= 8)
1046                              {
1047                          	if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||
1048                          	    ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||
1049                          	    ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||
1050                          	    ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))) ||
1051                          	    ((p[4] - q[4]) && (_to_upper(p[4]) - _to_upper(q[4]))) ||
1052                          	    ((p[5] - q[5]) && (_to_upper(p[5]) - _to_upper(q[5]))) ||
1053                          	    ((p[6] - q[6]) && (_to_upper(p[6]) - _to_upper(q[6]))) ||
1054                          	    ((p[7] - q[7]) && (_to_upper(p[7]) - _to_upper(q[7]))))
1055                          	{
1056                          	    return false;
1057                          	}
1058                          
1059                          	n -= 8;
1060                          	p += 8;
1061                          	q += 8;
1062 mike          1.111.2.1      }
1063                          
1064                              while (n >= 4)
1065                              {
1066                          	if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))) ||
1067                          	    ((p[1] - q[1]) && (_to_upper(p[1]) - _to_upper(q[1]))) ||
1068                          	    ((p[2] - q[2]) && (_to_upper(p[2]) - _to_upper(q[2]))) ||
1069                          	    ((p[3] - q[3]) && (_to_upper(p[3]) - _to_upper(q[3]))))
1070                          	{
1071                          	    return false;
1072                          	}
1073                          
1074                          	n -= 4;
1075                          	p += 4;
1076                          	q += 4;
1077                              }
1078 mike          1.27       
1079 kumpf         1.39           while (n--)
1080                              {
1081 mike          1.111.2.1  	if (((p[0] - q[0]) && (_to_upper(p[0]) - _to_upper(q[0]))))
1082                          	    return false;
1083                          
1084                          	p++;
1085                          	q++;
1086 kumpf         1.39           }
1087 mike          1.28       
1088 kumpf         1.39           return true;
1089 david         1.69       
1090 chuck         1.111.2.14 #endif /* PEGASUS_HAS_ICU */
1091 mike          1.111.2.1  }
1092 mike          1.27       
1093 mike          1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
1094 mike          1.111.2.1  Boolean String::equalNoCase(const String& s1, const char* s2)
1095 david         1.69       {
1096 mike          1.111.2.1      _check_null_pointer(s2);
1097 david         1.69       
1098 chuck         1.111.2.14 #if defined(PEGASUS_HAS_ICU)
1099 david.dillard 1.105      
1100 mike          1.111.2.1      return String::equalNoCase(s1, String(s2));
1101 david         1.71       
1102 mike          1.111.2.1  #elif defined(PEGASUS_STRING_NO_UTF8)
1103 kumpf         1.42       
1104 mike          1.111.2.1      const Uint16* p1 = (Uint16*)s1._rep->data;
1105                              const char* p2 = s2;
1106                              size_t n = s1._rep->size;
1107 karl          1.36       
1108 mike          1.111.2.1      while (n--)
1109                              {
1110                          	if (!*p2)
1111                          	    return false;
1112 david.dillard 1.105      
1113 mike          1.111.2.1  	if (_to_upper(*p1++) != CharSet::to_upper(int(*p2++)))
1114                          	    return false;
1115                              }
1116                              
1117                              return true;
1118 david.dillard 1.105      
1119 chuck         1.111.2.14 #else /* PEGASUS_HAS_ICU */
1120 chuck         1.78       
1121 mike          1.111.2.1      // ATTN: optimize this!
1122                              return String::equalNoCase(s1, String(s2));
1123 david.dillard 1.105      
1124 chuck         1.111.2.14 #endif /* PEGASUS_HAS_ICU */
1125 mike          1.111.2.1  }
1126 mike          1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
1127 karl          1.36       
1128 mike          1.111.2.1  Boolean String::equal(const String& s1, const String& s2)
1129 karl          1.36       {
1130 mike          1.111.2.1      return s1._rep->size == s2._rep->size && memcmp(s1._rep->data, 
1131                          	s2._rep->data, s1._rep->size * sizeof(Uint16)) == 0;
1132 karl          1.36       }
1133                          
1134 mike          1.111.2.15 #ifdef PEGASUS_USE_STRING_EXTENSIONS
1135 mike          1.111.2.1  Boolean String::equal(const String& s1, const char* s2)
1136 karl          1.36       {
1137 mike          1.111.2.1  #ifdef PEGASUS_STRING_NO_UTF8
1138 kumpf         1.39       
1139 mike          1.111.2.1      _check_null_pointer(s2);
1140 kumpf         1.39       
1141 mike          1.111.2.1      const Uint16* p = (Uint16*)s1._rep->data;
1142                              const char* q = s2;
1143 kumpf         1.39       
1144 mike          1.111.2.1      while (*p && *q)
1145                              {
1146                          	if (*p++ != Uint16(*q++))
1147                          	    return false;
1148                              }
1149 kumpf         1.39       
1150 mike          1.111.2.1      return !(*p || *q);
1151 kumpf         1.39       
1152 mike          1.111.2.1  #else /* PEGASUS_STRING_NO_UTF8 */
1153 kumpf         1.39       
1154 mike          1.111.2.1      return String::equal(s1, String(s2));
1155                          
1156                          #endif /* PEGASUS_STRING_NO_UTF8 */
1157 kumpf         1.39       }
1158 mike          1.111.2.15 #endif /* PEGASUS_USE_STRING_EXTENSIONS */
1159 kumpf         1.39       
1160 kumpf         1.47       PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
1161 kumpf         1.39       {
1162 david         1.69       #if defined(PEGASUS_OS_OS400)
1163 mike          1.111.2.1  
1164 david         1.93           CString cstr = str.getCString();
1165 david         1.69           const char* utf8str = cstr;
1166                              os << utf8str;
1167 chuck         1.111.2.12     return os;
1168                          #else    
1169 david         1.69       
1170 chuck         1.111.2.14 #if defined(PEGASUS_HAS_ICU)
1171 mike          1.111.2.1  
1172 yi.zhou       1.108          if (InitializeICU::initICUSuccessful())
1173                              {
1174 david.dillard 1.105              char *buf = NULL;
1175                                  const int size = str.size() * 6;
1176 mike          1.111.2.1          UnicodeString UniStr(
1177                          	    (const UChar *)str.getChar16Data(), (int32_t)str.size());
1178 david.dillard 1.105              Uint32 bufsize = UniStr.extract(0,size,buf);
1179                                  buf = new char[bufsize+1];
1180                                  UniStr.extract(0,bufsize,buf);
1181                                  os << buf;
1182                                  os.flush();
1183                                  delete [] buf;
1184 chuck         1.111.2.12         return os;       
1185 yi.zhou       1.108          }
1186 mike          1.111.2.1  
1187 chuck         1.111.2.14 #endif  // PEGASUS_HAS_ICU 
1188 mike          1.111.2.1  
1189                              for (Uint32 i = 0, n = str.size(); i < n; i++)
1190 yi.zhou       1.108          {
1191 chuck         1.111.2.12         Uint16 code = str[i];
1192 david.dillard 1.105      
1193 chuck         1.111.2.12        	if (code > 0 && !(code & 0xFF00))
1194                             	        os << char(code);
1195                                  else
1196                             	    {
1197                                      // Print in hex format:
1198                                      char buffer[8];
1199                                      sprintf(buffer, "\\x%04X", code);
1200                                      os << buffer;
1201                                  }
1202 yi.zhou       1.108          }
1203 kumpf         1.39       
1204                              return os;
1205 chuck         1.111.2.12 #endif // PEGASUS_OS_OS400
1206 kumpf         1.39       }
1207                          
1208 mike          1.111.2.1  void String::_append_char_aux()
1209 kumpf         1.39       {
1210 mike          1.111.2.1      StringRep* tmp;
1211 kumpf         1.39       
1212 mike          1.111.2.1      if (_rep->cap)
1213                              {
1214                          	tmp = StringRep::alloc(2 * _rep->cap);
1215                          	tmp->size = _rep->size;
1216                          	_copy(tmp->data, _rep->data, _rep->size);
1217                              }
1218                              else
1219                              {
1220                          	tmp = StringRep::alloc(8);
1221                          	tmp->size = 0;
1222                              }
1223 kumpf         1.39       
1224 mike          1.111.2.1      StringRep::unref(_rep);
1225                              _rep = tmp;
1226 kumpf         1.39       }
1227                          
1228 mike          1.111.2.1  PEGASUS_NAMESPACE_END
1229 kumpf         1.39       
1230 mike          1.111.2.1  /*
1231                          ================================================================================
1232 kumpf         1.39       
1233 mike          1.111.2.1  String optimizations:
1234                          
1235                              1.  Added mechanism allowing certain functions to be inlined only when
1236                          	used by internal Pegasus modules. External modules (i.e., providers)
1237                          	link to a non-inline version, which allows for binary compatibility.
1238                          
1239                              2.  Implemented copy-on-write with atomic increment/decrement. This
1240                          	yielded a 10% improvement for the 'gc' benchmark and an 11% improvement
1241                          	for the 'ni1000' benchmark.
1242                          
1243                              3.	Employed loop unrolling in several places. For example, see:
1244                          
1245                          	    static Uint16* _find(const Uint16* s, size_t n, Uint16 c);
1246                          
1247                              4.  Used the "empty-rep" optimization (described in whitepaper from the
1248                          	GCC Developers Summit). This reduced default construction to a simple
1249                          	pointer assignment.
1250                          
1251                          	    inline String::String() : _rep(&_empty_rep) { }
1252                          
1253                              5.  Implemented Uint16 versions of toupper() and tolower() using tables.
1254 mike          1.111.2.1  	For example:
1255                          
1256                          	    static const char _upper[] =
1257                          	    {
1258                          		0,1,2,...255
1259                          	    };
1260                          
1261                          	    inline Uint16 _to_upper(Uint16 x)
1262                          	    {
1263                          		return (x & 0xFF00) ? x : _upper[x];
1264                          	    }
1265                          
1266                          	This outperforms the system implementation by avoiding an anding 
1267                          	operation.
1268                          
1269                              6.  Implemented char* versions of the following member functions to
1270                          	eliminate unnecessary creation of anonymous string objects
1271                          	(temporaries).
1272                          
1273                          	    String(const String& s1, const char* s2);
1274                          	    String(const char* s1, const String& s2);
1275 mike          1.111.2.1  	    String& String::operator=(const char* str);
1276                          	    Uint32 String::find(const char* s) const;
1277                          	    bool String::equal(const String& s1, const char* s2);
1278                          	    static int String::compare(const String& s1, const char* s2);
1279                          	    String& String::append(const char* str);
1280                          	    String& String::append(const char* str, Uint32 size);
1281                          	    static bool String::equalNoCase(const String& s1, const char* s2);
1282                          	    String& operator=(const char* str)
1283                          	    String& String::assign(const char* str)
1284                          	    String& String::append(const char* str)
1285                          	    Boolean operator==(const String& s1, const char* s2)
1286                          	    Boolean operator==(const char* s1, const String& s2)
1287                          	    Boolean operator!=(const String& s1, const char* s2)
1288                          	    Boolean operator!=(const char* s1, const String& s2)
1289                          	    Boolean operator<(const String& s1, const char* s2)
1290                          	    Boolean operator<(const char* s1, const String& s2)
1291                          	    Boolean operator>(const String& s1, const char* s2)
1292                          	    Boolean operator>(const char* s1, const String& s2)
1293                          	    Boolean operator<=(const String& s1, const char* s2)
1294                          	    Boolean operator<=(const char* s1, const String& s2)
1295                          	    Boolean operator>=(const String& s1, const char* s2)
1296 mike          1.111.2.1  	    Boolean operator>=(const char* s1, const String& s2)
1297                          	    String operator+(const String& s1, const char* s2)
1298                          	    String operator+(const char* s1, const String& s2)
1299                          
1300                              7.  Optimized _next_pow_2(), used in rounding the capacity to the next 
1301                                  power of two (algorithm from the book "Hacker's Delight").
1302                          
1303                          	    static Uint32 _next_pow_2(Uint32 x)
1304                          	    {
1305                          		if (x < 8)
1306                          		    return 8;
1307                          
1308                          		x--;
1309                          		x |= (x >> 1);
1310                          		x |= (x >> 2);
1311                          		x |= (x >> 4);
1312                          		x |= (x >> 8);
1313                          		x |= (x >> 16);
1314                          		x++;
1315                          
1316                          		return x;
1317 mike          1.111.2.1  	    }
1318                          
1319                              8.  Implemented "concatenating constructors" to eliminate temporaries
1320                          	created by operator+(). This scheme employs the "return-value 
1321                          	optimization" described by Stan Lippman.
1322                          
1323                          	    inline String operator+(const String& s1, const String& s2)
1324                          	    {
1325                          		return String(s1, s2, 0);
1326                          	    }
1327                          
1328                              9.  Experimented to find the optimal initial size for a short string.
1329 mike          1.111.2.13 	Eight seems to offer the best tradeoff between space and time.
1330 mike          1.111.2.1  
1331                              10. Inlined all members of the Char16 class.
1332                          
1333                              11. Used Uint16 internally in the String class. This showed no improvement
1334                          	since Char16 was already fully inlined and was essentially reduced to
1335                          	Uint16 in any case.
1336                          
1337                              12. Implemented conditional logic (#if) allowing error checking logic to
1338                          	be excluded for better performance. Examples include bounds checking
1339                          	and null-pointer checking.
1340                          
1341                              13. Used memcpy() and memcmp() where possible. These are implemented using
1342                          	the rep family of instructions under Intel and are much faster.
1343                          
1344 mike          1.111.2.2      14. Used loop unrolling, jump-tables, and short-circuiting to reduce UTF8 
1345                          	copy routine overhead.
1346                          
1347                              15. Added ASCII7 form of the constructor and assign().
1348                          
1349                          	    String s("hello world", String::ASCII7);
1350                          
1351 mike          1.111.2.4  	    s.assignASCII7("hello world");
1352 mike          1.111.2.2  
1353 mike          1.111.2.6  	This avoids slower UTF8 processing when not needed.
1354 mike          1.111.2.2  
1355 mike          1.111.2.1  ================================================================================
1356                          */

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2