(file) Return to String.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 mike  1.27 //%/////////////////////////////////////////////////////////////////////////////
  2            //
  3 kumpf 1.41 // Copyright (c) 2000, 2001, 2002 BMC Software, Hewlett-Packard Company, IBM,
  4            // The Open Group, Tivoli Systems
  5 mike  1.27 //
  6            // Permission is hereby granted, free of charge, to any person obtaining a copy
  7 kumpf 1.41 // of this software and associated documentation files (the "Software"), to
  8            // deal in the Software without restriction, including without limitation the
  9            // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 10 mike  1.27 // sell copies of the Software, and to permit persons to whom the Software is
 11            // furnished to do so, subject to the following conditions:
 12            // 
 13 kumpf 1.41 // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 14 mike  1.27 // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 15            // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 16 kumpf 1.41 // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 17            // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 18            // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 19 mike  1.27 // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 20            // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 21            //
 22            //==============================================================================
 23            //
 24            // Author: Mike Brasher (mbrasher@bmc.com)
 25            //
 26 kumpf 1.39 // Modified By: Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
 27 mike  1.27 //
 28            //%/////////////////////////////////////////////////////////////////////////////
 29            
 30            
 31            #include <cctype>
 32 kumpf 1.64 #include <cstring>
 33 mike  1.27 #include "String.h"
 34 kumpf 1.43 #include "Array.h"
 35 kumpf 1.48 #include "InternalException.h"
 36 mike  1.27 #include <iostream>
 37 kumpf 1.63 #include <fstream>
 38 kumpf 1.61 #ifndef PEGASUS_REMOVE_DEPRECATED
 39 kumpf 1.60 #include "System.h"
 40            #endif
 41 mike  1.27 
 42 david 1.69 #include "CommonUTF.h"
 43            
 44            #ifdef PEGASUS_HAS_ICU
 45 chuck 1.74 #include <unicode/unistr.h>
 46 david 1.69 #endif
 47            
 48 mike  1.28 PEGASUS_USING_STD;
 49            
 50 mike  1.27 PEGASUS_NAMESPACE_BEGIN
 51            
 52 kumpf 1.39 ///////////////////////////////////////////////////////////////////////////////
 53            //
 54 kumpf 1.54 // CString
 55            //
 56            ///////////////////////////////////////////////////////////////////////////////
 57            
 58            CString::CString()
 59                : _rep(0)
 60            {
 61            }
 62            
 63            CString::CString(const CString& cstr)
 64            {
 65 kumpf 1.59     _rep = (void*)new char[strlen((char*)cstr._rep)+1];
 66                strcpy((char*)_rep, (char*)cstr._rep);
 67 kumpf 1.54 }
 68            
 69            CString::CString(char* cstr)
 70                : _rep(cstr)
 71            {
 72            }
 73            
 74            CString::~CString()
 75            {
 76                if (_rep)
 77 kumpf 1.59         delete [] (char*)_rep;
 78 kumpf 1.54 }
 79            
 80 kumpf 1.56 CString& CString::operator=(const CString& cstr)
 81            {
 82 kumpf 1.59     _rep = (char*)new char[strlen((char*)cstr._rep)+1];
 83                strcpy((char*)_rep, (char*)cstr._rep);
 84 kumpf 1.56     return *this;
 85            }
 86            
 87 kumpf 1.54 CString::operator const char*() const
 88            {
 89 kumpf 1.59     return (char*)_rep;
 90 kumpf 1.54 }
 91            
 92            ///////////////////////////////////////////////////////////////////////////////
 93            //
 94 kumpf 1.39 // String
 95            //
 96            ///////////////////////////////////////////////////////////////////////////////
 97            
 98 kumpf 1.37 const String String::EMPTY = String();
 99 mike  1.27 
100 kumpf 1.38 Uint32 _strnlen(const char* str, Uint32 n)
101            {
102                if (!str)
103            	throw NullPointer();
104            
105                for (Uint32 i=0; i<n; i++)
106                {
107                    if (!*str)
108                    {
109                        return i;
110                    }
111                }
112            
113                return n;
114            }
115            
116            Uint32 _strnlen(const Char16* str, Uint32 n)
117            {
118                if (!str)
119            	throw NullPointer();
120            
121 kumpf 1.38     for (Uint32 i=0; i<n; i++)
122                {
123                    if (!*str)
124                    {
125                        return i;
126                    }
127                }
128            
129                return n;
130            }
131            
132 kumpf 1.39 inline Uint32 _StrLen(const char* str)
133 mike  1.27 {
134                if (!str)
135            	throw NullPointer();
136            
137                return strlen(str);
138            }
139            
140 kumpf 1.39 inline Uint32 _StrLen(const Char16* str)
141 mike  1.27 {
142                if (!str)
143            	throw NullPointer();
144            
145                Uint32 n = 0;
146            
147                while (*str++)
148            	n++;
149            
150                return n;
151            }
152            
153 kumpf 1.43 class StringRep
154            {
155            public:
156                StringRep()
157                {}
158                StringRep(const StringRep& r)
159                    : c16a(r.c16a)
160                {}
161                StringRep(const Char16* str)
162                    : c16a(str, _StrLen(str) + 1)
163                {}
164            
165                Array<Char16> c16a;
166            };
167            
168 mike  1.27 String::String()
169            {
170 kumpf 1.43     _rep = new StringRep;
171                _rep->c16a.append('\0');
172 mike  1.27 }
173            
174 kumpf 1.39 String::String(const String& str)
175 mike  1.27 {
176 tony  1.66   if (str._rep != NULL)
177              {
178 kumpf 1.43     _rep = new StringRep(*str._rep);
179 tony  1.66   }
180              else
181              {
182                _rep = new StringRep();
183              }
184 kumpf 1.39 }
185 tony  1.66 
186 mike  1.27 
187 kumpf 1.39 String::String(const String& str, Uint32 n)
188            {
189 kumpf 1.43     _rep = new StringRep;
190 kumpf 1.55     assign(str.getChar16Data(), n);
191 kumpf 1.39 }
192            
193            String::String(const Char16* str)
194            {
195 kumpf 1.43     _rep = new StringRep(str);
196 mike  1.27 }
197            
198 kumpf 1.39 String::String(const Char16* str, Uint32 n)
199            {
200 kumpf 1.43     _rep = new StringRep;
201 kumpf 1.39     assign(str, n);
202            }
203            
204            String::String(const char* str)
205 mike  1.27 {
206 kumpf 1.43     _rep = new StringRep;
207 kumpf 1.39     assign(str);
208 mike  1.27 }
209            
210 david 1.69 String::String(const char* str, const char* utfFlag)
211            {
212                _rep = new StringRep;
213            
214                if(!memcmp(utfFlag,STRING_FLAG_UTF8,sizeof(STRING_FLAG_UTF8)))
215                {
216            	assignUTF8(str);
217                }
218                else
219                {
220            	assign(str);
221                }
222            }
223            
224 kumpf 1.39 String::String(const char* str, Uint32 n)
225 mike  1.27 {
226 kumpf 1.43     _rep = new StringRep;
227 kumpf 1.39     assign(str, n);
228            }
229 mike  1.27 
230 kumpf 1.39 String::~String()
231            {
232 kumpf 1.43     delete _rep;
233 mike  1.27 }
234            
235 kumpf 1.39 String& String::operator=(const String& str)
236 mike  1.27 {
237 kumpf 1.39     return assign(str);
238 mike  1.27 }
239            
240 kumpf 1.39 String& String::assign(const String& str)
241 mike  1.27 {
242 kumpf 1.43     _rep->c16a = str._rep->c16a;
243 kumpf 1.39     return *this;
244 mike  1.27 }
245            
246 kumpf 1.39 String& String::assign(const Char16* str)
247 mike  1.27 {
248 kumpf 1.43     _rep->c16a.clear();
249                _rep->c16a.append(str, _StrLen(str) + 1);
250 mike  1.27     return *this;
251            }
252            
253            String& String::assign(const Char16* str, Uint32 n)
254            {
255 kumpf 1.43     _rep->c16a.clear();
256 kumpf 1.38     Uint32 m = _strnlen(str, n);
257 kumpf 1.43     _rep->c16a.append(str, m);
258                _rep->c16a.append('\0');
259 mike  1.27     return *this;
260            }
261            
262 kumpf 1.39 String& String::assign(const char* str)
263 mike  1.27 {
264 kumpf 1.43     _rep->c16a.clear();
265 kumpf 1.38 
266 kumpf 1.39     Uint32 n = strlen(str) + 1;
267 kumpf 1.45     _rep->c16a.reserveCapacity(n);
268 mike  1.27 
269                while (n--)
270 kumpf 1.67 	_rep->c16a.append(Uint8(*str++));
271 mike  1.27 
272                return *this;
273            }
274            
275 kumpf 1.39 String& String::assign(const char* str, Uint32 n)
276 mike  1.27 {
277 kumpf 1.43     _rep->c16a.clear();
278 mike  1.27 
279 kumpf 1.39     Uint32 _n = _strnlen(str, n);
280 kumpf 1.45     _rep->c16a.reserveCapacity(_n + 1);
281 mike  1.27 
282 kumpf 1.39     while (_n--)
283 kumpf 1.67 	_rep->c16a.append(Uint8(*str++));
284 mike  1.27 
285 kumpf 1.43     _rep->c16a.append('\0');
286 mike  1.27 
287                return *this;
288            }
289            
290 kumpf 1.39 void String::clear()
291            {
292 kumpf 1.43     _rep->c16a.clear();
293                _rep->c16a.append('\0');
294 kumpf 1.39 }
295            
296 kumpf 1.43 void String::reserveCapacity(Uint32 capacity)
297 kumpf 1.39 {
298 kumpf 1.45     _rep->c16a.reserveCapacity(capacity + 1);
299 kumpf 1.39 }
300            
301            Uint32 String::size() const
302            {
303 kumpf 1.43     return _rep->c16a.size() - 1;
304 kumpf 1.39 }
305            
306 kumpf 1.55 const Char16* String::getChar16Data() const
307 kumpf 1.39 {
308 kumpf 1.43     return _rep->c16a.getData();
309 kumpf 1.39 }
310            
311 kumpf 1.54 CString String::getCString() const
312 mike  1.27 {
313                Uint32 n = size() + 1;
314 kumpf 1.54     char* str = new char[n];
315 mike  1.27     char* p = str;
316 kumpf 1.55     const Char16* q = getChar16Data();
317 mike  1.27 
318                for (Uint32 i = 0; i < n; i++)
319                {
320            	Uint16 c = *q++;
321            	*p++ = char(c);
322            
323 kumpf 1.54 	//if (c & 0xff00)
324            	//    truncatedCharacters = true;
325 mike  1.27     }
326            
327 kumpf 1.54     return CString(str);
328 kumpf 1.49 }
329            
330 kumpf 1.53 Char16& String::operator[](Uint32 index)
331 mike  1.27 {
332 kumpf 1.53     if (index > size())
333 kumpf 1.49 	throw IndexOutOfBoundsException();
334 mike  1.27 
335 kumpf 1.53     return _rep->c16a[index];
336 mike  1.27 }
337            
338 kumpf 1.53 const Char16 String::operator[](Uint32 index) const
339 mike  1.27 {
340 kumpf 1.53     if (index > size())
341 kumpf 1.49 	throw IndexOutOfBoundsException();
342 mike  1.27 
343 kumpf 1.53     return _rep->c16a[index];
344 mike  1.27 }
345            
346 kumpf 1.39 String& String::append(const Char16& c)
347            {
348 kumpf 1.43     _rep->c16a.insert(_rep->c16a.size() - 1, c);
349 kumpf 1.39     return *this;
350            }
351            
352 mike  1.27 String& String::append(const Char16* str, Uint32 n)
353            {
354 kumpf 1.38     Uint32 m = _strnlen(str, n);
355 kumpf 1.45     _rep->c16a.reserveCapacity(_rep->c16a.size() + m);
356 kumpf 1.43     _rep->c16a.remove(_rep->c16a.size() - 1);
357                _rep->c16a.append(str, m);
358                _rep->c16a.append('\0');
359 mike  1.27     return *this;
360            }
361            
362 kumpf 1.39 String& String::append(const String& str)
363 mike  1.27 {
364 kumpf 1.55     return append(str.getChar16Data(), str.size());
365 mike  1.27 }
366            
367 kumpf 1.53 void String::remove(Uint32 index, Uint32 size)
368 mike  1.27 {
369 kumpf 1.39     if (size == PEG_NOT_FOUND)
370 kumpf 1.53 	size = this->size() - index;
371 mike  1.27 
372 kumpf 1.53     if (index + size > this->size())
373 kumpf 1.49 	throw IndexOutOfBoundsException();
374 mike  1.27 
375 kumpf 1.39     if (size)
376 kumpf 1.53 	_rep->c16a.remove(index, size);
377 mike  1.27 }
378            
379 kumpf 1.53 String String::subString(Uint32 index, Uint32 length) const
380 mike  1.27 {
381 kumpf 1.53     if (index < size())
382 mike  1.27     {
383 kumpf 1.57 	if ((length == PEG_NOT_FOUND) || (length > size() - index))
384 kumpf 1.53 	    length = size() - index;
385 mike  1.27 
386 kumpf 1.55 	return String(getChar16Data() + index, length);
387 mike  1.27     }
388                else
389            	return String();
390            }
391            
392            Uint32 String::find(Char16 c) const
393            {
394 kumpf 1.55     const Char16* first = getChar16Data();
395 mike  1.27 
396                for (const Char16* p = first; *p; p++)
397                {
398            	if (*p == c)
399            	    return  p - first;
400                }
401            
402                return PEG_NOT_FOUND;
403            }
404            
405 kumpf 1.53 Uint32 String::find(Uint32 index, Char16 c) const
406 mike  1.30 {
407 kumpf 1.55     const Char16* data = getChar16Data();
408 mike  1.30 
409 kumpf 1.53     for (Uint32 i = index, n = size(); i < n; i++)
410 mike  1.30     {
411            	if (data[i] == c)
412            	    return i;
413                }
414            
415                return PEG_NOT_FOUND;
416            }
417            
418 mike  1.27 Uint32 String::find(const String& s) const
419            {
420 kumpf 1.55     const Char16* pSubStr = s.getChar16Data();
421                const Char16* pStr = getChar16Data();
422 mike  1.27     Uint32 subStrLen = s.size();
423                Uint32 strLen = size();
424            
425 mike  1.30     if (subStrLen > strLen)
426                {
427                    return PEG_NOT_FOUND;
428                }
429            
430 mike  1.27     // loop to find first char match
431                Uint32 loc = 0;
432                for( ; loc <= (strLen-subStrLen); loc++)
433                {
434            	if (*pStr++ == *pSubStr)  // match first char
435            	{
436            	    // point to substr 2nd char
437            	    const Char16* p = pSubStr + 1;
438            
439            	    // Test remaining chars for equal
440            	    Uint32 i = 1;
441            	    for (; i < subStrLen; i++)
442            		if (*pStr++ != *p++ )
443            		    {pStr--; break;} // break from loop
444            	    if (i == subStrLen)
445            		return loc;
446            	}
447                }
448                return PEG_NOT_FOUND;
449            }
450            
451 mike  1.27 Uint32 String::reverseFind(Char16 c) const
452            {
453 kumpf 1.55     const Char16* first = getChar16Data();
454                const Char16* last = getChar16Data() + size();
455 mike  1.27 
456                while (last != first)
457                {
458            	if (*--last == c)
459            	    return last - first;
460                }
461            
462                return PEG_NOT_FOUND;
463            }
464            
465 kumpf 1.62 // ATTN-RK-P3-20020509: Define case-sensitivity for non-English characters
466 mike  1.27 void String::toLower()
467            {
468 david 1.69 #ifdef PEGASUS_HAS_ICU
469                Char16* utf16str; 
470                UnicodeString UniStr((const UChar *)_rep->c16a.getData(), (int32_t)size());
471 david 1.77     UniStr.append((UChar)'\0');
472 david 1.69     UniStr = UniStr.toLower();
473                utf16str = (Char16 *)UniStr.getTerminatedBuffer();
474                assign(utf16str);
475 humberto 1.76     // DEVELOPER NOTE: do not delete utf16str, this is handled by ICU
476                   
477 david    1.69 #else
478 kumpf    1.43     for (Char16* p = &_rep->c16a[0]; *p; p++)
479 mike     1.27     {
480 kumpf    1.46 	if (*p <= PEGASUS_MAX_PRINTABLE_CHAR)
481 mike     1.27 	    *p = tolower(*p);
482                   }
483 david    1.69 #endif
484 kumpf    1.39 }
485               
486 kumpf    1.43 int String::compare(const String& s1, const String& s2, Uint32 n)
487 kumpf    1.39 {
488 kumpf    1.55     const Char16* s1c16 = s1.getChar16Data();
489                   const Char16* s2c16 = s2.getChar16Data();
490 kumpf    1.39 
491                   while (n--)
492 mike     1.27     {
493 kumpf    1.43 	int r = *s1c16++ - *s2c16++;
494 mike     1.27 
495               	if (r)
496               	    return r;
497                   }
498               
499                   return 0;
500               }
501               
502 kumpf    1.43 int String::compare(const String& s1, const String& s2)
503 mike     1.30 {
504 kumpf    1.55     const Char16* s1c16 = s1.getChar16Data();
505                   const Char16* s2c16 = s2.getChar16Data();
506 kumpf    1.43 
507                   while (*s1c16 && *s2c16)
508 mike     1.30     {
509 kumpf    1.43 	int r = *s1c16++ - *s2c16++;
510 mike     1.30 
511               	if (r)
512               	    return r;
513                   }
514               
515 kumpf    1.43     if (*s2c16)
516 mike     1.30 	return -1;
517 kumpf    1.43     else if (*s1c16)
518 mike     1.30 	return 1;
519               
520                   return 0;
521               }
522               
523 kumpf    1.40 int String::compareNoCase(const String& s1, const String& s2)
524               {
525 david    1.69 #ifdef PEGASUS_HAS_ICU
526                   UnicodeString UniStr1((const UChar *)s1.getChar16Data(), (int32_t)s1.size());
527                   UnicodeString UniStr2((const UChar *)s2.getChar16Data(), (int32_t)s2.size());
528                   UniStr1 = UniStr1.toLower();
529                   UniStr2 = UniStr2.toLower();
530                   return (UniStr2.compare(UniStr1));    
531               #else
532 kumpf    1.55     const Char16* _s1 = s1.getChar16Data();
533                   const Char16* _s2 = s2.getChar16Data();
534 kumpf    1.40 
535                   while (*_s1 && *_s2)
536                   {
537                       int r;
538               
539 kumpf    1.46         if (*_s1 <= PEGASUS_MAX_PRINTABLE_CHAR &&
540                           *_s2 <= PEGASUS_MAX_PRINTABLE_CHAR)
541 kumpf    1.40         {
542                           r = tolower(*_s1++) - tolower(*_s2++);
543                       }
544                       else
545                       {
546                           r = *_s1++ - *_s2++;
547                       }
548               
549               	if (r)
550               	    return r;
551                   }
552               
553                   if (*_s2)
554               	return -1;
555                   else if (*_s1)
556               	return 1;
557               
558                   return 0;
559 david    1.69 #endif
560 kumpf    1.40 }
561               
562 kumpf    1.39 Boolean String::equal(const String& str1, const String& str2)
563 mike     1.27 {
564 kumpf    1.43     return String::compare(str1, str2) == 0;
565 mike     1.27 }
566               
567 kumpf    1.39 Boolean String::equalNoCase(const String& str1, const String& str2)
568 mike     1.27 {
569 david    1.69 #ifdef PEGASUS_HAS_ICU
570                   UnicodeString UniStr1((const UChar *)str1.getChar16Data(), (int32_t)str1.size());
571                   UnicodeString UniStr2((const UChar *)str2.getChar16Data(), (int32_t)str2.size());
572                   UniStr1 = UniStr1.toLower();
573                   UniStr2 = UniStr2.toLower();
574                   return (UniStr1 == UniStr2);    
575               #else
576 kumpf    1.39     if (str1.size() != str2.size())
577               	return false;
578               
579 kumpf    1.55     const Char16* p = str1.getChar16Data();
580                   const Char16* q = str2.getChar16Data();
581 kumpf    1.39 
582                   Uint32 n = str1.size();
583 mike     1.27 
584 kumpf    1.39     while (n--)
585                   {
586 kumpf    1.46 	if (*p <= PEGASUS_MAX_PRINTABLE_CHAR &&
587                           *q <= PEGASUS_MAX_PRINTABLE_CHAR)
588 kumpf    1.39 	{
589               	    if (tolower(*p++) != tolower(*q++))
590               		return false;
591               	}
592               	else if (*p++ != *q++)
593               	    return false;
594                   }
595 mike     1.28 
596 kumpf    1.39     return true;
597 david    1.69 #endif
598               }
599               
600               // UTF8 specific code:
601               String& String::assignUTF8(const char* str)
602               {
603                   _rep->c16a.clear();
604                   Uint32 n = strlen(str) + 1;
605               
606                   const Uint8 *strsrc = (Uint8 *)str;
607                   Uint8 *endsrc = (Uint8 *)&str[n-1];
608               
609                   Char16 *msg16 = new Char16[n];
610                   Uint16 *strtgt = (Uint16 *)msg16;
611                   Uint16 *endtgt = (Uint16 *)&msg16[n];
612               
613                   UTF8toUTF16(&strsrc,
614               		endsrc,
615               		&strtgt,
616               		endtgt);
617               
618 david    1.69     Uint32 count;
619               
620                   for(count = 0; ((msg16[count]) != Char16(0x00)) && (count <= n); ++count);
621               
622                   _rep->c16a.append(msg16, count);
623               
624                   _rep->c16a.append('\0');
625               
626                   delete [] msg16;
627               
628                   return *this;
629 mike     1.27 }
630               
631 david    1.69 CString String::getCStringUTF8() const
632               {
633 david    1.71     Uint32 n = 3*size();
634 david    1.69     char* str = new char[n];
635               
636                   const Char16* msg16 = getChar16Data();
637               
638                   const Uint16 *strsrc = (Uint16 *)msg16;
639 david    1.71     Uint16 *endsrc = (Uint16 *)&msg16[size()+1];
640 david    1.69 
641                   Uint8 *strtgt = (Uint8 *)str;
642                   Uint8 *endtgt = (Uint8 *)&str[n];
643               
644                   UTF16toUTF8 (&strsrc,
645               		 endsrc,
646               		 &strtgt,
647               		 endtgt);
648               
649 david    1.71 	char* str1 = new char[strlen(str)+1];
650               	strcpy(str1,str);
651 david    1.72 	delete [] str;
652 david    1.71 
653                   return CString(str1);
654 david    1.69 }
655               
656               Boolean String::isUTF8(const char *legal)
657               {
658 kumpf    1.73     char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;
659               
660                   // Validate that the string is long enough to hold all the expected bytes.
661                   // Note that if legal[0] == 0, numBytes will be 1.
662 chuck    1.74     for (char i=1; i<numBytes; i++)
663 kumpf    1.73     {
664                       if (legal[i] == 0)
665                       {
666                           return false;
667                       }
668                   }
669               
670                   return (isValid_U8((const Uint8 *)legal, numBytes));
671 david    1.69 }
672 kumpf    1.42 
673 kumpf    1.65 #if 0
674 kumpf    1.42 // ATTN-RK-P3-20020603: This code is not completely correct
675 karl     1.36  // Wildcard String matching function that may be useful in the future
676               // The following code was provided by Bob Blair.
677               
678               /* _StringMatch Match input MatchString against a GLOB style pattern
679                      Note that MatchChar is the char type so that this source
680                      in portable to different string types. This is an internal function
681                
682                 Results: The return value is 1 if string matches pattern, and
683                	0 otherwise.  The matching operation permits the following
684                	special characters in the pattern: *?\[] (see the manual
685                	entry for details on what these mean).
686                
687                 Side effects: None.
688                */
689               
690               /* MatchChar defined as a separate entity because this function source used
691                   elsewhere was an unsigned char *. Here we use Uint16 to  maintain 16 bit 
692                   size.
693               */
694               typedef Uint16 MatchChar;
695               
696 karl     1.36 inline Uint16 _ToLower(Uint16 ch)
697               {
698 david    1.69     // ICU_TODO:  If ICU is available we should do this the correct way.
699 kumpf    1.46     return ch <= PEGASUS_MAX_PRINTABLE_CHAR ? tolower(char(ch)) : ch;
700 karl     1.36 }
701               
702               inline Boolean _Equal(MatchChar ch1, MatchChar ch2, int nocase)
703               {
704 david    1.69     // ICU_TODO:  If ICU is available we should do this the correct way.
705 karl     1.36     if (nocase)
706               	return _ToLower(ch1) == _ToLower(ch2);
707                   else
708               	return ch1 == ch2;
709               }
710 mike     1.28 
711 kumpf    1.35 
712 karl     1.36 static const MatchChar *
713               _matchrange(const MatchChar *range, MatchChar c, int nocase)
714               {
715                 const MatchChar *p = range;
716                 const MatchChar *rstart = range + 1;
717                 const MatchChar *rend = 0;
718                 MatchChar compchar;
719               
720 kumpf    1.35   for (rend = rstart; *rend && *rend != ']'; rend++);
721 karl     1.36   if (*rend == ']') {  // if there is an end to this pattern
722 kumpf    1.35     for (compchar = *rstart; rstart != rend; rstart++) {
723 karl     1.36       if (_Equal(*rstart, c, nocase))
724 kumpf    1.35         return ++rend;
725                     if (*rstart == '-') {
726                       rstart++;
727                       if (c >= compchar && c <= *rstart)
728                         return ++rend;
729                     }
730                   }
731                 }
732 karl     1.36   return (const MatchChar *)0;
733 kumpf    1.35 }
734               
735               static int
736 karl     1.36 _StringMatch( 
737                   const MatchChar *testString, 
738                   const MatchChar *pattern,
739                   int nocase ) 		/* Ignore case if this is true */
740               {
741                 const MatchChar *pat = pattern;
742                 const MatchChar *str = testString;
743 kumpf    1.35   unsigned int done = 0;
744                 unsigned int res = 0;  // the result: 1 == match
745               
746                 while (!done) { // main loop walks through pattern and test string
747                   //cerr << "Comparing <" << *pat << "> and <" << *str << ">" << endl;
748                   if (!*pat) {                                         //end of pattern
749                     done = 1;                                          // we're done
750                     if (!*str)                                         //end of test, too?
751                       res = 1;                                         // then we matched
752                   } else {                                             //Not end of pattern
753                     if (!*str) {                                       // but end of test
754                       done = 1;                                        // We're done
755                       if (*pat == '*')                                 // If pattern openends
756                         res = 1;                                       //  then we matched
757                     } else {                                           //Not end of test
758                       if (*pat == '*') {                               //Ambiguuity found
759                         if (!*++pat) {                                 //and it ends pattern
760                           done = 1;                                    //  then we're done
761                           res = 1;                                     //  and match
762                         } else {                                       //if it doesn't end
763                           while (!done) {                              //  until we're done
764 karl     1.36               if (_StringMatch(str, pat, nocase)) {      //  we recurse
765 kumpf    1.35                 done = 1;                                //if it recurses true
766                               res = 1;                                 //  we done and match
767                             } else {                                   //it recurses false
768                               if (!*str)                               // see if test is done
769                                 done = 1;                              //  yes: we done
770                               else                                     // not done:
771                                 str++;                                 //   keep testing
772                             } // end test on recursive call
773                           } // end looping on recursive calls
774                         } // end logic when pattern is ambiguous
775                       } else {                                         //pattern not ambiguus
776                         if (*pat == '?') {                             //pattern is 'any'
777                           pat++, str++;                                //  so move along
778                         } else if (*pat == '[') {                      //see if it's a range
779 karl     1.36             pat = _matchrange(pat, *str, nocase);         // and is a match
780 kumpf    1.35             if (!pat) {                                  //It is not a match
781                             done = 1;                                  //  we're done
782 kumpf    1.42               res = 0;                                   //  no match
783 kumpf    1.35             } else {                                     //Range matches
784                             str++, pat++;                              //  keep going
785                           }
786                         } else {               // only case left is individual characters
787 karl     1.36             if (!_Equal(*pat++, *str++, nocase))         // if they don't match
788 kumpf    1.35               done = 1;                                  //   bail.
789                         }
790                       }  // end ("pattern is not ambiguous (*)" logic
791                     } // end logic when pattern and string still have data
792                   } // end logic when pattern still has data
793                 } // end main loop
794                 return res;
795               }
796               
797 kumpf    1.39 
    /** match matches a string against a GLOB style pattern.
        Returns true if the String parameter matches the pattern. C-Shell style
        glob matching is used.
        @param str String to be matched against the pattern
        @param pattern Pattern to use in the match
        @return Boolean true if str matches pattern
        The pattern definition is as follows:
        <pre>
        *             Matches any number of any characters
        ?             Match exactly one character
        [chars]       Match any character in chars
        [chara-charb] Match any character in the range between chara and charb
        </pre>
        The literal characters *, ?, [, ] can be included in a string by
        escaping them with backslash "\".  Ranges of characters can be concatenated.
        <pre>
        examples:
        Boolean result = String::match("This is a test", "*is*");
        Boolean works =  String::match("abcdef123", "*[0-9]");
        </pre>
    */
819 karl     1.36 Boolean String::match(const String& str, const String& pattern)
820               {
821                   return _StringMatch(
822 kumpf    1.55 	(Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 0) != 0;
823 karl     1.36 }
824               
825 kumpf    1.65     /** Matches a String against a GLOB style pattern independent
826                       of case.
827                       Returns true if the str parameter matches the pattern. C-Shell style
828                       glob matching is used. Case is ignored in all comparisons.
829                       The pattern syntax is the one documented for match.
830                       @param str String containing the string to be matched
831                       @param pattern GLOB style pattern to use in the match.
832                       @return Boolean true if str matches pattern
833                       @see match
834                   */
835 karl     1.36 Boolean String::matchNoCase(const String& str, const String& pattern)
836               {
                      // Same call as String::match, but the final argument 1 asks
                      // _StringMatch for the case-insensitive comparison.
837                   return _StringMatch(
838 kumpf    1.55 	(Uint16*)str.getChar16Data(), (Uint16*)pattern.getChar16Data(), 1) != 0;
839 kumpf    1.39 }
840 kumpf    1.65 #endif
841 kumpf    1.39 
842               
843               ///////////////////////////////////////////////////////////////////////////////
844               //
845               // String-related functions
846               //
847               ///////////////////////////////////////////////////////////////////////////////
848               
849               Boolean operator==(const String& str1, const String& str2)
850               {
                      // Equality of two String objects is delegated to String::equal.
851                   return String::equal(str1, str2);
852               }
853               
854               Boolean operator==(const String& str1, const char* str2)
855               {
                      // Compares a String with a C string through String::equal.
                      // NOTE(review): presumably resolved via a const char* overload of
                      // String::equal or an implicit const char* -> String conversion;
                      // confirm against the String class declaration.
856                   return String::equal(str1, str2);
857               }
858               
859               Boolean operator==(const char* str1, const String& str2)
860               {
                      // Mirror image of the (String, const char*) form: the C string is
                      // the left operand and is passed as the first argument.
                      // NOTE(review): presumably relies on an implicit const char* ->
                      // String conversion for str1; confirm against the String class.
861                   return String::equal(str1, str2);
862 kumpf    1.39 }
863               
864               Boolean operator!=(const String& str1, const String& str2)
865               {
                      // Inequality is the logical negation of String::equal.
866                   return !String::equal(str1, str2);
867               }
868               
869 kumpf    1.47 PEGASUS_STD(ostream)& operator<<(PEGASUS_STD(ostream)& os, const String& str)
870 kumpf    1.39 {
                      // Writes str to os and returns os. Exactly one of three legs is
                      // compiled, selected by PEGASUS_OS_OS400 / PEGASUS_HAS_ICU.
871 david    1.72 
872 david    1.69 #if defined(PEGASUS_OS_OS400)
                      // OS/400 leg: obtain the UTF-8 form of the string and stream it
                      // as a C string.
873                   CString cstr = str.getCStringUTF8();
874                   const char* utf8str = cstr;
875               
876                   os << utf8str;
877               
878 humberto 1.76 #elif defined(PEGASUS_HAS_ICU)
879               	
                      // ICU leg: wrap the 16-bit data in a UnicodeString, call extract
                      // once with a NULL target to obtain a length, then allocate
                      // length + 1 bytes and extract again into that buffer.
                      // NOTE(review): this presumably relies on ICU's preflight
                      // behaviour for a NULL target, and both calls pass a byte-size
                      // value as extract's second argument - confirm against the ICU
                      // UnicodeString::extract signature.
880 david    1.69     char *buf = NULL;
                      // Estimate of up to 6 output bytes per 16-bit code unit.
881 humberto 1.76     const int size = str.size() * 6;
882 david    1.69     UnicodeString UniStr((const UChar *)str.getChar16Data(), (int32_t)str.size());
883 humberto 1.76     Uint32 bufsize = UniStr.extract(0,size,buf);
884                   
885 david    1.69     buf = new char[bufsize+1];
886                   UniStr.extract(0,bufsize,buf);
                      // NOTE(review): buf is streamed as a NUL-terminated C string, so
                      // this assumes extract terminates the output - confirm.
887                   os << buf;
888 humberto 1.75     os.flush();
                      // NOTE(review): buf is freed only on this normal path; nothing
                      // here releases it if the stream operations above throw.
889 david    1.72     delete [] buf;
890 humberto 1.76 
891 david    1.69 #else
892               
893 gerarda  1.68 
                      // Fallback leg: emit the string one 16-bit code unit at a time.
894 kumpf    1.47     for (Uint32 i = 0, n = str.size(); i < n; i++)
895 kumpf    1.50     {
896                       Uint16 code = str[i];
897               
                          // Non-zero values up to PEGASUS_MAX_PRINTABLE_CHAR are written
                          // as a single char; everything else (including 0) is escaped.
898                       if (code > 0 && code <= PEGASUS_MAX_PRINTABLE_CHAR)
899                       {
900                           os << char(code);
901                       }
902                       else
903                       {
904                           // Print in hex format: "\x" followed by four uppercase hex
                              // digits. For a 16-bit value that is 6 characters plus the
                              // terminating NUL, which fits in the 8-byte buffer.
905                           char buffer[8];
906                           sprintf(buffer, "\\x%04X", code);
907                           os << buffer;
908                       }
909                   }
910 david    1.69 #endif // End of PEGASUS_HAS_ICU #else leg.
911 kumpf    1.39 
912                   return os;
913               }
914               
915               String operator+(const String& str1, const String& str2)
916               {
                      // Concatenation: copy str1 into a temporary String, append str2 to
                      // that copy and return the result by value; neither operand is
                      // modified.
917                   return String(str1).append(str2);
918               }
919               
920               Boolean operator<(const String& str1, const String& str2)
921               {
                      // True when String::compare(str1, str2) yields a negative value.
922 kumpf    1.43     return String::compare(str1, str2) < 0;
923 kumpf    1.39 }
924               
925               Boolean operator<=(const String& str1, const String& str2)
926               {
                      // True when String::compare(str1, str2) yields zero or a negative
                      // value.
927 kumpf    1.43     return String::compare(str1, str2) <= 0;
928 kumpf    1.39 }
929               
930               Boolean operator>(const String& str1, const String& str2)
931               {
                      // True when String::compare(str1, str2) yields a positive value.
932 kumpf    1.43     return String::compare(str1, str2) > 0;
933 kumpf    1.39 }
934               
935               Boolean operator>=(const String& str1, const String& str2)
936               {
                      // True when String::compare(str1, str2) yields zero or a positive
                      // value.
937 kumpf    1.43     return String::compare(str1, str2) >= 0;
938 kumpf    1.39 }
939               
940 kumpf    1.61 #ifndef PEGASUS_REMOVE_DEPRECATED
941 kumpf    1.39 int CompareNoCase(const char* s1, const char* s2)
942               {
                      // Only compiled when PEGASUS_REMOVE_DEPRECATED is not defined.
                      // Forwards both C strings to System::strcasecmp and returns its
                      // result unchanged.
943 kumpf    1.60     return System::strcasecmp(s1, s2);
944 kumpf    1.39 }
945 kumpf    1.60 #endif
946 kumpf    1.39 
947 mike     1.27 PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2