1 martin 1.10 //%LICENSE////////////////////////////////////////////////////////////////
|
2 martin 1.11 //
|
3 martin 1.10 // Licensed to The Open Group (TOG) under one or more contributor license
4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
5 // this work for additional information regarding copyright ownership.
6 // Each contributor licenses this file to you under the OpenPegasus Open
7 // Source License; you may not use this file except in compliance with the
8 // License.
|
9 martin 1.11 //
|
10 martin 1.10 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
|
16 martin 1.11 //
|
17 martin 1.10 // The above copyright notice and this permission notice shall be included
18 // in all copies or substantial portions of the Software.
|
19 martin 1.11 //
|
20 martin 1.10 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
21 martin 1.11 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22 martin 1.10 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27 martin 1.11 //
|
28 martin 1.10 //////////////////////////////////////////////////////////////////////////
|
29 mike 1.2 //
30 //%/////////////////////////////////////////////////////////////////////////////
31
32 #ifndef _Pegasus_StringRep_h
33 #define _Pegasus_StringRep_h
34
|
35 mike 1.4 #include <Pegasus/Common/Config.h>
36 #include <Pegasus/Common/AtomicInt.h>
|
37 thilo.boehm 1.12 #include <Pegasus/Common/CommonUTF.h>
|
38 mike 1.2 #include <new>
39
40 PEGASUS_NAMESPACE_BEGIN
41
|
42 mike 1.7 struct PEGASUS_COMMON_LINKAGE StringRep
|
43 mike 1.2 {
44 StringRep();
45
46 ~StringRep();
47
48 static StringRep* alloc(size_t cap);
49
50 static void free(StringRep* rep);
51
52 static StringRep* create(const Uint16* data, size_t size);
53
54 static StringRep* create(const char* data, size_t size);
55
56 static StringRep* copyOnWrite(StringRep* rep);
57
58 static Uint32 length(const Uint16* str);
59
60 static void ref(const StringRep* rep);
61
62 static void unref(const StringRep* rep);
63
64 mike 1.2 static StringRep _emptyRep;
65
66 // Number of characters in this string, excluding the null terminator.
67 size_t size;
68
69 // Number of characters this representation has room for. This is
70 // greater or equal to size.
71 size_t cap;
72
73 // Number of string refering to this StringRep (1, 2, etc).
74 AtomicInt refs;
75
76 // The first character in the string. Extra space is allocated off the
77 // end of this structure for additional characters.
78 Uint16 data[1];
79 };
80
81 inline void StringRep::free(StringRep* rep)
82 {
83 rep->refs.~AtomicInt();
84 ::operator delete(rep);
85 mike 1.2 }
86
87 inline StringRep::StringRep() : size(0), cap(0), refs(2)
88 {
89 // Only called on _emptyRep. We set the reference count to two to
90 // keep a String from modifying it (if the reference count were one,
91 // a string would think it was the sole owner of the StringRep object).
92 data[0] = 0;
93 }
94
95 inline StringRep::~StringRep()
96 {
97 // Only called on _emptyRep.
98 }
99
100 inline void StringRep::ref(const StringRep* rep)
101 {
102 if (rep != &StringRep::_emptyRep)
103 ((StringRep*)rep)->refs++;
104 }
105
106 mike 1.2 inline void StringRep::unref(const StringRep* rep)
107 {
|
108 kumpf 1.9 if (rep != &StringRep::_emptyRep &&
109 ((StringRep*)rep)->refs.decAndTestIfZero())
|
110 mike 1.2 StringRep::free((StringRep*)rep);
111 }
112
113 PEGASUS_COMMON_LINKAGE void StringThrowOutOfBounds();
114
115 PEGASUS_COMMON_LINKAGE void StringAppendCharAux(StringRep*& _rep);
116
117 PEGASUS_COMMON_LINKAGE Boolean StringEqualNoCase(
118 const String& s1, const String& s2);
119
120 PEGASUS_COMMON_LINKAGE Uint32 StringFindAux(
121 const StringRep* _rep, const Char16* s, Uint32 n);
122
123 inline void _checkBounds(size_t index, size_t size)
124 {
125 if (index > size)
126 StringThrowOutOfBounds();
127 }
128
|
// Copies n elements from q to p, converting each element from Q to P by
// plain assignment.
//
// The copy is unrolled by hand (blocks of eight, then one block of four,
// then single elements) for efficiency. Please keep the unrolling.
template<class P, class Q>
static void _copy(P* p, const Q* q, size_t n)
{
    // Blocks of eight elements.
    for (; n >= 8; n -= 8, p += 8, q += 8)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p[4] = q[4];
        p[5] = q[5];
        p[6] = q[6];
        p[7] = q[7];
    }

    // Fewer than eight elements remain, so at most one block of four.
    if (n >= 4)
    {
        p[0] = q[0];
        p[1] = q[1];
        p[2] = q[2];
        p[3] = q[3];
        p += 4;
        q += 4;
        n -= 4;
    }

    // Zero to three leftover elements.
    for (size_t i = 0; i != n; ++i)
        p[i] = q[i];
}
164
165 static size_t _copyFromUTF8(
166 Uint16* dest,
167 const char* src,
168 size_t n,
169 size_t& utf8_error_index)
170 {
171 thilo.boehm 1.12 Uint16* p = dest;
172 const Uint8* q = (const Uint8*)src;
173
174 // Process leading 7-bit ASCII characters (to avoid UTF8 overhead later).
175 // Use loop-unrolling.
176
177 while (n >=8 && ((q[0]|q[1]|q[2]|q[3]|q[4]|q[5]|q[6]|q[7]) & 0x80) == 0)
178 {
179 p[0] = q[0];
180 p[1] = q[1];
181 p[2] = q[2];
182 p[3] = q[3];
183 p[4] = q[4];
184 p[5] = q[5];
185 p[6] = q[6];
186 p[7] = q[7];
187 p += 8;
188 q += 8;
189 n -= 8;
190 }
191
192 thilo.boehm 1.12 while (n >=4 && ((q[0]|q[1]|q[2]|q[3]) & 0x80) == 0)
193 {
194 p[0] = q[0];
195 p[1] = q[1];
196 p[2] = q[2];
197 p[3] = q[3];
198 p += 4;
199 q += 4;
200 n -= 4;
201 }
202
203 switch (n)
204 {
205 case 0:
206 return p - dest;
207 case 1:
208 if (q[0] < 128)
209 {
210 p[0] = q[0];
211 return p + 1 - dest;
212 }
213 thilo.boehm 1.12 break;
214 case 2:
215 if (((q[0]|q[1]) & 0x80) == 0)
216 {
217 p[0] = q[0];
218 p[1] = q[1];
219 return p + 2 - dest;
220 }
221 break;
222 case 3:
223 if (((q[0]|q[1]|q[2]) & 0x80) == 0)
224 {
225 p[0] = q[0];
226 p[1] = q[1];
227 p[2] = q[2];
228 return p + 3 - dest;
229 }
230 break;
231 }
232
233 // Process remaining characters.
234 thilo.boehm 1.12
235 while (n)
236 {
237 // Optimize for 7-bit ASCII case.
238
239 if (*q < 128)
240 {
241 *p++ = *q++;
242 n--;
243 }
244 else
245 {
246 Uint8 c = UTF_8_COUNT_TRAIL_BYTES(*q) + 1;
247
248 if (c > n || !isValid_U8(q, c) ||
249 UTF8toUTF16(&q, q + c, &p, p + n) != 0)
250 {
251 utf8_error_index = q - (const Uint8*)src;
252 return size_t(-1);
253 }
254
255 thilo.boehm 1.12 n -= c;
256 }
257 }
258
259 return p - dest;
260 }
261
262 static inline size_t _convert(
263 Uint16* p, const char* q, size_t n, size_t& utf8_error_index)
264 {
265 #ifdef PEGASUS_STRING_NO_UTF8
266 _copy(p, q, n);
267 return n;
268 #else
269 return _copyFromUTF8(p, q, n, utf8_error_index);
270 #endif
271 }
272
|
273 mike 1.2 PEGASUS_NAMESPACE_END
274
275 #endif /* _Pegasus_StringRep_h */
|