1 karl 1.1 //%LICENSE////////////////////////////////////////////////////////////////
2 //
3 // Licensed to The Open Group (TOG) under one or more contributor license
4 // agreements. Refer to the OpenPegasusNOTICE.txt file distributed with
5 // this work for additional information regarding copyright ownership.
6 // Each contributor licenses this file to you under the OpenPegasus Open
7 // Source License; you may not use this file except in compliance with the
8 // License.
9 //
10 // Permission is hereby granted, free of charge, to any person obtaining a
11 // copy of this software and associated documentation files (the "Software"),
12 // to deal in the Software without restriction, including without limitation
13 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
14 // and/or sell copies of the Software, and to permit persons to whom the
15 // Software is furnished to do so, subject to the following conditions:
16 //
17 // The above copyright notice and this permission notice shall be included
18 // in all copies or substantial portions of the Software.
19 //
20 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 karl 1.1 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
24 // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 //
28 //////////////////////////////////////////////////////////////////////////
29 //
30 //%/////////////////////////////////////////////////////////////////////////////
31 //
32 /*
33 Current regex definition the same as CQL Basic LIKE Regular expressions
34 See DSP0202 v 1.0, section C.1
35 Regular Expression Parser.
36 The CQL parser regular expression is
37 * matches zero or more instances of the preceding single character.
38 . matches any single character
39 \ Escape the next character (next must be *, . or \)
40 \\ The backslash character
41 */
42
43 karl 1.1 // TODO: This was taken from CQL and we need to merge the code so that there
44 // is only one function for the two. KS August 2014
45
46 /*
47 The goal is to produce a full Regex tool in accord with DSP1001
48 Annex B
49
50 The FQL regular expressions defined in DSP1001 are a subset of
51 UNIX Regular Expressions as follows
52
53 The abnf for the regex is documented in the readme in this
54 directory. NOTE: Today that is the goal.
55 */
56
57 #include "FQLRegularExpression.h"
58 #include <Pegasus/Common/Char16.h>
59 #include <Pegasus/Common/CommonUTF.h>
60
61 PEGASUS_USING_STD;
62
// Local trace support. The trace variants are compiled only when
// FQL_DOTRACE is defined (expected to come from the Makefile).
//
//   DCOUT          - stream prefix that prints the file name and line number
//   DISPSTRANDPAT  - prints the current strIndex / patIndex
//   MATCHRETURN(v) - prints the matcher state and then returns v
//
// The trace variants reference the names `string`, `pattern`, `strIndex`
// and `patIndex`, so they can only be used where all four are in scope.
// The trace form of MATCHRETURN expands to two statements; always use it
// inside a braced block.
#ifdef FQL_DOTRACE
#define DCOUT if (true) cout << __FILE__ << ":" << __LINE__ << " "
#define COMPILE_LOCAL_TRACE
#define DISPSTRANDPAT DCOUT << "RegularExpression Line " \
    << " strIndex " << strIndex \
    << " patIndex " << patIndex << endl
// macro to conditionally display return information
// (the pattern element at patIndex is taken from `pattern`, not `string`)
#define MATCHRETURN(RTN_VALUE) \
    DCOUT << "RegularExpression rtns " << RTN_VALUE << " " << __LINE__ \
    << " strIndex " << strIndex << " \'" \
    << string.subString(strIndex,1) << "\' " \
    << " string.size() " << string.size() \
    << " patIndex \'" << patIndex << pattern.subString(patIndex,1) \
    << "\' " << " pattern.size() " << pattern.size()<< endl; \
    return RTN_VALUE
#else
// Trace disabled: MATCHRETURN is a plain return, DISPSTRANDPAT is empty.
#define MATCHRETURN(RTN_VALUE) return RTN_VALUE
#define DISPSTRANDPAT
#endif
83
84 PEGASUS_NAMESPACE_BEGIN
85 karl 1.1
86 PEGASUS_USING_STD;
87
88 bool testSurrogates(const String & pattern, Uint32 patIndex)
89 {
90 return (((Uint16)pattern[patIndex] >= FIRST_HIGH_SURROGATE) &&
91 ((Uint16)pattern[patIndex] <= LAST_HIGH_SURROGATE)) ||
92 (((Uint16)pattern[patIndex] >= FIRST_LOW_SURROGATE) &&
93 ((Uint16)pattern[patIndex] <= LAST_LOW_SURROGATE));
94 }
95
96 bool matchChar(const String & pattern, const String & string,
97 Uint32 patIndex, Uint32 strIndex)
98 {
99 #ifdef COMPILE_LOCAL_TRACE
100 DCOUT << "RegularExpressionmatchChar Pattern \"" << pattern
101 << " pchar " << pattern.subString(patIndex,1)
102 << "\" String to match \"" << string << "\" "
103 << " strChar " << string.subString(strIndex,1) << endl;
104 #endif
105 if ( (pattern[patIndex] != string[strIndex]))
106 karl 1.1 {
107 MATCHRETURN(false) ;
108 }
109 else if (pattern[patIndex+1] != string[strIndex+1])
110 {
111 MATCHRETURN(false) ;
112 }
113 return true;
114 }
115
116 FQLRegularExpression::FQLRegularExpression(const String& pattern):
117 pattern(pattern)
118 {
119 }
120
121 FQLRegularExpression::~FQLRegularExpression()
122 {
123 }
124
125
126 Boolean FQLRegularExpression::match(const String& string)
127 karl 1.1 {
128 Uint32 patIndex = 0;
129 Uint32 strIndex = 0;
130
131 #ifdef COMPILE_LOCAL_TRACE
132 DCOUT << "RegularExpression Pattern \"" << pattern
133 << "\" String to match \"" << string << "\"" << endl;
134 #endif
135 // if either pattern or string are "EMPTY" you have an invalid String
136 if (pattern.size() == 0 || (string.size() == 0))
137 {
138 MATCHRETURN(false);
139 }
140
141 while (true)
142 {
143 #ifdef COMPILE_LOCAL_TRACE
144 DCOUT << "RegularExpression while true "
145 << " strIndex " << strIndex << " \'"
146 << string.subString(strIndex,1) << "\' "
147 << " patIndex \'" << patIndex << string.subString(strIndex,1)
148 karl 1.1 << "\' " << endl;
149 #endif
150 if ( (string.size() == strIndex) && (pattern.size() == patIndex))
151 {
152 MATCHRETURN(true);
153 }
154
155 else if ((string.size() == strIndex) || (pattern.size() == patIndex))
156 {
157 MATCHRETURN(false);
158 }
159
160 // Check if pattern equal to '.'
161 if (pattern[patIndex] == '.')
162 {
163 DISPSTRANDPAT;
164
165 //assumes a valid multi-byte pair has been passed
166 if (testSurrogates(pattern, patIndex))
167 {
168 patIndex ++;
169 karl 1.1 strIndex ++;
170 }
171
172 strIndex ++;
173 patIndex ++;
174
175 }
176 // Check if pattern char equal to '*'
177 else if (pattern[patIndex] == '*')
178 {
179 DISPSTRANDPAT;
180
181 // and if first char in pattern, illegal
182 if(patIndex == 0)
183 {
184 MATCHRETURN(false);
185 }
186 // and if prev char is escape char, DOT
187 if (pattern[patIndex-1] == '.')
188 {
189 if ((patIndex > 1) && pattern[patIndex-2] =='\\')
190 karl 1.1 {
191 if (string[strIndex] != '.')
192 {
193 MATCHRETURN(false);
194 }
195 }
196 else if (pattern.size()-1 == patIndex)
197 {
198 MATCHRETURN(true);
199 }
200 else if (string.size()-1 == strIndex)
201 {
202 MATCHRETURN(false);
203 }
204 }
205 // otherwise if prev char is backslash
206 else if (pattern[patIndex-1] == '\\')
207 {
208 if (pattern[patIndex-2] == '.')
209 {
210 if (string[strIndex] != '*')
211 karl 1.1 {
212 MATCHRETURN(false);
213 }
214 }
215 }
216 else if(testSurrogates(pattern, patIndex-2))
217 {
218 if (!matchChar(pattern, string, patIndex-2, strIndex))
219 {
220 MATCHRETURN(false);
221 }
222 else
223 {
224 strIndex ++;
225 }
226 }
227 else if (pattern[patIndex-1] != string[strIndex])
228 {
229 MATCHRETURN(false);
230 }
231
232 karl 1.1 while (true)
233 {
234 DISPSTRANDPAT;
235 strIndex ++;
236
237 if (pattern[patIndex-1] == '.')
238 {
239 if ((patIndex > 1) && (pattern[patIndex-2] =='\\'))
240 {
241 if (string[strIndex] != '.')
242 {
243 patIndex ++;
244 break;
245 }
246 }
247 else if (pattern[patIndex+1] == string[strIndex])
248 {
249 //make copies of the indexes in case you do not reach
250 //the end of the string
251 int stringOrig = strIndex;
252 int patternOrig = patIndex;
253 karl 1.1 patIndex++;
254
255 if (strIndex == string.size()-1 &&
256 patIndex == pattern.size()-1)
257 {
258 MATCHRETURN(true);
259 }
260 while (true)
261 {
262 strIndex++;
263 patIndex ++;
264 if (pattern[patIndex] != string[strIndex])
265 {
266 strIndex = stringOrig + 1;
267 patIndex = patternOrig;
268 break;
269 }
270 else if (strIndex == string.size()-1 &&
271 patIndex == pattern.size()-1)
272 {
273 break;
274 karl 1.1 }
275 patIndex++;
276 }
277 }
278 }
279 else if (pattern[patIndex-1] == '\\')
280 {
281 DISPSTRANDPAT;
282
283 if (pattern[patIndex-2] == '.')
284 {
285 if (string[strIndex] != '*')
286 {
287 patIndex ++;
288 break;
289 }
290 if (strIndex == string.size()-1 &&
291 patIndex == pattern.size()-1)
292 {
293 MATCHRETURN(true);
294 }
295 karl 1.1 while (true)
296 {
297 strIndex ++;
298 if (string[strIndex] != '*')
299 {
300 patIndex ++;
301 break;
302 }
303 if (strIndex == string.size()-1 &&
304 patIndex == pattern.size()-1)
305 {
306 MATCHRETURN(true);
307 }
308 }
309 }
310 }
311
312 else if(testSurrogates(pattern, patIndex-2))
313 {
314 DISPSTRANDPAT;
315
316 karl 1.1 if (!matchChar(pattern, string, patIndex-2, strIndex))
317 {
318 patIndex++;
319 break;
320 }
321 else
322 {
323 strIndex ++;
324 }
325 }
326 else if (pattern[patIndex-1] != string[strIndex])
327 {
328 patIndex ++;
329 break;
330 }
331
332 if (strIndex == string.size()-1 &&
333 patIndex == pattern.size()-1)
334 {
335 MATCHRETURN(true);
336 }
337 karl 1.1 else if (strIndex == string.size()-1)
338 {
339 MATCHRETURN(false);
340 }
341 }
342 }
343 // check if pattern equal to '\', the escape char
344 else if (pattern[patIndex] == '\\')
345 {
346 DISPSTRANDPAT;
347
348 patIndex ++;
349 if (testSurrogates(pattern, patIndex))
350 {
351 if (!matchChar(pattern, string, patIndex, strIndex))
352 {
353 MATCHRETURN(false);
354 }
355 else
356 {
357 patIndex ++;
358 karl 1.1 strIndex ++;
359 }
360 }
361 else
362 {
363 DISPSTRANDPAT;
364
365 if (pattern[patIndex] != string[strIndex])
366 {
367 MATCHRETURN(false);
368 }
369 if (strIndex == string.size()-1 &&
370 patIndex == pattern.size()-1)
371 {
372 MATCHRETURN(true);
373 }
374 strIndex ++;
375 patIndex ++;
376 }
377
378 //default, Test next character against pattern for equality
379 karl 1.1 }
380 else
381 {
382 DISPSTRANDPAT;
383 if (testSurrogates(pattern, patIndex))
384 {
385 if (!matchChar(pattern, string, patIndex, strIndex))
386 {
387 MATCHRETURN(false);
388 }
389 else
390 {
391 patIndex ++;
392 strIndex ++;
393 }
394 }
395 else if (pattern[patIndex] != string[strIndex])
396 {
397 MATCHRETURN(false);
398 }
399 patIndex ++;
400 karl 1.1 strIndex ++;
401 }
402 } // end of while loop
403 PEGASUS_UNREACHABLE( return false; )
404 }
405
406 PEGASUS_NAMESPACE_END
|