(file) Return to FQLRegularExpression.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / FQL

  1 karl  1.1 //%LICENSE////////////////////////////////////////////////////////////////
  2           //
  3           // Licensed to The Open Group (TOG) under one or more contributor license
  4           // agreements.  Refer to the OpenPegasusNOTICE.txt file distributed with
  5           // this work for additional information regarding copyright ownership.
  6           // Each contributor licenses this file to you under the OpenPegasus Open
  7           // Source License; you may not use this file except in compliance with the
  8           // License.
  9           //
 10           // Permission is hereby granted, free of charge, to any person obtaining a
 11           // copy of this software and associated documentation files (the "Software"),
 12           // to deal in the Software without restriction, including without limitation
 13           // the rights to use, copy, modify, merge, publish, distribute, sublicense,
 14           // and/or sell copies of the Software, and to permit persons to whom the
 15           // Software is furnished to do so, subject to the following conditions:
 16           //
 17           // The above copyright notice and this permission notice shall be included
 18           // in all copies or substantial portions of the Software.
 19           //
 20           // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 21           // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 22 karl  1.1 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 23           // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 24           // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 25           // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 26           // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 27           //
 28           //////////////////////////////////////////////////////////////////////////
 29           //
 30           //%/////////////////////////////////////////////////////////////////////////////
 31           //
 32           /*
 33               Current regex definition the same as CQL Basic LIKE Regular expressions
 34               See DSP0202 v 1.0, section C.1
 35                   Regular Expression Parser.
 36               The CQL parser regular expression is
 37               * matches zero or more instances of the preceding single character.
 38               . matches any single character
 39               \ Escape the next character (next must be *, . or \)
 40               \\ The backslash character
 41           */
 42           
 43 karl  1.1 // TODO: This was taken from CQL and we need to merge the code so that there
 44           // is only one function for the two. KS August 2014
 45           
 46           /*
 47               The goal is to produce a full Regex tool in accord with DSP1001
 48               Annex B
 49           
 50               The FQL regular expressions defined in DSP1001 are a subset of
 51               UNIX Regular Expressions as follows
 52           
 53               The abnf for the regex is documented in the readme in this
 54               directory. NOTE: Today that is the goal.
 55           */
 56           
 57           #include "FQLRegularExpression.h"
 58           #include <Pegasus/Common/Char16.h>
 59           #include <Pegasus/Common/CommonUTF.h>
 60           
 61           PEGASUS_USING_STD;
 62           
 // Local trace support. FQL_DOTRACE is expected to be defined in the Makefile.
 //
 //   DCOUT            stream prefix carrying the current file and line
 //   DISPSTRANDPAT    dump the current strIndex / patIndex values
 //   MATCHRETURN(v)   return v from the enclosing function; when tracing is
 //                    enabled, first dump the match state.
 //
 // The tracing forms of DISPSTRANDPAT and MATCHRETURN expect variables named
 // string, pattern, strIndex and patIndex to be in scope at the point of use.
 #ifdef FQL_DOTRACE
 #define DCOUT cout << __FILE__ << ":" << __LINE__ << " "
 #define COMPILE_LOCAL_TRACE
 #define DISPSTRANDPAT  DCOUT << "RegularExpression Line " \
                         << " strIndex " << strIndex \
                         << " patIndex " << patIndex << endl
 // Conditionally display return information, then return.
 // Wrapped in do/while(0) so the macro behaves as a single statement even
 // when used under an unbraced if/else. The pattern character is taken from
 // pattern (the earlier form read it from string by mistake).
 #define MATCHRETURN(RTN_VALUE) \
     do \
     { \
         DCOUT << "RegularExpression rtns  " << RTN_VALUE << " " \
             <<  __LINE__ \
             << " strIndex " << strIndex << " \'" \
             << string.subString(strIndex,1) << "\' " \
             << " string.size() " << string.size() \
             << " patIndex \'" << patIndex << pattern.subString(patIndex,1) \
             << "\' " << " pattern.size() " << pattern.size()<< endl; \
         return RTN_VALUE; \
     } while (0)
 #else
 // Tracing disabled: MATCHRETURN is a plain return and DISPSTRANDPAT is empty.
 #define MATCHRETURN(rtnValue) return rtnValue
 #define DISPSTRANDPAT
 #endif
 83           
 84           PEGASUS_NAMESPACE_BEGIN
 85 karl  1.1 
 86           PEGASUS_USING_STD;
 87           
 88           bool testSurrogates(const String & pattern, Uint32 patIndex)
 89           {
 90           return (((Uint16)pattern[patIndex] >= FIRST_HIGH_SURROGATE) &&
 91                   ((Uint16)pattern[patIndex] <= LAST_HIGH_SURROGATE)) ||
 92                   (((Uint16)pattern[patIndex] >= FIRST_LOW_SURROGATE) &&
 93                   ((Uint16)pattern[patIndex] <= LAST_LOW_SURROGATE));
 94           }
 95           
 96           bool matchChar(const String & pattern, const String & string,
 97                           Uint32 patIndex, Uint32 strIndex)
 98           {
 99           #ifdef COMPILE_LOCAL_TRACE
100               DCOUT << "RegularExpressionmatchChar Pattern \"" << pattern
101                    << " pchar " << pattern.subString(patIndex,1)
102                    << "\" String to match \"" << string << "\" "
103                    << " strChar " << string.subString(strIndex,1) <<  endl;
104           #endif
105               if ( (pattern[patIndex] != string[strIndex]))
106 karl  1.1     {
107                   MATCHRETURN(false) ;
108               }
109               else if (pattern[patIndex+1] != string[strIndex+1])
110               {
111                   MATCHRETURN(false) ;
112               }
113               return true;
114           }
115           
116           FQLRegularExpression::FQLRegularExpression(const String& pattern):
117               pattern(pattern)
118           {
119           }
120           
121           FQLRegularExpression::~FQLRegularExpression()
122           {
123           }
124           
125           
126           Boolean FQLRegularExpression::match(const String& string)
127 karl  1.1 {
128               Uint32 patIndex = 0;
129               Uint32 strIndex = 0;
130           
131           #ifdef COMPILE_LOCAL_TRACE
132               DCOUT << "RegularExpression Pattern \"" << pattern
133                    << "\" String to match \"" << string << "\"" << endl;
134           #endif
135               // if either pattern or string are "EMPTY" you have an invalid String
136               if (pattern.size() == 0 || (string.size() == 0))
137               {
138                   MATCHRETURN(false);
139               }
140           
141               while (true)
142               {
143           #ifdef COMPILE_LOCAL_TRACE
144                   DCOUT << "RegularExpression while true "
145                         << " strIndex " << strIndex << " \'"
146                         << string.subString(strIndex,1) << "\' "
147                         << " patIndex \'" << patIndex << string.subString(strIndex,1)
148 karl  1.1               << "\' " << endl;
149           #endif
150                   if ( (string.size() == strIndex) && (pattern.size() == patIndex))
151                   {
152                       MATCHRETURN(true);
153                   }
154           
155                   else if ((string.size() == strIndex) || (pattern.size() == patIndex))
156                   {
157                       MATCHRETURN(false);
158                   }
159           
160                   //  Check if pattern equal to  '.'
161                   if (pattern[patIndex] == '.')
162                   {
163                       DISPSTRANDPAT;
164           
165                       //assumes a valid multi-byte pair has been passed
166                       if (testSurrogates(pattern, patIndex))
167                       {
168                           patIndex ++;
169 karl  1.1                 strIndex ++;
170                       }
171           
172                       strIndex ++;
173                       patIndex ++;
174           
175                   }
176                   // Check if pattern char  equal to '*'
177                   else if (pattern[patIndex] == '*')
178                   {
179                       DISPSTRANDPAT;
180           
181                       // and if first char in pattern, illegal
182                       if(patIndex == 0)
183                       {
184                           MATCHRETURN(false);
185                       }
186                       // and if prev char is escape char, DOT
187                       if (pattern[patIndex-1] == '.')
188                       {
189                           if ((patIndex > 1) && pattern[patIndex-2] =='\\')
190 karl  1.1                 {
191                               if (string[strIndex] != '.')
192                               {
193                                   MATCHRETURN(false);
194                               }
195                           }
196                           else if (pattern.size()-1 == patIndex)
197                           {
198                               MATCHRETURN(true);
199                           }
200                           else if (string.size()-1 == strIndex)
201                           {
202                               MATCHRETURN(false);
203                           }
204                       }
205                       // otherwise if prev char is backslash
206                       else if (pattern[patIndex-1] == '\\')
207                       {
208                           if (pattern[patIndex-2] == '.')
209                           {
210                               if (string[strIndex] != '*')
211 karl  1.1                     {
212                                   MATCHRETURN(false);
213                               }
214                           }
215                       }
216                       else if(testSurrogates(pattern, patIndex-2))
217                       {
218                           if (!matchChar(pattern, string, patIndex-2, strIndex))
219                           {
220                               MATCHRETURN(false);
221                           }
222                           else
223                           {
224                               strIndex ++;
225                           }
226                       }
227                       else if (pattern[patIndex-1] != string[strIndex])
228                       {
229                           MATCHRETURN(false);
230                       }
231           
232 karl  1.1             while (true)
233                       {
234                           DISPSTRANDPAT;
235                           strIndex ++;
236           
237                           if (pattern[patIndex-1] == '.')
238                           {
239                               if ((patIndex > 1) && (pattern[patIndex-2] =='\\'))
240                               {
241                                   if (string[strIndex] != '.')
242                                   {
243                                       patIndex ++;
244                                       break;
245                                   }
246                               }
247                               else if (pattern[patIndex+1] == string[strIndex])
248                               {
249                                   //make copies of the indexes in case you do not reach
250                                   //the end of the string
251                                   int stringOrig = strIndex;
252                                   int patternOrig = patIndex;
253 karl  1.1                         patIndex++;
254           
255                                   if (strIndex == string.size()-1 &&
256                                           patIndex == pattern.size()-1)
257                                   {
258                                       MATCHRETURN(true);
259                                   }
260                                   while (true)
261                                   {
262                                       strIndex++;
263                                       patIndex ++;
264                                       if (pattern[patIndex] != string[strIndex])
265                                       {
266                                           strIndex = stringOrig + 1;
267                                           patIndex = patternOrig;
268                                           break;
269                                       }
270                                       else if (strIndex == string.size()-1 &&
271                                              patIndex == pattern.size()-1)
272                                       {
273                                           break;
274 karl  1.1                             }
275                                       patIndex++;
276                                   }
277                               }
278                           }
279                           else if (pattern[patIndex-1] == '\\')
280                           {
281                               DISPSTRANDPAT;
282           
283                               if (pattern[patIndex-2] == '.')
284                               {
285                                   if (string[strIndex] != '*')
286                                   {
287                                       patIndex ++;
288                                       break;
289                                   }
290                                   if (strIndex == string.size()-1 &&
291                                       patIndex == pattern.size()-1)
292                                   {
293                                       MATCHRETURN(true);
294                                   }
295 karl  1.1                         while (true)
296                                   {
297                                       strIndex ++;
298                                       if (string[strIndex] != '*')
299                                       {
300                                           patIndex ++;
301                                           break;
302                                       }
303                                       if (strIndex == string.size()-1 &&
304                                           patIndex == pattern.size()-1)
305                                       {
306                                           MATCHRETURN(true);
307                                       }
308                                   }
309                               }
310                           }
311           
312                           else if(testSurrogates(pattern, patIndex-2))
313                           {
314                               DISPSTRANDPAT;
315           
316 karl  1.1                     if (!matchChar(pattern, string, patIndex-2, strIndex))
317                               {
318                                   patIndex++;
319                                   break;
320                               }
321                               else
322                               {
323                                   strIndex ++;
324                               }
325                           }
326                           else if (pattern[patIndex-1] != string[strIndex])
327                           {
328                               patIndex ++;
329                               break;
330                           }
331           
332                           if (strIndex == string.size()-1 &&
333                               patIndex == pattern.size()-1)
334                           {
335                               MATCHRETURN(true);
336                           }
337 karl  1.1                 else if (strIndex == string.size()-1)
338                           {
339                               MATCHRETURN(false);
340                           }
341                       }
342                   }
343                   // check if pattern equal to '\', the escape char
344                   else if (pattern[patIndex] == '\\')
345                   {
346                       DISPSTRANDPAT;
347           
348                       patIndex ++;
349                       if (testSurrogates(pattern, patIndex))
350                       {
351                           if (!matchChar(pattern, string, patIndex, strIndex))
352                           {
353                               MATCHRETURN(false);
354                           }
355                           else
356                           {
357                               patIndex ++;
358 karl  1.1                     strIndex ++;
359                           }
360                       }
361                       else
362                       {
363                           DISPSTRANDPAT;
364           
365                           if (pattern[patIndex] != string[strIndex])
366                           {
367                               MATCHRETURN(false);
368                           }
369                           if (strIndex == string.size()-1 &&
370                               patIndex == pattern.size()-1)
371                           {
372                               MATCHRETURN(true);
373                           }
374                           strIndex ++;
375                           patIndex ++;
376                       }
377           
378                   //default, Test next character against pattern for equality
379 karl  1.1         }
380                   else
381                   {
382                       DISPSTRANDPAT;
383                       if (testSurrogates(pattern, patIndex))
384                       {
385                           if (!matchChar(pattern, string, patIndex, strIndex))
386                           {
387                               MATCHRETURN(false);
388                           }
389                           else
390                           {
391                               patIndex ++;
392                               strIndex ++;
393                           }
394                       }
395                       else if (pattern[patIndex] != string[strIndex])
396                       {
397                           MATCHRETURN(false);
398                       }
399                       patIndex ++;
400 karl  1.1             strIndex ++;
401                   }
402               }   // end of while loop
403               PEGASUS_UNREACHABLE( return false; )
404           }
405           
406           PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2