(file) Return to LanguageParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.25 //%2006////////////////////////////////////////////////////////////////////////
  2 humberto 1.8  //
  3 karl     1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4               // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5               // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl     1.9  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl     1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8               // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 karl     1.11 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10               // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 karl     1.25 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 12               // EMC Corporation; Symantec Corporation; The Open Group.
 13 humberto 1.8  //
 14               // Permission is hereby granted, free of charge, to any person obtaining a copy
 15               // of this software and associated documentation files (the "Software"), to
 16               // deal in the Software without restriction, including without limitation the
 17               // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 18               // sell copies of the Software, and to permit persons to whom the Software is
 19               // furnished to do so, subject to the following conditions:
 20               // 
 21               // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 22               // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 23               // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 24               // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 25               // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 26               // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 27               // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 28               // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 29               //
 30               //==============================================================================
 31               //
 32               //%/////////////////////////////////////////////////////////////////////////////
 33               
 34 humberto 1.8  #include <Pegasus/Common/LanguageParser.h>
 35               #include <Pegasus/Common/InternalException.h>
 36               #include <Pegasus/Common/Tracer.h>
 37 kumpf    1.30 #include <Pegasus/Common/MessageLoader.h>
 38 humberto 1.8  #include <cstring>
 39               
 40 kumpf    1.21 #ifdef PEGASUS_HAS_ICU
 41               # include <unicode/locid.h>
 42               #endif
 43               
 44 humberto 1.8  PEGASUS_NAMESPACE_BEGIN
 45               
 46 kumpf    1.21 static char LANGUAGE_TAG_SEPARATOR_CHAR = '-';
 47 kumpf    1.26 static char LOCALE_ID_SEPARATOR_CHAR = '_';
 48 kumpf    1.21 
 49 kumpf    1.23 AcceptLanguageList LanguageParser::parseAcceptLanguageHeader(
 50 kumpf    1.21     const String& acceptLanguageHeader)
 51               {
 52                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageHeader");
 53               
 54 kumpf    1.23     AcceptLanguageList acceptLanguages;
 55 kumpf    1.21 
 56                   try
 57                   {
 58                       Array<String> languageElements;
 59                       LanguageParser::_parseLanguageHeader(
 60                           acceptLanguageHeader,
 61                           languageElements);
 62               
 63                       for (Uint32 i = 0; i < languageElements.size(); i++)
 64                       {
 65                           String languageTagString;
 66                           Real32 qualityValue;
 67                           LanguageParser::_parseAcceptLanguageElement(
 68                               languageElements[i], languageTagString, qualityValue);
 69 kumpf    1.30             acceptLanguages.insert(
 70                               LanguageTag(languageTagString), qualityValue);
 71 kumpf    1.21         }
 72                   }
 73                   catch (Exception& e)
 74                   {
 75                       throw InvalidAcceptLanguageHeader(e.getMessage());
 76                   }
 77               
 78                   PEG_METHOD_EXIT();
 79                   return acceptLanguages;
 80               }
 81               
 82 kumpf    1.23 ContentLanguageList LanguageParser::parseContentLanguageHeader(
 83 kumpf    1.21     const String& contentLanguageHeader)
 84               {
 85                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageHeader");
 86               
 87 kumpf    1.23     ContentLanguageList contentLanguages;
 88 kumpf    1.21 
 89                   try
 90                   {
 91                       Array<String> languageElements;
 92                       LanguageParser::_parseLanguageHeader(
 93                           contentLanguageHeader,
 94                           languageElements);
 95               
 96                       for (Uint32 i = 0; i < languageElements.size(); i++)
 97                       {
 98                           contentLanguages.append(LanguageTag(languageElements[i]));
 99                       }
100                   }
101                   catch (Exception& e)
102                   {
103                       throw InvalidContentLanguageHeader(e.getMessage());
104                   }
105               
106                   PEG_METHOD_EXIT();
107                   return contentLanguages;
108               }
109 kumpf    1.21 
110               void LanguageParser::parseLanguageTag(
111 kumpf    1.24     const String& languageTagString,
112 kumpf    1.21     String& language,
113                   String& country,
114                   String& variant)
115               {
116                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageTag");
117               
118                   language.clear();
119                   country.clear();
120                   variant.clear();
121               
122                   if (languageTagString == "*")
123                   {
124                       // Parsing and validation is complete
125                       PEG_METHOD_EXIT();
126                       return;
127                   }
128               
129                   Boolean isStandardFormat = true;    // RFC 3066 (ISO 639, ISO 3166)
130                   Array<String> subtags;
131               
132                   _parseLanguageSubtags(subtags, languageTagString);
133 kumpf    1.21 
134                   // _parseLanguageSubtags() always returns at least one subtag.
135                   PEGASUS_ASSERT(subtags.size() > 0);
136               
137                   // Validate the primary subtag.
138                   // Given a languageTagString "en-US-mn" the language is "en".
139               
140                   language = subtags[0];
141               
142                   if ((language == "i") || (language == "x"))
143                   {
144                       // These primary tags are allowed, but are not ISO 639 compliant
145                       isStandardFormat = false;
146                       language.clear();
147                   }
148                   else if ((language.size() != 2) && (language.size() != 3))
149                   {
150                       // Except for "i" and "x", primary tags must be 2 or 3 characters,
151                       // according to RFC 3066.
152 kumpf    1.33 
153                       // Do not localize this message; it could cause recursion.
154 kumpf    1.21         PEG_METHOD_EXIT();
155 kumpf    1.33         throw Exception(Formatter::format(
156                           "Invalid language tag \"$0\".",
157                           languageTagString));
158 kumpf    1.21     }
159               
160                   if (subtags.size() == 1)
161                   {
162                       // If only the primary subtag is present, we are done!
163                       PEG_METHOD_EXIT();
164                       return;
165                   }
166               
167                   // Validate the second subtag.
168                   // Given a languageTagString "en-US-mn" the country is "US".
169               
170                   if (subtags[1].size() == 1)
171                   {
172                       // The second subtag may not be a single character according to
173                       // RFC 3066.
174 kumpf    1.33 
175                       // Do not localize this message; it could cause recursion.
176 kumpf    1.21         PEG_METHOD_EXIT();
177 kumpf    1.33         throw Exception(Formatter::format(
178                           "Invalid language tag \"$0\".",
179                           languageTagString));
180 kumpf    1.21     }
181               
182                   if (isStandardFormat)
183                   {
184                       Uint32 variantIndex = 1;
185               
186                       if (subtags[1].size() == 2)
187                       {
188                           country = subtags[1];
189                           variantIndex = 2;
190                       }
191               
192                       Uint32 numSubtags = subtags.size();
193               
194                       if (variantIndex < numSubtags)
195                       {
196                           variant = subtags[variantIndex++];
197               
198                           while (variantIndex < numSubtags)
199                           {
200                               variant.append(LANGUAGE_TAG_SEPARATOR_CHAR);
201 kumpf    1.21                 variant.append(subtags[variantIndex++]);
202                           }
203                       }
204                   }
205               
206                   PEG_METHOD_EXIT();
207               }
208               
209               void LanguageParser::validateQualityValue(Real32 quality)
210               {
211                   if ((quality > 1.0) || (quality < 0.0))
212                   {
213                       MessageLoaderParms parms(
214                           "Common.LanguageParser.INVALID_QUALITY_VALUE",
215                           "AcceptLanguage contains an invalid quality value");
216                       throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
217                   }
218               }
219               
220               String LanguageParser::buildAcceptLanguageHeader(
221 kumpf    1.23     const AcceptLanguageList& acceptLanguages)
222 kumpf    1.21 {
223                   String alString;
224                   Uint32 numAcceptLanguages = acceptLanguages.size();
225               
226                   for (Uint32 i = 0; i < numAcceptLanguages; i++)
227                   {
228                       alString.append(acceptLanguages.getLanguageTag(i).toString());
229               
230                       Real32 q = acceptLanguages.getQualityValue(i);
231                       if (q != 1.0)
232                       {
233                           char qValueString[6];
234                           sprintf(qValueString, "%4.3f", q);
235                           alString.append(";q=");
236                           alString.append(qValueString);
237                       }
238               
239                       if (i < numAcceptLanguages - 1)
240                       {
241                           alString.append(",");
242                       }
243 kumpf    1.21     }
244               
245                   return alString;
246               }
247               
248               String LanguageParser::buildContentLanguageHeader(
249 kumpf    1.23     const ContentLanguageList& contentLanguages)
250 kumpf    1.21 {
251                   String clString;
252                   Uint32 numContentLanguages = contentLanguages.size();
253               
254                   for (Uint32 i = 0; i < numContentLanguages; i++)
255                   {
256                       clString.append(contentLanguages.getLanguageTag(i).toString());
257               
258                       if (i < numContentLanguages - 1)
259                       {
260                           clString.append(",");
261                       }
262                   }
263               
264                   return clString;
265               }
266               
267 kumpf    1.26 #ifdef PEGASUS_HAS_ICU
268               String& LanguageParser::convertLocaleIdToLanguageTag(String& localeId)
269               {
270                   Uint32 index = 0;
271                   while ((index = localeId.find(index, LOCALE_ID_SEPARATOR_CHAR)) !=
272                               PEG_NOT_FOUND)
273                   {
274                       localeId[index] = LANGUAGE_TAG_SEPARATOR_CHAR;
275                   }
276                   return localeId;
277               }
278               #endif
279               
280 kumpf    1.23 AcceptLanguageList LanguageParser::getDefaultAcceptLanguages()
281 kumpf    1.21 {
282               #if defined(PEGASUS_HAS_MESSAGES) && defined(PEGASUS_HAS_ICU)
283                   Locale default_loc = Locale::getDefault();
284               
285 kumpf    1.26     String localeId = default_loc.getName();
286               
287 kumpf    1.21     try
288                   {
289 kumpf    1.26         return LanguageParser::parseAcceptLanguageHeader(
290                           convertLocaleIdToLanguageTag(localeId));
291 kumpf    1.21     }
292                   catch (const InvalidAcceptLanguageHeader& e)
293                   {
294                       Logger::put_l(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
295 kumpf    1.34             MessageLoaderParms(
296                               "src.Server.cimserver.FAILED_TO_SET_PROCESS_LOCALE",
297                               "Cannot convert the system process locale into a valid "
298                                   "Accept-Language format."));
299 kumpf    1.21         Logger::put(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
300                           e.getMessage());
301 kumpf    1.23         AcceptLanguageList al;
302 kumpf    1.22         al.insert(LanguageTag("*"), 1);
303                       return al;
304 kumpf    1.21     }
305 kumpf    1.22 #else
306 kumpf    1.23     return AcceptLanguageList();
307 kumpf    1.21 #endif
308               }
309               
310               void LanguageParser::_parseLanguageHeader(
311                   const String& languageHeaderValue,
312                   Array<String>& languageElements)
313               {
314                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageHeader");
315               
316                   languageElements.clear();
317                   String element;
318               
319                   for (Uint32 i=0, len=languageHeaderValue.size(); i<len; i++)
320                   {
321                       Char16 nextChar = languageHeaderValue[i];
322               
323                       if (isascii(nextChar) && isspace(nextChar))
324                       {
325                           // Ignore whitespace
326                       }
327                       else if (nextChar == '(')
328 kumpf    1.21         {
329                           // Ignore comments
330                           while (i < len)
331                           {
332                               // Search for the closing parenthesis
333                               if (languageHeaderValue[i] == ')')
334                               {
335                                   break;
336                               }
337               
338                               // Skip over escape characters
339                               if (languageHeaderValue[i] == '\\')
340                               {
341                                   i++;
342                               }
343               
344                               i++;
345                           }
346               
347                           // Check for a missing closing parenthesis
348                           if (i >= len)
349 kumpf    1.21             {
350                               MessageLoaderParms parms(
351                                   "Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
352                                   "Closing \")\" character is missing.");
353                               throw Exception(MessageLoader::getMessage(parms));
354                           }
355                       }
356                       else if (nextChar == ',')
357                       {
358                           // Check for the end of the element
359                           languageElements.append(element);
360                           element.clear();
361                       }
362                       else
363                       {
364                           // Unescape an escape character
365                           if ((nextChar == '\\') && (i < len-1))
366                           {
367                               nextChar = languageHeaderValue[++i];
368                           }
369               
370 kumpf    1.21             // Include this character in the value
371                           element.append(nextChar);
372                       }
373                   }
374               
375                   // Include the last element in the languageElements array
376                   languageElements.append(element);
377               
378                   PEG_METHOD_EXIT();
379               }
380               
381               void LanguageParser::_parseAcceptLanguageElement(
382                   const String& acceptLanguageElement,
383                   String& languageTag,
384                   Real32& quality)
385               {
386                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseAcceptLanguageElement");
387               
388                   // look for ';' in acceptLanguageElement, that means we have a
389                   // quality value to capture.  If not present, we only have a language tag.
390               
391 kumpf    1.21     Uint32 semicolonIndex = acceptLanguageElement.find(";");
392                   if (semicolonIndex != PEG_NOT_FOUND)
393                   {
394                       // Separate the language tag and quality value
395               
396                       String qualityString =
397                           acceptLanguageElement.subString(semicolonIndex+1);
398                       languageTag = acceptLanguageElement.subString(0, semicolonIndex);
399               
400                       // Parse the quality value
401 humberto 1.8  
402 kumpf    1.21         char dummyChar;
403                       int scanfConversions = sscanf(
404                           qualityString.getCString(),
405                           "q=%f%c", &quality, &dummyChar);
406               
407                       if ((scanfConversions != 1) ||
408                           (qualityString.size() > 7))
409                       {
410                           MessageLoaderParms parms(
411                               "Common.LanguageParser.INVALID_QUALITY_VALUE",
412                               "AcceptLanguage contains an invalid quality value");
413                           PEG_METHOD_EXIT();
414                           throw Exception(MessageLoader::getMessage(parms));
415                       }
416                   }
417                   else
418                   {
419                       languageTag = acceptLanguageElement;
420                       quality = 1.0;
421                   }
422               
423 kumpf    1.21     PEG_METHOD_EXIT();
424               }
425               
426               void LanguageParser::_parseLanguageSubtags(
427                   Array<String>& subtags,
428                   const String& languageTagString)
429               {
430                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageSubtags");
431               
432                   // Parse the language tag into subtags
433               
434                   Uint32 subtagIndex = 0;
435                   Uint32 separatorIndex;
436                   while ((separatorIndex = languageTagString.find(
437                               subtagIndex, LANGUAGE_TAG_SEPARATOR_CHAR)) != PEG_NOT_FOUND)
438                   {
439                       subtags.append(languageTagString.subString(
440                           subtagIndex, separatorIndex - subtagIndex));
441                       subtagIndex = separatorIndex + 1;
442                   }
443                   subtags.append(languageTagString.subString(subtagIndex));
444 kumpf    1.21 
445                   // Validate the syntax of each of the subtags
446               
447                   for (Uint32 i = 0, n = subtags.size(); i < n; i++)
448                   {
449                       if (((i == 0) && !_isValidPrimarySubtagSyntax(subtags[i])) ||
450                           ((i > 0) && !_isValidSubtagSyntax(subtags[i])))
451                       {
452                           MessageLoaderParms parms(
453                               "Common.LanguageParser.MALFORMED_LANGUAGE_TAG",
454                               "Malformed language tag \"$0\".", languageTagString);
455                           PEG_METHOD_EXIT();
456 marek    1.29             // throw Exception(MessageLoader::getMessage(parms));
457 marek    1.28             // do not localize message, requires a language tag for this
458                           // localization can cause recursion here
459                           throw Exception(parms.toString());
460 kumpf    1.21         }
461                   }
462               
463                   PEG_METHOD_EXIT();
464 humberto 1.8  }
465               
466 kumpf    1.21 Boolean LanguageParser::_isValidPrimarySubtagSyntax(const String& subtag)
467               {
468                   if ((subtag.size() == 0) || (subtag.size() > 8))
469                   {
470                       return false;
471                   }
472               
473                   for (Uint32 i = 0, n = subtag.size(); i < n; i++)
474                   {
475                       if (!(isascii(subtag[i]) && isalpha(subtag[i])))
476                       {
477                           return false;
478                       }
479                   }
480               
481                   return true;
482               }
483               
484               Boolean LanguageParser::_isValidSubtagSyntax(const String& subtag)
485               {
486                   if ((subtag.size() == 0) || (subtag.size() > 8))
487 kumpf    1.21     {
488                       return false;
489                   }
490               
491                   for (Uint32 i = 0, n = subtag.size(); i < n; i++)
492                   {
493                       if (!(isascii(subtag[i]) && isalnum(subtag[i])))
494                       {
495                           return false;
496                       }
497                   }
498               
499                   return true;
500               }
501 humberto 1.8  
502               PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2