(file) Return to LanguageParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.25 //%2006////////////////////////////////////////////////////////////////////////
  2 humberto 1.8  //
  3 karl     1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4               // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5               // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl     1.9  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl     1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8               // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 karl     1.11 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10               // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 karl     1.25 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 12               // EMC Corporation; Symantec Corporation; The Open Group.
 13 humberto 1.8  //
 14               // Permission is hereby granted, free of charge, to any person obtaining a copy
 15               // of this software and associated documentation files (the "Software"), to
 16               // deal in the Software without restriction, including without limitation the
 17               // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 18               // sell copies of the Software, and to permit persons to whom the Software is
 19               // furnished to do so, subject to the following conditions:
 20               // 
 21               // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 22               // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 23               // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 24               // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 25               // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 26               // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 27               // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 28               // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 29               //
 30               //==============================================================================
 31               //
 32               //%/////////////////////////////////////////////////////////////////////////////
 33               
 34 humberto 1.8  #include <Pegasus/Common/LanguageParser.h>
 35               #include <Pegasus/Common/InternalException.h>
 36               #include <Pegasus/Common/Tracer.h>
 37 kumpf    1.30 #include <Pegasus/Common/MessageLoader.h>
 38 humberto 1.8  #include <cstring>
 39               
 40 kumpf    1.21 #ifdef PEGASUS_HAS_ICU
 41               # include <unicode/locid.h>
 42               #endif
 43               
 44 humberto 1.8  PEGASUS_NAMESPACE_BEGIN
 45               
 46 kumpf    1.21 static char LANGUAGE_TAG_SEPARATOR_CHAR = '-';
 47 kumpf    1.26 static char LOCALE_ID_SEPARATOR_CHAR = '_';
 48 kumpf    1.21 
 49 kumpf    1.23 AcceptLanguageList LanguageParser::parseAcceptLanguageHeader(
 50 kumpf    1.21     const String& acceptLanguageHeader)
 51               {
 52                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageHeader");
 53               
 54 kumpf    1.23     AcceptLanguageList acceptLanguages;
 55 kumpf    1.21 
 56                   try
 57                   {
 58                       Array<String> languageElements;
 59                       LanguageParser::_parseLanguageHeader(
 60                           acceptLanguageHeader,
 61                           languageElements);
 62               
 63                       for (Uint32 i = 0; i < languageElements.size(); i++)
 64                       {
 65                           String languageTagString;
 66                           Real32 qualityValue;
 67                           LanguageParser::_parseAcceptLanguageElement(
 68                               languageElements[i], languageTagString, qualityValue);
 69 kumpf    1.30             acceptLanguages.insert(
 70                               LanguageTag(languageTagString), qualityValue);
 71 kumpf    1.21         }
 72                   }
 73                   catch (Exception& e)
 74                   {
 75                       throw InvalidAcceptLanguageHeader(e.getMessage());
 76                   }
 77               
 78                   PEG_METHOD_EXIT();
 79                   return acceptLanguages;
 80               }
 81               
 82 kumpf    1.23 ContentLanguageList LanguageParser::parseContentLanguageHeader(
 83 kumpf    1.21     const String& contentLanguageHeader)
 84               {
 85                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageHeader");
 86               
 87 kumpf    1.23     ContentLanguageList contentLanguages;
 88 kumpf    1.21 
 89                   try
 90                   {
 91                       Array<String> languageElements;
 92                       LanguageParser::_parseLanguageHeader(
 93                           contentLanguageHeader,
 94                           languageElements);
 95               
 96                       for (Uint32 i = 0; i < languageElements.size(); i++)
 97                       {
 98                           contentLanguages.append(LanguageTag(languageElements[i]));
 99                       }
100                   }
101                   catch (Exception& e)
102                   {
103                       throw InvalidContentLanguageHeader(e.getMessage());
104                   }
105               
106                   PEG_METHOD_EXIT();
107                   return contentLanguages;
108               }
109 kumpf    1.21 
110               void LanguageParser::parseLanguageTag(
111 kumpf    1.24     const String& languageTagString,
112 kumpf    1.21     String& language,
113                   String& country,
114                   String& variant)
115               {
116                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageTag");
117               
118                   language.clear();
119                   country.clear();
120                   variant.clear();
121               
122                   if (languageTagString == "*")
123                   {
124                       // Parsing and validation is complete
125                       PEG_METHOD_EXIT();
126                       return;
127                   }
128               
129                   Boolean isStandardFormat = true;    // RFC 3066 (ISO 639, ISO 3166)
130                   Array<String> subtags;
131               
132                   _parseLanguageSubtags(subtags, languageTagString);
133 kumpf    1.21 
134                   // _parseLanguageSubtags() always returns at least one subtag.
135                   PEGASUS_ASSERT(subtags.size() > 0);
136               
137                   // Validate the primary subtag.
138                   // Given a languageTagString "en-US-mn" the language is "en".
139               
140                   language = subtags[0];
141               
142                   if ((language == "i") || (language == "x"))
143                   {
144                       // These primary tags are allowed, but are not ISO 639 compliant
145                       isStandardFormat = false;
146                       language.clear();
147                   }
148                   else if ((language.size() != 2) && (language.size() != 3))
149                   {
150                       // Except for "i" and "x", primary tags must be 2 or 3 characters,
151                       // according to RFC 3066.
152                       MessageLoaderParms parms(
153                           "Common.LanguageParser.INVALID_LANGUAGE_TAG",
154 kumpf    1.21             "Invalid language tag \"$0\".", languageTagString);
155                       PEG_METHOD_EXIT();
156 marek    1.29         // throw Exception(MessageLoader::getMessage(parms));
157 marek    1.28         // do not localize message, requires a language tag for this
158                       // localization can cause recursion here
159                       throw Exception(parms.toString());
160 kumpf    1.21     }
161               
162                   if (subtags.size() == 1)
163                   {
164                       // If only the primary subtag is present, we are done!
165                       PEG_METHOD_EXIT();
166                       return;
167                   }
168               
169                   // Validate the second subtag.
170                   // Given a languageTagString "en-US-mn" the country is "US".
171               
172                   if (subtags[1].size() == 1)
173                   {
174                       // The second subtag may not be a single character according to
175                       // RFC 3066.
176                       MessageLoaderParms parms(
177                           "Common.LanguageParser.INVALID_LANGUAGE_TAG",
178                           "Invalid language tag \"$0\".", languageTagString);
179                       PEG_METHOD_EXIT();
180 marek    1.29         // throw Exception(MessageLoader::getMessage(parms));
181 marek    1.28         // do not localize message, requires a language tag for this
182                       // localization can cause recursion here
183                       throw Exception(parms.toString());
184 kumpf    1.21     }
185               
186                   if (isStandardFormat)
187                   {
188                       Uint32 variantIndex = 1;
189               
190                       if (subtags[1].size() == 2)
191                       {
192                           country = subtags[1];
193                           variantIndex = 2;
194                       }
195               
196                       Uint32 numSubtags = subtags.size();
197               
198                       if (variantIndex < numSubtags)
199                       {
200                           variant = subtags[variantIndex++];
201               
202                           while (variantIndex < numSubtags)
203                           {
204                               variant.append(LANGUAGE_TAG_SEPARATOR_CHAR);
205 kumpf    1.21                 variant.append(subtags[variantIndex++]);
206                           }
207                       }
208                   }
209               
210                   PEG_METHOD_EXIT();
211               }
212               
213               void LanguageParser::validateQualityValue(Real32 quality)
214               {
215                   if ((quality > 1.0) || (quality < 0.0))
216                   {
217                       MessageLoaderParms parms(
218                           "Common.LanguageParser.INVALID_QUALITY_VALUE",
219                           "AcceptLanguage contains an invalid quality value");
220                       throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
221                   }
222               }
223               
224               String LanguageParser::buildAcceptLanguageHeader(
225 kumpf    1.23     const AcceptLanguageList& acceptLanguages)
226 kumpf    1.21 {
227                   String alString;
228                   Uint32 numAcceptLanguages = acceptLanguages.size();
229               
230                   for (Uint32 i = 0; i < numAcceptLanguages; i++)
231                   {
232                       alString.append(acceptLanguages.getLanguageTag(i).toString());
233               
234                       Real32 q = acceptLanguages.getQualityValue(i);
235                       if (q != 1.0)
236                       {
237                           char qValueString[6];
238                           sprintf(qValueString, "%4.3f", q);
239                           alString.append(";q=");
240                           alString.append(qValueString);
241                       }
242               
243                       if (i < numAcceptLanguages - 1)
244                       {
245                           alString.append(",");
246                       }
247 kumpf    1.21     }
248               
249                   return alString;
250               }
251               
252               String LanguageParser::buildContentLanguageHeader(
253 kumpf    1.23     const ContentLanguageList& contentLanguages)
254 kumpf    1.21 {
255                   String clString;
256                   Uint32 numContentLanguages = contentLanguages.size();
257               
258                   for (Uint32 i = 0; i < numContentLanguages; i++)
259                   {
260                       clString.append(contentLanguages.getLanguageTag(i).toString());
261               
262                       if (i < numContentLanguages - 1)
263                       {
264                           clString.append(",");
265                       }
266                   }
267               
268                   return clString;
269               }
270               
271 kumpf    1.26 #ifdef PEGASUS_HAS_ICU
272               String& LanguageParser::convertLocaleIdToLanguageTag(String& localeId)
273               {
274                   Uint32 index = 0;
275                   while ((index = localeId.find(index, LOCALE_ID_SEPARATOR_CHAR)) !=
276                               PEG_NOT_FOUND)
277                   {
278                       localeId[index] = LANGUAGE_TAG_SEPARATOR_CHAR;
279                   }
280                   return localeId;
281               }
282               #endif
283               
284 kumpf    1.23 AcceptLanguageList LanguageParser::getDefaultAcceptLanguages()
285 kumpf    1.21 {
286               #if defined(PEGASUS_HAS_MESSAGES) && defined(PEGASUS_HAS_ICU)
287                   Locale default_loc = Locale::getDefault();
288               
289 kumpf    1.26     String localeId = default_loc.getName();
290               
291 kumpf    1.21     try
292                   {
293 kumpf    1.26         return LanguageParser::parseAcceptLanguageHeader(
294                           convertLocaleIdToLanguageTag(localeId));
295 kumpf    1.21     }
296                   catch (const InvalidAcceptLanguageHeader& e)
297                   {
298                       Logger::put_l(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
299 kumpf    1.32             "src.Server.cimserver.FAILED_TO_SET_PROCESS_LOCALE",
300                           "Cannot convert the system process locale into a valid "
301                               "Accept-Language format.");
302 kumpf    1.21         Logger::put(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
303                           e.getMessage());
304 kumpf    1.23         AcceptLanguageList al;
305 kumpf    1.22         al.insert(LanguageTag("*"), 1);
306                       return al;
307 kumpf    1.21     }
308 kumpf    1.22 #else
309 kumpf    1.23     return AcceptLanguageList();
310 kumpf    1.21 #endif
311               }
312               
313               void LanguageParser::_parseLanguageHeader(
314                   const String& languageHeaderValue,
315                   Array<String>& languageElements)
316               {
317                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageHeader");
318               
319                   languageElements.clear();
320                   String element;
321               
322                   for (Uint32 i=0, len=languageHeaderValue.size(); i<len; i++)
323                   {
324                       Char16 nextChar = languageHeaderValue[i];
325               
326                       if (isascii(nextChar) && isspace(nextChar))
327                       {
328                           // Ignore whitespace
329                       }
330                       else if (nextChar == '(')
331 kumpf    1.21         {
332                           // Ignore comments
333                           while (i < len)
334                           {
335                               // Search for the closing parenthesis
336                               if (languageHeaderValue[i] == ')')
337                               {
338                                   break;
339                               }
340               
341                               // Skip over escape characters
342                               if (languageHeaderValue[i] == '\\')
343                               {
344                                   i++;
345                               }
346               
347                               i++;
348                           }
349               
350                           // Check for a missing closing parenthesis
351                           if (i >= len)
352 kumpf    1.21             {
353                               MessageLoaderParms parms(
354                                   "Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
355                                   "Closing \")\" character is missing.");
356                               throw Exception(MessageLoader::getMessage(parms));
357                           }
358                       }
359                       else if (nextChar == ',')
360                       {
361                           // Check for the end of the element
362                           languageElements.append(element);
363                           element.clear();
364                       }
365                       else
366                       {
367                           // Unescape an escape character
368                           if ((nextChar == '\\') && (i < len-1))
369                           {
370                               nextChar = languageHeaderValue[++i];
371                           }
372               
373 kumpf    1.21             // Include this character in the value
374                           element.append(nextChar);
375                       }
376                   }
377               
378                   // Include the last element in the languageElements array
379                   languageElements.append(element);
380               
381                   PEG_METHOD_EXIT();
382               }
383               
384               void LanguageParser::_parseAcceptLanguageElement(
385                   const String& acceptLanguageElement,
386                   String& languageTag,
387                   Real32& quality)
388               {
389                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseAcceptLanguageElement");
390               
391                   // look for ';' in acceptLanguageElement, that means we have a
392                   // quality value to capture.  If not present, we only have a language tag.
393               
394 kumpf    1.21     Uint32 semicolonIndex = acceptLanguageElement.find(";");
395                   if (semicolonIndex != PEG_NOT_FOUND)
396                   {
397                       // Separate the language tag and quality value
398               
399                       String qualityString =
400                           acceptLanguageElement.subString(semicolonIndex+1);
401                       languageTag = acceptLanguageElement.subString(0, semicolonIndex);
402               
403                       // Parse the quality value
404 humberto 1.8  
405 kumpf    1.21         char dummyChar;
406                       int scanfConversions = sscanf(
407                           qualityString.getCString(),
408                           "q=%f%c", &quality, &dummyChar);
409               
410                       if ((scanfConversions != 1) ||
411                           (qualityString.size() > 7))
412                       {
413                           MessageLoaderParms parms(
414                               "Common.LanguageParser.INVALID_QUALITY_VALUE",
415                               "AcceptLanguage contains an invalid quality value");
416                           PEG_METHOD_EXIT();
417                           throw Exception(MessageLoader::getMessage(parms));
418                       }
419                   }
420                   else
421                   {
422                       languageTag = acceptLanguageElement;
423                       quality = 1.0;
424                   }
425               
426 kumpf    1.21     PEG_METHOD_EXIT();
427               }
428               
429               void LanguageParser::_parseLanguageSubtags(
430                   Array<String>& subtags,
431                   const String& languageTagString)
432               {
433                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageSubtags");
434               
435                   // Parse the language tag into subtags
436               
437                   Uint32 subtagIndex = 0;
438                   Uint32 separatorIndex;
439                   while ((separatorIndex = languageTagString.find(
440                               subtagIndex, LANGUAGE_TAG_SEPARATOR_CHAR)) != PEG_NOT_FOUND)
441                   {
442                       subtags.append(languageTagString.subString(
443                           subtagIndex, separatorIndex - subtagIndex));
444                       subtagIndex = separatorIndex + 1;
445                   }
446                   subtags.append(languageTagString.subString(subtagIndex));
447 kumpf    1.21 
448                   // Validate the syntax of each of the subtags
449               
450                   for (Uint32 i = 0, n = subtags.size(); i < n; i++)
451                   {
452                       if (((i == 0) && !_isValidPrimarySubtagSyntax(subtags[i])) ||
453                           ((i > 0) && !_isValidSubtagSyntax(subtags[i])))
454                       {
455                           MessageLoaderParms parms(
456                               "Common.LanguageParser.MALFORMED_LANGUAGE_TAG",
457                               "Malformed language tag \"$0\".", languageTagString);
458                           PEG_METHOD_EXIT();
459 marek    1.29             // throw Exception(MessageLoader::getMessage(parms));
460 marek    1.28             // do not localize message, requires a language tag for this
461                           // localization can cause recursion here
462                           throw Exception(parms.toString());
463 kumpf    1.21         }
464                   }
465               
466                   PEG_METHOD_EXIT();
467 humberto 1.8  }
468               
469 kumpf    1.21 Boolean LanguageParser::_isValidPrimarySubtagSyntax(const String& subtag)
470               {
471                   if ((subtag.size() == 0) || (subtag.size() > 8))
472                   {
473                       return false;
474                   }
475               
476                   for (Uint32 i = 0, n = subtag.size(); i < n; i++)
477                   {
478                       if (!(isascii(subtag[i]) && isalpha(subtag[i])))
479                       {
480                           return false;
481                       }
482                   }
483               
484                   return true;
485               }
486               
487               Boolean LanguageParser::_isValidSubtagSyntax(const String& subtag)
488               {
489                   if ((subtag.size() == 0) || (subtag.size() > 8))
490 kumpf    1.21     {
491                       return false;
492                   }
493               
494                   for (Uint32 i = 0, n = subtag.size(); i < n; i++)
495                   {
496                       if (!(isascii(subtag[i]) && isalnum(subtag[i])))
497                       {
498                           return false;
499                       }
500                   }
501               
502                   return true;
503               }
504 humberto 1.8  
505               PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2