(file) Return to LanguageParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 martin 1.35 //%LICENSE////////////////////////////////////////////////////////////////
  2 martin 1.36 //
  3 martin 1.35 // Licensed to The Open Group (TOG) under one or more contributor license
  4             // agreements.  Refer to the OpenPegasusNOTICE.txt file distributed with
  5             // this work for additional information regarding copyright ownership.
  6             // Each contributor licenses this file to you under the OpenPegasus Open
  7             // Source License; you may not use this file except in compliance with the
  8             // License.
  9 martin 1.36 //
 10 martin 1.35 // Permission is hereby granted, free of charge, to any person obtaining a
 11             // copy of this software and associated documentation files (the "Software"),
 12             // to deal in the Software without restriction, including without limitation
 13             // the rights to use, copy, modify, merge, publish, distribute, sublicense,
 14             // and/or sell copies of the Software, and to permit persons to whom the
 15             // Software is furnished to do so, subject to the following conditions:
 16 martin 1.36 //
 17 martin 1.35 // The above copyright notice and this permission notice shall be included
 18             // in all copies or substantial portions of the Software.
 19 martin 1.36 //
 20 martin 1.35 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 21 martin 1.36 // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 22 martin 1.35 // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 23             // IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 24             // CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 25             // TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 26             // SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 27 martin 1.36 //
 28 martin 1.35 //////////////////////////////////////////////////////////////////////////
 29 humberto 1.8  //
 30               //%/////////////////////////////////////////////////////////////////////////////
 31               
 32               #include <Pegasus/Common/LanguageParser.h>
 33               #include <Pegasus/Common/InternalException.h>
 34               #include <Pegasus/Common/Tracer.h>
 35 kumpf    1.30 #include <Pegasus/Common/MessageLoader.h>
 36 humberto 1.8  #include <cstring>
 37               
 38 kumpf    1.21 #ifdef PEGASUS_HAS_ICU
 39               # include <unicode/locid.h>
 40               #endif
 41               
 42 humberto 1.8  PEGASUS_NAMESPACE_BEGIN
 43               
 44 kumpf    1.21 static char LANGUAGE_TAG_SEPARATOR_CHAR = '-';
 45               
 46 kumpf    1.23 AcceptLanguageList LanguageParser::parseAcceptLanguageHeader(
 47 kumpf    1.21     const String& acceptLanguageHeader)
 48               {
 49                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageHeader");
 50               
 51 kumpf    1.23     AcceptLanguageList acceptLanguages;
 52 kumpf    1.21 
 53                   try
 54                   {
 55                       Array<String> languageElements;
 56                       LanguageParser::_parseLanguageHeader(
 57                           acceptLanguageHeader,
 58                           languageElements);
 59               
 60                       for (Uint32 i = 0; i < languageElements.size(); i++)
 61                       {
 62                           String languageTagString;
 63                           Real32 qualityValue;
 64                           LanguageParser::_parseAcceptLanguageElement(
 65                               languageElements[i], languageTagString, qualityValue);
 66 kumpf    1.30             acceptLanguages.insert(
 67                               LanguageTag(languageTagString), qualityValue);
 68 kumpf    1.21         }
 69                   }
 70                   catch (Exception& e)
 71                   {
 72                       throw InvalidAcceptLanguageHeader(e.getMessage());
 73                   }
 74               
 75                   PEG_METHOD_EXIT();
 76                   return acceptLanguages;
 77               }
 78               
 79 kumpf    1.23 ContentLanguageList LanguageParser::parseContentLanguageHeader(
 80 kumpf    1.21     const String& contentLanguageHeader)
 81               {
 82                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageHeader");
 83               
 84 kumpf    1.23     ContentLanguageList contentLanguages;
 85 kumpf    1.21 
 86                   try
 87                   {
 88                       Array<String> languageElements;
 89                       LanguageParser::_parseLanguageHeader(
 90                           contentLanguageHeader,
 91                           languageElements);
 92               
 93                       for (Uint32 i = 0; i < languageElements.size(); i++)
 94                       {
 95                           contentLanguages.append(LanguageTag(languageElements[i]));
 96                       }
 97                   }
 98                   catch (Exception& e)
 99                   {
100                       throw InvalidContentLanguageHeader(e.getMessage());
101                   }
102               
103                   PEG_METHOD_EXIT();
104                   return contentLanguages;
105               }
106 kumpf    1.21 
107               void LanguageParser::parseLanguageTag(
108 kumpf    1.24     const String& languageTagString,
109 kumpf    1.21     String& language,
110                   String& country,
111                   String& variant)
112               {
113                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageTag");
114               
115                   language.clear();
116                   country.clear();
117                   variant.clear();
118               
119                   if (languageTagString == "*")
120                   {
121                       // Parsing and validation is complete
122                       PEG_METHOD_EXIT();
123                       return;
124                   }
125               
126                   Boolean isStandardFormat = true;    // RFC 3066 (ISO 639, ISO 3166)
127                   Array<String> subtags;
128               
129                   _parseLanguageSubtags(subtags, languageTagString);
130 kumpf    1.21 
131                   // _parseLanguageSubtags() always returns at least one subtag.
132                   PEGASUS_ASSERT(subtags.size() > 0);
133               
134                   // Validate the primary subtag.
135                   // Given a languageTagString "en-US-mn" the language is "en".
136               
137                   language = subtags[0];
138               
139                   if ((language == "i") || (language == "x"))
140                   {
141                       // These primary tags are allowed, but are not ISO 639 compliant
142                       isStandardFormat = false;
143                       language.clear();
144                   }
145                   else if ((language.size() != 2) && (language.size() != 3))
146                   {
147                       // Except for "i" and "x", primary tags must be 2 or 3 characters,
148                       // according to RFC 3066.
149 kumpf    1.33 
150                       // Do not localize this message; it could cause recursion.
151 kumpf    1.21         PEG_METHOD_EXIT();
152 kumpf    1.33         throw Exception(Formatter::format(
153                           "Invalid language tag \"$0\".",
154                           languageTagString));
155 kumpf    1.21     }
156               
157                   if (subtags.size() == 1)
158                   {
159                       // If only the primary subtag is present, we are done!
160                       PEG_METHOD_EXIT();
161                       return;
162                   }
163               
164                   // Validate the second subtag.
165                   // Given a languageTagString "en-US-mn" the country is "US".
166               
167                   if (subtags[1].size() == 1)
168                   {
169                       // The second subtag may not be a single character according to
170                       // RFC 3066.
171 kumpf    1.33 
172                       // Do not localize this message; it could cause recursion.
173 kumpf    1.21         PEG_METHOD_EXIT();
174 kumpf    1.33         throw Exception(Formatter::format(
175                           "Invalid language tag \"$0\".",
176                           languageTagString));
177 kumpf    1.21     }
178               
179                   if (isStandardFormat)
180                   {
181                       Uint32 variantIndex = 1;
182               
183                       if (subtags[1].size() == 2)
184                       {
185                           country = subtags[1];
186                           variantIndex = 2;
187                       }
188               
189                       Uint32 numSubtags = subtags.size();
190               
191                       if (variantIndex < numSubtags)
192                       {
193                           variant = subtags[variantIndex++];
194               
195                           while (variantIndex < numSubtags)
196                           {
197                               variant.append(LANGUAGE_TAG_SEPARATOR_CHAR);
198 kumpf    1.21                 variant.append(subtags[variantIndex++]);
199                           }
200                       }
201                   }
202               
203                   PEG_METHOD_EXIT();
204               }
205               
206               void LanguageParser::validateQualityValue(Real32 quality)
207               {
208                   if ((quality > 1.0) || (quality < 0.0))
209                   {
210                       MessageLoaderParms parms(
211                           "Common.LanguageParser.INVALID_QUALITY_VALUE",
212                           "AcceptLanguage contains an invalid quality value");
213                       throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
214                   }
215               }
216               
217               String LanguageParser::buildAcceptLanguageHeader(
218 kumpf    1.23     const AcceptLanguageList& acceptLanguages)
219 kumpf    1.21 {
220                   String alString;
221                   Uint32 numAcceptLanguages = acceptLanguages.size();
222               
223                   for (Uint32 i = 0; i < numAcceptLanguages; i++)
224                   {
225                       alString.append(acceptLanguages.getLanguageTag(i).toString());
226               
227                       Real32 q = acceptLanguages.getQualityValue(i);
228                       if (q != 1.0)
229                       {
230                           char qValueString[6];
231                           sprintf(qValueString, "%4.3f", q);
232                           alString.append(";q=");
233                           alString.append(qValueString);
234                       }
235               
236                       if (i < numAcceptLanguages - 1)
237                       {
238                           alString.append(",");
239                       }
240 kumpf    1.21     }
241               
242                   return alString;
243               }
244               
245               String LanguageParser::buildContentLanguageHeader(
246 kumpf    1.23     const ContentLanguageList& contentLanguages)
247 kumpf    1.21 {
248                   String clString;
249                   Uint32 numContentLanguages = contentLanguages.size();
250               
251                   for (Uint32 i = 0; i < numContentLanguages; i++)
252                   {
253                       clString.append(contentLanguages.getLanguageTag(i).toString());
254               
255                       if (i < numContentLanguages - 1)
256                       {
257                           clString.append(",");
258                       }
259                   }
260               
261                   return clString;
262               }
263               
264 kumpf    1.26 #ifdef PEGASUS_HAS_ICU
265               String& LanguageParser::convertLocaleIdToLanguageTag(String& localeId)
266               {
267 marek    1.37     static char LOCALE_ID_SEPARATOR_CHAR = '_';
268 kumpf    1.26     Uint32 index = 0;
269                   while ((index = localeId.find(index, LOCALE_ID_SEPARATOR_CHAR)) !=
270                               PEG_NOT_FOUND)
271                   {
272                       localeId[index] = LANGUAGE_TAG_SEPARATOR_CHAR;
273                   }
274                   return localeId;
275               }
276               #endif
277               
278 kumpf    1.23 AcceptLanguageList LanguageParser::getDefaultAcceptLanguages()
279 kumpf    1.21 {
280               #if defined(PEGASUS_HAS_MESSAGES) && defined(PEGASUS_HAS_ICU)
281                   Locale default_loc = Locale::getDefault();
282               
283 kumpf    1.26     String localeId = default_loc.getName();
284               
285 kumpf    1.21     try
286                   {
287 kumpf    1.26         return LanguageParser::parseAcceptLanguageHeader(
288                           convertLocaleIdToLanguageTag(localeId));
289 kumpf    1.21     }
290                   catch (const InvalidAcceptLanguageHeader& e)
291                   {
292                       Logger::put_l(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
293 kumpf    1.34             MessageLoaderParms(
294                               "src.Server.cimserver.FAILED_TO_SET_PROCESS_LOCALE",
295                               "Cannot convert the system process locale into a valid "
296                                   "Accept-Language format."));
297 kumpf    1.21         Logger::put(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
298                           e.getMessage());
299 kumpf    1.23         AcceptLanguageList al;
300 kumpf    1.22         al.insert(LanguageTag("*"), 1);
301                       return al;
302 kumpf    1.21     }
303 kumpf    1.22 #else
304 kumpf    1.23     return AcceptLanguageList();
305 kumpf    1.21 #endif
306               }
307               
308               void LanguageParser::_parseLanguageHeader(
309                   const String& languageHeaderValue,
310                   Array<String>& languageElements)
311               {
312                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageHeader");
313               
314                   languageElements.clear();
315                   String element;
316               
317                   for (Uint32 i=0, len=languageHeaderValue.size(); i<len; i++)
318                   {
319                       Char16 nextChar = languageHeaderValue[i];
320               
321                       if (isascii(nextChar) && isspace(nextChar))
322                       {
323                           // Ignore whitespace
324                       }
325                       else if (nextChar == '(')
326 kumpf    1.21         {
327                           // Ignore comments
328                           while (i < len)
329                           {
330                               // Search for the closing parenthesis
331                               if (languageHeaderValue[i] == ')')
332                               {
333                                   break;
334                               }
335               
336                               // Skip over escape characters
337                               if (languageHeaderValue[i] == '\\')
338                               {
339                                   i++;
340                               }
341               
342                               i++;
343                           }
344               
345                           // Check for a missing closing parenthesis
346                           if (i >= len)
347 kumpf    1.21             {
348                               MessageLoaderParms parms(
349                                   "Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
350                                   "Closing \")\" character is missing.");
351                               throw Exception(MessageLoader::getMessage(parms));
352                           }
353                       }
354                       else if (nextChar == ',')
355                       {
356                           // Check for the end of the element
357                           languageElements.append(element);
358                           element.clear();
359                       }
360                       else
361                       {
362                           // Unescape an escape character
363                           if ((nextChar == '\\') && (i < len-1))
364                           {
365                               nextChar = languageHeaderValue[++i];
366                           }
367               
368 kumpf    1.21             // Include this character in the value
369                           element.append(nextChar);
370                       }
371                   }
372               
373                   // Include the last element in the languageElements array
374                   languageElements.append(element);
375               
376                   PEG_METHOD_EXIT();
377               }
378               
379               void LanguageParser::_parseAcceptLanguageElement(
380                   const String& acceptLanguageElement,
381                   String& languageTag,
382                   Real32& quality)
383               {
384                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseAcceptLanguageElement");
385               
386                   // look for ';' in acceptLanguageElement, that means we have a
387                   // quality value to capture.  If not present, we only have a language tag.
388               
389 kumpf    1.21     Uint32 semicolonIndex = acceptLanguageElement.find(";");
390                   if (semicolonIndex != PEG_NOT_FOUND)
391                   {
392                       // Separate the language tag and quality value
393               
394                       String qualityString =
395                           acceptLanguageElement.subString(semicolonIndex+1);
396                       languageTag = acceptLanguageElement.subString(0, semicolonIndex);
397               
398                       // Parse the quality value
399 humberto 1.8  
400 kumpf    1.21         char dummyChar;
401                       int scanfConversions = sscanf(
402                           qualityString.getCString(),
403                           "q=%f%c", &quality, &dummyChar);
404               
405                       if ((scanfConversions != 1) ||
406                           (qualityString.size() > 7))
407                       {
408                           MessageLoaderParms parms(
409                               "Common.LanguageParser.INVALID_QUALITY_VALUE",
410                               "AcceptLanguage contains an invalid quality value");
411                           PEG_METHOD_EXIT();
412                           throw Exception(MessageLoader::getMessage(parms));
413                       }
414                   }
415                   else
416                   {
417                       languageTag = acceptLanguageElement;
418                       quality = 1.0;
419                   }
420               
421 kumpf    1.21     PEG_METHOD_EXIT();
422               }
423               
424               void LanguageParser::_parseLanguageSubtags(
425                   Array<String>& subtags,
426                   const String& languageTagString)
427               {
428                   PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageSubtags");
429               
430                   // Parse the language tag into subtags
431               
432                   Uint32 subtagIndex = 0;
433                   Uint32 separatorIndex;
434                   while ((separatorIndex = languageTagString.find(
435                               subtagIndex, LANGUAGE_TAG_SEPARATOR_CHAR)) != PEG_NOT_FOUND)
436                   {
437                       subtags.append(languageTagString.subString(
438                           subtagIndex, separatorIndex - subtagIndex));
439                       subtagIndex = separatorIndex + 1;
440                   }
441                   subtags.append(languageTagString.subString(subtagIndex));
442 kumpf    1.21 
443                   // Validate the syntax of each of the subtags
444               
445                   for (Uint32 i = 0, n = subtags.size(); i < n; i++)
446                   {
447                       if (((i == 0) && !_isValidPrimarySubtagSyntax(subtags[i])) ||
448                           ((i > 0) && !_isValidSubtagSyntax(subtags[i])))
449                       {
450 marek    1.38             // throw Exception(MessageLoader::getMessage(parms));
451                           // do not localize message, requires a language tag for this
452                           // localization can cause recursion here
453                           // MessageLoaderParms::toString adds 5kb static code size, Do NOT
454                           // include in non-debug builds
455               #ifdef PEGASUS_DEBUG
456 kumpf    1.21             MessageLoaderParms parms(
457                               "Common.LanguageParser.MALFORMED_LANGUAGE_TAG",
458                               "Malformed language tag \"$0\".", languageTagString);
459                           PEG_METHOD_EXIT();
460 marek    1.28             throw Exception(parms.toString());
461 marek    1.38 #else
462                           String malFormed("Malformed language tag:");
463                           malFormed.append(languageTagString);
464                           PEG_METHOD_EXIT();
465                           throw Exception(malFormed);
466               #endif
467 kumpf    1.21         }
468                   }
469               
470                   PEG_METHOD_EXIT();
471 humberto 1.8  }
472               
473 kumpf    1.21 Boolean LanguageParser::_isValidPrimarySubtagSyntax(const String& subtag)
474               {
475                   if ((subtag.size() == 0) || (subtag.size() > 8))
476                   {
477                       return false;
478                   }
479               
480                   for (Uint32 i = 0, n = subtag.size(); i < n; i++)
481                   {
482                       if (!(isascii(subtag[i]) && isalpha(subtag[i])))
483                       {
484                           return false;
485                       }
486                   }
487               
488                   return true;
489               }
490               
491               Boolean LanguageParser::_isValidSubtagSyntax(const String& subtag)
492               {
493                   if ((subtag.size() == 0) || (subtag.size() > 8))
494 kumpf    1.21     {
495                       return false;
496                   }
497               
498                   for (Uint32 i = 0, n = subtag.size(); i < n; i++)
499                   {
500                       if (!(isascii(subtag[i]) && isalnum(subtag[i])))
501                       {
502                           return false;
503                       }
504                   }
505               
506                   return true;
507               }
508 humberto 1.8  
509               PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2