(file) Return to LanguageParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.25 //%2006////////////////////////////////////////////////////////////////////////
  2 humberto 1.8  //
  3 karl     1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4               // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5               // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl     1.9  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl     1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8               // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 karl     1.11 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10               // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 karl     1.25 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 12               // EMC Corporation; Symantec Corporation; The Open Group.
 13 humberto 1.8  //
 14               // Permission is hereby granted, free of charge, to any person obtaining a copy
 15               // of this software and associated documentation files (the "Software"), to
 16               // deal in the Software without restriction, including without limitation the
 17               // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 18               // sell copies of the Software, and to permit persons to whom the Software is
 19               // furnished to do so, subject to the following conditions:
 20               // 
 21               // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 22               // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 23               // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 24               // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 25               // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 26               // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 27               // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 28               // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 29               //
 30               //==============================================================================
 31               //
 32               // Author: Humberto Rivero (hurivero@us.ibm.com)
 33               //
 34 aruran.ms 1.15 // Modified By: Aruran, IBM (ashanmug@in.ibm.com) for Bug# 3697, 3698, 3699, 3700
 35 aruran.ms 1.19 //              Aruran, IBM (ashanmug@in.ibm.com) for Bug# 3701, 3702, 3703, 3704
 36 kumpf     1.21 //              Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
 37 humberto  1.8  //
 38                //%/////////////////////////////////////////////////////////////////////////////
 39                
 40                #include <Pegasus/Common/LanguageParser.h>
 41                #include <Pegasus/Common/InternalException.h>
 42                #include <Pegasus/Common/Tracer.h>
 43                #include <Pegasus/Common/MessageLoader.h> //l10n
 44                #include <cstring>
 45                
 46 kumpf     1.21 #ifdef PEGASUS_HAS_ICU
 47                # include <unicode/locid.h>
 48                #endif
 49                #if defined(PEGASUS_OS_OS400)
 50                # include "OS400ConvertChar.h"
 51                #endif
 52                
 53 humberto  1.8  PEGASUS_NAMESPACE_BEGIN
 54                
 // Separator between the subtags of a language tag (e.g. "en-US").
 // Declared const: the value is only ever read in this file.
 static const char LANGUAGE_TAG_SEPARATOR_CHAR = '-';

 // Separator that convertLocaleIdToLanguageTag() replaces with
 // LANGUAGE_TAG_SEPARATOR_CHAR when turning a locale ID into a language tag.
 static const char LOCALE_ID_SEPARATOR_CHAR = '_';
 57 kumpf     1.21 
 58 kumpf     1.23 AcceptLanguageList LanguageParser::parseAcceptLanguageHeader(
 59 kumpf     1.21     const String& acceptLanguageHeader)
 60                {
 61                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageHeader");
 62                
 63 kumpf     1.23     AcceptLanguageList acceptLanguages;
 64 kumpf     1.21 
 65                    try
 66                    {
 67                        Array<String> languageElements;
 68                        LanguageParser::_parseLanguageHeader(
 69                            acceptLanguageHeader,
 70                            languageElements);
 71                
 72                        for (Uint32 i = 0; i < languageElements.size(); i++)
 73                        {
 74                            String languageTagString;
 75                            Real32 qualityValue;
 76                            LanguageParser::_parseAcceptLanguageElement(
 77                                languageElements[i], languageTagString, qualityValue);
 78                            acceptLanguages.insert(LanguageTag(languageTagString), qualityValue);
 79                        }
 80                    }
 81                    catch (Exception& e)
 82                    {
 83                        throw InvalidAcceptLanguageHeader(e.getMessage());
 84                    }
 85 kumpf     1.21 
 86                    PEG_METHOD_EXIT();
 87                    return acceptLanguages;
 88                }
 89                
 90 kumpf     1.23 ContentLanguageList LanguageParser::parseContentLanguageHeader(
 91 kumpf     1.21     const String& contentLanguageHeader)
 92                {
 93                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageHeader");
 94                
 95 kumpf     1.23     ContentLanguageList contentLanguages;
 96 kumpf     1.21 
 97                    try
 98                    {
 99                        Array<String> languageElements;
100                        LanguageParser::_parseLanguageHeader(
101                            contentLanguageHeader,
102                            languageElements);
103                
104                        for (Uint32 i = 0; i < languageElements.size(); i++)
105                        {
106                            contentLanguages.append(LanguageTag(languageElements[i]));
107                        }
108                    }
109                    catch (Exception& e)
110                    {
111                        throw InvalidContentLanguageHeader(e.getMessage());
112                    }
113                
114                    PEG_METHOD_EXIT();
115                    return contentLanguages;
116                }
117 kumpf     1.21 
118                void LanguageParser::parseLanguageTag(
119 kumpf     1.24     const String& languageTagString,
120 kumpf     1.21     String& language,
121                    String& country,
122                    String& variant)
123                {
124                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageTag");
125                
126                    language.clear();
127                    country.clear();
128                    variant.clear();
129                
130                    if (languageTagString == "*")
131                    {
132                        // Parsing and validation is complete
133                        PEG_METHOD_EXIT();
134                        return;
135                    }
136                
137                    Boolean isStandardFormat = true;    // RFC 3066 (ISO 639, ISO 3166)
138                    Array<String> subtags;
139                
140                    _parseLanguageSubtags(subtags, languageTagString);
141 kumpf     1.21 
142                    // _parseLanguageSubtags() always returns at least one subtag.
143                    PEGASUS_ASSERT(subtags.size() > 0);
144                
145                    // Validate the primary subtag.
146                    // Given a languageTagString "en-US-mn" the language is "en".
147                
148                    language = subtags[0];
149                
150                    if ((language == "i") || (language == "x"))
151                    {
152                        // These primary tags are allowed, but are not ISO 639 compliant
153                        isStandardFormat = false;
154                        language.clear();
155                    }
156                    else if ((language.size() != 2) && (language.size() != 3))
157                    {
158                        // Except for "i" and "x", primary tags must be 2 or 3 characters,
159                        // according to RFC 3066.
160                        MessageLoaderParms parms(
161                            "Common.LanguageParser.INVALID_LANGUAGE_TAG",
162 kumpf     1.21             "Invalid language tag \"$0\".", languageTagString);
163                        PEG_METHOD_EXIT();
164                        throw Exception(MessageLoader::getMessage(parms));
165                    }
166                
167                    if (subtags.size() == 1)
168                    {
169                        // If only the primary subtag is present, we are done!
170                        PEG_METHOD_EXIT();
171                        return;
172                    }
173                
174                    // Validate the second subtag.
175                    // Given a languageTagString "en-US-mn" the country is "US".
176                
177                    if (subtags[1].size() == 1)
178                    {
179                        // The second subtag may not be a single character according to
180                        // RFC 3066.
181                        MessageLoaderParms parms(
182                            "Common.LanguageParser.INVALID_LANGUAGE_TAG",
183 kumpf     1.21             "Invalid language tag \"$0\".", languageTagString);
184                        PEG_METHOD_EXIT();
185                        throw Exception(MessageLoader::getMessage(parms));
186                    }
187                
188                    if (isStandardFormat)
189                    {
190                        Uint32 variantIndex = 1;
191                
192                        if (subtags[1].size() == 2)
193                        {
194                            country = subtags[1];
195                            variantIndex = 2;
196                        }
197                
198                        Uint32 numSubtags = subtags.size();
199                
200                        if (variantIndex < numSubtags)
201                        {
202                            variant = subtags[variantIndex++];
203                
204 kumpf     1.21             while (variantIndex < numSubtags)
205                            {
206                                variant.append(LANGUAGE_TAG_SEPARATOR_CHAR);
207                                variant.append(subtags[variantIndex++]);
208                            }
209                        }
210                    }
211                
212                    PEG_METHOD_EXIT();
213                }
214                
215                void LanguageParser::validateQualityValue(Real32 quality)
216                {
217                    if ((quality > 1.0) || (quality < 0.0))
218                    {
219                        MessageLoaderParms parms(
220                            "Common.LanguageParser.INVALID_QUALITY_VALUE",
221                            "AcceptLanguage contains an invalid quality value");
222                        throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
223                    }
224                }
225 kumpf     1.21 
226                String LanguageParser::buildAcceptLanguageHeader(
227 kumpf     1.23     const AcceptLanguageList& acceptLanguages)
228 kumpf     1.21 {
229                    String alString;
230                    Uint32 numAcceptLanguages = acceptLanguages.size();
231                
232                    for (Uint32 i = 0; i < numAcceptLanguages; i++)
233                    {
234                        alString.append(acceptLanguages.getLanguageTag(i).toString());
235                
236                        Real32 q = acceptLanguages.getQualityValue(i);
237                        if (q != 1.0)
238                        {
239                            char qValueString[6];
240                            sprintf(qValueString, "%4.3f", q);
241                            alString.append(";q=");
242                            alString.append(qValueString);
243                        }
244                
245                        if (i < numAcceptLanguages - 1)
246                        {
247                            alString.append(",");
248                        }
249 kumpf     1.21     }
250                
251                    return alString;
252                }
253                
254                String LanguageParser::buildContentLanguageHeader(
255 kumpf     1.23     const ContentLanguageList& contentLanguages)
256 kumpf     1.21 {
257                    String clString;
258                    Uint32 numContentLanguages = contentLanguages.size();
259                
260                    for (Uint32 i = 0; i < numContentLanguages; i++)
261                    {
262                        clString.append(contentLanguages.getLanguageTag(i).toString());
263                
264                        if (i < numContentLanguages - 1)
265                        {
266                            clString.append(",");
267                        }
268                    }
269                
270                    return clString;
271                }
272                
#ifdef PEGASUS_HAS_ICU
// Converts a locale ID to language-tag form in place by replacing every
// '_' separator with '-'.
//
// @param localeId  The locale ID; modified in place.
// @return A reference to the modified localeId argument.
String& LanguageParser::convertLocaleIdToLanguageTag(String& localeId)
{
    // Resume each search at the position just rewritten; that character is
    // now '-', so it cannot match again.
    for (Uint32 pos = localeId.find(0, LOCALE_ID_SEPARATOR_CHAR);
         pos != PEG_NOT_FOUND;
         pos = localeId.find(pos, LOCALE_ID_SEPARATOR_CHAR))
    {
        localeId[pos] = LANGUAGE_TAG_SEPARATOR_CHAR;
    }

    return localeId;
}
#endif
285                
286 kumpf     1.23 AcceptLanguageList LanguageParser::getDefaultAcceptLanguages()
287 kumpf     1.21 {
288                #if defined(PEGASUS_HAS_MESSAGES) && defined(PEGASUS_HAS_ICU)
289                    Locale default_loc = Locale::getDefault();
290                
291                # ifdef PEGASUS_OS_OS400
292                    char* tmp = (char*)default_loc.getName();
293                    char tmp_[100];
294                    EtoA(strcpy(tmp_,tmp));
295 kumpf     1.26     String localeId = tmp_;
296 kumpf     1.21 # else
297 kumpf     1.26     String localeId = default_loc.getName();
298                # endif
299                
300 kumpf     1.21     try
301                    {
302 kumpf     1.26         return LanguageParser::parseAcceptLanguageHeader(
303                            convertLocaleIdToLanguageTag(localeId));
304 kumpf     1.21     }
305                    catch (const InvalidAcceptLanguageHeader& e)
306                    {
307                        Logger::put_l(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
308                           "src.Server.cimserver.FAILED_TO_GET_PROCESS_LOCALE",
309                           "Could not convert the system locale to a valid accept-language "
310                               "format");
311                        Logger::put(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
312                            e.getMessage());
313 kumpf     1.23         AcceptLanguageList al;
314 kumpf     1.22         al.insert(LanguageTag("*"), 1);
315                        return al;
316 kumpf     1.21     }
317 kumpf     1.22 #else
318 kumpf     1.23     return AcceptLanguageList();
319 kumpf     1.21 #endif
320                }
321                
322                void LanguageParser::_parseLanguageHeader(
323                    const String& languageHeaderValue,
324                    Array<String>& languageElements)
325                {
326                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageHeader");
327                
328                    languageElements.clear();
329                    String element;
330                
331                    for (Uint32 i=0, len=languageHeaderValue.size(); i<len; i++)
332                    {
333                        Char16 nextChar = languageHeaderValue[i];
334                
335                        if (isascii(nextChar) && isspace(nextChar))
336                        {
337                            // Ignore whitespace
338                        }
339                        else if (nextChar == '(')
340 kumpf     1.21         {
341                            // Ignore comments
342                            while (i < len)
343                            {
344                                // Search for the closing parenthesis
345                                if (languageHeaderValue[i] == ')')
346                                {
347                                    break;
348                                }
349                
350                                // Skip over escape characters
351                                if (languageHeaderValue[i] == '\\')
352                                {
353                                    i++;
354                                }
355                
356                                i++;
357                            }
358                
359                            // Check for a missing closing parenthesis
360                            if (i >= len)
361 kumpf     1.21             {
362                                MessageLoaderParms parms(
363                                    "Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
364                                    "Closing \")\" character is missing.");
365                                throw Exception(MessageLoader::getMessage(parms));
366                            }
367                        }
368                        else if (nextChar == ',')
369                        {
370                            // Check for the end of the element
371                            languageElements.append(element);
372                            element.clear();
373                        }
374                        else
375                        {
376                            // Unescape an escape character
377                            if ((nextChar == '\\') && (i < len-1))
378                            {
379                                nextChar = languageHeaderValue[++i];
380                            }
381                
382 kumpf     1.21             // Include this character in the value
383                            element.append(nextChar);
384                        }
385                    }
386                
387                    // Include the last element in the languageElements array
388                    languageElements.append(element);
389                
390                    PEG_METHOD_EXIT();
391                }
392                
393                void LanguageParser::_parseAcceptLanguageElement(
394                    const String& acceptLanguageElement,
395                    String& languageTag,
396                    Real32& quality)
397                {
398                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseAcceptLanguageElement");
399                
400                    // look for ';' in acceptLanguageElement, that means we have a
401                    // quality value to capture.  If not present, we only have a language tag.
402                
403 kumpf     1.21     Uint32 semicolonIndex = acceptLanguageElement.find(";");
404                    if (semicolonIndex != PEG_NOT_FOUND)
405                    {
406                        // Separate the language tag and quality value
407                
408                        String qualityString =
409                            acceptLanguageElement.subString(semicolonIndex+1);
410                        languageTag = acceptLanguageElement.subString(0, semicolonIndex);
411                
412                        // Parse the quality value
413 humberto  1.8  
414 kumpf     1.21         char dummyChar;
415                        int scanfConversions = sscanf(
416                            qualityString.getCString(),
417                            "q=%f%c", &quality, &dummyChar);
418                
419                        if ((scanfConversions != 1) ||
420                            (qualityString.size() > 7))
421                        {
422                            MessageLoaderParms parms(
423                                "Common.LanguageParser.INVALID_QUALITY_VALUE",
424                                "AcceptLanguage contains an invalid quality value");
425                            PEG_METHOD_EXIT();
426                            throw Exception(MessageLoader::getMessage(parms));
427                        }
428                    }
429                    else
430                    {
431                        languageTag = acceptLanguageElement;
432                        quality = 1.0;
433                    }
434                
435 kumpf     1.21     PEG_METHOD_EXIT();
436                }
437                
438                void LanguageParser::_parseLanguageSubtags(
439                    Array<String>& subtags,
440                    const String& languageTagString)
441                {
442                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageSubtags");
443                
444                    // Parse the language tag into subtags
445                
446                    Uint32 subtagIndex = 0;
447                    Uint32 separatorIndex;
448                    while ((separatorIndex = languageTagString.find(
449                                subtagIndex, LANGUAGE_TAG_SEPARATOR_CHAR)) != PEG_NOT_FOUND)
450                    {
451                        subtags.append(languageTagString.subString(
452                            subtagIndex, separatorIndex - subtagIndex));
453                        subtagIndex = separatorIndex + 1;
454                    }
455                    subtags.append(languageTagString.subString(subtagIndex));
456 kumpf     1.21 
457                    // Validate the syntax of each of the subtags
458                
459                    for (Uint32 i = 0, n = subtags.size(); i < n; i++)
460                    {
461                        if (((i == 0) && !_isValidPrimarySubtagSyntax(subtags[i])) ||
462                            ((i > 0) && !_isValidSubtagSyntax(subtags[i])))
463                        {
464                            MessageLoaderParms parms(
465                                "Common.LanguageParser.MALFORMED_LANGUAGE_TAG",
466                                "Malformed language tag \"$0\".", languageTagString);
467                            PEG_METHOD_EXIT();
468                            throw Exception(MessageLoader::getMessage(parms));
469                        }
470                    }
471                
472                    PEG_METHOD_EXIT();
473 humberto  1.8  }
474                
475 kumpf     1.21 Boolean LanguageParser::_isValidPrimarySubtagSyntax(const String& subtag)
476                {
477                    if ((subtag.size() == 0) || (subtag.size() > 8))
478                    {
479                        return false;
480                    }
481                
482                    for (Uint32 i = 0, n = subtag.size(); i < n; i++)
483                    {
484                        if (!(isascii(subtag[i]) && isalpha(subtag[i])))
485                        {
486                            return false;
487                        }
488                    }
489                
490                    return true;
491                }
492                
493                Boolean LanguageParser::_isValidSubtagSyntax(const String& subtag)
494                {
495                    if ((subtag.size() == 0) || (subtag.size() > 8))
496 kumpf     1.21     {
497                        return false;
498                    }
499                
500                    for (Uint32 i = 0, n = subtag.size(); i < n; i++)
501                    {
502                        if (!(isascii(subtag[i]) && isalnum(subtag[i])))
503                        {
504                            return false;
505                        }
506                    }
507                
508                    return true;
509                }
510 humberto  1.8  
511                PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2