(file) Return to LanguageParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.11 //%2005////////////////////////////////////////////////////////////////////////
  2 humberto 1.8  //
  3 karl     1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4               // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5               // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl     1.9  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl     1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8               // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 karl     1.11 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10               // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 humberto 1.8  //
 12               // Permission is hereby granted, free of charge, to any person obtaining a copy
 13               // of this software and associated documentation files (the "Software"), to
 14               // deal in the Software without restriction, including without limitation the
 15               // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 16               // sell copies of the Software, and to permit persons to whom the Software is
 17               // furnished to do so, subject to the following conditions:
 18               // 
 19               // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 20               // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 21               // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 22               // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 23               // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 24               // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 25               // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 26               // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 27               //
 28               //==============================================================================
 29               //
 30               // Author: Humberto Rivero (hurivero@us.ibm.com)
 31               //
 32 aruran.ms 1.15 // Modified By: Aruran, IBM (ashanmug@in.ibm.com) for Bug# 3697, 3698, 3699, 3700
 33 aruran.ms 1.19 //              Aruran, IBM (ashanmug@in.ibm.com) for Bug# 3701, 3702, 3703, 3704
 34 kumpf     1.21 //              Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
 35 humberto  1.8  //
 36                //%/////////////////////////////////////////////////////////////////////////////
 37                
 38                #include <Pegasus/Common/LanguageParser.h>
 39                #include <Pegasus/Common/InternalException.h>
 40                #include <Pegasus/Common/Tracer.h>
 41                #include <Pegasus/Common/MessageLoader.h> //l10n
 42                #include <cstring>
 43                
 44 kumpf     1.21 #ifdef PEGASUS_HAS_ICU
 45                # include <unicode/locid.h>
 46                #endif
 47                #if defined(PEGASUS_OS_OS400)
 48                # include "OS400ConvertChar.h"
 49                #endif
 50                
 51 humberto  1.8  PEGASUS_NAMESPACE_BEGIN
 52                
 // Separator between the subtags of a language tag (for example "en-US").
 static const char LANGUAGE_TAG_SEPARATOR_CHAR = '-';

 // Separator used by the locale ID format (for example "en_US").
 // parseLanguageTag() rewrites it to LANGUAGE_TAG_SEPARATOR_CHAR before
 // splitting the tag into subtags.
 static const char LOCALE_ID_SEPARATOR_CHAR = '_';
 55 kumpf     1.21 
 56                AcceptLanguages LanguageParser::parseAcceptLanguageHeader(
 57                    const String& acceptLanguageHeader)
 58                {
 59                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageHeader");
 60                
 61                    AcceptLanguages acceptLanguages;
 62                
 63                    try
 64                    {
 65                        Array<String> languageElements;
 66                        LanguageParser::_parseLanguageHeader(
 67                            acceptLanguageHeader,
 68                            languageElements);
 69                
 70                        for (Uint32 i = 0; i < languageElements.size(); i++)
 71                        {
 72                            String languageTagString;
 73                            Real32 qualityValue;
 74                            LanguageParser::_parseAcceptLanguageElement(
 75                                languageElements[i], languageTagString, qualityValue);
 76 kumpf     1.21             acceptLanguages.insert(LanguageTag(languageTagString), qualityValue);
 77                        }
 78                    }
 79                    catch (Exception& e)
 80                    {
 81                        throw InvalidAcceptLanguageHeader(e.getMessage());
 82                    }
 83                
 84                    PEG_METHOD_EXIT();
 85                    return acceptLanguages;
 86                }
 87                
 88                ContentLanguages LanguageParser::parseContentLanguageHeader(
 89                    const String& contentLanguageHeader)
 90                {
 91                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageHeader");
 92                
 93                    ContentLanguages contentLanguages;
 94                
 95                    try
 96                    {
 97 kumpf     1.21         Array<String> languageElements;
 98                        LanguageParser::_parseLanguageHeader(
 99                            contentLanguageHeader,
100                            languageElements);
101                
102                        for (Uint32 i = 0; i < languageElements.size(); i++)
103                        {
104                            contentLanguages.append(LanguageTag(languageElements[i]));
105                        }
106                    }
107                    catch (Exception& e)
108                    {
109                        throw InvalidContentLanguageHeader(e.getMessage());
110                    }
111                
112                    PEG_METHOD_EXIT();
113                    return contentLanguages;
114                }
115                
116                void LanguageParser::parseLanguageTag(
117 kumpf     1.22     const String& languageTagString_,
118 kumpf     1.21     String& language,
119                    String& country,
120                    String& variant)
121                {
122                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageTag");
123                
124 kumpf     1.22     // Convert locale ID format to language tag format
125                    String languageTagString = languageTagString_;
126                    Uint32 index = 0;
127                    while ((index = languageTagString.find(index, LOCALE_ID_SEPARATOR_CHAR)) !=
128                        PEG_NOT_FOUND)
129                    {
130                        languageTagString[index] = LANGUAGE_TAG_SEPARATOR_CHAR;
131                    }
132                
133 kumpf     1.21     language.clear();
134                    country.clear();
135                    variant.clear();
136                
137                    if (languageTagString == "*")
138                    {
139                        // Parsing and validation is complete
140                        PEG_METHOD_EXIT();
141                        return;
142                    }
143                
144                    Boolean isStandardFormat = true;    // RFC 3066 (ISO 639, ISO 3166)
145                    Array<String> subtags;
146                
147                    _parseLanguageSubtags(subtags, languageTagString);
148                
149                    // _parseLanguageSubtags() always returns at least one subtag.
150                    PEGASUS_ASSERT(subtags.size() > 0);
151                
152                    // Validate the primary subtag.
153                    // Given a languageTagString "en-US-mn" the language is "en".
154 kumpf     1.21 
155                    language = subtags[0];
156                
157                    if ((language == "i") || (language == "x"))
158                    {
159                        // These primary tags are allowed, but are not ISO 639 compliant
160                        isStandardFormat = false;
161                        language.clear();
162                    }
163                    else if ((language.size() != 2) && (language.size() != 3))
164                    {
165                        // Except for "i" and "x", primary tags must be 2 or 3 characters,
166                        // according to RFC 3066.
167                        MessageLoaderParms parms(
168                            "Common.LanguageParser.INVALID_LANGUAGE_TAG",
169                            "Invalid language tag \"$0\".", languageTagString);
170                        PEG_METHOD_EXIT();
171                        throw Exception(MessageLoader::getMessage(parms));
172                    }
173                
174                    if (subtags.size() == 1)
175 kumpf     1.21     {
176                        // If only the primary subtag is present, we are done!
177                        PEG_METHOD_EXIT();
178                        return;
179                    }
180                
181                    // Validate the second subtag.
182                    // Given a languageTagString "en-US-mn" the country is "US".
183                
184                    if (subtags[1].size() == 1)
185                    {
186                        // The second subtag may not be a single character according to
187                        // RFC 3066.
188                        MessageLoaderParms parms(
189                            "Common.LanguageParser.INVALID_LANGUAGE_TAG",
190                            "Invalid language tag \"$0\".", languageTagString);
191                        PEG_METHOD_EXIT();
192                        throw Exception(MessageLoader::getMessage(parms));
193                    }
194                
195                    if (isStandardFormat)
196 kumpf     1.21     {
197                        Uint32 variantIndex = 1;
198                
199                        if (subtags[1].size() == 2)
200                        {
201                            country = subtags[1];
202                            variantIndex = 2;
203                        }
204                
205                        Uint32 numSubtags = subtags.size();
206                
207                        if (variantIndex < numSubtags)
208                        {
209                            variant = subtags[variantIndex++];
210                
211                            while (variantIndex < numSubtags)
212                            {
213                                variant.append(LANGUAGE_TAG_SEPARATOR_CHAR);
214                                variant.append(subtags[variantIndex++]);
215                            }
216                        }
217 kumpf     1.21     }
218                
219                    PEG_METHOD_EXIT();
220                }
221                
222                void LanguageParser::validateQualityValue(Real32 quality)
223                {
224                    if ((quality > 1.0) || (quality < 0.0))
225                    {
226                        MessageLoaderParms parms(
227                            "Common.LanguageParser.INVALID_QUALITY_VALUE",
228                            "AcceptLanguage contains an invalid quality value");
229                        throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
230                    }
231                }
232                
233                String LanguageParser::buildAcceptLanguageHeader(
234                    const AcceptLanguages& acceptLanguages)
235                {
236                    String alString;
237                    Uint32 numAcceptLanguages = acceptLanguages.size();
238 kumpf     1.21 
239                    for (Uint32 i = 0; i < numAcceptLanguages; i++)
240                    {
241                        alString.append(acceptLanguages.getLanguageTag(i).toString());
242                
243                        Real32 q = acceptLanguages.getQualityValue(i);
244                        if (q != 1.0)
245                        {
246                            char qValueString[6];
247                            sprintf(qValueString, "%4.3f", q);
248                            alString.append(";q=");
249                            alString.append(qValueString);
250                        }
251                
252                        if (i < numAcceptLanguages - 1)
253                        {
254                            alString.append(",");
255                        }
256                    }
257                
258                    return alString;
259 kumpf     1.21 }
260                
261                String LanguageParser::buildContentLanguageHeader(
262                    const ContentLanguages& contentLanguages)
263                {
264                    String clString;
265                    Uint32 numContentLanguages = contentLanguages.size();
266                
267                    for (Uint32 i = 0; i < numContentLanguages; i++)
268                    {
269                        clString.append(contentLanguages.getLanguageTag(i).toString());
270                
271                        if (i < numContentLanguages - 1)
272                        {
273                            clString.append(",");
274                        }
275                    }
276                
277                    return clString;
278                }
279                
280 kumpf     1.21 AcceptLanguages LanguageParser::getDefaultAcceptLanguages()
281                {
282                #if defined(PEGASUS_HAS_MESSAGES) && defined(PEGASUS_HAS_ICU)
283                    Locale default_loc = Locale::getDefault();
284                
285                # ifdef PEGASUS_OS_OS400
286                    char* tmp = (char*)default_loc.getName();
287                    char tmp_[100];
288                    EtoA(strcpy(tmp_,tmp));
289                    try
290                    {
291 kumpf     1.22         return LanguageParser::parseAcceptLanguageHeader(tmp_);
292 kumpf     1.21     }
293                # else
294                    try
295                    {
296 kumpf     1.22         return LanguageParser::parseAcceptLanguageHeader(default_loc.getName());
297 kumpf     1.21     }
298                # endif
299                    catch (const InvalidAcceptLanguageHeader& e)
300                    {
301                        Logger::put_l(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
302                           "src.Server.cimserver.FAILED_TO_GET_PROCESS_LOCALE",
303                           "Could not convert the system locale to a valid accept-language "
304                               "format");
305                        Logger::put(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
306                            e.getMessage());
307 kumpf     1.22         AcceptLanguages al;
308                        al.insert(LanguageTag("*"), 1);
309                        return al;
310 kumpf     1.21     }
311 kumpf     1.22 #else
312                    return AcceptLanguages();
313 kumpf     1.21 #endif
314                }
315                
316                void LanguageParser::_parseLanguageHeader(
317                    const String& languageHeaderValue,
318                    Array<String>& languageElements)
319                {
320                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageHeader");
321                
322                    languageElements.clear();
323                    String element;
324                
325                    for (Uint32 i=0, len=languageHeaderValue.size(); i<len; i++)
326                    {
327                        Char16 nextChar = languageHeaderValue[i];
328                
329                        if (isascii(nextChar) && isspace(nextChar))
330                        {
331                            // Ignore whitespace
332                        }
333                        else if (nextChar == '(')
334 kumpf     1.21         {
335                            // Ignore comments
336                            while (i < len)
337                            {
338                                // Search for the closing parenthesis
339                                if (languageHeaderValue[i] == ')')
340                                {
341                                    break;
342                                }
343                
344                                // Skip over escape characters
345                                if (languageHeaderValue[i] == '\\')
346                                {
347                                    i++;
348                                }
349                
350                                i++;
351                            }
352                
353                            // Check for a missing closing parenthesis
354                            if (i >= len)
355 kumpf     1.21             {
356                                MessageLoaderParms parms(
357                                    "Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
358                                    "Closing \")\" character is missing.");
359                                throw Exception(MessageLoader::getMessage(parms));
360                            }
361                        }
362                        else if (nextChar == ',')
363                        {
364                            // Check for the end of the element
365                            languageElements.append(element);
366                            element.clear();
367                        }
368                        else
369                        {
370                            // Unescape an escape character
371                            if ((nextChar == '\\') && (i < len-1))
372                            {
373                                nextChar = languageHeaderValue[++i];
374                            }
375                
376 kumpf     1.21             // Include this character in the value
377                            element.append(nextChar);
378                        }
379                    }
380                
381                    // Include the last element in the languageElements array
382                    languageElements.append(element);
383                
384                    PEG_METHOD_EXIT();
385                }
386                
387                void LanguageParser::_parseAcceptLanguageElement(
388                    const String& acceptLanguageElement,
389                    String& languageTag,
390                    Real32& quality)
391                {
392                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseAcceptLanguageElement");
393                
394                    // look for ';' in acceptLanguageElement, that means we have a
395                    // quality value to capture.  If not present, we only have a language tag.
396                
397 kumpf     1.21     Uint32 semicolonIndex = acceptLanguageElement.find(";");
398                    if (semicolonIndex != PEG_NOT_FOUND)
399                    {
400                        // Separate the language tag and quality value
401                
402                        String qualityString =
403                            acceptLanguageElement.subString(semicolonIndex+1);
404                        languageTag = acceptLanguageElement.subString(0, semicolonIndex);
405                
406                        // Parse the quality value
407 humberto  1.8  
408 kumpf     1.21         char dummyChar;
409                        int scanfConversions = sscanf(
410                            qualityString.getCString(),
411                            "q=%f%c", &quality, &dummyChar);
412                
413                        if ((scanfConversions != 1) ||
414                            (qualityString.size() > 7))
415                        {
416                            MessageLoaderParms parms(
417                                "Common.LanguageParser.INVALID_QUALITY_VALUE",
418                                "AcceptLanguage contains an invalid quality value");
419                            PEG_METHOD_EXIT();
420                            throw Exception(MessageLoader::getMessage(parms));
421                        }
422                    }
423                    else
424                    {
425                        languageTag = acceptLanguageElement;
426                        quality = 1.0;
427                    }
428                
429 kumpf     1.21     PEG_METHOD_EXIT();
430                }
431                
432                void LanguageParser::_parseLanguageSubtags(
433                    Array<String>& subtags,
434                    const String& languageTagString)
435                {
436                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageSubtags");
437                
438                    // Parse the language tag into subtags
439                
440                    Uint32 subtagIndex = 0;
441                    Uint32 separatorIndex;
442                    while ((separatorIndex = languageTagString.find(
443                                subtagIndex, LANGUAGE_TAG_SEPARATOR_CHAR)) != PEG_NOT_FOUND)
444                    {
445                        subtags.append(languageTagString.subString(
446                            subtagIndex, separatorIndex - subtagIndex));
447                        subtagIndex = separatorIndex + 1;
448                    }
449                    subtags.append(languageTagString.subString(subtagIndex));
450 kumpf     1.21 
451                    // Validate the syntax of each of the subtags
452                
453                    for (Uint32 i = 0, n = subtags.size(); i < n; i++)
454                    {
455                        if (((i == 0) && !_isValidPrimarySubtagSyntax(subtags[i])) ||
456                            ((i > 0) && !_isValidSubtagSyntax(subtags[i])))
457                        {
458                            MessageLoaderParms parms(
459                                "Common.LanguageParser.MALFORMED_LANGUAGE_TAG",
460                                "Malformed language tag \"$0\".", languageTagString);
461                            PEG_METHOD_EXIT();
462                            throw Exception(MessageLoader::getMessage(parms));
463                        }
464                    }
465                
466                    PEG_METHOD_EXIT();
467 humberto  1.8  }
468                
469 kumpf     1.21 Boolean LanguageParser::_isValidPrimarySubtagSyntax(const String& subtag)
470                {
471                    if ((subtag.size() == 0) || (subtag.size() > 8))
472                    {
473                        return false;
474                    }
475                
476                    for (Uint32 i = 0, n = subtag.size(); i < n; i++)
477                    {
478                        if (!(isascii(subtag[i]) && isalpha(subtag[i])))
479                        {
480                            return false;
481                        }
482                    }
483                
484                    return true;
485                }
486                
487                Boolean LanguageParser::_isValidSubtagSyntax(const String& subtag)
488                {
489                    if ((subtag.size() == 0) || (subtag.size() > 8))
490 kumpf     1.21     {
491                        return false;
492                    }
493                
494                    for (Uint32 i = 0, n = subtag.size(); i < n; i++)
495                    {
496                        if (!(isascii(subtag[i]) && isalnum(subtag[i])))
497                        {
498                            return false;
499                        }
500                    }
501                
502                    return true;
503                }
504 humberto  1.8  
505                PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2