(file) Return to LanguageParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.25 //%2006////////////////////////////////////////////////////////////////////////
  2 humberto 1.8  //
  3 karl     1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4               // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5               // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl     1.9  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl     1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8               // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 karl     1.11 // Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 10               // EMC Corporation; VERITAS Software Corporation; The Open Group.
 11 karl     1.25 // Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
 12               // EMC Corporation; Symantec Corporation; The Open Group.
 13 humberto 1.8  //
 14               // Permission is hereby granted, free of charge, to any person obtaining a copy
 15               // of this software and associated documentation files (the "Software"), to
 16               // deal in the Software without restriction, including without limitation the
 17               // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 18               // sell copies of the Software, and to permit persons to whom the Software is
 19               // furnished to do so, subject to the following conditions:
 20               // 
 21               // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 22               // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 23               // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 24               // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 25               // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 26               // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 27               // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 28               // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 29               //
 30               //==============================================================================
 31               //
 32               // Author: Humberto Rivero (hurivero@us.ibm.com)
 33               //
 34 aruran.ms 1.15 // Modified By: Aruran, IBM (ashanmug@in.ibm.com) for Bug# 3697, 3698, 3699, 3700
 35 aruran.ms 1.19 //              Aruran, IBM (ashanmug@in.ibm.com) for Bug# 3701, 3702, 3703, 3704
 36 kumpf     1.21 //              Roger Kumpf, Hewlett-Packard Company (roger_kumpf@hp.com)
 37 humberto  1.8  //
 38                //%/////////////////////////////////////////////////////////////////////////////
 39                
 40                #include <Pegasus/Common/LanguageParser.h>
 41                #include <Pegasus/Common/InternalException.h>
 42                #include <Pegasus/Common/Tracer.h>
 43                #include <Pegasus/Common/MessageLoader.h> //l10n
 44                #include <cstring>
 45                
 46 kumpf     1.21 #ifdef PEGASUS_HAS_ICU
 47                # include <unicode/locid.h>
 48                #endif
 49                #if defined(PEGASUS_OS_OS400)
 50                # include "OS400ConvertChar.h"
 51                #endif
 52                
 53 humberto  1.8  PEGASUS_NAMESPACE_BEGIN
 54                
 // Character that separates the subtags of a language tag (e.g. "en-US").
 // It is file-local and never modified, so it is declared const.
 static const char LANGUAGE_TAG_SEPARATOR_CHAR = '-';
 56                
 57 kumpf     1.23 AcceptLanguageList LanguageParser::parseAcceptLanguageHeader(
 58 kumpf     1.21     const String& acceptLanguageHeader)
 59                {
 60                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageHeader");
 61                
 62 kumpf     1.23     AcceptLanguageList acceptLanguages;
 63 kumpf     1.21 
 64                    try
 65                    {
 66                        Array<String> languageElements;
 67                        LanguageParser::_parseLanguageHeader(
 68                            acceptLanguageHeader,
 69                            languageElements);
 70                
 71                        for (Uint32 i = 0; i < languageElements.size(); i++)
 72                        {
 73                            String languageTagString;
 74                            Real32 qualityValue;
 75                            LanguageParser::_parseAcceptLanguageElement(
 76                                languageElements[i], languageTagString, qualityValue);
 77                            acceptLanguages.insert(LanguageTag(languageTagString), qualityValue);
 78                        }
 79                    }
 80                    catch (Exception& e)
 81                    {
 82                        throw InvalidAcceptLanguageHeader(e.getMessage());
 83                    }
 84 kumpf     1.21 
 85                    PEG_METHOD_EXIT();
 86                    return acceptLanguages;
 87                }
 88                
 89 kumpf     1.23 ContentLanguageList LanguageParser::parseContentLanguageHeader(
 90 kumpf     1.21     const String& contentLanguageHeader)
 91                {
 92                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageHeader");
 93                
 94 kumpf     1.23     ContentLanguageList contentLanguages;
 95 kumpf     1.21 
 96                    try
 97                    {
 98                        Array<String> languageElements;
 99                        LanguageParser::_parseLanguageHeader(
100                            contentLanguageHeader,
101                            languageElements);
102                
103                        for (Uint32 i = 0; i < languageElements.size(); i++)
104                        {
105                            contentLanguages.append(LanguageTag(languageElements[i]));
106                        }
107                    }
108                    catch (Exception& e)
109                    {
110                        throw InvalidContentLanguageHeader(e.getMessage());
111                    }
112                
113                    PEG_METHOD_EXIT();
114                    return contentLanguages;
115                }
116 kumpf     1.21 
117                void LanguageParser::parseLanguageTag(
118 kumpf     1.24     const String& languageTagString,
119 kumpf     1.21     String& language,
120                    String& country,
121                    String& variant)
122                {
123                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageTag");
124                
125                    language.clear();
126                    country.clear();
127                    variant.clear();
128                
129                    if (languageTagString == "*")
130                    {
131                        // Parsing and validation is complete
132                        PEG_METHOD_EXIT();
133                        return;
134                    }
135                
136                    Boolean isStandardFormat = true;    // RFC 3066 (ISO 639, ISO 3166)
137                    Array<String> subtags;
138                
139                    _parseLanguageSubtags(subtags, languageTagString);
140 kumpf     1.21 
141                    // _parseLanguageSubtags() always returns at least one subtag.
142                    PEGASUS_ASSERT(subtags.size() > 0);
143                
144                    // Validate the primary subtag.
145                    // Given a languageTagString "en-US-mn" the language is "en".
146                
147                    language = subtags[0];
148                
149                    if ((language == "i") || (language == "x"))
150                    {
151                        // These primary tags are allowed, but are not ISO 639 compliant
152                        isStandardFormat = false;
153                        language.clear();
154                    }
155                    else if ((language.size() != 2) && (language.size() != 3))
156                    {
157                        // Except for "i" and "x", primary tags must be 2 or 3 characters,
158                        // according to RFC 3066.
159                        MessageLoaderParms parms(
160                            "Common.LanguageParser.INVALID_LANGUAGE_TAG",
161 kumpf     1.21             "Invalid language tag \"$0\".", languageTagString);
162                        PEG_METHOD_EXIT();
163                        throw Exception(MessageLoader::getMessage(parms));
164                    }
165                
166                    if (subtags.size() == 1)
167                    {
168                        // If only the primary subtag is present, we are done!
169                        PEG_METHOD_EXIT();
170                        return;
171                    }
172                
173                    // Validate the second subtag.
174                    // Given a languageTagString "en-US-mn" the country is "US".
175                
176                    if (subtags[1].size() == 1)
177                    {
178                        // The second subtag may not be a single character according to
179                        // RFC 3066.
180                        MessageLoaderParms parms(
181                            "Common.LanguageParser.INVALID_LANGUAGE_TAG",
182 kumpf     1.21             "Invalid language tag \"$0\".", languageTagString);
183                        PEG_METHOD_EXIT();
184                        throw Exception(MessageLoader::getMessage(parms));
185                    }
186                
187                    if (isStandardFormat)
188                    {
189                        Uint32 variantIndex = 1;
190                
191                        if (subtags[1].size() == 2)
192                        {
193                            country = subtags[1];
194                            variantIndex = 2;
195                        }
196                
197                        Uint32 numSubtags = subtags.size();
198                
199                        if (variantIndex < numSubtags)
200                        {
201                            variant = subtags[variantIndex++];
202                
203 kumpf     1.21             while (variantIndex < numSubtags)
204                            {
205                                variant.append(LANGUAGE_TAG_SEPARATOR_CHAR);
206                                variant.append(subtags[variantIndex++]);
207                            }
208                        }
209                    }
210                
211                    PEG_METHOD_EXIT();
212                }
213                
214                void LanguageParser::validateQualityValue(Real32 quality)
215                {
216                    if ((quality > 1.0) || (quality < 0.0))
217                    {
218                        MessageLoaderParms parms(
219                            "Common.LanguageParser.INVALID_QUALITY_VALUE",
220                            "AcceptLanguage contains an invalid quality value");
221                        throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
222                    }
223                }
224 kumpf     1.21 
225                String LanguageParser::buildAcceptLanguageHeader(
226 kumpf     1.23     const AcceptLanguageList& acceptLanguages)
227 kumpf     1.21 {
228                    String alString;
229                    Uint32 numAcceptLanguages = acceptLanguages.size();
230                
231                    for (Uint32 i = 0; i < numAcceptLanguages; i++)
232                    {
233                        alString.append(acceptLanguages.getLanguageTag(i).toString());
234                
235                        Real32 q = acceptLanguages.getQualityValue(i);
236                        if (q != 1.0)
237                        {
238                            char qValueString[6];
239                            sprintf(qValueString, "%4.3f", q);
240                            alString.append(";q=");
241                            alString.append(qValueString);
242                        }
243                
244                        if (i < numAcceptLanguages - 1)
245                        {
246                            alString.append(",");
247                        }
248 kumpf     1.21     }
249                
250                    return alString;
251                }
252                
253                String LanguageParser::buildContentLanguageHeader(
254 kumpf     1.23     const ContentLanguageList& contentLanguages)
255 kumpf     1.21 {
256                    String clString;
257                    Uint32 numContentLanguages = contentLanguages.size();
258                
259                    for (Uint32 i = 0; i < numContentLanguages; i++)
260                    {
261                        clString.append(contentLanguages.getLanguageTag(i).toString());
262                
263                        if (i < numContentLanguages - 1)
264                        {
265                            clString.append(",");
266                        }
267                    }
268                
269                    return clString;
270                }
271                
272 kumpf     1.23 AcceptLanguageList LanguageParser::getDefaultAcceptLanguages()
273 kumpf     1.21 {
274                #if defined(PEGASUS_HAS_MESSAGES) && defined(PEGASUS_HAS_ICU)
275                    Locale default_loc = Locale::getDefault();
276                
277                # ifdef PEGASUS_OS_OS400
278                    char* tmp = (char*)default_loc.getName();
279                    char tmp_[100];
280                    EtoA(strcpy(tmp_,tmp));
281                    try
282                    {
283 kumpf     1.22         return LanguageParser::parseAcceptLanguageHeader(tmp_);
284 kumpf     1.21     }
285                # else
286                    try
287                    {
288 kumpf     1.22         return LanguageParser::parseAcceptLanguageHeader(default_loc.getName());
289 kumpf     1.21     }
290                # endif
291                    catch (const InvalidAcceptLanguageHeader& e)
292                    {
293                        Logger::put_l(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
294                           "src.Server.cimserver.FAILED_TO_GET_PROCESS_LOCALE",
295                           "Could not convert the system locale to a valid accept-language "
296                               "format");
297                        Logger::put(Logger::ERROR_LOG, System::CIMSERVER, Logger::SEVERE,
298                            e.getMessage());
299 kumpf     1.23         AcceptLanguageList al;
300 kumpf     1.22         al.insert(LanguageTag("*"), 1);
301                        return al;
302 kumpf     1.21     }
303 kumpf     1.22 #else
304 kumpf     1.23     return AcceptLanguageList();
305 kumpf     1.21 #endif
306                }
307                
308                void LanguageParser::_parseLanguageHeader(
309                    const String& languageHeaderValue,
310                    Array<String>& languageElements)
311                {
312                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageHeader");
313                
314                    languageElements.clear();
315                    String element;
316                
317                    for (Uint32 i=0, len=languageHeaderValue.size(); i<len; i++)
318                    {
319                        Char16 nextChar = languageHeaderValue[i];
320                
321                        if (isascii(nextChar) && isspace(nextChar))
322                        {
323                            // Ignore whitespace
324                        }
325                        else if (nextChar == '(')
326 kumpf     1.21         {
327                            // Ignore comments
328                            while (i < len)
329                            {
330                                // Search for the closing parenthesis
331                                if (languageHeaderValue[i] == ')')
332                                {
333                                    break;
334                                }
335                
336                                // Skip over escape characters
337                                if (languageHeaderValue[i] == '\\')
338                                {
339                                    i++;
340                                }
341                
342                                i++;
343                            }
344                
345                            // Check for a missing closing parenthesis
346                            if (i >= len)
347 kumpf     1.21             {
348                                MessageLoaderParms parms(
349                                    "Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
350                                    "Closing \")\" character is missing.");
351                                throw Exception(MessageLoader::getMessage(parms));
352                            }
353                        }
354                        else if (nextChar == ',')
355                        {
356                            // Check for the end of the element
357                            languageElements.append(element);
358                            element.clear();
359                        }
360                        else
361                        {
362                            // Unescape an escape character
363                            if ((nextChar == '\\') && (i < len-1))
364                            {
365                                nextChar = languageHeaderValue[++i];
366                            }
367                
368 kumpf     1.21             // Include this character in the value
369                            element.append(nextChar);
370                        }
371                    }
372                
373                    // Include the last element in the languageElements array
374                    languageElements.append(element);
375                
376                    PEG_METHOD_EXIT();
377                }
378                
379                void LanguageParser::_parseAcceptLanguageElement(
380                    const String& acceptLanguageElement,
381                    String& languageTag,
382                    Real32& quality)
383                {
384                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseAcceptLanguageElement");
385                
386                    // look for ';' in acceptLanguageElement, that means we have a
387                    // quality value to capture.  If not present, we only have a language tag.
388                
389 kumpf     1.21     Uint32 semicolonIndex = acceptLanguageElement.find(";");
390                    if (semicolonIndex != PEG_NOT_FOUND)
391                    {
392                        // Separate the language tag and quality value
393                
394                        String qualityString =
395                            acceptLanguageElement.subString(semicolonIndex+1);
396                        languageTag = acceptLanguageElement.subString(0, semicolonIndex);
397                
398                        // Parse the quality value
399 humberto  1.8  
400 kumpf     1.21         char dummyChar;
401                        int scanfConversions = sscanf(
402                            qualityString.getCString(),
403                            "q=%f%c", &quality, &dummyChar);
404                
405                        if ((scanfConversions != 1) ||
406                            (qualityString.size() > 7))
407                        {
408                            MessageLoaderParms parms(
409                                "Common.LanguageParser.INVALID_QUALITY_VALUE",
410                                "AcceptLanguage contains an invalid quality value");
411                            PEG_METHOD_EXIT();
412                            throw Exception(MessageLoader::getMessage(parms));
413                        }
414                    }
415                    else
416                    {
417                        languageTag = acceptLanguageElement;
418                        quality = 1.0;
419                    }
420                
421 kumpf     1.21     PEG_METHOD_EXIT();
422                }
423                
424                void LanguageParser::_parseLanguageSubtags(
425                    Array<String>& subtags,
426                    const String& languageTagString)
427                {
428                    PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::_parseLanguageSubtags");
429                
430                    // Parse the language tag into subtags
431                
432                    Uint32 subtagIndex = 0;
433                    Uint32 separatorIndex;
434                    while ((separatorIndex = languageTagString.find(
435                                subtagIndex, LANGUAGE_TAG_SEPARATOR_CHAR)) != PEG_NOT_FOUND)
436                    {
437                        subtags.append(languageTagString.subString(
438                            subtagIndex, separatorIndex - subtagIndex));
439                        subtagIndex = separatorIndex + 1;
440                    }
441                    subtags.append(languageTagString.subString(subtagIndex));
442 kumpf     1.21 
443                    // Validate the syntax of each of the subtags
444                
445                    for (Uint32 i = 0, n = subtags.size(); i < n; i++)
446                    {
447                        if (((i == 0) && !_isValidPrimarySubtagSyntax(subtags[i])) ||
448                            ((i > 0) && !_isValidSubtagSyntax(subtags[i])))
449                        {
450                            MessageLoaderParms parms(
451                                "Common.LanguageParser.MALFORMED_LANGUAGE_TAG",
452                                "Malformed language tag \"$0\".", languageTagString);
453                            PEG_METHOD_EXIT();
454                            throw Exception(MessageLoader::getMessage(parms));
455                        }
456                    }
457                
458                    PEG_METHOD_EXIT();
459 humberto  1.8  }
460                
461 kumpf     1.21 Boolean LanguageParser::_isValidPrimarySubtagSyntax(const String& subtag)
462                {
463                    if ((subtag.size() == 0) || (subtag.size() > 8))
464                    {
465                        return false;
466                    }
467                
468                    for (Uint32 i = 0, n = subtag.size(); i < n; i++)
469                    {
470                        if (!(isascii(subtag[i]) && isalpha(subtag[i])))
471                        {
472                            return false;
473                        }
474                    }
475                
476                    return true;
477                }
478                
479                Boolean LanguageParser::_isValidSubtagSyntax(const String& subtag)
480                {
481                    if ((subtag.size() == 0) || (subtag.size() > 8))
482 kumpf     1.21     {
483                        return false;
484                    }
485                
486                    for (Uint32 i = 0, n = subtag.size(); i < n; i++)
487                    {
488                        if (!(isascii(subtag[i]) && isalnum(subtag[i])))
489                        {
490                            return false;
491                        }
492                    }
493                
494                    return true;
495                }
496 humberto  1.8  
497                PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2