(file) Return to LanguageParser.cpp CVS log (file) (dir) Up to [Pegasus] / pegasus / src / Pegasus / Common

  1 karl  1.10 //%2004////////////////////////////////////////////////////////////////////////
  2 humberto 1.8  //
  3 karl     1.10 // Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
  4               // Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
  5               // Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
  6 karl     1.9  // IBM Corp.; EMC Corporation, The Open Group.
  7 karl     1.10 // Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
  8               // IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
  9 humberto 1.8  //
 10               // Permission is hereby granted, free of charge, to any person obtaining a copy
 11               // of this software and associated documentation files (the "Software"), to
 12               // deal in the Software without restriction, including without limitation the
 13               // rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 14               // sell copies of the Software, and to permit persons to whom the Software is
 15               // furnished to do so, subject to the following conditions:
 16               // 
 17               // THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
 18               // ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
 19               // "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
 20               // LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 21               // PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 22               // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 23               // ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 24               // WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 25               //
 26               //==============================================================================
 27               //
 28               // Author: Humberto Rivero (hurivero@us.ibm.com)
 29               //
 30 humberto 1.8  // Modified By:
 31               //
 32               //%/////////////////////////////////////////////////////////////////////////////
 33               
#include <Pegasus/Common/LanguageParser.h>
#include <Pegasus/Common/InternalException.h>
#include <Pegasus/Common/Tracer.h>
#include <Pegasus/Common/MessageLoader.h> //l10n
#include <cctype>
#include <cstdlib>
#include <cstring>
 39               
 40               //PEGASUS_USING_STD;
 41               PEGASUS_NAMESPACE_BEGIN
 42               
 43               const LanguageParser LanguageParser::EMPTY = LanguageParser(); // static member, copy-initialized from a default-constructed LanguageParser
 44               
 45               void LanguageParser::parseHdr(Array<String> &values, String & hdr){
 46               	// look for ',' which designates distict (Accept/Content)-Language fields
 47               	// the form: [languagetag, languagetag, languagetag] so whitespace removal
 48               	// may be necessary.
 49               	// then store them in the array
 50               	PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseHdr");
 51 humberto 1.8  	Uint32 i = 0;
 52               	while( i != PEG_NOT_FOUND ){
 53               		i = hdr.find(",");
 54               		if( i != PEG_NOT_FOUND ){
 55               			values.append(hdr.subString(0,i));
 56               			while(hdr[i+1] == ' ') i++;  // get rid of whitespace after ","
 57               			hdr = hdr.subString(i+1);
 58               		}
 59               		else{  // only one field, build an object with it
 60               			values.append(hdr);
 61               		}
 62               	}
 63               	PEG_METHOD_EXIT();
 64               }
 65               
 66               Real32 LanguageParser::parseAcceptLanguageValue(String &language_tag, String & hdr){
 67               	// look for ';' in hdr, that means we have a quality value to capture
 68                   // if not, we only have a language
 69                   
 70                   // if hdr begins with "x-" then we have a non-IANA (private) language tag
 71               	 PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseAcceptLanguageValue");
 72 humberto 1.8      Uint32 i;
 73                   Boolean validate_length = true;
 74               	if((( i = hdr.find("x-")) != PEG_NOT_FOUND ) && (i == 0)){
 75               		hdr = convertPrivateLanguageTag(hdr);
 76               		validate_length = false;	
 77               	}
 78               	
 79               	// get rid of any beginning or trailing whitespaces
 80               	Uint32 j;
 81               	while( (j = hdr.find(" ")) != PEG_NOT_FOUND ){
 82               		hdr.remove(j,1);
 83               	}
 84               	
 85                   Real32 quality = 1;
 86               	i = hdr.find(";");
 87               	if(i != PEG_NOT_FOUND){ // extract and store language and quality
 88               		if(isValid(hdr.subString(0,i), validate_length)){
 89               			language_tag = hdr.subString(0,i);
 90               			if(hdr.size() > i + 3)
 91               				hdr.remove(0,i+3);  // remove everything but the quality value
 92               			else{
 93 humberto 1.8  				MessageLoaderParms parms("Common.LanguageParser.INVALID_QUALITY_VALUE",
 94               									 "AcceptLanguage contains an invalid quality value");
 95               				throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));	
 96               			}
 97               		}
 98               		else{
 99               			//l10n
100               			//throw InvalidAcceptLanguageHeader(
101               				//"AcceptLanguage contains too many characters or non-alpha characters");
102               			MessageLoaderParms parms("Common.LanguageParser.TOO_MANY_OR_NON_ALPHA_CHARACTERS_AL",
103               									 "AcceptLanguage contains too many characters or non-alpha characters");
104               			throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
105               		}
106               		//validate quality 	
107               		quality = atof(hdr.getCString());
108               		if(quality > 1.0 || quality < 0.0){
109               			//l10n
110               			//throw InvalidAcceptLanguageHeader(
111               				//"AcceptLanguage contains an invalid quality value");
112               			MessageLoaderParms parms("Common.LanguageParser.INVALID_QUALITY_VALUE",
113               									 "AcceptLanguage contains an invalid quality value");
114 humberto 1.8  			throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
115               		}
116               	}
117               	else{	// extract and store language, quality defaults to 1.0
118               		if(isValid(hdr, validate_length)) language_tag = hdr;
119               		else{ 
120               			//l10n
121               			//throw InvalidAcceptLanguageHeader(
122               				//"AcceptLanguage contains too many characters or non-alpha characters");
123               			MessageLoaderParms parms("Common.LanguageParser.TOO_MANY_OR_NON_ALPHA_CHARACTERS_AL",
124               									 "AcceptLanguage contains too many characters or non-alpha characters");
125               			throw InvalidAcceptLanguageHeader(MessageLoader::getMessage(parms));
126               		}
127               	}
128               	
129               	PEG_METHOD_EXIT();
130               	return quality;
131               }
132               
133               String LanguageParser::parseContentLanguageValue(String & hdr){
134               	// we are looking for the language part of the hdr only,
135 humberto 1.8  	// according to the RFC, there may be parenthesized strings
136               	// that describe the purpose of the language, we need to ignore those
137               	PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseContentLanguageValue");
138               	String value = hdr;
139               	Uint32 i,j;
140               	while((i = value.find("(")) != PEG_NOT_FOUND){ // get rid of anything in parenthesis in hdr if found
141               		if((j = value.find(")")) != PEG_NOT_FOUND)
142               			value.remove(i, (j-i)+1);
143               		else{
144               			//l10n
145               			  //throw InvalidContentLanguageHeader(
146               							//"ContentLanguage does not contain terminating ) character");
147               			MessageLoaderParms parms("Common.LanguageParser.DOES_NOT_CONTAIN_TERMINATING",
148               									 "ContentLanguage does not contain terminating ) character");
149               			throw InvalidContentLanguageHeader(MessageLoader::getMessage(parms));
150               		}
151               	}
152               	// get rid of any beginning or trailing whitespaces
153               	while( (i = value.find(" ")) != PEG_NOT_FOUND ){
154               		value.remove(i,1);
155               	}
156 humberto 1.8  	if(!isValid(value)){
157               		//l10n
158               		 //throw InvalidContentLanguageHeader(
159               							//"ContentLanguage contains too many characters or non-alpha characters");
160               		MessageLoaderParms parms("Common.LanguageParser.TOO_MANY_OR_NON_ALPHA_CHARACTERS_CL",
161               								 "ContentLanguage contains too many characters or non-alpha characters");
162               		throw InvalidContentLanguageHeader(MessageLoader::getMessage(parms));
163               	
164               	}
165               	PEG_METHOD_EXIT();
166               	return value;
167               }
168               
169               String LanguageParser::getLanguage(String & language_tag){
170               	// given a language_tag: en-US-mn we want to return "en"
171               	Uint32 i;
172               	if((i = language_tag.find(findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND)
173               		return language_tag.subString(0,i);
174               	return String(language_tag);
175               } 
176               
177 humberto 1.8  String LanguageParser::getCountry(String & language_tag){
178               	// given a language_tag: en-US-mn we want to return "US"
179               	Uint32 i,j;
180               	if( (i = language_tag.find(findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND )
181               		if( (j = language_tag.find(i+1, findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND )
182               			return language_tag.subString(i+1, j-(i+1));
183               		else 
184               			return language_tag.subString(i+1);
185               	return String::EMPTY;
186               }
187               
188               String LanguageParser::getVariant(String & language_tag){
189               	// given a language_tag: en-US-mn we want to return "mn"
190               	Uint32 i;
191               	if( (i = language_tag.find(findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND )
192               		if( (i = language_tag.find(i+1, findSeparator(language_tag.getCString()))) != PEG_NOT_FOUND )
193               			return language_tag.subString(i+1);
194               	return String::EMPTY;
195               }
196               
197               void LanguageParser::parseLanguageSubtags(Array<String> &subtags, String language_tag){
198 humberto 1.8      PEG_METHOD_ENTER(TRC_L10N, "LanguageParser::parseLanguageSubtags");
199               	Uint32 i;
200               	char separator = findSeparator(language_tag.getCString());
201               	while( (i = language_tag.find(Char16(separator))) != PEG_NOT_FOUND ){
202               			subtags.append(language_tag.subString(0,i));
203               			language_tag.remove(0,i + 1);		
204               	}
205               	if(language_tag.size() > 0)
206               		subtags.append(language_tag);
207               	PEG_METHOD_EXIT();
208               }
209               
210               Boolean LanguageParser::isValid(String language_tag, Boolean validate_length){
211               	//break the String down into parts(subtags), then validate each part
212               	
213               	if(language_tag == "*") return true;
214               	
215               	Array<String> subtags;
216               	parseLanguageSubtags(subtags, language_tag);
217               	if(subtags.size() > 0){ 
218               		for(Uint32 i = 0; i < subtags.size(); i++){
219 humberto 1.8  			//length should be 8 or less AND all characters should be A-Z or a-z
220               			if((validate_length && subtags[i].size() > 8) || !checkAlpha(subtags[i].getCString()))
221               				return false;
222               		}
223               	}
224               	else{ //nothing back from parseLanguageSubtags
225               		return false;
226               	}
227               	return true;
228               }
229               
230               String LanguageParser::convertPrivateLanguageTag(String language_tag){
231               	// figure out if its a unix style locale or windows locale
232               	Uint32 i;
233               	if(( i = language_tag.find("pegasus-")) != PEG_NOT_FOUND ){
234               		language_tag = language_tag.subString(i+5);  // capture the remainder of the string
235               		return String(replaceSeparator(language_tag.getCString(), '-'));
236               	}
237               	//else if( (i = language_tag.find("win-")) != PEG_NOT_FOUND ){
238               	  // return language_tag.subString(i+4);  // capture the remainder of the string
239               		// call windows ID to ICU convert routine or locmap.c function here
240 humberto 1.8  	//}
241               	else{
242               		return language_tag;
243               	}		
244               }
245               
246               Boolean LanguageParser::checkAlpha(CString _str){
247 humberto 1.7  	Uint32 length = (Uint32) strlen(_str);
248 humberto 1.8  	for(Uint32 i = 0; i < length; i++)
249               		if( !isalpha(_str[i]) )
250               			return false;
251               	return true;
252               }
253               
254               char LanguageParser::findSeparator(CString _str){
255 kumpf    1.4  	Uint32 length = (Uint32) strlen(_str);
256 humberto 1.8  	for(Uint32 i = 0; i < length; i++)
257               		if(!isalnum(_str[i]))
258               			return _str[i];
259               	return '\0';
260               }
261               
262               CString LanguageParser::replaceSeparator(CString _s, char new_sep){
263               	char * _str = const_cast<char *>((const char*)_s);
264 kumpf    1.4  	Uint32 length = (Uint32) strlen(_str);
265 humberto 1.8  	for(Uint32 i = 0; i < length; i++)
266               		_str[i] = (!isalnum(_str[i])) ? new_sep : _str[i];
267               	return (String(_str)).getCString();
268               }
269               
270               
271               PEGASUS_NAMESPACE_END

No CVS admin address has been configured
Powered by
ViewCVS 0.9.2