pegasus/src/Pegasus/Common/CommonUTF.cpp - view

Return to CommonUTF.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

File: [Pegasus] / pegasus / src / Pegasus / Common / CommonUTF.cpp (download)
Revision: 1.17.2.1, Fri Feb 10 16:09:35 2006 UTC (18 years, 4 months ago) by a.dunfey
Branch: PEP233_EmbeddedInstSupport-branch
Changes since 1.17: +4 -2 lines

PEP#: 233
TITLE: Updated Embedded Instance Support branch

DESCRIPTION:

- Merged with latest code from 2.5.1

- Modified ObjectNormalizer to use a CIMOMHandle instead of the repository and to normalize the instances in an embedded instance property.

//%2006////////////////////////////////////////////////////////////////////////
//
// Copyright (c) 2000, 2001, 2002 BMC Software; Hewlett-Packard Development
// Company, L.P.; IBM Corp.; The Open Group; Tivoli Systems.
// Copyright (c) 2003 BMC Software; Hewlett-Packard Development Company, L.P.;
// IBM Corp.; EMC Corporation, The Open Group.
// Copyright (c) 2004 BMC Software; Hewlett-Packard Development Company, L.P.;
// IBM Corp.; EMC Corporation; VERITAS Software Corporation; The Open Group.
// Copyright (c) 2005 Hewlett-Packard Development Company, L.P.; IBM Corp.;
// EMC Corporation; VERITAS Software Corporation; The Open Group.
// Copyright (c) 2006 Hewlett-Packard Development Company, L.P.; IBM Corp.;
// EMC Corporation; Symantec Corporation; The Open Group.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// 
// THE ABOVE COPYRIGHT NOTICE AND THIS PERMISSION NOTICE SHALL BE INCLUDED IN
// ALL COPIES OR SUBSTANTIAL PORTIONS OF THE SOFTWARE. THE SOFTWARE IS PROVIDED
// "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
// LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
// PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//==============================================================================
//
// Author: Dave Rosckes   (rosckes@us.ibm.com)
//
// Modified By: David Dillard, VERITAS Software Corp.
//                  (david.dillard@veritas.com)
//              Yi Zhou, Hewlett-Packard Company (yi.zhou@hp.com)
//
//%/////////////////////////////////////////////////////////////////////////////

#include <Pegasus/Common/Config.h>
#include <Pegasus/Common/Array.h>
#include <Pegasus/Common/Logger.h>
#include "CommonUTF.h"
#include <cstdio>
#include <cstring>
#include <cctype>

#ifdef PEGASUS_HAS_ICU
#include <unicode/uclean.h>
#endif

PEGASUS_NAMESPACE_BEGIN


// Converts a single hexadecimal digit character ('0'-'9', 'A'-'F' or
// 'a'-'f') to its numeric value (0-15).
//
// The ASCII ranges are compared explicitly rather than through the <cctype>
// classifiers: "c" is a 16-bit character, and isdigit()/isupper() have
// undefined behavior for arguments that are not representable as an
// unsigned char (or EOF). They are also locale dependent, which is not
// wanted when parsing a fixed escape syntax.
//
// Any character that is not a hexadecimal digit yields 0.
inline Uint8 _hexCharToNumeric(Char16 c)
{
    // Same implicit Char16 -> integer conversion the arithmetic below
    // already depends on.
    const int code = c;

    if (code >= '0' && code <= '9')
        return (Uint8)(code - '0');

    if (code >= 'A' && code <= 'F')
        return (Uint8)(code - 'A' + 10);

    if (code >= 'a' && code <= 'f')
        return (Uint8)(code - 'a' + 10);

    // Not a hexadecimal digit.
    return 0;
}

// Checks whether the "size" bytes starting at "src" form one well-formed
// UTF-8 encoded character.
//
//   src   first (lead) byte of the candidate sequence
//   size  number of bytes in the sequence; only 1 through 4 can be valid
//
// Returns true when the sequence is well formed, false otherwise.
//
// The bytes are examined from the last one back to the first; the outer
// switch deliberately falls through from the longer cases into the shorter
// ones. The function does not check that "size" agrees with the length
// implied by the lead byte; the caller is expected to derive "size" from it.
//
// Note: Caller must ensure that "src" contains "size" bytes.
Boolean isValid_U8(const Uint8 *src, int size)
{
    Uint8 U8_char;
    const Uint8 *srcptr = src + size;

    switch (size)
    {
        case 4:
            // Fourth byte must be a continuation byte (0x80..0xBF).
            if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
            {
                return false;
            }
            // fall through
        case 3:
            // Third byte must be a continuation byte (0x80..0xBF).
            if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
            {
                return false;
            }
            // fall through
        case 2:
            // Second byte: the upper bound is common to all lead bytes, the
            // lower bound depends on the lead byte and is checked below.
            if ((U8_char = (*--srcptr)) > 0xBF)
            {
                return false;
            }

            switch (*src)
            {
                case 0xE0:
                    // Below 0xA0 would be an overlong 3-byte form.
                    if (U8_char < 0xA0)
                    {
                        return false;
                    }
                    break;
                case 0xED:
                    // Above 0x9F would encode a UTF-16 surrogate
                    // (U+D800..U+DFFF), which is not legal in UTF-8.
                    if (U8_char > 0x9F)
                    {
                        return false;
                    }
                    break;
                case 0xF0:
                    // Below 0x90 would be an overlong 4-byte form.
                    if (U8_char < 0x90)
                    {
                        return false;
                    }
                    break;
                case 0xF4:
                    // Above 0x8F would be beyond U+10FFFF.
                    if (U8_char > 0x8F)
                    {
                        return false;
                    }
                    break;
                default:
                    if (U8_char < 0x80)
                    {
                        return false;
                    }
            }
            // fall through
        case 1:
            // 0x80..0xBF are continuation bytes and 0xC0/0xC1 can only
            // start overlong forms, so none of them may lead a sequence.
            if (*src >= 0x80 && *src < 0xC2)
            {
                return false;
            }
            // Lead bytes above 0xF4 would encode values beyond U+10FFFF.
            if (*src > 0xF4)
            {
                return false;
            }
            break;
        default:
            // Sizes other than 1..4 are never valid.
            return false;
    }

    return true;
}

int UTF16toUTF8(const Uint16** srcHead,
        const Uint16* srcEnd,
        Uint8** tgtHead,
        Uint8* tgtEnd)
{
    int returnCode = 0;
    const Uint16* src = *srcHead;
    Uint8* tgt = *tgtHead;
    while (src < srcEnd)
    {
	if (*src < 128)
	{
	    if (tgt == tgtEnd)
	    {
		returnCode = -1;
		break;
	    }

	    *tgt++ = *src++;
	    continue;
	}

    Uint32 tempchar;
    Uint16 numberOfBytes = 0;
    const Uint16* oldsrc = src;
    tempchar = *src++;
    if (tempchar >= FIRST_HIGH_SURROGATE
        && tempchar <= LAST_HIGH_SURROGATE)
    {
        if (src < srcEnd)
        {
        Uint32 tempchar2 = *src;
        if (tempchar2 >= FIRST_LOW_SURROGATE &&
            tempchar2 <= LAST_LOW_SURROGATE)
        {
            tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
              + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
            ++src;
        }
        }
        else
        {
        --src;
        returnCode = -1;
        break;
        }
    }
    if (tempchar < (Uint32)0x80)
    {
        numberOfBytes = 1;
    }
    else if (tempchar < (Uint32)0x800)
    {
        numberOfBytes = 2;
    }
    else if (tempchar < (Uint32)0x10000)
    {
        numberOfBytes = 3;
    }
    else if (tempchar < (Uint32)0x200000)
    {
        numberOfBytes = 4;
    }
    else
    {
        numberOfBytes = 2;
        tempchar = REPLACEMENT_CHARACTER;
    }

    tgt += numberOfBytes;
    if (tgt > tgtEnd)
    {
        src = oldsrc;
        tgt -= numberOfBytes;
        returnCode = -1;
        break;
    }

    switch (numberOfBytes)
    {
        case 4:
        *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
        tempchar >>= 6;
        case 3:
        *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
        tempchar >>= 6;
        case 2:
        *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
        tempchar >>= 6;
        case 1:
        *--tgt =  (Uint8)(tempchar | firstByteMark[numberOfBytes]);
    }
    tgt += numberOfBytes;
    }
    *srcHead = src;
    *tgtHead = tgt;
    return returnCode;
}

// Converts a run of UTF-8 bytes into UTF-16 code units.
//
//   srcHead  in: address of the first byte to convert
//            out: address of the first byte that was NOT converted
//   srcEnd   one past the last input byte
//   tgtHead  in: address of the first free output code unit
//            out: address one past the last code unit written
//   tgtEnd   one past the last usable output code unit
//
// Returns 0 when the whole input was converted. Returns -1 when conversion
// stopped early, either because the input ends in the middle of a multi-byte
// sequence or because the output space is exhausted. In both cases *srcHead
// is left at the lead byte of the character that could not be converted.
//
// The bytes of a sequence are accumulated without checking that they are
// well formed; values that cannot be represented are written as
// REPLACEMENT_CHARACTER.
int UTF8toUTF16 (const Uint8** srcHead,
         const Uint8* srcEnd,
         Uint16** tgtHead,
         Uint16* tgtEnd)
{
    int returnCode = 0;
    const Uint8* src = *srcHead;
    Uint16* tgt = *tgtHead;
    while (src < srcEnd)
    {
        Uint32 tempchar = 0;
        Uint16 moreBytes = trailingBytesForUTF8[*src];

        // The sequence needs moreBytes + 1 bytes. Compare distances
        // instead of forming a pointer that may lie beyond srcEnd.
        if ((srcEnd - src) <= moreBytes)
        {
            returnCode = -1;
            break;
        }

        // Accumulate the sequence six bits at a time; every case falls
        // through into the next one on purpose.
        //
        // Cases 5 and 4 cover lead bytes for which the lookup table reports
        // more than three trailing bytes (the table is defined outside this
        // file -- TODO confirm it can yield 4 or 5). Such sequences are
        // never legal UTF-8, but they must still be consumed: without these
        // cases no input would be read, the loop would not advance, and the
        // rollback below would move "src" in front of the input buffer.
        switch (moreBytes)
        {
            case 5:
                tempchar += *src++;
                tempchar <<= 6;
                // fall through
            case 4:
                tempchar += *src++;
                tempchar <<= 6;
                // fall through
            case 3:
                tempchar += *src++;
                tempchar <<= 6;
                // fall through
            case 2:
                tempchar += *src++;
                tempchar <<= 6;
                // fall through
            case 1:
                tempchar += *src++;
                tempchar <<= 6;
                // fall through
            case 0:
                tempchar += *src++;
        }
        // Remove the lead-byte and continuation-byte marker bits that were
        // summed in along with the payload.
        tempchar -= offsetsFromUTF8[moreBytes];

        if (tgt >= tgtEnd)
        {
            // No room for even one code unit: un-read the sequence.
            src -= (moreBytes+1);
            returnCode = -1;
            break;
        }

        if (tempchar <= MAX_BYTE)
        {
            // Fits in one code unit. Surrogate values and values ending in
            // FFFE/FFFF are replaced rather than written.
            if ((tempchar >= FIRST_HIGH_SURROGATE &&
                 tempchar <= LAST_LOW_SURROGATE) ||
                ((tempchar & 0xFFFE) == 0xFFFE))
            {
                *tgt++ = REPLACEMENT_CHARACTER;
            }
            else
            {
                *tgt++ = (Uint16)tempchar;
            }
        }
        else if (tempchar > MAX_UTF16)
        {
            // Not representable in UTF-16.
            *tgt++ = REPLACEMENT_CHARACTER;
        }
        else
        {
            // Needs a surrogate pair, i.e. two free code units.
            if (tgt + 1 >= tgtEnd)
            {
                src -= (moreBytes+1);
                returnCode = -1;
                break;
            }
            tempchar -= halfBase;
            *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
            *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
        }
    }
    *srcHead = src;
    *tgtHead = tgt;
    return returnCode;
}

// Checks whether the NUL-terminated string "legal" starts with one
// well-formed UTF-8 character.
//
// The expected sequence length is derived from the first byte through
// UTF_8_COUNT_TRAIL_BYTES. Returns false when the string terminates before
// that many bytes are available; otherwise returns the result of
// isValid_U8() for the sequence.
Boolean isUTF8Aux(const char *legal)
{
    const char sequenceLength = UTF_8_COUNT_TRAIL_BYTES(*legal) + 1;

    // Make sure every expected trailing byte is present before validating.
    // A terminator inside the sequence means the string is cut short.
    // (When legal[0] == 0 the length is 1 and nothing is scanned here.)
    char offset = 1;
    while (offset < sequenceLength)
    {
        if (legal[offset] == 0)
        {
            return false;
        }
        ++offset;
    }

    return isValid_U8((const Uint8 *)legal, sequenceLength);
}

// Checks whether every character of the NUL-terminated string "legal" is
// accepted by isUTF8().
//
// Walks the string one encoded character at a time, using UTF8_NEXT to
// advance the offset. Returns false at the first character that is
// rejected, true when the end of the string is reached (including for an
// empty string).
Boolean isUTF8Str(const char *legal)
{
    const size_t length = strlen(legal);
    size_t offset = 0;

    while (offset < length)
    {
        const Boolean accepted = (isUTF8(&legal[offset]) == true);
        if (!accepted)
        {
            return false;
        }

        // Advance "offset" to the start of the next encoded character.
        UTF8_NEXT(legal, offset);
    }

    return true;
}

// Produces a 7-bit-only copy of "Str".
//
// Characters up to 0x7F are copied unchanged. Every other character is
// replaced by '%' followed by its 16-bit value as four upper-case
// hexadecimal digits. A '%' already present in the input is copied
// unchanged like any other 7-bit character.
String escapeStringEncoder(const String& Str)
{
    String encoded;

    for (Uint32 pos = 0; pos < Str.size(); ++pos)
    {
        Uint16 codeUnit = Str[pos];

        if (codeUnit <= 0x7F)
        {
            encoded.append(codeUnit);
            continue;
        }

        // '%' + three digits for the upper 12 bits + one digit for the
        // lower 4 bits + terminator = 6 bytes.
        char escape[6] = { 0 };
        sprintf(escape, "%%%03X%X", codeUnit/16, codeUnit%16);
        encoded.append(escape);
    }

    return encoded;
}

// Reverses escapeStringEncoder(): every '%' in "Str" is taken to introduce
// four hexadecimal digits, which are combined (most significant digit first)
// into one 16-bit character. All other characters are copied unchanged.
//
// The input is not validated: the four characters following a '%' are
// passed to _hexCharToNumeric() whatever they are, and they are read with
// Str[...] even when fewer than four characters remain.
//
// The result is built from a zero-terminated buffer, so it ends at the
// first zero character in the decoded data. An empty input yields an empty
// String.
String escapeStringDecoder(const String& Str)
{
    // Nothing to decode.
    if (Str.size() == 0)
    {
        return String();
    }

    Array<Uint16> codeUnits;

    for (Uint32 pos = 0; pos < Str.size(); ++pos)
    {
        const Boolean isEscape = (Str[pos] == '%');

        if (!isEscape)
        {
            codeUnits.append((Uint16)Str[pos]);
            continue;
        }

        // Fold the next four hex digits into one value, four bits each.
        Uint32 value = 0;
        for (int digit = 0; digit < 4; ++digit)
        {
            value = (value << 4) + _hexCharToNumeric((Str[++pos]));
        }

        codeUnits.append((Uint16)value);
    }

    // Terminate the buffer so it can be handed over as a C-style
    // 16-bit character string.
    codeUnits.append('\0');
    return String((Char16 *)codeUnits.getData());
}

#ifdef PEGASUS_HAS_ICU

// Set once an initialization attempt has completed, whatever its outcome.
Boolean InitializeICU::_initAttempted = false;
// Outcome of that attempt; false until u_init() has succeeded.
Boolean InitializeICU::_initSuccessful = false;
// Held while the initialization attempt is being made.
Mutex InitializeICU::_initMutex;

// Initializes ICU on first use and reports whether that succeeded.
//
// u_init() is called at most once. The "attempted" flag is tested once
// without the mutex and again after acquiring it; only a caller that still
// sees the flag unset while holding the mutex performs the initialization.
// A failure is logged as a warning and remembered; it is not retried.
//
// Returns true if ICU was initialized successfully, false otherwise.
Boolean InitializeICU::initICUSuccessful()
{
    // Initialization already attempted: just report the stored outcome.
    if (_initAttempted)
    {
        return _initSuccessful;
    }

    {
        AutoMutex lock(_initMutex);

        // Test again now that the mutex is held.
        if (!_initAttempted)
        {
            UErrorCode icuStatus = U_ZERO_ERROR;

            // Initialize ICU
            u_init(&icuStatus);

            const Boolean failed = U_FAILURE(icuStatus);
            _initSuccessful = !failed;

            if (failed)
            {
                Logger::put(
                    Logger::STANDARD_LOG,
                    System::CIMSERVER,
                    Logger::WARNING,
                    "ICU initialization failed with error: $0.",
                    icuStatus);
            }

            // Set last, after the outcome has been stored.
            _initAttempted = true;
        }
    }

    return _initSuccessful;
}

#endif

PEGASUS_NAMESPACE_END

No CVS admin address has been configured