pegasus/src/Pegasus/Common/CommonUTF.cpp - view

Return to CommonUTF.cpp CVS log

Up to [Pegasus] / pegasus / src / Pegasus / Common

File: [Pegasus] / pegasus / src / Pegasus / Common / CommonUTF.cpp (download)
Revision: 1.26, Sun Jun 6 16:04:16 2010 UTC (14 years, 1 month ago) by marek
Branch: MAIN
CVS Tags: preBug9676, postBug9676, TASK_PEP317_1JUNE_2013, TASK-TASK_PEP362_RestfulService_branch-root, TASK-TASK_PEP362_RestfulService_branch-merged_out_from_trunk, TASK-TASK_PEP362_RestfulService_branch-merged_in_to_trunk, TASK-TASK_PEP362_RestfulService_branch-merged_in_from_branch, TASK-TASK_PEP362_RestfulService_branch-branch, TASK-PEP362_RestfulService-root, TASK-PEP362_RestfulService-merged_out_to_branch, TASK-PEP362_RestfulService-merged_out_from_trunk, TASK-PEP362_RestfulService-merged_in_to_trunk, TASK-PEP362_RestfulService-merged_in_from_branch, TASK-PEP362_RestfulService-branch, TASK-PEP317_pullop-root, TASK-PEP317_pullop-merged_out_to_branch, TASK-PEP317_pullop-merged_out_from_trunk, TASK-PEP317_pullop-merged_in_to_trunk, TASK-PEP317_pullop-merged_in_from_branch, TASK-PEP317_pullop-branch, RELEASE_2_14_1, RELEASE_2_14_0-RC2, RELEASE_2_14_0-RC1, RELEASE_2_14_0, RELEASE_2_14-root, RELEASE_2_14-branch, RELEASE_2_13_0-RC2, RELEASE_2_13_0-RC1, RELEASE_2_13_0-FC, RELEASE_2_13_0, RELEASE_2_13-root, RELEASE_2_13-branch, RELEASE_2_12_1-RC1, RELEASE_2_12_1, RELEASE_2_12_0-RC1, RELEASE_2_12_0-FC, RELEASE_2_12_0, RELEASE_2_12-root, RELEASE_2_12-branch, RELEASE_2_11_2-RC1, RELEASE_2_11_2, RELEASE_2_11_1-RC1, RELEASE_2_11_1, RELEASE_2_11_0-RC1, RELEASE_2_11_0-FC, RELEASE_2_11_0, RELEASE_2_11-root, RELEASE_2_11-branch, PREAUG25UPDATE, POSTAUG25UPDATE, HEAD, CIMRS_WORK_20130824, BeforeUpdateToHeadOct82011
Changes since 1.25: +2 -2 lines

BUG#:8642
TITLE: New warnings reported by gcc 4.3

DESCRIPTION:

//%LICENSE////////////////////////////////////////////////////////////////
//
// Licensed to The Open Group (TOG) under one or more contributor license
// agreements.  Refer to the OpenPegasusNOTICE.txt file distributed with
// this work for additional information regarding copyright ownership.
// Each contributor licenses this file to you under the OpenPegasus Open
// Source License; you may not use this file except in compliance with the
// License.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
//////////////////////////////////////////////////////////////////////////
//
//%////////////////////////////////////////////////////////////////////////////

#include <Pegasus/Common/Config.h>
#include <Pegasus/Common/Array.h>
#include <Pegasus/Common/Logger.h>
#include "CommonUTF.h"
#include <Pegasus/Common/String.h>
#include <cstdio>
#include <cstring>
#include <cctype>

#ifdef PEGASUS_HAS_ICU
#include <unicode/uclean.h>
#endif

PEGASUS_NAMESPACE_BEGIN

const Uint32 halfBase = 0x0010000UL;
const Uint32 halfMask = 0x3FFUL;
const int halfShift  = 10;
const Uint8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

const Uint32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
             0x03C82080UL, 0xFA082080UL, 0x82082080UL };

const char trailingBytesForUTF8[256] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
inline Uint8 _hexCharToNumeric(Char16 c)
{
    Uint8 n;

    if (isdigit(c))
        n = (c - '0');
    else if (isupper(c))
        n = (c - 'A' + 10);
    else // if (islower(c))
        n = (c - 'a' + 10);

    return n;
}

// Note: Caller must ensure that "src" contains "size" bytes.
Boolean isValid_U8(const Uint8 *src, int size)
{
    Uint8 U8_char;
    const Uint8 *srcptr = src+size;
    switch (size)
    {
    case 4:
        if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
        {
        return false;
        }
    case 3:
        if ((U8_char = (*--srcptr)) < 0x80 || U8_char > 0xBF)
        {
        return false;
        }
    case 2:
        if ((U8_char = (*--srcptr)) > 0xBF)
        {
        return false;
        }
        switch (*src)
        {
        case 0xE0:
            if (U8_char < 0xA0)
            {
            return false;
            }
            break;
        case 0xF0:
            if (U8_char < 0x90)
            {
            return false;
            }
            break;
        case 0xF4:
            if (U8_char > 0x8F)
            {
            return false;
            }
            break;
        default:
            if (U8_char < 0x80)
            {
            return false;
            }
        }
    case 1:
        if (*src >= 0x80 && *src < 0xC2)
        {
        return false;
        }
        if (*src > 0xF4)
        {
        return false;
        }
        break;
        default:
        {
        return false;
            }

    }
    return true;
}

int UTF16toUTF8(const Uint16** srcHead,
        const Uint16* srcEnd,
        Uint8** tgtHead,
        Uint8* tgtEnd)
{
    int returnCode = 0;
    const Uint16* src = *srcHead;
    Uint8* tgt = *tgtHead;
    while (src < srcEnd)
    {
        if (*src < 128)
        {
            if (tgt == tgtEnd)
            {
                returnCode = -1;
                break;
            }

            *tgt++ = (Uint8)*src++;
            continue;
        }

    Uint32 tempchar;
    Uint16 numberOfBytes = 0;
    const Uint16* oldsrc = src;
    tempchar = *src++;
    if (tempchar >= FIRST_HIGH_SURROGATE
        && tempchar <= LAST_HIGH_SURROGATE)
    {
        if (src < srcEnd)
        {
        Uint32 tempchar2 = *src;
        if (tempchar2 >= FIRST_LOW_SURROGATE &&
            tempchar2 <= LAST_LOW_SURROGATE)
        {
            tempchar = ((tempchar - FIRST_HIGH_SURROGATE) << halfShift)
              + (tempchar2 - FIRST_LOW_SURROGATE) + halfBase;
            ++src;
        }
        }
        else
        {
        --src;
        returnCode = -1;
        break;
        }
    }
    if (tempchar < (Uint32)0x80)
    {
        numberOfBytes = 1;
    }
    else if (tempchar < (Uint32)0x800)
    {
        numberOfBytes = 2;
    }
    else if (tempchar < (Uint32)0x10000)
    {
        numberOfBytes = 3;
    }
    else if (tempchar < (Uint32)0x200000)
    {
        numberOfBytes = 4;
    }
    else
    {
        numberOfBytes = 2;
        tempchar = REPLACEMENT_CHARACTER;
    }

    tgt += numberOfBytes;
    if (tgt > tgtEnd)
    {
        src = oldsrc;
        tgt -= numberOfBytes;
        returnCode = -1;
        break;
    }

    switch (numberOfBytes)
    {
        case 4:
        *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
        tempchar >>= 6;
        case 3:
        *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
        tempchar >>= 6;
        case 2:
        *--tgt = (Uint8)((tempchar | 0x80) & 0xBF);
        tempchar >>= 6;
        case 1:
        *--tgt =  (Uint8)(tempchar | firstByteMark[numberOfBytes]);
    }
    tgt += numberOfBytes;
    }
    *srcHead = src;
    *tgtHead = tgt;
    return returnCode;
}

int UTF8toUTF16 (const Uint8** srcHead,
         const Uint8* srcEnd,
         Uint16** tgtHead,
         Uint16* tgtEnd)
{
    int returnCode = 0;
    const Uint8* src = *srcHead;
    Uint16* tgt = *tgtHead;
    while (src < srcEnd)
    {
    Uint32 tempchar = 0;
    Uint16 moreBytes = trailingBytesForUTF8[*src];
    if (src + moreBytes >= srcEnd)
    {
        returnCode = -1;
        break;
    }
    switch (moreBytes)
    {
        case 3:
        tempchar += *src++;
        tempchar <<= 6;
        case 2:
        tempchar += *src++;
        tempchar <<= 6;
        case 1:
        tempchar += *src++;
        tempchar <<= 6;
        case 0:
        tempchar += *src++;
    }
    tempchar -= offsetsFromUTF8[moreBytes];

    if (tgt >= tgtEnd)
    {
        src -= (moreBytes+1);
        returnCode = -1; break;
    }
    if (tempchar <= MAX_BYTE)
    {
        if ((tempchar >= FIRST_HIGH_SURROGATE &&
         tempchar <= LAST_LOW_SURROGATE) ||
        ((tempchar & 0xFFFE) == 0xFFFE))
        {
        *tgt++ = REPLACEMENT_CHARACTER;
        }
        else
        {
        *tgt++ = (Uint16)tempchar;
        }
    }
    else if (tempchar > MAX_UTF16)
    {
        *tgt++ = REPLACEMENT_CHARACTER;
    }
    else
    {
        if (tgt + 1 >= tgtEnd)
        {
        src -= (moreBytes+1);
        returnCode = -1;
        break;
        }
        tempchar -= halfBase;
        *tgt++ = (Uint16)((tempchar >> halfShift) + FIRST_HIGH_SURROGATE);
        *tgt++ = (Uint16)((tempchar & halfMask) + FIRST_LOW_SURROGATE);
    }
    }
    *srcHead = src;
    *tgtHead = tgt;
    return returnCode;
}

Boolean isUTF8Aux(const char *legal)
{
    unsigned char numBytes = UTF_8_COUNT_TRAIL_BYTES(*legal)+1;

    // Validate that the string is long enough to hold all the expected bytes.
    // Note that if legal[0] == 0, numBytes will be 1.
    for (unsigned char i=1; i<numBytes; i++)
    {
        if (legal[i] == 0)
        {
            return false;
        }
    }

    return isValid_U8((const Uint8 *)legal, numBytes);
}

Boolean isUTF8Str(const char *legal)
{
    /*char tmp[] = {0xCE,0x99,0xCE,0xBF,0xCF,0x8D,0xCE,0xBD,0xCE,
                      0xB9,0xCE,0xBA,0xCE,0xBF,0xCE,0xBD,0xCF,0x84,
                      0x00};*/
//  char tmp_[] = "class";
//  char * tmp = legal;
    size_t count = 0;
    const size_t size = strlen(legal);
//  printf("size = %d\n",size);
    while (count<size)
    {
//      printf("count = %d\n",count);
        if (isUTF8(&legal[count]) == true)
        {
            UTF8_NEXT(legal,count);
        }
        else
        {
//          printf("bad string\n");
            return false;
        }
    }
//  printf("good string\n");
    return true;
/*
    printf("legal = %s\n\n", legal);
    Uint32 count = 0;
    Uint32 trailingBytes = 0;
    Uint32 size = strlen(legal);
    printf("size of legal is %d\n",size);
    while (count<size-1)
    {
        printf("count = %d\n", count);
        if (isUTF8((char*)&legal[count]) == true)
        {
            UTF8_NEXT(legal,trailingBytes);
            count += trailingBytes;
        }
        else
        {
            printf("CommonUTF8:: returning false; position[%d]",count);
            return false;
        }
    }
    printf("CommonUTF8:: returning false; position[%d]",count);
    return true;*/
}

String escapeStringEncoder(const String& Str)
{
    String escapeStr;
    Uint16 escChar;
    char hexencoding[6];

    for (Uint32 i = 0; i < Str.size(); ++i)
    {
        escChar = Str[i];
        if (escChar <= 0x7F)
        {
            escapeStr.append(escChar);
        }
        else
        {
            memset(hexencoding,0x00,sizeof(hexencoding));
            sprintf(hexencoding, "%%%03X%X", escChar/16, escChar%16);
            escapeStr.append(hexencoding);
        }
    }
    return escapeStr;
}

String escapeStringDecoder(const String& Str)
{
    Uint32 i;

    Array<Uint16> utf16Chars;

    for (i=0; i< Str.size(); ++i)
    {
        if (Str[i] == '%')
        {
            Uint8 digit1 = _hexCharToNumeric((Str[++i]));
            Uint8 digit2 = _hexCharToNumeric((Str[++i]));
            Uint8 digit3 = _hexCharToNumeric((Str[++i]));
            Uint8 digit4 = _hexCharToNumeric((Str[++i]));

        Uint16 decodedChar = (digit1<<12) + (digit2<<8) +
                                 (digit3<< 4) + (digit4);

            utf16Chars.append(decodedChar);
        }
        else
        {
            utf16Chars.append((Uint16)Str[i]);
        }
    }

    // If there was a string to decode...
    if (Str.size() > 0)
    {
        utf16Chars.append('\0');
        return String((Char16 *)utf16Chars.getData());
    }
    else
    {
        return String();
    }
}

#ifdef PEGASUS_HAS_ICU

Boolean InitializeICU::_initAttempted = false;
Boolean InitializeICU::_initSuccessful = false;
Mutex InitializeICU::_initMutex;

Boolean InitializeICU::initICUSuccessful()
{
    if (!_initAttempted)
    {
        {
            AutoMutex lock(_initMutex);

            if (!_initAttempted)
            {
                UErrorCode _status = U_ZERO_ERROR;

                // Initialize ICU
                u_init(&_status);

                if (U_FAILURE(_status))
                {
                    _initSuccessful = false;
                    Logger::put(
                        Logger::STANDARD_LOG , System::CIMSERVER,
                        Logger::WARNING,
                        "ICU initialization failed with error: $0.",
                        _status);
                }
                else
                {
                    _initSuccessful = true;
                }
                _initAttempted = true;
            }
        }
    }

    return _initSuccessful;
}

#endif

PEGASUS_NAMESPACE_END

No CVS admin address has been configured