| Index: source/common/ucnv2022.cpp |
| =================================================================== |
| --- source/common/ucnv2022.cpp (revision 259715) |
| +++ source/common/ucnv2022.cpp (working copy) |
| @@ -154,7 +154,11 @@ |
| } StateEnum; |
| |
| /* is the StateEnum charset value for a DBCS charset? */ |
| +#if UCONFIG_NO_NON_HTML5_CONVERSION |
| +#define IS_JP_DBCS(cs) (JISX208==(cs)) |
| +#else |
| #define IS_JP_DBCS(cs) (JISX208<=(cs) && (cs)<=KSC5601) |
| +#endif |
| |
| #define CSM(cs) ((uint16_t)1<<(cs)) |
| |
| @@ -167,13 +171,23 @@ |
| * all versions, not just JIS7 and JIS8. |
| * - ICU does not distinguish between different versions of JIS X 0208. |
| */ |
| +#if UCONFIG_NO_NON_HTML5_CONVERSION |
| +enum { MAX_JA_VERSION=0 }; |
| +#else |
| enum { MAX_JA_VERSION=4 }; |
| +#endif |
| static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ |
| +/* |
| + * TODO(jshin): The encoding spec has JISX212, but we don't support it. |
| + * See https://www.w3.org/Bugs/Public/show_bug.cgi?id=26885 |
| + */ |
| CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), |
| CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
| CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), |
| CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) |
| +#endif |
| }; |
| |
| typedef enum { |
| @@ -360,15 +374,18 @@ |
| ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 ,VALID_TERMINAL_2022 |
| }; |
| |
| - |
| /* Type def for refactoring changeState_2022 code*/ |
| typedef enum{ |
| #ifdef U_ENABLE_GENERIC_ISO_2022 |
| ISO_2022=0, |
| #endif |
| +#if UCONFIG_NO_NON_HTML5_CONVERSION |
| + ISO_2022_JP=1 |
| +#else |
| ISO_2022_JP=1, |
| ISO_2022_KR=2, |
| ISO_2022_CN=3 |
| +#endif |
| } Variant2022; |
| |
| /*********** ISO 2022 Converter Protos ***********/ |
| @@ -485,12 +502,15 @@ |
| /* prevent indexing beyond jpCharsetMasks[] */ |
| myConverterData->version = version = 0; |
| } |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| if(jpCharsetMasks[version]&CSM(ISO8859_7)) { |
| myConverterData->myConverterArray[ISO8859_7] = |
| ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode); |
| } |
| +#endif |
| myConverterData->myConverterArray[JISX208] = |
| ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode); |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| if(jpCharsetMasks[version]&CSM(JISX212)) { |
| myConverterData->myConverterArray[JISX212] = |
| ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode); |
| @@ -503,6 +523,7 @@ |
| myConverterData->myConverterArray[KSC5601] = |
| ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode); |
| } |
| +#endif |
| |
| /* set the function pointers to appropriate funtions */ |
| cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); |
| @@ -513,6 +534,7 @@ |
| myConverterData->name[len]=(char)(myConverterData->version+(int)'0'); |
| myConverterData->name[len+1]='\0'; |
| } |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| else if(myLocale[0]=='k' && (myLocale[1]=='o'|| myLocale[1]=='r') && |
| (myLocale[2]=='_' || myLocale[2]=='\0')) |
| { |
| @@ -582,6 +604,7 @@ |
| (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2"); |
| } |
| } |
| +#endif // !UCONFIG_NO_NON_HTML5_CONVERSION |
| else{ |
| #ifdef U_ENABLE_GENERIC_ISO_2022 |
| myConverterData->isFirstBuffer = TRUE; |
| @@ -716,6 +739,7 @@ |
| ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
| }; |
| |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| /*************** to unicode *******************/ |
| static const int8_t nextStateToUnicodeCN[MAX_STATES_2022]= { |
| /* 0 1 2 3 4 5 6 7 8 9 */ |
| @@ -728,6 +752,7 @@ |
| ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
| ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE ,INVALID_STATE |
| }; |
| +#endif |
| |
| |
| static UCNV_TableStates_2022 |
| @@ -880,6 +905,7 @@ |
| } |
| break; |
| /* case SS3_STATE: not used in ISO-2022-JP-x */ |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| case ISO8859_1: |
| case ISO8859_7: |
| if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { |
| @@ -889,6 +915,7 @@ |
| myData2022->toU2022State.cs[2]=(int8_t)tempState; |
| } |
| break; |
| +#endif |
| default: |
| if((jpCharsetMasks[myData2022->version] & CSM(tempState)) == 0) { |
| *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
| @@ -900,6 +927,7 @@ |
| } |
| } |
| break; |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| case ISO_2022_CN: |
| { |
| StateEnum tempState=(StateEnum)nextStateToUnicodeCN[offset]; |
| @@ -961,6 +989,7 @@ |
| *err = U_UNSUPPORTED_ESCAPE_SEQUENCE; |
| } |
| break; |
| +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
| |
| default: |
| *err = U_ILLEGAL_ESCAPE_SEQUENCE; |
| @@ -1381,12 +1410,16 @@ |
| static const StateEnum jpCharsetPref[]={ |
| ASCII, |
| JISX201, |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| ISO8859_1, |
| ISO8859_7, |
| +#endif |
| JISX208, |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| JISX212, |
| GB2312, |
| KSC5601, |
| +#endif |
| HWKANA_7BIT |
| }; |
| |
| @@ -1756,6 +1789,7 @@ |
| g = 0; |
| } |
| break; |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| case ISO8859_1: |
| if(GR96_START <= sourceChar && sourceChar <= GR96_END) { |
| targetValue = (uint32_t)sourceChar - 0x80; |
| @@ -1764,6 +1798,7 @@ |
| g = 2; |
| } |
| break; |
| +#endif |
| case HWKANA_7BIT: |
| if((uint32_t)(sourceChar - HWKANA_START) <= (HWKANA_END - HWKANA_START)) { |
| if(converterData->version==3) { |
| @@ -1825,6 +1860,7 @@ |
| useFallback = FALSE; |
| } |
| break; |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| case ISO8859_7: |
| /* G0 SBCS forced to 7-bit output */ |
| len2 = MBCS_SINGLE_FROM_UCHAR32( |
| @@ -1839,6 +1875,7 @@ |
| useFallback = FALSE; |
| } |
| break; |
| +#endif |
| default: |
| /* G0 DBCS */ |
| len2 = MBCS_FROM_UCHAR32_ISO2022( |
| @@ -1846,6 +1883,7 @@ |
| sourceChar, &value, |
| useFallback, MBCS_OUTPUT_2); |
| if(len2 == 2 || (len2 == -2 && len == 0)) { /* only accept DBCS: abs(len)==2 */ |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| if(cs0 == KSC5601) { |
| /* |
| * Check for valid bytes for the encoding scheme. |
| @@ -1857,6 +1895,7 @@ |
| break; |
| } |
| } |
| +#endif |
| targetValue = value; |
| len = len2; |
| cs = cs0; |
| @@ -2150,6 +2189,7 @@ |
| targetUniChar = mySourceChar; |
| } |
| break; |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| case ISO8859_1: |
| if(mySourceChar <= 0x7f) { |
| targetUniChar = mySourceChar + 0x80; |
| @@ -2168,6 +2208,7 @@ |
| /* return from a single-shift state to the previous one */ |
| pToU2022State->g=pToU2022State->prevG; |
| break; |
| +#endif |
| case JISX201: |
| if(mySourceChar <= 0x7f) { |
| targetUniChar = jisx201ToU(mySourceChar); |
| @@ -2207,9 +2248,11 @@ |
| } else { |
| /* Copy before we modify tmpSourceChar so toUnicodeCallback() sees the correct bytes. */ |
| mySourceChar = tmpSourceChar; |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| if (cs == KSC5601) { |
| tmpSourceChar += 0x8080; /* = _2022ToGR94DBCS(tmpSourceChar) */ |
| } |
| +#endif |
| tempBuf[0] = (char)(tmpSourceChar >> 8); |
| tempBuf[1] = (char)(tmpSourceChar); |
| } |
| @@ -2271,6 +2314,7 @@ |
| } |
| |
| |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| /*************************************************************** |
| * Rules for ISO-2022-KR encoding |
| * i) The KSC5601 designator sequence should appear only once in a file, |
| @@ -3414,6 +3458,7 @@ |
| args->target = myTarget; |
| args->source = mySource; |
| } |
| +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
| |
| static void |
| _ISO_2022_WriteSub(UConverterFromUnicodeArgs *args, int32_t offsetIndex, UErrorCode *err) { |
| @@ -3615,6 +3660,7 @@ |
| /* include JIS X 0201 which is hardcoded */ |
| sa->add(sa->set, 0xa5); |
| sa->add(sa->set, 0x203e); |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| if(jpCharsetMasks[cnvData->version]&CSM(ISO8859_1)) { |
| /* include Latin-1 for some variants of JP */ |
| sa->addRange(sa->set, 0, 0xff); |
| @@ -3622,6 +3668,10 @@ |
| /* include ASCII for JP */ |
| sa->addRange(sa->set, 0, 0x7f); |
| } |
| +#else |
| + /* include ASCII for JP */ |
| + sa->addRange(sa->set, 0, 0x7f); |
| +#endif |
| if(cnvData->version==3 || cnvData->version==4 || which==UCNV_ROUNDTRIP_AND_FALLBACK_SET) { |
| /* |
| * Do not test (jpCharsetMasks[cnvData->version]&CSM(HWKANA_7BIT))!=0 |
| @@ -3640,6 +3690,7 @@ |
| sa->addRange(sa->set, HWKANA_START, HWKANA_END); |
| } |
| break; |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| case 'c': |
| case 'z': |
| /* include ASCII for CN */ |
| @@ -3651,6 +3702,7 @@ |
| cnvData->currentConverter, sa, which, pErrorCode); |
| /* the loop over myConverterArray[] will simply not find another converter */ |
| break; |
| +#endif |
| default: |
| break; |
| } |
| @@ -3671,10 +3723,16 @@ |
| for (i=0; i<UCNV_2022_MAX_CONVERTERS; i++) { |
| UConverterSetFilter filter; |
| if(cnvData->myConverterArray[i]!=NULL) { |
| - if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
| - cnvData->version==0 && i==CNS_11643 |
| - ) { |
| + if(cnvData->locale[0]=='j' && i==JISX208) { |
| /* |
| + * Only add code points that map to Shift-JIS codes |
| + * corresponding to JIS X 0208. |
| + */ |
| + filter=UCNV_SET_FILTER_SJIS; |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| + } else if( (cnvData->locale[0]=='c' || cnvData->locale[0]=='z') && |
| + cnvData->version==0 && i==CNS_11643) { |
| + /* |
| * Version-specific for CN: |
| * CN version 0 does not map CNS planes 3..7 although |
| * they are all available in the CNS conversion table; |
| @@ -3682,18 +3740,13 @@ |
| * The two versions create different Unicode sets. |
| */ |
| filter=UCNV_SET_FILTER_2022_CN; |
| - } else if(cnvData->locale[0]=='j' && i==JISX208) { |
| - /* |
| - * Only add code points that map to Shift-JIS codes |
| - * corresponding to JIS X 0208. |
| - */ |
| - filter=UCNV_SET_FILTER_SJIS; |
| } else if(i==KSC5601) { |
| /* |
| * Some of the KSC 5601 tables (convrtrs.txt has this aliases on multiple tables) |
| * are broader than GR94. |
| */ |
| filter=UCNV_SET_FILTER_GR94DBCS; |
| +#endif |
| } else { |
| filter=UCNV_SET_FILTER_NONE; |
| } |
| @@ -3831,6 +3884,7 @@ |
| |
| } // namespace |
| |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| /************* KR ***************/ |
| static const UConverterImpl _ISO2022KRImpl={ |
| UCNV_ISO_2022, |
| @@ -3947,5 +4001,6 @@ |
| }; |
| |
| } // namespace |
| +#endif /* #if !UCONFIG_NO_NON_HTML5_CONVERSION */ |
| |
| #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
| Index: source/common/ucnvbocu.cpp |
| =================================================================== |
| --- source/common/ucnvbocu.cpp (revision 259715) |
| +++ source/common/ucnvbocu.cpp (working copy) |
| @@ -19,7 +19,7 @@ |
| |
| #include "unicode/utypes.h" |
| |
| -#if !UCONFIG_NO_CONVERSION |
| +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| |
| #include "unicode/ucnv.h" |
| #include "unicode/ucnv_cb.h" |
| Index: source/common/ucnvisci.c |
| =================================================================== |
| --- source/common/ucnvisci.c (revision 259715) |
| +++ source/common/ucnvisci.c (working copy) |
| @@ -17,7 +17,7 @@ |
| |
| #include "unicode/utypes.h" |
| |
| -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| |
| #include "unicode/ucnv.h" |
| #include "unicode/ucnv_cb.h" |
| Index: source/common/ucnvscsu.c |
| =================================================================== |
| --- source/common/ucnvscsu.c (revision 259715) |
| +++ source/common/ucnvscsu.c (working copy) |
| @@ -21,7 +21,7 @@ |
| |
| #include "unicode/utypes.h" |
| |
| -#if !UCONFIG_NO_CONVERSION |
| +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| |
| #include "unicode/ucnv.h" |
| #include "unicode/ucnv_cb.h" |
| Index: source/common/ucnv_u7.c |
| =================================================================== |
| --- source/common/ucnv_u7.c (revision 259715) |
| +++ source/common/ucnv_u7.c (working copy) |
| @@ -16,7 +16,7 @@ |
| |
| #include "unicode/utypes.h" |
| |
| -#if !UCONFIG_NO_CONVERSION |
| +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| |
| #include "unicode/ucnv.h" |
| #include "ucnv_bld.h" |
| Index: source/common/unicode/uconfig.h |
| =================================================================== |
| --- source/common/unicode/uconfig.h (revision 259715) |
| +++ source/common/unicode/uconfig.h (working copy) |
| @@ -265,6 +265,14 @@ |
| #endif |
| |
| /** |
| + * This switch turns off all the converters NOT listed in |
| + * the encoding standard : http://encoding.spec.whatwg.org |
| + */ |
| +#ifndef UCONFIG_NO_NON_HTML5_CONVERSION |
| +#define UCONFIG_NO_NON_HTML5_CONVERSION 0 |
| +#endif |
| + |
| +/** |
| * \def UCONFIG_NO_LEGACY_CONVERSION |
| * This switch turns off all converters except for |
| * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) |
| Index: source/common/ucnv_bld.cpp |
| =================================================================== |
| --- source/common/ucnv_bld.cpp (revision 259715) |
| +++ source/common/ucnv_bld.cpp (working copy) |
| @@ -69,28 +69,41 @@ |
| |
| #if UCONFIG_NO_LEGACY_CONVERSION |
| NULL, |
| +#else |
| + &_ISO2022Data, |
| +#endif |
| + |
| +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION |
| NULL, NULL, NULL, NULL, NULL, NULL, |
| NULL, NULL, NULL, NULL, NULL, NULL, |
| NULL, |
| #else |
| - &_ISO2022Data, |
| &_LMBCSData1,&_LMBCSData2, &_LMBCSData3, &_LMBCSData4, &_LMBCSData5, &_LMBCSData6, |
| &_LMBCSData8,&_LMBCSData11,&_LMBCSData16,&_LMBCSData17,&_LMBCSData18,&_LMBCSData19, |
| &_HZData, |
| #endif |
| |
| +#if UCONFIG_NO_NON_HTML5_CONVERSION |
| + NULL, |
| +#else |
| &_SCSUData, |
| +#endif |
| |
| -#if UCONFIG_NO_LEGACY_CONVERSION |
| + |
| +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION |
| NULL, |
| #else |
| &_ISCIIData, |
| #endif |
| |
| &_ASCIIData, |
| +#if UCONFIG_NO_NON_HTML5_CONVERSION |
| + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL, |
| +#else |
| &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, |
| +#endif |
| |
| -#if UCONFIG_NO_LEGACY_CONVERSION |
| +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION |
| NULL, |
| #else |
| &_CompoundTextData |
| @@ -105,18 +118,24 @@ |
| const char *name; |
| const UConverterType type; |
| } const cnvNameType[] = { |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| { "bocu1", UCNV_BOCU1 }, |
| { "cesu8", UCNV_CESU8 }, |
| -#if !UCONFIG_NO_LEGACY_CONVERSION |
| +#endif |
| +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| { "hz",UCNV_HZ }, |
| #endif |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| { "imapmailboxname", UCNV_IMAP_MAILBOX }, |
| +#endif |
| +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| + { "iscii", UCNV_ISCII }, |
| +#endif |
| #if !UCONFIG_NO_LEGACY_CONVERSION |
| - { "iscii", UCNV_ISCII }, |
| { "iso2022", UCNV_ISO_2022 }, |
| #endif |
| { "iso88591", UCNV_LATIN_1 }, |
| -#if !UCONFIG_NO_LEGACY_CONVERSION |
| +#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| { "lmbcs1", UCNV_LMBCS_1 }, |
| { "lmbcs11",UCNV_LMBCS_11 }, |
| { "lmbcs16",UCNV_LMBCS_16 }, |
| @@ -130,7 +149,9 @@ |
| { "lmbcs6", UCNV_LMBCS_6 }, |
| { "lmbcs8", UCNV_LMBCS_8 }, |
| #endif |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| { "scsu", UCNV_SCSU }, |
| +#endif |
| { "usascii", UCNV_US_ASCII }, |
| { "utf16", UCNV_UTF16 }, |
| { "utf16be", UCNV_UTF16_BigEndian }, |
| @@ -152,9 +173,13 @@ |
| { "utf32oppositeendian", UCNV_UTF32_BigEndian }, |
| { "utf32platformendian", UCNV_UTF32_LittleEndian }, |
| #endif |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| { "utf7", UCNV_UTF7 }, |
| +#endif |
| { "utf8", UCNV_UTF8 }, |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| { "x11compoundtext", UCNV_COMPOUND_TEXT} |
| +#endif |
| }; |
| |
| |
| Index: source/common/ucnv_u8.c |
| =================================================================== |
| --- source/common/ucnv_u8.c (revision 259715) |
| +++ source/common/ucnv_u8.c (working copy) |
| @@ -87,6 +87,15 @@ |
| static const uint32_t |
| utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; |
| |
| +static UBool hasCESU8Data(const UConverter *cnv) |
| +{ |
| +#if UCONFIG_NO_NON_HTML5_CONVERSION |
| + return FALSE; |
| +#else |
| + return (UBool)(cnv->sharedData == &_CESU8Data); |
| +#endif |
| +} |
| + |
| static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, |
| UErrorCode * err) |
| { |
| @@ -96,10 +105,10 @@ |
| const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
| const UChar *targetLimit = args->targetLimit; |
| unsigned char *toUBytes = cnv->toUBytes; |
| - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); |
| + UBool isCESU8 = hasCESU8Data(cnv); |
| uint32_t ch, ch2 = 0; |
| int32_t i, inBytes; |
| - |
| + |
| /* Restore size of current sequence */ |
| if (cnv->toUnicodeStatus && myTarget < targetLimit) |
| { |
| @@ -226,7 +235,7 @@ |
| const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; |
| const UChar *targetLimit = args->targetLimit; |
| unsigned char *toUBytes = cnv->toUBytes; |
| - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); |
| + UBool isCESU8 = hasCESU8Data(cnv); |
| uint32_t ch, ch2 = 0; |
| int32_t i, inBytes; |
| |
| @@ -357,7 +366,7 @@ |
| UChar32 ch; |
| uint8_t tempBuf[4]; |
| int32_t indexToWrite; |
| - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); |
| + UBool isNotCESU8 = !hasCESU8Data(cnv); |
| |
| if (cnv->fromUChar32 && myTarget < targetLimit) |
| { |
| @@ -473,7 +482,7 @@ |
| int32_t offsetNum, nextSourceIndex; |
| int32_t indexToWrite; |
| uint8_t tempBuf[4]; |
| - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); |
| + UBool isNotCESU8 = !hasCESU8Data(cnv); |
| |
| if (cnv->fromUChar32 && myTarget < targetLimit) |
| { |
| Index: source/common/unicode/urename.h |
| =================================================================== |
| --- source/common/unicode/urename.h (revision 259715) |
| +++ source/common/unicode/urename.h (working copy) |
| @@ -73,12 +73,14 @@ |
| #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) |
| #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) |
| #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) |
| #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) |
| #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) |
| #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) |
| #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) |
| #define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData) |
| +#endif |
| #define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data) |
| #define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1) |
| #define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11) |
| @@ -94,14 +96,18 @@ |
| #define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8) |
| #define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data) |
| #define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData) |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| #define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData) |
| +#endif |
| #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) |
| #define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data) |
| #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) |
| #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) |
| #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) |
| #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) |
| +#endif |
| #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) |
| #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) |
| #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse) |
| Index: source/common/ucnv_cnv.h |
| =================================================================== |
| --- source/common/ucnv_cnv.h (revision 259715) |
| +++ source/common/ucnv_cnv.h (working copy) |
| @@ -256,11 +256,15 @@ |
| extern const UConverterSharedData |
| _MBCSData, _Latin1Data, |
| _UTF8Data, _UTF16BEData, _UTF16LEData, _UTF32BEData, _UTF32LEData, |
| - _ISO2022Data, |
| + _ISO2022Data, |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6, |
| _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19, |
| _HZData,_ISCIIData, _SCSUData, _ASCIIData, |
| _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData; |
| +#else |
| + _ASCIIData, _UTF16Data, _UTF32Data; |
| +#endif |
| |
| U_CDECL_END |
| |
| Index: source/common/ucnv_lmb.c |
| =================================================================== |
| --- source/common/ucnv_lmb.c (revision 291619) |
| +++ source/common/ucnv_lmb.c (working copy) |
| @@ -25,7 +25,7 @@ |
| |
| #include "unicode/utypes.h" |
| |
| -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| |
| #include "unicode/ucnv_err.h" |
| #include "unicode/ucnv.h" |
| Index: source/common/ucnvhz.c |
| =================================================================== |
| --- source/common/ucnvhz.c (revision 291619) |
| +++ source/common/ucnvhz.c (working copy) |
| @@ -16,7 +16,7 @@ |
| |
| #include "unicode/utypes.h" |
| |
| -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| |
| #include "cmemory.h" |
| #include "unicode/ucnv.h" |
| @@ -637,4 +637,4 @@ |
| 0 |
| }; |
| |
| -#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */ |
| +#endif /* #if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION */ |
| Index: source/common/ucnv_ct.c |
| =================================================================== |
| --- source/common/ucnv_ct.c (revision 291619) |
| +++ source/common/ucnv_ct.c (working copy) |
| @@ -14,7 +14,7 @@ |
| |
| #include "unicode/utypes.h" |
| |
| -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION |
| +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION |
| |
| #include "unicode/ucnv.h" |
| #include "unicode/uset.h" |
| Index: source/i18n/csrsbcs.h |
| =================================================================== |
| --- source/i18n/csrsbcs.h (revision 291619) |
| +++ source/i18n/csrsbcs.h (working copy) |
| @@ -50,6 +50,7 @@ |
| |
| }; |
| |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| class NGramParser_IBM420 : public NGramParser |
| { |
| private: |
| @@ -61,6 +62,7 @@ |
| public: |
| NGramParser_IBM420(const int32_t *theNgramList, const uint8_t *theCharMap); |
| }; |
| +#endif |
| |
| |
| class CharsetRecog_sbcs : public CharsetRecognizer |
| @@ -229,6 +231,7 @@ |
| virtual UBool match(InputText *det, CharsetMatch *results) const; |
| }; |
| |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| class CharsetRecog_IBM424_he : public CharsetRecog_sbcs |
| { |
| public: |
| @@ -280,6 +283,7 @@ |
| |
| virtual UBool match(InputText *det, CharsetMatch *results) const; |
| }; |
| +#endif |
| |
| U_NAMESPACE_END |
| |
| Index: source/i18n/csr2022.h |
| =================================================================== |
| --- source/i18n/csr2022.h (revision 291619) |
| +++ source/i18n/csr2022.h (working copy) |
| @@ -65,6 +65,7 @@ |
| UBool match(InputText *textIn, CharsetMatch *results) const; |
| }; |
| |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| class CharsetRecog_2022KR :public CharsetRecog_2022 { |
| public: |
| virtual ~CharsetRecog_2022KR(); |
| @@ -84,6 +85,7 @@ |
| |
| UBool match(InputText *textIn, CharsetMatch *results) const; |
| }; |
| +#endif |
| |
| U_NAMESPACE_END |
| |
| Index: source/i18n/csr2022.cpp |
| =================================================================== |
| --- source/i18n/csr2022.cpp (revision 291619) |
| +++ source/i18n/csr2022.cpp (working copy) |
| @@ -119,6 +119,7 @@ |
| {0x1b, 0x2e, 0x46, 0x00, 0x00} // ISO 8859-7 |
| }; |
| |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| static const uint8_t escapeSequences_2022KR[][5] = { |
| {0x1b, 0x24, 0x29, 0x43, 0x00} |
| }; |
| @@ -136,6 +137,7 @@ |
| {0x1b, 0x4e, 0x00, 0x00, 0x00}, // SS2 |
| {0x1b, 0x4f, 0x00, 0x00, 0x00}, // SS3 |
| }; |
| +#endif |
| |
| CharsetRecog_2022JP::~CharsetRecog_2022JP() {} |
| |
| @@ -152,6 +154,7 @@ |
| return (confidence > 0); |
| } |
| |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| CharsetRecog_2022KR::~CharsetRecog_2022KR() {} |
| |
| const char *CharsetRecog_2022KR::getName() const { |
| @@ -181,6 +184,7 @@ |
| results->set(textIn, this, confidence); |
| return (confidence > 0); |
| } |
| +#endif |
| |
| CharsetRecog_2022::~CharsetRecog_2022() { |
| // nothing to do |
| Index: source/i18n/csdetect.cpp |
| =================================================================== |
| --- source/i18n/csdetect.cpp (revision 291619) |
| +++ source/i18n/csdetect.cpp (working copy) |
| @@ -110,6 +110,7 @@ |
| new CSRecognizerInfo(new CharsetRecog_big5(), TRUE), |
| |
| new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE), |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE), |
| new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE), |
| |
| @@ -117,6 +118,7 @@ |
| new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE), |
| new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE), |
| new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE) |
| +#endif |
| }; |
| int32_t rCount = ARRAY_SIZE(tempArray); |
| |
| Index: source/i18n/csrsbcs.cpp |
| =================================================================== |
| --- source/i18n/csrsbcs.cpp (revision 291619) |
| +++ source/i18n/csrsbcs.cpp (working copy) |
| @@ -137,6 +137,7 @@ |
| return (int32_t) (rawPercent * 300.0); |
| } |
| |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| static const uint8_t unshapeMap_IBM420[] = { |
| /* -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -A -B -C -D -E -F */ |
| /* 0- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, |
| @@ -232,6 +233,7 @@ |
| } |
| } |
| } |
| +#endif |
| |
| CharsetRecog_sbcs::CharsetRecog_sbcs() |
| { |
| @@ -624,6 +626,7 @@ |
| 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, |
| }; |
| |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| static const int32_t ngrams_IBM424_he_rtl[] = { |
| 0x404146, 0x404148, 0x404151, 0x404171, 0x404251, 0x404256, 0x404541, 0x404546, 0x404551, 0x404556, 0x404562, 0x404569, 0x404571, 0x405441, 0x405445, 0x405641, |
| 0x406254, 0x406954, 0x417140, 0x454041, 0x454042, 0x454045, 0x454054, 0x454056, 0x454069, 0x454641, 0x464140, 0x465540, 0x465740, 0x466840, 0x467140, 0x514045, |
| @@ -691,6 +694,7 @@ |
| /* E- */ 0x40, 0x40, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xEA, 0xEB, 0x40, 0xED, 0xEE, 0xEF, |
| /* F- */ 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0xFB, 0xFC, 0xFD, 0xFE, 0x40, |
| }; |
| +#endif |
| |
| //ISO-8859-1,2,5,6,7,8,9 Ngrams |
| |
| @@ -1155,6 +1159,7 @@ |
| return (confidence > 0); |
| } |
| |
| +#if !UCONFIG_NO_NON_HTML5_CONVERSION |
| CharsetRecog_IBM424_he::~CharsetRecog_IBM424_he() |
| { |
| // nothing to do |
| @@ -1253,6 +1258,7 @@ |
| results->set(textIn, this, confidence); |
| return (confidence > 0); |
| } |
| +#endif |
| |
| U_NAMESPACE_END |
| #endif |