| /* GENERATED SOURCE. DO NOT MODIFY. */ |
| // © 2018 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| |
| // created: 2018may04 Markus W. Scherer |
| |
| package android.icu.util; |
| |
| import java.io.DataOutputStream; |
| import java.io.IOException; |
| import java.io.OutputStream; |
| import java.nio.ByteBuffer; |
| import java.nio.ByteOrder; |
| |
| import android.icu.impl.ICUBinary; |
| import android.icu.impl.Normalizer2Impl.UTF16Plus; |
| |
| /** |
| * Immutable Unicode code point trie. |
| * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values. |
| * For details see https://icu.unicode.org/design/struct/utrie |
| * |
| * <p>This class is not intended for public subclassing. |
| * |
| * @see MutableCodePointTrie |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public abstract class CodePointTrie extends CodePointMap { |
/**
 * Chooses between the two CodePointTrie data structure layouts,
 * which trade off size against lookup speed in different ways.
 *
 * <p>Pass null to {@link #fromBinary} to accept either layout;
 * {@link #getType} then reports the layout that was actually read.
 *
 * <p>The declaration order is significant: {@link #toBinary} stores the ordinal
 * in the serialized header, and {@link #fromBinary} maps 0 to FAST and 1 to SMALL.
 *
 * @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth)
 * @see #fromBinary
 * @see #getType
 * @hide Only a subset of ICU is exposed in Android
 */
public enum Type {
    /**
     * Fast and simple, but larger, data structure for BMP code points.
     * The {@link Fast} subclasses provide additional lookup functions
     * for BMP and supplementary code points.
     *
     * @see Fast
     */
    FAST,
    /**
     * Smaller, but slower, data structure for BMP code points.
     *
     * @see Small
     */
    SMALL
}
| |
/**
 * Chooses how many bits a CodePointTrie uses to store each data value.
 *
 * <p>Pass null to {@link #fromBinary} to accept any data value width;
 * {@link #getValueWidth} then reports the width that was actually read.
 *
 * <p>The declaration order is significant: {@link #toBinary} stores the ordinal
 * in the serialized header, and {@link #fromBinary} maps 0, 1 and 2 to
 * BITS_16, BITS_32 and BITS_8 respectively.
 *
 * @hide Only a subset of ICU is exposed in Android
 */
public enum ValueWidth {
    /**
     * Each data value occupies 16 bits.
     * Values are returned unsigned, in the range 0..0xffff=65535.
     */
    BITS_16,
    /**
     * Each data value occupies 32 bits.
     */
    BITS_32,
    /**
     * Each data value occupies 8 bits.
     * Values are returned unsigned, in the range 0..0xff=255.
     */
    BITS_8
}
| |
| private CodePointTrie(char[] index, Data data, int highStart, |
| int index3NullOffset, int dataNullOffset) { |
| this.ascii = new int[ASCII_LIMIT]; |
| this.index = index; |
| this.data = data; |
| this.dataLength = data.getDataLength(); |
| this.highStart = highStart; |
| this.index3NullOffset = index3NullOffset; |
| this.dataNullOffset = dataNullOffset; |
| |
| for (int c = 0; c < ASCII_LIMIT; ++c) { |
| ascii[c] = data.getFromIndex(c); |
| } |
| |
| int nullValueOffset = dataNullOffset; |
| if (nullValueOffset >= dataLength) { |
| nullValueOffset = dataLength - HIGH_VALUE_NEG_DATA_OFFSET; |
| } |
| nullValue = data.getFromIndex(nullValueOffset); |
| } |
| |
| /** |
| * Creates a trie from its binary form, |
| * stored in the ByteBuffer starting at the current position. |
| * Advances the buffer position to just after the trie data. |
| * Inverse of {@link #toBinary(OutputStream)}. |
| * |
| * <p>The data is copied from the buffer; |
| * later modification of the buffer will not affect the trie. |
| * |
| * @param type selects the trie type; this method throws an exception |
| * if the type does not match the binary data; |
| * use null to accept any type |
| * @param valueWidth selects the number of bits in a data value; this method throws an exception |
| * if the valueWidth does not match the binary data; |
| * use null to accept any data value width |
| * @param bytes a buffer containing the binary data of a CodePointTrie |
| * @return the trie |
| * @see MutableCodePointTrie#MutableCodePointTrie(int, int) |
| * @see MutableCodePointTrie#buildImmutable(CodePointTrie.Type, CodePointTrie.ValueWidth) |
| * @see #toBinary(OutputStream) |
| */ |
| public static CodePointTrie fromBinary(Type type, ValueWidth valueWidth, ByteBuffer bytes) { |
| ByteOrder outerByteOrder = bytes.order(); |
| try { |
| // Enough data for a trie header? |
| if (bytes.remaining() < 16 /* sizeof(UCPTrieHeader) */) { |
| throw new ICUUncheckedIOException("Buffer too short for a CodePointTrie header"); |
| } |
| |
| // struct UCPTrieHeader |
| /** "Tri3" in big-endian US-ASCII (0x54726933) */ |
| int signature = bytes.getInt(); |
| |
| // Check the signature. |
| switch (signature) { |
| case 0x54726933: |
| // The buffer is already set to the trie data byte order. |
| break; |
| case 0x33697254: |
| // Temporarily reverse the byte order. |
| boolean isBigEndian = outerByteOrder == ByteOrder.BIG_ENDIAN; |
| bytes.order(isBigEndian ? ByteOrder.LITTLE_ENDIAN : ByteOrder.BIG_ENDIAN); |
| signature = 0x54726933; |
| break; |
| default: |
| throw new ICUUncheckedIOException("Buffer does not contain a serialized CodePointTrie"); |
| } |
| |
| // struct UCPTrieHeader continued |
| /** |
| * Options bit field: |
| * Bits 15..12: Data length bits 19..16. |
| * Bits 11..8: Data null block offset bits 19..16. |
| * Bits 7..6: UCPTrieType |
| * Bits 5..3: Reserved (0). |
| * Bits 2..0: UCPTrieValueWidth |
| */ |
| int options = bytes.getChar(); |
| |
| /** Total length of the index tables. */ |
| int indexLength = bytes.getChar(); |
| |
| /** Data length bits 15..0. */ |
| int dataLength = bytes.getChar(); |
| |
| /** Index-3 null block offset, 0x7fff or 0xffff if none. */ |
| int index3NullOffset = bytes.getChar(); |
| |
| /** Data null block offset bits 15..0, 0xfffff if none. */ |
| int dataNullOffset = bytes.getChar(); |
| |
| /** |
| * First code point of the single-value range ending with U+10ffff, |
| * rounded up and then shifted right by SHIFT_2. |
| */ |
| int shiftedHighStart = bytes.getChar(); |
| // struct UCPTrieHeader end |
| |
| int typeInt = (options >> 6) & 3; |
| Type actualType; |
| switch (typeInt) { |
| case 0: actualType = Type.FAST; break; |
| case 1: actualType = Type.SMALL; break; |
| default: |
| throw new ICUUncheckedIOException("CodePointTrie data header has an unsupported type"); |
| } |
| |
| int valueWidthInt = options & OPTIONS_VALUE_BITS_MASK; |
| ValueWidth actualValueWidth; |
| switch (valueWidthInt) { |
| case 0: actualValueWidth = ValueWidth.BITS_16; break; |
| case 1: actualValueWidth = ValueWidth.BITS_32; break; |
| case 2: actualValueWidth = ValueWidth.BITS_8; break; |
| default: |
| throw new ICUUncheckedIOException("CodePointTrie data header has an unsupported value width"); |
| } |
| |
| if ((options & OPTIONS_RESERVED_MASK) != 0) { |
| throw new ICUUncheckedIOException("CodePointTrie data header has unsupported options"); |
| } |
| |
| if (type == null) { |
| type = actualType; |
| } |
| if (valueWidth == null) { |
| valueWidth = actualValueWidth; |
| } |
| if (type != actualType || valueWidth != actualValueWidth) { |
| throw new ICUUncheckedIOException("CodePointTrie data header has a different type or value width than required"); |
| } |
| |
| // Get the length values and offsets. |
| dataLength |= ((options & OPTIONS_DATA_LENGTH_MASK) << 4); |
| dataNullOffset |= ((options & OPTIONS_DATA_NULL_OFFSET_MASK) << 8); |
| |
| int highStart = shiftedHighStart << SHIFT_2; |
| |
| // Calculate the actual length, minus the header. |
| int actualLength = indexLength * 2; |
| if (valueWidth == ValueWidth.BITS_16) { |
| actualLength += dataLength * 2; |
| } else if (valueWidth == ValueWidth.BITS_32) { |
| actualLength += dataLength * 4; |
| } else { |
| actualLength += dataLength; |
| } |
| if (bytes.remaining() < actualLength) { |
| throw new ICUUncheckedIOException("Buffer too short for the CodePointTrie data"); |
| } |
| |
| char[] index = ICUBinary.getChars(bytes, indexLength, 0); |
| switch (valueWidth) { |
| case BITS_16: { |
| char[] data16 = ICUBinary.getChars(bytes, dataLength, 0); |
| return type == Type.FAST ? |
| new Fast16(index, data16, highStart, index3NullOffset, dataNullOffset) : |
| new Small16(index, data16, highStart, index3NullOffset, dataNullOffset); |
| } |
| case BITS_32: { |
| int[] data32 = ICUBinary.getInts(bytes, dataLength, 0); |
| return type == Type.FAST ? |
| new Fast32(index, data32, highStart, index3NullOffset, dataNullOffset) : |
| new Small32(index, data32, highStart, index3NullOffset, dataNullOffset); |
| } |
| case BITS_8: { |
| byte[] data8 = ICUBinary.getBytes(bytes, dataLength, 0); |
| return type == Type.FAST ? |
| new Fast8(index, data8, highStart, index3NullOffset, dataNullOffset) : |
| new Small8(index, data8, highStart, index3NullOffset, dataNullOffset); |
| } |
| default: |
| throw new AssertionError("should be unreachable"); |
| } |
| } finally { |
| bytes.order(outerByteOrder); |
| } |
| } |
| |
| /** |
| * Returns the trie type. |
| * |
| * @return the trie type |
| */ |
| public abstract Type getType(); |
| /** |
| * Returns the number of bits in a trie data value. |
| * |
| * @return the number of bits in a trie data value |
| */ |
| public final ValueWidth getValueWidth() { return data.getValueWidth(); } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public int get(int c) { |
| return data.getFromIndex(cpIndex(c)); |
| } |
| |
| /** |
| * Returns a trie value for an ASCII code point, without range checking. |
| * |
| * @param c the input code point; must be U+0000..U+007F |
| * @return The ASCII code point's trie value. |
| */ |
| public final int asciiGet(int c) { |
| return ascii[c]; |
| } |
| |
| private static final int MAX_UNICODE = 0x10ffff; |
| |
| private static final int ASCII_LIMIT = 0x80; |
| |
| private static final int maybeFilterValue(int value, int trieNullValue, int nullValue, |
| ValueFilter filter) { |
| if (value == trieNullValue) { |
| value = nullValue; |
| } else if (filter != null) { |
| value = filter.apply(value); |
| } |
| return value; |
| } |
| |
| /** |
| * {@inheritDoc} |
| * |
| * <p>Implementation overview: |
| * <ul> |
| * <li>Returns false, without touching {@code range}, if {@code start} is outside 0..0x10FFFF.</li> |
| * <li>If {@code start >= highStart}, the range is {@code start}..U+10FFFF with the high value |
| * (the data entry at {@code dataLength - HIGH_VALUE_NEG_DATA_OFFSET}), |
| * passed through the filter if there is one.</li> |
| * <li>Otherwise the index and data blocks are walked from {@code start} upwards. |
| * The range grows while the reported value stays the same and ends just before |
| * the first code point with a different reported value.</li> |
| * <li>The null value is filtered once up front. Raw values equal to the trie's null value, |
| * and whole index-3/data null blocks, are reported as that pre-filtered null value.</li> |
| * <li>An index-3 block or data block whose offset equals the previously visited one is |
| * skipped as a whole, provided at least one full block lies between {@code start} |
| * and the current code point.</li> |
| * <li>If the walk reaches {@code highStart}, the range is extended to U+10FFFF only when |
| * the high value yields the same reported value.</li> |
| * </ul> |
| * |
| * @param start the first code point of the range to look up |
| * @param filter optional function applied to raw trie values; may be null |
| * @param range receives the range start, end and value via {@code range.set(...)} |
| * @return true if {@code start} is in 0..0x10FFFF and {@code range} was set, otherwise false |
| */ |
| @Override |
| public final boolean getRange(int start, ValueFilter filter, Range range) { |
| if (start < 0 || MAX_UNICODE < start) { |
| return false; |
| } |
| if (start >= highStart) { |
| int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET; |
| int value = data.getFromIndex(di); |
| if (filter != null) { value = filter.apply(value); } |
| range.set(start, MAX_UNICODE, value); |
| return true; |
| } |
| |
| int nullValue = this.nullValue; |
| if (filter != null) { nullValue = filter.apply(nullValue); } |
| Type type = getType(); |
| |
| int prevI3Block = -1; |
| int prevBlock = -1; |
| int c = start; |
| // Dummy initializers to make the compiler happy. The real values are set when haveValue becomes true. |
| int trieValue = 0, value = 0; |
| boolean haveValue = false; |
| do { |
| int i3Block; |
| int i3; |
| int i3BlockLength; |
| int dataBlockLength; |
| if (c <= 0xffff && (type == Type.FAST || c <= SMALL_MAX)) { |
| i3Block = 0; |
| i3 = c >> FAST_SHIFT; |
| i3BlockLength = type == Type.FAST ? BMP_INDEX_LENGTH : SMALL_INDEX_LENGTH; |
| dataBlockLength = FAST_DATA_BLOCK_LENGTH; |
| } else { |
| // Use the multi-stage index (index-1, index-2, index-3) to find the index-3 block for c. |
| int i1 = c >> SHIFT_1; |
| if (type == Type.FAST) { |
| assert(0xffff < c && c < highStart); |
| i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH; |
| } else { |
| assert(c < highStart && highStart > SMALL_LIMIT); |
| i1 += SMALL_INDEX_LENGTH; |
| } |
| i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)]; |
| if (i3Block == prevI3Block && (c - start) >= CP_PER_INDEX_2_ENTRY) { |
| // The index-3 block is the same as the previous one, and filled with value: skip it as a whole. |
| assert((c & (CP_PER_INDEX_2_ENTRY - 1)) == 0); |
| c += CP_PER_INDEX_2_ENTRY; |
| continue; |
| } |
| prevI3Block = i3Block; |
| if (i3Block == index3NullOffset) { |
| // This is the index-3 null block: all of its code points have the null value. |
| if (haveValue) { |
| if (nullValue != value) { |
| range.set(start, c - 1, value); |
| return true; |
| } |
| } else { |
| trieValue = this.nullValue; |
| value = nullValue; |
| haveValue = true; |
| } |
| prevBlock = dataNullOffset; |
| c = (c + CP_PER_INDEX_2_ENTRY) & ~(CP_PER_INDEX_2_ENTRY - 1); |
| continue; |
| } |
| i3 = (c >> SHIFT_3) & INDEX_3_MASK; |
| i3BlockLength = INDEX_3_BLOCK_LENGTH; |
| dataBlockLength = SMALL_DATA_BLOCK_LENGTH; |
| } |
| // Enumerate data blocks for one index-3 block, starting at entry i3. |
| do { |
| int block; |
| if ((i3Block & 0x8000) == 0) { |
| block = index[i3Block + i3]; |
| } else { |
| // 18-bit indexes stored in groups of 9 entries per 8 indexes; the first entry of a group holds the upper bits. |
| int group = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); |
| int gi = i3 & 7; |
| block = (index[group++] << (2 + (2 * gi))) & 0x30000; |
| block |= index[group + gi]; |
| } |
| if (block == prevBlock && (c - start) >= dataBlockLength) { |
| // The block is the same as the previous one, and filled with value: skip it as a whole. |
| assert((c & (dataBlockLength - 1)) == 0); |
| c += dataBlockLength; |
| } else { |
| int dataMask = dataBlockLength - 1; |
| prevBlock = block; |
| if (block == dataNullOffset) { |
| // This is the data null block: all of its code points have the null value. |
| if (haveValue) { |
| if (nullValue != value) { |
| range.set(start, c - 1, value); |
| return true; |
| } |
| } else { |
| trieValue = this.nullValue; |
| value = nullValue; |
| haveValue = true; |
| } |
| c = (c + dataBlockLength) & ~dataMask; |
| } else { |
| int di = block + (c & dataMask); |
| int trieValue2 = data.getFromIndex(di); |
| if (haveValue) { |
| if (trieValue2 != trieValue) { |
| if (filter == null || |
| maybeFilterValue(trieValue2, this.nullValue, nullValue, |
| filter) != value) { |
| range.set(start, c - 1, value); |
| return true; |
| } |
| trieValue = trieValue2; // remember the latest raw value so that an equal next raw value skips the filter; may or may not help |
| } |
| } else { |
| trieValue = trieValue2; |
| value = maybeFilterValue(trieValue2, this.nullValue, nullValue, filter); |
| haveValue = true; |
| } |
| while ((++c & dataMask) != 0) { |
| trieValue2 = data.getFromIndex(++di); |
| if (trieValue2 != trieValue) { |
| if (filter == null || |
| maybeFilterValue(trieValue2, this.nullValue, nullValue, |
| filter) != value) { |
| range.set(start, c - 1, value); |
| return true; |
| } |
| trieValue = trieValue2; // remember the latest raw value so that an equal next raw value skips the filter; may or may not help |
| } |
| } |
| } |
| } |
| } while (++i3 < i3BlockLength); |
| } while (c < highStart); |
| assert(haveValue); |
| int di = dataLength - HIGH_VALUE_NEG_DATA_OFFSET; |
| int highValue = data.getFromIndex(di); |
| if (maybeFilterValue(highValue, this.nullValue, nullValue, filter) != value) { |
| --c; |
| } else { |
| c = MAX_UNICODE; |
| } |
| range.set(start, c, value); |
| return true; |
| } |
| |
| /** |
| * Writes a representation of the trie to the output stream. |
| * Inverse of {@link #fromBinary}. |
| * |
| * @param os the output stream |
| * @return the number of bytes written |
| */ |
| public final int toBinary(OutputStream os) { |
| try { |
| DataOutputStream dos = new DataOutputStream(os); |
| |
| // Write the UCPTrieHeader |
| dos.writeInt(0x54726933); // signature="Tri3" |
| dos.writeChar( // options |
| ((dataLength & 0xf0000) >> 4) | |
| ((dataNullOffset & 0xf0000) >> 8) | |
| (getType().ordinal() << 6) | |
| getValueWidth().ordinal()); |
| dos.writeChar(index.length); |
| dos.writeChar(dataLength); |
| dos.writeChar(index3NullOffset); |
| dos.writeChar(dataNullOffset); |
| dos.writeChar(highStart >> SHIFT_2); // shiftedHighStart |
| int length = 16; // sizeof(UCPTrieHeader) |
| |
| for (char i : index) { dos.writeChar(i); } |
| length += index.length * 2; |
| length += data.write(dos); |
| return length; |
| } catch (IOException e) { |
| throw new ICUUncheckedIOException(e); |
| } |
| } |
| |
| /** @hide draft / provisional / internal are hidden on Android*/ |
| static final int FAST_SHIFT = 6; |
| |
| /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */ |
| static final int FAST_DATA_BLOCK_LENGTH = 1 << FAST_SHIFT; |
| |
| /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */ |
| private static final int FAST_DATA_MASK = FAST_DATA_BLOCK_LENGTH - 1; |
| |
| /** @hide draft / provisional / internal are hidden on Android*/ |
| private static final int SMALL_MAX = 0xfff; |
| |
| /** |
| * Offset from dataLength (to be subtracted) for fetching the |
| * value returned for out-of-range code points and ill-formed UTF-8/16. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| private static final int ERROR_VALUE_NEG_DATA_OFFSET = 1; |
| /** |
| * Offset from dataLength (to be subtracted) for fetching the |
| * value returned for code points highStart..U+10FFFF. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| private static final int HIGH_VALUE_NEG_DATA_OFFSET = 2; |
| |
| // ucptrie_impl.h |
| |
| /** The length of the BMP index table. 1024=0x400 */ |
| private static final int BMP_INDEX_LENGTH = 0x10000 >> FAST_SHIFT; |
| |
| static final int SMALL_LIMIT = 0x1000; |
| private static final int SMALL_INDEX_LENGTH = SMALL_LIMIT >> FAST_SHIFT; |
| |
| /** Shift size for getting the index-3 table offset. */ |
| static final int SHIFT_3 = 4; |
| |
| /** Shift size for getting the index-2 table offset. */ |
| private static final int SHIFT_2 = 5 + SHIFT_3; |
| |
| /** Shift size for getting the index-1 table offset. */ |
| private static final int SHIFT_1 = 5 + SHIFT_2; |
| |
| /** |
| * Difference between two shift sizes, |
| * for getting an index-2 offset from an index-3 offset. 5=9-4 |
| */ |
| static final int SHIFT_2_3 = SHIFT_2 - SHIFT_3; |
| |
| /** |
| * Difference between two shift sizes, |
| * for getting an index-1 offset from an index-2 offset. 5=14-9 |
| */ |
| static final int SHIFT_1_2 = SHIFT_1 - SHIFT_2; |
| |
| /** |
| * Number of index-1 entries for the BMP. (4) |
| * This part of the index-1 table is omitted from the serialized form. |
| */ |
| private static final int OMITTED_BMP_INDEX_1_LENGTH = 0x10000 >> SHIFT_1; |
| |
| /** Number of entries in an index-2 block. 32=0x20 */ |
| static final int INDEX_2_BLOCK_LENGTH = 1 << SHIFT_1_2; |
| |
| /** Mask for getting the lower bits for the in-index-2-block offset. */ |
| static final int INDEX_2_MASK = INDEX_2_BLOCK_LENGTH - 1; |
| |
| /** Number of code points per index-2 table entry. 512=0x200 */ |
| static final int CP_PER_INDEX_2_ENTRY = 1 << SHIFT_2; |
| |
| /** Number of entries in an index-3 block. 32=0x20 */ |
| static final int INDEX_3_BLOCK_LENGTH = 1 << SHIFT_2_3; |
| |
| /** Mask for getting the lower bits for the in-index-3-block offset. */ |
| private static final int INDEX_3_MASK = INDEX_3_BLOCK_LENGTH - 1; |
| |
| /** Number of entries in a small data block. 16=0x10 */ |
| static final int SMALL_DATA_BLOCK_LENGTH = 1 << SHIFT_3; |
| |
| /** Mask for getting the lower bits for the in-small-data-block offset. */ |
| static final int SMALL_DATA_MASK = SMALL_DATA_BLOCK_LENGTH - 1; |
| |
| // ucptrie_impl.h: Constants for use with UCPTrieHeader.options. |
| private static final int OPTIONS_DATA_LENGTH_MASK = 0xf000; |
| private static final int OPTIONS_DATA_NULL_OFFSET_MASK = 0xf00; |
| private static final int OPTIONS_RESERVED_MASK = 0x38; |
| private static final int OPTIONS_VALUE_BITS_MASK = 7; |
| /** |
| * Value for index3NullOffset which indicates that there is no index-3 null block. |
| * Bit 15 is unused for this value because this bit is used if the index-3 contains |
| * 18-bit indexes. |
| */ |
| static final int NO_INDEX3_NULL_OFFSET = 0x7fff; |
| static final int NO_DATA_NULL_OFFSET = 0xfffff; |
| |
| private static abstract class Data { |
| abstract ValueWidth getValueWidth(); |
| abstract int getDataLength(); |
| abstract int getFromIndex(int index); |
| abstract int write(DataOutputStream dos) throws IOException; |
| } |
| |
| private static final class Data16 extends Data { |
| char[] array; |
| Data16(char[] a) { array = a; } |
| @Override ValueWidth getValueWidth() { return ValueWidth.BITS_16; } |
| @Override int getDataLength() { return array.length; } |
| @Override int getFromIndex(int index) { return array[index]; } |
| @Override int write(DataOutputStream dos) throws IOException { |
| for (char v : array) { dos.writeChar(v); } |
| return array.length * 2; |
| } |
| } |
| |
| private static final class Data32 extends Data { |
| int[] array; |
| Data32(int[] a) { array = a; } |
| @Override ValueWidth getValueWidth() { return ValueWidth.BITS_32; } |
| @Override int getDataLength() { return array.length; } |
| @Override int getFromIndex(int index) { return array[index]; } |
| @Override int write(DataOutputStream dos) throws IOException { |
| for (int v : array) { dos.writeInt(v); } |
| return array.length * 4; |
| } |
| } |
| |
| private static final class Data8 extends Data { |
| byte[] array; |
| Data8(byte[] a) { array = a; } |
| @Override ValueWidth getValueWidth() { return ValueWidth.BITS_8; } |
| @Override int getDataLength() { return array.length; } |
| @Override int getFromIndex(int index) { return array[index] & 0xff; } |
| @Override int write(DataOutputStream dos) throws IOException { |
| for (byte v : array) { dos.writeByte(v); } |
| return array.length; |
| } |
| } |
| |
| /** @hide draft / provisional / internal are hidden on Android*/ |
| private final int[] ascii; |
| |
| /** @hide draft / provisional / internal are hidden on Android*/ |
| private final char[] index; |
| |
| /** |
| * @deprecated This API is ICU internal only. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| @Deprecated |
| protected final Data data; |
| /** |
| * @deprecated This API is ICU internal only. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| @Deprecated |
| protected final int dataLength; |
| /** |
| * Start of the last range which ends at U+10FFFF. |
| * @deprecated This API is ICU internal only. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| @Deprecated |
| protected final int highStart; |
| |
| /** |
| * Internal index-3 null block offset. |
| * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| private final int index3NullOffset; |
| /** |
| * Internal data null block offset, not shifted. |
| * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| private final int dataNullOffset; |
| /** @hide draft / provisional / internal are hidden on Android*/ |
| private final int nullValue; |
| |
| /** |
| * @deprecated This API is ICU internal only. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| @Deprecated |
| protected final int fastIndex(int c) { |
| return index[c >> FAST_SHIFT] + (c & FAST_DATA_MASK); |
| } |
| |
| /** |
| * @deprecated This API is ICU internal only. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| @Deprecated |
| protected final int smallIndex(Type type, int c) { |
| // Split into two methods to make this part inline-friendly. |
| // In C, this part is a macro. |
| if (c >= highStart) { |
| return dataLength - HIGH_VALUE_NEG_DATA_OFFSET; |
| } |
| return internalSmallIndex(type, c); |
| } |
| |
| private final int internalSmallIndex(Type type, int c) { |
| int i1 = c >> SHIFT_1; |
| if (type == Type.FAST) { |
| assert(0xffff < c && c < highStart); |
| i1 += BMP_INDEX_LENGTH - OMITTED_BMP_INDEX_1_LENGTH; |
| } else { |
| assert(0 <= c && c < highStart && highStart > SMALL_LIMIT); |
| i1 += SMALL_INDEX_LENGTH; |
| } |
| int i3Block = index[index[i1] + ((c >> SHIFT_2) & INDEX_2_MASK)]; |
| int i3 = (c >> SHIFT_3) & INDEX_3_MASK; |
| int dataBlock; |
| if ((i3Block & 0x8000) == 0) { |
| // 16-bit indexes |
| dataBlock = index[i3Block + i3]; |
| } else { |
| // 18-bit indexes stored in groups of 9 entries per 8 indexes. |
| i3Block = (i3Block & 0x7fff) + (i3 & ~7) + (i3 >> 3); |
| i3 &= 7; |
| dataBlock = (index[i3Block++] << (2 + (2 * i3))) & 0x30000; |
| dataBlock |= index[i3Block + i3]; |
| } |
| return dataBlock + (c & SMALL_DATA_MASK); |
| } |
| |
| /** |
| * @deprecated This API is ICU internal only. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| @Deprecated |
| protected abstract int cpIndex(int c); |
| |
| /** |
| * A CodePointTrie with {@link Type#FAST}. |
| * |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public static abstract class Fast extends CodePointTrie { |
| private Fast(char[] index, Data data, int highStart, |
| int index3NullOffset, int dataNullOffset) { |
| super(index, data, highStart, index3NullOffset, dataNullOffset); |
| } |
| |
| /** |
| * Creates a trie from its binary form. |
| * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} |
| * with {@link Type#FAST}. |
| * |
| * @param valueWidth selects the number of bits in a data value; this method throws an exception |
| * if the valueWidth does not match the binary data; |
| * use null to accept any data value width |
| * @param bytes a buffer containing the binary data of a CodePointTrie |
| * @return the trie |
| */ |
| public static Fast fromBinary(ValueWidth valueWidth, ByteBuffer bytes) { |
| return (Fast) CodePointTrie.fromBinary(Type.FAST, valueWidth, bytes); |
| } |
| |
| /** |
| * @return {@link Type#FAST} |
| */ |
| @Override |
| public final Type getType() { return Type.FAST; } |
| |
| /** |
| * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking. |
| * Can be used to look up a value for a UTF-16 code unit if other parts of |
| * the string processing check for surrogates. |
| * |
| * @param c the input code point, must be U+0000..U+FFFF |
| * @return The BMP code point's trie value. |
| */ |
| public abstract int bmpGet(int c); |
| |
| /** |
| * Returns a trie value for a supplementary code point (U+10000..U+10FFFF), |
| * without range checking. |
| * |
| * @param c the input code point, must be U+10000..U+10FFFF |
| * @return The supplementary code point's trie value. |
| */ |
| public abstract int suppGet(int c); |
| |
| /** |
| * @deprecated This API is ICU internal only. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| @Deprecated |
| @Override |
| protected final int cpIndex(int c) { |
| if (c >= 0) { |
| if (c <= 0xffff) { |
| return fastIndex(c); |
| } else if (c <= 0x10ffff) { |
| return smallIndex(Type.FAST, c); |
| } |
| } |
| return dataLength - ERROR_VALUE_NEG_DATA_OFFSET; |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public final StringIterator stringIterator(CharSequence s, int sIndex) { |
| return new FastStringIterator(s, sIndex); |
| } |
| |
| private final class FastStringIterator extends StringIterator { |
| private FastStringIterator(CharSequence s, int sIndex) { |
| super(s, sIndex); |
| } |
| |
| @Override |
| public boolean next() { |
| if (sIndex >= s.length()) { |
| return false; |
| } |
| char lead = s.charAt(sIndex++); |
| c = lead; |
| int dataIndex; |
| if (!Character.isSurrogate(lead)) { |
| dataIndex = fastIndex(c); |
| } else { |
| char trail; |
| if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() && |
| Character.isLowSurrogate(trail = s.charAt(sIndex))) { |
| ++sIndex; |
| c = Character.toCodePoint(lead, trail); |
| dataIndex = smallIndex(Type.FAST, c); |
| } else { |
| dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; |
| } |
| } |
| value = data.getFromIndex(dataIndex); |
| return true; |
| } |
| |
| @Override |
| public boolean previous() { |
| if (sIndex <= 0) { |
| return false; |
| } |
| char trail = s.charAt(--sIndex); |
| c = trail; |
| int dataIndex; |
| if (!Character.isSurrogate(trail)) { |
| dataIndex = fastIndex(c); |
| } else { |
| char lead; |
| if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 && |
| Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) { |
| --sIndex; |
| c = Character.toCodePoint(lead, trail); |
| dataIndex = smallIndex(Type.FAST, c); |
| } else { |
| dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; |
| } |
| } |
| value = data.getFromIndex(dataIndex); |
| return true; |
| } |
| } |
| } |
| |
| /** |
| * A CodePointTrie with {@link Type#SMALL}. |
| * |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public static abstract class Small extends CodePointTrie { |
| private Small(char[] index, Data data, int highStart, |
| int index3NullOffset, int dataNullOffset) { |
| super(index, data, highStart, index3NullOffset, dataNullOffset); |
| } |
| |
| /** |
| * Creates a trie from its binary form. |
| * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} |
| * with {@link Type#SMALL}. |
| * |
| * @param valueWidth selects the number of bits in a data value; this method throws an exception |
| * if the valueWidth does not match the binary data; |
| * use null to accept any data value width |
| * @param bytes a buffer containing the binary data of a CodePointTrie |
| * @return the trie |
| */ |
| public static Small fromBinary(ValueWidth valueWidth, ByteBuffer bytes) { |
| return (Small) CodePointTrie.fromBinary(Type.SMALL, valueWidth, bytes); |
| } |
| |
| /** |
| * @return {@link Type#SMALL} |
| */ |
| @Override |
| public final Type getType() { return Type.SMALL; } |
| |
| /** |
| * @deprecated This API is ICU internal only. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| @Deprecated |
| @Override |
| protected final int cpIndex(int c) { |
| if (c >= 0) { |
| if (c <= SMALL_MAX) { |
| return fastIndex(c); |
| } else if (c <= 0x10ffff) { |
| return smallIndex(Type.SMALL, c); |
| } |
| } |
| return dataLength - ERROR_VALUE_NEG_DATA_OFFSET; |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public final StringIterator stringIterator(CharSequence s, int sIndex) { |
| return new SmallStringIterator(s, sIndex); |
| } |
| |
| private final class SmallStringIterator extends StringIterator { |
| private SmallStringIterator(CharSequence s, int sIndex) { |
| super(s, sIndex); |
| } |
| |
| @Override |
| public boolean next() { |
| if (sIndex >= s.length()) { |
| return false; |
| } |
| char lead = s.charAt(sIndex++); |
| c = lead; |
| int dataIndex; |
| if (!Character.isSurrogate(lead)) { |
| dataIndex = cpIndex(c); |
| } else { |
| char trail; |
| if (UTF16Plus.isSurrogateLead(lead) && sIndex < s.length() && |
| Character.isLowSurrogate(trail = s.charAt(sIndex))) { |
| ++sIndex; |
| c = Character.toCodePoint(lead, trail); |
| dataIndex = smallIndex(Type.SMALL, c); |
| } else { |
| dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; |
| } |
| } |
| value = data.getFromIndex(dataIndex); |
| return true; |
| } |
| |
| @Override |
| public boolean previous() { |
| if (sIndex <= 0) { |
| return false; |
| } |
| char trail = s.charAt(--sIndex); |
| c = trail; |
| int dataIndex; |
| if (!Character.isSurrogate(trail)) { |
| dataIndex = cpIndex(c); |
| } else { |
| char lead; |
| if (!UTF16Plus.isSurrogateLead(trail) && sIndex > 0 && |
| Character.isHighSurrogate(lead = s.charAt(sIndex - 1))) { |
| --sIndex; |
| c = Character.toCodePoint(lead, trail); |
| dataIndex = smallIndex(Type.SMALL, c); |
| } else { |
| dataIndex = dataLength - ERROR_VALUE_NEG_DATA_OFFSET; |
| } |
| } |
| value = data.getFromIndex(dataIndex); |
| return true; |
| } |
| } |
| } |
| |
| /** |
| * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_16}. |
| * |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public static final class Fast16 extends Fast { |
| private final char[] dataArray; |
| |
| Fast16(char[] index, char[] data16, int highStart, |
| int index3NullOffset, int dataNullOffset) { |
| super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset); |
| this.dataArray = data16; |
| } |
| |
| /** |
| * Creates a trie from its binary form. |
| * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} |
| * with {@link Type#FAST} and {@link ValueWidth#BITS_16}. |
| * |
| * @param bytes a buffer containing the binary data of a CodePointTrie |
| * @return the trie |
| */ |
| public static Fast16 fromBinary(ByteBuffer bytes) { |
| return (Fast16) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_16, bytes); |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public final int get(int c) { |
| return dataArray[cpIndex(c)]; |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public final int bmpGet(int c) { |
| assert 0 <= c && c <= 0xffff; |
| return dataArray[fastIndex(c)]; |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public final int suppGet(int c) { |
| assert 0x10000 <= c && c <= 0x10ffff; |
| return dataArray[smallIndex(Type.FAST, c)]; |
| } |
| } |
| |
| /** |
| * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_32}. |
| * |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public static final class Fast32 extends Fast { |
| private final int[] dataArray; |
| |
| Fast32(char[] index, int[] data32, int highStart, |
| int index3NullOffset, int dataNullOffset) { |
| super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset); |
| this.dataArray = data32; |
| } |
| |
| /** |
| * Creates a trie from its binary form. |
| * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} |
| * with {@link Type#FAST} and {@link ValueWidth#BITS_32}. |
| * |
| * @param bytes a buffer containing the binary data of a CodePointTrie |
| * @return the trie |
| */ |
| public static Fast32 fromBinary(ByteBuffer bytes) { |
| return (Fast32) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_32, bytes); |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public final int get(int c) { |
| return dataArray[cpIndex(c)]; |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public final int bmpGet(int c) { |
| assert 0 <= c && c <= 0xffff; |
| return dataArray[fastIndex(c)]; |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public final int suppGet(int c) { |
| assert 0x10000 <= c && c <= 0x10ffff; |
| return dataArray[smallIndex(Type.FAST, c)]; |
| } |
| } |
| |
| /** |
| * A CodePointTrie with {@link Type#FAST} and {@link ValueWidth#BITS_8}. |
| * |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public static final class Fast8 extends Fast { |
| private final byte[] dataArray; |
| |
| Fast8(char[] index, byte[] data8, int highStart, |
| int index3NullOffset, int dataNullOffset) { |
| super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset); |
| this.dataArray = data8; |
| } |
| |
| /** |
| * Creates a trie from its binary form. |
| * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} |
| * with {@link Type#FAST} and {@link ValueWidth#BITS_8}. |
| * |
| * @param bytes a buffer containing the binary data of a CodePointTrie |
| * @return the trie |
| */ |
| public static Fast8 fromBinary(ByteBuffer bytes) { |
| return (Fast8) CodePointTrie.fromBinary(Type.FAST, ValueWidth.BITS_8, bytes); |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public final int get(int c) { |
| return dataArray[cpIndex(c)] & 0xff; |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public final int bmpGet(int c) { |
| assert 0 <= c && c <= 0xffff; |
| return dataArray[fastIndex(c)] & 0xff; |
| } |
| |
| /** |
| * {@inheritDoc} |
| */ |
| @Override |
| public final int suppGet(int c) { |
| assert 0x10000 <= c && c <= 0x10ffff; |
| return dataArray[smallIndex(Type.FAST, c)] & 0xff; |
| } |
| } |
| |
| /** |
| * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_16}. |
| * |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public static final class Small16 extends Small { |
| Small16(char[] index, char[] data16, int highStart, |
| int index3NullOffset, int dataNullOffset) { |
| super(index, new Data16(data16), highStart, index3NullOffset, dataNullOffset); |
| } |
| |
| /** |
| * Creates a trie from its binary form. |
| * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} |
| * with {@link Type#SMALL} and {@link ValueWidth#BITS_16}. |
| * |
| * @param bytes a buffer containing the binary data of a CodePointTrie |
| * @return the trie |
| */ |
| public static Small16 fromBinary(ByteBuffer bytes) { |
| return (Small16) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_16, bytes); |
| } |
| } |
| |
| /** |
| * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_32}. |
| * |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public static final class Small32 extends Small { |
| Small32(char[] index, int[] data32, int highStart, |
| int index3NullOffset, int dataNullOffset) { |
| super(index, new Data32(data32), highStart, index3NullOffset, dataNullOffset); |
| } |
| |
| /** |
| * Creates a trie from its binary form. |
| * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} |
| * with {@link Type#SMALL} and {@link ValueWidth#BITS_32}. |
| * |
| * @param bytes a buffer containing the binary data of a CodePointTrie |
| * @return the trie |
| */ |
| public static Small32 fromBinary(ByteBuffer bytes) { |
| return (Small32) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_32, bytes); |
| } |
| } |
| |
| /** |
| * A CodePointTrie with {@link Type#SMALL} and {@link ValueWidth#BITS_8}. |
| * |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public static final class Small8 extends Small { |
| Small8(char[] index, byte[] data8, int highStart, |
| int index3NullOffset, int dataNullOffset) { |
| super(index, new Data8(data8), highStart, index3NullOffset, dataNullOffset); |
| } |
| |
| /** |
| * Creates a trie from its binary form. |
| * Same as {@link CodePointTrie#fromBinary(Type, ValueWidth, ByteBuffer)} |
| * with {@link Type#SMALL} and {@link ValueWidth#BITS_8}. |
| * |
| * @param bytes a buffer containing the binary data of a CodePointTrie |
| * @return the trie |
| */ |
| public static Small8 fromBinary(ByteBuffer bytes) { |
| return (Small8) CodePointTrie.fromBinary(Type.SMALL, ValueWidth.BITS_8, bytes); |
| } |
| } |
| } |