| /* GENERATED SOURCE. DO NOT MODIFY. */ |
| // © 2016 and later: Unicode, Inc. and others. |
| // License & terms of use: http://www.unicode.org/copyright.html |
| package android.icu.impl; |
| |
| import java.io.IOException; |
| import java.text.CharacterIterator; |
| import java.util.Locale; |
| |
| import android.icu.lang.UCharacter; |
| import android.icu.lang.UCharacterCategory; |
| import android.icu.text.BreakIterator; |
| import android.icu.text.Edits; |
| import android.icu.util.ICUUncheckedIOException; |
| import android.icu.util.ULocale; |
| |
| /** |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public final class CaseMapImpl { |
| /** |
| * Implementation of UCaseProps.ContextIterator, iterates over a String. |
| * See ustrcase.c/utf16_caseContextIterator(). |
| * @hide Only a subset of ICU is exposed in Android |
| */ |
| public static final class StringContextIterator implements UCaseProps.ContextIterator { |
| /** |
| * Constructor. |
| * @param src String to iterate over. |
| */ |
| public StringContextIterator(CharSequence src) { |
| this.s=src; |
| limit=src.length(); |
| cpStart=cpLimit=index=0; |
| dir=0; |
| } |
| |
| /** |
| * Constructor. |
| * @param src String to iterate over. |
| * @param cpStart Start index of the current code point. |
| * @param cpLimit Limit index of the current code point. |
| */ |
| public StringContextIterator(CharSequence src, int cpStart, int cpLimit) { |
| s = src; |
| index = 0; |
| limit = src.length(); |
| this.cpStart = cpStart; |
| this.cpLimit = cpLimit; |
| dir = 0; |
| } |
| |
| /** |
| * Set the iteration limit for nextCaseMapCP() to an index within the string. |
| * If the limit parameter is negative or past the string, then the |
| * string length is restored as the iteration limit. |
| * |
| * <p>This limit does not affect the next() function which always |
| * iterates to the very end of the string. |
| * |
| * @param lim The iteration limit. |
| */ |
| public void setLimit(int lim) { |
| if(0<=lim && lim<=s.length()) { |
| limit=lim; |
| } else { |
| limit=s.length(); |
| } |
| } |
| |
| /** |
| * Move to the iteration limit without fetching code points up to there. |
| */ |
| public void moveToLimit() { |
| cpStart=cpLimit=limit; |
| } |
| |
| public void moveTo(int i) { |
| cpStart=cpLimit=i; |
| } |
| |
| /** |
| * Iterate forward through the string to fetch the next code point |
| * to be case-mapped, and set the context indexes for it. |
| * |
| * <p>When the iteration limit is reached (and -1 is returned), |
| * getCPStart() will be at the iteration limit. |
| * |
| * <p>Iteration with next() does not affect the position for nextCaseMapCP(). |
| * |
| * @return The next code point to be case-mapped, or <0 when the iteration is done. |
| */ |
| public int nextCaseMapCP() { |
| cpStart=cpLimit; |
| if(cpLimit<limit) { |
| int c=Character.codePointAt(s, cpLimit); |
| cpLimit+=Character.charCount(c); |
| return c; |
| } else { |
| return -1; |
| } |
| } |
| |
| public void setCPStartAndLimit(int s, int l) { |
| cpStart = s; |
| cpLimit = l; |
| dir = 0; |
| } |
| /** |
| * Returns the start of the code point that was last returned |
| * by nextCaseMapCP(). |
| */ |
| public int getCPStart() { |
| return cpStart; |
| } |
| |
| /** |
| * Returns the limit of the code point that was last returned |
| * by nextCaseMapCP(). |
| */ |
| public int getCPLimit() { |
| return cpLimit; |
| } |
| |
| public int getCPLength() { |
| return cpLimit-cpStart; |
| } |
| |
| // implement UCaseProps.ContextIterator |
| // The following code is not used anywhere in this private class |
| @Override |
| public void reset(int direction) { |
| if(direction>0) { |
| /* reset for forward iteration */ |
| dir=1; |
| index=cpLimit; |
| } else if(direction<0) { |
| /* reset for backward iteration */ |
| dir=-1; |
| index=cpStart; |
| } else { |
| // not a valid direction |
| dir=0; |
| index=0; |
| } |
| } |
| |
| @Override |
| public int next() { |
| int c; |
| |
| if(dir>0 && index<s.length()) { |
| c=Character.codePointAt(s, index); |
| index+=Character.charCount(c); |
| return c; |
| } else if(dir<0 && index>0) { |
| c=Character.codePointBefore(s, index); |
| index-=Character.charCount(c); |
| return c; |
| } |
| return -1; |
| } |
| |
| // variables |
| protected CharSequence s; |
| protected int index, limit, cpStart, cpLimit; |
| protected int dir; // 0=initial state >0=forward <0=backward |
| } |
| |
| public static final int TITLECASE_WHOLE_STRING = 0x20; |
| public static final int TITLECASE_SENTENCES = 0x40; |
| |
| /** |
| * Bit mask for the titlecasing iterator options bit field. |
| * Currently only 3 out of 8 values are used: |
| * 0 (words), TITLECASE_WHOLE_STRING, TITLECASE_SENTENCES. |
| * See stringoptions.h. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| private static final int TITLECASE_ITERATOR_MASK = 0xe0; |
| |
| public static final int TITLECASE_ADJUST_TO_CASED = 0x400; |
| |
| /** |
| * Bit mask for the titlecasing index adjustment options bit set. |
| * Currently two bits are defined: |
| * TITLECASE_NO_BREAK_ADJUSTMENT, TITLECASE_ADJUST_TO_CASED. |
| * See stringoptions.h. |
| * @hide draft / provisional / internal are hidden on Android |
| */ |
| private static final int TITLECASE_ADJUSTMENT_MASK = 0x600; |
| |
| public static int addTitleAdjustmentOption(int options, int newOption) { |
| int adjOptions = options & TITLECASE_ADJUSTMENT_MASK; |
| if (adjOptions !=0 && adjOptions != newOption) { |
| throw new IllegalArgumentException("multiple titlecasing index adjustment options"); |
| } |
| return options | newOption; |
| } |
| |
| private static final char ACUTE = '\u0301'; |
| |
| private static final int U_GC_M_MASK = |
| (1 << UCharacterCategory.NON_SPACING_MARK) | |
| (1 << UCharacterCategory.COMBINING_SPACING_MARK) | |
| (1 << UCharacterCategory.ENCLOSING_MARK); |
| |
| private static final int LNS = |
| (1 << UCharacterCategory.UPPERCASE_LETTER) | |
| (1 << UCharacterCategory.LOWERCASE_LETTER) | |
| (1 << UCharacterCategory.TITLECASE_LETTER) | |
| // Not MODIFIER_LETTER: We count only cased modifier letters. |
| (1 << UCharacterCategory.OTHER_LETTER) | |
| |
| (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) | |
| (1 << UCharacterCategory.LETTER_NUMBER) | |
| (1 << UCharacterCategory.OTHER_NUMBER) | |
| |
| (1 << UCharacterCategory.MATH_SYMBOL) | |
| (1 << UCharacterCategory.CURRENCY_SYMBOL) | |
| (1 << UCharacterCategory.MODIFIER_SYMBOL) | |
| (1 << UCharacterCategory.OTHER_SYMBOL) | |
| |
| (1 << UCharacterCategory.PRIVATE_USE); |
| |
| private static boolean isLNS(int c) { |
| // Letter, number, symbol, |
| // or a private use code point because those are typically used as letters or numbers. |
| // Consider modifier letters only if they are cased. |
| int gc = UCharacterProperty.INSTANCE.getType(c); |
| return ((1 << gc) & LNS) != 0 || |
| (gc == UCharacterCategory.MODIFIER_LETTER && |
| UCaseProps.INSTANCE.getType(c) != UCaseProps.NONE); |
| } |
| |
| public static int addTitleIteratorOption(int options, int newOption) { |
| int iterOptions = options & TITLECASE_ITERATOR_MASK; |
| if (iterOptions !=0 && iterOptions != newOption) { |
| throw new IllegalArgumentException("multiple titlecasing iterator options"); |
| } |
| return options | newOption; |
| } |
| |
| public static BreakIterator getTitleBreakIterator( |
| Locale locale, int options, BreakIterator iter) { |
| options &= TITLECASE_ITERATOR_MASK; |
| if (options != 0 && iter != null) { |
| throw new IllegalArgumentException( |
| "titlecasing iterator option together with an explicit iterator"); |
| } |
| if (iter == null) { |
| switch (options) { |
| case 0: |
| iter = BreakIterator.getWordInstance(locale); |
| break; |
| case TITLECASE_WHOLE_STRING: |
| iter = new WholeStringBreakIterator(); |
| break; |
| case TITLECASE_SENTENCES: |
| iter = BreakIterator.getSentenceInstance(locale); |
| break; |
| default: |
| throw new IllegalArgumentException("unknown titlecasing iterator option"); |
| } |
| } |
| return iter; |
| } |
| |
| public static BreakIterator getTitleBreakIterator( |
| ULocale locale, int options, BreakIterator iter) { |
| options &= TITLECASE_ITERATOR_MASK; |
| if (options != 0 && iter != null) { |
| throw new IllegalArgumentException( |
| "titlecasing iterator option together with an explicit iterator"); |
| } |
| if (iter == null) { |
| switch (options) { |
| case 0: |
| iter = BreakIterator.getWordInstance(locale); |
| break; |
| case TITLECASE_WHOLE_STRING: |
| iter = new WholeStringBreakIterator(); |
| break; |
| case TITLECASE_SENTENCES: |
| iter = BreakIterator.getSentenceInstance(locale); |
| break; |
| default: |
| throw new IllegalArgumentException("unknown titlecasing iterator option"); |
| } |
| } |
| return iter; |
| } |
| |
| /** |
| * Omit unchanged text when case-mapping with Edits. |
| */ |
| public static final int OMIT_UNCHANGED_TEXT = 0x4000; |
| |
| private static final class WholeStringBreakIterator extends BreakIterator { |
| private int length; |
| |
| private static void notImplemented() { |
| throw new UnsupportedOperationException("should not occur"); |
| } |
| |
| @Override |
| public int first() { |
| return 0; |
| } |
| |
| @Override |
| public int last() { |
| notImplemented(); |
| return 0; |
| } |
| |
| @Override |
| public int next(int n) { |
| notImplemented(); |
| return 0; |
| } |
| |
| @Override |
| public int next() { |
| return length; |
| } |
| |
| @Override |
| public int previous() { |
| notImplemented(); |
| return 0; |
| } |
| |
| @Override |
| public int following(int offset) { |
| notImplemented(); |
| return 0; |
| } |
| |
| @Override |
| public int current() { |
| notImplemented(); |
| return 0; |
| } |
| |
| @Override |
| public CharacterIterator getText() { |
| notImplemented(); |
| return null; |
| } |
| |
| @Override |
| public void setText(CharacterIterator newText) { |
| length = newText.getEndIndex(); |
| } |
| |
| @Override |
| public void setText(CharSequence newText) { |
| length = newText.length(); |
| } |
| |
| @Override |
| public void setText(String newText) { |
| length = newText.length(); |
| } |
| } |
| |
| private static int appendCodePoint(Appendable a, int c) throws IOException { |
| if (c <= Character.MAX_VALUE) { |
| a.append((char)c); |
| return 1; |
| } else { |
| a.append((char)(0xd7c0 + (c >> 10))); |
| a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff))); |
| return 2; |
| } |
| } |
| |
| /** |
| * Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}. |
| * @throws IOException |
| */ |
| private static void appendResult(int result, Appendable dest, |
| int cpLength, int options, Edits edits) throws IOException { |
| // Decode the result. |
| if (result < 0) { |
| // (not) original code point |
| if (edits != null) { |
| edits.addUnchanged(cpLength); |
| } |
| if ((options & OMIT_UNCHANGED_TEXT) != 0) { |
| return; |
| } |
| appendCodePoint(dest, ~result); |
| } else if (result <= UCaseProps.MAX_STRING_LENGTH) { |
| // The mapping has already been appended to result. |
| if (edits != null) { |
| edits.addReplace(cpLength, result); |
| } |
| } else { |
| // Append the single-code point mapping. |
| int length = appendCodePoint(dest, result); |
| if (edits != null) { |
| edits.addReplace(cpLength, length); |
| } |
| } |
| } |
| |
| private static final void appendUnchanged(CharSequence src, int start, int length, |
| Appendable dest, int options, Edits edits) throws IOException { |
| if (length > 0) { |
| if (edits != null) { |
| edits.addUnchanged(length); |
| } |
| if ((options & OMIT_UNCHANGED_TEXT) != 0) { |
| return; |
| } |
| dest.append(src, start, start + length); |
| } |
| } |
| |
| private static String applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits) { |
| if (!edits.hasChanges()) { |
| return src.toString(); |
| } |
| StringBuilder result = new StringBuilder(src.length() + edits.lengthDelta()); |
| for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) { |
| if (ei.hasChange()) { |
| int i = ei.replacementIndex(); |
| result.append(replacementChars, i, i + ei.newLength()); |
| } else { |
| int i = ei.sourceIndex(); |
| result.append(src, i, i + ei.oldLength()); |
| } |
| } |
| return result.toString(); |
| } |
| |
| private static final Trie2_16 CASE_TRIE = UCaseProps.getTrie(); |
| |
| /** |
| * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account. |
| * caseLocale < 0: Case-folds [srcStart..srcLimit[. |
| */ |
| private static void internalToLower(int caseLocale, int options, |
| CharSequence src, int srcStart, int srcLimit, StringContextIterator iter, |
| Appendable dest, Edits edits) throws IOException { |
| byte[] latinToLower; |
| if (caseLocale == UCaseProps.LOC_ROOT || |
| (caseLocale >= 0 ? |
| !(caseLocale == UCaseProps.LOC_TURKISH || caseLocale == UCaseProps.LOC_LITHUANIAN) : |
| (options & UCaseProps.FOLD_CASE_OPTIONS_MASK) == UCharacter.FOLD_CASE_DEFAULT)) { |
| latinToLower = UCaseProps.LatinCase.TO_LOWER_NORMAL; |
| } else { |
| latinToLower = UCaseProps.LatinCase.TO_LOWER_TR_LT; |
| } |
| int prev = srcStart; |
| int srcIndex = srcStart; |
| outerLoop: |
| for (;;) { |
| // fast path for simple cases |
| char lead; |
| for (;;) { |
| if (srcIndex >= srcLimit) { |
| break outerLoop; |
| } |
| lead = src.charAt(srcIndex); |
| int delta; |
| if (lead < UCaseProps.LatinCase.LONG_S) { |
| byte d = latinToLower[lead]; |
| if (d == UCaseProps.LatinCase.EXC) { break; } |
| ++srcIndex; |
| if (d == 0) { continue; } |
| delta = d; |
| } else if (lead >= 0xd800) { |
| break; // surrogate or higher |
| } else { |
| int props = CASE_TRIE.getFromU16SingleLead(lead); |
| if (UCaseProps.propsHasException(props)) { break; } |
| ++srcIndex; |
| if (!UCaseProps.isUpperOrTitleFromProps(props) || |
| (delta = UCaseProps.getDelta(props)) == 0) { |
| continue; |
| } |
| } |
| lead += delta; |
| appendUnchanged(src, prev, srcIndex - 1 - prev, dest, options, edits); |
| dest.append(lead); |
| if (edits != null) { |
| edits.addReplace(1, 1); |
| } |
| prev = srcIndex; |
| } |
| // slow path |
| int cpStart = srcIndex++; |
| char trail; |
| int c; |
| if (Character.isHighSurrogate(lead) && srcIndex < srcLimit && |
| Character.isLowSurrogate(trail = src.charAt(srcIndex))) { |
| c = Character.toCodePoint(lead, trail); |
| ++srcIndex; |
| } else { |
| c = lead; |
| } |
| // We need to append unchanged text before calling the UCaseProps.toFullXyz() methods |
| // because they will sometimes append their mapping to dest, |
| // and that must be after copying the previous text. |
| appendUnchanged(src, prev, cpStart - prev, dest, options, edits); |
| prev = cpStart; |
| if (caseLocale >= 0) { |
| if (iter == null) { |
| iter = new StringContextIterator(src, cpStart, srcIndex); |
| } else { |
| iter.setCPStartAndLimit(cpStart, srcIndex); |
| } |
| c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale); |
| } else { |
| c = UCaseProps.INSTANCE.toFullFolding(c, dest, options); |
| } |
| if (c >= 0) { |
| appendResult(c, dest, srcIndex - cpStart, options, edits); |
| prev = srcIndex; |
| } |
| } |
| appendUnchanged(src, prev, srcIndex - prev, dest, options, edits); |
| } |
| |
| private static void internalToUpper(int caseLocale, int options, |
| CharSequence src, Appendable dest, Edits edits) throws IOException { |
| StringContextIterator iter = null; |
| byte[] latinToUpper; |
| if (caseLocale == UCaseProps.LOC_TURKISH) { |
| latinToUpper = UCaseProps.LatinCase.TO_UPPER_TR; |
| } else { |
| latinToUpper = UCaseProps.LatinCase.TO_UPPER_NORMAL; |
| } |
| int prev = 0; |
| int srcIndex = 0; |
| int srcLength = src.length(); |
| outerLoop: |
| for (;;) { |
| // fast path for simple cases |
| char lead; |
| for (;;) { |
| if (srcIndex >= srcLength) { |
| break outerLoop; |
| } |
| lead = src.charAt(srcIndex); |
| int delta; |
| if (lead < UCaseProps.LatinCase.LONG_S) { |
| byte d = latinToUpper[lead]; |
| if (d == UCaseProps.LatinCase.EXC) { break; } |
| ++srcIndex; |
| if (d == 0) { continue; } |
| delta = d; |
| } else if (lead >= 0xd800) { |
| break; // surrogate or higher |
| } else { |
| int props = CASE_TRIE.getFromU16SingleLead(lead); |
| if (UCaseProps.propsHasException(props)) { break; } |
| ++srcIndex; |
| if (UCaseProps.getTypeFromProps(props) != UCaseProps.LOWER || |
| (delta = UCaseProps.getDelta(props)) == 0) { |
| continue; |
| } |
| } |
| lead += delta; |
| appendUnchanged(src, prev, srcIndex - 1 - prev, dest, options, edits); |
| dest.append(lead); |
| if (edits != null) { |
| edits.addReplace(1, 1); |
| } |
| prev = srcIndex; |
| } |
| // slow path |
| int cpStart = srcIndex++; |
| char trail; |
| int c; |
| if (Character.isHighSurrogate(lead) && srcIndex < srcLength && |
| Character.isLowSurrogate(trail = src.charAt(srcIndex))) { |
| c = Character.toCodePoint(lead, trail); |
| ++srcIndex; |
| } else { |
| c = lead; |
| } |
| if (iter == null) { |
| iter = new StringContextIterator(src, cpStart, srcIndex); |
| } else { |
| iter.setCPStartAndLimit(cpStart, srcIndex); |
| } |
| // We need to append unchanged text before calling UCaseProps.toFullUpper() |
| // because it will sometimes append its mapping to dest, |
| // and that must be after copying the previous text. |
| appendUnchanged(src, prev, cpStart - prev, dest, options, edits); |
| prev = cpStart; |
| c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale); |
| if (c >= 0) { |
| appendResult(c, dest, srcIndex - cpStart, options, edits); |
| prev = srcIndex; |
| } |
| } |
| appendUnchanged(src, prev, srcIndex - prev, dest, options, edits); |
| } |
| |
| public static String toLower(int caseLocale, int options, CharSequence src) { |
| if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { |
| if (src.length() == 0) { |
| return src.toString(); |
| } |
| // Collect and apply only changes. |
| // Good if no or few changes. Bad (slow) if many changes. |
| Edits edits = new Edits(); |
| StringBuilder replacementChars = toLower( |
| caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); |
| return applyEdits(src, replacementChars, edits); |
| } else { |
| return toLower(caseLocale, options, src, |
| new StringBuilder(src.length()), null).toString(); |
| } |
| } |
| |
| public static <A extends Appendable> A toLower(int caseLocale, int options, |
| CharSequence src, A dest, Edits edits) { |
| try { |
| if (edits != null) { |
| edits.reset(); |
| } |
| internalToLower(caseLocale, options, src, 0, src.length(), null, dest, edits); |
| return dest; |
| } catch (IOException e) { |
| throw new ICUUncheckedIOException(e); |
| } |
| } |
| |
| public static String toUpper(int caseLocale, int options, CharSequence src) { |
| if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { |
| if (src.length() == 0) { |
| return src.toString(); |
| } |
| // Collect and apply only changes. |
| // Good if no or few changes. Bad (slow) if many changes. |
| Edits edits = new Edits(); |
| StringBuilder replacementChars = toUpper( |
| caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); |
| return applyEdits(src, replacementChars, edits); |
| } else { |
| return toUpper(caseLocale, options, src, |
| new StringBuilder(src.length()), null).toString(); |
| } |
| } |
| |
| public static <A extends Appendable> A toUpper(int caseLocale, int options, |
| CharSequence src, A dest, Edits edits) { |
| try { |
| if (edits != null) { |
| edits.reset(); |
| } |
| if (caseLocale == UCaseProps.LOC_GREEK) { |
| return GreekUpper.toUpper(options, src, dest, edits); |
| } |
| internalToUpper(caseLocale, options, src, dest, edits); |
| return dest; |
| } catch (IOException e) { |
| throw new ICUUncheckedIOException(e); |
| } |
| } |
| |
| public static String toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src) { |
| if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { |
| if (src.length() == 0) { |
| return src.toString(); |
| } |
| // Collect and apply only changes. |
| // Good if no or few changes. Bad (slow) if many changes. |
| Edits edits = new Edits(); |
| StringBuilder replacementChars = toTitle( |
| caseLocale, options | OMIT_UNCHANGED_TEXT, iter, src, |
| new StringBuilder(), edits); |
| return applyEdits(src, replacementChars, edits); |
| } else { |
| return toTitle(caseLocale, options, iter, src, |
| new StringBuilder(src.length()), null).toString(); |
| } |
| } |
| |
| public static <A extends Appendable> A toTitle( |
| int caseLocale, int options, BreakIterator titleIter, |
| CharSequence src, A dest, Edits edits) { |
| try { |
| if (edits != null) { |
| edits.reset(); |
| } |
| |
| /* set up local variables */ |
| StringContextIterator iter = new StringContextIterator(src); |
| int srcLength = src.length(); |
| int prev=0; |
| boolean isFirstIndex=true; |
| |
| /* titlecasing loop */ |
| while(prev<srcLength) { |
| /* find next index where to titlecase */ |
| int index; |
| if(isFirstIndex) { |
| isFirstIndex=false; |
| index=titleIter.first(); |
| } else { |
| index=titleIter.next(); |
| } |
| if(index==BreakIterator.DONE || index>srcLength) { |
| index=srcLength; |
| } |
| |
| /* |
| * Segment [prev..index[ into 3 parts: |
| * a) skipped characters (copy as-is) [prev..titleStart[ |
| * b) first letter (titlecase) [titleStart..titleLimit[ |
| * c) subsequent characters (lowercase) [titleLimit..index[ |
| */ |
| if(prev<index) { |
| // Find and copy skipped characters [prev..titleStart[ |
| int titleStart=prev; |
| iter.setLimit(index); |
| int c=iter.nextCaseMapCP(); |
| if ((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0) { |
| // Adjust the titlecasing index to the next cased character, |
| // or to the next letter/number/symbol/private use. |
| // Stop with titleStart<titleLimit<=index |
| // if there is a character to be titlecased, |
| // or else stop with titleStart==titleLimit==index. |
| boolean toCased = (options&CaseMapImpl.TITLECASE_ADJUST_TO_CASED) != 0; |
| while ((toCased ? |
| UCaseProps.NONE==UCaseProps.INSTANCE.getType(c) : |
| !CaseMapImpl.isLNS(c)) && |
| (c=iter.nextCaseMapCP())>=0) {} |
| // If c<0 then we have only uncased characters in [prev..index[ |
| // and stopped with titleStart==titleLimit==index. |
| titleStart=iter.getCPStart(); |
| if (prev < titleStart) { |
| appendUnchanged(src, prev, titleStart-prev, dest, options, edits); |
| } |
| } |
| |
| if(titleStart<index) { |
| // titlecase c which is from [titleStart..titleLimit[ |
| c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale); |
| appendResult(c, dest, iter.getCPLength(), options, edits); |
| |
| // Special case Dutch IJ titlecasing |
| int titleLimit; |
| if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) { |
| if (c < 0) { |
| c = ~c; |
| } |
| if (c == 'I' || c == 'Í') { |
| titleLimit = maybeTitleDutchIJ(src, c, titleStart + 1, index, dest, options, edits); |
| iter.moveTo(titleLimit); |
| } |
| else { |
| titleLimit = iter.getCPLimit(); |
| } |
| } else { |
| titleLimit = iter.getCPLimit(); |
| } |
| |
| // lowercase [titleLimit..index[ |
| if(titleLimit<index) { |
| if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) { |
| // Normal operation: Lowercase the rest of the word. |
| internalToLower(caseLocale, options, |
| src, titleLimit, index, iter, dest, edits); |
| } else { |
| // Optionally just copy the rest of the word unchanged. |
| appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits); |
| } |
| iter.moveToLimit(); |
| } |
| } |
| } |
| |
| prev=index; |
| } |
| return dest; |
| } catch (IOException e) { |
| throw new ICUUncheckedIOException(e); |
| } |
| } |
| |
| /** |
| * Input: c is a letter I with or without acute accent. |
| * start is the index in src after c, and is less than segmentLimit. |
| * If a plain i/I is followed by a plain j/J, |
| * or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute, |
| * then we output accordingly. |
| * |
| * @return the src index after the titlecased sequence, or the start index if no Dutch IJ |
| * @throws IOException |
| */ |
| private static <A extends Appendable> int maybeTitleDutchIJ( |
| CharSequence src, int c, int start, int segmentLimit, |
| A dest, int options, Edits edits) throws IOException { |
| assert start < segmentLimit; |
| |
| int index = start; |
| boolean withAcute = false; |
| |
| // If the conditions are met, then the following variables tell us what to output. |
| int unchanged1 = 0; // code units before the j, or the whole sequence (0..3) |
| boolean doTitleJ = false; // true if the j needs to be titlecased |
| int unchanged2 = 0; // after the j (0 or 1) |
| |
| // next character after the first letter |
| char c2 = src.charAt(index++); |
| |
| // Is the first letter an i/I with accent? |
| if (c == 'I') { |
| if (c2 == ACUTE) { |
| withAcute = true; |
| unchanged1 = 1; |
| if (index == segmentLimit) { return start; } |
| c2 = src.charAt(index++); |
| } |
| } else { // Í |
| withAcute = true; |
| } |
| // Is the next character a j/J? |
| if (c2 == 'j') { |
| doTitleJ = true; |
| } else if (c2 == 'J') { |
| ++unchanged1; |
| } else { |
| return start; |
| } |
| // A plain i/I must be followed by a plain j/J. |
| // An i/I with acute must be followed by a j/J with acute. |
| if (withAcute) { |
| if (index == segmentLimit || src.charAt(index++) != ACUTE) { return start; } |
| if (doTitleJ) { |
| unchanged2 = 1; |
| } else { |
| ++unchanged1; |
| } |
| } |
| // There must not be another combining mark. |
| if (index < segmentLimit) { |
| int cp = Character.codePointAt(src, index); |
| int bit = 1 << UCharacter.getType(cp); |
| if ((bit & U_GC_M_MASK) != 0) { |
| return start; |
| } |
| } |
| // Output the rest of the Dutch IJ. |
| appendUnchanged(src, start, unchanged1, dest, options, edits); |
| start += unchanged1; |
| if (doTitleJ) { |
| dest.append('J'); |
| if (edits != null) { |
| edits.addReplace(1, 1); |
| } |
| ++start; |
| } |
| appendUnchanged(src, start, unchanged2, dest, options, edits); |
| assert start + unchanged2 == index; |
| return index; |
| } |
| |
| public static String fold(int options, CharSequence src) { |
| if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { |
| if (src.length() == 0) { |
| return src.toString(); |
| } |
| // Collect and apply only changes. |
| // Good if no or few changes. Bad (slow) if many changes. |
| Edits edits = new Edits(); |
| StringBuilder replacementChars = fold( |
| options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); |
| return applyEdits(src, replacementChars, edits); |
| } else { |
| return fold(options, src, new StringBuilder(src.length()), null).toString(); |
| } |
| } |
| |
| public static <A extends Appendable> A fold(int options, |
| CharSequence src, A dest, Edits edits) { |
| try { |
| if (edits != null) { |
| edits.reset(); |
| } |
| internalToLower(-1, options, src, 0, src.length(), null, dest, edits); |
| return dest; |
| } catch (IOException e) { |
| throw new ICUUncheckedIOException(e); |
| } |
| } |
| |
| private static final class GreekUpper { |
| // Data bits. |
| private static final int UPPER_MASK = 0x3ff; |
| private static final int HAS_VOWEL = 0x1000; |
| private static final int HAS_YPOGEGRAMMENI = 0x2000; |
| private static final int HAS_ACCENT = 0x4000; |
| private static final int HAS_DIALYTIKA = 0x8000; |
| // Further bits during data building and processing, not stored in the data map. |
| private static final int HAS_COMBINING_DIALYTIKA = 0x10000; |
| private static final int HAS_OTHER_GREEK_DIACRITIC = 0x20000; |
| |
| private static final int HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT; |
| private static final int HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA = |
| HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA; |
| private static final int HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA; |
| |
| // State bits. |
| private static final int AFTER_CASED = 1; |
| private static final int AFTER_VOWEL_WITH_COMBINING_ACCENT = 2; |
| private static final int AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT = 4; |
| |
| // Data generated by prototype code, see |
| // https://icu.unicode.org/design/case/greek-upper |
| // TODO: Move this data into ucase.icu. |
| private static final char[] data0370 = { |
| // U+0370..03FF |
| 0x0370, // Ͱ |
| 0x0370, // ͱ |
| 0x0372, // Ͳ |
| 0x0372, // ͳ |
| 0, |
| 0, |
| 0x0376, // Ͷ |
| 0x0376, // ͷ |
| 0, |
| 0, |
| 0x037A, // ͺ |
| 0x03FD, // ͻ |
| 0x03FE, // ͼ |
| 0x03FF, // ͽ |
| 0, |
| 0x037F, // Ϳ |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ά |
| 0, |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // Έ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ή |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ί |
| 0, |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // Ό |
| 0, |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ύ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ώ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΐ |
| 0x0391 | HAS_VOWEL, // Α |
| 0x0392, // Β |
| 0x0393, // Γ |
| 0x0394, // Δ |
| 0x0395 | HAS_VOWEL, // Ε |
| 0x0396, // Ζ |
| 0x0397 | HAS_VOWEL, // Η |
| 0x0398, // Θ |
| 0x0399 | HAS_VOWEL, // Ι |
| 0x039A, // Κ |
| 0x039B, // Λ |
| 0x039C, // Μ |
| 0x039D, // Ν |
| 0x039E, // Ξ |
| 0x039F | HAS_VOWEL, // Ο |
| 0x03A0, // Π |
| 0x03A1, // Ρ |
| 0, |
| 0x03A3, // Σ |
| 0x03A4, // Τ |
| 0x03A5 | HAS_VOWEL, // Υ |
| 0x03A6, // Φ |
| 0x03A7, // Χ |
| 0x03A8, // Ψ |
| 0x03A9 | HAS_VOWEL, // Ω |
| 0x0399 | HAS_VOWEL | HAS_DIALYTIKA, // Ϊ |
| 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, // Ϋ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // ά |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // έ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // ή |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // ί |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΰ |
| 0x0391 | HAS_VOWEL, // α |
| 0x0392, // β |
| 0x0393, // γ |
| 0x0394, // δ |
| 0x0395 | HAS_VOWEL, // ε |
| 0x0396, // ζ |
| 0x0397 | HAS_VOWEL, // η |
| 0x0398, // θ |
| 0x0399 | HAS_VOWEL, // ι |
| 0x039A, // κ |
| 0x039B, // λ |
| 0x039C, // μ |
| 0x039D, // ν |
| 0x039E, // ξ |
| 0x039F | HAS_VOWEL, // ο |
| 0x03A0, // π |
| 0x03A1, // ρ |
| 0x03A3, // ς |
| 0x03A3, // σ |
| 0x03A4, // τ |
| 0x03A5 | HAS_VOWEL, // υ |
| 0x03A6, // φ |
| 0x03A7, // χ |
| 0x03A8, // ψ |
| 0x03A9 | HAS_VOWEL, // ω |
| 0x0399 | HAS_VOWEL | HAS_DIALYTIKA, // ϊ |
| 0x03A5 | HAS_VOWEL | HAS_DIALYTIKA, // ϋ |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // ό |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ύ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ώ |
| 0x03CF, // Ϗ |
| 0x0392, // ϐ |
| 0x0398, // ϑ |
| 0x03D2, // ϒ |
| 0x03D2 | HAS_ACCENT, // ϓ |
| 0x03D2 | HAS_DIALYTIKA, // ϔ |
| 0x03A6, // ϕ |
| 0x03A0, // ϖ |
| 0x03CF, // ϗ |
| 0x03D8, // Ϙ |
| 0x03D8, // ϙ |
| 0x03DA, // Ϛ |
| 0x03DA, // ϛ |
| 0x03DC, // Ϝ |
| 0x03DC, // ϝ |
| 0x03DE, // Ϟ |
| 0x03DE, // ϟ |
| 0x03E0, // Ϡ |
| 0x03E0, // ϡ |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0x039A, // ϰ |
| 0x03A1, // ϱ |
| 0x03F9, // ϲ |
| 0x037F, // ϳ |
| 0x03F4, // ϴ |
| 0x0395 | HAS_VOWEL, // ϵ |
| 0, |
| 0x03F7, // Ϸ |
| 0x03F7, // ϸ |
| 0x03F9, // Ϲ |
| 0x03FA, // Ϻ |
| 0x03FA, // ϻ |
| 0x03FC, // ϼ |
| 0x03FD, // Ͻ |
| 0x03FE, // Ͼ |
| 0x03FF, // Ͽ |
| }; |
| |
| private static final char[] data1F00 = { |
| // U+1F00..1FFF |
| 0x0391 | HAS_VOWEL, // ἀ |
| 0x0391 | HAS_VOWEL, // ἁ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἂ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἃ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἄ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἅ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἆ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // ἇ |
| 0x0391 | HAS_VOWEL, // Ἀ |
| 0x0391 | HAS_VOWEL, // Ἁ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἂ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἃ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἄ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἅ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἆ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ἇ |
| 0x0395 | HAS_VOWEL, // ἐ |
| 0x0395 | HAS_VOWEL, // ἑ |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // ἒ |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // ἓ |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // ἔ |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // ἕ |
| 0, |
| 0, |
| 0x0395 | HAS_VOWEL, // Ἐ |
| 0x0395 | HAS_VOWEL, // Ἑ |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἒ |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἓ |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἔ |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ἕ |
| 0, |
| 0, |
| 0x0397 | HAS_VOWEL, // ἠ |
| 0x0397 | HAS_VOWEL, // ἡ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἢ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἣ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἤ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἥ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἦ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // ἧ |
| 0x0397 | HAS_VOWEL, // Ἠ |
| 0x0397 | HAS_VOWEL, // Ἡ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἢ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἣ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἤ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἥ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἦ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ἧ |
| 0x0399 | HAS_VOWEL, // ἰ |
| 0x0399 | HAS_VOWEL, // ἱ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἲ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἳ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἴ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἵ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἶ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // ἷ |
| 0x0399 | HAS_VOWEL, // Ἰ |
| 0x0399 | HAS_VOWEL, // Ἱ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἲ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἳ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἴ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἵ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἶ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ἷ |
| 0x039F | HAS_VOWEL, // ὀ |
| 0x039F | HAS_VOWEL, // ὁ |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // ὂ |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // ὃ |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // ὄ |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // ὅ |
| 0, |
| 0, |
| 0x039F | HAS_VOWEL, // Ὀ |
| 0x039F | HAS_VOWEL, // Ὁ |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὂ |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὃ |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὄ |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὅ |
| 0, |
| 0, |
| 0x03A5 | HAS_VOWEL, // ὐ |
| 0x03A5 | HAS_VOWEL, // ὑ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὒ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὓ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὔ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὕ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὖ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὗ |
| 0, |
| 0x03A5 | HAS_VOWEL, // Ὑ |
| 0, |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὓ |
| 0, |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὕ |
| 0, |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὗ |
| 0x03A9 | HAS_VOWEL, // ὠ |
| 0x03A9 | HAS_VOWEL, // ὡ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὢ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὣ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὤ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὥ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὦ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὧ |
| 0x03A9 | HAS_VOWEL, // Ὠ |
| 0x03A9 | HAS_VOWEL, // Ὡ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὢ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὣ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὤ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὥ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὦ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὧ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // ὰ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // ά |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // ὲ |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // έ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // ὴ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // ή |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // ὶ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // ί |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // ὸ |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // ό |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ὺ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ύ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ὼ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ώ |
| 0, |
| 0, |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾀ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾁ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾂ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾃ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾄ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾅ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾆ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾇ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾈ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾉ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾊ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾋ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾌ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾍ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾎ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾏ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾐ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾑ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾒ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾓ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾔ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾕ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾖ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾗ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾘ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾙ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾚ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾛ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾜ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾝ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾞ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾟ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾠ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾡ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾢ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾣ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾤ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾥ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾦ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾧ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾨ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾩ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾪ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾫ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾬ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾭ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾮ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾯ |
| 0x0391 | HAS_VOWEL, // ᾰ |
| 0x0391 | HAS_VOWEL, // ᾱ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾲ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾳ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾴ |
| 0, |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // ᾶ |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ᾷ |
| 0x0391 | HAS_VOWEL, // Ᾰ |
| 0x0391 | HAS_VOWEL, // Ᾱ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ὰ |
| 0x0391 | HAS_VOWEL | HAS_ACCENT, // Ά |
| 0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ᾼ |
| 0, |
| 0x0399 | HAS_VOWEL, // ι |
| 0, |
| 0, |
| 0, |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῂ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῃ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῄ |
| 0, |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // ῆ |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῇ |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // Ὲ |
| 0x0395 | HAS_VOWEL | HAS_ACCENT, // Έ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ὴ |
| 0x0397 | HAS_VOWEL | HAS_ACCENT, // Ή |
| 0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῌ |
| 0, |
| 0, |
| 0, |
| 0x0399 | HAS_VOWEL, // ῐ |
| 0x0399 | HAS_VOWEL, // ῑ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῒ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΐ |
| 0, |
| 0, |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // ῖ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῗ |
| 0x0399 | HAS_VOWEL, // Ῐ |
| 0x0399 | HAS_VOWEL, // Ῑ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ὶ |
| 0x0399 | HAS_VOWEL | HAS_ACCENT, // Ί |
| 0, |
| 0, |
| 0, |
| 0, |
| 0x03A5 | HAS_VOWEL, // ῠ |
| 0x03A5 | HAS_VOWEL, // ῡ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῢ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ΰ |
| 0x03A1, // ῤ |
| 0x03A1, // ῥ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // ῦ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA, // ῧ |
| 0x03A5 | HAS_VOWEL, // Ῠ |
| 0x03A5 | HAS_VOWEL, // Ῡ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ὺ |
| 0x03A5 | HAS_VOWEL | HAS_ACCENT, // Ύ |
| 0x03A1, // Ῥ |
| 0, |
| 0, |
| 0, |
| 0, |
| 0, |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῲ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῳ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῴ |
| 0, |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // ῶ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT, // ῷ |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // Ὸ |
| 0x039F | HAS_VOWEL | HAS_ACCENT, // Ό |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ὼ |
| 0x03A9 | HAS_VOWEL | HAS_ACCENT, // Ώ |
| 0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI, // ῼ |
| 0, |
| 0, |
| 0, |
| }; |
| |
| // U+2126 Ohm sign |
| private static final char data2126 = 0x03A9 | HAS_VOWEL; // Ω |
| |
| private static final int getLetterData(int c) { |
| if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) { |
| return 0; |
| } else if (c <= 0x3ff) { |
| return data0370[c - 0x370]; |
| } else if (c <= 0x1fff) { |
| return data1F00[c - 0x1f00]; |
| } else if (c == 0x2126) { |
| return data2126; |
| } else { |
| return 0; |
| } |
| } |
| |
| /** |
| * Returns a non-zero value for each of the Greek combining diacritics |
| * listed in The Unicode Standard, version 8, chapter 7.2 Greek, |
| * plus some perispomeni look-alikes. |
| */ |
| private static final int getDiacriticData(int c) { |
| switch (c) { |
| case '\u0300': // varia |
| case '\u0301': // tonos = oxia |
| case '\u0342': // perispomeni |
| case '\u0302': // circumflex can look like perispomeni |
| case '\u0303': // tilde can look like perispomeni |
| case '\u0311': // inverted breve can look like perispomeni |
| return HAS_ACCENT; |
| case '\u0308': // dialytika = diaeresis |
| return HAS_COMBINING_DIALYTIKA; |
| case '\u0344': // dialytika tonos |
| return HAS_COMBINING_DIALYTIKA | HAS_ACCENT; |
| case '\u0345': // ypogegrammeni = iota subscript |
| return HAS_YPOGEGRAMMENI; |
| case '\u0304': // macron |
| case '\u0306': // breve |
| case '\u0313': // comma above |
| case '\u0314': // reversed comma above |
| case '\u0343': // koronis |
| return HAS_OTHER_GREEK_DIACRITIC; |
| default: |
| return 0; |
| } |
| } |
| |
| private static boolean isFollowedByCasedLetter(CharSequence s, int i) { |
| while (i < s.length()) { |
| int c = Character.codePointAt(s, i); |
| int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c); |
| if ((type & UCaseProps.IGNORABLE) != 0) { |
| // Case-ignorable, continue with the loop. |
| i += Character.charCount(c); |
| } else if (type != UCaseProps.NONE) { |
| return true; // Followed by cased letter. |
| } else { |
| return false; // Uncased and not case-ignorable. |
| } |
| } |
| return false; // Not followed by cased letter. |
| } |
| |
| /** |
| * Greek string uppercasing with a state machine. |
| * Probably simpler than a stateless function that has to figure out complex context-before |
| * for each character. |
| * TODO: Try to re-consolidate one way or another with the non-Greek function. |
| * |
| * <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8). |
| * @throws IOException |
| */ |
| private static <A extends Appendable> A toUpper(int options, |
| CharSequence src, A dest, Edits edits) throws IOException { |
| int state = 0; |
| for (int i = 0; i < src.length();) { |
| int c = Character.codePointAt(src, i); |
| int nextIndex = i + Character.charCount(c); |
| int nextState = 0; |
| int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c); |
| if ((type & UCaseProps.IGNORABLE) != 0) { |
| // c is case-ignorable |
| nextState |= (state & AFTER_CASED); |
| } else if (type != UCaseProps.NONE) { |
| // c is cased |
| nextState |= AFTER_CASED; |
| } |
| int data = getLetterData(c); |
| if (data > 0) { |
| int upper = data & UPPER_MASK; |
| // Add a dialytika to this iota or ypsilon vowel |
| // if we removed a tonos from the previous vowel, |
| // and that previous vowel did not also have (or gain) a dialytika. |
| // Adding one only to the final vowel in a longer sequence |
| // (which does not occur in normal writing) would require lookahead. |
| // Set the same flag as for preserving an existing dialytika. |
| if ((data & HAS_VOWEL) != 0 |
| && (state & (AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT | AFTER_VOWEL_WITH_COMBINING_ACCENT)) != 0 |
| && (upper == 'Ι' || upper == 'Υ')) { |
| data |= (state & AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT) != 0 ? HAS_DIALYTIKA |
| : HAS_COMBINING_DIALYTIKA; |
| } |
| int numYpogegrammeni = 0; // Map each one to a trailing, spacing, capital iota. |
| if ((data & HAS_YPOGEGRAMMENI) != 0) { |
| numYpogegrammeni = 1; |
| } |
| final boolean hasPrecomposedAccent = (data & HAS_ACCENT) != 0; |
| // Skip combining diacritics after this Greek letter. |
| while (nextIndex < src.length()) { |
| int diacriticData = getDiacriticData(src.charAt(nextIndex)); |
| if (diacriticData != 0) { |
| data |= diacriticData; |
| if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) { |
| ++numYpogegrammeni; |
| } |
| ++nextIndex; |
| } else { |
| break; // not a Greek diacritic |
| } |
| } |
| if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { |
| nextState |= hasPrecomposedAccent ? AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT |
| : AFTER_VOWEL_WITH_COMBINING_ACCENT; |
| } |
| // Map according to Greek rules. |
| boolean addTonos = false; |
| if (upper == 'Η' && |
| (data & HAS_ACCENT) != 0 && |
| numYpogegrammeni == 0 && |
| (state & AFTER_CASED) == 0 && |
| !isFollowedByCasedLetter(src, nextIndex)) { |
| // Keep disjunctive "or" with (only) a tonos. |
| // We use the same "word boundary" conditions as for the Final_Sigma test. |
| if (hasPrecomposedAccent) { |
| upper = 'Ή'; // Preserve the precomposed form. |
| } else { |
| addTonos = true; |
| } |
| } else if ((data & HAS_DIALYTIKA) != 0) { |
| // Preserve a vowel with dialytika in precomposed form if it exists. |
| if (upper == 'Ι') { |
| upper = 'Ϊ'; |
| data &= ~HAS_EITHER_DIALYTIKA; |
| } else if (upper == 'Υ') { |
| upper = 'Ϋ'; |
| data &= ~HAS_EITHER_DIALYTIKA; |
| } |
| } |
| |
| boolean change; |
| if (edits == null && (options & OMIT_UNCHANGED_TEXT) == 0) { |
| change = true; // common, simple usage |
| } else { |
| // Find out first whether we are changing the text. |
| change = src.charAt(i) != upper || numYpogegrammeni > 0; |
| int i2 = i + 1; |
| if ((data & HAS_EITHER_DIALYTIKA) != 0) { |
| change |= i2 >= nextIndex || src.charAt(i2) != 0x308; |
| ++i2; |
| } |
| if (addTonos) { |
| change |= i2 >= nextIndex || src.charAt(i2) != 0x301; |
| ++i2; |
| } |
| int oldLength = nextIndex - i; |
| int newLength = (i2 - i) + numYpogegrammeni; |
| change |= oldLength != newLength; |
| if (change) { |
| if (edits != null) { |
| edits.addReplace(oldLength, newLength); |
| } |
| } else { |
| if (edits != null) { |
| edits.addUnchanged(oldLength); |
| } |
| // Write unchanged text? |
| change = (options & OMIT_UNCHANGED_TEXT) == 0; |
| } |
| } |
| |
| if (change) { |
| dest.append((char)upper); |
| if ((data & HAS_EITHER_DIALYTIKA) != 0) { |
| dest.append('\u0308'); // restore or add a dialytika |
| } |
| if (addTonos) { |
| dest.append('\u0301'); |
| } |
| while (numYpogegrammeni > 0) { |
| dest.append('Ι'); |
| --numYpogegrammeni; |
| } |
| } |
| } else { |
| c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK); |
| appendResult(c, dest, nextIndex - i, options, edits); |
| } |
| i = nextIndex; |
| state = nextState; |
| } |
| return dest; |
| } |
| } |
| } |