crates/icu_casemap/src/internals.rs - platform/external/rust/android-crates-io - Git at Google

 // This file is part of ICU4X. For terms of use, please see the file
 // called LICENSE at the top level of the ICU4X source tree
 // (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

 //! This module contains most of the actual algorithms for case mapping.
 //!
 //! Primarily, it implements methods on `CaseMap`, which contains the data model.

 use crate::greek_to_me::{
     self, GreekCombiningCharacterSequenceDiacritics, GreekDiacritics, GreekPrecomposedLetterData,
     GreekVowel,
 };
 use crate::provider::data::{DotType, MappingKind};
 use crate::provider::exception_helpers::ExceptionSlot;
 use crate::provider::{CaseMap, CaseMapUnfold};
 use crate::set::ClosureSink;
 use crate::titlecase::TrailingCase;
 use core::fmt;
 use icu_locale_core::LanguageIdentifier;
 use writeable::Writeable;

 // U+0301, the combining acute accent. Used by
 // `ContextIterator::is_dutch_ij_pair_at_beginning` to recognise an accent on
 // either half of a Dutch "IJ" pair.
 const ACUTE: char = '\u{301}';

 // Used to control the behavior of CaseMapper::fold.
 // Currently only used to decide whether to use Turkic (T) mappings for dotted/dotless i.
 //
 // The derived `Default` leaves `exclude_special_i` false, i.e. the default
 // (non-Turkic) foldings are used.
 #[derive(Copy, Clone, Default)]
 pub(crate) struct FoldOptions {
     // When true, `simple_fold_special_case` folds U+0049 to U+0131 and U+0130 to U+0069
     // (the Turkic mappings) instead of applying the default mappings.
     exclude_special_i: bool,
 }

 impl FoldOptions {
     /// Returns options that select the Turkic foldings for dotted/dotless i.
     pub fn with_turkic_mappings() -> Self {
         Self {
             exclude_special_i: true,
         }
     }
 }

 /// A writeable preceded by a fixed string: `string` is emitted first, then `writeable`.
 pub(crate) struct StringAndWriteable<'a, W> {
     /// The prefix written before the wrapped writeable.
     pub string: &'a str,
     /// The wrapped writeable, written right after `string`.
     pub writeable: W,
 }

 impl<Wr: Writeable> Writeable for StringAndWriteable<'_, Wr> {
     /// Writes the prefix, then delegates to the wrapped writeable.
     fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
         let Self { string, writeable } = self;
         sink.write_str(string)?;
         writeable.write_to(sink)
     }

     /// The prefix length is known exactly; the wrapped writeable contributes its own hint.
     fn writeable_length_hint(&self) -> writeable::LengthHint {
         let prefix_hint = writeable::LengthHint::exact(self.string.len());
         let body_hint = self.writeable.writeable_length_hint();
         prefix_hint + body_hint
     }
 }

 /// Lazily applies a full case mapping to `src`; the mapped text is produced when
 /// the value is written through [`Writeable`].
 pub(crate) struct FullCaseWriteable<'a, const IS_TITLE_CONTEXT: bool> {
     // The case mapping data used for every lookup.
     data: &'a CaseMap<'a>,
     // The text to be mapped.
     src: &'a str,
     // Selects the hard-coded locale-specific behavior.
     locale: CaseMapLocale,
     // The mapping applied to the first character (and to all characters outside a
     // title context).
     mapping: MappingKind,
     // Only read when IS_TITLE_CONTEXT is true: how to treat everything after the
     // first character.
     titlecase_tail_casing: TrailingCase,
 }

 impl<const IS_TITLE_CONTEXT: bool> Writeable for FullCaseWriteable<'_, IS_TITLE_CONTEXT> {
     /// Maps `src` character by character into `sink`.
     ///
     /// In a title context only the first character is mapped with the requested
     /// mapping. The remainder is then either lowercased (`TrailingCase::Lower`) or
     /// left alone, in which case it is copied verbatim for `TrailingCase::Unchanged`.
     #[allow(clippy::indexing_slicing)] // `offset` comes from `char_indices`, so it is in bounds
     fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
         let text = self.src;
         let lowercase_tail = self.titlecase_tail_casing == TrailingCase::Lower;
         let mut kind = self.mapping;
         let mut remaining = text.char_indices();
         for (offset, ch) in remaining.by_ref() {
             let context = ContextIterator::new(&text[..offset], &text[offset..]);
             self.data
                 .full_helper::<IS_TITLE_CONTEXT, W>(ch, context, self.locale, kind, sink)?;
             if !IS_TITLE_CONTEXT {
                 continue;
             }
             if !lowercase_tail {
                 // Only the first character is mapped; stop here.
                 break;
             }
             // Everything after the first character is lowercased.
             kind = MappingKind::Lower;
         }
         // Write the rest of the string (empty if the loop consumed everything)
         if IS_TITLE_CONTEXT && self.titlecase_tail_casing == TrailingCase::Unchanged {
             sink.write_str(remaining.as_str())?;
         }
         Ok(())
     }

     // NOTE(review): some mappings remove characters (`FullMappingResult::Remove`), so the
     // output could in principle be shorter than `src` — confirm that a lower bound of
     // `src.len()` is acceptable for this hint.
     fn writeable_length_hint(&self) -> writeable::LengthHint {
         let source_len = self.src.len();
         writeable::LengthHint::at_least(source_len)
     }
 }

 impl<'data> CaseMap<'data> {
     // Shared implementation of the simple (single code point) mappings.
     // Returns `c` itself when no mapping of the requested kind applies.
     fn simple_helper(&self, c: char, kind: MappingKind) -> char {
         let data = self.lookup_data(c);
         if data.has_exception() {
             let exception = self.exceptions.get(data.exception_index());
             // The simple case slot is only consulted when the data is relevant to `kind`.
             let simple = if data.is_relevant_to(kind) {
                 exception.get_simple_case_slot_for(c)
             } else {
                 None
             };
             return simple
                 .or_else(|| exception.slot_char_for_kind(kind))
                 .unwrap_or(c);
         }
         if !data.is_relevant_to(kind) {
             return c;
         }
         let shifted = c as i32 + data.delta() as i32;
         // GIGO: delta should be valid
         char::from_u32(shifted as u32).unwrap_or(c)
     }

     /// Returns the simple lowercase mapping of the given `char`
     /// (delegates to `simple_helper` with `MappingKind::Lower`).
     #[inline]
     pub(crate) fn simple_lower(&self, c: char) -> char {
         self.simple_helper(c, MappingKind::Lower)
     }

     /// Returns the simple uppercase mapping of the given `char`
     /// (delegates to `simple_helper` with `MappingKind::Upper`).
     #[inline]
     pub(crate) fn simple_upper(&self, c: char) -> char {
         self.simple_helper(c, MappingKind::Upper)
     }

     /// Returns the simple titlecase mapping of the given `char`
     /// (delegates to `simple_helper` with `MappingKind::Title`).
     #[inline]
     pub(crate) fn simple_title(&self, c: char) -> char {
         self.simple_helper(c, MappingKind::Title)
     }

     /// Returns the simple case folding of the given `char`.
     ///
     /// `options` selects between the default and the Turkic foldings for characters
     /// whose exception entry is flagged as having a conditional fold. A character
     /// without a simple folding is returned unchanged.
     #[inline]
     pub(crate) fn simple_fold(&self, c: char, options: FoldOptions) -> char {
         let data = self.lookup_data(c);
         if !data.has_exception() {
             if data.is_upper_or_title() {
                 let folded = c as i32 + data.delta() as i32;
                 // GIGO: delta should be valid
                 char::from_u32(folded as u32).unwrap_or(c)
             } else {
                 c
             }
         } else {
             // TODO: if we move conditional fold and no_simple_case_folding into
             // simple_helper, this function can just call simple_helper.
             let idx = data.exception_index();
             let exception = self.exceptions.get(idx);
             if exception.bits.has_conditional_fold() {
                 self.simple_fold_special_case(c, options)
             } else if exception.bits.no_simple_case_folding() {
                 c
             } else if data.is_upper_or_title() && exception.has_slot(ExceptionSlot::Delta) {
                 // GIGO: the delta slot should always yield a valid char. If it does not,
                 // leave `c` unfolded, like every other fallback in this file, instead of
                 // replacing it with U+0000.
                 exception.get_simple_case_slot_for(c).unwrap_or(c)
             } else {
                 exception
                     .slot_char_for_kind(MappingKind::Fold)
                     .unwrap_or(c)
             }
         }
     }

     // Returns the dot type of `c`, read from its exception entry when it has one and
     // from the regular data otherwise.
     fn dot_type(&self, c: char) -> DotType {
         let data = self.lookup_data(c);
         if data.has_exception() {
             let exception = self.exceptions.get(data.exception_index());
             exception.bits.dot_type()
         } else {
             data.dot_type()
         }
     }

     // Returns true if this code point is case-sensitive.
     // This is not currently exposed.
     #[allow(dead_code)]
     fn is_case_sensitive(&self, c: char) -> bool {
         let data = self.lookup_data(c);
         if data.has_exception() {
             let exception = self.exceptions.get(data.exception_index());
             exception.bits.is_sensitive()
         } else {
             data.is_sensitive()
         }
     }

     /// Returns whether the character is cased, i.e. whether its data carries a case type
     pub(crate) fn is_cased(&self, c: char) -> bool {
         let data = self.lookup_data(c);
         data.case_type().is_some()
     }

     /// Writes the full case mapping of `c` for the mapping `kind` to `sink`.
     ///
     /// `context` gives access to the text before and after `c`, and `locale` enables the
     /// hard-coded language-specific behavior. The steps are, in order:
     /// 1. the Dutch IJ titlecasing extension,
     /// 2. the Greek uppercasing extension,
     /// 3. the data-driven mapping (conditional special cases, full mappings, then
     ///    simple mappings), falling back to writing `c` unchanged.
     #[inline(always)]
     // IS_TITLE_CONTEXT must be true if kind is MappingKind::Title
     // The kind may be a different kind with IS_TITLE_CONTEXT still true because
     // titlecasing a segment involves switching to lowercase later
     fn full_helper<const IS_TITLE_CONTEXT: bool, W: fmt::Write + ?Sized>(
         &self,
         c: char,
         context: ContextIterator,
         locale: CaseMapLocale,
         kind: MappingKind,
         sink: &mut W,
     ) -> fmt::Result {
         // If using a title mapping IS_TITLE_CONTEXT must be true
         debug_assert!(kind != MappingKind::Title || IS_TITLE_CONTEXT);
         // In a title context, kind MUST be Title or Lower
         debug_assert!(
             !IS_TITLE_CONTEXT || kind == MappingKind::Title || kind == MappingKind::Lower
         );

         // ICU4C's non-standard extension for Dutch IJ titlecasing
         // handled here instead of in full_lower_special_case because J does not have conditional
         // special casemapping.
         if IS_TITLE_CONTEXT && locale == CaseMapLocale::Dutch && kind == MappingKind::Lower {
             // When titlecasing, a J found immediately after an I at the beginning of the segment
             // should also uppercase. They are both allowed to have an acute accent but it must
             // be present on both letters or neither. They may not have any other combining marks.
             if (c == 'j' || c == 'J') && context.is_dutch_ij_pair_at_beginning(self) {
                 return sink.write_char('J');
             }
         }

         // ICU4C's non-standard extension for Greek uppercasing:
         // https://icu.unicode.org/design/case/greek-upper.
         // Effectively removes Greek accents from Greek vowels during uppercasing,
         // whilst attempting to preserve additional marks like the dialytika (diæresis)
         // and ypogegrammeni (combining small iota).
         if !IS_TITLE_CONTEXT && locale == CaseMapLocale::Greek && kind == MappingKind::Upper {
             // Remove all combining diacritics on a Greek letter.
             // Ypogegrammeni is not an accent mark and is handled by regular casemapping (it turns into
             // a capital iota).
             // The dialytika is removed here, but it may be added again when the base letter is being processed.
             if greek_to_me::is_greek_diacritic_except_ypogegrammeni(c)
                 && context.preceded_by_greek_letter()
             {
                 // Writing nothing drops the diacritic from the output.
                 return Ok(());
             }
             let data = greek_to_me::get_data(c);
             // Check if the character is a Greek vowel
             match data {
                 Some(GreekPrecomposedLetterData::Vowel(vowel, mut precomposed_diacritics)) => {
                     // Get the diacritics on the character itself, and add any further combining diacritics
                     // from the context.
                     let mut diacritics = context.add_greek_diacritics(precomposed_diacritics);
                     // If the previous vowel had an accent (which would be removed) but no dialytika,
                     // and this is an iota or upsilon, add a dialytika since it is necessary to disambiguate
                     // the now-unaccented adjacent vowels from a digraph/diphthong.
                     // Use a precomposed dialytika if the accent was precomposed, and a combining dialytika
                     // if the accent was combining, so as to map NFD to NFD and NFC to NFC.
                     if !diacritics.dialytika && (vowel == GreekVowel::Ι || vowel == GreekVowel::Υ)
                     {
                         if let Some(preceding_vowel) = context.preceding_greek_vowel_diacritics() {
                             if !preceding_vowel.combining.dialytika
                                 && !preceding_vowel.precomposed.dialytika
                             {
                                 if preceding_vowel.combining.accented {
                                     diacritics.dialytika = true;
                                 } else {
                                     precomposed_diacritics.dialytika =
                                         preceding_vowel.precomposed.accented;
                                 }
                             }
                         }
                     }
                     // Write the base of the uppercased combining character sequence.
                     // In most branches this is [`upper_base`], i.e., the uppercase letter with all accents removed.
                     // In some branches the base has a precomposed diacritic.
                     // In the case of the Greek disjunctive "or", a combining tonos may also be written.
                     match vowel {
                         GreekVowel::Η => {
                             // The letter η (eta) is allowed to retain a tonos when it forms a single-letter word, to distinguish
                             // the feminine definite article ἡ (monotonic η) from the disjunctive "or" ἤ (monotonic ή).
                             //
                             // A lone η with an accent other than the oxia/tonos is not expected,
                             // so there is no need to special-case the oxia/tonos.
                             // The ancient ᾖ (exist.PRS.SUBJ.3s) has a iota subscript as well as the circumflex,
                             // so it would not be given an oxia/tonos under this rule, and the subjunctive is formed with a particle
                             // (e.g. να είναι) since Byzantine times anyway.
                             if diacritics.accented
                                 && !context.followed_by_cased_letter(self)
                                 && !context.preceded_by_cased_letter(self)
                                 && !diacritics.ypogegrammeni
                             {
                                 if precomposed_diacritics.accented {
                                     sink.write_char('Ή')?;
                                 } else {
                                     sink.write_char('Η')?;
                                     sink.write_char(greek_to_me::TONOS)?;
                                 }
                             } else {
                                 sink.write_char('Η')?;
                             }
                         }
                         // A precomposed dialytika is emitted as part of the base letter, so the
                         // combining one must not be written again below.
                         GreekVowel::Ι => sink.write_char(if precomposed_diacritics.dialytika {
                             diacritics.dialytika = false;
                             'Ϊ'
                         } else {
                             vowel.into()
                         })?,
                         GreekVowel::Υ => sink.write_char(if precomposed_diacritics.dialytika {
                             diacritics.dialytika = false;
                             'Ϋ'
                         } else {
                             vowel.into()
                         })?,
                         _ => sink.write_char(vowel.into())?,
                     };
                     if diacritics.dialytika {
                         sink.write_char(greek_to_me::DIALYTIKA)?;
                     }
                     // A precomposed ypogegrammeni turns into a capital iota after the base.
                     if precomposed_diacritics.ypogegrammeni {
                         sink.write_char('Ι')?;
                     }

                     return Ok(());
                 }
                 // Rho might have breathing marks, we handle it specially
                 // to remove them
                 Some(GreekPrecomposedLetterData::Consonant(true)) => {
                     sink.write_char(greek_to_me::CAPITAL_RHO)?;
                     return Ok(());
                 }
                 _ => (),
             }
         }

         // Data-driven mapping for everything not handled above.
         let data = self.lookup_data(c);
         if !data.has_exception() {
             if data.is_relevant_to(kind) {
                 let mapped = c as i32 + data.delta() as i32;
                 // GIGO: delta should be valid
                 let mapped = char::from_u32(mapped as u32).unwrap_or(c);
                 sink.write_char(mapped)
             } else {
                 sink.write_char(c)
             }
         } else {
             let idx = data.exception_index();
             let exception = self.exceptions.get(idx);
             // Locale- and context-dependent mappings take precedence over the stored ones.
             if exception.bits.has_conditional_special() {
                 if let Some(special) = match kind {
                     MappingKind::Lower => {
                         self.full_lower_special_case::<IS_TITLE_CONTEXT>(c, context, locale)
                     }
                     MappingKind::Fold => self.full_fold_special_case(c, context, locale),
                     MappingKind::Upper | MappingKind::Title => self
                         .full_upper_or_title_special_case::<IS_TITLE_CONTEXT>(c, context, locale),
                 } {
                     return special.write_to(sink);
                 }
             }
             // Next, a non-empty full (possibly multi-character) mapping.
             if let Some(mapped_string) = exception.get_fullmappings_slot_for_kind(kind) {
                 if !mapped_string.is_empty() {
                     return sink.write_str(mapped_string);
                 }
             }

             if kind == MappingKind::Fold && exception.bits.no_simple_case_folding() {
                 return sink.write_char(c);
             }

             // Finally, fall back to the simple mappings, and to `c` itself if there are none.
             if data.is_relevant_to(kind) {
                 if let Some(simple) = exception.get_simple_case_slot_for(c) {
                     return sink.write_char(simple);
                 }
             }

             if let Some(slot_char) = exception.slot_char_for_kind(kind) {
                 sink.write_char(slot_char)
             } else {
                 sink.write_char(c)
             }
         }
     }

     // Hardcoded strings for the locale-specific mappings: a lowercase letter followed
     // by U+0307 (dot above) and, for the last three, one further accent.
     const I_DOT: &'static str = "\u{69}\u{307}";
     const J_DOT: &'static str = "\u{6a}\u{307}";
     const I_OGONEK_DOT: &'static str = "\u{12f}\u{307}";
     const I_DOT_GRAVE: &'static str = "\u{69}\u{307}\u{300}";
     const I_DOT_ACUTE: &'static str = "\u{69}\u{307}\u{301}";
     const I_DOT_TILDE: &'static str = "\u{69}\u{307}\u{303}";

     // Hardcoded simple case folding for uppercase I (U+0049) and dotted uppercase I (U+0130).
     // With the Turkic option (used for tr, az) the T mappings from CaseFolding.txt apply;
     // otherwise only U+0049 has a simple folding and U+0130 is returned unchanged.
     fn simple_fold_special_case(&self, c: char, options: FoldOptions) -> char {
         debug_assert!(c == '\u{49}' || c == '\u{130}');
         if options.exclude_special_i {
             // Turkic mappings
             match c {
                 // 0049; T; 0131; # LATIN CAPITAL LETTER I
                 '\u{49}' => '\u{131}',
                 // 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
                 '\u{130}' => '\u{69}',
                 _ => c,
             }
         } else if c == '\u{49}' {
             // 0049; C; 0069; # LATIN CAPITAL LETTER I
             '\u{69}'
         } else {
             // There is no simple case folding for U+130.
             c
         }
     }

     // Returns the locale- or context-dependent lowercase mapping of `c`, or `None` when
     // the regular mapping should be used instead.
     fn full_lower_special_case<const IS_TITLE_CONTEXT: bool>(
         &self,
         c: char,
         context: ContextIterator,
         locale: CaseMapLocale,
     ) -> Option<FullMappingResult> {
         if locale == CaseMapLocale::Lithuanian {
             // Lithuanian keeps the dot of a lowercase i/j when more accents follow above,
             // so an explicit dot above is introduced while lowercasing (for the accents
             // used in Lithuanian: grave, acute, and tilde above).
             let dotted = match c {
                 // I, J, and I-with-ogonek: only when the context has more accents above.
                 'I' if context.followed_by_more_above(self) => Some(Self::I_DOT),
                 'J' if context.followed_by_more_above(self) => Some(Self::J_DOT),
                 '\u{12e}' if context.followed_by_more_above(self) => Some(Self::I_OGONEK_DOT),
                 // Precomposed with an accent above: no need to look at the context.
                 '\u{cc}' => Some(Self::I_DOT_GRAVE),
                 '\u{cd}' => Some(Self::I_DOT_ACUTE),
                 '\u{128}' => Some(Self::I_DOT_TILDE),
                 _ => None,
             };
             if let Some(mapped) = dotted {
                 return Some(FullMappingResult::String(mapped));
             }
         }

         if locale == CaseMapLocale::Turkish {
             match c {
                 // I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
                 '\u{130}' => return Some(FullMappingResult::CodePoint('i')),
                 // When lowercasing, remove dot_above in the sequence I + dot_above,
                 // which will turn into i. This matches the behaviour of the
                 // canonically equivalent I-dot_above.
                 //
                 // In a titlecase context this is not applied when the I started the
                 // string: that I and its marks are handled by the uppercasing rules.
                 '\u{307}' if context.preceded_by_capital_i::<IS_TITLE_CONTEXT>(self) => {
                     return Some(FullMappingResult::Remove);
                 }
                 // Unless an I is before a dot_above, it lowercases to a dotless i.
                 'I' if !context.followed_by_dot_above(self) => {
                     return Some(FullMappingResult::CodePoint('\u{131}'));
                 }
                 _ => {}
             }
         }

         if c == '\u{130}' {
             // Preserve canonical equivalence for I with dot. Turkic is handled above.
             return Some(FullMappingResult::String(Self::I_DOT));
         }

         // Greek capital sigma maps to final sigma when it follows a cased letter and
         // is not followed by one.
         let is_final_sigma = c == '\u{3a3}'
             && context.preceded_by_cased_letter(self)
             && !context.followed_by_cased_letter(self);
         if is_final_sigma {
             Some(FullMappingResult::CodePoint('\u{3c2}'))
         } else {
             // No relevant special case mapping. Use a normal mapping.
             None
         }
     }

     // Returns the locale- or context-dependent upper/titlecase mapping of `c`, or `None`
     // when the regular mapping should be used instead.
     fn full_upper_or_title_special_case<const IS_TITLE_CONTEXT: bool>(
         &self,
         c: char,
         context: ContextIterator,
         locale: CaseMapLocale,
     ) -> Option<FullMappingResult> {
         // In Turkic languages, i turns into a dotted capital I.
         if c == 'i' && locale == CaseMapLocale::Turkish {
             return Some(FullMappingResult::CodePoint('\u{130}'));
         }

         // Lithuanian retains the dot in a lowercase i when followed by accents.
         // Remove dot_above after i with upper or titlecase.
         let is_lithuanian_dot = locale == CaseMapLocale::Lithuanian
             && c == '\u{307}'
             && context.preceded_by_soft_dotted(self);
         if is_lithuanian_dot {
             return Some(FullMappingResult::Remove);
         }

         // ICU4C's non-standard extension for Armenian ligature ech-yiwn.
         if c != '\u{587}' {
             return None;
         }
         let is_armenian = locale == CaseMapLocale::Armenian;
         let mapped = match (is_armenian, IS_TITLE_CONTEXT) {
             (true, false) => "ԵՎ",
             (true, true) => "Եվ",
             (false, false) => "ԵՒ",
             (false, true) => "Եւ",
         };
         Some(FullMappingResult::String(mapped))
     }

     // Hardcoded full case folding for U+0049 and U+0130; returns `None` for any other
     // character. The context is not needed for folding.
     fn full_fold_special_case(
         &self,
         c: char,
         _context: ContextIterator,
         locale: CaseMapLocale,
     ) -> Option<FullMappingResult> {
         let turkic = locale == CaseMapLocale::Turkish;
         let folded = match c {
             // LATIN CAPITAL LETTER I: Turkic mapping first, then the default one
             '\u{49}' if turkic => FullMappingResult::CodePoint('\u{131}'),
             '\u{49}' => FullMappingResult::CodePoint('\u{69}'),
             // LATIN CAPITAL LETTER I WITH DOT ABOVE: Turkic mapping first, then the default one
             '\u{130}' if turkic => FullMappingResult::CodePoint('\u{69}'),
             '\u{130}' => FullMappingResult::String(Self::I_DOT),
             _ => return None,
         };
         Some(folded)
     }
     /// Builds the lazy writeable that applies `mapping` to `src`.
     ///
     /// IS_TITLE_CONTEXT must be true exactly when `mapping` is MappingKind::Title; it is a
     /// const parameter to avoid perf impacts on the other, more common modes of operation.
     ///
     /// titlecase_tail_casing is only read in IS_TITLE_CONTEXT
     pub(crate) fn full_helper_writeable<'a: 'data, const IS_TITLE_CONTEXT: bool>(
         &'a self,
         src: &'a str,
         locale: CaseMapLocale,
         mapping: MappingKind,
         titlecase_tail_casing: TrailingCase,
     ) -> FullCaseWriteable<'a, IS_TITLE_CONTEXT> {
         // The flag and the mapping kind must agree with each other.
         debug_assert!(IS_TITLE_CONTEXT == (mapping == MappingKind::Title));

         FullCaseWriteable {
             data: self,
             src,
             locale,
             mapping,
             titlecase_tail_casing,
         }
     }

     /// Adds all simple case mappings and the full case folding for `c` to `set`.
     /// Also adds special case closure mappings.
     /// The character itself is not added.
     /// For example, the mappings
     /// - for s include long s
     /// - for sharp s include ss
     /// - for k include the Kelvin sign
     pub(crate) fn add_case_closure_to<S: ClosureSink>(&self, c: char, set: &mut S) {
         // The closure of i and its relatives is hardcoded and the data for these
         // characters is ignored: the Turkic dotless i and dotted I, with their case
         // mapping conditions and case folding option, behave specially, and this keeps
         // their closure in line with their case folding.
         let is_i_relative = match c {
             // Regular i and I are in one equivalence class.
             '\u{49}' => {
                 set.add_char('\u{69}');
                 true
             }
             '\u{69}' => {
                 set.add_char('\u{49}');
                 true
             }
             // Dotted I is in a class with <0069 0307> (for canonical equivalence with <0049 0307>)
             '\u{130}' => {
                 set.add_string(Self::I_DOT);
                 true
             }
             // Dotless i is in a class by itself
             '\u{131}' => true,
             _ => false,
         };
         if is_i_relative {
             return;
         }

         let data = self.lookup_data(c);
         if data.has_exception() {
             // c has exceptions, so there may be multiple simple and/or full case mappings.
             let exception = self.exceptions.get(data.exception_index());

             // Add all simple case mappings.
             let simple_slots = [
                 ExceptionSlot::Lower,
                 ExceptionSlot::Fold,
                 ExceptionSlot::Upper,
                 ExceptionSlot::Title,
             ];
             for slot in simple_slots {
                 if let Some(mapped) = exception.get_char_slot(slot) {
                     set.add_char(mapped);
                 }
             }
             if let Some(mapped) = exception.get_simple_case_slot_for(c) {
                 set.add_char(mapped);
             }

             exception.add_full_and_closure_mappings(set);
             return;
         }

         // No exception: at most one simple mapping, and only for cased characters.
         if data.case_type().is_none() {
             return;
         }
         let delta = data.delta() as i32;
         if delta == 0 {
             return;
         }
         // Add the one simple case mapping, no matter what type it is.
         // GIGO: delta should be valid
         let mapped = char::from_u32((c as i32 + delta) as u32).unwrap_or(c);
         set.add_char(mapped);
     }

     /// Maps the string to single code points and adds the associated case closure
     /// mappings.
     ///
     /// Returns `true` if `s` was found in `unfold_data`, in which case every character
     /// of the stored closure string, plus that character's own case closure, has been
     /// added to `set`. Returns `false` (leaving `set` untouched) when `s` has fewer than
     /// two code points or has no entry.
     ///
     /// (see docs on CaseMapper::add_string_case_closure_to)
     pub(crate) fn add_string_case_closure_to<S: ClosureSink>(
         &self,
         s: &str,
         set: &mut S,
         unfold_data: &CaseMapUnfold,
     ) -> bool {
         // The string is too short to find any match unless it has a second code point.
         // Looking for that code point directly avoids decoding the whole string just
         // to count it.
         if s.chars().nth(1).is_none() {
             return false;
         }
         match unfold_data.get(s) {
             Some(closure_string) => {
                 for c in closure_string.chars() {
                     set.add_char(c);
                     self.add_case_closure_to(c, set);
                 }
                 true
             }
             None => false,
         }
     }
 }

 // An internal representation of locale. Non-Root values of this
 // enumeration imply that hard-coded special cases exist for this
 // language.
 #[derive(Copy, Clone, Eq, PartialEq, Debug)]
 pub enum CaseMapLocale {
     // Any language without hard-coded special cases.
     Root,
     // Selected for the languages `tr` and `az`.
     Turkish,
     // Selected for the language `lt`.
     Lithuanian,
     // Selected for the language `el`.
     Greek,
     // Selected for the language `nl`.
     Dutch,
     // Selected for the language `hy`.
     Armenian,
 }

 impl CaseMapLocale {
     /// Maps the language subtag of `langid` to the matching special-case locale.
     ///
     /// Only the `language` field of `langid` is read; every language not listed
     /// below maps to `Root`.
     pub const fn from_langid(langid: &LanguageIdentifier) -> Self {
         use icu_locale_core::subtags::{language, Language};
         // Named constants so that the subtags can be used as `match` patterns.
         const TR: Language = language!("tr");
         const AZ: Language = language!("az");
         const LT: Language = language!("lt");
         const EL: Language = language!("el");
         const NL: Language = language!("nl");
         const HY: Language = language!("hy");
         match langid.language {
             TR | AZ => Self::Turkish,
             LT => Self::Lithuanian,
             EL => Self::Greek,
             NL => Self::Dutch,
             HY => Self::Armenian,
             _ => Self::Root,
         }
     }
 }

 /// The outcome of a hard-coded special case mapping.
 pub enum FullMappingResult<'a> {
     /// The character is dropped from the output.
     Remove,
     /// The character maps to a single code point.
     CodePoint(char),
     /// The character maps to a string.
     String(&'a str),
 }

 impl FullMappingResult<'_> {
     /// Adds the mapped code point or string to `set`; `Remove` adds nothing.
     #[allow(dead_code)]
     fn add_to_set<S: ClosureSink>(&self, set: &mut S) {
         match self {
             Self::Remove => {}
             Self::CodePoint(c) => set.add_char(*c),
             Self::String(s) => set.add_string(s),
         }
     }
 }

 impl Writeable for FullMappingResult<'_> {
     /// Writes the mapped code point or string to `sink`; `Remove` writes nothing.
     fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
         match self {
             Self::Remove => Ok(()),
             Self::CodePoint(c) => sink.write_char(*c),
             Self::String(s) => sink.write_str(s),
         }
     }
 }

 /// The text surrounding the character that is currently being mapped.
 /// The character itself is in neither half.
 pub(crate) struct ContextIterator<'a> {
     // Everything preceding the current character.
     before: &'a str,
     // Everything following the current character.
     after: &'a str,
 }

 impl<'a> ContextIterator<'a> {
     // Builds the context of a character from the string preceding it and the
     // string that starts with the character itself. The character is dropped
     // from the second string; an empty second string stays empty.
     pub fn new(before: &'a str, char_and_after: &'a str) -> Self {
         let mut rest = char_and_after.chars();
         let _current = rest.next(); // skip the character itself
         Self {
             before,
             after: rest.as_str(),
         }
     }

     // Returns `diacritics` after letting it consume the Greek diacritics at the start of
     // the following text.
     fn add_greek_diacritics(&self, diacritics: GreekDiacritics) -> GreekDiacritics {
         let mut combined = diacritics;
         combined.consume_greek_diacritics(self.after);
         combined
     }

     // Delegates to `greek_to_me::preceded_by_greek_letter` on the preceding text.
     fn preceded_by_greek_letter(&self) -> bool {
         let preceding = self.before;
         greek_to_me::preceded_by_greek_letter(preceding)
     }

     // Delegates to `greek_to_me::preceding_greek_vowel_diacritics` on the preceding text.
     fn preceding_greek_vowel_diacritics(
         &self,
     ) -> Option<GreekCombiningCharacterSequenceDiacritics> {
         let preceding = self.before;
         greek_to_me::preceding_greek_vowel_diacritics(preceding)
     }

     // Checks whether the closest preceding character that is not an `OtherAccent`
     // is soft-dotted. Returns false if there is no such character.
     fn preceded_by_soft_dotted(&self, mapping: &CaseMap) -> bool {
         let closest = self
             .before
             .chars()
             .rev()
             .map(|c| mapping.dot_type(c))
             .find(|dot| *dot != DotType::OtherAccent);
         matches!(closest, Some(DotType::SoftDotted))
     }
     /// Checks if the preceding character is a capital I, allowing for non-Above combining characters in between.
     ///
     /// If I_MUST_NOT_START_STRING is true, additionally will require that the capital I does not start the string
     fn preceded_by_capital_i<const I_MUST_NOT_START_STRING: bool>(
         &self,
         mapping: &CaseMap,
     ) -> bool {
         let mut preceding = self.before.chars().rev();
         loop {
             match preceding.next() {
                 // Found the I; when required, make sure something comes before it.
                 Some('I') => return !I_MUST_NOT_START_STRING || preceding.next().is_some(),
                 // Skip over accents that may sit between the I and the current character.
                 Some(c) if mapping.dot_type(c) == DotType::OtherAccent => {}
                 // Any other character, or the start of the string: no capital I.
                 _ => return false,
             }
         }
     }
     // Checks whether the closest preceding non-ignorable character is cased.
     // Returns false if every preceding character is ignorable.
     fn preceded_by_cased_letter(&self, mapping: &CaseMap) -> bool {
         let closest = self
             .before
             .chars()
             .rev()
             .map(|c| mapping.lookup_data(c))
             .find(|data| !data.is_ignorable());
         match closest {
             Some(data) => data.case_type().is_some(),
             None => false,
         }
     }
     // Checks whether the closest following non-ignorable character is cased.
     // Returns false if every following character is ignorable.
     fn followed_by_cased_letter(&self, mapping: &CaseMap) -> bool {
         let closest = self
             .after
             .chars()
             .map(|c| mapping.lookup_data(c))
             .find(|data| !data.is_ignorable());
         match closest {
             Some(data) => data.case_type().is_some(),
             None => false,
         }
     }
     // Checks whether the closest following character that is not an `OtherAccent`
     // has the dot type `Above`. Returns false if there is no such character.
     fn followed_by_more_above(&self, mapping: &CaseMap) -> bool {
         let closest = self
             .after
             .chars()
             .map(|c| mapping.dot_type(c))
             .find(|dot| *dot != DotType::OtherAccent);
         matches!(closest, Some(DotType::Above))
     }
     // Checks whether a U+0307 (dot above) follows, with nothing but `OtherAccent`
     // characters in between.
     fn followed_by_dot_above(&self, mapping: &CaseMap) -> bool {
         // The scan stops at the dot above or at the first character that is not an
         // `OtherAccent`, whichever comes first.
         let stop = self
             .after
             .chars()
             .find(|&c| c == '\u{307}' || mapping.dot_type(c) != DotType::OtherAccent);
         stop == Some('\u{307}')
     }

     /// Checks the preceding and surrounding context of a j or J
     /// and returns true if it is preceded by an i or I at the start of the string.
     /// If one has an acute accent,
     /// both must have the accent for this to return true. No other accents are handled.
     fn is_dutch_ij_pair_at_beginning(&self, mapping: &CaseMap) -> bool {
         let mut preceding = self.before.chars().rev();

         // Walk back to the i, remembering whether an acute accent was seen on the way.
         let mut acute_seen = false;
         let i_has_acute = loop {
             match preceding.next() {
                 Some('i') | Some('I') => break acute_seen,
                 Some('í') | Some('Í') => break true,
                 Some(ACUTE) => acute_seen = true,
                 _ => return false,
             }
         };

         // not at the beginning of a string, doesn't matter
         if preceding.next().is_some() {
             return false;
         }

         // We are supposed to check that `j` has no other combining marks aside
         // from potentially an acute accent. Once we hit the first non-combining mark
         // we are done.
         //
         // ICU4C checks for `gc=Mn` to determine if something is a combining mark,
         // however this requires extra data (and is the *only* point in the casemapping algorithm
         // where there is a direct dependency on properties data not mediated by the casemapping data trie).
         //
         // Instead, we can check for ccc via dot_type, the same way the rest of the algorithm does.
         //
         // See https://unicode-org.atlassian.net/browse/ICU-22429
         let mut j_has_acute = false;
         for c in self.after.chars() {
             if c == ACUTE {
                 j_has_acute = true;
             } else if matches!(
                 mapping.dot_type(c),
                 DotType::NoDot | DotType::SoftDotted
             ) {
                 // Not a combining character; ccc = 0
                 break;
             } else {
                 // found combining character, bail
                 return false;
             }
         }

         // either both should have an acute accent, or none
         i_has_acute == j_has_acute
     }
 }