vendor/unic-langid-impl-0.9.5/src/lib.rs - toolchain/rustc - Git at Google

 mod errors;
 mod layout_table;
 #[cfg(feature = "likelysubtags")]
 pub mod likelysubtags;
 #[doc(hidden)]
 pub mod parser;
 #[cfg(feature = "serde")]
 mod serde;
 pub mod subtags;

 pub use crate::errors::LanguageIdentifierError;
 use std::fmt::Write;
 use std::iter::Peekable;
 use std::str::FromStr;

 /// Enum representing available character direction orientations.
 #[derive(Clone, Copy, Debug, PartialEq)]
 pub enum CharacterDirection {
     /// Right To Left
     ///
     /// Used in languages such as Arabic, Hebrew, Fula, Kurdish etc.
     RTL,
     /// Left To Right
     ///
     /// Used in languages such as French, Spanish, English, German etc.
     LTR,
     /// Top To Bottom
     ///
     /// Used in Traditional Mongolian
     TTB,
 }

 type PartsTuple = (
     subtags::Language,
     Option<subtags::Script>,
     Option<subtags::Region>,
     Vec<subtags::Variant>,
 );

 /// `LanguageIdentifier` is a core struct representing a Unicode Language Identifier.
 ///
 /// # Examples
 ///
 /// ```
 /// use unic_langid_impl::LanguageIdentifier;
 ///
 /// let li: LanguageIdentifier = "en-US".parse()
 ///     .expect("Failed to parse.");
 ///
 /// assert_eq!(li.language, "en");
 /// assert_eq!(li.script, None);
 /// assert_eq!(li.region.as_ref().map(Into::into), Some("US"));
 /// assert_eq!(li.variants().len(), 0);
 /// ```
 ///
 /// # Parsing
 ///
 /// Unicode recognizes three levels of standard conformance for any language identifier:
 ///
 ///  * *well-formed* - syntactically correct
 ///  * *valid* - well-formed and only uses registered language subtags, extensions, keywords, types...
 ///  * *canonical* - valid and no deprecated codes or structure.
 ///
 /// At the moment parsing normalizes a well-formed language identifier converting
 /// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
 ///
 /// Any bogus subtags will cause the parsing to fail with an error.
 /// No subtag validation is performed.
 ///
 /// # Examples:
 ///
 /// ```
 /// use unic_langid_impl::LanguageIdentifier;
 ///
 /// let li: LanguageIdentifier = "eN_latn_Us-Valencia".parse()
 ///     .expect("Failed to parse.");
 ///
 /// assert_eq!(li.language, "en");
 /// assert_eq!(li.script.as_ref().map(Into::into), Some("Latn"));
 /// assert_eq!(li.region.as_ref().map(Into::into), Some("US"));
 /// assert_eq!(li.variants().map(|v| v.as_str()).collect::<Vec<_>>(), &["valencia"]);
 /// ```
 #[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
 pub struct LanguageIdentifier {
     pub language: subtags::Language,
     pub script: Option<subtags::Script>,
     pub region: Option<subtags::Region>,
     variants: Option<Box<[subtags::Variant]>>,
 }

 impl LanguageIdentifier {
     /// A constructor which takes a utf8 slice, parses it and
     /// produces a well-formed `LanguageIdentifier`.
     ///
     /// # Examples
     ///
     /// ```
     /// use unic_langid_impl::LanguageIdentifier;
     ///
     /// let li = LanguageIdentifier::from_bytes("en-US".as_bytes())
     ///     .expect("Parsing failed.");
     ///
     /// assert_eq!(li.to_string(), "en-US");
     /// ```
     pub fn from_bytes(v: &[u8]) -> Result<Self, LanguageIdentifierError> {
         Ok(parser::parse_language_identifier(v)?)
     }

     /// A constructor which takes optional subtags as `AsRef<[u8]>`, parses them and
     /// produces a well-formed `LanguageIdentifier`.
     ///
     /// # Examples
     ///
     /// ```
     /// use unic_langid_impl::LanguageIdentifier;
     ///
     /// let li = LanguageIdentifier::from_parts(
     ///     "fr".parse().expect("Parsing failed."),
     ///     None,
     ///     Some("CA".parse().expect("Parsing failed.")),
     ///     &[]
     /// );
     ///
     /// assert_eq!(li.to_string(), "fr-CA");
     /// ```
     pub fn from_parts(
         language: subtags::Language,
         script: Option<subtags::Script>,
         region: Option<subtags::Region>,
         variants: &[subtags::Variant],
     ) -> Self {
         let variants = if !variants.is_empty() {
             let mut v = variants.to_vec();
             v.sort_unstable();
             v.dedup();
             Some(v.into_boxed_slice())
         } else {
             None
         };

         Self {
             language,
             script,
             region,
             variants,
         }
     }

     /// # Unchecked
     ///
     /// This function accepts subtags expecting variants
     /// to be deduplicated and ordered.
     pub const fn from_raw_parts_unchecked(
         language: subtags::Language,
         script: Option<subtags::Script>,
         region: Option<subtags::Region>,
         variants: Option<Box<[subtags::Variant]>>,
     ) -> Self {
         Self {
             language,
             script,
             region,
             variants,
         }
     }

     #[doc(hidden)]
     /// This method is used by `unic-locale` to handle partial
     /// subtag iterator.
     ///
     /// Not stable.
     pub fn try_from_iter<'a>(
         iter: &mut Peekable<impl Iterator<Item = &'a [u8]>>,
         allow_extension: bool,
     ) -> Result<LanguageIdentifier, LanguageIdentifierError> {
         Ok(parser::parse_language_identifier_from_iter(
             iter,
             allow_extension,
         )?)
     }

     /// Consumes `LanguageIdentifier` and produces raw internal representations
     /// of all subtags in form of `u64`/`u32`.
     ///
     /// Primarily used for storing internal representation and restoring via
     /// `from_raw_parts_unchecked`.
     ///
     /// # Examples
     ///
     /// ```
     /// use unic_langid_impl::LanguageIdentifier;
     /// use tinystr::{TinyStr8, TinyStr4};
     ///
     /// let li: LanguageIdentifier = "en-US".parse()
     ///     .expect("Parsing failed.");
     ///
     /// let (lang, script, region, variants) = li.into_parts();
     ///
     /// // let li2 = LanguageIdentifier::from_raw_parts_unchecked(
     /// //     lang.map(|l| unsafe { TinyStr8::new_unchecked(l) }),
     /// //    script.map(|s| unsafe { TinyStr4::new_unchecked(s) }),
     /// //    region.map(|r| unsafe { TinyStr4::new_unchecked(r) }),
     /// //    variants.map(|v| v.into_iter().map(|v| unsafe { TinyStr8::new_unchecked(*v) }).collect()),
     /// //);
     ///
     /// //assert_eq!(li2.to_string(), "en-US");
     /// ```
     pub fn into_parts(self) -> PartsTuple {
         (
             self.language,
             self.script,
             self.region,
             self.variants.map_or_else(Vec::new, |v| v.to_vec()),
         )
     }

     /// Compares a `LanguageIdentifier` to another `AsRef<LanguageIdentifier`
     /// allowing for either side to use the missing fields as wildcards.
     ///
     /// This allows for matching between `en` (treated as `en-*-*-*`) and `en-US`.
     ///
     /// # Examples
     ///
     /// ```
     /// use unic_langid_impl::LanguageIdentifier;
     ///
     /// let li1: LanguageIdentifier = "en".parse()
     ///     .expect("Parsing failed.");
     ///
     /// let li2: LanguageIdentifier = "en-US".parse()
     ///     .expect("Parsing failed.");
     ///
     /// assert_ne!(li1, li2); // "en" != "en-US"
     /// assert_ne!(li1.to_string(), li2.to_string()); // "en" != "en-US"
     ///
     /// assert_eq!(li1.matches(&li2, false, false), false); // "en" != "en-US"
     /// assert_eq!(li1.matches(&li2, true, false), true); // "en-*-*-*" == "en-US"
     /// assert_eq!(li1.matches(&li2, false, true), false); // "en" != "en-*-US-*"
     /// assert_eq!(li1.matches(&li2, true, true), true); // "en-*-*-*" == "en-*-US-*"
     /// ```
     pub fn matches<O: AsRef<Self>>(
         &self,
         other: &O,
         self_as_range: bool,
         other_as_range: bool,
     ) -> bool {
         let other = other.as_ref();
         self.language
             .matches(other.language, self_as_range, other_as_range)
             && subtag_matches(&self.script, &other.script, self_as_range, other_as_range)
             && subtag_matches(&self.region, &other.region, self_as_range, other_as_range)
             && subtags_match(
                 &self.variants,
                 &other.variants,
                 self_as_range,
                 other_as_range,
             )
     }

     /// Returns a vector of variants subtags of the `LanguageIdentifier`.
     ///
     /// # Examples
     ///
     /// ```
     /// use unic_langid_impl::LanguageIdentifier;
     ///
     /// let li1: LanguageIdentifier = "ca-ES-valencia".parse()
     ///     .expect("Parsing failed.");
     ///
     /// assert_eq!(li1.variants().map(|v| v.as_str()).collect::<Vec<_>>(), &["valencia"]);
     ///
     /// let li2: LanguageIdentifier = "de".parse()
     ///     .expect("Parsing failed.");
     ///
     /// assert_eq!(li2.variants().len(), 0);
     /// ```
     pub fn variants(&self) -> impl ExactSizeIterator<Item = &subtags::Variant> {
         let variants: &[_] = match self.variants {
             Some(ref v) => v,
             None => &[],
         };

         variants.iter()
     }

     /// Sets variant subtags of the `LanguageIdentifier`.
     ///
     /// # Examples
     ///
     /// ```
     /// use unic_langid_impl::LanguageIdentifier;
     ///
     /// let mut li: LanguageIdentifier = "ca-ES".parse()
     ///     .expect("Parsing failed.");
     ///
     /// li.set_variants(&["valencia".parse().expect("Parsing failed.")]);
     ///
     /// assert_eq!(li.to_string(), "ca-ES-valencia");
     /// ```
     pub fn set_variants(&mut self, variants: &[subtags::Variant]) {
         let mut v = variants.to_vec();

         if v.is_empty() {
             self.variants = None;
         } else {
             v.sort_unstable();
             v.dedup();
             self.variants = Some(v.into_boxed_slice());
         }
     }

     /// Tests if a variant subtag is present in the `LanguageIdentifier`.
     ///
     /// # Examples
     ///
     /// ```
     /// use unic_langid_impl::LanguageIdentifier;
     ///
     /// let mut li: LanguageIdentifier = "ca-ES-macos".parse()
     ///     .expect("Parsing failed.");
     ///
     /// assert_eq!(li.has_variant("valencia".parse().unwrap()), false);
     /// assert_eq!(li.has_variant("macos".parse().unwrap()), true);
     /// ```
     pub fn has_variant(&self, variant: subtags::Variant) -> bool {
         if let Some(variants) = &self.variants {
             variants.contains(&variant)
         } else {
             false
         }
     }

     /// Clears variant subtags of the `LanguageIdentifier`.
     ///
     /// # Examples
     ///
     /// ```
     /// use unic_langid_impl::LanguageIdentifier;
     ///
     /// let mut li: LanguageIdentifier = "ca-ES-valencia".parse()
     ///     .expect("Parsing failed.");
     ///
     /// li.clear_variants();
     ///
     /// assert_eq!(li.to_string(), "ca-ES");
     /// ```
     pub fn clear_variants(&mut self) {
         self.variants = None;
     }

     /// Extends the `LanguageIdentifier` adding likely subtags based
     /// on tables provided by CLDR.
     ///
     /// # Examples
     ///
     /// ```
     /// use unic_langid_impl::LanguageIdentifier;
     ///
     /// let mut li: LanguageIdentifier = "en-US".parse()
     ///     .expect("Parsing failed.");
     ///
     /// assert_eq!(li.maximize(), true);
     /// assert_eq!(li.to_string(), "en-Latn-US");
     /// ```
     #[cfg(feature = "likelysubtags")]
     pub fn maximize(&mut self) -> bool {
         if let Some(new_li) = likelysubtags::maximize(self.language, self.script, self.region) {
             self.language = new_li.0;
             self.script = new_li.1;
             self.region = new_li.2;
             true
         } else {
             false
         }
     }

     /// Extends the `LanguageIdentifier` removing likely subtags based
     /// on tables provided by CLDR.
     ///
     /// # Examples
     ///
     /// ```
     /// use unic_langid_impl::LanguageIdentifier;
     ///
     /// let mut li: LanguageIdentifier = "en-Latn-US".parse()
     ///     .expect("Parsing failed.");
     ///
     /// assert_eq!(li.minimize(), true);
     /// assert_eq!(li.to_string(), "en");
     /// ```
     #[cfg(feature = "likelysubtags")]
     pub fn minimize(&mut self) -> bool {
         if let Some(new_li) = likelysubtags::minimize(self.language, self.script, self.region) {
             self.language = new_li.0;
             self.script = new_li.1;
             self.region = new_li.2;
             true
         } else {
             false
         }
     }

     /// Returns character direction of the `LanguageIdentifier`.
     ///
     /// # Examples
     ///
     /// ```
     /// use unic_langid_impl::{LanguageIdentifier, CharacterDirection};
     ///
     /// let li1: LanguageIdentifier = "es-AR".parse()
     ///     .expect("Parsing failed.");
     /// let li2: LanguageIdentifier = "fa".parse()
     ///     .expect("Parsing failed.");
     ///
     /// assert_eq!(li1.character_direction(), CharacterDirection::LTR);
     /// assert_eq!(li2.character_direction(), CharacterDirection::RTL);
     /// ```
     pub fn character_direction(&self) -> CharacterDirection {
         match (self.language.into(), self.script) {
             (_, Some(script))
                 if layout_table::SCRIPTS_CHARACTER_DIRECTION_LTR.contains(&script.into()) =>
             {
                 CharacterDirection::LTR
             }
             (_, Some(script))
                 if layout_table::SCRIPTS_CHARACTER_DIRECTION_RTL.contains(&script.into()) =>
             {
                 CharacterDirection::RTL
             }
             (_, Some(script))
                 if layout_table::SCRIPTS_CHARACTER_DIRECTION_TTB.contains(&script.into()) =>
             {
                 CharacterDirection::TTB
             }
             (Some(lang), _) if layout_table::LANGS_CHARACTER_DIRECTION_RTL.contains(&lang) => {
                 #[cfg(feature = "likelysubtags")]
                 if let Some((_, Some(script), _)) =
                     likelysubtags::maximize(self.language, None, self.region)
                 {
                     if layout_table::SCRIPTS_CHARACTER_DIRECTION_LTR.contains(&script.into()) {
                         return CharacterDirection::LTR;
                     }
                 }
                 CharacterDirection::RTL
             }
             _ => CharacterDirection::LTR,
         }
     }
 }

 impl FromStr for LanguageIdentifier {
     type Err = LanguageIdentifierError;

     fn from_str(source: &str) -> Result<Self, Self::Err> {
         Self::from_bytes(source.as_bytes())
     }
 }

 impl AsRef<LanguageIdentifier> for LanguageIdentifier {
     #[inline(always)]
     fn as_ref(&self) -> &LanguageIdentifier {
         self
     }
 }

 impl std::fmt::Display for LanguageIdentifier {
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
         self.language.fmt(f)?;
         if let Some(ref script) = self.script {
             f.write_char('-')?;
             script.fmt(f)?;
         }
         if let Some(ref region) = self.region {
             f.write_char('-')?;
             region.fmt(f)?;
         }
         if let Some(variants) = &self.variants {
             for variant in variants.iter() {
                 f.write_char('-')?;
                 variant.fmt(f)?;
             }
         }
         Ok(())
     }
 }

 impl PartialEq<&str> for LanguageIdentifier {
     fn eq(&self, other: &&str) -> bool {
         self.to_string().as_str() == *other
     }
 }

 fn subtag_matches<P: PartialEq>(
     subtag1: &Option<P>,
     subtag2: &Option<P>,
     as_range1: bool,
     as_range2: bool,
 ) -> bool {
     (as_range1 && subtag1.is_none()) || (as_range2 && subtag2.is_none()) || subtag1 == subtag2
 }

 fn is_option_empty<P: PartialEq>(subtag: &Option<Box<[P]>>) -> bool {
     subtag.as_ref().map_or(true, |t| t.is_empty())
 }

 fn subtags_match<P: PartialEq>(
     subtag1: &Option<Box<[P]>>,
     subtag2: &Option<Box<[P]>>,
     as_range1: bool,
     as_range2: bool,
 ) -> bool {
     // or is some and is empty!
     (as_range1 && is_option_empty(subtag1))
         || (as_range2 && is_option_empty(subtag2))
         || subtag1 == subtag2
 }

 /// This is a best-effort operation that performs all available levels of canonicalization.
 ///
 /// At the moment the operation will normalize casing and the separator, but in the future
 /// it may also validate and update from deprecated subtags to canonical ones.
 ///
 /// # Examples
 ///
 /// ```
 /// use unic_langid_impl::canonicalize;
 ///
 /// assert_eq!(canonicalize("pL_latn_pl"), Ok("pl-Latn-PL".to_string()));
 /// ```
 pub fn canonicalize<S: AsRef<[u8]>>(input: S) -> Result<String, LanguageIdentifierError> {
     let lang_id = LanguageIdentifier::from_bytes(input.as_ref())?;
     Ok(lang_id.to_string())
 }

 #[test]
 fn invalid_subtag() {
     assert!(LanguageIdentifier::from_bytes("en-ÁÁÁÁ".as_bytes()).is_err());
 }
	mod errors;
	mod layout_table;
	#[cfg(feature = "likelysubtags")]
	pub mod likelysubtags;
	#[doc(hidden)]
	pub mod parser;
	#[cfg(feature = "serde")]
	mod serde;
	pub mod subtags;

	pub use crate::errors::LanguageIdentifierError;
	use std::fmt::Write;
	use std::iter::Peekable;
	use std::str::FromStr;

	/// Enum representing available character direction orientations.
	#[derive(Clone, Copy, Debug, PartialEq)]
	pub enum CharacterDirection {
	/// Right To Left
	///
	/// Used in languages such as Arabic, Hebrew, Fula, Kurdish etc.
	RTL,
	/// Left To Right
	///
	/// Used in languages such as French, Spanish, English, German etc.
	LTR,
	/// Top To Bottom
	///
	/// Used in Traditional Mongolian
	TTB,
	}

	type PartsTuple = (
	subtags::Language,
	Option<subtags::Script>,
	Option<subtags::Region>,
	Vec<subtags::Variant>,
	);

	/// `LanguageIdentifier` is a core struct representing a Unicode Language Identifier.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::LanguageIdentifier;
	///
	/// let li: LanguageIdentifier = "en-US".parse()
	/// .expect("Failed to parse.");
	///
	/// assert_eq!(li.language, "en");
	/// assert_eq!(li.script, None);
	/// assert_eq!(li.region.as_ref().map(Into::into), Some("US"));
	/// assert_eq!(li.variants().len(), 0);
	/// ```
	///
	/// # Parsing
	///
	/// Unicode recognizes three levels of standard conformance for any language identifier:
	///
	/// * well-formed - syntactically correct
	/// * valid - well-formed and only uses registered language subtags, extensions, keywords, types...
	/// * canonical - valid and no deprecated codes or structure.
	///
	/// At the moment parsing normalizes a well-formed language identifier converting
	/// `_` separators to `-` and adjusting casing to conform to the Unicode standard.
	///
	/// Any bogus subtags will cause the parsing to fail with an error.
	/// No subtag validation is performed.
	///
	/// # Examples:
	///
	/// ```
	/// use unic_langid_impl::LanguageIdentifier;
	///
	/// let li: LanguageIdentifier = "eN_latn_Us-Valencia".parse()
	/// .expect("Failed to parse.");
	///
	/// assert_eq!(li.language, "en");
	/// assert_eq!(li.script.as_ref().map(Into::into), Some("Latn"));
	/// assert_eq!(li.region.as_ref().map(Into::into), Some("US"));
	/// assert_eq!(li.variants().map(\|v\| v.as_str()).collect::<Vec<_>>(), &["valencia"]);
	/// ```
	#[derive(Default, Debug, PartialEq, Eq, Clone, Hash, PartialOrd, Ord)]
	pub struct LanguageIdentifier {
	pub language: subtags::Language,
	pub script: Option<subtags::Script>,
	pub region: Option<subtags::Region>,
	variants: Option<Box<[subtags::Variant]>>,
	}

	impl LanguageIdentifier {
	/// A constructor which takes a utf8 slice, parses it and
	/// produces a well-formed `LanguageIdentifier`.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::LanguageIdentifier;
	///
	/// let li = LanguageIdentifier::from_bytes("en-US".as_bytes())
	/// .expect("Parsing failed.");
	///
	/// assert_eq!(li.to_string(), "en-US");
	/// ```
	pub fn from_bytes(v: &[u8]) -> Result<Self, LanguageIdentifierError> {
	Ok(parser::parse_language_identifier(v)?)
	}

	/// A constructor which takes optional subtags as `AsRef<[u8]>`, parses them and
	/// produces a well-formed `LanguageIdentifier`.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::LanguageIdentifier;
	///
	/// let li = LanguageIdentifier::from_parts(
	/// "fr".parse().expect("Parsing failed."),
	/// None,
	/// Some("CA".parse().expect("Parsing failed.")),
	/// &[]
	/// );
	///
	/// assert_eq!(li.to_string(), "fr-CA");
	/// ```
	pub fn from_parts(
	language: subtags::Language,
	script: Option<subtags::Script>,
	region: Option<subtags::Region>,
	variants: &[subtags::Variant],
	) -> Self {
	let variants = if !variants.is_empty() {
	let mut v = variants.to_vec();
	v.sort_unstable();
	v.dedup();
	Some(v.into_boxed_slice())
	} else {
	None
	};

	Self {
	language,
	script,
	region,
	variants,
	}
	}

	/// # Unchecked
	///
	/// This function accepts subtags expecting variants
	/// to be deduplicated and ordered.
	pub const fn from_raw_parts_unchecked(
	language: subtags::Language,
	script: Option<subtags::Script>,
	region: Option<subtags::Region>,
	variants: Option<Box<[subtags::Variant]>>,
	) -> Self {
	Self {
	language,
	script,
	region,
	variants,
	}
	}

	#[doc(hidden)]
	/// This method is used by `unic-locale` to handle partial
	/// subtag iterator.
	///
	/// Not stable.
	pub fn try_from_iter<'a>(
	iter: &mut Peekable<impl Iterator<Item = &'a [u8]>>,
	allow_extension: bool,
	) -> Result<LanguageIdentifier, LanguageIdentifierError> {
	Ok(parser::parse_language_identifier_from_iter(
	iter,
	allow_extension,
	)?)
	}

	/// Consumes `LanguageIdentifier` and produces raw internal representations
	/// of all subtags in form of `u64`/`u32`.
	///
	/// Primarily used for storing internal representation and restoring via
	/// `from_raw_parts_unchecked`.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::LanguageIdentifier;
	/// use tinystr::{TinyStr8, TinyStr4};
	///
	/// let li: LanguageIdentifier = "en-US".parse()
	/// .expect("Parsing failed.");
	///
	/// let (lang, script, region, variants) = li.into_parts();
	///
	/// // let li2 = LanguageIdentifier::from_raw_parts_unchecked(
	/// // lang.map(\|l\| unsafe { TinyStr8::new_unchecked(l) }),
	/// // script.map(\|s\| unsafe { TinyStr4::new_unchecked(s) }),
	/// // region.map(\|r\| unsafe { TinyStr4::new_unchecked(r) }),
	/// // variants.map(\|v\| v.into_iter().map(\|v\| unsafe { TinyStr8::new_unchecked(*v) }).collect()),
	/// //);
	///
	/// //assert_eq!(li2.to_string(), "en-US");
	/// ```
	pub fn into_parts(self) -> PartsTuple {
	(
	self.language,
	self.script,
	self.region,
	self.variants.map_or_else(Vec::new, \|v\| v.to_vec()),
	)
	}

	/// Compares a `LanguageIdentifier` to another `AsRef<LanguageIdentifier`
	/// allowing for either side to use the missing fields as wildcards.
	///
	/// This allows for matching between `en` (treated as `en---*`) and `en-US`.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::LanguageIdentifier;
	///
	/// let li1: LanguageIdentifier = "en".parse()
	/// .expect("Parsing failed.");
	///
	/// let li2: LanguageIdentifier = "en-US".parse()
	/// .expect("Parsing failed.");
	///
	/// assert_ne!(li1, li2); // "en" != "en-US"
	/// assert_ne!(li1.to_string(), li2.to_string()); // "en" != "en-US"
	///
	/// assert_eq!(li1.matches(&li2, false, false), false); // "en" != "en-US"
	/// assert_eq!(li1.matches(&li2, true, false), true); // "en---*" == "en-US"
	/// assert_eq!(li1.matches(&li2, false, true), false); // "en" != "en--US-"
	/// assert_eq!(li1.matches(&li2, true, true), true); // "en---" == "en--US-*"
	/// ```
	pub fn matches<O: AsRef<Self>>(
	&self,
	other: &O,
	self_as_range: bool,
	other_as_range: bool,
	) -> bool {
	let other = other.as_ref();
	self.language
	.matches(other.language, self_as_range, other_as_range)
	&& subtag_matches(&self.script, &other.script, self_as_range, other_as_range)
	&& subtag_matches(&self.region, &other.region, self_as_range, other_as_range)
	&& subtags_match(
	&self.variants,
	&other.variants,
	self_as_range,
	other_as_range,
	)
	}

	/// Returns a vector of variants subtags of the `LanguageIdentifier`.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::LanguageIdentifier;
	///
	/// let li1: LanguageIdentifier = "ca-ES-valencia".parse()
	/// .expect("Parsing failed.");
	///
	/// assert_eq!(li1.variants().map(\|v\| v.as_str()).collect::<Vec<_>>(), &["valencia"]);
	///
	/// let li2: LanguageIdentifier = "de".parse()
	/// .expect("Parsing failed.");
	///
	/// assert_eq!(li2.variants().len(), 0);
	/// ```
	pub fn variants(&self) -> impl ExactSizeIterator<Item = &subtags::Variant> {
	let variants: &[_] = match self.variants {
	Some(ref v) => v,
	None => &[],
	};

	variants.iter()
	}

	/// Sets variant subtags of the `LanguageIdentifier`.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::LanguageIdentifier;
	///
	/// let mut li: LanguageIdentifier = "ca-ES".parse()
	/// .expect("Parsing failed.");
	///
	/// li.set_variants(&["valencia".parse().expect("Parsing failed.")]);
	///
	/// assert_eq!(li.to_string(), "ca-ES-valencia");
	/// ```
	pub fn set_variants(&mut self, variants: &[subtags::Variant]) {
	let mut v = variants.to_vec();

	if v.is_empty() {
	self.variants = None;
	} else {
	v.sort_unstable();
	v.dedup();
	self.variants = Some(v.into_boxed_slice());
	}
	}

	/// Tests if a variant subtag is present in the `LanguageIdentifier`.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::LanguageIdentifier;
	///
	/// let mut li: LanguageIdentifier = "ca-ES-macos".parse()
	/// .expect("Parsing failed.");
	///
	/// assert_eq!(li.has_variant("valencia".parse().unwrap()), false);
	/// assert_eq!(li.has_variant("macos".parse().unwrap()), true);
	/// ```
	pub fn has_variant(&self, variant: subtags::Variant) -> bool {
	if let Some(variants) = &self.variants {
	variants.contains(&variant)
	} else {
	false
	}
	}

	/// Clears variant subtags of the `LanguageIdentifier`.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::LanguageIdentifier;
	///
	/// let mut li: LanguageIdentifier = "ca-ES-valencia".parse()
	/// .expect("Parsing failed.");
	///
	/// li.clear_variants();
	///
	/// assert_eq!(li.to_string(), "ca-ES");
	/// ```
	pub fn clear_variants(&mut self) {
	self.variants = None;
	}

	/// Extends the `LanguageIdentifier` adding likely subtags based
	/// on tables provided by CLDR.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::LanguageIdentifier;
	///
	/// let mut li: LanguageIdentifier = "en-US".parse()
	/// .expect("Parsing failed.");
	///
	/// assert_eq!(li.maximize(), true);
	/// assert_eq!(li.to_string(), "en-Latn-US");
	/// ```
	#[cfg(feature = "likelysubtags")]
	pub fn maximize(&mut self) -> bool {
	if let Some(new_li) = likelysubtags::maximize(self.language, self.script, self.region) {
	self.language = new_li.0;
	self.script = new_li.1;
	self.region = new_li.2;
	true
	} else {
	false
	}
	}

	/// Extends the `LanguageIdentifier` removing likely subtags based
	/// on tables provided by CLDR.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::LanguageIdentifier;
	///
	/// let mut li: LanguageIdentifier = "en-Latn-US".parse()
	/// .expect("Parsing failed.");
	///
	/// assert_eq!(li.minimize(), true);
	/// assert_eq!(li.to_string(), "en");
	/// ```
	#[cfg(feature = "likelysubtags")]
	pub fn minimize(&mut self) -> bool {
	if let Some(new_li) = likelysubtags::minimize(self.language, self.script, self.region) {
	self.language = new_li.0;
	self.script = new_li.1;
	self.region = new_li.2;
	true
	} else {
	false
	}
	}

	/// Returns character direction of the `LanguageIdentifier`.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::{LanguageIdentifier, CharacterDirection};
	///
	/// let li1: LanguageIdentifier = "es-AR".parse()
	/// .expect("Parsing failed.");
	/// let li2: LanguageIdentifier = "fa".parse()
	/// .expect("Parsing failed.");
	///
	/// assert_eq!(li1.character_direction(), CharacterDirection::LTR);
	/// assert_eq!(li2.character_direction(), CharacterDirection::RTL);
	/// ```
	pub fn character_direction(&self) -> CharacterDirection {
	match (self.language.into(), self.script) {
	(_, Some(script))
	if layout_table::SCRIPTS_CHARACTER_DIRECTION_LTR.contains(&script.into()) =>
	{
	CharacterDirection::LTR
	}
	(_, Some(script))
	if layout_table::SCRIPTS_CHARACTER_DIRECTION_RTL.contains(&script.into()) =>
	{
	CharacterDirection::RTL
	}
	(_, Some(script))
	if layout_table::SCRIPTS_CHARACTER_DIRECTION_TTB.contains(&script.into()) =>
	{
	CharacterDirection::TTB
	}
	(Some(lang), _) if layout_table::LANGS_CHARACTER_DIRECTION_RTL.contains(&lang) => {
	#[cfg(feature = "likelysubtags")]
	if let Some((_, Some(script), _)) =
	likelysubtags::maximize(self.language, None, self.region)
	{
	if layout_table::SCRIPTS_CHARACTER_DIRECTION_LTR.contains(&script.into()) {
	return CharacterDirection::LTR;
	}
	}
	CharacterDirection::RTL
	}
	_ => CharacterDirection::LTR,
	}
	}
	}

	impl FromStr for LanguageIdentifier {
	type Err = LanguageIdentifierError;

	fn from_str(source: &str) -> Result<Self, Self::Err> {
	Self::from_bytes(source.as_bytes())
	}
	}

	impl AsRef<LanguageIdentifier> for LanguageIdentifier {
	#[inline(always)]
	fn as_ref(&self) -> &LanguageIdentifier {
	self
	}
	}

	impl std::fmt::Display for LanguageIdentifier {
	fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
	self.language.fmt(f)?;
	if let Some(ref script) = self.script {
	f.write_char('-')?;
	script.fmt(f)?;
	}
	if let Some(ref region) = self.region {
	f.write_char('-')?;
	region.fmt(f)?;
	}
	if let Some(variants) = &self.variants {
	for variant in variants.iter() {
	f.write_char('-')?;
	variant.fmt(f)?;
	}
	}
	Ok(())
	}
	}

	impl PartialEq<&str> for LanguageIdentifier {
	fn eq(&self, other: &&str) -> bool {
	self.to_string().as_str() == *other
	}
	}

	fn subtag_matches<P: PartialEq>(
	subtag1: &Option<P>,
	subtag2: &Option<P>,
	as_range1: bool,
	as_range2: bool,
	) -> bool {
	(as_range1 && subtag1.is_none()) \|\| (as_range2 && subtag2.is_none()) \|\| subtag1 == subtag2
	}

	fn is_option_empty<P: PartialEq>(subtag: &Option<Box<[P]>>) -> bool {
	subtag.as_ref().map_or(true, \|t\| t.is_empty())
	}

	fn subtags_match<P: PartialEq>(
	subtag1: &Option<Box<[P]>>,
	subtag2: &Option<Box<[P]>>,
	as_range1: bool,
	as_range2: bool,
	) -> bool {
	// or is some and is empty!
	(as_range1 && is_option_empty(subtag1))
	\|\| (as_range2 && is_option_empty(subtag2))
	\|\| subtag1 == subtag2
	}

	/// This is a best-effort operation that performs all available levels of canonicalization.
	///
	/// At the moment the operation will normalize casing and the separator, but in the future
	/// it may also validate and update from deprecated subtags to canonical ones.
	///
	/// # Examples
	///
	/// ```
	/// use unic_langid_impl::canonicalize;
	///
	/// assert_eq!(canonicalize("pL_latn_pl"), Ok("pl-Latn-PL".to_string()));
	/// ```
	pub fn canonicalize<S: AsRef<[u8]>>(input: S) -> Result<String, LanguageIdentifierError> {
	let lang_id = LanguageIdentifier::from_bytes(input.as_ref())?;
	Ok(lang_id.to_string())
	}

	#[test]
	fn invalid_subtag() {
	assert!(LanguageIdentifier::from_bytes("en-ÁÁÁÁ".as_bytes()).is_err());
	}