vendor/idna-0.5.0/src/uts46.rs - toolchain/rustc - Git at Google

 // Copyright 2013-2014 The rust-url developers.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

 //! [*Unicode IDNA Compatibility Processing*
 //! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)

 use self::Mapping::*;
 use crate::punycode;

 use alloc::string::String;
 use core::fmt;
 use unicode_bidi::{bidi_class, BidiClass};
 use unicode_normalization::char::is_combining_mark;
 use unicode_normalization::{is_nfc, UnicodeNormalization};

 include!("uts46_mapping_table.rs");

 const PUNYCODE_PREFIX: &str = "xn--";

 /// Location of a replacement string inside `STRING_TABLE`, as consumed by `decode_slice`.
 #[derive(Debug)]
 struct StringTableSlice {
     // Store these as separate fields so the structure will have an
     // alignment of 1 and thus pack better into the Mapping enum, below.
     /// Low eight bits of the starting byte offset.
     byte_start_lo: u8,
     /// High eight bits of the starting byte offset.
     byte_start_hi: u8,
     /// Length of the replacement string, in bytes.
     byte_len: u8,
 }

 /// Resolves a `StringTableSlice` to the text it designates in `STRING_TABLE`.
 ///
 /// The start offset is rebuilt from its two byte halves (`byte_start_hi` supplies the upper
 /// eight bits) and the returned string covers `byte_len` bytes from there.
 fn decode_slice(slice: &StringTableSlice) -> &'static str {
     let begin = (usize::from(slice.byte_start_hi) << 8) | usize::from(slice.byte_start_lo);
     let end = begin + usize::from(slice.byte_len);
     &STRING_TABLE[begin..end]
 }

 /// Status of a code point in the mapping table, as returned by `find_char`.
 ///
 /// The per-variant notes describe how `Mapper` treats each status.
 #[repr(u8)]
 #[derive(Debug)]
 enum Mapping {
     /// Emitted unchanged.
     Valid,
     /// Dropped from the output.
     Ignored,
     /// Replaced by the referenced string.
     Mapped(StringTableSlice),
     /// Replaced by the referenced string under transitional processing, otherwise kept.
     Deviation(StringTableSlice),
     /// Kept, with `disallowed_character` recorded.
     Disallowed,
     /// Kept; an error is recorded only when STD3 ASCII rules are enabled.
     DisallowedStd3Valid,
     /// Replaced by the referenced string; an error is recorded only when STD3 ASCII rules
     /// are enabled.
     DisallowedStd3Mapped(StringTableSlice),
     /// Kept; an error is recorded only when IDNA 2008 rules are enabled.
     DisallowedIdna2008,
 }

 /// Looks up the mapping-table entry for `codepoint`.
 ///
 /// `TABLE` holds `(range start, x)` pairs and is binary-searched by range start; a miss
 /// selects the entry just before the insertion point, i.e. the range containing
 /// `codepoint`. Bit 15 of `x` marks a range whose code points all share one
 /// `MAPPING_TABLE` entry. Otherwise the low 15 bits index the entry of the range start and
 /// the following code points use the consecutive entries.
 fn find_char(codepoint: char) -> &'static Mapping {
     const SINGLE_MARKER: u16 = 1 << 15;

     let idx = match TABLE.binary_search_by_key(&codepoint, |&val| val.0) {
         Ok(idx) => idx,
         // NOTE(review): assumes the first `TABLE` entry starts at U+0000, so the insertion
         // point is never 0 -- confirm against the table generator.
         Err(idx) => idx - 1,
     };

     let (base, x) = TABLE[idx];
     let offset = usize::from(x & !SINGLE_MARKER);

     if x & SINGLE_MARKER != 0 {
         &MAPPING_TABLE[offset]
     } else {
         // Subtract at full code point width. Truncating both operands to `u16` first
         // underflows (a panic in debug builds) when a range straddles a 0x10000 boundary.
         let delta = (u32::from(codepoint) - u32::from(base)) as usize;
         &MAPPING_TABLE[offset + delta]
     }
 }

 /// Iterator state for applying the mapping table to an input string, one `char` at a time.
 struct Mapper<'a> {
     /// Remaining characters of the input.
     chars: core::str::Chars<'a>,
     /// Options consulted for deviation, STD3 and IDNA 2008 handling.
     config: Config,
     /// Where errors found while mapping are recorded.
     errors: &'a mut Errors,
     /// Replacement string currently being emitted, if any.
     slice: Option<core::str::Chars<'static>>,
 }

 impl<'a> Iterator for Mapper<'a> {
     type Item = char;

     /// Yields the next mapped character, or `None` once the input and any pending
     /// replacement string are exhausted.
     fn next(&mut self) -> Option<Self::Item> {
         loop {
             // Finish emitting a replacement string before reading more input.
             if let Some(pending) = self.slice.as_mut() {
                 if let Some(mapped) = pending.next() {
                     return Some(mapped);
                 }
                 self.slice = None;
             }

             let codepoint = self.chars.next()?;

             // Fast path: these characters are passed through without a table lookup.
             if matches!(codepoint, '.' | '-' | 'a'..='z' | '0'..='9') {
                 return Some(codepoint);
             }

             // Each arm either returns the code point as-is, skips it, or hands back the
             // replacement to emit on the next trip around the loop.
             let replacement = match find_char(codepoint) {
                 Mapping::Valid => return Some(codepoint),
                 Mapping::Ignored => continue,
                 Mapping::Mapped(slice) => slice,
                 Mapping::Deviation(slice) => {
                     if !self.config.transitional_processing {
                         return Some(codepoint);
                     }
                     slice
                 }
                 Mapping::Disallowed => {
                     self.errors.disallowed_character = true;
                     return Some(codepoint);
                 }
                 Mapping::DisallowedStd3Valid => {
                     if self.config.use_std3_ascii_rules {
                         self.errors.disallowed_by_std3_ascii_rules = true;
                     }
                     return Some(codepoint);
                 }
                 Mapping::DisallowedStd3Mapped(slice) => {
                     if self.config.use_std3_ascii_rules {
                         self.errors.disallowed_mapped_in_std3 = true;
                     }
                     slice
                 }
                 Mapping::DisallowedIdna2008 => {
                     if self.config.use_idna_2008_rules {
                         self.errors.disallowed_in_idna_2008 = true;
                     }
                     return Some(codepoint);
                 }
             };
             self.slice = Some(decode_slice(replacement).chars());
         }
     }
 }

 // http://tools.ietf.org/html/rfc5893#section-2
 fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
     // Rule 0: Bidi Rules apply to Bidi Domain Names: a name with at least one RTL label.  A label
     // is RTL if it contains at least one character of bidi class R, AL or AN.
     if !is_bidi_domain {
         return true;
     }

     let mut chars = label.chars();
     let first_char_class = match chars.next() {
         Some(c) => bidi_class(c),
         None => return true, // empty string
     };

     match first_char_class {
         // LTR label
         BidiClass::L => {
             // Rule 5
             for c in chars.by_ref() {
                 if !matches!(
                     bidi_class(c),
                     BidiClass::L
                         | BidiClass::EN
                         | BidiClass::ES
                         | BidiClass::CS
                         | BidiClass::ET
                         | BidiClass::ON
                         | BidiClass::BN
                         | BidiClass::NSM
                 ) {
                     return false;
                 }
             }

             // Rule 6
             // must end in L or EN followed by 0 or more NSM
             let mut rev_chars = label.chars().rev();
             let mut last_non_nsm = rev_chars.next();
             loop {
                 match last_non_nsm {
                     Some(c) if bidi_class(c) == BidiClass::NSM => {
                         last_non_nsm = rev_chars.next();
                         continue;
                     }
                     _ => {
                         break;
                     }
                 }
             }
             match last_non_nsm {
                 Some(c) if bidi_class(c) == BidiClass::L || bidi_class(c) == BidiClass::EN => {}
                 Some(_) => {
                     return false;
                 }
                 _ => {}
             }
         }

         // RTL label
         BidiClass::R | BidiClass::AL => {
             let mut found_en = false;
             let mut found_an = false;

             // Rule 2
             for c in chars {
                 let char_class = bidi_class(c);
                 if char_class == BidiClass::EN {
                     found_en = true;
                 } else if char_class == BidiClass::AN {
                     found_an = true;
                 }

                 if !matches!(
                     char_class,
                     BidiClass::R
                         | BidiClass::AL
                         | BidiClass::AN
                         | BidiClass::EN
                         | BidiClass::ES
                         | BidiClass::CS
                         | BidiClass::ET
                         | BidiClass::ON
                         | BidiClass::BN
                         | BidiClass::NSM
                 ) {
                     return false;
                 }
             }
             // Rule 3
             let mut rev_chars = label.chars().rev();
             let mut last = rev_chars.next();
             loop {
                 // must end in L or EN followed by 0 or more NSM
                 match last {
                     Some(c) if bidi_class(c) == BidiClass::NSM => {
                         last = rev_chars.next();
                         continue;
                     }
                     _ => {
                         break;
                     }
                 }
             }
             match last {
                 Some(c)
                     if matches!(
                         bidi_class(c),
                         BidiClass::R | BidiClass::AL | BidiClass::EN | BidiClass::AN
                     ) => {}
                 _ => {
                     return false;
                 }
             }

             // Rule 4
             if found_an && found_en {
                 return false;
             }
         }

         // Rule 1: Should start with L or R/AL
         _ => {
             return false;
         }
     }

     true
 }

 /// Applies the per-label validity criteria and records any failure in `errors`.
 ///
 /// V1 (NFC) and V8 (Bidi) are checked inside `processing()` to prevent doing duplicate work.
 /// Checking stops at the first failing criterion among V3 and V5.
 ///
 /// http://www.unicode.org/reports/tr46/#Validity_Criteria
 fn check_validity(label: &str, config: Config, errors: &mut Errors) {
     let first_char = match label.chars().next() {
         Some(c) => c,
         // Empty string, pass
         None => return,
     };

     // V2: No U+002D HYPHEN-MINUS in both third and fourth positions.
     //
     // NOTE: Spec says that the label must not contain a HYPHEN-MINUS character in both the
     // third and fourth positions. But nobody follows this criteria. See the spec issue below:
     // https://github.com/whatwg/url/issues/53

     // V3: neither begin nor end with a U+002D HYPHEN-MINUS
     let hyphen_at_edge = label.starts_with('-') || label.ends_with('-');
     if config.check_hyphens && hyphen_at_edge {
         errors.check_hyphens = true;
         return;
     }

     // V4: not contain a U+002E FULL STOP
     //
     // Here, label can't contain '.' since the input is from .split('.')

     // V5: not begin with a GC=Mark
     if is_combining_mark(first_char) {
         errors.start_combining_mark = true;
         return;
     }

     // V6: Check against Mapping Table
     let violates_mapping = |c: char| match find_char(c) {
         Mapping::Valid | Mapping::DisallowedIdna2008 => false,
         Mapping::Deviation(_) => config.transitional_processing,
         Mapping::DisallowedStd3Valid => config.use_std3_ascii_rules,
         Mapping::Ignored
         | Mapping::Mapped(_)
         | Mapping::Disallowed
         | Mapping::DisallowedStd3Mapped(_) => true,
     };
     if label.chars().any(violates_mapping) {
         errors.invalid_mapping = true;
     }

     // V7: ContextJ rules
     //
     // TODO: Implement rules and add *CheckJoiners* flag.

     // V8: Bidi rules are checked inside `processing()`
 }

 /// Detects domains that need no processing at all: non-empty input made only of lowercase
 /// ASCII letters, ASCII digits and `.` separators.
 ///
 /// Any hyphen sends the domain down the full processing path. That covers `xn--` labels
 /// and labels with a leading or trailing hyphen. The earlier hand-written prefix/hyphen
 /// state machine could never be reached with a hyphen that survived the alphanumeric
 /// check, so it reduced to exactly this predicate.
 fn is_simple(domain: &str) -> bool {
     !domain.is_empty()
         && domain
             .chars()
             .all(|c| c == '.' || c.is_ascii_lowercase() || c.is_ascii_digit())
 }

 /// Maps and normalizes `domain`, decodes Punycode labels, validates every label and
 /// appends the result to `output`. `normalized` is scratch space that is overwritten.
 ///
 /// http://www.unicode.org/reports/tr46/#Processing
 fn processing(
     domain: &str,
     config: Config,
     normalized: &mut String,
     output: &mut String,
 ) -> Errors {
     let mut errors = Errors::default();
     // Only the text appended by this call takes part in the Bidi pass at the end.
     let offset = output.len();

     // Map, then normalize to NFC. Mapping errors are recorded through `Mapper`.
     normalized.clear();
     normalized.extend(
         Mapper {
             chars: domain.chars(),
             config,
             errors: &mut errors,
             slice: None,
         }
         .nfc(),
     );

     let mut decoder = punycode::Decoder::default();
     let non_transitional = config.transitional_processing(false);
     let mut has_bidi_labels = false;
     for (index, label) in normalized.split('.').enumerate() {
         if index > 0 {
             output.push('.');
         }
         match label.strip_prefix(PUNYCODE_PREFIX) {
             None => {
                 has_bidi_labels = has_bidi_labels || is_bidi_domain(label);

                 // `normalized` is already `NFC` so we can skip that check
                 check_validity(label, config, &mut errors);
                 output.push_str(label);
             }
             Some(remainder) => match decoder.decode(remainder) {
                 Err(()) => {
                     has_bidi_labels = true;
                     errors.punycode = true;
                 }
                 Ok(decode) => {
                     let start = output.len();
                     output.extend(decode);
                     let decoded_label = &output[start..];

                     has_bidi_labels = has_bidi_labels || is_bidi_domain(decoded_label);

                     // Decoded labels are only validated while no error has been seen yet.
                     if !errors.is_err() {
                         if is_nfc(decoded_label) {
                             check_validity(decoded_label, non_transitional, &mut errors);
                         } else {
                             errors.nfc = true;
                         }
                     }
                 }
             },
         }
     }

     // V8: Bidi rules
     //
     // TODO: Add *CheckBidi* flag
     let bidi_ok = output[offset..]
         .split('.')
         .all(|label| passes_bidi(label, has_bidi_labels));
     if !bidi_ok {
         errors.check_bidi = true;
     }

     errors
 }

 /// UTS #46 converter that keeps its configuration and scratch buffers between calls.
 #[derive(Default)]
 pub struct Idna {
     /// Options applied by every conversion.
     config: Config,
     /// Scratch buffer handed to `processing` as its `normalized` argument.
     normalized: String,
     /// Scratch buffer in which `to_ascii_inner` holds the processed domain while it
     /// re-encodes the labels.
     output: String,
 }

 impl Idna {
     pub fn new(config: Config) -> Self {
         Self {
             config,
             normalized: String::new(),
             output: String::new(),
         }
     }

     pub fn to_ascii_inner(&mut self, domain: &str, out: &mut String) -> Errors {
         if is_simple(domain) {
             out.push_str(domain);
             return Errors::default();
         }
         let mut errors = processing(domain, self.config, &mut self.normalized, out);
         self.output = core::mem::replace(out, String::with_capacity(out.len()));
         let mut first = true;
         for label in self.output.split('.') {
             if !first {
                 out.push('.');
             }
             first = false;

             if label.is_ascii() {
                 out.push_str(label);
             } else {
                 let offset = out.len();
                 out.push_str(PUNYCODE_PREFIX);
                 if let Err(()) = punycode::encode_into(label.chars(), out) {
                     errors.punycode = true;
                     out.truncate(offset);
                 }
             }
         }
         errors
     }

     /// http://www.unicode.org/reports/tr46/#ToASCII
     #[allow(clippy::wrong_self_convention)]
     pub fn to_ascii(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
         let mut errors = self.to_ascii_inner(domain, out);

         if self.config.verify_dns_length {
             let domain = if out.ends_with('.') {
                 &out[..out.len() - 1]
             } else {
                 &*out
             };
             if domain.is_empty() || domain.split('.').any(|label| label.is_empty()) {
                 errors.too_short_for_dns = true;
             }
             if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
                 errors.too_long_for_dns = true;
             }
         }

         errors.into()
     }

     /// http://www.unicode.org/reports/tr46/#ToUnicode
     #[allow(clippy::wrong_self_convention)]
     pub fn to_unicode(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
         if is_simple(domain) {
             out.push_str(domain);
             return Errors::default().into();
         }
         processing(domain, self.config, &mut self.normalized, out).into()
     }
 }

 /// Options controlling UTS #46 processing; adjusted through the builder-style setters.
 #[derive(Clone, Copy)]
 #[must_use]
 pub struct Config {
     /// Record an error for code points that are disallowed under STD3 ASCII rules.
     use_std3_ascii_rules: bool,
     /// Apply the mappings of deviation characters.
     transitional_processing: bool,
     /// Make `to_ascii` check the DNS length limits.
     verify_dns_length: bool,
     /// Reject labels that begin or end with a hyphen.
     check_hyphens: bool,
     /// Record an error for code points that are disallowed in IDNA 2008.
     use_idna_2008_rules: bool,
 }

 /// The defaults are that of https://url.spec.whatwg.org/#idna
 impl Default for Config {
     fn default() -> Self {
         Self {
             use_std3_ascii_rules: false,
             transitional_processing: false,
             // Only use for to_ascii, not to_unicode
             verify_dns_length: false,
             check_hyphens: false,
             use_idna_2008_rules: false,
             // Not configurable yet:
             // check_bidi: true,
             // check_joiners: true,
         }
     }
 }

 impl Config {
     #[inline]
     pub fn use_std3_ascii_rules(mut self, value: bool) -> Self {
         self.use_std3_ascii_rules = value;
         self
     }

     #[inline]
     pub fn transitional_processing(mut self, value: bool) -> Self {
         self.transitional_processing = value;
         self
     }

     #[inline]
     pub fn verify_dns_length(mut self, value: bool) -> Self {
         self.verify_dns_length = value;
         self
     }

     #[inline]
     pub fn check_hyphens(mut self, value: bool) -> Self {
         self.check_hyphens = value;
         self
     }

     #[inline]
     pub fn use_idna_2008_rules(mut self, value: bool) -> Self {
         self.use_idna_2008_rules = value;
         self
     }

     /// http://www.unicode.org/reports/tr46/#ToASCII
     pub fn to_ascii(self, domain: &str) -> Result<String, Errors> {
         let mut result = String::with_capacity(domain.len());
         let mut codec = Idna::new(self);
         codec.to_ascii(domain, &mut result).map(|()| result)
     }

     /// http://www.unicode.org/reports/tr46/#ToUnicode
     pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) {
         let mut codec = Idna::new(self);
         let mut out = String::with_capacity(domain.len());
         let result = codec.to_unicode(domain, &mut out);
         (out, result)
     }
 }

 /// Reports whether `s` contains a character of bidi class R, AL or AN.
 ///
 /// Graphic ASCII characters are skipped without consulting the bidi tables.
 fn is_bidi_domain(s: &str) -> bool {
     s.chars()
         .filter(|c| !c.is_ascii_graphic())
         .any(|c| matches!(bidi_class(c), BidiClass::R | BidiClass::AL | BidiClass::AN))
 }

 /// Errors recorded during UTS #46 processing.
 ///
 /// This is opaque for now, indicating what types of errors have been encountered at least once.
 /// More details may be exposed in the future.
 #[derive(Default)]
 pub struct Errors {
     /// A Punycode label failed to decode or encode.
     punycode: bool,
     /// A label begins or ends with a hyphen while hyphen checking is enabled.
     check_hyphens: bool,
     /// A label failed the Bidi rules.
     check_bidi: bool,
     /// A label begins with a combining mark.
     start_combining_mark: bool,
     /// A label contains a code point the mapping table does not accept.
     invalid_mapping: bool,
     /// A decoded Punycode label is not in NFC.
     nfc: bool,
     /// A code point is disallowed by STD3 ASCII rules.
     disallowed_by_std3_ascii_rules: bool,
     /// A mapped code point is disallowed by STD3 ASCII rules.
     disallowed_mapped_in_std3: bool,
     /// A disallowed code point was found.
     disallowed_character: bool,
     /// The domain or one of its labels exceeds the DNS length limits.
     too_long_for_dns: bool,
     /// The domain or one of its labels is empty.
     too_short_for_dns: bool,
     /// A code point is disallowed in IDNA 2008.
     disallowed_in_idna_2008: bool,
 }

 impl Errors {
     /// Every flag paired with its field name, in declaration order.
     fn flags(&self) -> [(&'static str, bool); 12] {
         // Exhaustive destructuring: a new field that is not listed here fails to compile.
         let Errors {
             punycode,
             check_hyphens,
             check_bidi,
             start_combining_mark,
             invalid_mapping,
             nfc,
             disallowed_by_std3_ascii_rules,
             disallowed_mapped_in_std3,
             disallowed_character,
             too_long_for_dns,
             too_short_for_dns,
             disallowed_in_idna_2008,
         } = *self;

         [
             ("punycode", punycode),
             ("check_hyphens", check_hyphens),
             ("check_bidi", check_bidi),
             ("start_combining_mark", start_combining_mark),
             ("invalid_mapping", invalid_mapping),
             ("nfc", nfc),
             (
                 "disallowed_by_std3_ascii_rules",
                 disallowed_by_std3_ascii_rules,
             ),
             ("disallowed_mapped_in_std3", disallowed_mapped_in_std3),
             ("disallowed_character", disallowed_character),
             ("too_long_for_dns", too_long_for_dns),
             ("too_short_for_dns", too_short_for_dns),
             ("disallowed_in_idna_2008", disallowed_in_idna_2008),
         ]
     }

     /// Returns `true` when at least one error flag is set.
     fn is_err(&self) -> bool {
         self.flags().iter().any(|&(_, set)| set)
     }
 }

 impl fmt::Debug for Errors {
     /// Writes the names of the set flags, e.g. `Errors { punycode, nfc }`.
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         let flags = self.flags();

         f.write_str("Errors { ")?;
         let mut separator = "";
         for &(name, set) in flags.iter() {
             if set {
                 f.write_str(separator)?;
                 f.write_str(name)?;
                 separator = ", ";
             }
         }

         // `separator` is still empty only when no flag name was written.
         f.write_str(if separator.is_empty() { "}" } else { " }" })
     }
 }

 impl From<Errors> for Result<(), Errors> {
     /// `Ok(())` when no flag is set, otherwise `Err` carrying the flags.
     fn from(e: Errors) -> Result<(), Errors> {
         if e.is_err() {
             Err(e)
         } else {
             Ok(())
         }
     }
 }

 #[cfg(feature = "std")]
 impl std::error::Error for Errors {}

 impl fmt::Display for Errors {
     /// Uses the same text as the `Debug` representation.
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         fmt::Debug::fmt(self, f)
     }
 }

 #[cfg(test)]
 mod tests {
     use super::{find_char, Mapping};

     /// Every character that `Mapper` returns without a table lookup must be `Valid` in
     /// the table, otherwise the shortcut would change results.
     #[test]
     fn mapping_fast_path() {
         let fast_path = "-.".chars().chain('0'..='9').chain('a'..='z');
         for c in fast_path {
             assert_matches!(find_char(c), &Mapping::Valid);
         }
     }
 }
	// Copyright 2013-2014 The rust-url developers.
	//
	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
	// option. This file may not be copied, modified, or distributed
	// except according to those terms.

	//! [Unicode IDNA Compatibility Processing
	//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)

	use self::Mapping::*;
	use crate::punycode;

	use alloc::string::String;
	use core::fmt;
	use unicode_bidi::{bidi_class, BidiClass};
	use unicode_normalization::char::is_combining_mark;
	use unicode_normalization::{is_nfc, UnicodeNormalization};

	include!("uts46_mapping_table.rs");

	const PUNYCODE_PREFIX: &str = "xn--";

	/// Location of a replacement string inside `STRING_TABLE`, as consumed by `decode_slice`.
	#[derive(Debug)]
	struct StringTableSlice {
	// Store these as separate fields so the structure will have an
	// alignment of 1 and thus pack better into the Mapping enum, below.
	/// Low eight bits of the starting byte offset.
	byte_start_lo: u8,
	/// High eight bits of the starting byte offset.
	byte_start_hi: u8,
	/// Length of the replacement string, in bytes.
	byte_len: u8,
	}

	/// Resolves a `StringTableSlice` to the text it designates in `STRING_TABLE`.
	///
	/// The start offset is rebuilt from its two byte halves (`byte_start_hi` supplies the
	/// upper eight bits) and the returned string covers `byte_len` bytes from there.
	fn decode_slice(slice: &StringTableSlice) -> &'static str {
	    // The bitwise OR had been rendered as an escaped `\|`, which is not valid Rust.
	    let begin = (usize::from(slice.byte_start_hi) << 8) | usize::from(slice.byte_start_lo);
	    let end = begin + usize::from(slice.byte_len);
	    &STRING_TABLE[begin..end]
	}

	/// Status of a code point in the mapping table, as returned by `find_char`.
	///
	/// The per-variant notes describe how `Mapper` treats each status.
	#[repr(u8)]
	#[derive(Debug)]
	enum Mapping {
	/// Emitted unchanged.
	Valid,
	/// Dropped from the output.
	Ignored,
	/// Replaced by the referenced string.
	Mapped(StringTableSlice),
	/// Replaced by the referenced string under transitional processing, otherwise kept.
	Deviation(StringTableSlice),
	/// Kept, with `disallowed_character` recorded.
	Disallowed,
	/// Kept; an error is recorded only when STD3 ASCII rules are enabled.
	DisallowedStd3Valid,
	/// Replaced by the referenced string; an error is recorded only when STD3 ASCII rules
	/// are enabled.
	DisallowedStd3Mapped(StringTableSlice),
	/// Kept; an error is recorded only when IDNA 2008 rules are enabled.
	DisallowedIdna2008,
	}

	/// Looks up the mapping-table entry for `codepoint`.
	///
	/// `TABLE` holds `(range start, x)` pairs and is binary-searched by range start; a miss
	/// selects the entry just before the insertion point, i.e. the range containing
	/// `codepoint`. Bit 15 of `x` marks a range whose code points all share one
	/// `MAPPING_TABLE` entry. Otherwise the low 15 bits index the entry of the range start
	/// and the following code points use the consecutive entries.
	fn find_char(codepoint: char) -> &'static Mapping {
	    const SINGLE_MARKER: u16 = 1 << 15;

	    // The closure delimiters had been rendered as escaped `\|`, which is not valid Rust.
	    let idx = match TABLE.binary_search_by_key(&codepoint, |&val| val.0) {
	        Ok(idx) => idx,
	        // NOTE(review): assumes the first `TABLE` entry starts at U+0000, so the
	        // insertion point is never 0 -- confirm against the table generator.
	        Err(idx) => idx - 1,
	    };

	    let (base, x) = TABLE[idx];
	    let offset = usize::from(x & !SINGLE_MARKER);

	    if x & SINGLE_MARKER != 0 {
	        &MAPPING_TABLE[offset]
	    } else {
	        // Subtract at full code point width. Truncating both operands to `u16` first
	        // underflows (a panic in debug builds) when a range straddles a 0x10000 boundary.
	        let delta = (u32::from(codepoint) - u32::from(base)) as usize;
	        &MAPPING_TABLE[offset + delta]
	    }
	}

	/// Iterator state for applying the mapping table to an input string, one `char` at a time.
	struct Mapper<'a> {
	/// Remaining characters of the input.
	chars: core::str::Chars<'a>,
	/// Options consulted for deviation, STD3 and IDNA 2008 handling.
	config: Config,
	/// Where errors found while mapping are recorded.
	errors: &'a mut Errors,
	/// Replacement string currently being emitted, if any.
	slice: Option<core::str::Chars<'static>>,
	}

	impl<'a> Iterator for Mapper<'a> {
	    type Item = char;

	    /// Yields the next mapped character, or `None` once the input and any pending
	    /// replacement string are exhausted.
	    fn next(&mut self) -> Option<Self::Item> {
	        loop {
	            // Finish emitting a replacement string before reading more input.
	            if let Some(pending) = self.slice.as_mut() {
	                if let Some(mapped) = pending.next() {
	                    return Some(mapped);
	                }
	                self.slice = None;
	            }

	            let codepoint = self.chars.next()?;

	            // Fast path: these characters are passed through without a table lookup.
	            // (The pattern alternation had been rendered as escaped `\|`.)
	            if matches!(codepoint, '.' | '-' | 'a'..='z' | '0'..='9') {
	                return Some(codepoint);
	            }

	            // Each arm either returns the code point as-is, skips it, or hands back the
	            // replacement to emit on the next trip around the loop.
	            let replacement = match find_char(codepoint) {
	                Mapping::Valid => return Some(codepoint),
	                Mapping::Ignored => continue,
	                Mapping::Mapped(slice) => slice,
	                Mapping::Deviation(slice) => {
	                    if !self.config.transitional_processing {
	                        return Some(codepoint);
	                    }
	                    slice
	                }
	                Mapping::Disallowed => {
	                    self.errors.disallowed_character = true;
	                    return Some(codepoint);
	                }
	                Mapping::DisallowedStd3Valid => {
	                    if self.config.use_std3_ascii_rules {
	                        self.errors.disallowed_by_std3_ascii_rules = true;
	                    }
	                    return Some(codepoint);
	                }
	                Mapping::DisallowedStd3Mapped(slice) => {
	                    if self.config.use_std3_ascii_rules {
	                        self.errors.disallowed_mapped_in_std3 = true;
	                    }
	                    slice
	                }
	                Mapping::DisallowedIdna2008 => {
	                    if self.config.use_idna_2008_rules {
	                        self.errors.disallowed_in_idna_2008 = true;
	                    }
	                    return Some(codepoint);
	                }
	            };
	            self.slice = Some(decode_slice(replacement).chars());
	        }
	    }
	}

	// http://tools.ietf.org/html/rfc5893#section-2
	fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
	// Rule 0: Bidi Rules apply to Bidi Domain Names: a name with at least one RTL label. A label
	// is RTL if it contains at least one character of bidi class R, AL or AN.
	if !is_bidi_domain {
	return true;
	}

	let mut chars = label.chars();
	let first_char_class = match chars.next() {
	Some(c) => bidi_class(c),
	None => return true, // empty string
	};

	match first_char_class {
	// LTR label
	BidiClass::L => {
	// Rule 5
	for c in chars.by_ref() {
	if !matches!(
	bidi_class(c),
	BidiClass::L
	\| BidiClass::EN
	\| BidiClass::ES
	\| BidiClass::CS
	\| BidiClass::ET
	\| BidiClass::ON
	\| BidiClass::BN
	\| BidiClass::NSM
	) {
	return false;
	}
	}

	// Rule 6
	// must end in L or EN followed by 0 or more NSM
	let mut rev_chars = label.chars().rev();
	let mut last_non_nsm = rev_chars.next();
	loop {
	match last_non_nsm {
	Some(c) if bidi_class(c) == BidiClass::NSM => {
	last_non_nsm = rev_chars.next();
	continue;
	}
	_ => {
	break;
	}
	}
	}
	match last_non_nsm {
	Some(c) if bidi_class(c) == BidiClass::L \|\| bidi_class(c) == BidiClass::EN => {}
	Some(_) => {
	return false;
	}
	_ => {}
	}
	}

	// RTL label
	BidiClass::R \| BidiClass::AL => {
	let mut found_en = false;
	let mut found_an = false;

	// Rule 2
	for c in chars {
	let char_class = bidi_class(c);
	if char_class == BidiClass::EN {
	found_en = true;
	} else if char_class == BidiClass::AN {
	found_an = true;
	}

	if !matches!(
	char_class,
	BidiClass::R
	\| BidiClass::AL
	\| BidiClass::AN
	\| BidiClass::EN
	\| BidiClass::ES
	\| BidiClass::CS
	\| BidiClass::ET
	\| BidiClass::ON
	\| BidiClass::BN
	\| BidiClass::NSM
	) {
	return false;
	}
	}
	// Rule 3
	let mut rev_chars = label.chars().rev();
	let mut last = rev_chars.next();
	loop {
	// must end in L or EN followed by 0 or more NSM
	match last {
	Some(c) if bidi_class(c) == BidiClass::NSM => {
	last = rev_chars.next();
	continue;
	}
	_ => {
	break;
	}
	}
	}
	match last {
	Some(c)
	if matches!(
	bidi_class(c),
	BidiClass::R \| BidiClass::AL \| BidiClass::EN \| BidiClass::AN
	) => {}
	_ => {
	return false;
	}
	}

	// Rule 4
	if found_an && found_en {
	return false;
	}
	}

	// Rule 1: Should start with L or R/AL
	_ => {
	return false;
	}
	}

	true
	}

	/// Applies the per-label validity criteria and records any failure in `errors`.
	///
	/// V1 (NFC) and V8 (Bidi) are checked inside `processing()` to prevent doing duplicate work.
	/// Checking stops at the first failing criterion among V3 and V5.
	///
	/// http://www.unicode.org/reports/tr46/#Validity_Criteria
	fn check_validity(label: &str, config: Config, errors: &mut Errors) {
	    let first_char = match label.chars().next() {
	        Some(c) => c,
	        // Empty string, pass
	        None => return,
	    };

	    // V2: No U+002D HYPHEN-MINUS in both third and fourth positions.
	    //
	    // NOTE: Spec says that the label must not contain a HYPHEN-MINUS character in both the
	    // third and fourth positions. But nobody follows this criteria. See the spec issue below:
	    // https://github.com/whatwg/url/issues/53

	    // V3: neither begin nor end with a U+002D HYPHEN-MINUS
	    // (The `||` here had been rendered as escaped `\|\|`, which is not valid Rust.)
	    let hyphen_at_edge = label.starts_with('-') || label.ends_with('-');
	    if config.check_hyphens && hyphen_at_edge {
	        errors.check_hyphens = true;
	        return;
	    }

	    // V4: not contain a U+002E FULL STOP
	    //
	    // Here, label can't contain '.' since the input is from .split('.')

	    // V5: not begin with a GC=Mark
	    if is_combining_mark(first_char) {
	        errors.start_combining_mark = true;
	        return;
	    }

	    // V6: Check against Mapping Table
	    let violates_mapping = |c: char| match find_char(c) {
	        Mapping::Valid | Mapping::DisallowedIdna2008 => false,
	        Mapping::Deviation(_) => config.transitional_processing,
	        Mapping::DisallowedStd3Valid => config.use_std3_ascii_rules,
	        Mapping::Ignored
	        | Mapping::Mapped(_)
	        | Mapping::Disallowed
	        | Mapping::DisallowedStd3Mapped(_) => true,
	    };
	    if label.chars().any(violates_mapping) {
	        errors.invalid_mapping = true;
	    }

	    // V7: ContextJ rules
	    //
	    // TODO: Implement rules and add CheckJoiners flag.

	    // V8: Bidi rules are checked inside `processing()`
	}

	/// Detects domains that need no processing at all: non-empty input made only of lowercase
	/// ASCII letters, ASCII digits and `.` separators.
	///
	/// Any hyphen sends the domain down the full processing path. That covers `xn--` labels
	/// and labels with a leading or trailing hyphen. The earlier hand-written prefix/hyphen
	/// state machine could never be reached with a hyphen that survived the alphanumeric
	/// check, so it reduced to exactly this predicate.
	fn is_simple(domain: &str) -> bool {
	    !domain.is_empty()
	        && domain
	            .chars()
	            .all(|c| c == '.' || c.is_ascii_lowercase() || c.is_ascii_digit())
	}

	/// Maps and normalizes `domain`, decodes Punycode labels, validates every label and
	/// appends the result to `output`. `normalized` is scratch space that is overwritten.
	///
	/// http://www.unicode.org/reports/tr46/#Processing
	fn processing(
	    domain: &str,
	    config: Config,
	    normalized: &mut String,
	    output: &mut String,
	) -> Errors {
	    let mut errors = Errors::default();
	    // Only the text appended by this call takes part in the Bidi pass at the end.
	    let offset = output.len();

	    // Map, then normalize to NFC. Mapping errors are recorded through `Mapper`.
	    normalized.clear();
	    normalized.extend(
	        Mapper {
	            chars: domain.chars(),
	            config,
	            errors: &mut errors,
	            slice: None,
	        }
	        .nfc(),
	    );

	    let mut decoder = punycode::Decoder::default();
	    let non_transitional = config.transitional_processing(false);
	    let mut has_bidi_labels = false;
	    for (index, label) in normalized.split('.').enumerate() {
	        if index > 0 {
	            output.push('.');
	        }
	        match label.strip_prefix(PUNYCODE_PREFIX) {
	            None => {
	                // (The `|=` update had been rendered as escaped `\|=`, which is not
	                // valid Rust.)
	                has_bidi_labels = has_bidi_labels || is_bidi_domain(label);

	                // `normalized` is already `NFC` so we can skip that check
	                check_validity(label, config, &mut errors);
	                output.push_str(label);
	            }
	            Some(remainder) => match decoder.decode(remainder) {
	                Err(()) => {
	                    has_bidi_labels = true;
	                    errors.punycode = true;
	                }
	                Ok(decode) => {
	                    let start = output.len();
	                    output.extend(decode);
	                    let decoded_label = &output[start..];

	                    has_bidi_labels = has_bidi_labels || is_bidi_domain(decoded_label);

	                    // Decoded labels are only validated while no error has been seen yet.
	                    if !errors.is_err() {
	                        if is_nfc(decoded_label) {
	                            check_validity(decoded_label, non_transitional, &mut errors);
	                        } else {
	                            errors.nfc = true;
	                        }
	                    }
	                }
	            },
	        }
	    }

	    // V8: Bidi rules
	    //
	    // TODO: Add CheckBidi flag
	    let bidi_ok = output[offset..]
	        .split('.')
	        .all(|label| passes_bidi(label, has_bidi_labels));
	    if !bidi_ok {
	        errors.check_bidi = true;
	    }

	    errors
	}

	/// Reusable UTS #46 converter: holds a `Config` plus two string buffers that live
	/// as long as the instance.
	#[derive(Default)]
	pub struct Idna {
	/// Options passed to `processing` on every conversion made through this instance.
	config: Config,
	/// Scratch buffer that `processing` clears and refills with the mapped, NFC-normalized input.
	normalized: String,
	/// Scratch buffer in which `to_ascii_inner` keeps the Unicode form while it encodes each label.
	output: String,
	}

	impl Idna {
	    /// Creates a converter that uses `config` and starts with empty scratch buffers.
	    pub fn new(config: Config) -> Self {
	        Self {
	            config,
	            normalized: String::new(),
	            output: String::new(),
	        }
	    }

	    /// Converts `domain` to its ASCII form and appends it to `out`, without the DNS
	    /// length verification that `to_ascii` adds on top.
	    ///
	    /// Text already present in `out` is left untouched. Returns the errors that were
	    /// recorded; `out` is written to even when errors were recorded.
	    pub fn to_ascii_inner(&mut self, domain: &str, out: &mut String) -> Errors {
	        if is_simple(domain) {
	            out.push_str(domain);
	            return Errors::default();
	        }

	        // `processing` appends the Unicode form after whatever `out` already holds.
	        // Move just that appended part into the scratch buffer, so that the caller's
	        // existing text is neither split into labels nor punycode-encoded below.
	        let offset = out.len();
	        let mut errors = processing(domain, self.config, &mut self.normalized, out);
	        self.output.clear();
	        self.output.push_str(&out[offset..]);
	        out.truncate(offset);

	        let mut first = true;
	        for label in self.output.split('.') {
	            if !first {
	                out.push('.');
	            }
	            first = false;

	            if label.is_ascii() {
	                out.push_str(label);
	            } else {
	                let label_start = out.len();
	                out.push_str(PUNYCODE_PREFIX);
	                if let Err(()) = punycode::encode_into(label.chars(), out) {
	                    errors.punycode = true;
	                    // Drop the prefix and any partial encoding of the failed label.
	                    out.truncate(label_start);
	                }
	            }
	        }
	        errors
	    }

	    /// Converts `domain` to its ASCII form and appends it to `out`.
	    ///
	    /// See <http://www.unicode.org/reports/tr46/#ToASCII>.
	    ///
	    /// When `verify_dns_length` is enabled, the text produced by this call (ignoring one
	    /// trailing `.`) is also checked: an empty domain or empty label sets
	    /// `too_short_for_dns`; a domain longer than 253 bytes or a label longer than
	    /// 63 bytes sets `too_long_for_dns`.
	    ///
	    /// # Errors
	    ///
	    /// Returns `Err` with the recorded `Errors` if any error flag was set. `out` has been
	    /// appended to in that case as well.
	    #[allow(clippy::wrong_self_convention)]
	    pub fn to_ascii(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
	        // Only the text written by this call is subject to the length checks.
	        let offset = out.len();
	        let mut errors = self.to_ascii_inner(domain, out);

	        if self.config.verify_dns_length {
	            let produced = &out[offset..];
	            // A single trailing dot (root label) does not count towards the checks.
	            let domain = produced.strip_suffix('.').unwrap_or(produced);
	            if domain.is_empty() || domain.split('.').any(|label| label.is_empty()) {
	                errors.too_short_for_dns = true;
	            }
	            if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
	                errors.too_long_for_dns = true;
	            }
	        }

	        errors.into()
	    }

	    /// Converts `domain` to its Unicode form and appends it to `out`.
	    ///
	    /// See <http://www.unicode.org/reports/tr46/#ToUnicode>.
	    ///
	    /// # Errors
	    ///
	    /// Returns `Err` with the recorded `Errors` if any error flag was set. `out` has been
	    /// appended to in that case as well.
	    #[allow(clippy::wrong_self_convention)]
	    pub fn to_unicode(&mut self, domain: &str, out: &mut String) -> Result<(), Errors> {
	        if is_simple(domain) {
	            out.push_str(domain);
	            return Errors::default().into();
	        }
	        processing(domain, self.config, &mut self.normalized, out).into()
	    }
	}

	/// Options for UTS #46 processing. Each field has a builder-style setter of the
	/// same name on this type.
	#[derive(Clone, Copy)]
	#[must_use]
	pub struct Config {
	/// NOTE(review): presumably the UTS #46 UseSTD3ASCIIRules flag; it is read outside this section.
	use_std3_ascii_rules: bool,
	/// Whether transitional processing is used; `processing` always turns this off
	/// when validating decoded punycode labels.
	transitional_processing: bool,
	/// When set, `Idna::to_ascii` flags empty or over-long domains and labels.
	verify_dns_length: bool,
	/// NOTE(review): presumably the UTS #46 CheckHyphens flag; it is read outside this section.
	check_hyphens: bool,
	/// NOTE(review): presumably enables the stricter IDNA 2008 rules; it is read outside this section.
	use_idna_2008_rules: bool,
	}

	/// The defaults are that of https://url.spec.whatwg.org/#idna
	impl Default for Config {
	fn default() -> Self {
	Config {
	use_std3_ascii_rules: false,
	transitional_processing: false,
	check_hyphens: false,
	// check_bidi: true,
	// check_joiners: true,

	// Only use for to_ascii, not to_unicode
	verify_dns_length: false,
	use_idna_2008_rules: false,
	}
	}
	}

	impl Config {
	    /// Sets the `use_std3_ascii_rules` flag and returns the updated configuration.
	    #[inline]
	    pub fn use_std3_ascii_rules(mut self, value: bool) -> Self {
	        self.use_std3_ascii_rules = value;
	        self
	    }

	    /// Sets the `transitional_processing` flag and returns the updated configuration.
	    #[inline]
	    pub fn transitional_processing(mut self, value: bool) -> Self {
	        self.transitional_processing = value;
	        self
	    }

	    /// Sets the `verify_dns_length` flag and returns the updated configuration.
	    ///
	    /// The flag only affects `to_ascii`.
	    #[inline]
	    pub fn verify_dns_length(mut self, value: bool) -> Self {
	        self.verify_dns_length = value;
	        self
	    }

	    /// Sets the `check_hyphens` flag and returns the updated configuration.
	    #[inline]
	    pub fn check_hyphens(mut self, value: bool) -> Self {
	        self.check_hyphens = value;
	        self
	    }

	    /// Sets the `use_idna_2008_rules` flag and returns the updated configuration.
	    #[inline]
	    pub fn use_idna_2008_rules(mut self, value: bool) -> Self {
	        self.use_idna_2008_rules = value;
	        self
	    }

	    /// Converts `domain` to its ASCII form using a temporary `Idna` built from this
	    /// configuration.
	    ///
	    /// See <http://www.unicode.org/reports/tr46/#ToASCII>.
	    ///
	    /// # Errors
	    ///
	    /// Returns the recorded `Errors` if the conversion flagged any; the partially
	    /// built string is discarded in that case.
	    pub fn to_ascii(self, domain: &str) -> Result<String, Errors> {
	        let mut result = String::with_capacity(domain.len());
	        let mut codec = Idna::new(self);
	        codec.to_ascii(domain, &mut result).map(|()| result)
	    }

	    /// Converts `domain` to its Unicode form using a temporary `Idna` built from this
	    /// configuration.
	    ///
	    /// See <http://www.unicode.org/reports/tr46/#ToUnicode>.
	    ///
	    /// The produced string is returned even when errors were recorded, alongside the
	    /// `Result` describing them.
	    pub fn to_unicode(self, domain: &str) -> (String, Result<(), Errors>) {
	        let mut codec = Idna::new(self);
	        let mut out = String::with_capacity(domain.len());
	        let result = codec.to_unicode(domain, &mut out);
	        (out, result)
	    }
	}

	/// Returns `true` if `s` contains at least one character whose bidi class is
	/// `R`, `AL` or `AN`.
	///
	/// ASCII graphic characters are skipped without a bidi class lookup.
	fn is_bidi_domain(s: &str) -> bool {
	    s.chars()
	        .filter(|c| !c.is_ascii_graphic())
	        .any(|c| matches!(bidi_class(c), BidiClass::R | BidiClass::AL | BidiClass::AN))
	}

	/// Errors recorded during UTS #46 processing.
	///
	/// This is opaque for now, indicating what types of errors have been encountered at least once.
	/// More details may be exposed in the future.
	#[derive(Default)]
	pub struct Errors {
	// A punycode label failed to decode, or a label could not be punycode-encoded.
	punycode: bool,
	// NOTE(review): set outside this section; presumably a CheckHyphens violation.
	check_hyphens: bool,
	// A label failed the `passes_bidi` check at the end of `processing`.
	check_bidi: bool,
	// NOTE(review): set outside this section; presumably a label starting with a combining mark.
	start_combining_mark: bool,
	// NOTE(review): set outside this section.
	invalid_mapping: bool,
	// A decoded punycode label was not in NFC.
	nfc: bool,
	// NOTE(review): set outside this section.
	disallowed_by_std3_ascii_rules: bool,
	// NOTE(review): set outside this section.
	disallowed_mapped_in_std3: bool,
	// NOTE(review): set outside this section.
	disallowed_character: bool,
	// `to_ascii` with `verify_dns_length`: domain longer than 253 bytes or a label longer than 63.
	too_long_for_dns: bool,
	// `to_ascii` with `verify_dns_length`: the domain or one of its labels is empty.
	too_short_for_dns: bool,
	// NOTE(review): set outside this section.
	disallowed_in_idna_2008: bool,
	}

	impl Errors {
	    /// Returns `true` if at least one error flag is set.
	    fn is_err(&self) -> bool {
	        // Destructure without `..` so that adding a field to `Errors` fails to compile
	        // here until the new flag is included in the check below.
	        let Errors {
	            punycode,
	            check_hyphens,
	            check_bidi,
	            start_combining_mark,
	            invalid_mapping,
	            nfc,
	            disallowed_by_std3_ascii_rules,
	            disallowed_mapped_in_std3,
	            disallowed_character,
	            too_long_for_dns,
	            too_short_for_dns,
	            disallowed_in_idna_2008,
	        } = *self;
	        punycode
	            || check_hyphens
	            || check_bidi
	            || start_combining_mark
	            || invalid_mapping
	            || nfc
	            || disallowed_by_std3_ascii_rules
	            || disallowed_mapped_in_std3
	            || disallowed_character
	            || too_long_for_dns
	            || too_short_for_dns
	            || disallowed_in_idna_2008
	    }
	}

	impl fmt::Debug for Errors {
	    /// Writes `Errors { ` followed by the comma-separated names of the flags that are
	    /// set and a closing brace, e.g. `Errors { punycode, nfc }`. With no flag set the
	    /// output is `Errors { }`.
	    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
	        // Destructure without `..` so a newly added field must be listed here too.
	        let Errors {
	            punycode,
	            check_hyphens,
	            check_bidi,
	            start_combining_mark,
	            invalid_mapping,
	            nfc,
	            disallowed_by_std3_ascii_rules,
	            disallowed_mapped_in_std3,
	            disallowed_character,
	            too_long_for_dns,
	            too_short_for_dns,
	            disallowed_in_idna_2008,
	        } = *self;

	        let flags = [
	            ("punycode", punycode),
	            ("check_hyphens", check_hyphens),
	            ("check_bidi", check_bidi),
	            ("start_combining_mark", start_combining_mark),
	            ("invalid_mapping", invalid_mapping),
	            ("nfc", nfc),
	            (
	                "disallowed_by_std3_ascii_rules",
	                disallowed_by_std3_ascii_rules,
	            ),
	            ("disallowed_mapped_in_std3", disallowed_mapped_in_std3),
	            ("disallowed_character", disallowed_character),
	            ("too_long_for_dns", too_long_for_dns),
	            ("too_short_for_dns", too_short_for_dns),
	            ("disallowed_in_idna_2008", disallowed_in_idna_2008),
	        ];

	        f.write_str("Errors { ")?;
	        let mut wrote_any = false;
	        for &(label, is_set) in flags.iter() {
	            if !is_set {
	                continue;
	            }
	            if wrote_any {
	                f.write_str(", ")?;
	            }
	            f.write_str(label)?;
	            wrote_any = true;
	        }

	        // The opening text already ends in a space, so an empty list needs only the brace.
	        f.write_str(if wrote_any { " }" } else { "}" })
	    }
	}

	impl From<Errors> for Result<(), Errors> {
	    /// Turns an `Errors` value into `Err(e)` when any flag is set and `Ok(())` otherwise.
	    fn from(e: Errors) -> Result<(), Errors> {
	        if e.is_err() {
	            return Err(e);
	        }
	        Ok(())
	    }
	}

	// Empty impl: `Errors` relies entirely on the default methods of `std::error::Error`.
	// Compiled only when the `std` feature is enabled.
	#[cfg(feature = "std")]
	impl std::error::Error for Errors {}

	impl fmt::Display for Errors {
	    /// Produces exactly the same text as the `Debug` implementation.
	    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
	        <Self as fmt::Debug>::fmt(self, f)
	    }
	}

	#[cfg(test)]
	mod tests {
	    use super::{find_char, Mapping};

	    /// Hyphen, full stop, the ASCII digits and the lowercase ASCII letters must all
	    /// be looked up as `Mapping::Valid`.
	    #[test]
	    fn mapping_fast_path() {
	        let punctuation = "-.".chars();
	        let digits = '0'..='9';
	        let lowercase = 'a'..='z';

	        for c in punctuation.chain(digits).chain(lowercase) {
	            assert_matches!(find_char(c), &Mapping::Valid);
	        }
	    }
	}