vendor/idna-0.2.3/src/punycode.rs - toolchain/rustc - Git at Google

 // Copyright 2013 The rust-url developers.
 //
 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

 //! Punycode ([RFC 3492](http://tools.ietf.org/html/rfc3492)) implementation.
 //!
 //! Since Punycode fundamentally works on unicode code points,
 //! `encode` and `decode` take and return slices and vectors of `char`.
 //! `encode_str` and `decode_to_string` provide convenience wrappers
 //! that convert from and to Rust’s UTF-8 based `str` and `String` types.

 use std::char;
 use std::u32;

 // Bootstring parameters for Punycode (RFC 3492).

 /// Number of distinct digit values (`a`-`z` and `0`-`9`).
 const BASE: u32 = 36;
 /// Lower clamp for the per-digit threshold `t`.
 const T_MIN: u32 = 1;
 /// Upper clamp for the per-digit threshold `t`.
 const T_MAX: u32 = 26;
 /// Skew term used by `adapt` when computing the new bias.
 const SKEW: u32 = 38;
 /// Divisor applied to the very first delta in `adapt`.
 const DAMP: u32 = 700;
 /// Bias in effect before any delta has been processed.
 const INITIAL_BIAS: u32 = 72;
 /// First non-basic code point; encoding and decoding start counting here.
 const INITIAL_N: u32 = 0x80;
 /// Separates the literal basic code points from the encoded deltas.
 const DELIMITER: char = '-';

 /// Bias adaptation: compute the bias to use for the next delta.
 ///
 /// `delta` is the delta that was just processed, `num_points` the number of
 /// code points handled so far (including the current one) and `first_time`
 /// selects the stronger `DAMP` scaling used for the very first delta.
 ///
 /// Panics if `num_points` is zero (division by zero).
 #[inline]
 fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 {
     let damping = if first_time { DAMP } else { 2 };
     delta /= damping;
     delta += delta / num_points;

     // Scale delta down until it is within the range of a single digit step.
     let limit = ((BASE - T_MIN) * T_MAX) / 2;
     let mut k = 0;
     while delta > limit {
         delta /= BASE - T_MIN;
         k += BASE;
     }
     k + (BASE - T_MIN + 1) * delta / (delta + SKEW)
 }

 /// Decode Punycode into a Unicode `String`.
 ///
 /// Thin wrapper around `decode` that collects the decoded characters into a
 /// `String`. Returns `None` whenever `decode` does.
 #[inline]
 pub fn decode_to_string(input: &str) -> Option<String> {
     let chars = decode(input)?;
     Some(chars.into_iter().collect())
 }

 /// Decode Punycode into a vector of Unicode characters.
 ///
 /// Returns `None` on malformed input or overflow.
 /// Overflow can only happen on inputs that take more than
 /// 63 encoded bytes, the DNS limit on domain name labels.
 pub fn decode(input: &str) -> Option<Vec<char>> {
     let mut decoder = Decoder::default();
     let chars = decoder.decode(input).ok()?;
     Some(chars.collect())
 }

 /// Reusable Punycode decoder state.
 ///
 /// The insertion buffer is cleared at the start of every `decode` call, so
 /// one `Decoder` can be reused for several inputs.
 #[derive(Default)]
 pub(crate) struct Decoder {
     /// `(output position, character)` for every decoded non-basic code point.
     insertions: Vec<(usize, char)>,
 }

 impl Decoder {
     /// Decode `input`, returning an iterator over the decoded characters.
     ///
     /// The basic (ASCII) part of the input is borrowed as-is; the decoded
     /// non-basic characters are stored in `self.insertions`, sorted by output
     /// position, and merged in lazily by the returned `Decode`.
     ///
     /// Returns `Err(())` when the basic part is not ASCII, when a digit is
     /// invalid, when the input ends in the middle of a delta, on arithmetic
     /// overflow, or when a decoded value is not a valid `char`.
     pub(crate) fn decode<'a>(&'a mut self, input: &'a str) -> Result<Decode<'a>, ()> {
         self.insertions.clear();

         // Basic code points are stored literally before the last delimiter.
         // A delimiter at index 0 has no basic code points in front of it and
         // is therefore left in place as part of the encoded section.
         let (base, input) = match input.rfind(DELIMITER) {
             None | Some(0) => ("", input),
             Some(position) => (&input[..position], &input[position + 1..]),
         };
         if !base.is_ascii() {
             return Err(());
         }

         let base_len = base.len();
         let mut length = base_len as u32;
         let mut code_point = INITIAL_N;
         let mut bias = INITIAL_BIAS;
         let mut i: u32 = 0;
         let mut digits = input.bytes();

         // Each outer iteration consumes one delta and yields one insertion.
         while let Some(mut byte) = digits.next() {
             let previous_i = i;
             let mut weight: u32 = 1;
             let mut k = BASE;

             // Decode a generalized variable-length integer into delta,
             // which gets added to i.
             loop {
                 let digit = match byte {
                     b'0'..=b'9' => byte - b'0' + 26,
                     b'A'..=b'Z' => byte - b'A',
                     b'a'..=b'z' => byte - b'a',
                     _ => return Err(()),
                 } as u32;
                 if digit > (u32::MAX - i) / weight {
                     return Err(()); // Overflow
                 }
                 i += digit * weight;

                 // Threshold for this digit position, clamped to T_MIN..=T_MAX.
                 let t = if k >= bias + T_MAX {
                     T_MAX
                 } else if k <= bias {
                     T_MIN
                 } else {
                     k - bias
                 };
                 if digit < t {
                     break; // Last digit of this delta.
                 }
                 if weight > u32::MAX / (BASE - t) {
                     return Err(()); // Overflow
                 }
                 weight *= BASE - t;
                 k += BASE;
                 // Running out of input here means the delta was truncated.
                 byte = digits.next().ok_or(())?;
             }

             bias = adapt(i - previous_i, length + 1, previous_i == 0);

             // i was supposed to wrap around from length+1 to 0,
             // incrementing code_point each time.
             let wraps = i / (length + 1);
             if wraps > u32::MAX - code_point {
                 return Err(()); // Overflow
             }
             code_point += wraps;
             i %= length + 1;
             let c = char::from_u32(code_point).ok_or(())?;

             // Move earlier insertions farther out in the string.
             let at = i as usize;
             for (idx, _) in self.insertions.iter_mut() {
                 if *idx >= at {
                     *idx += 1;
                 }
             }
             self.insertions.push((at, c));
             length += 1;
             i += 1;
         }

         self.insertions.sort_by_key(|&(position, _)| position);
         let len = base_len + self.insertions.len();
         Ok(Decode {
             base: base.chars(),
             insertions: &self.insertions,
             inserted: 0,
             position: 0,
             len,
         })
     }
 }

 /// Iterator over decoded characters: the basic code points with the decoded
 /// insertions spliced in at their recorded output positions.
 pub(crate) struct Decode<'a> {
     /// Basic code points that have not been yielded yet.
     base: std::str::Chars<'a>,
     /// `(output position, character)` pairs, consumed in slice order.
     pub(crate) insertions: &'a [(usize, char)],
     /// Number of entries of `insertions` already yielded.
     inserted: usize,
     /// Number of characters yielded so far, i.e. the next output position.
     position: usize,
     /// Total output length; `len - position` is reported as the remainder.
     len: usize,
 }

 impl<'a> Iterator for Decode<'a> {
     type Item = char;

     fn next(&mut self) -> Option<Self::Item> {
         loop {
             // An insertion scheduled for the current position goes first.
             if let Some(&(at, ch)) = self.insertions.get(self.inserted) {
                 if at == self.position {
                     self.inserted += 1;
                     self.position += 1;
                     return Some(ch);
                 }
             }
             // Otherwise fall back to the next basic code point.
             match self.base.next() {
                 Some(ch) => {
                     self.position += 1;
                     return Some(ch);
                 }
                 None if self.inserted >= self.insertions.len() => return None,
                 None => {}
             }
         }
     }

     fn size_hint(&self) -> (usize, Option<usize>) {
         let remaining = self.len - self.position;
         (remaining, Some(remaining))
     }
 }

 impl<'a> ExactSizeIterator for Decode<'a> {
     fn len(&self) -> usize {
         let (remaining, _) = self.size_hint();
         remaining
     }
 }

 /// Encode a Unicode `str` as Punycode.
 ///
 /// Convenience wrapper that feeds the characters of `input` to
 /// `encode_into`. Returns `None` when the encoder reports an overflow.
 #[inline]
 pub fn encode_str(input: &str) -> Option<String> {
     let mut buf = String::with_capacity(input.len());
     match encode_into(input.chars(), &mut buf) {
         Ok(()) => Some(buf),
         Err(()) => None,
     }
 }

 /// Encode a slice of Unicode characters as Punycode.
 ///
 /// Returns `None` on overflow, which can only happen on inputs that would take more than
 /// 63 encoded bytes, the DNS limit on domain name labels.
 pub fn encode(input: &[char]) -> Option<String> {
     let mut buf = String::with_capacity(input.len());
     match encode_into(input.iter().copied(), &mut buf) {
         Ok(()) => Some(buf),
         Err(()) => None,
     }
 }

 pub(crate) fn encode_into<I>(input: I, output: &mut String) -> Result<(), ()>
 where
     I: Iterator<Item = char> + Clone,
 {
     // Handle "basic" (ASCII) code points. They are encoded as-is.
     let (mut input_length, mut basic_length) = (0, 0);
     for c in input.clone() {
         input_length += 1;
         if c.is_ascii() {
             output.push(c);
             basic_length += 1;
         }
     }

     if basic_length > 0 {
         output.push('-')
     }
     let mut code_point = INITIAL_N;
     let mut delta = 0;
     let mut bias = INITIAL_BIAS;
     let mut processed = basic_length;
     while processed < input_length {
         // All code points < code_point have been handled already.
         // Find the next larger one.
         let min_code_point = input
             .clone()
             .map(|c| c as u32)
             .filter(|&c| c >= code_point)
             .min()
             .unwrap();
         if min_code_point - code_point > (u32::MAX - delta) / (processed + 1) {
             return Err(()); // Overflow
         }
         // Increase delta to advance the decoder’s <code_point,i> state to <min_code_point,0>
         delta += (min_code_point - code_point) * (processed + 1);
         code_point = min_code_point;
         for c in input.clone() {
             let c = c as u32;
             if c < code_point {
                 delta += 1;
                 if delta == 0 {
                     return Err(()); // Overflow
                 }
             }
             if c == code_point {
                 // Represent delta as a generalized variable-length integer:
                 let mut q = delta;
                 let mut k = BASE;
                 loop {
                     let t = if k <= bias {
                         T_MIN
                     } else if k >= bias + T_MAX {
                         T_MAX
                     } else {
                         k - bias
                     };
                     if q < t {
                         break;
                     }
                     let value = t + ((q - t) % (BASE - t));
                     output.push(value_to_digit(value));
                     q = (q - t) / (BASE - t);
                     k += BASE;
                 }
                 output.push(value_to_digit(q));
                 bias = adapt(delta, processed + 1, processed == basic_length);
                 delta = 0;
                 processed += 1;
             }
         }
         delta += 1;
         code_point += 1;
     }
     Ok(())
 }

 /// Map a digit value to its ASCII character: 0..=25 to `a`..`z` and
 /// 26..=35 to `0`..`9`.
 ///
 /// Panics for any value above 35.
 #[inline]
 fn value_to_digit(value: u32) -> char {
     if value <= 25 {
         (b'a' + value as u8) as char
     } else if value <= 35 {
         (b'0' + (value as u8 - 26)) as char
     } else {
         panic!()
     }
 }
	// Copyright 2013 The rust-url developers.
	//
	// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
	// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
	// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
	// option. This file may not be copied, modified, or distributed
	// except according to those terms.

	//! Punycode ([RFC 3492](http://tools.ietf.org/html/rfc3492)) implementation.
	//!
	//! Since Punycode fundamentally works on unicode code points,
	//! `encode` and `decode` take and return slices and vectors of `char`.
	//! `encode_str` and `decode_to_string` provide convenience wrappers
	//! that convert from and to Rust’s UTF-8 based `str` and `String` types.

	use std::char;
	use std::u32;

	// Bootstring parameters for Punycode (RFC 3492).

	/// Number of distinct digit values (`a`-`z` and `0`-`9`).
	const BASE: u32 = 36;
	/// Lower clamp for the per-digit threshold `t`.
	const T_MIN: u32 = 1;
	/// Upper clamp for the per-digit threshold `t`.
	const T_MAX: u32 = 26;
	/// Skew term used by `adapt` when computing the new bias.
	const SKEW: u32 = 38;
	/// Divisor applied to the very first delta in `adapt`.
	const DAMP: u32 = 700;
	/// Bias in effect before any delta has been processed.
	const INITIAL_BIAS: u32 = 72;
	/// First non-basic code point; encoding and decoding start counting here.
	const INITIAL_N: u32 = 0x80;
	/// Separates the literal basic code points from the encoded deltas.
	const DELIMITER: char = '-';

	/// Bias adaptation: compute the bias to use for the next delta.
	///
	/// `delta` is the delta that was just processed, `num_points` the number of
	/// code points handled so far (including the current one) and `first_time`
	/// selects the stronger `DAMP` scaling used for the very first delta.
	///
	/// Panics if `num_points` is zero (division by zero).
	#[inline]
	fn adapt(mut delta: u32, num_points: u32, first_time: bool) -> u32 {
	    let damping = if first_time { DAMP } else { 2 };
	    delta /= damping;
	    delta += delta / num_points;

	    // Scale delta down until it is within the range of a single digit step.
	    let limit = ((BASE - T_MIN) * T_MAX) / 2;
	    let mut k = 0;
	    while delta > limit {
	        delta /= BASE - T_MIN;
	        k += BASE;
	    }
	    k + (BASE - T_MIN + 1) * delta / (delta + SKEW)
	}

	/// Convert Punycode to an Unicode `String`.
	///
	/// This is a convenience wrapper around `decode`.
	#[inline]
	pub fn decode_to_string(input: &str) -> Option<String> {
	decode(input).map(\|chars\| chars.into_iter().collect())
	}

	/// Decode Punycode into a vector of Unicode characters.
	///
	/// Returns `None` on malformed input or overflow.
	/// Overflow can only happen on inputs that take more than
	/// 63 encoded bytes, the DNS limit on domain name labels.
	pub fn decode(input: &str) -> Option<Vec<char>> {
	    let mut decoder = Decoder::default();
	    let chars = decoder.decode(input).ok()?;
	    Some(chars.collect())
	}

	/// Reusable Punycode decoder state.
	///
	/// The insertion buffer is cleared at the start of every `decode` call, so
	/// one `Decoder` can be reused for several inputs.
	#[derive(Default)]
	pub(crate) struct Decoder {
	    /// `(output position, character)` for every decoded non-basic code point.
	    insertions: Vec<(usize, char)>,
	}

	impl Decoder {
	    /// Decode `input`, returning an iterator over the decoded characters.
	    ///
	    /// The basic (ASCII) part of the input is borrowed as-is; the decoded
	    /// non-basic characters are stored in `self.insertions`, sorted by output
	    /// position, and merged in lazily by the returned `Decode`.
	    ///
	    /// Returns `Err(())` when the basic part is not ASCII, when a digit is
	    /// invalid, when the input ends in the middle of a delta, on arithmetic
	    /// overflow, or when a decoded value is not a valid `char`.
	    pub(crate) fn decode<'a>(&'a mut self, input: &'a str) -> Result<Decode<'a>, ()> {
	        self.insertions.clear();

	        // Handle "basic" (ASCII) code points.
	        // They are encoded as-is before the last delimiter, if any.
	        // A delimiter at index 0 has no basic code points in front of it and
	        // is therefore left in place as part of the encoded section.
	        let (base, input) = match input.rfind(DELIMITER) {
	            None | Some(0) => ("", input),
	            Some(position) => (&input[..position], &input[position + 1..]),
	        };
	        if !base.is_ascii() {
	            return Err(());
	        }

	        let base_len = base.len();
	        let mut length = base_len as u32;
	        let mut code_point = INITIAL_N;
	        let mut bias = INITIAL_BIAS;
	        let mut i: u32 = 0;
	        let mut digits = input.bytes();

	        // Each outer iteration consumes one delta and yields one insertion.
	        while let Some(mut byte) = digits.next() {
	            let previous_i = i;
	            let mut weight: u32 = 1;
	            let mut k = BASE;

	            // Decode a generalized variable-length integer into delta,
	            // which gets added to i.
	            loop {
	                let digit = match byte {
	                    b'0'..=b'9' => byte - b'0' + 26,
	                    b'A'..=b'Z' => byte - b'A',
	                    b'a'..=b'z' => byte - b'a',
	                    _ => return Err(()),
	                } as u32;
	                if digit > (u32::MAX - i) / weight {
	                    return Err(()); // Overflow
	                }
	                i += digit * weight;

	                // Threshold for this digit position, clamped to T_MIN..=T_MAX.
	                let t = if k >= bias + T_MAX {
	                    T_MAX
	                } else if k <= bias {
	                    T_MIN
	                } else {
	                    k - bias
	                };
	                if digit < t {
	                    break; // Last digit of this delta.
	                }
	                if weight > u32::MAX / (BASE - t) {
	                    return Err(()); // Overflow
	                }
	                weight *= BASE - t;
	                k += BASE;
	                // Running out of input here means the delta was truncated.
	                byte = digits.next().ok_or(())?;
	            }

	            bias = adapt(i - previous_i, length + 1, previous_i == 0);

	            // i was supposed to wrap around from length+1 to 0,
	            // incrementing code_point each time.
	            let wraps = i / (length + 1);
	            if wraps > u32::MAX - code_point {
	                return Err(()); // Overflow
	            }
	            code_point += wraps;
	            i %= length + 1;
	            let c = char::from_u32(code_point).ok_or(())?;

	            // Move earlier insertions farther out in the string.
	            let at = i as usize;
	            for (idx, _) in self.insertions.iter_mut() {
	                if *idx >= at {
	                    *idx += 1;
	                }
	            }
	            self.insertions.push((at, c));
	            length += 1;
	            i += 1;
	        }

	        // Closure delimiters are plain `|`; a backslash-escaped pipe is not Rust.
	        self.insertions.sort_by_key(|&(position, _)| position);
	        let len = base_len + self.insertions.len();
	        Ok(Decode {
	            base: base.chars(),
	            insertions: &self.insertions,
	            inserted: 0,
	            position: 0,
	            len,
	        })
	    }
	}

	/// Iterator over decoded characters: the basic code points with the decoded
	/// insertions spliced in at their recorded output positions.
	pub(crate) struct Decode<'a> {
	    /// Basic code points that have not been yielded yet.
	    base: std::str::Chars<'a>,
	    /// `(output position, character)` pairs, consumed in slice order.
	    pub(crate) insertions: &'a [(usize, char)],
	    /// Number of entries of `insertions` already yielded.
	    inserted: usize,
	    /// Number of characters yielded so far, i.e. the next output position.
	    position: usize,
	    /// Total output length; `len - position` is reported as the remainder.
	    len: usize,
	}

	impl<'a> Iterator for Decode<'a> {
	    type Item = char;

	    fn next(&mut self) -> Option<Self::Item> {
	        loop {
	            // An insertion scheduled for the current position goes first.
	            if let Some(&(at, ch)) = self.insertions.get(self.inserted) {
	                if at == self.position {
	                    self.inserted += 1;
	                    self.position += 1;
	                    return Some(ch);
	                }
	            }
	            // Otherwise fall back to the next basic code point.
	            match self.base.next() {
	                Some(ch) => {
	                    self.position += 1;
	                    return Some(ch);
	                }
	                None if self.inserted >= self.insertions.len() => return None,
	                None => {}
	            }
	        }
	    }

	    fn size_hint(&self) -> (usize, Option<usize>) {
	        let remaining = self.len - self.position;
	        (remaining, Some(remaining))
	    }
	}

	impl<'a> ExactSizeIterator for Decode<'a> {
	    fn len(&self) -> usize {
	        let (remaining, _) = self.size_hint();
	        remaining
	    }
	}

	/// Convert an Unicode `str` to Punycode.
	///
	/// This is a convenience wrapper around `encode`.
	#[inline]
	pub fn encode_str(input: &str) -> Option<String> {
	let mut buf = String::with_capacity(input.len());
	encode_into(input.chars(), &mut buf).ok().map(\|()\| buf)
	}

	/// Convert Unicode to Punycode.
	///
	/// Return None on overflow, which can only happen on inputs that would take more than
	/// 63 encoded bytes, the DNS limit on domain name labels.
	pub fn encode(input: &[char]) -> Option<String> {
	let mut buf = String::with_capacity(input.len());
	encode_into(input.iter().copied(), &mut buf)
	.ok()
	.map(\|()\| buf)
	}

	pub(crate) fn encode_into<I>(input: I, output: &mut String) -> Result<(), ()>
	where
	I: Iterator<Item = char> + Clone,
	{
	// Handle "basic" (ASCII) code points. They are encoded as-is.
	let (mut input_length, mut basic_length) = (0, 0);
	for c in input.clone() {
	input_length += 1;
	if c.is_ascii() {
	output.push(c);
	basic_length += 1;
	}
	}

	if basic_length > 0 {
	output.push('-')
	}
	let mut code_point = INITIAL_N;
	let mut delta = 0;
	let mut bias = INITIAL_BIAS;
	let mut processed = basic_length;
	while processed < input_length {
	// All code points < code_point have been handled already.
	// Find the next larger one.
	let min_code_point = input
	.clone()
	.map(\|c\| c as u32)
	.filter(\|&c\| c >= code_point)
	.min()
	.unwrap();
	if min_code_point - code_point > (u32::MAX - delta) / (processed + 1) {
	return Err(()); // Overflow
	}
	// Increase delta to advance the decoder’s <code_point,i> state to <min_code_point,0>
	delta += (min_code_point - code_point) * (processed + 1);
	code_point = min_code_point;
	for c in input.clone() {
	let c = c as u32;
	if c < code_point {
	delta += 1;
	if delta == 0 {
	return Err(()); // Overflow
	}
	}
	if c == code_point {
	// Represent delta as a generalized variable-length integer:
	let mut q = delta;
	let mut k = BASE;
	loop {
	let t = if k <= bias {
	T_MIN
	} else if k >= bias + T_MAX {
	T_MAX
	} else {
	k - bias
	};
	if q < t {
	break;
	}
	let value = t + ((q - t) % (BASE - t));
	output.push(value_to_digit(value));
	q = (q - t) / (BASE - t);
	k += BASE;
	}
	output.push(value_to_digit(q));
	bias = adapt(delta, processed + 1, processed == basic_length);
	delta = 0;
	processed += 1;
	}
	}
	delta += 1;
	code_point += 1;
	}
	Ok(())
	}

	/// Map a digit value to its ASCII character: 0..=25 to `a`..`z` and
	/// 26..=35 to `0`..`9`.
	///
	/// Panics for any value above 35.
	#[inline]
	fn value_to_digit(value: u32) -> char {
	    if value <= 25 {
	        (b'a' + value as u8) as char
	    } else if value <= 35 {
	        (b'0' + (value as u8 - 26)) as char
	    } else {
	        panic!()
	    }
	}