src/escape.rs - platform/external/rust/crates/litrs - Git at Google

 use crate::{ParseError, err::{perr, ParseErrorKind::*}, parse::hex_digit_value};


 /// Parses the single escape sequence at the start of `input`.
 ///
 /// `input` must start with `\`; that first byte is not inspected. `offset` is
 /// added to positions inside `input` when building error spans, so that the
 /// spans refer to the text `input` was sliced from.
 ///
 /// On success, returns the unescaped value together with the number of bytes
 /// of `input` the escape sequence occupies.
 ///
 /// Recognized escapes are `\'`, `\"`, `\n`, `\r`, `\t`, `\\`, `\0`, `\xHH` and
 /// `\u{...}`. `\xHH` values above `0x7F` are rejected when
 /// `E::SUPPORTS_UNICODE` is `true`; `\u{...}` is rejected when it is `false`.
 /// Inside the braces at most six hex digits are accepted, and `_` is skipped
 /// anywhere except in the first position.
 ///
 /// # Errors
 ///
 /// Returns a `ParseError` if nothing follows the backslash, if the escape
 /// character is unknown, or if a `\x` / `\u` escape is malformed.
 pub(crate) fn unescape<E: Escapee>(input: &str, offset: usize) -> Result<(E, usize), ParseError> {
     let first = input.as_bytes().get(1)
         .ok_or_else(|| perr(offset, UnterminatedEscape))?;
     let out = match first {
         // Quote escapes
         b'\'' => (E::from_byte(b'\''), 2),
         b'"' => (E::from_byte(b'"'), 2),

         // Ascii escapes
         b'n' => (E::from_byte(b'\n'), 2),
         b'r' => (E::from_byte(b'\r'), 2),
         b't' => (E::from_byte(b'\t'), 2),
         b'\\' => (E::from_byte(b'\\'), 2),
         b'0' => (E::from_byte(b'\0'), 2),
         b'x' => {
             // `str::get` yields `None` both when fewer than two bytes follow
             // and when byte 4 is not on a character boundary.
             let hex_string = input.get(2..4)
                 .ok_or_else(|| perr(offset..offset + input.len(), UnterminatedEscape))?
                 .as_bytes();
             let first = hex_digit_value(hex_string[0])
                 .ok_or_else(|| perr(offset..offset + 4, InvalidXEscape))?;
             let second = hex_digit_value(hex_string[1])
                 .ok_or_else(|| perr(offset..offset + 4, InvalidXEscape))?;
             let value = second + 16 * first;

             if E::SUPPORTS_UNICODE && value > 0x7F {
                 return Err(perr(offset..offset + 4, NonAsciiXEscape));
             }

             (E::from_byte(value), 4)
         },

         // Unicode escape
         b'u' => {
             if !E::SUPPORTS_UNICODE {
                 return Err(perr(offset..offset + 2, UnicodeEscapeInByteLiteral));
             }

             if input.as_bytes().get(2) != Some(&b'{') {
                 return Err(perr(offset..offset + 2, UnicodeEscapeWithoutBrace));
             }

             let closing_pos = input.bytes().position(|b| b == b'}')
                 .ok_or_else(|| perr(offset..offset + input.len(), UnterminatedUnicodeEscape))?;

             let inner = &input[3..closing_pos];
             if inner.as_bytes().first() == Some(&b'_') {
                 // The offending `_` sits at index 3 of `input`; like every
                 // other span here it has to be shifted by `offset`.
                 return Err(perr(offset + 3, InvalidStartOfUnicodeEscape));
             }

             // NOTE(review): an empty `\u{}` leaves `v` at 0 and therefore
             // evaluates to U+0000; rustc rejects empty unicode escapes.
             // Confirm whether this should become an error.
             let mut v: u32 = 0;
             let mut digit_count = 0;
             for (i, b) in inner.bytes().enumerate() {
                 if b == b'_' {
                     continue;
                 }

                 let digit = hex_digit_value(b)
                     .ok_or_else(|| perr(offset + 3 + i, NonHexDigitInUnicodeEscape))?;

                 if digit_count == 6 {
                     return Err(perr(offset + 3 + i, TooManyDigitInUnicodeEscape));
                 }
                 digit_count += 1;
                 v = 16 * v + digit as u32;
             }

             // `closing_pos` is relative to `input`, so the end of the span
             // needs the same `offset` shift as its start.
             let c = std::char::from_u32(v)
                 .ok_or_else(|| perr(offset..offset + closing_pos + 1, InvalidUnicodeEscapeChar))?;

             (E::from_char(c), closing_pos + 1)
         }

         _ => return Err(perr(offset..offset + 2, UnknownEscape)),
     };

     Ok(out)
 }

 /// A value type that a single escape sequence can evaluate to.
 ///
 /// Implementors must be convertible into `char` so that unescaped values can
 /// be pushed onto a `String`.
 pub(crate) trait Escapee: Into<char> {
     /// `true` if `\u{...}` escapes are accepted. When `false`, non-ASCII
     /// input bytes are rejected and `\xHH` may exceed `0x7F`.
     const SUPPORTS_UNICODE: bool;

     /// Builds the value for a character produced by a `\u{...}` escape.
     fn from_char(c: char) -> Self;

     /// Builds the value for a single byte produced by a simple or `\xHH`
     /// escape.
     fn from_byte(b: u8) -> Self;
 }

 /// Byte flavour of [`Escapee`]: no unicode escapes, raw bytes as values.
 impl Escapee for u8 {
     const SUPPORTS_UNICODE: bool = false;

     /// Must never be reached: `unescape` returns an error for `\u` before
     /// calling this whenever `SUPPORTS_UNICODE` is `false`.
     fn from_char(_: char) -> Self {
         panic!("bug: `<u8 as Escapee>::from_char` was called");
     }

     /// A byte is its own unescaped value.
     fn from_byte(byte: u8) -> Self {
         byte
     }
 }

 /// Character flavour of [`Escapee`]: unicode escapes are allowed.
 impl Escapee for char {
     const SUPPORTS_UNICODE: bool = true;

     /// A character is its own unescaped value.
     fn from_char(ch: char) -> Self {
         ch
     }

     /// Widens the byte to the `char` with the same code point.
     fn from_byte(byte: u8) -> Self {
         char::from(byte)
     }
 }

 /// Returns `true` for the bytes that are skipped after the start of a string
 /// continuation (an unescaped backslash followed by `\n`): space, tab, line
 /// feed and carriage return.
 fn is_string_continue_skipable_whitespace(b: u8) -> bool {
     matches!(b, b' ' | b'\t' | b'\n' | b'\r')
 }

 /// Unescapes a whole string or byte string.
 ///
 /// Scanning starts at byte `offset` of `input` and stops before the last
 /// byte, which must be the closing `"` (callers presumably pass the index
 /// just after the opening quote as `offset` — TODO confirm).
 ///
 /// Returns `Ok(None)` if nothing in the scanned range had to be translated
 /// (no escape sequence, no string continuation, no `\r\n`), meaning the
 /// value equals the raw text. Otherwise returns `Ok(Some(value))` with the
 /// translated content.
 ///
 /// # Errors
 ///
 /// Returns a `ParseError` for invalid escapes, an isolated `\r`, a `"`
 /// before the last byte, non-ASCII bytes when `E::SUPPORTS_UNICODE` is
 /// `false`, and for input that does not end in a closing `"`.
 #[inline(never)]
 pub(crate) fn unescape_string<E: Escapee>(
     input: &str,
     offset: usize,
 ) -> Result<Option<String>, ParseError> {
     // Without this guard `input.len() - 1` below would underflow.
     if input.is_empty() {
         return Err(perr(None, UnterminatedString));
     }

     let bytes = input.as_bytes();
     // Index of the byte that is expected to be the closing quote.
     let last = input.len() - 1;

     let mut i = offset;
     let mut end_last_escape = offset;
     let mut value = String::new();
     while i < last {
         match bytes[i] {
             // Handle "string continue".
             b'\\' if bytes[i + 1] == b'\n' => {
                 value.push_str(&input[end_last_escape..i]);

                 // Find the first non-whitespace character.
                 let end_escape = input[i + 2..].bytes()
                     .position(|b| !is_string_continue_skipable_whitespace(b))
                     .ok_or_else(|| perr(None, UnterminatedString))?;

                 i += 2 + end_escape;
                 end_last_escape = i;
             }
             b'\\' => {
                 // `last` is not a character boundary if the input ends in a
                 // multi-byte character. Such input cannot end with `"`, so
                 // report it as unterminated instead of panicking on the slice.
                 let escape = input.get(i..last)
                     .ok_or_else(|| perr(None, UnterminatedString))?;
                 let (c, len) = unescape::<E>(escape, i)?;
                 value.push_str(&input[end_last_escape..i]);
                 value.push(c.into());
                 i += len;
                 end_last_escape = i;
             }
             b'\r' => {
                 if bytes[i + 1] == b'\n' {
                     value.push_str(&input[end_last_escape..i]);
                     value.push('\n');
                     i += 2;
                     end_last_escape = i;
                 } else {
                     return Err(perr(i, IsolatedCr));
                 }
             }
             b'"' => return Err(perr(i + 1..input.len(), UnexpectedChar)),
             b if !E::SUPPORTS_UNICODE && !b.is_ascii()
                 => return Err(perr(i, NonAsciiInByteLiteral)),
             _ => i += 1,
         }
     }

     // If `input.len() == offset`, nothing follows the scanned prefix, so the
     // final byte cannot be a separate closing quote.
     if bytes[last] != b'"' || input.len() == offset {
         return Err(perr(None, UnterminatedString));
     }

     // Every translated sequence moves `end_last_escape` forward by at least
     // two bytes, so it still equals `offset` exactly when nothing was
     // translated. Checking `value.is_empty()` instead would misreport a
     // string continuation that has not contributed any text yet (e.g.
     // `"\<newline>foo"`) as "value equals the raw input".
     if end_last_escape == offset {
         return Ok(None);
     }

     // Something was translated, so the remaining untranslated tail still
     // has to be appended.
     value.push_str(&input[end_last_escape..last]);
     Ok(Some(value))
 }

 /// Scans and validates a raw (byte) string literal, replacing every `\r\n`
 /// with a single `\n`.
 ///
 /// `offset` is the index in `input` at which the run of `#`s (possibly empty)
 /// in front of the opening quote starts.
 ///
 /// Returns the normalized content — `None` if the literal contained no
 /// `\r\n`, so the value equals the raw text — together with the number of
 /// hashes the literal uses.
 ///
 /// # Errors
 ///
 /// Returns a `ParseError` if the hashes are not followed by `"`, if no
 /// matching closing delimiter exists, if anything follows the closing
 /// delimiter, on an isolated `\r` when `E::SUPPORTS_UNICODE` is `true`, and
 /// on non-ASCII bytes when it is `false`.
 #[inline(never)]
 pub(crate) fn scan_raw_string<E: Escapee>(
     input: &str,
     offset: usize,
 ) -> Result<(Option<String>, u32), ParseError> {
     let bytes = input.as_bytes();

     // Length of the `#` run in front of the opening quote.
     let num_hashes = input[offset..].bytes().position(|b| b != b'#')
         .ok_or(perr(None, InvalidLiteral))?;

     let open_quote = offset + num_hashes;
     if bytes.get(open_quote) != Some(&b'"') {
         return Err(perr(None, InvalidLiteral));
     }
     let hashes = &input[offset..open_quote];
     let body_start = open_quote + 1;

     let mut closing = None;
     let mut pos = body_start;
     let mut copied_until = body_start;
     let mut normalized = String::new();
     while pos < input.len() {
         match bytes[pos] {
             // A quote followed by the right number of hashes ends the body.
             b'"' if input[pos + 1..].starts_with(hashes) => {
                 closing = Some(pos);
                 break;
             }

             // Convert `\r\n` into `\n`. This is currently not well
             // documented in the Rust reference, but is done even for raw
             // strings, because rustc converts all line endings when reading
             // source files.
             b'\r' if bytes.get(pos + 1) == Some(&b'\n') => {
                 normalized.push_str(&input[copied_until..pos]);
                 normalized.push('\n');
                 pos += 2;
                 copied_until = pos;
             }

             // A lone `\r` is an error in raw strings, but not in raw byte
             // strings.
             b'\r' if E::SUPPORTS_UNICODE => return Err(perr(pos, IsolatedCr)),

             b if !E::SUPPORTS_UNICODE && !b.is_ascii() => {
                 return Err(perr(pos, NonAsciiInByteLiteral));
             }

             _ => pos += 1,
         }
     }

     let closing = closing.ok_or(perr(None, UnterminatedRawString))?;

     // Nothing may follow the closing quote and its hashes.
     let literal_end = closing + num_hashes + 1;
     if literal_end != input.len() {
         return Err(perr(literal_end..input.len(), UnexpectedChar));
     }

     // `normalized` only stays empty if no `\r\n` was seen; in that case the
     // value equals the raw body and `None` is reported.
     if normalized.is_empty() {
         return Ok((None, num_hashes as u32));
     }

     // Append the part of the body after the last `\r\n`.
     normalized.push_str(&input[copied_until..closing]);
     Ok((Some(normalized), num_hashes as u32))
 }
	use crate::{ParseError, err::{perr, ParseErrorKind::*}, parse::hex_digit_value};


	/// Parses the single escape sequence at the start of `input`.
	///
	/// `input` must start with `\`; that first byte is not inspected. `offset` is
	/// added to positions inside `input` when building error spans, so that the
	/// spans refer to the text `input` was sliced from.
	///
	/// On success, returns the unescaped value together with the number of bytes
	/// of `input` the escape sequence occupies.
	///
	/// Recognized escapes are `\'`, `\"`, `\n`, `\r`, `\t`, `\\`, `\0`, `\xHH` and
	/// `\u{...}`. `\xHH` values above `0x7F` are rejected when
	/// `E::SUPPORTS_UNICODE` is `true`; `\u{...}` is rejected when it is `false`.
	/// Inside the braces at most six hex digits are accepted, and `_` is skipped
	/// anywhere except in the first position.
	///
	/// # Errors
	///
	/// Returns a `ParseError` if nothing follows the backslash, if the escape
	/// character is unknown, or if a `\x` / `\u` escape is malformed.
	pub(crate) fn unescape<E: Escapee>(input: &str, offset: usize) -> Result<(E, usize), ParseError> {
	    let first = input.as_bytes().get(1)
	        .ok_or_else(|| perr(offset, UnterminatedEscape))?;
	    let out = match first {
	        // Quote escapes
	        b'\'' => (E::from_byte(b'\''), 2),
	        b'"' => (E::from_byte(b'"'), 2),

	        // Ascii escapes
	        b'n' => (E::from_byte(b'\n'), 2),
	        b'r' => (E::from_byte(b'\r'), 2),
	        b't' => (E::from_byte(b'\t'), 2),
	        b'\\' => (E::from_byte(b'\\'), 2),
	        b'0' => (E::from_byte(b'\0'), 2),
	        b'x' => {
	            // `str::get` yields `None` both when fewer than two bytes follow
	            // and when byte 4 is not on a character boundary.
	            let hex_string = input.get(2..4)
	                .ok_or_else(|| perr(offset..offset + input.len(), UnterminatedEscape))?
	                .as_bytes();
	            let first = hex_digit_value(hex_string[0])
	                .ok_or_else(|| perr(offset..offset + 4, InvalidXEscape))?;
	            let second = hex_digit_value(hex_string[1])
	                .ok_or_else(|| perr(offset..offset + 4, InvalidXEscape))?;
	            let value = second + 16 * first;

	            if E::SUPPORTS_UNICODE && value > 0x7F {
	                return Err(perr(offset..offset + 4, NonAsciiXEscape));
	            }

	            (E::from_byte(value), 4)
	        },

	        // Unicode escape
	        b'u' => {
	            if !E::SUPPORTS_UNICODE {
	                return Err(perr(offset..offset + 2, UnicodeEscapeInByteLiteral));
	            }

	            if input.as_bytes().get(2) != Some(&b'{') {
	                return Err(perr(offset..offset + 2, UnicodeEscapeWithoutBrace));
	            }

	            let closing_pos = input.bytes().position(|b| b == b'}')
	                .ok_or_else(|| perr(offset..offset + input.len(), UnterminatedUnicodeEscape))?;

	            let inner = &input[3..closing_pos];
	            if inner.as_bytes().first() == Some(&b'_') {
	                // The offending `_` sits at index 3 of `input`; like every
	                // other span here it has to be shifted by `offset`.
	                return Err(perr(offset + 3, InvalidStartOfUnicodeEscape));
	            }

	            // NOTE(review): an empty `\u{}` leaves `v` at 0 and therefore
	            // evaluates to U+0000; rustc rejects empty unicode escapes.
	            // Confirm whether this should become an error.
	            let mut v: u32 = 0;
	            let mut digit_count = 0;
	            for (i, b) in inner.bytes().enumerate() {
	                if b == b'_' {
	                    continue;
	                }

	                let digit = hex_digit_value(b)
	                    .ok_or_else(|| perr(offset + 3 + i, NonHexDigitInUnicodeEscape))?;

	                if digit_count == 6 {
	                    return Err(perr(offset + 3 + i, TooManyDigitInUnicodeEscape));
	                }
	                digit_count += 1;
	                v = 16 * v + digit as u32;
	            }

	            // `closing_pos` is relative to `input`, so the end of the span
	            // needs the same `offset` shift as its start.
	            let c = std::char::from_u32(v)
	                .ok_or_else(|| perr(offset..offset + closing_pos + 1, InvalidUnicodeEscapeChar))?;

	            (E::from_char(c), closing_pos + 1)
	        }

	        _ => return Err(perr(offset..offset + 2, UnknownEscape)),
	    };

	    Ok(out)
	}

	/// A value type that a single escape sequence can evaluate to.
	///
	/// Implementors must be convertible into `char` so that unescaped values can
	/// be pushed onto a `String`.
	pub(crate) trait Escapee: Into<char> {
	    /// `true` if `\u{...}` escapes are accepted. When `false`, non-ASCII
	    /// input bytes are rejected and `\xHH` may exceed `0x7F`.
	    const SUPPORTS_UNICODE: bool;

	    /// Builds the value for a character produced by a `\u{...}` escape.
	    fn from_char(c: char) -> Self;

	    /// Builds the value for a single byte produced by a simple or `\xHH`
	    /// escape.
	    fn from_byte(b: u8) -> Self;
	}

	/// Byte flavour of [`Escapee`]: no unicode escapes, raw bytes as values.
	impl Escapee for u8 {
	    const SUPPORTS_UNICODE: bool = false;

	    /// Must never be reached: `unescape` returns an error for `\u` before
	    /// calling this whenever `SUPPORTS_UNICODE` is `false`.
	    fn from_char(_: char) -> Self {
	        panic!("bug: `<u8 as Escapee>::from_char` was called");
	    }

	    /// A byte is its own unescaped value.
	    fn from_byte(byte: u8) -> Self {
	        byte
	    }
	}

	/// Character flavour of [`Escapee`]: unicode escapes are allowed.
	impl Escapee for char {
	    const SUPPORTS_UNICODE: bool = true;

	    /// A character is its own unescaped value.
	    fn from_char(ch: char) -> Self {
	        ch
	    }

	    /// Widens the byte to the `char` with the same code point.
	    fn from_byte(byte: u8) -> Self {
	        char::from(byte)
	    }
	}

	/// Returns `true` for the bytes that are skipped after the start of a string
	/// continuation (an unescaped backslash followed by `\n`): space, tab, line
	/// feed and carriage return.
	fn is_string_continue_skipable_whitespace(b: u8) -> bool {
	    matches!(b, b' ' | b'\t' | b'\n' | b'\r')
	}

	/// Unescapes a whole string or byte string.
	///
	/// Scanning starts at byte `offset` of `input` and stops before the last
	/// byte, which must be the closing `"` (callers presumably pass the index
	/// just after the opening quote as `offset` — TODO confirm).
	///
	/// Returns `Ok(None)` if nothing in the scanned range had to be translated
	/// (no escape sequence, no string continuation, no `\r\n`), meaning the
	/// value equals the raw text. Otherwise returns `Ok(Some(value))` with the
	/// translated content.
	///
	/// # Errors
	///
	/// Returns a `ParseError` for invalid escapes, an isolated `\r`, a `"`
	/// before the last byte, non-ASCII bytes when `E::SUPPORTS_UNICODE` is
	/// `false`, and for input that does not end in a closing `"`.
	#[inline(never)]
	pub(crate) fn unescape_string<E: Escapee>(
	    input: &str,
	    offset: usize,
	) -> Result<Option<String>, ParseError> {
	    // Without this guard `input.len() - 1` below would underflow.
	    if input.is_empty() {
	        return Err(perr(None, UnterminatedString));
	    }

	    let bytes = input.as_bytes();
	    // Index of the byte that is expected to be the closing quote.
	    let last = input.len() - 1;

	    let mut i = offset;
	    let mut end_last_escape = offset;
	    let mut value = String::new();
	    while i < last {
	        match bytes[i] {
	            // Handle "string continue".
	            b'\\' if bytes[i + 1] == b'\n' => {
	                value.push_str(&input[end_last_escape..i]);

	                // Find the first non-whitespace character.
	                let end_escape = input[i + 2..].bytes()
	                    .position(|b| !is_string_continue_skipable_whitespace(b))
	                    .ok_or_else(|| perr(None, UnterminatedString))?;

	                i += 2 + end_escape;
	                end_last_escape = i;
	            }
	            b'\\' => {
	                // `last` is not a character boundary if the input ends in a
	                // multi-byte character. Such input cannot end with `"`, so
	                // report it as unterminated instead of panicking on the slice.
	                let escape = input.get(i..last)
	                    .ok_or_else(|| perr(None, UnterminatedString))?;
	                let (c, len) = unescape::<E>(escape, i)?;
	                value.push_str(&input[end_last_escape..i]);
	                value.push(c.into());
	                i += len;
	                end_last_escape = i;
	            }
	            b'\r' => {
	                if bytes[i + 1] == b'\n' {
	                    value.push_str(&input[end_last_escape..i]);
	                    value.push('\n');
	                    i += 2;
	                    end_last_escape = i;
	                } else {
	                    return Err(perr(i, IsolatedCr));
	                }
	            }
	            b'"' => return Err(perr(i + 1..input.len(), UnexpectedChar)),
	            b if !E::SUPPORTS_UNICODE && !b.is_ascii()
	                => return Err(perr(i, NonAsciiInByteLiteral)),
	            _ => i += 1,
	        }
	    }

	    // If `input.len() == offset`, nothing follows the scanned prefix, so the
	    // final byte cannot be a separate closing quote.
	    if bytes[last] != b'"' || input.len() == offset {
	        return Err(perr(None, UnterminatedString));
	    }

	    // Every translated sequence moves `end_last_escape` forward by at least
	    // two bytes, so it still equals `offset` exactly when nothing was
	    // translated. Checking `value.is_empty()` instead would misreport a
	    // string continuation that has not contributed any text yet (e.g.
	    // `"\<newline>foo"`) as "value equals the raw input".
	    if end_last_escape == offset {
	        return Ok(None);
	    }

	    // Something was translated, so the remaining untranslated tail still
	    // has to be appended.
	    value.push_str(&input[end_last_escape..last]);
	    Ok(Some(value))
	}

	/// Scans and validates a raw (byte) string literal, replacing every `\r\n`
	/// with a single `\n`.
	///
	/// `offset` is the index in `input` at which the run of `#`s (possibly empty)
	/// in front of the opening quote starts.
	///
	/// Returns the normalized content — `None` if the literal contained no
	/// `\r\n`, so the value equals the raw text — together with the number of
	/// hashes the literal uses.
	///
	/// # Errors
	///
	/// Returns a `ParseError` if the hashes are not followed by `"`, if no
	/// matching closing delimiter exists, if anything follows the closing
	/// delimiter, on an isolated `\r` when `E::SUPPORTS_UNICODE` is `true`, and
	/// on non-ASCII bytes when it is `false`.
	#[inline(never)]
	pub(crate) fn scan_raw_string<E: Escapee>(
	    input: &str,
	    offset: usize,
	) -> Result<(Option<String>, u32), ParseError> {
	    let bytes = input.as_bytes();

	    // Length of the `#` run in front of the opening quote.
	    let num_hashes = input[offset..].bytes().position(|b| b != b'#')
	        .ok_or(perr(None, InvalidLiteral))?;

	    let open_quote = offset + num_hashes;
	    if bytes.get(open_quote) != Some(&b'"') {
	        return Err(perr(None, InvalidLiteral));
	    }
	    let hashes = &input[offset..open_quote];
	    let body_start = open_quote + 1;

	    let mut closing = None;
	    let mut pos = body_start;
	    let mut copied_until = body_start;
	    let mut normalized = String::new();
	    while pos < input.len() {
	        match bytes[pos] {
	            // A quote followed by the right number of hashes ends the body.
	            b'"' if input[pos + 1..].starts_with(hashes) => {
	                closing = Some(pos);
	                break;
	            }

	            // Convert `\r\n` into `\n`. This is currently not well
	            // documented in the Rust reference, but is done even for raw
	            // strings, because rustc converts all line endings when reading
	            // source files.
	            b'\r' if bytes.get(pos + 1) == Some(&b'\n') => {
	                normalized.push_str(&input[copied_until..pos]);
	                normalized.push('\n');
	                pos += 2;
	                copied_until = pos;
	            }

	            // A lone `\r` is an error in raw strings, but not in raw byte
	            // strings.
	            b'\r' if E::SUPPORTS_UNICODE => return Err(perr(pos, IsolatedCr)),

	            b if !E::SUPPORTS_UNICODE && !b.is_ascii() => {
	                return Err(perr(pos, NonAsciiInByteLiteral));
	            }

	            _ => pos += 1,
	        }
	    }

	    let closing = closing.ok_or(perr(None, UnterminatedRawString))?;

	    // Nothing may follow the closing quote and its hashes.
	    let literal_end = closing + num_hashes + 1;
	    if literal_end != input.len() {
	        return Err(perr(literal_end..input.len(), UnexpectedChar));
	    }

	    // `normalized` only stays empty if no `\r\n` was seen; in that case the
	    // value equals the raw body and `None` is reported.
	    if normalized.is_empty() {
	        return Ok((None, num_hashes as u32));
	    }

	    // Append the part of the body after the last `\r\n`.
	    normalized.push_str(&input[copied_until..closing]);
	    Ok((Some(normalized), num_hashes as u32))
	}