Blame - src/escape.rs - platform/external/rust/crates/litrs

blob: 19b63a106f967243daa7d64f4131492a4a613c44 [file] [log] [blame]

Frederick Mayle	7bad3ce	2022-10-27 16:29:18 -0700	[diff] [blame]	1	use crate::{ParseError, err::{perr, ParseErrorKind::*}, parse::hex_digit_value};
				2
				3
				4	/// Must start with `\`
				5	pub(crate) fn unescape<E: Escapee>(input: &str, offset: usize) -> Result<(E, usize), ParseError> {
				6	let first = input.as_bytes().get(1)
				7	.ok_or(perr(offset, UnterminatedEscape))?;
				8	let out = match first {
				9	// Quote escapes
				10	b'\'' => (E::from_byte(b'\''), 2),
				11	b'"' => (E::from_byte(b'"'), 2),
				12
				13	// Ascii escapes
				14	b'n' => (E::from_byte(b'\n'), 2),
				15	b'r' => (E::from_byte(b'\r'), 2),
				16	b't' => (E::from_byte(b'\t'), 2),
				17	b'\\' => (E::from_byte(b'\\'), 2),
				18	b'0' => (E::from_byte(b'\0'), 2),
				19	b'x' => {
				20	let hex_string = input.get(2..4)
				21	.ok_or(perr(offset..offset + input.len(), UnterminatedEscape))?
				22	.as_bytes();
				23	let first = hex_digit_value(hex_string[0])
				24	.ok_or(perr(offset..offset + 4, InvalidXEscape))?;
				25	let second = hex_digit_value(hex_string[1])
				26	.ok_or(perr(offset..offset + 4, InvalidXEscape))?;
				27	let value = second + 16 * first;
				28
				29	if E::SUPPORTS_UNICODE && value > 0x7F {
				30	return Err(perr(offset..offset + 4, NonAsciiXEscape));
				31	}
				32
				33	(E::from_byte(value), 4)
				34	},
				35
				36	// Unicode escape
				37	b'u' => {
				38	if !E::SUPPORTS_UNICODE {
				39	return Err(perr(offset..offset + 2, UnicodeEscapeInByteLiteral));
				40	}
				41
				42	if input.as_bytes().get(2) != Some(&b'{') {
				43	return Err(perr(offset..offset + 2, UnicodeEscapeWithoutBrace));
				44	}
				45
				46	let closing_pos = input.bytes().position(\|b\| b == b'}')
				47	.ok_or(perr(offset..offset + input.len(), UnterminatedUnicodeEscape))?;
				48
				49	let inner = &input[3..closing_pos];
				50	if inner.as_bytes().first() == Some(&b'_') {
				51	return Err(perr(4, InvalidStartOfUnicodeEscape));
				52	}
				53
				54	let mut v: u32 = 0;
				55	let mut digit_count = 0;
				56	for (i, b) in inner.bytes().enumerate() {
				57	if b == b'_'{
				58	continue;
				59	}
				60
				61	let digit = hex_digit_value(b)
				62	.ok_or(perr(offset + 3 + i, NonHexDigitInUnicodeEscape))?;
				63
				64	if digit_count == 6 {
				65	return Err(perr(offset + 3 + i, TooManyDigitInUnicodeEscape));
				66	}
				67	digit_count += 1;
				68	v = 16 * v + digit as u32;
				69	}
				70
				71	let c = std::char::from_u32(v)
				72	.ok_or(perr(offset..closing_pos + 1, InvalidUnicodeEscapeChar))?;
				73
				74	(E::from_char(c), closing_pos + 1)
				75	}
				76
				77	_ => return Err(perr(offset..offset + 2, UnknownEscape)),
				78	};
				79
				80	Ok(out)
				81	}
				82
				83	pub(crate) trait Escapee: Into<char> {
				84	const SUPPORTS_UNICODE: bool;
				85	fn from_byte(b: u8) -> Self;
				86	fn from_char(c: char) -> Self;
				87	}
				88
				89	impl Escapee for u8 {
				90	const SUPPORTS_UNICODE: bool = false;
				91	fn from_byte(b: u8) -> Self {
				92	b
				93	}
				94	fn from_char(_: char) -> Self {
				95	panic!("bug: `<u8 as Escapee>::from_char` was called");
				96	}
				97	}
				98
				99	impl Escapee for char {
				100	const SUPPORTS_UNICODE: bool = true;
				101	fn from_byte(b: u8) -> Self {
				102	b.into()
				103	}
				104	fn from_char(c: char) -> Self {
				105	c
				106	}
				107	}
				108
				109	/// Checks whether the character is skipped after a string continue start
				110	/// (unescaped backlash followed by `\n`).
Jeff Vander Stoep	d362f28	2023-02-03 10:26:47 +0100	[diff] [blame]	111	fn is_string_continue_skipable_whitespace(b: u8) -> bool {
Frederick Mayle	7bad3ce	2022-10-27 16:29:18 -0700	[diff] [blame]	112	b == b' ' \|\| b == b'\t' \|\| b == b'\n' \|\| b == b'\r'
				113	}
				114
				115	/// Unescapes a whole string or byte string.
Jeff Vander Stoep	d362f28	2023-02-03 10:26:47 +0100	[diff] [blame]	116	#[inline(never)]
Frederick Mayle	7bad3ce	2022-10-27 16:29:18 -0700	[diff] [blame]	117	pub(crate) fn unescape_string<E: Escapee>(
				118	input: &str,
				119	offset: usize,
				120	) -> Result<Option<String>, ParseError> {
				121	let mut i = offset;
				122	let mut end_last_escape = offset;
				123	let mut value = String::new();
				124	while i < input.len() - 1 {
				125	match input.as_bytes()[i] {
				126	// Handle "string continue".
				127	b'\\' if input.as_bytes()[i + 1] == b'\n' => {
				128	value.push_str(&input[end_last_escape..i]);
				129
				130	// Find the first non-whitespace character.
				131	let end_escape = input[i + 2..].bytes()
				132	.position(\|b\| !is_string_continue_skipable_whitespace(b))
				133	.ok_or(perr(None, UnterminatedString))?;
				134
				135	i += 2 + end_escape;
				136	end_last_escape = i;
				137	}
				138	b'\\' => {
				139	let (c, len) = unescape::<E>(&input[i..input.len() - 1], i)?;
				140	value.push_str(&input[end_last_escape..i]);
				141	value.push(c.into());
				142	i += len;
				143	end_last_escape = i;
				144	}
				145	b'\r' => {
				146	if input.as_bytes()[i + 1] == b'\n' {
				147	value.push_str(&input[end_last_escape..i]);
				148	value.push('\n');
				149	i += 2;
				150	end_last_escape = i;
				151	} else {
				152	return Err(perr(i, IsolatedCr))
				153	}
				154	}
				155	b'"' => return Err(perr(i + 1..input.len(), UnexpectedChar)),
				156	b if !E::SUPPORTS_UNICODE && !b.is_ascii()
				157	=> return Err(perr(i, NonAsciiInByteLiteral)),
				158	_ => i += 1,
				159	}
				160	}
				161
				162	if input.as_bytes()[input.len() - 1] != b'"' \|\| input.len() == offset {
				163	return Err(perr(None, UnterminatedString));
				164	}
				165
				166	// `value` is only empty if there was no escape in the input string
				167	// (with the special case of the input being empty). This means the
				168	// string value basically equals the input, so we store `None`.
				169	let value = if value.is_empty() {
				170	None
				171	} else {
				172	// There was an escape in the string, so we need to push the
				173	// remaining unescaped part of the string still.
				174	value.push_str(&input[end_last_escape..input.len() - 1]);
				175	Some(value)
				176	};
				177
				178	Ok(value)
				179	}
				180
				181	/// Reads and checks a raw (byte) string literal, converting `\r\n` sequences to
				182	/// just `\n` sequences. Returns an optional new string (if the input contained
				183	/// any `\r\n`) and the number of hashes used by the literal.
Jeff Vander Stoep	d362f28	2023-02-03 10:26:47 +0100	[diff] [blame]	184	#[inline(never)]
Frederick Mayle	7bad3ce	2022-10-27 16:29:18 -0700	[diff] [blame]	185	pub(crate) fn scan_raw_string<E: Escapee>(
				186	input: &str,
				187	offset: usize,
				188	) -> Result<(Option<String>, u32), ParseError> {
				189	// Raw string literal
				190	let num_hashes = input[offset..].bytes().position(\|b\| b != b'#')
				191	.ok_or(perr(None, InvalidLiteral))?;
				192
				193	if input.as_bytes().get(offset + num_hashes) != Some(&b'"') {
				194	return Err(perr(None, InvalidLiteral));
				195	}
				196	let start_inner = offset + num_hashes + 1;
				197	let hashes = &input[offset..num_hashes + offset];
				198
				199	let mut closing_quote_pos = None;
				200	let mut i = start_inner;
				201	let mut end_last_escape = start_inner;
				202	let mut value = String::new();
				203	while i < input.len() {
				204	let b = input.as_bytes()[i];
				205	if b == b'"' && input[i + 1..].starts_with(hashes) {
				206	closing_quote_pos = Some(i);
				207	break;
				208	}
				209
				210	if b == b'\r' {
				211	// Convert `\r\n` into `\n`. This is currently not well documented
				212	// in the Rust reference, but is done even for raw strings. That's
				213	// because rustc simply converts all line endings when reading
				214	// source files.
				215	if input.as_bytes().get(i + 1) == Some(&b'\n') {
				216	value.push_str(&input[end_last_escape..i]);
				217	value.push('\n');
				218	i += 2;
				219	end_last_escape = i;
				220	continue;
				221	} else if E::SUPPORTS_UNICODE {
				222	// If no \n follows the \r and we are scanning a raw string
				223	// (not raw byte string), we error.
				224	return Err(perr(i, IsolatedCr))
				225	}
				226	}
				227
				228	if !E::SUPPORTS_UNICODE {
				229	if !b.is_ascii() {
				230	return Err(perr(i, NonAsciiInByteLiteral));
				231	}
				232	}
				233
				234	i += 1;
				235	}
				236
				237	let closing_quote_pos = closing_quote_pos
				238	.ok_or(perr(None, UnterminatedRawString))?;
				239
				240	if closing_quote_pos + num_hashes != input.len() - 1 {
				241	return Err(perr(closing_quote_pos + num_hashes + 1..input.len(), UnexpectedChar));
				242	}
				243
				244	// `value` is only empty if there was no \r\n in the input string (with the
				245	// special case of the input being empty). This means the string value
				246	// equals the input, so we store `None`.
				247	let value = if value.is_empty() {
				248	None
				249	} else {
				250	// There was an \r\n in the string, so we need to push the remaining
				251	// unescaped part of the string still.
				252	value.push_str(&input[end_last_escape..closing_quote_pos]);
				253	Some(value)
				254	};
				255
				256	Ok((value, num_hashes as u32))
				257	}