// blob: c17a15892713a011f5a70c7c51bb800933029b07 [file] [log] [blame] [edit]
use std::{char, str};
use std::num::ParseIntError;
use synom::IResult;
pub fn cooked_string(input: &str) -> IResult<&str, String> {
let mut s = String::new();
let mut chars = input.char_indices().peekable();
while let Some((byte_offset, ch)) = chars.next() {
match ch {
'"' => {
return IResult::Done(&input[byte_offset..], s);
}
'\r' => {
if let Some((_, '\n')) = chars.next() {
s.push('\n');
} else {
break;
}
}
'\\' => {
match chars.next() {
Some((_, 'x')) => {
match backslash_x_char(&mut chars) {
Some(ch) => s.push(ch),
None => break,
}
}
Some((_, 'n')) => s.push('\n'),
Some((_, 'r')) => s.push('\r'),
Some((_, 't')) => s.push('\t'),
Some((_, '\\')) => s.push('\\'),
Some((_, '0')) => s.push('\0'),
Some((_, 'u')) => {
match backslash_u(&mut chars) {
Some(ch) => s.push(ch),
None => break,
}
}
Some((_, '\'')) => s.push('\''),
Some((_, '"')) => s.push('"'),
Some((_, '\n')) | Some((_, '\r')) => {
while let Some(&(_, ch)) = chars.peek() {
if ch.is_whitespace() {
chars.next();
} else {
break;
}
}
}
_ => break,
}
}
ch => {
s.push(ch);
}
}
}
IResult::Error
}
pub fn cooked_byte_string(mut input: &str) -> IResult<&str, Vec<u8>> {
let mut vec = Vec::new();
let mut bytes = input.bytes().enumerate();
'outer: while let Some((offset, b)) = bytes.next() {
match b {
b'"' => {
return IResult::Done(&input[offset..], vec);
}
b'\r' => {
if let Some((_, b'\n')) = bytes.next() {
vec.push(b'\n');
} else {
break;
}
}
b'\\' => {
match bytes.next() {
Some((_, b'x')) => {
match backslash_x_byte(&mut bytes) {
Some(b) => vec.push(b),
None => break,
}
}
Some((_, b'n')) => vec.push(b'\n'),
Some((_, b'r')) => vec.push(b'\r'),
Some((_, b't')) => vec.push(b'\t'),
Some((_, b'\\')) => vec.push(b'\\'),
Some((_, b'0')) => vec.push(b'\0'),
Some((_, b'\'')) => vec.push(b'\''),
Some((_, b'"')) => vec.push(b'"'),
Some((newline, b'\n')) |
Some((newline, b'\r')) => {
let rest = &input[newline + 1..];
for (offset, ch) in rest.char_indices() {
if !ch.is_whitespace() {
input = &rest[offset..];
bytes = input.bytes().enumerate();
continue 'outer;
}
}
break;
}
_ => break,
}
}
b if b < 0x80 => {
vec.push(b);
}
_ => break,
}
}
IResult::Error
}
pub fn cooked_char(input: &str) -> IResult<&str, char> {
let mut chars = input.char_indices();
let ch = match chars.next().map(|(_, ch)| ch) {
Some('\\') => {
match chars.next().map(|(_, ch)| ch) {
Some('x') => backslash_x_char(&mut chars),
Some('n') => Some('\n'),
Some('r') => Some('\r'),
Some('t') => Some('\t'),
Some('\\') => Some('\\'),
Some('0') => Some('\0'),
Some('u') => backslash_u(&mut chars),
Some('\'') => Some('\''),
Some('"') => Some('"'),
_ => None,
}
}
ch => ch,
};
match ch {
Some(ch) => IResult::Done(chars.as_str(), ch),
None => IResult::Error,
}
}
/// Decodes a single, possibly escaped, byte from the start of `input`.
///
/// On success the remainder is everything after the consumed byte or escape
/// sequence. Yields `IResult::Error` for empty input, an invalid escape, or
/// an unescaped non-ASCII byte.
pub fn cooked_byte(input: &str) -> IResult<&str, u8> {
    let mut bytes = input.bytes().enumerate();
    let b = match bytes.next().map(|(_, b)| b) {
        Some(b'\\') => {
            match bytes.next().map(|(_, b)| b) {
                Some(b'x') => backslash_x_byte(&mut bytes),
                Some(b'n') => Some(b'\n'),
                Some(b'r') => Some(b'\r'),
                Some(b't') => Some(b'\t'),
                Some(b'\\') => Some(b'\\'),
                Some(b'0') => Some(b'\0'),
                Some(b'\'') => Some(b'\''),
                Some(b'"') => Some(b'"'),
                _ => None,
            }
        }
        // A raw byte must be ASCII, as in `cooked_byte_string`. Accepting
        // the lead byte of a multi-byte character would also make the slice
        // below start inside that character and panic.
        Some(b) if b < 0x80 => Some(b),
        _ => None,
    };
    match b {
        Some(b) => {
            // Everything consumed so far is ASCII, so the next byte offset
            // is always a valid `str` boundary.
            match bytes.next() {
                Some((offset, _)) => IResult::Done(&input[offset..], b),
                None => IResult::Done("", b),
            }
        }
        None => IResult::Error,
    }
}
pub fn raw_string(input: &str) -> IResult<&str, (String, usize)> {
let mut chars = input.char_indices();
let mut n = 0;
while let Some((byte_offset, ch)) = chars.next() {
match ch {
'"' => {
n = byte_offset;
break;
}
'#' => {}
_ => return IResult::Error,
}
}
let mut s = String::new();
for (byte_offset, ch) in chars {
match ch {
'"' if input[byte_offset + 1..].starts_with(&input[..n]) => {
let rest = &input[byte_offset + 1 + n..];
return IResult::Done(rest, (s, n));
}
'\r' => {}
_ => s.push(ch),
}
}
IResult::Error
}
// Pulls the next `(index, item)` pair from the iterator named by `$chars`
// and evaluates to the item when it matches one of the listed patterns.
// If the iterator is exhausted or the item matches none of them, the
// *enclosing function* returns `None`.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat $(| $rest:pat)*) => {
        match $chars.next().map(|(_, ch)| ch) {
            Some(ch @ $pat) $(| Some(ch @ $rest))* => ch,
            _ => return None,
        }
    };
}
// Gives `from_hex!` one name through which to reach the inherent
// `from_str_radix` of whichever integer type the call site infers.
trait FromStrRadix: Sized {
    // Parses `src` as an integer written in base `radix`.
    fn from_str_radix(src: &str, radix: u32) -> Result<Self, ParseIntError>;
}

impl FromStrRadix for u8 {
    fn from_str_radix(digits: &str, base: u32) -> Result<u8, ParseIntError> {
        // Delegates to the inherent method on the primitive.
        u8::from_str_radix(digits, base)
    }
}

impl FromStrRadix for u32 {
    fn from_str_radix(digits: &str, base: u32) -> Result<u32, ParseIntError> {
        // Delegates to the inherent method on the primitive.
        u32::from_str_radix(digits, base)
    }
}
// Treats the listed identifiers (each holding one hex digit as a `char` or
// `u8`) as a base-16 number and evaluates to it, with the integer type
// chosen by inference through `FromStrRadix`. Panics if the digits are not
// valid UTF-8 / hex or do not fit the target type.
macro_rules! from_hex {
    ($($digit:ident)+) => {
        FromStrRadix::from_str_radix(
            str::from_utf8(&[$($digit as u8),+]).unwrap(),
            16
        ).unwrap()
    };
}
// Decodes the two digits that follow `\x` in a char or string literal.
//
// The first digit must be in `0..=7` and the second any hex digit, so the
// result is always an ASCII character. Returns `None` when either digit is
// missing or out of range. Digits are converted with `char::to_digit`
// rather than being formatted into a temporary string and re-parsed.
fn backslash_x_char<I>(chars: &mut I) -> Option<char>
    where I: Iterator<Item = (usize, char)>
{
    // Radix 8 accepts exactly '0'..='7'.
    let hi = match chars.next().and_then(|(_, ch)| ch.to_digit(8)) {
        Some(digit) => digit,
        None => return None,
    };
    // Radix 16 accepts '0'..='9', 'a'..='f' and 'A'..='F'.
    let lo = match chars.next().and_then(|(_, ch)| ch.to_digit(16)) {
        Some(digit) => digit,
        None => return None,
    };
    char::from_u32(hi * 16 + lo)
}
// Decodes the two hex digits that follow `\x` in a byte or byte-string
// literal.
//
// Both digits may be any hex digit, so every value `0x00..=0xFF` can be
// produced. Returns `None` when either digit is missing or not hex. Digits
// are converted with `char::to_digit` rather than being formatted into a
// temporary string and re-parsed.
fn backslash_x_byte<I>(chars: &mut I) -> Option<u8>
    where I: Iterator<Item = (usize, u8)>
{
    // Bytes >= 0x80 map to non-ASCII chars, which `to_digit` rejects.
    let hi = match chars.next().and_then(|(_, b)| (b as char).to_digit(16)) {
        Some(digit) => digit,
        None => return None,
    };
    let lo = match chars.next().and_then(|(_, b)| (b as char).to_digit(16)) {
        Some(digit) => digit,
        None => return None,
    };
    // Two nibbles are at most 0xFF, so the narrowing cast is lossless.
    Some((hi * 16 + lo) as u8)
}
// Decodes the `{…}` part of a `\u{…}` escape: an opening brace, one to six
// hex digits, and a closing brace.
//
// Returns `None` when the braces are missing, when there are no digits or
// more than six, when a non-hex character appears, or when the value is not
// a valid `char` (for example a surrogate). Digits are accumulated with
// `char::to_digit` rather than being formatted into a temporary string and
// re-parsed.
fn backslash_u<I>(chars: &mut I) -> Option<char>
    where I: Iterator<Item = (usize, char)>
{
    match chars.next() {
        Some((_, '{')) => {}
        _ => return None,
    }

    // At least one digit is required, so `\u{}` is rejected here.
    let mut value = match chars.next().and_then(|(_, ch)| ch.to_digit(16)) {
        Some(digit) => digit,
        None => return None,
    };

    // Up to five further digits; a closing brace may end the escape early.
    for _ in 0..5 {
        match chars.next() {
            Some((_, '}')) => return char::from_u32(value),
            Some((_, ch)) => match ch.to_digit(16) {
                // At most six nibbles are ever accumulated: no overflow.
                Some(digit) => value = value * 16 + digit,
                None => return None,
            },
            None => return None,
        }
    }

    // Six digits were read, so only the closing brace may follow.
    match chars.next() {
        Some((_, '}')) => char::from_u32(value),
        _ => None,
    }
}
// Covers a `\x` escape, a backslash-newline continuation, and `\u{…}`
// escapes with one to six hex digits; the remainder must be the closing
// quote.
#[test]
fn test_cooked_string() {
    let source = "\\x62 \\\n \\u{7} \\u{64} \\u{bf5} \\u{12ba} \\u{1F395} \\u{102345}\"";
    let decoded = String::from("\x62 \u{7} \u{64} \u{bf5} \u{12ba} \u{1F395} \u{102345}");
    let outcome = cooked_string(source);
    assert_eq!(outcome, IResult::Done("\"", decoded));
}
// Covers a `\x` escape below 0x80, a backslash-newline continuation, and a
// `\x` escape above 0x7F; the remainder must be the closing quote.
#[test]
fn test_cooked_byte_string() {
    let source = "\\x62 \\\n \\xEF\"";
    let decoded: Vec<u8> = b"\x62 \xEF".to_vec();
    let outcome = cooked_byte_string(source);
    assert_eq!(outcome, IResult::Done("\"", decoded));
}