| #[allow(unused, deprecated)] |
| use std::ascii::AsciiExt; |
| use std::error::Error; |
| use std::fmt; |
| use std::iter::Enumerate; |
| use std::str::Bytes; |
| |
| use super::{Mime, MimeIter, Source, ParamSource, Indexed, CHARSET, UTF_8}; |
| |
/// The ways in which a string can fail to parse as a media type.
#[derive(Debug)]
pub enum ParseError {
    /// No `/` was found between the type and the subtype.
    MissingSlash,
    /// A parameter name was not followed by `=`.
    MissingEqual,
    /// A quoted parameter value was never terminated by `"`.
    MissingQuote,
    /// A byte that is not permitted at its position was encountered.
    InvalidToken {
        /// Byte offset of the offending byte within the parsed string.
        pos: usize,
        /// The offending byte itself.
        byte: u8,
    },
}
| |
| impl ParseError { |
| fn s(&self) -> &str { |
| use self::ParseError::*; |
| |
| match *self { |
| MissingSlash => "a slash (/) was missing between the type and subtype", |
| MissingEqual => "an equals sign (=) was missing between a parameter and its value", |
| MissingQuote => "a quote (\") was missing from a parameter value", |
| InvalidToken { .. } => "an invalid token was encountered", |
| } |
| } |
| } |
| |
| impl fmt::Display for ParseError { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| if let ParseError::InvalidToken { pos, byte } = *self { |
| write!(f, "{}, {:X} at position {}", self.s(), byte, pos) |
| } else { |
| f.write_str(self.s()) |
| } |
| } |
| } |
| |
| impl Error for ParseError { |
| // Minimum Rust is 1.15, Error::description was still required then |
| #[allow(deprecated)] |
| fn description(&self) -> &str { |
| self.s() |
| } |
| } |
| |
| impl<'a> MimeIter<'a> { |
| /// A new iterator over mimes or media types |
| pub fn new(s: &'a str) -> Self { |
| Self { |
| pos: 0, |
| source: s, |
| } |
| } |
| } |
| |
| impl<'a> Iterator for MimeIter<'a> { |
| type Item = Result<Mime, &'a str>; |
| |
| fn next(&mut self) -> Option<Self::Item> { |
| let start = self.pos; |
| let len = self.source.bytes().len(); |
| |
| if start >= len { |
| return None |
| } |
| |
| // Try parsing the whole remaining slice, until the end |
| match parse(&self.source[start ..len]) { |
| Ok(value) => { |
| self.pos = len; |
| Some(Ok(value)) |
| } |
| Err(ParseError::InvalidToken { pos, .. }) => { |
| // The first token is immediately found to be wrong by `parse`. Skip it |
| if pos == 0 { |
| self.pos += 1; |
| return self.next() |
| } |
| let slice = &self.source[start .. start + pos]; |
| // Try parsing the longest slice (until the first invalid token) |
| return match parse(slice) { |
| Ok(mime) => { |
| self.pos = start + pos + 1; |
| Some(Ok(mime)) |
| } |
| Err(_) => { |
| if start + pos < len { |
| // Skip this invalid slice, |
| // try parsing the remaining slice in the next iteration |
| self.pos = start + pos; |
| Some(Err(slice)) |
| } else { |
| None |
| } |
| } |
| } |
| } |
| // Do not process any other error condition: the slice is malformed and |
| // no character is found to be invalid: a character is missing |
| Err(_) => None, |
| } |
| } |
| } |
| |
| pub fn parse(s: &str) -> Result<Mime, ParseError> { |
| if s == "*/*" { |
| return Ok(::STAR_STAR); |
| } |
| |
| let mut iter = s.bytes().enumerate(); |
| // toplevel |
| let mut start; |
| let slash; |
| loop { |
| match iter.next() { |
| Some((_, c)) if is_token(c) => (), |
| Some((i, b'/')) if i > 0 => { |
| slash = i; |
| start = i + 1; |
| break; |
| }, |
| None => return Err(ParseError::MissingSlash), // EOF and no toplevel is no Mime |
| Some((pos, byte)) => return Err(ParseError::InvalidToken { |
| pos: pos, |
| byte: byte, |
| }) |
| }; |
| |
| } |
| |
| // sublevel |
| let mut plus = None; |
| loop { |
| match iter.next() { |
| Some((i, b'+')) if i > start => { |
| plus = Some(i); |
| }, |
| Some((i, b';')) if i > start => { |
| start = i; |
| break; |
| }, |
| Some((_, c)) if is_token(c) => (), |
| None => { |
| return Ok(Mime { |
| source: Source::Dynamic(s.to_ascii_lowercase()), |
| slash: slash, |
| plus: plus, |
| params: ParamSource::None, |
| }); |
| }, |
| Some((pos, byte)) => return Err(ParseError::InvalidToken { |
| pos: pos, |
| byte: byte, |
| }) |
| }; |
| } |
| |
| // params |
| let params = params_from_str(s, &mut iter, start)?; |
| |
| let src = match params { |
| ParamSource::Utf8(_) => s.to_ascii_lowercase(), |
| ParamSource::Custom(semicolon, ref indices) => lower_ascii_with_params(s, semicolon, indices), |
| ParamSource::None => { |
| // Chop off the empty list |
| s[..start].to_ascii_lowercase() |
| } |
| }; |
| |
| Ok(Mime { |
| source: Source::Dynamic(src), |
| slash: slash, |
| plus: plus, |
| params: params, |
| }) |
| } |
| |
| |
| fn params_from_str(s: &str, iter: &mut Enumerate<Bytes>, mut start: usize) -> Result<ParamSource, ParseError> { |
| let semicolon = start; |
| start += 1; |
| let mut params = ParamSource::None; |
| 'params: while start < s.len() { |
| let name; |
| // name |
| 'name: loop { |
| match iter.next() { |
| Some((i, b' ')) if i == start => { |
| start = i + 1; |
| continue 'params; |
| }, |
| Some((_, c)) if is_token(c) => (), |
| Some((i, b'=')) if i > start => { |
| name = Indexed(start, i); |
| start = i + 1; |
| break 'name; |
| }, |
| None => return Err(ParseError::MissingEqual), |
| Some((pos, byte)) => return Err(ParseError::InvalidToken { |
| pos: pos, |
| byte: byte, |
| }), |
| } |
| } |
| |
| let value; |
| // values must be restrict-name-char or "anything goes" |
| let mut is_quoted = false; |
| |
| 'value: loop { |
| if is_quoted { |
| match iter.next() { |
| Some((i, b'"')) if i > start => { |
| value = Indexed(start, i); |
| break 'value; |
| }, |
| Some((_, c)) if is_restricted_quoted_char(c) => (), |
| None => return Err(ParseError::MissingQuote), |
| Some((pos, byte)) => return Err(ParseError::InvalidToken { |
| pos: pos, |
| byte: byte, |
| }), |
| } |
| } else { |
| match iter.next() { |
| Some((i, b'"')) if i == start => { |
| is_quoted = true; |
| start = i + 1; |
| }, |
| Some((_, c)) if is_token(c) => (), |
| Some((i, b';')) if i > start => { |
| value = Indexed(start, i); |
| start = i + 1; |
| break 'value; |
| } |
| None => { |
| value = Indexed(start, s.len()); |
| start = s.len(); |
| break 'value; |
| }, |
| |
| Some((pos, byte)) => return Err(ParseError::InvalidToken { |
| pos: pos, |
| byte: byte, |
| }), |
| } |
| } |
| } |
| |
| if is_quoted { |
| 'ws: loop { |
| match iter.next() { |
| Some((i, b';')) => { |
| // next param |
| start = i + 1; |
| break 'ws; |
| }, |
| Some((_, b' ')) => { |
| // skip whitespace |
| }, |
| None => { |
| // eof |
| start = s.len(); |
| break 'ws; |
| }, |
| Some((pos, byte)) => return Err(ParseError::InvalidToken { |
| pos: pos, |
| byte: byte, |
| }), |
| } |
| } |
| } |
| |
| match params { |
| ParamSource::Utf8(i) => { |
| let i = i + 2; |
| let charset = Indexed(i, "charset".len() + i); |
| let utf8 = Indexed(charset.1 + 1, charset.1 + "utf-8".len() + 1); |
| params = ParamSource::Custom(semicolon, vec![ |
| (charset, utf8), |
| (name, value), |
| ]); |
| }, |
| ParamSource::Custom(_, ref mut vec) => { |
| vec.push((name, value)); |
| }, |
| ParamSource::None => { |
| if semicolon + 2 == name.0 && CHARSET == &s[name.0..name.1] { |
| if UTF_8 == &s[value.0..value.1] { |
| params = ParamSource::Utf8(semicolon); |
| continue 'params; |
| } |
| } |
| params = ParamSource::Custom(semicolon, vec![(name, value)]); |
| }, |
| } |
| } |
| Ok(params) |
| } |
| |
| fn lower_ascii_with_params(s: &str, semi: usize, params: &[(Indexed, Indexed)]) -> String { |
| let mut owned = s.to_owned(); |
| owned[..semi].make_ascii_lowercase(); |
| |
| for &(ref name, ref value) in params { |
| owned[name.0..name.1].make_ascii_lowercase(); |
| // Since we just converted this part of the string to lowercase, |
| // we can skip the `Name == &str` unicase check and do a faster |
| // memcmp instead. |
| if &owned[name.0..name.1] == CHARSET.source { |
| owned[value.0..value.1].make_ascii_lowercase(); |
| } |
| } |
| |
| owned |
| } |
| |
| // From [RFC6838](http://tools.ietf.org/html/rfc6838#section-4.2): |
| // |
| // > All registered media types MUST be assigned top-level type and |
| // > subtype names. The combination of these names serves to uniquely |
| // > identify the media type, and the subtype name facet (or the absence |
| // > of one) identifies the registration tree. Both top-level type and |
| // > subtype names are case-insensitive. |
| // > |
| // > Type and subtype names MUST conform to the following ABNF: |
| // > |
| // > type-name = restricted-name |
| // > subtype-name = restricted-name |
| // > |
| // > restricted-name = restricted-name-first *126restricted-name-chars |
| // > restricted-name-first = ALPHA / DIGIT |
| // > restricted-name-chars = ALPHA / DIGIT / "!" / "#" / |
| // > "$" / "&" / "-" / "^" / "_" |
| // > restricted-name-chars =/ "." ; Characters before first dot always |
| // > ; specify a facet name |
| // > restricted-name-chars =/ "+" ; Characters after last plus always |
| // > ; specify a structured syntax suffix |
| |
| // However, [HTTP](https://tools.ietf.org/html/rfc7231#section-3.1.1.1): |
| // |
| // > media-type = type "/" subtype *( OWS ";" OWS parameter ) |
| // > type = token |
| // > subtype = token |
| // > parameter = token "=" ( token / quoted-string ) |
| // |
| // Where token is defined as: |
| // |
| // > token = 1*tchar |
| // > tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / |
| // > "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA |
| // |
| // So, clearly, ¯\_(ツ)_/¯ |
| |
// Expands a comma-terminated list of integer flags into an array of `bool`s:
// every non-zero flag becomes `true`, every zero becomes `false`.
macro_rules! byte_map {
    ($($bit:expr,)*) => {
        [ $($bit != 0,)* ]
    };
}
| |
| static TOKEN_MAP: [bool; 256] = byte_map![ |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, |
| 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| ]; |
| |
| fn is_token(c: u8) -> bool { |
| TOKEN_MAP[c as usize] |
| } |
| |
/// Reports whether `c` may appear inside a quoted parameter value: any byte
/// except the ASCII control characters (0 through 31, and 127).
fn is_restricted_quoted_char(c: u8) -> bool {
    let is_control = c < 32 || c == 127;
    !is_control
}
| |
/// Checks every entry of `TOKEN_MAP` against the token character set:
/// ASCII letters, ASCII digits and the listed punctuation.
#[test]
fn test_lookup_tables() {
    // Punctuation allowed in a token besides letters and digits.
    let punctuation: &[u8] = b"!#$%&'*+-.^_`|~";

    for (index, &valid) in TOKEN_MAP.iter().enumerate() {
        let i = index as u8;
        let is_lower = b'a' <= i && i <= b'z';
        let is_upper = b'A' <= i && i <= b'Z';
        let is_digit = b'0' <= i && i <= b'9';
        let should = is_lower || is_upper || is_digit || punctuation.contains(&i);
        assert_eq!(valid, should, "{:?} ({}) should be {}", i as char, i, should);
    }
}
| |
/// Inputs made only of valid media types yield each of them, then `None`.
#[test]
fn test_parse_iterator() {
    let json = parse("application/json").unwrap();

    // (input, number of `application/json` items expected before `None`)
    let cases = [
        ("application/json, application/json", 2),
        ("application/json", 1),
        ("application/json; ", 1),
    ];

    for &(input, count) in cases.iter() {
        let mut iter = MimeIter::new(input);
        for _ in 0..count {
            assert_eq!(iter.next().unwrap().unwrap(), json);
        }
        assert_eq!(iter.next(), None);
    }
}
| |
/// An invalid entry between two valid ones is reported as `Err` carrying the
/// rejected text, and iteration continues with the entry after it.
#[test]
fn test_parse_iterator_invalid() {
    let json = parse("application/json").unwrap();
    let mut mimes = MimeIter::new("application/json, invalid, application/json");

    let first = mimes.next().unwrap();
    assert_eq!(first.unwrap(), json);

    let second = mimes.next().unwrap();
    assert_eq!(second.unwrap_err(), "invalid");

    let third = mimes.next().unwrap();
    assert_eq!(third.unwrap(), json);

    assert_eq!(mimes.next(), None);
}
| |
/// When no entry is a valid media type, the text before the first rejected
/// byte is reported as an error.
///
/// The trailing entry is never reported: it fails with a missing slash and
/// contains no rejected byte, which ends the iteration.
#[test]
fn test_parse_iterator_all_invalid() {
    // Neither entry contains a slash, so neither is a media type.
    let mut iter = MimeIter::new("application, text");
    assert_eq!(iter.next().unwrap().unwrap_err(), "application");
    assert_eq!(iter.next(), None);
}