| // Copyright (C) 2021 Scott Lamb <[email protected]> |
| // SPDX-License-Identifier: MIT OR Apache-2.0 |
| |
//! Builds a table that maps each byte value to the set of character classes
//! that byte belongs to, and offers lookup on it. Most of the classes are taken from
//! [RFC 7235 Appendix B: Imported ABNF](https://datatracker.ietf.org/doc/html/rfc7235#appendix-B)
//! or [RFC 7235 Appendix C: Collected ABNF](https://datatracker.ietf.org/doc/html/rfc7235#appendix-C).
| |
// Character-class flags. Every class owns a distinct bit, so a single `u8`
// can record membership in any combination of classes.

/// Flag set for bytes accepted by `is_tchar`.
pub(crate) const C_TCHAR: u8 = 0b0_0001;
/// Flag set for bytes accepted by `is_qdtext`.
pub(crate) const C_QDTEXT: u8 = 0b0_0010;
/// Flag set for bytes accepted by `is_escapable`.
pub(crate) const C_ESCAPABLE: u8 = 0b0_0100;
/// Flag set for bytes accepted by `is_ows`.
pub(crate) const C_OWS: u8 = 0b0_1000;
/// Flag set for bytes accepted by `is_attr`.
pub(crate) const C_ATTR: u8 = 0b1_0000;
| |
| static TABLE: [u8; 128] = build_table(); |
| |
| pub(crate) fn char_classes(b: u8) -> u8 { |
| *TABLE.get(usize::from(b)).unwrap_or(&0) |
| } |
| |
| const fn build_table() -> [u8; 128] { |
| // It'd be nice to use array::from_fn here, but it wasn't stablized until Rust 1.63. |
| let mut table = [0u8; 128]; |
| let mut i = 0; |
| while i < 128 { |
| let b = i as u8; |
| let mut classes = 0; |
| if is_tchar(b) { |
| classes |= C_TCHAR; |
| } |
| if is_qdtext(b) { |
| classes |= C_QDTEXT; |
| } |
| if is_escapable(b) { |
| classes |= C_ESCAPABLE; |
| } |
| if is_ows(b) { |
| classes |= C_OWS; |
| } |
| if is_attr(b) { |
| classes |= C_ATTR; |
| } |
| table[i] = classes; |
| i += 1; |
| } |
| table |
| } |
| |
/// Returns true if the byte is a `tchar` as defined in
/// [RFC 7230 section 3.2.6](https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.6).
///
/// ```text
/// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
///       / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
///       / DIGIT / ALPHA
///       ; any VCHAR, except delimiters
/// ```
const fn is_tchar(b: u8) -> bool {
    // DIGIT / ALPHA
    let alphanumeric = matches!(b, b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z');
    // The punctuation characters listed explicitly in the grammar.
    let punctuation = matches!(
        b,
        b'!' | b'#'
            | b'$'
            | b'%'
            | b'&'
            | b'\''
            | b'*'
            | b'+'
            | b'-'
            | b'.'
            | b'^'
            | b'_'
            | b'`'
            | b'|'
            | b'~'
    );
    alphanumeric || punctuation
}
| |
/// Returns true if the byte may appear unescaped inside a `quoted-string`, i.e. it is
/// a `qdtext` (with `obs-text` left out) as defined in
/// [RFC 7230 section 3.2.6](https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.6).
///
/// ```text
/// quoted-string  = DQUOTE *( qdtext / quoted-pair ) DQUOTE
/// qdtext         = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text
/// obs-text       = %x80-FF
/// quoted-pair    = "\" ( HTAB / SP / VCHAR / obs-text )
/// VCHAR          = %x21-7E
///                ; visible (printing) characters
/// ```
const fn is_qdtext(b: u8) -> bool {
    // HTAB, SP and every VCHAR...
    let tab_or_printable = matches!(b, b'\t' | b' '..=b'~');
    // ...except the two bytes the grammar skips: DQUOTE (%x22) and backslash (%x5C).
    let excluded = matches!(b, b'"' | b'\\');
    tab_or_printable && !excluded
}
| |
/// Returns true if the byte may follow the backslash of a `quoted-pair`, as defined in
/// [RFC 7230 section 3.2.6](https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.6).
///
/// ```text
/// quoted-pair    = "\" ( HTAB / SP / VCHAR / obs-text )
/// ```
///
/// `build_table` only evaluates bytes below 128, so the `obs-text` range is
/// accepted here but never ends up in the table.
const fn is_escapable(b: u8) -> bool {
    // HTAB / SP / VCHAR
    let tab_or_printable = matches!(b, b'\t' | b' '..=b'~');
    // obs-text = %x80-FF
    let obs_text = b >= 0x80;
    tab_or_printable || obs_text
}
| |
/// Returns true if the byte is an `attr-char` as defined in
/// [RFC 5987 section 3.2.1](https://datatracker.ietf.org/doc/html/rfc5987#section-3.2.1).
///
/// ```text
/// attr-char     = ALPHA / DIGIT
///               / "!" / "#" / "$" / "&" / "+" / "-" / "."
///               / "^" / "_" / "`" / "|" / "~"
///               ; token except ( "*" / "'" / "%" )
/// ```
const fn is_attr(b: u8) -> bool {
    // ALPHA / DIGIT
    let alphanumeric = matches!(b, b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9');
    // The punctuation characters listed explicitly in the grammar.
    let punctuation = matches!(
        b,
        b'!' | b'#'
            | b'$'
            | b'&'
            | b'+'
            | b'-'
            | b'.'
            | b'^'
            | b'_'
            | b'`'
            | b'|'
            | b'~'
    );
    alphanumeric || punctuation
}
| |
/// Returns true if the byte can be part of optional whitespace, as in
/// [RFC 7230 section 3.2.3](https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.3).
///
/// ```text
/// OWS            = *( SP / HTAB )
///                ; optional whitespace
/// ```
const fn is_ows(b: u8) -> bool {
    b == b' ' || b == b'\t'
}