| //! Helper functions and rules for enforcing the ABNF grammar for |
| //! RFC 7468-flavored PEM as described in Section 3. |
| //! |
| //! The grammar described below is intended to follow the "ABNF (Strict)" |
| //! subset of the grammar as described in Section 3 Figure 3. |
| |
| use crate::{Error, Result, PRE_ENCAPSULATION_BOUNDARY}; |
| use core::str; |
| |
/// NUL byte (`0x00`)
pub(crate) const CHAR_NUL: u8 = b'\0';

/// Horizontal tab (`0x09`)
pub(crate) const CHAR_HT: u8 = b'\t';

/// Space (`0x20`)
pub(crate) const CHAR_SP: u8 = b' ';

/// Carriage return (`0x0D`)
pub(crate) const CHAR_CR: u8 = b'\r';

/// Line feed (`0x0A`)
pub(crate) const CHAR_LF: u8 = b'\n';

/// Colon ':' (`0x3A`)
pub(crate) const CHAR_COLON: u8 = b':';
| |
/// Does the provided byte match the `labelchar` production of the RFC 7468
/// ABNF grammar?
///
/// A `labelchar` is any printable, non-space ASCII character
/// (`0x21..=0x7E`) other than hyphen-minus (`0x2D`).
pub(crate) fn is_labelchar(char: u8) -> bool {
    // `is_ascii_graphic` covers exactly `'!'..='~'`; carve out the hyphen.
    char.is_ascii_graphic() && char != b'-'
}
| |
| /// Does the provided byte match a character allowed in a label? |
| // TODO: allow hyphen-minus to match the 'label' production in the ABNF grammar |
| pub(crate) fn is_allowed_in_label(char: u8) -> bool { |
| is_labelchar(char) || matches!(char, CHAR_HT | CHAR_SP) |
| } |
| |
| /// Does the provided byte match the "WSP" ABNF production from Section 3? |
| /// |
| /// > The common ABNF production WSP is congruent with "blank"; |
| /// > a new production W is used for "whitespace" |
| pub(crate) fn is_wsp(char: u8) -> bool { |
| matches!(char, CHAR_HT | CHAR_SP) |
| } |
| |
| /// Strip the "preamble", i.e. data that appears before the PEM |
| /// pre-encapsulation boundary. |
| /// |
| /// Presently no attempt is made to ensure the preamble decodes successfully |
| /// under any particular character encoding. The only byte which is disallowed |
| /// is the NUL byte. This restriction does not appear in RFC7468, but rather |
| /// is inspired by the OpenSSL PEM decoder. |
| /// |
| /// Returns a slice which starts at the beginning of the encapsulated text. |
| /// |
| /// From RFC7468: |
| /// > Data before the encapsulation boundaries are permitted, and |
| /// > parsers MUST NOT malfunction when processing such data. |
| pub(crate) fn strip_preamble(mut bytes: &[u8]) -> Result<&[u8]> { |
| if bytes.starts_with(PRE_ENCAPSULATION_BOUNDARY) { |
| return Ok(bytes); |
| } |
| |
| while let Some((byte, remaining)) = bytes.split_first() { |
| match *byte { |
| CHAR_NUL => { |
| return Err(Error::Preamble); |
| } |
| CHAR_LF if remaining.starts_with(PRE_ENCAPSULATION_BOUNDARY) => { |
| return Ok(remaining); |
| } |
| _ => (), |
| } |
| |
| bytes = remaining; |
| } |
| |
| Err(Error::Preamble) |
| } |
| |
/// Remove one newline (`eol`) from the front of the provided byte slice.
///
/// The newline is mandatory: `None` is returned when the slice does not
/// begin with one, which callers surface as a decoding error.
///
/// Recognized newlines are LF, CRLF, and a lone CR. From RFC 7468 Section 3:
/// > lines are divided with CRLF, CR, or LF.
pub(crate) fn strip_leading_eol(bytes: &[u8]) -> Option<&[u8]> {
    match bytes.split_first()? {
        (&CHAR_LF, rest) => Some(rest),
        // A CR may stand alone or be the first half of a CRLF pair.
        (&CHAR_CR, rest) => match rest.split_first() {
            Some((&CHAR_LF, after_crlf)) => Some(after_crlf),
            _ => Some(rest),
        },
        _ => None,
    }
}
| |
/// Remove one newline (`eol`) from the back of the provided byte slice.
///
/// The newline is mandatory: `None` is returned when the slice does not
/// end with one, which callers surface as a decoding error.
///
/// Recognized newlines are CRLF, LF, and a lone CR. From RFC 7468 Section 3:
/// > lines are divided with CRLF, CR, or LF.
pub(crate) fn strip_trailing_eol(bytes: &[u8]) -> Option<&[u8]> {
    match bytes.split_last()? {
        // An LF may stand alone or be the second half of a CRLF pair.
        (&CHAR_LF, head) => match head.split_last() {
            Some((&CHAR_CR, before_crlf)) => Some(before_crlf),
            _ => Some(head),
        },
        (&CHAR_CR, head) => Some(head),
        _ => None,
    }
}
| |
| /// Split a slice beginning with a type label as located in an encapsulation |
| /// boundary. Returns the label as a `&str`, and slice beginning with the |
| /// encapsulated text with leading `-----` and newline removed. |
| /// |
| /// This implementation follows the rules put forth in Section 2, which are |
| /// stricter than those found in the ABNF grammar: |
| /// |
| /// > Labels are formally case-sensitive, uppercase, and comprised of zero or more |
| /// > characters; they do not contain consecutive spaces or hyphen-minuses, |
| /// > nor do they contain spaces or hyphen-minuses at either end. |
| /// |
| /// We apply a slightly stricter interpretation: |
| /// - Labels MAY be empty |
| /// - Non-empty labels MUST start with an upper-case letter: `'A'..='Z'` |
| /// - The only allowable characters subsequently are `'A'..='Z'` or WSP. |
| /// (NOTE: this is an overly strict initial implementation and should be relaxed) |
| /// - Whitespace MUST NOT contain more than one consecutive WSP character |
| // TODO(tarcieri): evaluate whether this is too strict; support '-' |
| pub(crate) fn split_label(bytes: &[u8]) -> Option<(&str, &[u8])> { |
| let mut n = 0usize; |
| |
| // TODO(tarcieri): handle hyphens in labels as well as spaces |
| let mut last_was_wsp = false; |
| |
| for &char in bytes { |
| // Validate character |
| if is_labelchar(char) { |
| last_was_wsp = false; |
| } else if char == b'-' { |
| // Possible start of encapsulation boundary delimiter |
| break; |
| } else if n != 0 && is_wsp(char) { |
| // Repeated whitespace disallowed |
| if last_was_wsp { |
| return None; |
| } |
| |
| last_was_wsp = true; |
| } else { |
| return None; |
| } |
| |
| n = n.checked_add(1)?; |
| } |
| |
| let (raw_label, rest) = bytes.split_at(n); |
| let label = str::from_utf8(raw_label).ok()?; |
| |
| match rest { |
| [b'-', b'-', b'-', b'-', b'-', body @ ..] => Some((label, strip_leading_eol(body)?)), |
| _ => None, |
| } |
| } |
| |
| /// Validate that the given bytes are allowed as a PEM type label, i.e. the |
| /// label encoded in the `BEGIN` and `END` encapsulation boundaries. |
| pub(crate) fn validate_label(label: &[u8]) -> Result<()> { |
| // TODO(tarcieri): handle hyphens in labels as well as spaces |
| let mut last_was_wsp = false; |
| |
| for &char in label { |
| if !is_allowed_in_label(char) { |
| return Err(Error::Label); |
| } |
| |
| if is_wsp(char) { |
| // Double sequential whitespace characters disallowed |
| if last_was_wsp { |
| return Err(Error::Label); |
| } |
| |
| last_was_wsp = true; |
| } else { |
| last_was_wsp = false; |
| } |
| } |
| |
| Ok(()) |
| } |
| |
#[cfg(test)]
mod tests {
    use super::*;

    /// Empty label is OK.
    #[test]
    fn split_label_empty() {
        let (label, body) = split_label(b"-----\nBODY").unwrap();
        assert_eq!(label, "");
        assert_eq!(body, b"BODY");
    }

    /// Label containing text.
    #[test]
    fn split_label_with_text() {
        let (label, body) = split_label(b"PRIVATE KEY-----\nBODY").unwrap();
        assert_eq!(label, "PRIVATE KEY");
        assert_eq!(body, b"BODY");
    }

    /// Reject labels containing repeated spaces
    #[test]
    fn split_label_with_repeat_wsp_is_err() {
        // Two consecutive spaces between the words: a single space would be
        // a valid label and would not exercise the rejection path.
        assert!(split_label(b"PRIVATE  KEY-----\nBODY").is_none());
    }

    /// Basic validation of a label
    #[test]
    fn validate_private_key_label() {
        assert_eq!(validate_label(b"PRIVATE KEY"), Ok(()));
    }

    /// Reject labels with double spaces
    #[test]
    fn validate_private_key_label_reject_double_space() {
        // Two consecutive spaces between the words: a single space would be
        // a valid label and would not exercise the rejection path.
        assert_eq!(validate_label(b"PRIVATE  KEY"), Err(Error::Label));
    }
}