| //! Contains functions for performing XML special characters escaping. |
| |
| use std::{borrow::Cow, marker::PhantomData, fmt::{Display, Result, Formatter}}; |
| |
/// A byte-level escaping table.
///
/// An implementor maps every byte that must not appear verbatim in the output
/// to a static replacement string; all other bytes are passed through.
pub(crate) trait Escapes {
    /// Returns the replacement text for `c`, or `None` if `c` can be emitted
    /// unchanged.
    fn escape(c: u8) -> Option<&'static str>;

    /// Returns `true` if [`Escapes::escape`] has a replacement for `c`.
    fn byte_needs_escaping(c: u8) -> bool {
        Self::escape(c).is_some()
    }

    /// Returns `true` if at least one byte of `s` needs escaping.
    ///
    /// Goes through [`Escapes::byte_needs_escaping`] rather than repeating its
    /// logic, so the per-byte and per-string checks cannot drift apart.
    fn str_needs_escaping(s: &str) -> bool {
        s.bytes().any(Self::byte_needs_escaping)
    }
}
| |
| pub(crate) struct Escaped<'a, E: Escapes> { |
| _escape_phantom: PhantomData<E>, |
| to_escape: &'a str, |
| } |
| |
| impl<'a, E: Escapes> Escaped<'a, E> { |
| pub fn new(s: &'a str) -> Self { |
| Escaped { |
| _escape_phantom: PhantomData, |
| to_escape: s, |
| } |
| } |
| } |
| |
| impl<'a, E: Escapes> Display for Escaped<'a, E> { |
| fn fmt(&self, f: &mut Formatter<'_>) -> Result { |
| let mut total_remaining = self.to_escape; |
| |
| // find the next occurence |
| while let Some(n) = total_remaining |
| .bytes() |
| .position(E::byte_needs_escaping) |
| { |
| let (start, remaining) = total_remaining.split_at(n); |
| |
| f.write_str(start)?; |
| |
| // unwrap is safe because we checked is_some for position n earlier |
| let next_byte = remaining.bytes().next().unwrap(); |
| let replacement = E::escape(next_byte).unwrap(); |
| f.write_str(replacement)?; |
| |
| total_remaining = &remaining[1..]; |
| } |
| |
| f.write_str(total_remaining) |
| } |
| } |
| |
| fn escape_str<E: Escapes>(s: &str) -> Cow<'_, str> { |
| if E::str_needs_escaping(s) { |
| Cow::Owned(format!("{}", Escaped::<E>::new(s))) |
| } else { |
| Cow::Borrowed(s) |
| } |
| } |
| |
/// Declares a unit struct `$name` and implements [`Escapes`] for it from a
/// list of `byte => replacement` pairs.
///
/// Each key becomes a `match` arm over the input byte, which is why it is
/// captured as a pattern: byte literals work as before, and ranges such as
/// `b'0'..=b'9'` are accepted too. A byte that matches no arm yields `None`,
/// i.e. it is not escaped. A trailing comma after the last pair is optional.
macro_rules! escapes {
    {
        $name:ident,
        $($byte:pat => $replacement:expr),* $(,)?
    } => {
        pub(crate) struct $name;

        impl Escapes for $name {
            fn escape(c: u8) -> Option<&'static str> {
                match c {
                    $($byte => Some($replacement),)*
                    _ => None,
                }
            }
        }
    };
}
| |
| escapes!( |
| AttributeEscapes, |
| b'<' => "<", |
| b'>' => ">", |
| b'"' => """, |
| b'\'' => "'", |
| b'&' => "&", |
| b'\n' => "
", |
| b'\r' => "
", |
| ); |
| |
| escapes!( |
| PcDataEscapes, |
| b'<' => "<", |
| b'&' => "&", |
| ); |
| |
| /// Performs escaping of common XML characters inside an attribute value. |
| /// |
| /// This function replaces several important markup characters with their |
| /// entity equivalents: |
| /// |
| /// * `<` → `<` |
| /// * `>` → `>` |
| /// * `"` → `"` |
| /// * `'` → `'` |
| /// * `&` → `&` |
| /// |
| /// The following characters are escaped so that attributes are printed on |
| /// a single line: |
| /// * `\n` → `
` |
| /// * `\r` → `
` |
| /// |
| /// The resulting string is safe to use inside XML attribute values or in PCDATA sections. |
| /// |
| /// Does not perform allocations if the given string does not contain escapable characters. |
| #[inline] |
| #[must_use] |
| pub fn escape_str_attribute(s: &str) -> Cow<'_, str> { |
| escape_str::<AttributeEscapes>(s) |
| } |
| |
| /// Performs escaping of common XML characters inside PCDATA. |
| /// |
| /// This function replaces several important markup characters with their |
| /// entity equivalents: |
| /// |
| /// * `<` → `<` |
| /// * `&` → `&` |
| /// |
| /// The resulting string is safe to use inside PCDATA sections but NOT inside attribute values. |
| /// |
| /// Does not perform allocations if the given string does not contain escapable characters. |
| #[inline] |
| #[must_use] |
| pub fn escape_str_pcdata(s: &str) -> Cow<'_, str> { |
| escape_str::<PcDataEscapes>(s) |
| } |
| |
#[cfg(test)]
mod tests {
    use super::{escape_str_attribute, escape_str_pcdata};

    /// Every byte in the attribute table is replaced by its entity or
    /// character reference; clean input comes back unchanged.
    #[test]
    fn test_escape_str_attribute() {
        assert_eq!(
            escape_str_attribute("<>'\"&\n\r"),
            "&lt;&gt;&apos;&quot;&amp;&#xA;&#xD;"
        );
        assert_eq!(escape_str_attribute("no_escapes"), "no_escapes");
        assert_eq!(escape_str_attribute(""), "");
    }

    /// Only `<` and `&` are replaced in PCDATA; characters that are escaped
    /// solely in attribute values pass through untouched.
    #[test]
    fn test_escape_str_pcdata() {
        assert_eq!(escape_str_pcdata("<&"), "&lt;&amp;");
        assert_eq!(escape_str_pcdata("no_escapes"), "no_escapes");
        assert_eq!(escape_str_pcdata(">'\"\n\r"), ">'\"\n\r");
        assert_eq!(escape_str_pcdata(""), "");
    }

    /// Escaping works on bytes, so it must not split a multi-byte code point
    /// that precedes an escaped character.
    #[test]
    fn test_escape_multibyte_code_points() {
        assert_eq!(escape_str_attribute("☃<"), "☃&lt;");
        assert_eq!(escape_str_pcdata("☃<"), "☃&lt;");
    }
}