| // Copyright (C) Michael Howell and others |
| // this library is released under the same terms as Rust itself. |
| |
| #![deny(unsafe_code)] |
| #![deny(missing_docs)] |
| |
| //! Ammonia is a whitelist-based HTML sanitization library. It is designed to |
| //! prevent cross-site scripting, layout breaking, and clickjacking caused |
| //! by untrusted user-provided HTML being mixed into a larger web page. |
| //! |
| //! Ammonia uses [html5ever] to parse and serialize document fragments the same way browsers do, |
| //! so it is extremely resilient to syntactic obfuscation. |
| //! |
| //! Ammonia parses its input exactly according to the HTML5 specification; |
| //! it will not linkify bare URLs, insert line or paragraph breaks, or convert `(C)` into ©. |
| //! If you want that, use a markup processor before running the sanitizer, like [pulldown-cmark]. |
| //! |
| //! # Examples |
| //! |
| //! ``` |
| //! let result = ammonia::clean( |
| //! "<b><img src='' onerror=alert('hax')>I'm not trying to XSS you</b>" |
| //! ); |
| //! assert_eq!(result, "<b><img src=\"\">I'm not trying to XSS you</b>"); |
| //! ``` |
| //! |
| //! [html5ever]: https://github.com/servo/html5ever "The HTML parser in Servo" |
| //! [pulldown-cmark]: https://github.com/google/pulldown-cmark "CommonMark parser" |
| |
| #[cfg(ammonia_unstable)] |
| pub mod rcdom; |
| |
| #[cfg(not(ammonia_unstable))] |
| mod rcdom; |
| |
| use html5ever::interface::Attribute; |
| use html5ever::serialize::{serialize, SerializeOpts}; |
| use html5ever::tree_builder::{NodeOrText, TreeSink}; |
| use html5ever::{driver as html, local_name, namespace_url, ns, QualName}; |
| use maplit::{hashmap, hashset}; |
| use once_cell::sync::Lazy; |
| use rcdom::{Handle, NodeData, RcDom, SerializableHandle}; |
| use std::borrow::{Borrow, Cow}; |
| use std::cmp::max; |
| use std::collections::{HashMap, HashSet}; |
| use std::fmt::{self, Display}; |
| use std::io; |
| use std::iter::IntoIterator as IntoIter; |
| use std::mem; |
| use std::rc::Rc; |
| use std::str::FromStr; |
| use tendril::stream::TendrilSink; |
| use tendril::StrTendril; |
| use tendril::{format_tendril, ByteTendril}; |
| pub use url::Url; |
| |
| use html5ever::buffer_queue::BufferQueue; |
| use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer}; |
| pub use url; |
| |
| static AMMONIA: Lazy<Builder<'static>> = Lazy::new(Builder::default); |
| |
| /// Clean HTML with a conservative set of defaults. |
| /// |
| /// * [tags](struct.Builder.html#defaults) |
| /// * [`script` and `style` have their contents stripped](struct.Builder.html#defaults-1) |
| /// * [attributes on specific tags](struct.Builder.html#defaults-2) |
| /// * [attributes on all tags](struct.Builder.html#defaults-6) |
| /// * [url schemes](struct.Builder.html#defaults-7) |
| /// * [relative URLs are passed through, unchanged, by default](struct.Builder.html#defaults-8) |
| /// * [links are marked `noopener noreferrer` by default](struct.Builder.html#defaults-9) |
| /// * all `class=""` settings are blocked by default |
| /// * comments are stripped by default |
| /// * no generic attribute prefixes are turned on by default |
| /// * no specific tag-attribute-value settings are configured by default |
| /// |
| /// [opener]: https://mathiasbynens.github.io/rel-noopener/ |
| /// [referrer]: https://en.wikipedia.org/wiki/HTTP_referer |
| /// |
| /// # Examples |
| /// |
| /// assert_eq!(ammonia::clean("XSS<script>attack</script>"), "XSS") |
| pub fn clean(src: &str) -> String { |
| AMMONIA.clean(src).to_string() |
| } |
| |
| /// Turn an arbitrary string into unformatted HTML. |
| /// |
| /// This function is roughly equivalent to PHP's `htmlspecialchars` and `htmlentities`. |
| /// It is as strict as possible, encoding every character that has special meaning to the |
| /// HTML parser. |
| /// |
| /// # Warnings |
| /// |
| /// This function cannot be used to package strings into a `<script>` or `<style>` tag; |
| /// you need a JavaScript or CSS escaper to do that. |
| /// |
| /// // DO NOT DO THIS |
| /// # use ammonia::clean_text; |
| /// let untrusted = "Robert\"); abuse();//"; |
| /// let html = format!("<script>invoke(\"{}\")</script>", clean_text(untrusted)); |
| /// |
| /// `<textarea>` tags will strip the first newline, if present, even if that newline is encoded. |
| /// If you want to build an editor that works the way most folks expect them to, you should put a |
| /// newline at the beginning of the tag, like this: |
| /// |
| /// # use ammonia::{Builder, clean_text}; |
| /// let untrusted = "\n\nhi!"; |
| /// let mut b = Builder::new(); |
| /// b.add_tags(&["textarea"]); |
| /// // This is the bad version |
| /// // The user put two newlines at the beginning, but the first one was removed |
| /// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted))).to_string(); |
| /// assert_eq!("<textarea>\nhi!</textarea>", sanitized); |
| /// // This is a good version |
| /// // The user put two newlines at the beginning, and we add a third one, |
| /// // so the result still has two |
| /// let sanitized = b.clean(&format!("<textarea>\n{}</textarea>", clean_text(untrusted))).to_string(); |
| /// assert_eq!("<textarea>\n\nhi!</textarea>", sanitized); |
| /// // This version is also often considered good |
| /// // For many applications, leading and trailing whitespace is probably unwanted |
| /// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted.trim()))).to_string(); |
| /// assert_eq!("<textarea>hi!</textarea>", sanitized); |
| /// |
| /// It also does not make user text safe for HTML attribute microsyntaxes such as `class` or `id`. |
| /// Only use this function for places where HTML accepts unrestricted text such as `title` attributes |
| /// and paragraph contents. |
| pub fn clean_text(src: &str) -> String { |
| let mut ret_val = String::with_capacity(max(4, src.len())); |
| for c in src.chars() { |
| let replacement = match c { |
| // this character, when confronted, will start a tag |
| '<' => "<", |
| // in an unquoted attribute, will end the attribute value |
| '>' => ">", |
| // in an attribute surrounded by double quotes, this character will end the attribute value |
| '\"' => """, |
| // in an attribute surrounded by single quotes, this character will end the attribute value |
| '\'' => "'", |
| // in HTML5, returns a bogus parse error in an unquoted attribute, while in SGML/HTML, it will end an attribute value surrounded by backquotes |
| '`' => "`", |
| // in an unquoted attribute, this character will end the attribute |
| '/' => "/", |
| // starts an entity reference |
| '&' => "&", |
| // if at the beginning of an unquoted attribute, will get ignored |
| '=' => "=", |
| // will end an unquoted attribute |
| ' ' => " ", |
| '\t' => "	", |
| '\n' => " ", |
| '\x0c' => "", |
| '\r' => " ", |
| // a spec-compliant browser will perform this replacement anyway, but the middleware might not |
| '\0' => "�", |
| // ALL OTHER CHARACTERS ARE PASSED THROUGH VERBATIM |
| _ => { |
| ret_val.push(c); |
| continue; |
| } |
| }; |
| ret_val.push_str(replacement); |
| } |
| ret_val |
| } |
| |
| /// Determine if a given string contains HTML |
| /// |
| /// This function is parses the full string into HTML and checks if the input contained any |
| /// HTML syntax. |
| /// |
| /// # Note |
| /// This function will return positively for strings that contain invalid HTML syntax like |
| /// `<g>` and even `Vec::<u8>::new()`. |
| pub fn is_html(input: &str) -> bool { |
| let santok = SanitizationTokenizer::new(); |
| let mut chunk = ByteTendril::new(); |
| chunk.push_slice(input.as_bytes()); |
| let mut input = BufferQueue::default(); |
| input.push_back(chunk.try_reinterpret().unwrap()); |
| |
| let mut tok = Tokenizer::new(santok, Default::default()); |
| let _ = tok.feed(&mut input); |
| tok.end(); |
| tok.sink.was_sanitized |
| } |
| |
| #[derive(Copy, Clone)] |
| struct SanitizationTokenizer { |
| was_sanitized: bool, |
| } |
| |
| impl SanitizationTokenizer { |
| pub fn new() -> SanitizationTokenizer { |
| SanitizationTokenizer { |
| was_sanitized: false, |
| } |
| } |
| } |
| |
| impl TokenSink for SanitizationTokenizer { |
| type Handle = (); |
| fn process_token(&mut self, token: Token, _line_number: u64) -> TokenSinkResult<()> { |
| match token { |
| Token::CharacterTokens(_) | Token::EOFToken | Token::ParseError(_) => {} |
| _ => { |
| self.was_sanitized = true; |
| } |
| } |
| TokenSinkResult::Continue |
| } |
| fn end(&mut self) {} |
| } |
| |
| /// An HTML sanitizer. |
| /// |
| /// Given a fragment of HTML, Ammonia will parse it according to the HTML5 |
| /// parsing algorithm and sanitize any disallowed tags or attributes. This |
| /// algorithm also takes care of things like unclosed and (some) misnested |
| /// tags. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::{Builder, UrlRelative}; |
| /// |
| /// let a = Builder::default() |
| /// .link_rel(None) |
| /// .url_relative(UrlRelative::PassThrough) |
| /// .clean("<a href=/>test") |
| /// .to_string(); |
| /// assert_eq!( |
| /// a, |
| /// "<a href=\"/\">test</a>"); |
| /// |
| /// # Panics |
| /// |
| /// Running [`clean`] or [`clean_from_reader`] may cause a panic if the builder is |
| /// configured with any of these (contradictory) settings: |
| /// |
| /// * The `rel` attribute is added to [`generic_attributes`] or the |
| /// [`tag_attributes`] for the `<a>` tag, and [`link_rel`] is not set to `None`. |
| /// |
| /// For example, this is going to panic, since [`link_rel`] is set to |
| /// `Some("noopener noreferrer")` by default, |
| /// and it makes no sense to simultaneously say that the user is allowed to |
| /// set their own `rel` attribute while saying that every link shall be set to |
| /// a particular value: |
| /// |
| /// ```should_panic |
| /// use ammonia::Builder; |
| /// use maplit::hashset; |
| /// |
| /// # fn main() { |
| /// Builder::default() |
| /// .generic_attributes(hashset!["rel"]) |
| /// .clean(""); |
| /// # } |
| /// ``` |
| /// |
| /// This, however, is perfectly valid: |
| /// |
| /// ``` |
| /// use ammonia::Builder; |
| /// use maplit::hashset; |
| /// |
| /// # fn main() { |
| /// Builder::default() |
| /// .generic_attributes(hashset!["rel"]) |
| /// .link_rel(None) |
| /// .clean(""); |
| /// # } |
| /// ``` |
| /// |
| /// * The `class` attribute is in [`allowed_classes`] and is in the |
| /// corresponding [`tag_attributes`] or in [`generic_attributes`]. |
| /// |
| /// This is done both to line up with the treatment of `rel`, |
| /// and to prevent people from accidentally allowing arbitrary |
| /// classes on a particular element. |
| /// |
| /// This will panic: |
| /// |
| /// ```should_panic |
| /// use ammonia::Builder; |
| /// use maplit::{hashmap, hashset}; |
| /// |
| /// # fn main() { |
| /// Builder::default() |
| /// .generic_attributes(hashset!["class"]) |
| /// .allowed_classes(hashmap!["span" => hashset!["hidden"]]) |
| /// .clean(""); |
| /// # } |
| /// ``` |
| /// |
| /// This, however, is perfectly valid: |
| /// |
| /// ``` |
| /// use ammonia::Builder; |
| /// use maplit::{hashmap, hashset}; |
| /// |
| /// # fn main() { |
| /// Builder::default() |
| /// .allowed_classes(hashmap!["span" => hashset!["hidden"]]) |
| /// .clean(""); |
| /// # } |
| /// ``` |
| /// |
| /// * A tag is in either [`tags`] or [`tag_attributes`] while also |
| /// being in [`clean_content_tags`]. |
| /// |
| /// Both [`tags`] and [`tag_attributes`] are whitelists but |
| /// [`clean_content_tags`] is a blacklist, so it doesn't make sense |
| /// to have the same tag in both. |
| /// |
| /// For example, this will panic, since the `aside` tag is in |
| /// [`tags`] by default: |
| /// |
| /// ```should_panic |
| /// use ammonia::Builder; |
| /// use maplit::hashset; |
| /// |
| /// # fn main() { |
| /// Builder::default() |
| /// .clean_content_tags(hashset!["aside"]) |
| /// .clean(""); |
| /// # } |
| /// ``` |
| /// |
| /// This, however, is valid: |
| /// |
| /// ``` |
| /// use ammonia::Builder; |
| /// use maplit::hashset; |
| /// |
| /// # fn main() { |
| /// Builder::default() |
| /// .rm_tags(&["aside"]) |
| /// .clean_content_tags(hashset!["aside"]) |
| /// .clean(""); |
| /// # } |
| /// ``` |
| /// |
| /// [`clean`]: #method.clean |
| /// [`clean_from_reader`]: #method.clean_from_reader |
| /// [`generic_attributes`]: #method.generic_attributes |
| /// [`tag_attributes`]: #method.tag_attributes |
| /// [`generic_attributes`]: #method.generic_attributes |
| /// [`link_rel`]: #method.link_rel |
| /// [`allowed_classes`]: #method.allowed_classes |
| /// [`id_prefix`]: #method.id_prefix |
| /// [`tags`]: #method.tags |
| /// [`clean_content_tags`]: #method.clean_content_tags |
| #[derive(Debug)] |
| pub struct Builder<'a> { |
| tags: HashSet<&'a str>, |
| clean_content_tags: HashSet<&'a str>, |
| tag_attributes: HashMap<&'a str, HashSet<&'a str>>, |
| tag_attribute_values: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>, |
| set_tag_attribute_values: HashMap<&'a str, HashMap<&'a str, &'a str>>, |
| generic_attributes: HashSet<&'a str>, |
| url_schemes: HashSet<&'a str>, |
| url_relative: UrlRelative<'a>, |
| attribute_filter: Option<Box<dyn AttributeFilter>>, |
| link_rel: Option<&'a str>, |
| allowed_classes: HashMap<&'a str, HashSet<&'a str>>, |
| strip_comments: bool, |
| id_prefix: Option<&'a str>, |
| generic_attribute_prefixes: Option<HashSet<&'a str>>, |
| } |
| |
| impl<'a> Default for Builder<'a> { |
| fn default() -> Self { |
| #[rustfmt::skip] |
| let tags = hashset![ |
| "a", "abbr", "acronym", "area", "article", "aside", "b", "bdi", |
| "bdo", "blockquote", "br", "caption", "center", "cite", "code", |
| "col", "colgroup", "data", "dd", "del", "details", "dfn", "div", |
| "dl", "dt", "em", "figcaption", "figure", "footer", "h1", "h2", |
| "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "i", "img", |
| "ins", "kbd", "li", "map", "mark", "nav", "ol", "p", "pre", |
| "q", "rp", "rt", "rtc", "ruby", "s", "samp", "small", "span", |
| "strike", "strong", "sub", "summary", "sup", "table", "tbody", |
| "td", "th", "thead", "time", "tr", "tt", "u", "ul", "var", "wbr" |
| ]; |
| let clean_content_tags = hashset!["script", "style"]; |
| let generic_attributes = hashset!["lang", "title"]; |
| let tag_attributes = hashmap![ |
| "a" => hashset![ |
| "href", "hreflang" |
| ], |
| "bdo" => hashset![ |
| "dir" |
| ], |
| "blockquote" => hashset![ |
| "cite" |
| ], |
| "col" => hashset![ |
| "align", "char", "charoff", "span" |
| ], |
| "colgroup" => hashset![ |
| "align", "char", "charoff", "span" |
| ], |
| "del" => hashset![ |
| "cite", "datetime" |
| ], |
| "hr" => hashset![ |
| "align", "size", "width" |
| ], |
| "img" => hashset![ |
| "align", "alt", "height", "src", "width" |
| ], |
| "ins" => hashset![ |
| "cite", "datetime" |
| ], |
| "ol" => hashset![ |
| "start" |
| ], |
| "q" => hashset![ |
| "cite" |
| ], |
| "table" => hashset![ |
| "align", "char", "charoff", "summary" |
| ], |
| "tbody" => hashset![ |
| "align", "char", "charoff" |
| ], |
| "td" => hashset![ |
| "align", "char", "charoff", "colspan", "headers", "rowspan" |
| ], |
| "tfoot" => hashset![ |
| "align", "char", "charoff" |
| ], |
| "th" => hashset![ |
| "align", "char", "charoff", "colspan", "headers", "rowspan", "scope" |
| ], |
| "thead" => hashset![ |
| "align", "char", "charoff" |
| ], |
| "tr" => hashset![ |
| "align", "char", "charoff" |
| ], |
| ]; |
| let tag_attribute_values = hashmap![]; |
| let set_tag_attribute_values = hashmap![]; |
| let url_schemes = hashset![ |
| "bitcoin", |
| "ftp", |
| "ftps", |
| "geo", |
| "http", |
| "https", |
| "im", |
| "irc", |
| "ircs", |
| "magnet", |
| "mailto", |
| "mms", |
| "mx", |
| "news", |
| "nntp", |
| "openpgp4fpr", |
| "sip", |
| "sms", |
| "smsto", |
| "ssh", |
| "tel", |
| "url", |
| "webcal", |
| "wtai", |
| "xmpp" |
| ]; |
| let allowed_classes = hashmap![]; |
| |
| Builder { |
| tags, |
| clean_content_tags, |
| tag_attributes, |
| tag_attribute_values, |
| set_tag_attribute_values, |
| generic_attributes, |
| url_schemes, |
| url_relative: UrlRelative::PassThrough, |
| attribute_filter: None, |
| link_rel: Some("noopener noreferrer"), |
| allowed_classes, |
| strip_comments: true, |
| id_prefix: None, |
| generic_attribute_prefixes: None, |
| } |
| } |
| } |
| |
| impl<'a> Builder<'a> { |
| /// Sets the tags that are allowed. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// use maplit::hashset; |
| /// |
| /// # fn main() { |
| /// let tags = hashset!["my-tag"]; |
| /// let a = Builder::new() |
| /// .tags(tags) |
| /// .clean("<my-tag>") |
| /// .to_string(); |
| /// assert_eq!(a, "<my-tag></my-tag>"); |
| /// # } |
| /// |
| /// # Defaults |
| /// |
| /// ```notest |
| /// a, abbr, acronym, area, article, aside, b, bdi, |
| /// bdo, blockquote, br, caption, center, cite, code, |
| /// col, colgroup, data, dd, del, details, dfn, div, |
| /// dl, dt, em, figcaption, figure, footer, h1, h2, |
| /// h3, h4, h5, h6, header, hgroup, hr, i, img, |
| /// ins, kbd, li, map, mark, nav, ol, p, pre, |
| /// q, rp, rt, rtc, ruby, s, samp, small, span, |
| /// strike, strong, sub, summary, sup, table, tbody, |
| /// td, th, thead, time, tr, tt, u, ul, var, wbr |
| /// ``` |
| pub fn tags(&mut self, value: HashSet<&'a str>) -> &mut Self { |
| self.tags = value; |
| self |
| } |
| |
| /// Add additonal whitelisted tags without overwriting old ones. |
| /// |
| /// Does nothing if the tag is already there. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .add_tags(&["my-tag"]) |
| /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string(); |
| /// assert_eq!("<my-tag>test</my-tag> <span>mess</span>", a); |
| pub fn add_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( |
| &mut self, |
| it: I, |
| ) -> &mut Self { |
| self.tags.extend(it.into_iter().map(Borrow::borrow)); |
| self |
| } |
| |
| /// Remove already-whitelisted tags. |
| /// |
| /// Does nothing if the tags is already gone. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .rm_tags(&["span"]) |
| /// .clean("<span></span>").to_string(); |
| /// assert_eq!("", a); |
| pub fn rm_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( |
| &mut self, |
| it: I, |
| ) -> &mut Self { |
| for i in it { |
| self.tags.remove(i.borrow()); |
| } |
| self |
| } |
| |
| /// Returns a copy of the set of whitelisted tags. |
| /// |
| /// # Examples |
| /// |
| /// use maplit::hashset; |
| /// |
| /// let tags = hashset!["my-tag-1", "my-tag-2"]; |
| /// |
| /// let mut b = ammonia::Builder::default(); |
| /// b.tags(Clone::clone(&tags)); |
| /// assert_eq!(tags, b.clone_tags()); |
| pub fn clone_tags(&self) -> HashSet<&'a str> { |
| self.tags.clone() |
| } |
| |
| /// Sets the tags whose contents will be completely removed from the output. |
| /// |
| /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause |
| /// a panic. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// use maplit::hashset; |
| /// |
| /// # fn main() { |
| /// let tag_blacklist = hashset!["script", "style"]; |
| /// let a = Builder::new() |
| /// .clean_content_tags(tag_blacklist) |
| /// .clean("<script>alert('hello')</script><style>a { background: #fff }</style>") |
| /// .to_string(); |
| /// assert_eq!(a, ""); |
| /// # } |
| /// |
| /// # Defaults |
| /// |
| /// ```notest |
| /// script, style |
| /// ``` |
| pub fn clean_content_tags(&mut self, value: HashSet<&'a str>) -> &mut Self { |
| self.clean_content_tags = value; |
| self |
| } |
| |
| /// Add additonal blacklisted clean-content tags without overwriting old ones. |
| /// |
| /// Does nothing if the tag is already there. |
| /// |
| /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause |
| /// a panic. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .add_clean_content_tags(&["my-tag"]) |
| /// .clean("<my-tag>test</my-tag><span>mess</span>").to_string(); |
| /// assert_eq!("<span>mess</span>", a); |
| pub fn add_clean_content_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( |
| &mut self, |
| it: I, |
| ) -> &mut Self { |
| self.clean_content_tags |
| .extend(it.into_iter().map(Borrow::borrow)); |
| self |
| } |
| |
| /// Remove already-blacklisted clean-content tags. |
| /// |
| /// Does nothing if the tags aren't blacklisted. |
| /// |
| /// # Examples |
| /// use ammonia::Builder; |
| /// use maplit::hashset; |
| /// |
| /// # fn main() { |
| /// let tag_blacklist = hashset!["script"]; |
| /// let a = ammonia::Builder::default() |
| /// .clean_content_tags(tag_blacklist) |
| /// .rm_clean_content_tags(&["script"]) |
| /// .clean("<script>XSS</script>").to_string(); |
| /// assert_eq!("XSS", a); |
| /// # } |
| pub fn rm_clean_content_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( |
| &mut self, |
| it: I, |
| ) -> &mut Self { |
| for i in it { |
| self.clean_content_tags.remove(i.borrow()); |
| } |
| self |
| } |
| |
| /// Returns a copy of the set of blacklisted clean-content tags. |
| /// |
| /// # Examples |
| /// # use maplit::hashset; |
| /// |
| /// let tags = hashset!["my-tag-1", "my-tag-2"]; |
| /// |
| /// let mut b = ammonia::Builder::default(); |
| /// b.clean_content_tags(Clone::clone(&tags)); |
| /// assert_eq!(tags, b.clone_clean_content_tags()); |
| pub fn clone_clean_content_tags(&self) -> HashSet<&'a str> { |
| self.clean_content_tags.clone() |
| } |
| |
| /// Sets the HTML attributes that are allowed on specific tags. |
| /// |
| /// The value is structured as a map from tag names to a set of attribute names. |
| /// |
| /// If a tag is not itself whitelisted, adding entries to this map will do nothing. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// use maplit::{hashmap, hashset}; |
| /// |
| /// # fn main() { |
| /// let tags = hashset!["my-tag"]; |
| /// let tag_attributes = hashmap![ |
| /// "my-tag" => hashset!["val"] |
| /// ]; |
| /// let a = Builder::new().tags(tags).tag_attributes(tag_attributes) |
| /// .clean("<my-tag val=1>") |
| /// .to_string(); |
| /// assert_eq!(a, "<my-tag val=\"1\"></my-tag>"); |
| /// # } |
| /// |
| /// # Defaults |
| /// |
| /// ```notest |
| /// a => |
| /// href, hreflang |
| /// bdo => |
| /// dir |
| /// blockquote => |
| /// cite |
| /// col => |
| /// align, char, charoff, span |
| /// colgroup => |
| /// align, char, charoff, span |
| /// del => |
| /// cite, datetime |
| /// hr => |
| /// align, size, width |
| /// img => |
| /// align, alt, height, src, width |
| /// ins => |
| /// cite, datetime |
| /// ol => |
| /// start |
| /// q => |
| /// cite |
| /// table => |
| /// align, char, charoff, summary |
| /// tbody => |
| /// align, char, charoff |
| /// td => |
| /// align, char, charoff, colspan, headers, rowspan |
| /// tfoot => |
| /// align, char, charoff |
| /// th => |
| /// align, char, charoff, colspan, headers, rowspan, scope |
| /// thead => |
| /// align, char, charoff |
| /// tr => |
| /// align, char, charoff |
| /// ``` |
| pub fn tag_attributes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self { |
| self.tag_attributes = value; |
| self |
| } |
| |
| /// Add additonal whitelisted tag-specific attributes without overwriting old ones. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .add_tags(&["my-tag"]) |
| /// .add_tag_attributes("my-tag", &["my-attr"]) |
| /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string(); |
| /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a); |
| pub fn add_tag_attributes< |
| T: 'a + ?Sized + Borrow<str>, |
| U: 'a + ?Sized + Borrow<str>, |
| I: IntoIter<Item = &'a T>, |
| >( |
| &mut self, |
| tag: &'a U, |
| it: I, |
| ) -> &mut Self { |
| self.tag_attributes |
| .entry(tag.borrow()) |
| .or_default() |
| .extend(it.into_iter().map(Borrow::borrow)); |
| self |
| } |
| |
| /// Remove already-whitelisted tag-specific attributes. |
| /// |
| /// Does nothing if the attribute is already gone. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .rm_tag_attributes("a", &["href"]) |
| /// .clean("<a href=\"/\"></a>").to_string(); |
| /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a); |
| pub fn rm_tag_attributes< |
| 'b, |
| 'c, |
| T: 'b + ?Sized + Borrow<str>, |
| U: 'c + ?Sized + Borrow<str>, |
| I: IntoIter<Item = &'b T>, |
| >( |
| &mut self, |
| tag: &'c U, |
| it: I, |
| ) -> &mut Self { |
| if let Some(tag) = self.tag_attributes.get_mut(tag.borrow()) { |
| for i in it { |
| tag.remove(i.borrow()); |
| } |
| } |
| self |
| } |
| |
| /// Returns a copy of the set of whitelisted tag-specific attributes. |
| /// |
| /// # Examples |
| /// use maplit::{hashmap, hashset}; |
| /// |
| /// let tag_attributes = hashmap![ |
| /// "my-tag" => hashset!["my-attr-1", "my-attr-2"] |
| /// ]; |
| /// |
| /// let mut b = ammonia::Builder::default(); |
| /// b.tag_attributes(Clone::clone(&tag_attributes)); |
| /// assert_eq!(tag_attributes, b.clone_tag_attributes()); |
| pub fn clone_tag_attributes(&self) -> HashMap<&'a str, HashSet<&'a str>> { |
| self.tag_attributes.clone() |
| } |
| |
| /// Sets the values of HTML attributes that are allowed on specific tags. |
| /// |
| /// The value is structured as a map from tag names to a map from attribute names to a set of |
| /// attribute values. |
| /// |
| /// If a tag is not itself whitelisted, adding entries to this map will do nothing. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// use maplit::{hashmap, hashset}; |
| /// |
| /// # fn main() { |
| /// let tags = hashset!["my-tag"]; |
| /// let tag_attribute_values = hashmap![ |
| /// "my-tag" => hashmap![ |
| /// "my-attr" => hashset!["val"], |
| /// ], |
| /// ]; |
| /// let a = Builder::new().tags(tags).tag_attribute_values(tag_attribute_values) |
| /// .clean("<my-tag my-attr=val>") |
| /// .to_string(); |
| /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>"); |
| /// # } |
| /// |
| /// # Defaults |
| /// |
| /// None. |
| pub fn tag_attribute_values( |
| &mut self, |
| value: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>, |
| ) -> &mut Self { |
| self.tag_attribute_values = value; |
| self |
| } |
| |
| /// Add additonal whitelisted tag-specific attribute values without overwriting old ones. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .add_tags(&["my-tag"]) |
| /// .add_tag_attribute_values("my-tag", "my-attr", &[""]) |
| /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string(); |
| /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a); |
| pub fn add_tag_attribute_values< |
| T: 'a + ?Sized + Borrow<str>, |
| U: 'a + ?Sized + Borrow<str>, |
| V: 'a + ?Sized + Borrow<str>, |
| I: IntoIter<Item = &'a T>, |
| >( |
| &mut self, |
| tag: &'a U, |
| attribute: &'a V, |
| it: I, |
| ) -> &mut Self { |
| self.tag_attribute_values |
| .entry(tag.borrow()) |
| .or_default() |
| .entry(attribute.borrow()) |
| .or_default() |
| .extend(it.into_iter().map(Borrow::borrow)); |
| |
| self |
| } |
| |
| /// Remove already-whitelisted tag-specific attribute values. |
| /// |
| /// Does nothing if the attribute or the value is already gone. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .rm_tag_attributes("a", &["href"]) |
| /// .add_tag_attribute_values("a", "href", &["/"]) |
| /// .rm_tag_attribute_values("a", "href", &["/"]) |
| /// .clean("<a href=\"/\"></a>").to_string(); |
| /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a); |
| pub fn rm_tag_attribute_values< |
| 'b, |
| 'c, |
| T: 'b + ?Sized + Borrow<str>, |
| U: 'c + ?Sized + Borrow<str>, |
| V: 'c + ?Sized + Borrow<str>, |
| I: IntoIter<Item = &'b T>, |
| >( |
| &mut self, |
| tag: &'c U, |
| attribute: &'c V, |
| it: I, |
| ) -> &mut Self { |
| if let Some(attrs) = self |
| .tag_attribute_values |
| .get_mut(tag.borrow()) |
| .and_then(|map| map.get_mut(attribute.borrow())) |
| { |
| for i in it { |
| attrs.remove(i.borrow()); |
| } |
| } |
| self |
| } |
| |
| /// Returns a copy of the set of whitelisted tag-specific attribute values. |
| /// |
| /// # Examples |
| /// |
| /// use maplit::{hashmap, hashset}; |
| /// |
| /// let attribute_values = hashmap![ |
| /// "my-attr-1" => hashset!["foo"], |
| /// "my-attr-2" => hashset!["baz", "bar"], |
| /// ]; |
| /// let tag_attribute_values = hashmap![ |
| /// "my-tag" => attribute_values |
| /// ]; |
| /// |
| /// let mut b = ammonia::Builder::default(); |
| /// b.tag_attribute_values(Clone::clone(&tag_attribute_values)); |
| /// assert_eq!(tag_attribute_values, b.clone_tag_attribute_values()); |
| pub fn clone_tag_attribute_values( |
| &self, |
| ) -> HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>> { |
| self.tag_attribute_values.clone() |
| } |
| |
| /// Sets the values of HTML attributes that are to be set on specific tags. |
| /// |
| /// The value is structured as a map from tag names to a map from attribute names to an |
| /// attribute value. |
| /// |
| /// If a tag is not itself whitelisted, adding entries to this map will do nothing. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// use maplit::{hashmap, hashset}; |
| /// |
| /// # fn main() { |
| /// let tags = hashset!["my-tag"]; |
| /// let set_tag_attribute_values = hashmap![ |
| /// "my-tag" => hashmap![ |
| /// "my-attr" => "val", |
| /// ], |
| /// ]; |
| /// let a = Builder::new().tags(tags).set_tag_attribute_values(set_tag_attribute_values) |
| /// .clean("<my-tag>") |
| /// .to_string(); |
| /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>"); |
| /// # } |
| /// |
| /// # Defaults |
| /// |
| /// None. |
| pub fn set_tag_attribute_values( |
| &mut self, |
| value: HashMap<&'a str, HashMap<&'a str, &'a str>>, |
| ) -> &mut Self { |
| self.set_tag_attribute_values = value; |
| self |
| } |
| |
| /// Add an attribute value to set on a specific element. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .add_tags(&["my-tag"]) |
| /// .set_tag_attribute_value("my-tag", "my-attr", "val") |
| /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string(); |
| /// assert_eq!("<my-tag my-attr=\"val\">test</my-tag> <span>mess</span>", a); |
| pub fn set_tag_attribute_value< |
| T: 'a + ?Sized + Borrow<str>, |
| A: 'a + ?Sized + Borrow<str>, |
| V: 'a + ?Sized + Borrow<str>, |
| >( |
| &mut self, |
| tag: &'a T, |
| attribute: &'a A, |
| value: &'a V, |
| ) -> &mut Self { |
| self.set_tag_attribute_values |
| .entry(tag.borrow()) |
| .or_default() |
| .insert(attribute.borrow(), value.borrow()); |
| self |
| } |
| |
| /// Remove existing tag-specific attribute values to be set. |
| /// |
| /// Does nothing if the attribute is already gone. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// // this does nothing, since no value is set for this tag attribute yet |
| /// .rm_set_tag_attribute_value("a", "target") |
| /// .set_tag_attribute_value("a", "target", "_blank") |
| /// .rm_set_tag_attribute_value("a", "target") |
| /// .clean("<a href=\"/\"></a>").to_string(); |
| /// assert_eq!("<a href=\"/\" rel=\"noopener noreferrer\"></a>", a); |
| pub fn rm_set_tag_attribute_value< |
| T: 'a + ?Sized + Borrow<str>, |
| A: 'a + ?Sized + Borrow<str>, |
| >( |
| &mut self, |
| tag: &'a T, |
| attribute: &'a A, |
| ) -> &mut Self { |
| if let Some(attributes) = self.set_tag_attribute_values.get_mut(tag.borrow()) { |
| attributes.remove(attribute.borrow()); |
| } |
| self |
| } |
| |
| /// Returns the value that will be set for the attribute on the element, if any. |
| /// |
| /// # Examples |
| /// |
| /// let mut b = ammonia::Builder::default(); |
| /// b.set_tag_attribute_value("a", "target", "_blank"); |
| /// let value = b.get_set_tag_attribute_value("a", "target"); |
| /// assert_eq!(value, Some("_blank")); |
| pub fn get_set_tag_attribute_value< |
| T: 'a + ?Sized + Borrow<str>, |
| A: 'a + ?Sized + Borrow<str>, |
| >( |
| &self, |
| tag: &'a T, |
| attribute: &'a A, |
| ) -> Option<&'a str> { |
| self.set_tag_attribute_values |
| .get(tag.borrow()) |
| .and_then(|map| map.get(attribute.borrow())) |
| .copied() |
| } |
| |
| /// Returns a copy of the set of tag-specific attribute values to be set. |
| /// |
| /// # Examples |
| /// |
| /// use maplit::{hashmap, hashset}; |
| /// |
| /// let attribute_values = hashmap![ |
| /// "my-attr-1" => "foo", |
| /// "my-attr-2" => "bar", |
| /// ]; |
| /// let set_tag_attribute_values = hashmap![ |
| /// "my-tag" => attribute_values, |
| /// ]; |
| /// |
| /// let mut b = ammonia::Builder::default(); |
| /// b.set_tag_attribute_values(Clone::clone(&set_tag_attribute_values)); |
| /// assert_eq!(set_tag_attribute_values, b.clone_set_tag_attribute_values()); |
| pub fn clone_set_tag_attribute_values(&self) -> HashMap<&'a str, HashMap<&'a str, &'a str>> { |
| self.set_tag_attribute_values.clone() |
| } |
| |
| /// Sets the prefix of attributes that are allowed on any tag. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// use maplit::hashset; |
| /// |
| /// # fn main() { |
| /// let prefixes = hashset!["data-"]; |
| /// let a = Builder::new() |
| /// .generic_attribute_prefixes(prefixes) |
| /// .clean("<b data-val=1>") |
| /// .to_string(); |
| /// assert_eq!(a, "<b data-val=\"1\"></b>"); |
| /// # } |
| /// |
| /// # Defaults |
| /// |
| /// No attribute prefixes are allowed by default. |
| pub fn generic_attribute_prefixes(&mut self, value: HashSet<&'a str>) -> &mut Self { |
| self.generic_attribute_prefixes = Some(value); |
| self |
| } |
| |
| /// Add additional whitelisted attribute prefix without overwriting old ones. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .add_generic_attribute_prefixes(&["my-"]) |
| /// .clean("<span my-attr>mess</span>").to_string(); |
| /// assert_eq!("<span my-attr=\"\">mess</span>", a); |
| pub fn add_generic_attribute_prefixes< |
| T: 'a + ?Sized + Borrow<str>, |
| I: IntoIter<Item = &'a T>, |
| >( |
| &mut self, |
| it: I, |
| ) -> &mut Self { |
| self.generic_attribute_prefixes |
| .get_or_insert_with(HashSet::new) |
| .extend(it.into_iter().map(Borrow::borrow)); |
| self |
| } |
| |
| /// Remove already-whitelisted attribute prefixes. |
| /// |
| /// Does nothing if the attribute prefix is already gone. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .add_generic_attribute_prefixes(&["data-", "code-"]) |
| /// .rm_generic_attribute_prefixes(&["data-"]) |
| /// .clean("<span code-test=\"foo\" data-test=\"cool\"></span>").to_string(); |
| /// assert_eq!("<span code-test=\"foo\"></span>", a); |
| pub fn rm_generic_attribute_prefixes< |
| 'b, |
| T: 'b + ?Sized + Borrow<str>, |
| I: IntoIter<Item = &'b T>, |
| >( |
| &mut self, |
| it: I, |
| ) -> &mut Self { |
| if let Some(true) = self.generic_attribute_prefixes.as_mut().map(|prefixes| { |
| for i in it { |
| let _ = prefixes.remove(i.borrow()); |
| } |
| prefixes.is_empty() |
| }) { |
| self.generic_attribute_prefixes = None; |
| } |
| self |
| } |
| |
| /// Returns a copy of the set of whitelisted attribute prefixes. |
| /// |
| /// # Examples |
| /// |
| /// use maplit::hashset; |
| /// |
| /// let generic_attribute_prefixes = hashset!["my-prfx-1-", "my-prfx-2-"]; |
| /// |
| /// let mut b = ammonia::Builder::default(); |
| /// b.generic_attribute_prefixes(Clone::clone(&generic_attribute_prefixes)); |
| /// assert_eq!(Some(generic_attribute_prefixes), b.clone_generic_attribute_prefixes()); |
| pub fn clone_generic_attribute_prefixes(&self) -> Option<HashSet<&'a str>> { |
| self.generic_attribute_prefixes.clone() |
| } |
| |
| /// Sets the attributes that are allowed on any tag. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// use maplit::hashset; |
| /// |
| /// # fn main() { |
| /// let attributes = hashset!["data-val"]; |
| /// let a = Builder::new() |
| /// .generic_attributes(attributes) |
| /// .clean("<b data-val=1>") |
| /// .to_string(); |
| /// assert_eq!(a, "<b data-val=\"1\"></b>"); |
| /// # } |
| /// |
| /// # Defaults |
| /// |
| /// ```notest |
| /// lang, title |
| /// ``` |
| pub fn generic_attributes(&mut self, value: HashSet<&'a str>) -> &mut Self { |
| self.generic_attributes = value; |
| self |
| } |
| |
| /// Add additonal whitelisted attributes without overwriting old ones. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .add_generic_attributes(&["my-attr"]) |
| /// .clean("<span my-attr>mess</span>").to_string(); |
| /// assert_eq!("<span my-attr=\"\">mess</span>", a); |
| pub fn add_generic_attributes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( |
| &mut self, |
| it: I, |
| ) -> &mut Self { |
| self.generic_attributes |
| .extend(it.into_iter().map(Borrow::borrow)); |
| self |
| } |
| |
| /// Remove already-whitelisted attributes. |
| /// |
| /// Does nothing if the attribute is already gone. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .rm_generic_attributes(&["title"]) |
| /// .clean("<span title=\"cool\"></span>").to_string(); |
| /// assert_eq!("<span></span>", a); |
| pub fn rm_generic_attributes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( |
| &mut self, |
| it: I, |
| ) -> &mut Self { |
| for i in it { |
| self.generic_attributes.remove(i.borrow()); |
| } |
| self |
| } |
| |
| /// Returns a copy of the set of whitelisted attributes. |
| /// |
| /// # Examples |
| /// |
| /// use maplit::hashset; |
| /// |
| /// let generic_attributes = hashset!["my-attr-1", "my-attr-2"]; |
| /// |
| /// let mut b = ammonia::Builder::default(); |
| /// b.generic_attributes(Clone::clone(&generic_attributes)); |
| /// assert_eq!(generic_attributes, b.clone_generic_attributes()); |
| pub fn clone_generic_attributes(&self) -> HashSet<&'a str> { |
| self.generic_attributes.clone() |
| } |
| |
| /// Sets the URL schemes permitted on `href` and `src` attributes. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// use maplit::hashset; |
| /// |
| /// # fn main() { |
| /// let url_schemes = hashset![ |
| /// "http", "https", "mailto", "magnet" |
| /// ]; |
| /// let a = Builder::new().url_schemes(url_schemes) |
| /// .clean("<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\">zero-length file</a>") |
| /// .to_string(); |
| /// |
| /// // See `link_rel` for information on the rel="noopener noreferrer" attribute |
| /// // in the cleaned HTML. |
| /// assert_eq!(a, |
| /// "<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\" rel=\"noopener noreferrer\">zero-length file</a>"); |
| /// # } |
| /// |
| /// # Defaults |
| /// |
| /// ```notest |
| /// bitcoin, ftp, ftps, geo, http, https, im, irc, |
| /// ircs, magnet, mailto, mms, mx, news, nntp, |
| /// openpgp4fpr, sip, sms, smsto, ssh, tel, url, |
| /// webcal, wtai, xmpp |
| /// ``` |
| pub fn url_schemes(&mut self, value: HashSet<&'a str>) -> &mut Self { |
| self.url_schemes = value; |
| self |
| } |
| |
| /// Add additonal whitelisted URL schemes without overwriting old ones. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .add_url_schemes(&["my-scheme"]) |
| /// .clean("<a href=my-scheme:home>mess</span>").to_string(); |
| /// assert_eq!("<a href=\"my-scheme:home\" rel=\"noopener noreferrer\">mess</a>", a); |
| pub fn add_url_schemes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( |
| &mut self, |
| it: I, |
| ) -> &mut Self { |
| self.url_schemes.extend(it.into_iter().map(Borrow::borrow)); |
| self |
| } |
| |
| /// Remove already-whitelisted attributes. |
| /// |
| /// Does nothing if the attribute is already gone. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .rm_url_schemes(&["ftp"]) |
| /// .clean("<a href=\"ftp://ftp.mozilla.org/\"></a>").to_string(); |
| /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a); |
| pub fn rm_url_schemes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( |
| &mut self, |
| it: I, |
| ) -> &mut Self { |
| for i in it { |
| self.url_schemes.remove(i.borrow()); |
| } |
| self |
| } |
| |
| /// Returns a copy of the set of whitelisted URL schemes. |
| /// |
| /// # Examples |
| /// use maplit::hashset; |
| /// |
| /// let url_schemes = hashset!["my-scheme-1", "my-scheme-2"]; |
| /// |
| /// let mut b = ammonia::Builder::default(); |
| /// b.url_schemes(Clone::clone(&url_schemes)); |
| /// assert_eq!(url_schemes, b.clone_url_schemes()); |
| pub fn clone_url_schemes(&self) -> HashSet<&'a str> { |
| self.url_schemes.clone() |
| } |
| |
| /// Configures the behavior for relative URLs: pass-through, resolve-with-base, or deny. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::{Builder, UrlRelative}; |
| /// |
| /// let a = Builder::new().url_relative(UrlRelative::PassThrough) |
| /// .clean("<a href=/>Home</a>") |
| /// .to_string(); |
| /// |
| /// // See `link_rel` for information on the rel="noopener noreferrer" attribute |
| /// // in the cleaned HTML. |
| /// assert_eq!( |
| /// a, |
| /// "<a href=\"/\" rel=\"noopener noreferrer\">Home</a>"); |
| /// |
| /// # Defaults |
| /// |
| /// ```notest |
| /// UrlRelative::PassThrough |
| /// ``` |
| pub fn url_relative(&mut self, value: UrlRelative<'a>) -> &mut Self { |
| self.url_relative = value; |
| self |
| } |
| |
| /// Allows rewriting of all attributes using a callback. |
| /// |
| /// The callback takes name of the element, attribute and its value. |
| /// Returns `None` to remove the attribute, or a value to use. |
| /// |
| /// Rewriting of attributes with URLs is done before `url_relative()`. |
| /// |
| /// # Panics |
| /// |
| /// If more than one callback is set. |
| /// |
| /// # Examples |
| /// |
| /// ```rust |
| /// use ammonia::Builder; |
| /// let a = Builder::new() |
| /// .attribute_filter(|element, attribute, value| { |
| /// match (element, attribute) { |
| /// ("img", "src") => None, |
| /// _ => Some(value.into()) |
| /// } |
| /// }) |
| /// .link_rel(None) |
| /// .clean("<a href=/><img alt=Home src=foo></a>") |
| /// .to_string(); |
| /// assert_eq!(a, |
| /// r#"<a href="/"><img alt="Home"></a>"#); |
| /// ``` |
| pub fn attribute_filter<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self |
| where |
| CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option<Cow<'u, str>> + Send + Sync + 'static, |
| { |
| assert!( |
| self.attribute_filter.is_none(), |
| "attribute_filter can be set only once" |
| ); |
| self.attribute_filter = Some(Box::new(callback)); |
| self |
| } |
| |
| /// Returns `true` if the relative URL resolver is set to `Deny`. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::{Builder, UrlRelative}; |
| /// let mut a = Builder::default(); |
| /// a.url_relative(UrlRelative::Deny); |
| /// assert!(a.is_url_relative_deny()); |
| /// a.url_relative(UrlRelative::PassThrough); |
| /// assert!(!a.is_url_relative_deny()); |
| pub fn is_url_relative_deny(&self) -> bool { |
| matches!(self.url_relative, UrlRelative::Deny) |
| } |
| |
| /// Returns `true` if the relative URL resolver is set to `PassThrough`. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::{Builder, UrlRelative}; |
| /// let mut a = Builder::default(); |
| /// a.url_relative(UrlRelative::Deny); |
| /// assert!(!a.is_url_relative_pass_through()); |
| /// a.url_relative(UrlRelative::PassThrough); |
| /// assert!(a.is_url_relative_pass_through()); |
| pub fn is_url_relative_pass_through(&self) -> bool { |
| matches!(self.url_relative, UrlRelative::PassThrough) |
| } |
| |
| /// Returns `true` if the relative URL resolver is set to `Custom`. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::{Builder, UrlRelative}; |
| /// use std::borrow::Cow; |
| /// fn test(a: &str) -> Option<Cow<str>> { None } |
| /// # fn main() { |
| /// let mut a = Builder::default(); |
| /// a.url_relative(UrlRelative::Custom(Box::new(test))); |
| /// assert!(a.is_url_relative_custom()); |
| /// a.url_relative(UrlRelative::PassThrough); |
| /// assert!(!a.is_url_relative_custom()); |
| /// a.url_relative(UrlRelative::Deny); |
| /// assert!(!a.is_url_relative_custom()); |
| /// # } |
| pub fn is_url_relative_custom(&self) -> bool { |
| matches!(self.url_relative, UrlRelative::Custom(_)) |
| } |
| |
| /// Configures a `rel` attribute that will be added on links. |
| /// |
| /// If `rel` is in the generic or tag attributes, this must be set to `None`. |
| /// Common `rel` values to include: |
| /// |
| /// * `noopener`: This prevents [a particular type of XSS attack], |
| /// and should usually be turned on for untrusted HTML. |
| /// * `noreferrer`: This prevents the browser from [sending the source URL] |
| /// to the website that is linked to. |
| /// * `nofollow`: This prevents search engines from [using this link for |
| /// ranking], which disincentivizes spammers. |
| /// |
| /// To turn on rel-insertion, call this function with a space-separated list. |
| /// Ammonia does not parse rel-attributes; |
| /// it just puts the given string into the attribute directly. |
| /// |
| /// [a particular type of XSS attack]: https://mathiasbynens.github.io/rel-noopener/ |
| /// [sending the source URL]: https://en.wikipedia.org/wiki/HTTP_referer |
| /// [using this link for ranking]: https://en.wikipedia.org/wiki/Nofollow |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// |
| /// let a = Builder::new().link_rel(None) |
| /// .clean("<a href=https://rust-lang.org/>Rust</a>") |
| /// .to_string(); |
| /// assert_eq!( |
| /// a, |
| /// "<a href=\"https://rust-lang.org/\">Rust</a>"); |
| /// |
| /// # Defaults |
| /// |
| /// ```notest |
| /// Some("noopener noreferrer") |
| /// ``` |
| pub fn link_rel(&mut self, value: Option<&'a str>) -> &mut Self { |
| self.link_rel = value; |
| self |
| } |
| |
| /// Returns the settings for links' `rel` attribute, if one is set. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::{Builder, UrlRelative}; |
| /// let mut a = Builder::default(); |
| /// a.link_rel(Some("a b")); |
| /// assert_eq!(a.get_link_rel(), Some("a b")); |
| pub fn get_link_rel(&self) -> Option<&str> { |
| self.link_rel |
| } |
| |
| /// Sets the CSS classes that are allowed on specific tags. |
| /// |
| /// The values is structured as a map from tag names to a set of class names. |
| /// |
| /// If the `class` attribute is itself whitelisted for a tag, then adding entries to |
| /// this map will cause a panic. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// use maplit::{hashmap, hashset}; |
| /// |
| /// # fn main() { |
| /// let allowed_classes = hashmap![ |
| /// "code" => hashset!["rs", "ex", "c", "cxx", "js"] |
| /// ]; |
| /// let a = Builder::new() |
| /// .allowed_classes(allowed_classes) |
| /// .clean("<code class=rs>fn main() {}</code>") |
| /// .to_string(); |
| /// assert_eq!( |
| /// a, |
| /// "<code class=\"rs\">fn main() {}</code>"); |
| /// # } |
| /// |
| /// # Defaults |
| /// |
| /// The set of allowed classes is empty by default. |
| pub fn allowed_classes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self { |
| self.allowed_classes = value; |
| self |
| } |
| |
| /// Add additonal whitelisted classes without overwriting old ones. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .add_allowed_classes("a", &["onebox"]) |
| /// .clean("<a href=/ class=onebox>mess</span>").to_string(); |
| /// assert_eq!("<a href=\"/\" class=\"onebox\" rel=\"noopener noreferrer\">mess</a>", a); |
| pub fn add_allowed_classes< |
| T: 'a + ?Sized + Borrow<str>, |
| U: 'a + ?Sized + Borrow<str>, |
| I: IntoIter<Item = &'a T>, |
| >( |
| &mut self, |
| tag: &'a U, |
| it: I, |
| ) -> &mut Self { |
| self.allowed_classes |
| .entry(tag.borrow()) |
| .or_default() |
| .extend(it.into_iter().map(Borrow::borrow)); |
| self |
| } |
| |
| /// Remove already-whitelisted attributes. |
| /// |
| /// Does nothing if the attribute is already gone. |
| /// |
| /// # Examples |
| /// |
| /// let a = ammonia::Builder::default() |
| /// .add_allowed_classes("span", &["active"]) |
| /// .rm_allowed_classes("span", &["active"]) |
| /// .clean("<span class=active>").to_string(); |
| /// assert_eq!("<span class=\"\"></span>", a); |
| pub fn rm_allowed_classes< |
| 'b, |
| 'c, |
| T: 'b + ?Sized + Borrow<str>, |
| U: 'c + ?Sized + Borrow<str>, |
| I: IntoIter<Item = &'b T>, |
| >( |
| &mut self, |
| tag: &'c U, |
| it: I, |
| ) -> &mut Self { |
| if let Some(tag) = self.allowed_classes.get_mut(tag.borrow()) { |
| for i in it { |
| tag.remove(i.borrow()); |
| } |
| } |
| self |
| } |
| |
| /// Returns a copy of the set of whitelisted class attributes. |
| /// |
| /// # Examples |
| /// |
| /// use maplit::{hashmap, hashset}; |
| /// |
| /// let allowed_classes = hashmap![ |
| /// "my-tag" => hashset!["my-class-1", "my-class-2"] |
| /// ]; |
| /// |
| /// let mut b = ammonia::Builder::default(); |
| /// b.allowed_classes(Clone::clone(&allowed_classes)); |
| /// assert_eq!(allowed_classes, b.clone_allowed_classes()); |
| pub fn clone_allowed_classes(&self) -> HashMap<&'a str, HashSet<&'a str>> { |
| self.allowed_classes.clone() |
| } |
| |
| /// Configures the handling of HTML comments. |
| /// |
| /// If this option is false, comments will be preserved. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// |
| /// let a = Builder::new().strip_comments(false) |
| /// .clean("<!-- yes -->") |
| /// .to_string(); |
| /// assert_eq!( |
| /// a, |
| /// "<!-- yes -->"); |
| /// |
| /// # Defaults |
| /// |
| /// `true` |
| pub fn strip_comments(&mut self, value: bool) -> &mut Self { |
| self.strip_comments = value; |
| self |
| } |
| |
| /// Returns `true` if comment stripping is turned on. |
| /// |
| /// # Examples |
| /// |
| /// let mut a = ammonia::Builder::new(); |
| /// a.strip_comments(true); |
| /// assert!(a.will_strip_comments()); |
| /// a.strip_comments(false); |
| /// assert!(!a.will_strip_comments()); |
| pub fn will_strip_comments(&self) -> bool { |
| self.strip_comments |
| } |
| |
| /// Prefixes all "id" attribute values with a given string. Note that the tag and |
| /// attribute themselves must still be whitelisted. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// use maplit::hashset; |
| /// |
| /// # fn main() { |
| /// let attributes = hashset!["id"]; |
| /// let a = Builder::new() |
| /// .generic_attributes(attributes) |
| /// .id_prefix(Some("safe-")) |
| /// .clean("<b id=42>") |
| /// .to_string(); |
| /// assert_eq!(a, "<b id=\"safe-42\"></b>"); |
| /// # } |
| |
| /// |
| /// # Defaults |
| /// |
| /// `None` |
| pub fn id_prefix(&mut self, value: Option<&'a str>) -> &mut Self { |
| self.id_prefix = value; |
| self |
| } |
| |
| /// Constructs a [`Builder`] instance configured with the [default options]. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::{Builder, Url, UrlRelative}; |
| /// # use std::error::Error; |
| /// |
| /// # fn do_main() -> Result<(), Box<dyn Error>> { |
| /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>."; |
| /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>."; |
| /// |
| /// let result = Builder::new() // <-- |
| /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?)) |
| /// .clean(input) |
| /// .to_string(); |
| /// assert_eq!(result, output); |
| /// # Ok(()) |
| /// # } |
| /// # fn main() { do_main().unwrap() } |
| /// |
| /// [default options]: fn.clean.html |
| /// [`Builder`]: struct.Builder.html |
| pub fn new() -> Self { |
| Self::default() |
| } |
| |
| /// Constructs a [`Builder`] instance configured with no allowed tags. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::{Builder, Url, UrlRelative}; |
| /// # use std::error::Error; |
| /// |
| /// # fn do_main() -> Result<(), Box<dyn Error>> { |
| /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>empty()</code> function</a>."; |
| /// let output = "This is an Ammonia example using the empty() function."; |
| /// |
| /// let result = Builder::empty() // <-- |
| /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?)) |
| /// .clean(input) |
| /// .to_string(); |
| /// assert_eq!(result, output); |
| /// # Ok(()) |
| /// # } |
| /// # fn main() { do_main().unwrap() } |
| /// |
| /// [default options]: fn.clean.html |
| /// [`Builder`]: struct.Builder.html |
| pub fn empty() -> Self { |
| Self { |
| tags: hashset![], |
| ..Self::default() |
| } |
| } |
| |
| /// Sanitizes an HTML fragment in a string according to the configured options. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::{Builder, Url, UrlRelative}; |
| /// # use std::error::Error; |
| /// |
| /// # fn do_main() -> Result<(), Box<dyn Error>> { |
| /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>."; |
| /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>."; |
| /// |
| /// let result = Builder::new() |
| /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?)) |
| /// .clean(input) |
| /// .to_string(); // <-- |
| /// assert_eq!(result, output); |
| /// # Ok(()) |
| /// # } |
| /// # fn main() { do_main().unwrap() } |
| pub fn clean(&self, src: &str) -> Document { |
| let parser = Self::make_parser(); |
| let dom = parser.one(src); |
| self.clean_dom(dom) |
| } |
| |
| /// Sanitizes an HTML fragment from a reader according to the configured options. |
| /// |
| /// The input should be in UTF-8 encoding, otherwise the decoding is lossy, just |
| /// like when using [`String::from_utf8_lossy`]. |
| /// |
| /// To avoid consuming the reader, a mutable reference can be passed to this method. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// # use std::error::Error; |
| /// |
| /// # fn do_main() -> Result<(), Box<dyn Error>> { |
| /// let a = Builder::new() |
| /// .clean_from_reader(&b"<!-- no -->"[..])? // notice the `b` |
| /// .to_string(); |
| /// assert_eq!(a, ""); |
| /// # Ok(()) } |
| /// # fn main() { do_main().unwrap() } |
| /// |
| /// [`String::from_utf8_lossy`]: https://doc.rust-lang.org/std/string/struct.String.html#method.from_utf8_lossy |
| pub fn clean_from_reader<R>(&self, mut src: R) -> io::Result<Document> |
| where |
| R: io::Read, |
| { |
| let parser = Self::make_parser().from_utf8(); |
| let dom = parser.read_from(&mut src)?; |
| Ok(self.clean_dom(dom)) |
| } |
| |
| /// Clean a post-parsing DOM. |
| /// |
| /// This is not a public API because RcDom isn't really stable. |
| /// We want to be able to take breaking changes to html5ever itself |
| /// without having to break Ammonia's API. |
| fn clean_dom(&self, mut dom: RcDom) -> Document { |
| let mut stack = Vec::new(); |
| let mut removed = Vec::new(); |
| let link_rel = self |
| .link_rel |
| .map(|link_rel| format_tendril!("{}", link_rel)); |
| if link_rel.is_some() { |
| assert!(self.generic_attributes.get("rel").is_none()); |
| assert!(self |
| .tag_attributes |
| .get("a") |
| .and_then(|a| a.get("rel")) |
| .is_none()); |
| } |
| assert!(self.allowed_classes.is_empty() || !self.generic_attributes.contains("class")); |
| for tag_name in self.allowed_classes.keys() { |
| assert!(self |
| .tag_attributes |
| .get(tag_name) |
| .and_then(|a| a.get("class")) |
| .is_none()); |
| } |
| for tag_name in &self.clean_content_tags { |
| assert!(!self.tags.contains(tag_name)); |
| assert!(!self.tag_attributes.contains_key(tag_name)); |
| } |
| let body = { |
| let children = dom.document.children.borrow(); |
| children[0].clone() |
| }; |
| stack.extend( |
| mem::take(&mut *body.children.borrow_mut()) |
| .into_iter() |
| .rev(), |
| ); |
| // This design approach is used to prevent pathological content from producing |
| // a stack overflow. The `stack` contains to-be-cleaned nodes, while `remove`, |
| // of course, contains nodes that need to be dropped (we can't just drop them, |
| // because they could have a very deep child tree). |
| while let Some(mut node) = stack.pop() { |
| let parent = node.parent |
| .replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed") |
| .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped"); |
| if self.clean_node_content(&node) { |
| removed.push(node); |
| continue; |
| } |
| let pass_clean = self.clean_child(&mut node); |
| let pass = pass_clean && self.check_expected_namespace(&parent, &node); |
| if pass { |
| self.adjust_node_attributes(&mut node, &link_rel, self.id_prefix); |
| dom.append(&parent.clone(), NodeOrText::AppendNode(node.clone())); |
| } else { |
| for sub in node.children.borrow_mut().iter_mut() { |
| sub.parent.replace(Some(Rc::downgrade(&parent))); |
| } |
| } |
| stack.extend( |
| mem::take(&mut *node.children.borrow_mut()) |
| .into_iter() |
| .rev(), |
| ); |
| if !pass { |
| removed.push(node); |
| } |
| } |
| // Now, imperatively clean up all of the child nodes. |
| // Otherwise, we could wind up with a DoS, either caused by a memory leak, |
| // or caused by a stack overflow. |
| while let Some(node) = removed.pop() { |
| removed.extend_from_slice(&mem::take(&mut *node.children.borrow_mut())[..]); |
| } |
| Document(dom) |
| } |
| |
| /// Returns `true` if a node and all its content should be removed. |
| fn clean_node_content(&self, node: &Handle) -> bool { |
| match node.data { |
| NodeData::Text { .. } |
| | NodeData::Comment { .. } |
| | NodeData::Doctype { .. } |
| | NodeData::Document |
| | NodeData::ProcessingInstruction { .. } => false, |
| NodeData::Element { ref name, .. } => self.clean_content_tags.contains(&*name.local), |
| } |
| } |
| |
| /// Remove unwanted attributes, and check if the node should be kept or not. |
| /// |
| /// The root node doesn't need cleaning because we create the root node ourselves, |
| /// and it doesn't get serialized, and ... it just exists to give the parser |
| /// a context (in this case, a div-like block context). |
| fn clean_child(&self, child: &mut Handle) -> bool { |
| match child.data { |
| NodeData::Text { .. } => true, |
| NodeData::Comment { .. } => !self.strip_comments, |
| NodeData::Doctype { .. } |
| | NodeData::Document |
| | NodeData::ProcessingInstruction { .. } => false, |
| NodeData::Element { |
| ref name, |
| ref attrs, |
| .. |
| } => { |
| if self.tags.contains(&*name.local) { |
| let attr_filter = |attr: &html5ever::Attribute| { |
| let whitelisted = self.generic_attributes.contains(&*attr.name.local) |
| || self.generic_attribute_prefixes.as_ref().map(|prefixes| { |
| prefixes.iter().any(|&p| attr.name.local.starts_with(p)) |
| }) == Some(true) |
| || self |
| .tag_attributes |
| .get(&*name.local) |
| .map(|ta| ta.contains(&*attr.name.local)) |
| == Some(true) |
| || self |
| .tag_attribute_values |
| .get(&*name.local) |
| .and_then(|tav| tav.get(&*attr.name.local)) |
| .map(|vs| { |
| let attr_val = attr.value.to_lowercase(); |
| vs.iter().any(|v| v.to_lowercase() == attr_val) |
| }) |
| == Some(true); |
| if !whitelisted { |
| // If the class attribute is not whitelisted, |
| // but there is a whitelisted set of allowed_classes, |
| // do not strip out the class attribute. |
| // Banned classes will be filtered later. |
| &*attr.name.local == "class" |
| && self.allowed_classes.contains_key(&*name.local) |
| } else if is_url_attr(&name.local, &attr.name.local) { |
| let url = Url::parse(&attr.value); |
| if let Ok(url) = url { |
| self.url_schemes.contains(url.scheme()) |
| } else if url == Err(url::ParseError::RelativeUrlWithoutBase) { |
| !matches!(self.url_relative, UrlRelative::Deny) |
| } else { |
| false |
| } |
| } else { |
| true |
| } |
| }; |
| attrs.borrow_mut().retain(attr_filter); |
| true |
| } else { |
| false |
| } |
| } |
| } |
| } |
| |
| // Check for unexpected namespace changes. |
| // |
| // The issue happens if developers added to the list of allowed tags any |
| // tag which is parsed in RCDATA state, PLAINTEXT state or RAWTEXT state, |
| // that is: |
| // |
| // * title |
| // * textarea |
| // * xmp |
| // * iframe |
| // * noembed |
| // * noframes |
| // * plaintext |
| // * noscript |
| // * style |
| // * script |
| // |
| // An example in the wild is Plume, that allows iframe [1]. So in next |
| // examples I'll assume the following policy: |
| // |
| // Builder::new() |
| // .add_tags(&["iframe"]) |
| // |
| // In HTML namespace `<iframe>` is parsed specially; that is, its content is |
| // treated as text. For instance, the following html: |
| // |
| // <iframe><a>test |
| // |
| // Is parsed into the following DOM tree: |
| // |
| // iframe |
| // └─ #text: <a>test |
| // |
| // So iframe cannot have any children other than a text node. |
| // |
| // The same is not true, though, in "foreign content"; that is, within |
| // <svg> or <math> tags. The following html: |
| // |
| // <svg><iframe><a>test |
| // |
| // is parsed differently: |
| // |
| // svg |
| // └─ iframe |
| // └─ a |
| // └─ #text: test |
| // |
| // So in SVG namespace iframe can have children. |
| // |
| // Ammonia disallows <svg> but it keeps its content after deleting it. And |
| // the parser internally keeps track of the namespace of the element. So |
| // assume we have the following snippet: |
| // |
| // <svg><iframe><a title="</iframe><img src onerror=alert(1)>">test |
| // |
| // It is parsed into: |
| // |
| // svg |
| // └─ iframe |
| // └─ a title="</iframe><img src onerror=alert(1)>" |
| // └─ #text: test |
| // |
| // This DOM tree is harmless from ammonia point of view because the piece |
| // of code that looks like XSS is in a title attribute. Hence, the |
| // resulting "safe" HTML from ammonia would be: |
| // |
| // <iframe><a title="</iframe><img src onerror=alert(1)>" rel="noopener |
| // noreferrer">test</a></iframe> |
| // |
| // However, at this point, the information about namespace is lost, which |
| // means that the browser will parse this snippet into: |
| // |
| // ├─ iframe |
| // │ └─ #text: <a title=" |
| // ├─ img src="" onerror="alert(1)" |
| // └─ #text: " rel="noopener noreferrer">test |
| // |
| // Leading to XSS. |
| // |
| // To solve this issue, check for unexpected namespace switches after cleanup. |
| // Elements which change namespace at an unexpected point are removed. |
| // This function returns `true` if `child` should be kept, and `false` if it |
| // should be removed. |
| // |
| // [1]: https://github.com/Plume-org/Plume/blob/main/plume-models/src/safe_string.rs#L21 |
| fn check_expected_namespace(&self, parent: &Handle, child: &Handle) -> bool { |
| let (parent, child) = match (&parent.data, &child.data) { |
| (NodeData::Element { name: pn, .. }, NodeData::Element { name: cn, .. }) => (pn, cn), |
| _ => return true, |
| }; |
| // The only way to switch from html to svg is with the <svg> tag |
| if parent.ns == ns!(html) && child.ns == ns!(svg) { |
| child.local == local_name!("svg") |
| // The only way to switch from html to mathml is with the <math> tag |
| } else if parent.ns == ns!(html) && child.ns == ns!(mathml) { |
| child.local == local_name!("math") |
| // The only way to switch from mathml to svg/html is with a text integration point |
| } else if parent.ns == ns!(mathml) && child.ns != ns!(mathml) { |
| // https://html.spec.whatwg.org/#mathml |
| matches!( |
| &*parent.local, |
| "mi" | "mo" | "mn" | "ms" | "mtext" | "annotation-xml" |
| ) |
| // The only way to switch from svg to mathml/html is with an html integration point |
| } else if parent.ns == ns!(svg) && child.ns != ns!(svg) { |
| // https://html.spec.whatwg.org/#svg-0 |
| matches!(&*parent.local, "foreignObject") |
| } else if child.ns == ns!(svg) { |
| is_svg_tag(&child.local) |
| } else if child.ns == ns!(mathml) { |
| is_mathml_tag(&child.local) |
| } else if child.ns == ns!(html) { |
| (!is_svg_tag(&child.local) && !is_mathml_tag(&child.local)) |
| || matches!( |
| &*child.local, |
| "title" | "style" | "font" | "a" | "script" | "span" |
| ) |
| } else { |
| // There are no other supported ways to switch namespace |
| parent.ns == child.ns |
| } |
| } |
| |
| /// Add and transform special-cased attributes and elements. |
| /// |
| /// This function handles: |
| /// |
| /// * relative URL rewriting |
| /// * adding `<a rel>` attributes |
| /// * filtering out banned classes |
| fn adjust_node_attributes( |
| &self, |
| child: &mut Handle, |
| link_rel: &Option<StrTendril>, |
| id_prefix: Option<&'a str>, |
| ) { |
| if let NodeData::Element { |
| ref name, |
| ref attrs, |
| .. |
| } = child.data |
| { |
| if let Some(set_attrs) = self.set_tag_attribute_values.get(&*name.local) { |
| let mut attrs = attrs.borrow_mut(); |
| for (&set_name, &set_value) in set_attrs { |
| // set the value of the attribute if the attribute is already present |
| if let Some(attr) = attrs.iter_mut().find(|attr| &*attr.name.local == set_name) |
| { |
| if &*attr.value != set_value { |
| attr.value = set_value.into(); |
| } |
| } else { |
| // otherwise, add the attribute |
| let attr = Attribute { |
| name: QualName::new(None, ns!(), set_name.into()), |
| value: set_value.into(), |
| }; |
| attrs.push(attr); |
| } |
| } |
| } |
| if let Some(ref link_rel) = *link_rel { |
| if &*name.local == "a" { |
| attrs.borrow_mut().push(Attribute { |
| name: QualName::new(None, ns!(), local_name!("rel")), |
| value: link_rel.clone(), |
| }) |
| } |
| } |
| if let Some(ref id_prefix) = id_prefix { |
| for attr in &mut *attrs.borrow_mut() { |
| if &attr.name.local == "id" && !attr.value.starts_with(id_prefix) { |
| attr.value = format_tendril!("{}{}", id_prefix, attr.value); |
| } |
| } |
| } |
| if let Some(ref attr_filter) = self.attribute_filter { |
| let mut drop_attrs = Vec::new(); |
| let mut attrs = attrs.borrow_mut(); |
| for (i, attr) in &mut attrs.iter_mut().enumerate() { |
| let replace_with = if let Some(new) = |
| attr_filter.filter(&name.local, &attr.name.local, &attr.value) |
| { |
| if *new != *attr.value { |
| Some(format_tendril!("{}", new)) |
| } else { |
| None // no need to replace the attr if filter returned the same value |
| } |
| } else { |
| drop_attrs.push(i); |
| None |
| }; |
| if let Some(replace_with) = replace_with { |
| attr.value = replace_with; |
| } |
| } |
| for i in drop_attrs.into_iter().rev() { |
| attrs.swap_remove(i); |
| } |
| } |
| { |
| let mut drop_attrs = Vec::new(); |
| let mut attrs = attrs.borrow_mut(); |
| for (i, attr) in attrs.iter_mut().enumerate() { |
| if is_url_attr(&name.local, &attr.name.local) && is_url_relative(&attr.value) { |
| let new_value = self.url_relative.evaluate(&attr.value); |
| if let Some(new_value) = new_value { |
| attr.value = new_value; |
| } else { |
| drop_attrs.push(i); |
| } |
| } |
| } |
| // Swap remove scrambles the vector after the current point. |
| // We will not do anything except with items before the current point. |
| // The `rev()` is, as such, necessary for correctness. |
| // We could use regular `remove(usize)` and a forward iterator, |
| // but that's slower. |
| for i in drop_attrs.into_iter().rev() { |
| attrs.swap_remove(i); |
| } |
| } |
| if let Some(allowed_values) = self.allowed_classes.get(&*name.local) { |
| for attr in &mut *attrs.borrow_mut() { |
| if &attr.name.local == "class" { |
| let mut classes = vec![]; |
| // https://html.spec.whatwg.org/#global-attributes:classes-2 |
| for class in attr.value.split_ascii_whitespace() { |
| if allowed_values.contains(class) { |
| classes.push(class.to_owned()); |
| } |
| } |
| attr.value = format_tendril!("{}", classes.join(" ")); |
| } |
| } |
| } |
| } |
| } |
| |
| /// Initializes an HTML fragment parser. |
| /// |
| /// Ammonia conforms to the HTML5 fragment parsing rules, |
| /// by parsing the given fragment as if it were included in a <div> tag. |
| fn make_parser() -> html::Parser<RcDom> { |
| html::parse_fragment( |
| RcDom::default(), |
| html::ParseOpts::default(), |
| QualName::new(None, ns!(html), local_name!("div")), |
| vec![], |
| ) |
| } |
| } |
| |
| /// Given an element name and attribute name, determine if the given attribute contains a URL. |
| fn is_url_attr(element: &str, attr: &str) -> bool { |
| attr == "href" |
| || attr == "src" |
| || (element == "form" && attr == "action") |
| || (element == "object" && attr == "data") |
| || ((element == "button" || element == "input") && attr == "formaction") |
| || (element == "a" && attr == "ping") |
| || (element == "video" && attr == "poster") |
| } |
| |
| /// Given an element name, check if it's SVG |
| fn is_svg_tag(element: &str) -> bool { |
| // https://svgwg.org/svg2-draft/eltindex.html |
| matches!( |
| element, |
| "a" | "animate" |
| | "animateMotion" |
| | "animateTransform" |
| | "circle" |
| | "clipPath" |
| | "defs" |
| | "desc" |
| | "discard" |
| | "ellipse" |
| | "feBlend" |
| | "feColorMatrix" |
| | "feComponentTransfer" |
| | "feComposite" |
| | "feConvolveMatrix" |
| | "feDiffuseLighting" |
| | "feDisplacementMap" |
| | "feDistantLight" |
| | "feDropShadow" |
| | "feFlood" |
| | "feFuncA" |
| | "feFuncB" |
| | "feFuncG" |
| | "feFuncR" |
| | "feGaussianBlur" |
| | "feImage" |
| | "feMerge" |
| | "feMergeNode" |
| | "feMorphology" |
| | "feOffset" |
| | "fePointLight" |
| | "feSpecularLighting" |
| | "feSpotLight" |
| | "feTile" |
| | "feTurbulence" |
| | "filter" |
| | "foreignObject" |
| | "g" |
| | "image" |
| | "line" |
| | "linearGradient" |
| | "marker" |
| | "mask" |
| | "metadata" |
| | "mpath" |
| | "path" |
| | "pattern" |
| | "polygon" |
| | "polyline" |
| | "radialGradient" |
| | "rect" |
| | "script" |
| | "set" |
| | "stop" |
| | "style" |
| | "svg" |
| | "switch" |
| | "symbol" |
| | "text" |
| | "textPath" |
| | "title" |
| | "tspan" |
| | "use" |
| | "view" |
| ) |
| } |
| |
| /// Given an element name, check if it's Math |
| fn is_mathml_tag(element: &str) -> bool { |
| // https://svgwg.org/svg2-draft/eltindex.html |
| matches!( |
| element, |
| "abs" |
| | "and" |
| | "annotation" |
| | "annotation-xml" |
| | "apply" |
| | "approx" |
| | "arccos" |
| | "arccosh" |
| | "arccot" |
| | "arccoth" |
| | "arccsc" |
| | "arccsch" |
| | "arcsec" |
| | "arcsech" |
| | "arcsin" |
| | "arcsinh" |
| | "arctan" |
| | "arctanh" |
| | "arg" |
| | "bind" |
| | "bvar" |
| | "card" |
| | "cartesianproduct" |
| | "cbytes" |
| | "ceiling" |
| | "cerror" |
| | "ci" |
| | "cn" |
| | "codomain" |
| | "complexes" |
| | "compose" |
| | "condition" |
| | "conjugate" |
| | "cos" |
| | "cosh" |
| | "cot" |
| | "coth" |
| | "cs" |
| | "csc" |
| | "csch" |
| | "csymbol" |
| | "curl" |
| | "declare" |
| | "degree" |
| | "determinant" |
| | "diff" |
| | "divergence" |
| | "divide" |
| | "domain" |
| | "domainofapplication" |
| | "emptyset" |
| | "eq" |
| | "equivalent" |
| | "eulergamma" |
| | "exists" |
| | "exp" |
| | "exponentiale" |
| | "factorial" |
| | "factorof" |
| | "false" |
| | "floor" |
| | "fn" |
| | "forall" |
| | "gcd" |
| | "geq" |
| | "grad" |
| | "gt" |
| | "ident" |
| | "image" |
| | "imaginary" |
| | "imaginaryi" |
| | "implies" |
| | "in" |
| | "infinity" |
| | "int" |
| | "integers" |
| | "intersect" |
| | "interval" |
| | "inverse" |
| | "lambda" |
| | "laplacian" |
| | "lcm" |
| | "leq" |
| | "limit" |
| | "list" |
| | "ln" |
| | "log" |
| | "logbase" |
| | "lowlimit" |
| | "lt" |
| | "maction" |
| | "maligngroup" |
| | "malignmark" |
| | "math" |
| | "matrix" |
| | "matrixrow" |
| | "max" |
| | "mean" |
| | "median" |
| | "menclose" |
| | "merror" |
| | "mfenced" |
| | "mfrac" |
| | "mglyph" |
| | "mi" |
| | "min" |
| | "minus" |
| | "mlabeledtr" |
| | "mlongdiv" |
| | "mmultiscripts" |
| | "mn" |
| | "mo" |
| | "mode" |
| | "moment" |
| | "momentabout" |
| | "mover" |
| | "mpadded" |
| | "mphantom" |
| | "mprescripts" |
| | "mroot" |
| | "mrow" |
| | "ms" |
| | "mscarries" |
| | "mscarry" |
| | "msgroup" |
| | "msline" |
| | "mspace" |
| | "msqrt" |
| | "msrow" |
| | "mstack" |
| | "mstyle" |
| | "msub" |
| | "msubsup" |
| | "msup" |
| | "mtable" |
| | "mtd" |
| | "mtext" |
| | "mtr" |
| | "munder" |
| | "munderover" |
| | "naturalnumbers" |
| | "neq" |
| | "none" |
| | "not" |
| | "notanumber" |
| | "notin" |
| | "notprsubset" |
| | "notsubset" |
| | "or" |
| | "otherwise" |
| | "outerproduct" |
| | "partialdiff" |
| | "pi" |
| | "piece" |
| | "piecewise" |
| | "plus" |
| | "power" |
| | "primes" |
| | "product" |
| | "prsubset" |
| | "quotient" |
| | "rationals" |
| | "real" |
| | "reals" |
| | "reln" |
| | "rem" |
| | "root" |
| | "scalarproduct" |
| | "sdev" |
| | "sec" |
| | "sech" |
| | "selector" |
| | "semantics" |
| | "sep" |
| | "set" |
| | "setdiff" |
| | "share" |
| | "sin" |
| | "sinh" |
| | "span" |
| | "subset" |
| | "sum" |
| | "tan" |
| | "tanh" |
| | "tendsto" |
| | "times" |
| | "transpose" |
| | "true" |
| | "union" |
| | "uplimit" |
| | "variance" |
| | "vector" |
| | "vectorproduct" |
| | "xor" |
| ) |
| } |
| |
| fn is_url_relative(url: &str) -> bool { |
| matches!( |
| Url::parse(url), |
| Err(url::ParseError::RelativeUrlWithoutBase) |
| ) |
| } |
| |
| /// Policy for [relative URLs], that is, URLs that do not specify the scheme in full. |
| /// |
| /// This policy kicks in, if set, for any attribute named `src` or `href`, |
| /// as well as the `data` attribute of an `object` tag. |
| /// |
| /// [relative URLs]: struct.Builder.html#method.url_relative |
| /// |
| /// # Examples |
| /// |
| /// ## `Deny` |
| /// |
| /// * `<a href="test">` is a file-relative URL, and will be removed |
| /// * `<a href="/test">` is a domain-relative URL, and will be removed |
| /// * `<a href="//example.com/test">` is a scheme-relative URL, and will be removed |
| /// * `<a href="http://example.com/test">` is an absolute URL, and will be kept |
| /// |
| /// ## `PassThrough` |
| /// |
| /// No changes will be made to any URLs, except if a disallowed scheme is used. |
| /// |
| /// ## `RewriteWithBase` |
| /// |
| /// If the base is set to `http://notriddle.com/some-directory/some-file` |
| /// |
| /// * `<a href="test">` will be rewritten to `<a href="http://notriddle.com/some-directory/test">` |
| /// * `<a href="/test">` will be rewritten to `<a href="http://notriddle.com/test">` |
| /// * `<a href="//example.com/test">` will be rewritten to `<a href="http://example.com/test">` |
| /// * `<a href="http://example.com/test">` is an absolute URL, so it will be kept as-is |
| /// |
| /// ## `Custom` |
| /// |
| /// Pass the relative URL to a function. |
| /// If it returns `Some(string)`, then that one gets used. |
| /// Otherwise, it will remove the attribute (like `Deny` does). |
| /// |
| /// use std::borrow::Cow; |
| /// fn is_absolute_path(url: &str) -> bool { |
| /// let u = url.as_bytes(); |
| /// // `//a/b/c` is "protocol-relative", meaning "a" is a hostname |
| /// // `/a/b/c` is an absolute path, and what we want to do stuff to. |
| /// u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/') |
| /// } |
| /// fn evaluate(url: &str) -> Option<Cow<str>> { |
| /// if is_absolute_path(url) { |
| /// Some(Cow::Owned(String::from("/root") + url)) |
| /// } else { |
| /// Some(Cow::Borrowed(url)) |
| /// } |
| /// } |
| /// fn main() { |
| /// let a = ammonia::Builder::new() |
| /// .url_relative(ammonia::UrlRelative::Custom(Box::new(evaluate))) |
| /// .clean("<a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>") |
| /// .to_string(); |
| /// assert_eq!(a, "<a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>"); |
| /// } |
| /// |
| /// This function is only applied to relative URLs. |
| /// To filter all of the URLs, |
| /// use the not-yet-implemented Content Security Policy. |
| #[non_exhaustive] |
| pub enum UrlRelative<'a> { |
| /// Relative URLs will be completely stripped from the document. |
| Deny, |
| /// Relative URLs will be passed through unchanged. |
| PassThrough, |
| /// Relative URLs will be changed into absolute URLs, based on this base URL. |
| RewriteWithBase(Url), |
| /// Force absolute and relative paths into a particular directory. |
| /// |
| /// Since the resolver does not affect fully-qualified URLs, it doesn't |
| /// prevent users from linking wherever they want. This feature only |
| /// serves to make content more portable. |
| /// |
| /// # Examples |
| /// |
| /// <table> |
| /// <thead> |
| /// <tr> |
| /// <th>root</th> |
| /// <th>path</th> |
| /// <th>url</th> |
| /// <th>result</th> |
| /// </tr> |
| /// </thead> |
| /// <tbody> |
| /// <tr> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
| /// <td>README.md</td> |
| /// <td></td> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td> |
| /// </tr><tr> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
| /// <td>README.md</td> |
| /// <td>/</td> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
| /// </tr><tr> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
| /// <td>README.md</td> |
| /// <td>/CONTRIBUTING.md</td> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td> |
| /// </tr><tr> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td> |
| /// <td>README.md</td> |
| /// <td></td> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/README.md</td> |
| /// </tr><tr> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td> |
| /// <td>README.md</td> |
| /// <td>/</td> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/</td> |
| /// </tr><tr> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td> |
| /// <td>README.md</td> |
| /// <td>/CONTRIBUTING.md</td> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md</td> |
| /// </tr><tr> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
| /// <td></td> |
| /// <td></td> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
| /// </tr><tr> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
| /// <td></td> |
| /// <td>/</td> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
| /// </tr><tr> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
| /// <td></td> |
| /// <td>/CONTRIBUTING.md</td> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td> |
| /// </tr><tr> |
| /// <td>https://github.com/</td> |
| /// <td>rust-ammonia/ammonia/blob/master/README.md</td> |
| /// <td></td> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td> |
| /// </tr><tr> |
| /// <td>https://github.com/</td> |
| /// <td>rust-ammonia/ammonia/blob/master/README.md</td> |
| /// <td>/</td> |
| /// <td>https://github.com/</td> |
| /// </tr><tr> |
| /// <td>https://github.com/</td> |
| /// <td>rust-ammonia/ammonia/blob/master/README.md</td> |
| /// <td>CONTRIBUTING.md</td> |
| /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td> |
| /// </tr><tr> |
| /// <td>https://github.com/</td> |
| /// <td>rust-ammonia/ammonia/blob/master/README.md</td> |
| /// <td>/CONTRIBUTING.md</td> |
| /// <td>https://github.com/CONTRIBUTING.md</td> |
| /// </tr> |
| /// </tbody> |
| /// </table> |
| RewriteWithRoot { |
| /// The URL that is treated as the root by the resolver. |
| root: Url, |
| /// The "current path" used to resolve relative paths. |
| path: String, |
| }, |
| /// Rewrite URLs with a custom function. |
| Custom(Box<dyn UrlRelativeEvaluate<'a>>), |
| } |
| |
| impl<'a> UrlRelative<'a> { |
| fn evaluate(&self, url: &str) -> Option<tendril::StrTendril> { |
| match self { |
| UrlRelative::RewriteWithBase(ref url_base) => url_base |
| .join(url) |
| .ok() |
| .and_then(|x| StrTendril::from_str(x.as_str()).ok()), |
| UrlRelative::RewriteWithRoot { ref root, ref path } => { |
| (match url.as_bytes() { |
| // Scheme-relative URL |
| [b'/', b'/', ..] => root.join(url), |
| // Path-absolute URL |
| b"/" => root.join("."), |
| [b'/', ..] => root.join(&url[1..]), |
| // Path-relative URL |
| _ => root.join(path).and_then(|r| r.join(url)), |
| }) |
| .ok() |
| .and_then(|x| StrTendril::from_str(x.as_str()).ok()) |
| } |
| UrlRelative::Custom(ref evaluate) => evaluate |
| .evaluate(url) |
| .as_ref() |
| .map(Cow::as_ref) |
| .map(StrTendril::from_str) |
| .and_then(Result::ok), |
| UrlRelative::PassThrough => StrTendril::from_str(url).ok(), |
| UrlRelative::Deny => None, |
| } |
| } |
| } |
| |
| impl<'a> fmt::Debug for UrlRelative<'a> { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| match *self { |
| UrlRelative::Deny => write!(f, "UrlRelative::Deny"), |
| UrlRelative::PassThrough => write!(f, "UrlRelative::PassThrough"), |
| UrlRelative::RewriteWithBase(ref base) => { |
| write!(f, "UrlRelative::RewriteWithBase({})", base) |
| } |
| UrlRelative::RewriteWithRoot { ref root, ref path } => { |
| write!( |
| f, |
| "UrlRelative::RewriteWithRoot {{ root: {root}, path: {path} }}" |
| ) |
| } |
| UrlRelative::Custom(_) => write!(f, "UrlRelative::Custom"), |
| } |
| } |
| } |
| |
| /// Types that implement this trait can be used to convert a relative URL into an absolute URL. |
| /// |
| /// This evaluator is only called when the URL is relative; absolute URLs are not evaluated. |
| /// |
| /// See [`url_relative`][url_relative] for more details. |
| /// |
| /// [url_relative]: struct.Builder.html#method.url_relative |
| pub trait UrlRelativeEvaluate<'a>: Send + Sync + 'a { |
| /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string. |
| fn evaluate<'url>(&self, _: &'url str) -> Option<Cow<'url, str>>; |
| } |
| impl<'a, T> UrlRelativeEvaluate<'a> for T |
| where |
| T: Fn(&str) -> Option<Cow<'_, str>> + Send + Sync + 'a, |
| { |
| fn evaluate<'url>(&self, url: &'url str) -> Option<Cow<'url, str>> { |
| self(url) |
| } |
| } |
| |
| impl fmt::Debug for dyn AttributeFilter { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| f.write_str("AttributeFilter") |
| } |
| } |
| |
| /// Types that implement this trait can be used to remove or rewrite arbitrary attributes. |
| /// |
| /// See [`attribute_filter`][attribute_filter] for more details. |
| /// |
| /// [attribute_filter]: struct.Builder.html#method.attribute_filter |
| pub trait AttributeFilter: Send + Sync { |
| /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string. |
| fn filter<'a>(&self, _: &str, _: &str, _: &'a str) -> Option<Cow<'a, str>>; |
| } |
| |
| impl<T> AttributeFilter for T |
| where |
| T: for<'a> Fn(&str, &str, &'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static, |
| { |
| fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> { |
| self(element, attribute, value) |
| } |
| } |
| |
| /// A sanitized HTML document. |
| /// |
| /// The `Document` type is an opaque struct representing an HTML fragment that was sanitized by |
| /// `ammonia`. It can be converted to a [`String`] or written to a [`Write`] instance. This allows |
| /// users to avoid buffering the serialized representation to a [`String`] when desired. |
| /// |
| /// This type is opaque to insulate the caller from breaking changes in the `html5ever` interface. |
| /// |
| /// Note that this type wraps an `html5ever` DOM tree. `ammonia` does not support streaming, so |
| /// the complete fragment needs to be stored in memory during processing. |
| /// |
| /// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html |
| /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// |
| /// let input = "<!-- comments will be stripped -->This is an Ammonia example."; |
| /// let output = "This is an Ammonia example."; |
| /// |
| /// let document = Builder::new() |
| /// .clean(input); |
| /// assert_eq!(document.to_string(), output); |
| pub struct Document(RcDom); |
| |
| impl Document { |
| /// Serializes a `Document` instance to a writer. |
| /// |
| /// This method writes the sanitized HTML to a [`Write`] instance, avoiding a buffering step. |
| /// |
| /// To avoid consuming the writer, a mutable reference can be passed, like in the example below. |
| /// |
| /// Note that the in-memory representation of `Document` is larger than the serialized |
| /// `String`. |
| /// |
| /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// |
| /// let input = "Some <style></style>HTML here"; |
| /// let expected = b"Some HTML here"; |
| /// |
| /// let document = Builder::new() |
| /// .clean(input); |
| /// |
| /// let mut sanitized = Vec::new(); |
| /// document.write_to(&mut sanitized) |
| /// .expect("Writing to a string should not fail (except on OOM)"); |
| /// assert_eq!(sanitized, expected); |
| pub fn write_to<W>(&self, writer: W) -> io::Result<()> |
| where |
| W: io::Write, |
| { |
| let opts = Self::serialize_opts(); |
| let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into(); |
| serialize(writer, &inner, opts) |
| } |
| |
| /// Exposes the `Document` instance as an [`rcdom::Handle`]. |
| /// |
| /// This method returns the inner object backing the `Document` instance. This allows |
| /// making further changes to the DOM without introducing redundant serialization and |
| /// parsing. |
| /// |
| /// Note that this method should be considered unstable and sits outside of the semver |
| /// stability guarantees. It may change, break, or go away at any time, either because |
| /// of `html5ever` changes or `ammonia` implementation changes. |
| /// |
| /// For this method to be accessible, a `cfg` flag is required. The easiest way is to |
| /// use the `RUSTFLAGS` environment variable: |
| /// |
| /// ```text |
| /// RUSTFLAGS='--cfg ammonia_unstable' cargo build |
| /// ``` |
| /// |
| /// on Unix-like platforms, or |
| /// |
| /// ```text |
| /// set RUSTFLAGS=--cfg ammonia_unstable |
| /// cargo build |
| /// ``` |
| /// |
| /// on Windows. |
| /// |
| /// This requirement also applies to crates that transitively depend on crates that use |
| /// this flag. |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// use maplit::hashset; |
| /// use html5ever::serialize::{serialize, SerializeOpts}; |
| /// |
| /// # use std::error::Error; |
| /// # fn do_main() -> Result<(), Box<dyn Error>> { |
| /// let input = "<a>one link</a> and <a>one more</a>"; |
| /// let expected = "<a>one more</a> and <a>one link</a>"; |
| /// |
| /// let document = Builder::new() |
| /// .link_rel(None) |
| /// .clean(input); |
| /// |
| /// let mut node = document.to_dom_node(); |
| /// node.children.borrow_mut().reverse(); |
| /// |
| /// let mut buf = Vec::new(); |
| /// serialize(&mut buf, &node, SerializeOpts::default())?; |
| /// let output = String::from_utf8(buf)?; |
| /// |
| /// assert_eq!(output, expected); |
| /// # Ok(()) |
| /// # } |
| /// # fn main() { do_main().unwrap() } |
| #[cfg(ammonia_unstable)] |
| pub fn to_dom_node(&self) -> Handle { |
| self.0.document.children.borrow()[0].clone() |
| } |
| |
| fn serialize_opts() -> SerializeOpts { |
| SerializeOpts::default() |
| } |
| } |
| |
| impl Clone for Document { |
| fn clone(&self) -> Self { |
| let parser = Builder::make_parser(); |
| let dom = parser.one(&self.to_string()[..]); |
| Document(dom) |
| } |
| } |
| |
| /// Convert a `Document` to stringified HTML. |
| /// |
| /// Since [`Document`] implements [`Display`], it can be converted to a [`String`] using the |
| /// standard [`ToString::to_string`] method. This is the simplest way to use `ammonia`. |
| /// |
| /// [`Document`]: ammonia::Document |
| /// [`Display`]: std::fmt::Display |
| /// [`ToString::to_string`]: std::string::ToString |
| /// |
| /// # Examples |
| /// |
| /// use ammonia::Builder; |
| /// |
| /// let input = "Some <style></style>HTML here"; |
| /// let output = "Some HTML here"; |
| /// |
| /// let document = Builder::new() |
| /// .clean(input); |
| /// assert_eq!(document.to_string(), output); |
| impl Display for Document { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| let opts = Self::serialize_opts(); |
| let mut ret_val = Vec::new(); |
| let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into(); |
| serialize(&mut ret_val, &inner, opts) |
| .expect("Writing to a string shouldn't fail (expect on OOM)"); |
| String::from_utf8(ret_val) |
| .expect("html5ever only supports UTF8") |
| .fmt(f) |
| } |
| } |
| |
| impl fmt::Debug for Document { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| write!(f, "Document({})", self) |
| } |
| } |
| |
| impl From<Document> for String { |
| fn from(document: Document) -> Self { |
| document.to_string() |
| } |
| } |
| |
| #[cfg(test)] |
| mod test { |
| use super::*; |
| #[test] |
| fn deeply_nested_whitelisted() { |
| clean(&"<b>".repeat(60_000)); |
| } |
| #[test] |
| fn deeply_nested_blacklisted() { |
| clean(&"<b-b>".repeat(60_000)); |
| } |
| #[test] |
| fn deeply_nested_alternating() { |
| clean(&"<b-b>".repeat(35_000)); |
| } |
| #[test] |
| fn included_angles() { |
| let fragment = "1 < 2"; |
| let result = clean(fragment); |
| assert_eq!(result, "1 < 2"); |
| } |
| #[test] |
| fn remove_script() { |
| let fragment = "an <script>evil()</script> example"; |
| let result = clean(fragment); |
| assert_eq!(result, "an example"); |
| } |
| #[test] |
| fn ignore_link() { |
| let fragment = "a <a href=\"http://www.google.com\">good</a> example"; |
| let expected = "a <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">\ |
| good</a> example"; |
| let result = clean(fragment); |
| assert_eq!(result, expected); |
| } |
| #[test] |
| fn remove_unsafe_link() { |
| let fragment = "an <a onclick=\"evil()\" href=\"http://www.google.com\">evil</a> example"; |
| let result = clean(fragment); |
| assert_eq!( |
| result, |
| "an <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">evil</a> example" |
| ); |
| } |
| #[test] |
| fn remove_js_link() { |
| let fragment = "an <a href=\"javascript:evil()\">evil</a> example"; |
| let result = clean(fragment); |
| assert_eq!(result, "an <a rel=\"noopener noreferrer\">evil</a> example"); |
| } |
| #[test] |
| fn tag_rebalance() { |
| let fragment = "<b>AWESOME!"; |
| let result = clean(fragment); |
| assert_eq!(result, "<b>AWESOME!</b>"); |
| } |
| #[test] |
| fn allow_url_relative() { |
| let fragment = "<a href=test>Test</a>"; |
| let result = Builder::new() |
| .url_relative(UrlRelative::PassThrough) |
| .clean(fragment) |
| .to_string(); |
| assert_eq!( |
| result, |
| "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>" |
| ); |
| } |
| #[test] |
| fn rewrite_url_relative() { |
| let fragment = "<a href=test>Test</a>"; |
| let result = Builder::new() |
| .url_relative(UrlRelative::RewriteWithBase( |
| Url::parse("http://example.com/").unwrap(), |
| )) |
| .clean(fragment) |
| .to_string(); |
| assert_eq!( |
| result, |
| "<a href=\"http://example.com/test\" rel=\"noopener noreferrer\">Test</a>" |
| ); |
| } |
| #[test] |
| fn rewrite_url_relative_with_invalid_url() { |
| // Reduced from https://github.com/Bauke/ammonia-crash-test |
| let fragment = r##"<a href="\\"https://example.com\\"">test</a>"##; |
| let result = Builder::new() |
| .url_relative(UrlRelative::RewriteWithBase( |
| Url::parse("http://example.com/").unwrap(), |
| )) |
| .clean(fragment) |
| .to_string(); |
| assert_eq!(result, r##"<a rel="noopener noreferrer">test</a>"##); |
| } |
| #[test] |
| fn attribute_filter_nop() { |
| let fragment = "<a href=test>Test</a>"; |
| let result = Builder::new() |
| .attribute_filter(|elem, attr, value| { |
| assert_eq!("a", elem); |
| assert!( |
| matches!( |
| (attr, value), |
| ("href", "test") | ("rel", "noopener noreferrer") |
| ), |
| "{}", |
| value.to_string() |
| ); |
| Some(value.into()) |
| }) |
| .clean(fragment) |
| .to_string(); |
| assert_eq!( |
| result, |
| "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>" |
| ); |
| } |
| |
| #[test] |
| fn attribute_filter_drop() { |
| let fragment = "Test<img alt=test src=imgtest>"; |
| let result = Builder::new() |
| .attribute_filter(|elem, attr, value| { |
| assert_eq!("img", elem); |
| match (attr, value) { |
| ("src", "imgtest") => None, |
| ("alt", "test") => Some(value.into()), |
| _ => panic!("unexpected"), |
| } |
| }) |
| .clean(fragment) |
| .to_string(); |
| assert_eq!(result, r#"Test<img alt="test">"#); |
| } |
| |
| #[test] |
| fn url_filter_absolute() { |
| let fragment = "Test<img alt=test src=imgtest>"; |
| let result = Builder::new() |
| .attribute_filter(|elem, attr, value| { |
| assert_eq!("img", elem); |
| match (attr, value) { |
| ("src", "imgtest") => { |
| Some(format!("https://example.com/images/{}", value).into()) |
| } |
| ("alt", "test") => None, |
| _ => panic!("unexpected"), |
| } |
| }) |
| .url_relative(UrlRelative::RewriteWithBase( |
| Url::parse("http://wrong.invalid/").unwrap(), |
| )) |
| .clean(fragment) |
| .to_string(); |
| assert_eq!( |
| result, |
| r#"Test<img src="https://example.com/images/imgtest">"# |
| ); |
| } |
| |
| #[test] |
| fn url_filter_relative() { |
| let fragment = "Test<img alt=test src=imgtest>"; |
| let result = Builder::new() |
| .attribute_filter(|elem, attr, value| { |
| assert_eq!("img", elem); |
| match (attr, value) { |
| ("src", "imgtest") => Some("rewrite".into()), |
| ("alt", "test") => Some("altalt".into()), |
| _ => panic!("unexpected"), |
| } |
| }) |
| .url_relative(UrlRelative::RewriteWithBase( |
| Url::parse("https://example.com/base/#").unwrap(), |
| )) |
| .clean(fragment) |
| .to_string(); |
| assert_eq!( |
| result, |
| r#"Test<img alt="altalt" src="https://example.com/base/rewrite">"# |
| ); |
| } |
| |
| #[test] |
| fn rewrite_url_relative_no_rel() { |
| let fragment = "<a href=test>Test</a>"; |
| let result = Builder::new() |
| .url_relative(UrlRelative::RewriteWithBase( |
| Url::parse("http://example.com/").unwrap(), |
| )) |
| .link_rel(None) |
| .clean(fragment) |
| .to_string(); |
| assert_eq!(result, "<a href=\"http://example.com/test\">Test</a>"); |
| } |
| #[test] |
| fn deny_url_relative() { |
| let fragment = "<a href=test>Test</a>"; |
| let result = Builder::new() |
| .url_relative(UrlRelative::Deny) |
| .clean(fragment) |
| .to_string(); |
| assert_eq!(result, "<a rel=\"noopener noreferrer\">Test</a>"); |
| } |
| #[test] |
| fn replace_rel() { |
| let fragment = "<a href=test rel=\"garbage\">Test</a>"; |
| let result = Builder::new() |
| .url_relative(UrlRelative::PassThrough) |
| .clean(fragment) |
| .to_string(); |
| assert_eq!( |
| result, |
| "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>" |
| ); |
| } |
| #[test] |
| fn consider_rel_still_banned() { |
| let fragment = "<a href=test rel=\"garbage\">Test</a>"; |
| let result = Builder::new() |
| .url_relative(UrlRelative::PassThrough) |
| .link_rel(None) |
| .clean(fragment) |
| .to_string(); |
| assert_eq!(result, "<a href=\"test\">Test</a>"); |
| } |
| #[test] |
| fn object_data() { |
| let fragment = "<span data=\"javascript:evil()\">Test</span>\ |
| <object data=\"javascript:evil()\"></object>M"; |
| let expected = r#"<span data="javascript:evil()">Test</span><object></object>M"#; |
| let result = Builder::new() |
| .tags(hashset!["span", "object"]) |
| .generic_attributes(hashset!["data"]) |
| .clean(fragment) |
| .to_string(); |
| assert_eq!(result, expected); |
| } |
| #[test] |
| fn remove_attributes() { |
| let fragment = "<table border=\"1\"><tr></tr></table>"; |
| let result = Builder::new().clean(fragment); |
| assert_eq!( |
| result.to_string(), |
| "<table><tbody><tr></tr></tbody></table>" |
| ); |
| } |
| #[test] |
| fn quotes_in_attrs() { |
| let fragment = "<b title='\"'>contents</b>"; |
| let result = clean(fragment); |
| assert_eq!(result, "<b title=\""\">contents</b>"); |
| } |
| #[test] |
| #[should_panic] |
| fn panic_if_rel_is_allowed_and_replaced_generic() { |
| Builder::new() |
| .link_rel(Some("noopener noreferrer")) |
| .generic_attributes(hashset!["rel"]) |
| .clean("something"); |
| } |
| #[test] |
| #[should_panic] |
| fn panic_if_rel_is_allowed_and_replaced_a() { |
| Builder::new() |
| .link_rel(Some("noopener noreferrer")) |
| .tag_attributes(hashmap![ |
| "a" => hashset!["rel"], |
| ]) |
| .clean("something"); |
| } |
| #[test] |
| fn no_panic_if_rel_is_allowed_and_replaced_span() { |
| Builder::new() |
| .link_rel(Some("noopener noreferrer")) |
| .tag_attributes(hashmap![ |
| "span" => hashset!["rel"], |
| ]) |
| .clean("<span rel=\"what\">s</span>"); |
| } |
| #[test] |
| fn no_panic_if_rel_is_allowed_and_not_replaced_generic() { |
| Builder::new() |
| .link_rel(None) |
| .generic_attributes(hashset!["rel"]) |
| .clean("<a rel=\"what\">s</a>"); |
| } |
| #[test] |
| fn no_panic_if_rel_is_allowed_and_not_replaced_a() { |
| Builder::new() |
| .link_rel(None) |
| .tag_attributes(hashmap![ |
| "a" => hashset!["rel"], |
| ]) |
| .clean("<a rel=\"what\">s</a>"); |
| } |
| #[test] |
| fn dont_close_void_elements() { |
| let fragment = "<br>"; |
| let result = clean(fragment); |
| assert_eq!(result.to_string(), "<br>"); |
| } |
| #[should_panic] |
| #[test] |
| fn panic_on_allowed_classes_tag_attributes() { |
| let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>"; |
| Builder::new() |
| .link_rel(None) |
| .tag_attributes(hashmap![ |
| "p" => hashset!["class"], |
| "a" => hashset!["class"], |
| ]) |
| .allowed_classes(hashmap![ |
| "p" => hashset!["foo", "bar"], |
| "a" => hashset!["baz"], |
| ]) |
| .clean(fragment); |
| } |
| #[should_panic] |
| #[test] |
| fn panic_on_allowed_classes_generic_attributes() { |
| let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>"; |
| Builder::new() |
| .link_rel(None) |
| .generic_attributes(hashset!["class", "href", "some-foo"]) |
| .allowed_classes(hashmap![ |
| "p" => hashset!["foo", "bar"], |
| "a" => hashset!["baz"], |
| ]) |
| .clean(fragment); |
| } |
| #[test] |
| fn remove_non_allowed_classes() { |
| let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>"; |
| let result = Builder::new() |
| .link_rel(None) |
| .allowed_classes(hashmap![ |
| "p" => hashset!["foo", "bar"], |
| "a" => hashset!["baz"], |
| ]) |
| .clean(fragment); |
| assert_eq!( |
| result.to_string(), |
| "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>" |
| ); |
| } |
| #[test] |
| fn remove_non_allowed_classes_with_tag_class() { |
| let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>"; |
| let result = Builder::new() |
| .link_rel(None) |
| .tag_attributes(hashmap![ |
| "div" => hashset!["class"], |
| ]) |
| .allowed_classes(hashmap![ |
| "p" => hashset!["foo", "bar"], |
| "a" => hashset!["baz"], |
| ]) |
| .clean(fragment); |
| assert_eq!( |
| result.to_string(), |
| "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>" |
| ); |
| } |
| #[test] |
| fn allowed_classes_ascii_whitespace() { |
| // According to https://infra.spec.whatwg.org/#ascii-whitespace, |
| // TAB (\t), LF (\n), FF (\x0C), CR (\x0D) and SPACE (\x20) are |
| // considered to be ASCII whitespace. Unicode whitespace characters |
| // and VT (\x0B) aren't ASCII whitespace. |
| let fragment = "<p class=\"a\tb\nc\x0Cd\re f\x0B g\u{2000}\">"; |
| let result = Builder::new() |
| .allowed_classes(hashmap![ |
| "p" => hashset!["a", "b", "c", "d", "e", "f", "g"], |
| ]) |
| .clean(fragment); |
| assert_eq!(result.to_string(), r#"<p class="a b c d e"></p>"#); |
| } |
| #[test] |
| fn remove_non_allowed_attributes_with_tag_attribute_values() { |
| let fragment = "<p data-label=\"baz\" name=\"foo\"></p>"; |
| let result = Builder::new() |
| .tag_attribute_values(hashmap![ |
| "p" => hashmap![ |
| "data-label" => hashset!["bar"], |
| ], |
| ]) |
| .tag_attributes(hashmap![ |
| "p" => hashset!["name"], |
| ]) |
| .clean(fragment); |
| assert_eq!(result.to_string(), "<p name=\"foo\"></p>",); |
| } |
| #[test] |
| fn keep_allowed_attributes_with_tag_attribute_values() { |
| let fragment = "<p data-label=\"bar\" name=\"foo\"></p>"; |
| let result = Builder::new() |
| .tag_attribute_values(hashmap![ |
| "p" => hashmap![ |
| "data-label" => hashset!["bar"], |
| ], |
| ]) |
| .tag_attributes(hashmap![ |
| "p" => hashset!["name"], |
| ]) |
| .clean(fragment); |
| assert_eq!( |
| result.to_string(), |
| "<p data-label=\"bar\" name=\"foo\"></p>", |
| ); |
| } |
| #[test] |
| fn tag_attribute_values_case_insensitive() { |
| let fragment = "<input type=\"CHECKBOX\" name=\"foo\">"; |
| let result = Builder::new() |
| .tags(hashset!["input"]) |
| .tag_attribute_values(hashmap![ |
| "input" => hashmap![ |
| "type" => hashset!["checkbox"], |
| ], |
| ]) |
| .tag_attributes(hashmap![ |
| "input" => hashset!["name"], |
| ]) |
| .clean(fragment); |
| assert_eq!(result.to_string(), "<input type=\"CHECKBOX\" name=\"foo\">",); |
| } |
| #[test] |
| fn set_tag_attribute_values() { |
| let fragment = "<a href=\"https://example.com/\">Link</a>"; |
| let result = Builder::new() |
| .link_rel(None) |
| .add_tag_attributes("a", &["target"]) |
| .set_tag_attribute_value("a", "target", "_blank") |
| .clean(fragment); |
| assert_eq!( |
| result.to_string(), |
| "<a href=\"https://example.com/\" target=\"_blank\">Link</a>", |
| ); |
| } |
| #[test] |
| fn update_existing_set_tag_attribute_values() { |
| let fragment = "<a target=\"bad\" href=\"https://example.com/\">Link</a>"; |
| let result = Builder::new() |
| .link_rel(None) |
| .add_tag_attributes("a", &["target"]) |
| .set_tag_attribute_value("a", "target", "_blank") |
| .clean(fragment); |
| assert_eq!( |
| result.to_string(), |
| "<a target=\"_blank\" href=\"https://example.com/\">Link</a>", |
| ); |
| } |
| #[test] |
| fn unwhitelisted_set_tag_attribute_values() { |
| let fragment = "<span>hi</span><my-elem>"; |
| let result = Builder::new() |
| .set_tag_attribute_value("my-elem", "my-attr", "val") |
| .clean(fragment); |
| assert_eq!(result.to_string(), "<span>hi</span>",); |
| } |
| #[test] |
| fn remove_entity_link() { |
| let fragment = "<a href=\"javascript:a\ |
| lert('XSS')\">Click me!</a>"; |
| let result = clean(fragment); |
| assert_eq!( |
| result.to_string(), |
| "<a rel=\"noopener noreferrer\">Click me!</a>" |
| ); |
| } |
| #[test] |
| fn remove_relative_url_evaluate() { |
| fn is_absolute_path(url: &str) -> bool { |
| let u = url.as_bytes(); |
| // `//a/b/c` is "protocol-relative", meaning "a" is a hostname |
| // `/a/b/c` is an absolute path, and what we want to do stuff to. |
| u.first() == Some(&b'/') && u.get(1) != Some(&b'/') |
| } |
| fn is_banned(url: &str) -> bool { |
| let u = url.as_bytes(); |
| u.first() == Some(&b'b') && u.get(1) == Some(&b'a') |
| } |
| fn evaluate(url: &str) -> Option<Cow<'_, str>> { |
| if is_absolute_path(url) { |
| Some(Cow::Owned(String::from("/root") + url)) |
| } else if is_banned(url) { |
| None |
| } else { |
| Some(Cow::Borrowed(url)) |
| } |
| } |
| let a = Builder::new() |
| .url_relative(UrlRelative::Custom(Box::new(evaluate))) |
| .clean("<a href=banned>banned</a><a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>") |
| .to_string(); |
| assert_eq!(a, "<a rel=\"noopener noreferrer\">banned</a><a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>"); |
| } |
| #[test] |
| fn remove_relative_url_evaluate_b() { |
| fn is_absolute_path(url: &str) -> bool { |
| let u = url.as_bytes(); |
| // `//a/b/c` is "protocol-relative", meaning "a" is a hostname |
| // `/a/b/c` is an absolute path, and what we want to do stuff to. |
| u.first() == Some(&b'/') && u.get(1) != Some(&b'/') |
| } |
| fn is_banned(url: &str) -> bool { |
| let u = url.as_bytes(); |
| u.first() == Some(&b'b') && u.get(1) == Some(&b'a') |
| } |
| fn evaluate(url: &str) -> Option<Cow<'_, str>> { |
| if is_absolute_path(url) { |
| Some(Cow::Owned(String::from("/root") + url)) |
| } else if is_banned(url) { |
| None |
| } else { |
| Some(Cow::Borrowed(url)) |
| } |
| } |
| let a = Builder::new() |
| .url_relative(UrlRelative::Custom(Box::new(evaluate))) |
| .clean("<a href=banned>banned</a><a href=banned title=test>banned</a><a title=test href=banned>banned</a>") |
| .to_string(); |
| assert_eq!(a, "<a rel=\"noopener noreferrer\">banned</a><a rel=\"noopener noreferrer\" title=\"test\">banned</a><a title=\"test\" rel=\"noopener noreferrer\">banned</a>"); |
| } |
| #[test] |
| fn remove_relative_url_evaluate_c() { |
| // Don't run on absolute URLs. |
| fn evaluate(_: &str) -> Option<Cow<'_, str>> { |
| return Some(Cow::Owned(String::from("invalid"))); |
| } |
| let a = Builder::new() |
| .url_relative(UrlRelative::Custom(Box::new(evaluate))) |
| .clean("<a href=\"https://www.google.com/\">google</a>") |
| .to_string(); |
| assert_eq!( |
| a, |
| "<a href=\"https://www.google.com/\" rel=\"noopener noreferrer\">google</a>" |
| ); |
| } |
| #[test] |
| fn clean_children_of_bad_element() { |
| let fragment = "<bad><evil>a</evil>b</bad>"; |
| let result = Builder::new().clean(fragment); |
| assert_eq!(result.to_string(), "ab"); |
| } |
| #[test] |
| fn reader_input() { |
| let fragment = b"an <script>evil()</script> example"; |
| let result = Builder::new().clean_from_reader(&fragment[..]); |
| assert!(result.is_ok()); |
| assert_eq!(result.unwrap().to_string(), "an example"); |
| } |
| #[test] |
| fn reader_non_utf8() { |
| let fragment = b"non-utf8 \xF0\x90\x80string"; |
| let result = Builder::new().clean_from_reader(&fragment[..]); |
| assert!(result.is_ok()); |
| assert_eq!(result.unwrap().to_string(), "non-utf8 \u{fffd}string"); |
| } |
| #[test] |
| fn display_impl() { |
| let fragment = r#"a <a>link</a>"#; |
| let result = Builder::new().link_rel(None).clean(fragment); |
| assert_eq!(format!("{}", result), "a <a>link</a>"); |
| } |
| #[test] |
| fn debug_impl() { |
| let fragment = r#"a <a>link</a>"#; |
| let result = Builder::new().link_rel(None).clean(fragment); |
| assert_eq!(format!("{:?}", result), "Document(a <a>link</a>)"); |
| } |
| #[cfg(ammonia_unstable)] |
| #[test] |
| fn to_dom_node() { |
| let fragment = r#"a <a>link</a>"#; |
| let result = Builder::new().link_rel(None).clean(fragment); |
| let _node = result.to_dom_node(); |
| } |
| #[test] |
| fn string_from_document() { |
| let fragment = r#"a <a>link"#; |
| let result = String::from(Builder::new().link_rel(None).clean(fragment)); |
| assert_eq!(format!("{}", result), "a <a>link</a>"); |
| } |
| fn require_sync<T: Sync>(_: T) {} |
| fn require_send<T: Send>(_: T) {} |
| #[test] |
| fn require_sync_and_send() { |
| require_sync(Builder::new()); |
| require_send(Builder::new()); |
| } |
| #[test] |
| fn id_prefixed() { |
| let fragment = "<a id=\"hello\"></a><b id=\"hello\"></a>"; |
| let result = String::from( |
| Builder::new() |
| .tag_attributes(hashmap![ |
| "a" => hashset!["id"], |
| ]) |
| .id_prefix(Some("prefix-")) |
| .clean(fragment), |
| ); |
| assert_eq!( |
| result.to_string(), |
| "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a><b></b>" |
| ); |
| } |
| #[test] |
| fn id_already_prefixed() { |
| let fragment = "<a id=\"prefix-hello\"></a>"; |
| let result = String::from( |
| Builder::new() |
| .tag_attributes(hashmap![ |
| "a" => hashset!["id"], |
| ]) |
| .id_prefix(Some("prefix-")) |
| .clean(fragment), |
| ); |
| assert_eq!( |
| result.to_string(), |
| "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a>" |
| ); |
| } |
| #[test] |
| fn clean_content_tags() { |
| let fragment = "<script type=\"text/javascript\"><a>Hello!</a></script>"; |
| let result = String::from( |
| Builder::new() |
| .clean_content_tags(hashset!["script"]) |
| .clean(fragment), |
| ); |
| assert_eq!(result.to_string(), ""); |
| } |
| #[test] |
| fn only_clean_content_tags() { |
| let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>"; |
| let result = String::from( |
| Builder::new() |
| .clean_content_tags(hashset!["script"]) |
| .clean(fragment), |
| ); |
| assert_eq!(result.to_string(), "<em>This is</em><p>still here!</p>"); |
| } |
| #[test] |
| fn clean_removed_default_tag() { |
| let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>"; |
| let result = String::from( |
| Builder::new() |
| .rm_tags(hashset!["a"]) |
| .rm_tag_attributes("a", hashset!["href", "hreflang"]) |
| .clean_content_tags(hashset!["script"]) |
| .clean(fragment), |
| ); |
| assert_eq!(result.to_string(), "<em>This is</em><p>still here!</p>"); |
| } |
| #[test] |
| #[should_panic] |
| fn panic_on_clean_content_tag_attribute() { |
| Builder::new() |
| .rm_tags(std::iter::once("a")) |
| .clean_content_tags(hashset!["a"]) |
| .clean(""); |
| } |
| #[test] |
| #[should_panic] |
| fn panic_on_clean_content_tag() { |
| Builder::new().clean_content_tags(hashset!["a"]).clean(""); |
| } |
| |
| #[test] |
| fn clean_text_test() { |
| assert_eq!( |
| clean_text("<this> is <a test function"), |
| "<this> is <a test function" |
| ); |
| } |
| |
| #[test] |
| fn clean_text_spaces_test() { |
| assert_eq!(clean_text("\x09\x0a\x0c\x20"), "	  "); |
| } |
| |
| #[test] |
| fn ns_svg() { |
| // https://github.com/cure53/DOMPurify/pull/495 |
| let fragment = r##"<svg><iframe><a title="</iframe><img src onerror=alert(1)>">test"##; |
| let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment)); |
| assert_eq!(result.to_string(), "test"); |
| |
| let fragment = "<svg><iframe>remove me</iframe></svg><iframe>keep me</iframe>"; |
| let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment)); |
| assert_eq!(result.to_string(), "remove me<iframe>keep me</iframe>"); |
| |
| let fragment = "<svg><a>remove me</a></svg><iframe>keep me</iframe>"; |
| let result = String::from(Builder::new().add_tags(&["iframe"]).clean(fragment)); |
| assert_eq!(result.to_string(), "remove me<iframe>keep me</iframe>"); |
| |
| let fragment = "<svg><a>keep me</a></svg><iframe>keep me</iframe>"; |
| let result = String::from(Builder::new().add_tags(&["iframe", "svg"]).clean(fragment)); |
| assert_eq!( |
| result.to_string(), |
| "<svg><a rel=\"noopener noreferrer\">keep me</a></svg><iframe>keep me</iframe>" |
| ); |
| } |
| |
| #[test] |
| fn ns_mathml() { |
| // https://github.com/cure53/DOMPurify/pull/495 |
| let fragment = "<mglyph></mglyph>"; |
| let result = String::from( |
| Builder::new() |
| .add_tags(&["math", "mtext", "mglyph"]) |
| .clean(fragment), |
| ); |
| assert_eq!(result.to_string(), ""); |
| let fragment = "<math><mtext><div><mglyph>"; |
| let result = String::from( |
| Builder::new() |
| .add_tags(&["math", "mtext", "mglyph"]) |
| .clean(fragment), |
| ); |
| assert_eq!( |
| result.to_string(), |
| "<math><mtext><div></div></mtext></math>" |
| ); |
| let fragment = "<math><mtext><mglyph>"; |
| let result = String::from( |
| Builder::new() |
| .add_tags(&["math", "mtext", "mglyph"]) |
| .clean(fragment), |
| ); |
| assert_eq!( |
| result.to_string(), |
| "<math><mtext><mglyph></mglyph></mtext></math>" |
| ); |
| } |
| |
| #[test] |
| fn generic_attribute_prefixes() { |
| let prefix_data = ["data-"]; |
| let prefix_code = ["code-"]; |
| let mut b = Builder::new(); |
| let mut hs: HashSet<&'_ str> = HashSet::new(); |
| hs.insert("data-"); |
| assert!(b.generic_attribute_prefixes.is_none()); |
| b.generic_attribute_prefixes(hs); |
| assert!(b.generic_attribute_prefixes.is_some()); |
| assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1); |
| b.add_generic_attribute_prefixes(&prefix_data); |
| assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1); |
| b.add_generic_attribute_prefixes(&prefix_code); |
| assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 2); |
| b.rm_generic_attribute_prefixes(&prefix_code); |
| assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1); |
| b.rm_generic_attribute_prefixes(&prefix_code); |
| assert_eq!(b.generic_attribute_prefixes.as_ref().unwrap().len(), 1); |
| b.rm_generic_attribute_prefixes(&prefix_data); |
| assert!(b.generic_attribute_prefixes.is_none()); |
| } |
| |
| #[test] |
| fn generic_attribute_prefixes_clean() { |
| let fragment = r#"<a data-1 data-2 code-1 code-2><a>Hello!</a></a>"#; |
| let result_cleaned = String::from( |
| Builder::new() |
| .add_tag_attributes("a", &["data-1"]) |
| .clean(fragment), |
| ); |
| assert_eq!( |
| result_cleaned, |
| r#"<a data-1="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"# |
| ); |
| let result_allowed = String::from( |
| Builder::new() |
| .add_tag_attributes("a", &["data-1"]) |
| .add_generic_attribute_prefixes(&["data-"]) |
| .clean(fragment), |
| ); |
| assert_eq!( |
| result_allowed, |
| r#"<a data-1="" data-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"# |
| ); |
| let result_allowed = String::from( |
| Builder::new() |
| .add_tag_attributes("a", &["data-1", "code-1"]) |
| .add_generic_attribute_prefixes(&["data-", "code-"]) |
| .clean(fragment), |
| ); |
| assert_eq!( |
| result_allowed, |
| r#"<a data-1="" data-2="" code-1="" code-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"# |
| ); |
| } |
| #[test] |
| fn lesser_than_isnt_html() { |
| let fragment = "1 < 2"; |
| assert!(!is_html(fragment)); |
| } |
| #[test] |
| fn dense_lesser_than_isnt_html() { |
| let fragment = "1<2"; |
| assert!(!is_html(fragment)); |
| } |
| #[test] |
| fn what_about_number_elements() { |
| let fragment = "foo<2>bar"; |
| assert!(!is_html(fragment)); |
| } |
| #[test] |
| fn turbofish_is_html_sadly() { |
| let fragment = "Vec::<u8>::new()"; |
| assert!(is_html(fragment)); |
| } |
| #[test] |
| fn stop_grinning() { |
| let fragment = "did you really believe me? <g>"; |
| assert!(is_html(fragment)); |
| } |
| #[test] |
| fn dont_be_bold() { |
| let fragment = "<b>"; |
| assert!(is_html(fragment)); |
| } |
| |
| #[test] |
| fn rewrite_with_root() { |
| let tests = [ |
| ( |
| "https://github.com/rust-ammonia/ammonia/blob/master/", |
| "README.md", |
| "", |
| "https://github.com/rust-ammonia/ammonia/blob/master/README.md", |
| ), |
| ( |
| "https://github.com/rust-ammonia/ammonia/blob/master/", |
| "README.md", |
| "/", |
| "https://github.com/rust-ammonia/ammonia/blob/master/", |
| ), |
| ( |
| "https://github.com/rust-ammonia/ammonia/blob/master/", |
| "README.md", |
| "/CONTRIBUTING.md", |
| "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md", |
| ), |
| ( |
| "https://github.com/rust-ammonia/ammonia/blob/master", |
| "README.md", |
| "", |
| "https://github.com/rust-ammonia/ammonia/blob/README.md", |
| ), |
| ( |
| "https://github.com/rust-ammonia/ammonia/blob/master", |
| "README.md", |
| "/", |
| "https://github.com/rust-ammonia/ammonia/blob/", |
| ), |
| ( |
| "https://github.com/rust-ammonia/ammonia/blob/master", |
| "README.md", |
| "/CONTRIBUTING.md", |
| "https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md", |
| ), |
| ( |
| "https://github.com/rust-ammonia/ammonia/blob/master/", |
| "", |
| "", |
| "https://github.com/rust-ammonia/ammonia/blob/master/", |
| ), |
| ( |
| "https://github.com/rust-ammonia/ammonia/blob/master/", |
| "", |
| "/", |
| "https://github.com/rust-ammonia/ammonia/blob/master/", |
| ), |
| ( |
| "https://github.com/rust-ammonia/ammonia/blob/master/", |
| "", |
| "/CONTRIBUTING.md", |
| "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md", |
| ), |
| ( |
| "https://github.com/", |
| "rust-ammonia/ammonia/blob/master/README.md", |
| "", |
| "https://github.com/rust-ammonia/ammonia/blob/master/README.md", |
| ), |
| ( |
| "https://github.com/", |
| "rust-ammonia/ammonia/blob/master/README.md", |
| "/", |
| "https://github.com/", |
| ), |
| ( |
| "https://github.com/", |
| "rust-ammonia/ammonia/blob/master/README.md", |
| "CONTRIBUTING.md", |
| "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md", |
| ), |
| ( |
| "https://github.com/", |
| "rust-ammonia/ammonia/blob/master/README.md", |
| "/CONTRIBUTING.md", |
| "https://github.com/CONTRIBUTING.md", |
| ), |
| ]; |
| for (root, path, url, result) in tests { |
| let h = format!(r#"<a href="{url}">test</a>"#); |
| let r = format!(r#"<a href="{result}" rel="noopener noreferrer">test</a>"#); |
| let a = Builder::new() |
| .url_relative(UrlRelative::RewriteWithRoot { |
| root: Url::parse(root).unwrap(), |
| path: path.to_string(), |
| }) |
| .clean(&h) |
| .to_string(); |
| if r != a { |
| println!( |
| "failed to check ({root}, {path}, {url}, {result})\n{r} != {a}", |
| r = r |
| ); |
| assert_eq!(r, a); |
| } |
| } |
| } |
| } |