blob: d599cbb85d1f5cae31e871a71f08f6cf86dcb6d3 [file] [log] [blame]
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use html5ever::driver::ParseOpts;
use html5ever::serialize::{Serialize, SerializeOpts, Serializer, TraversalScope};
use html5ever::tendril::{SliceExt, StrTendril, TendrilSink};
use html5ever::tokenizer::{TagKind, Token, TokenSink, TokenSinkResult, Tokenizer};
use html5ever::{parse_document, parse_fragment, serialize, QualName};
use markup5ever::{local_name, namespace_url, ns};
use markup5ever_rcdom::{RcDom, SerializableHandle};
use std::io;
/// Newtype around the list of tokens collected from the tokenizer; the
/// `TokenSink` impl below appends every token it receives to the inner `Vec`.
struct Tokens(Vec<Token>);
impl TokenSink for Tokens {
type Handle = ();
fn process_token(&mut self, token: Token, _: u64) -> TokenSinkResult<()> {
self.0.push(token);
TokenSinkResult::Continue
}
}
impl Serialize for Tokens {
fn serialize<S>(&self, serializer: &mut S, _: TraversalScope) -> io::Result<()>
where
S: Serializer,
{
for t in self.0.iter() {
match t {
// TODO: check whether this is an IE conditional comment or a spec comment
&Token::TagToken(ref tag) => {
let name = QualName::new(
None,
"http://www.w3.org/1999/xhtml".into(),
tag.name.as_ref().into(),
);
match tag.kind {
TagKind::StartTag => serializer.start_elem(
name,
tag.attrs.iter().map(|at| (&at.name, &at.value[..])),
)?,
TagKind::EndTag => serializer.end_elem(name)?,
}
},
&Token::DoctypeToken(ref dt) => match dt.name {
Some(ref name) => serializer.write_doctype(&name)?,
None => {},
},
&Token::CommentToken(ref chars) => serializer.write_comment(&chars)?,
&Token::CharacterTokens(ref chars) => serializer.write_text(&chars)?,
&Token::NullCharacterToken | &Token::EOFToken => {},
&Token::ParseError(ref e) => println!("parse error: {:#?}", e),
}
}
Ok(())
}
}
/// Runs `input` through the tokenizer only (no tree construction) and
/// serializes the collected token stream back into a `StrTendril`.
///
/// Panics if serialization fails or the produced bytes are rejected by
/// `StrTendril::try_from_byte_slice`.
fn tokenize_and_serialize(input: StrTendril) -> StrTendril {
    let mut queue = ::html5ever::tokenizer::BufferQueue::new();
    queue.push_front(input.into());

    let mut tok = Tokenizer::new(Tokens(vec![]), Default::default());
    // The feed result is deliberately discarded, as in the original helper.
    let _ = tok.feed(&mut queue);
    tok.end();

    // NOTE(review): `create_missing_parent` is presumably enabled so that the
    // unbalanced tags a raw token stream may contain can still be serialized
    // (see the `malformed_tokens` case) — confirm against the serializer docs.
    let opts = SerializeOpts {
        create_missing_parent: true,
        ..Default::default()
    };
    let mut sink = ::std::io::Cursor::new(vec![]);
    serialize(&mut sink, &tok.sink, opts).unwrap();

    let bytes = sink.into_inner();
    StrTendril::try_from_byte_slice(&bytes).unwrap()
}
/// Parses `input` as an HTML fragment whose context element is `<body>`, then
/// serializes the first child of the resulting document node with the default
/// serializer options.
///
/// Panics if the document node has no children, if serialization fails, or if
/// the produced bytes are rejected by `StrTendril::try_from_byte_slice`.
fn parse_and_serialize(input: StrTendril) -> StrTendril {
    let context = QualName::new(None, ns!(html), local_name!("body"));
    let parser = parse_fragment(RcDom::default(), ParseOpts::default(), context, vec![]);
    let dom = parser.one(input);

    let root: SerializableHandle = dom.document.children.borrow()[0].clone().into();
    let mut bytes = vec![];
    serialize(&mut bytes, &root, Default::default()).unwrap();
    StrTendril::try_from_byte_slice(&bytes).unwrap()
}
/// Generates a `#[test]` function named `$test_name` that feeds `$markup`
/// (converted with `.to_tendril()`) to the helper `$func` and asserts that the
/// result, viewed as a string slice, equals `$expected`.
///
/// With only three arguments the markup doubles as the expected output, i.e.
/// the input is expected to round-trip unchanged.
macro_rules! test_fn {
    ($func:ident, $test_name:ident, $markup:expr, $expected:expr) => {
        #[test]
        fn $test_name() {
            let actual = $func($markup.to_tendril());
            assert_eq!($expected, &*actual);
        }
    };
    // Round-trip shorthand: expected output is the input itself.
    ($func:ident, $test_name:ident, $markup:expr) => {
        test_fn!($func, $test_name, $markup, $markup);
    };
}
/// Declares a test that goes through `parse_and_serialize`; all arguments are
/// forwarded unchanged to `test_fn!`.
macro_rules! test {
    ($($args:tt)*) => {
        test_fn!(parse_and_serialize, $($args)*);
    };
}
/// Declares a test that goes through `tokenize_and_serialize` (tokenizer
/// output only); all arguments are forwarded unchanged to `test_fn!`.
macro_rules! test_no_parse {
    ($($args:tt)*) => {
        test_fn!(tokenize_and_serialize, $($args)*);
    };
}
// Basic cases: empty input, an input that ends in the middle of a tag (expected
// to serialize to nothing), a plain round trip, and misnested inline tags that
// are expected to come back properly nested.
test!(empty, r#""#);
test!(fuzz, "<a a=\r\n", "");
test!(smoke_test, r#"<p><i>Hello</i>, World!</p>"#);
test!(
misnest,
r#"<p><i>Hello!</p>, World!</i>"#,
r#"<p><i>Hello!</i></p><i>, World!</i>"#
);
// Attribute values: `<` and `'` are expected to pass through literally, while
// `&`, U+00A0 and `"` are expected as `&amp;`, `&nbsp;` and `&quot;`. Unquoted
// and single-quoted values are expected to come back double-quoted.
test!(attr_literal, r#"<base foo="<'>">"#);
test!(attr_escape_amp, r#"<base foo="&amp;">"#);
test!(
attr_escape_amp_2,
r#"<base foo=&amp>"#,
r#"<base foo="&amp;">"#
);
test!(
attr_escape_nbsp,
"<base foo=x\u{a0}y>",
r#"<base foo="x&nbsp;y">"#
);
test!(
attr_escape_quot,
r#"<base foo='"'>"#,
r#"<base foo="&quot;">"#
);
test!(
attr_escape_several,
r#"<span foo=3 title='test "with" &amp;quot;'>"#,
r#"<span foo="3" title="test &quot;with&quot; &amp;quot;"></span>"#
);
// Text nodes: quotes are expected to stay literal, while `&`, U+00A0, `<` and
// `>` are expected as `&amp;`, `&nbsp;`, `&lt;` and `&gt;` (including a bare
// `>` and an `&amp` without the trailing semicolon in the input).
test!(text_literal, r#"<p>"'"</p>"#);
test!(text_escape_amp, r#"<p>&amp;</p>"#);
test!(text_escape_amp_2, r#"<p>&amp</p>"#, r#"<p>&amp;</p>"#);
test!(text_escape_nbsp, "<p>x\u{a0}y</p>", r#"<p>x&nbsp;y</p>"#);
test!(text_escape_lt, r#"<p>&lt;</p>"#);
test!(text_escape_gt, r#"<p>&gt;</p>"#);
test!(text_escape_gt2, r#"<p>></p>"#, r#"<p>&gt;</p>"#);
// Elements whose content is expected to round-trip verbatim, with `&`, `<`,
// `>` and quotes left unescaped: script, style, xmp, iframe, noembed, noframes.
test!(
script_literal,
r#"<script>(x & 1) < 2; y > "foo" + 'bar'</script>"#
);
test!(
style_literal,
r#"<style>(x & 1) < 2; y > "foo" + 'bar'</style>"#
);
test!(xmp_literal, r#"<xmp>(x & 1) < 2; y > "foo" + 'bar'</xmp>"#);
test!(
iframe_literal,
r#"<iframe>(x & 1) < 2; y > "foo" + 'bar'</iframe>"#
);
test!(
noembed_literal,
r#"<noembed>(x & 1) < 2; y > "foo" + 'bar'</noembed>"#
);
test!(
noframes_literal,
r#"<noframes>(x & 1) < 2; y > "foo" + 'bar'</noframes>"#
);
// Leading-newline handling for pre, textarea and listing. The `_0` cases have
// no leading newline and round-trip as is; `_1` expects a single newline right
// after the start tag to disappear; `_2` expects only the first of two
// newlines to disappear.
test!(pre_lf_0, "<pre>foo bar</pre>");
test!(pre_lf_1, "<pre>\nfoo bar</pre>", "<pre>foo bar</pre>");
test!(pre_lf_2, "<pre>\n\nfoo bar</pre>", "<pre>\nfoo bar</pre>");
test!(textarea_lf_0, "<textarea>foo bar</textarea>");
test!(
textarea_lf_1,
"<textarea>\nfoo bar</textarea>",
"<textarea>foo bar</textarea>"
);
test!(
textarea_lf_2,
"<textarea>\n\nfoo bar</textarea>",
"<textarea>\nfoo bar</textarea>"
);
test!(listing_lf_0, "<listing>foo bar</listing>");
test!(
listing_lf_1,
"<listing>\nfoo bar</listing>",
"<listing>foo bar</listing>"
);
test!(
listing_lf_2,
"<listing>\n\nfoo bar</listing>",
"<listing>\nfoo bar</listing>"
);
// Comments are expected to round-trip with leading/trailing whitespace inside
// the comment preserved exactly.
test!(comment_1, r#"<p>hi <!--world--></p>"#);
test!(comment_2, r#"<p>hi <!-- world--></p>"#);
test!(comment_3, r#"<p>hi <!--world --></p>"#);
test!(comment_4, r#"<p>hi <!-- world --></p>"#);
// FIXME: test serialization of qualified tag/attribute names that can't be
// parsed from HTML
// `xmlns`, `xmlns:*` and `xlink:*` attributes on <svg> are expected to
// round-trip unchanged.
test!(attr_ns_1, r#"<svg xmlns="bleh"></svg>"#);
test!(attr_ns_2, r#"<svg xmlns:foo="bleh"></svg>"#);
test!(attr_ns_3, r#"<svg xmlns:xlink="bleh"></svg>"#);
test!(attr_ns_4, r#"<svg xlink:href="bleh"></svg>"#);
// Tokenizer-only path: an end tag with no matching start tag followed by an
// unclosed start tag is expected to be serialized back unchanged.
test_no_parse!(malformed_tokens, r#"foo</div><div>"#);
/// Parses a document consisting only of a doctype and checks that the doctype
/// node is serialized as `<!DOCTYPE html>`.
#[test]
fn doctype() {
    let dom = parse_document(RcDom::default(), ParseOpts::default()).one("<!doctype html>");
    // Keep only the first child of the document node, dropping the <html>
    // element that follows it.
    dom.document.children.borrow_mut().truncate(1);

    let root: SerializableHandle = dom.document.clone().into();
    let mut bytes = vec![];
    serialize(&mut bytes, &root, Default::default()).unwrap();

    let text = String::from_utf8(bytes).unwrap();
    assert_eq!(text, "<!DOCTYPE html>");
}
/// Parses 60,000 consecutive unclosed `<b>` start tags as a fragment in a
/// `<div>` context and checks that serializing the resulting document
/// completes without returning an error.
#[test]
fn deep_tree() {
    let context = QualName::new(None, ns!(html), local_name!("div"));
    let parser = parse_fragment(RcDom::default(), ParseOpts::default(), context, vec![]);

    let markup = "<b>".repeat(60_000);
    let dom = parser.one(markup);

    let root: SerializableHandle = dom.document.clone().into();
    let mut sink = Vec::new();
    serialize(&mut sink, &root, SerializeOpts::default())
        .expect("Writing to a string shouldn't fail (expect on OOM)");
}