blob: d084e0115a8b2c91acf65cd605c126cb22e559da [file] [log] [blame] [edit]
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
extern crate html5ever;
extern crate typed_arena;
use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
use html5ever::tendril::{StrTendril, TendrilSink};
use html5ever::{parse_document, Attribute, ExpandedName, QualName};
use std::borrow::Cow;
use std::cell::{Cell, RefCell};
use std::collections::HashSet;
use std::io::{self, Read};
use std::ptr;
/// Reads an entire HTML document from standard input and parses it into an
/// arena-allocated tree.
///
/// The returned document node is not used further; the program only drives
/// the parser over the input.
///
/// # Panics
///
/// Panics with a descriptive message if standard input cannot be read to
/// the end.
fn main() {
    let mut bytes = Vec::new();
    // Reading stdin is real I/O and can fail; say what failed instead of a
    // bare `unwrap()` panic.
    io::stdin()
        .read_to_end(&mut bytes)
        .expect("failed to read HTML input from stdin");
    let arena = typed_arena::Arena::new();
    html5ever_parse_slice_into_arena(&bytes, &arena);
}
/// Parses `bytes` as an HTML document, allocating every node in `arena`.
///
/// A fresh `Document` node is allocated as the root, a `Sink` is built around
/// it, and the bytes are fed through html5ever's UTF-8 decoding front end in
/// a single chunk. Returns whatever `Sink::finish` yields, i.e. the root
/// document node.
fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> {
    let document: Ref<'a> = arena.alloc(Node::new(NodeData::Document));
    let sink = Sink {
        arena,
        document,
        quirks_mode: QuirksMode::NoQuirks,
    };
    let parser = parse_document(sink, Default::default()).from_utf8();
    parser.one(bytes)
}
/// Shared reference to the `typed_arena::Arena` in which the `Node`s of a tree are allocated.
type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>;
/// Shared reference to a `Node`; used as the `TreeSink::Handle` type of `Sink`.
type Ref<'arena> = &'arena Node<'arena>;
/// Optional, interior-mutable pointer to another node; the type of every tree link in `Node`.
type Link<'arena> = Cell<Option<Ref<'arena>>>;
/// `TreeSink` implementation that builds the parsed tree out of arena-allocated `Node`s.
struct Sink<'arena> {
/// Arena used by `new_node` to allocate nodes.
arena: Arena<'arena>,
/// Root document node; returned by `finish` and `get_document`.
document: Ref<'arena>,
/// Last value passed to `set_quirks_mode` (initialised to `NoQuirks` by
/// `html5ever_parse_slice_into_arena`).
quirks_mode: QuirksMode,
}
/// A tree node linked to its parent, siblings and children through `Cell`-wrapped references.
///
/// All links are `Link`s, so they can be rewired through a shared reference.
pub struct Node<'arena> {
/// Parent node, or `None` while the node is detached.
parent: Link<'arena>,
/// Sibling that follows this node under the same parent, if any.
next_sibling: Link<'arena>,
/// Sibling that precedes this node under the same parent, if any.
previous_sibling: Link<'arena>,
/// First child of this node, or `None` if it has no children.
first_child: Link<'arena>,
/// Last child of this node, or `None` if it has no children.
last_child: Link<'arena>,
/// Payload describing what kind of node this is.
data: NodeData<'arena>,
}
/// The kind-specific payload carried by a `Node`.
pub enum NodeData<'arena> {
/// A document root; also used by `create_element` as the container for template contents.
Document,
/// A doctype, as created by `append_doctype_to_document`.
Doctype {
name: StrTendril,
public_id: StrTendril,
system_id: StrTendril,
},
/// A text node.
Text {
/// Wrapped in a `RefCell` so `append_common` can push further text onto an existing node.
contents: RefCell<StrTendril>,
},
/// A comment, as created by `create_comment`.
Comment {
contents: StrTendril,
},
/// An element, as created by `create_element`.
Element {
name: QualName,
/// Wrapped in a `RefCell` so `add_attrs_if_missing` can extend the list in place.
attrs: RefCell<Vec<Attribute>>,
/// `Some` only when the element was created with `ElementFlags::template` set.
template_contents: Option<Ref<'arena>>,
/// Copied from `ElementFlags::mathml_annotation_xml_integration_point` at creation.
mathml_annotation_xml_integration_point: bool,
},
/// A processing instruction, as created by `create_pi`.
ProcessingInstruction {
target: StrTendril,
contents: StrTendril,
},
}
impl<'arena> Node<'arena> {
    /// Builds a free-standing node around `data`: no parent, no siblings, no children.
    fn new(data: NodeData<'arena>) -> Self {
        Node {
            data,
            parent: Cell::new(None),
            first_child: Cell::new(None),
            last_child: Cell::new(None),
            previous_sibling: Cell::new(None),
            next_sibling: Cell::new(None),
        }
    }

    /// Unlinks this node from its parent and siblings.
    ///
    /// The node's own `parent`, `previous_sibling` and `next_sibling` links are
    /// cleared, and the neighbours (or the parent's first/last child pointers)
    /// are stitched together across the gap. The node's children are untouched.
    fn detach(&self) {
        let old_parent = self.parent.take();
        let before = self.previous_sibling.take();
        let after = self.next_sibling.take();

        // Repair the backward link: either the following sibling, or — when
        // this node was the last child — the parent's `last_child`.
        match (after, old_parent) {
            (Some(next), _) => next.previous_sibling.set(before),
            (None, Some(parent)) => parent.last_child.set(before),
            (None, None) => {}
        }

        // Repair the forward link: either the preceding sibling, or — when
        // this node was the first child — the parent's `first_child`.
        match (before, old_parent) {
            (Some(prev), _) => prev.next_sibling.set(after),
            (None, Some(parent)) => parent.first_child.set(after),
            (None, None) => {}
        }
    }

    /// Makes `new_child` the last child of `self`, detaching it from wherever
    /// it was linked before.
    fn append(&'arena self, new_child: &'arena Self) {
        new_child.detach();
        new_child.parent.set(Some(self));

        match self.last_child.take() {
            Some(tail) => {
                new_child.previous_sibling.set(Some(tail));
                debug_assert!(tail.next_sibling.get().is_none());
                tail.next_sibling.set(Some(new_child));
            }
            None => {
                // No last child means there must be no first child either.
                debug_assert!(self.first_child.get().is_none());
                self.first_child.set(Some(new_child));
            }
        }

        self.last_child.set(Some(new_child));
    }

    /// Links `new_sibling` immediately before `self` under `self`'s parent,
    /// detaching it from wherever it was linked before.
    fn insert_before(&'arena self, new_sibling: &'arena Self) {
        new_sibling.detach();

        let parent = self.parent.get();
        new_sibling.parent.set(parent);
        new_sibling.next_sibling.set(Some(self));

        match (self.previous_sibling.take(), parent) {
            (Some(before), _) => {
                new_sibling.previous_sibling.set(Some(before));
                debug_assert!(ptr::eq::<Node>(
                    before.next_sibling.get().unwrap(),
                    self
                ));
                before.next_sibling.set(Some(new_sibling));
            }
            (None, Some(parent)) => {
                // `self` had no previous sibling, so it was the first child.
                debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self));
                parent.first_child.set(Some(new_sibling));
            }
            (None, None) => {}
        }

        self.previous_sibling.set(Some(new_sibling));
    }
}
impl<'arena> Sink<'arena> {
    /// Allocates a new, unlinked node carrying `data` in the sink's arena.
    fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> {
        let node = Node::new(data);
        self.arena.alloc(node)
    }

    /// Shared insertion logic for `append` and `append_before_sibling`.
    ///
    /// * `child` — the node or text to insert.
    /// * `previous` — yields the node that would end up directly before the
    ///   insertion point; only called when `child` is text.
    /// * `append` — performs the actual linking of a node.
    ///
    /// Text is merged into the preceding node when that node is itself a
    /// text node; otherwise a new text node is allocated and linked.
    fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A)
    where
        P: FnOnce() -> Option<Ref<'arena>>,
        A: FnOnce(Ref<'arena>),
    {
        // Existing nodes are linked as they are.
        let text = match child {
            NodeOrText::AppendNode(node) => return append(node),
            NodeOrText::AppendText(text) => text,
        };

        // Append to an existing Text node if we have one.
        if let Some(neighbour) = previous() {
            if let NodeData::Text { ref contents } = neighbour.data {
                contents.borrow_mut().push_tendril(&text);
                return;
            }
        }

        let text_node = self.new_node(NodeData::Text {
            contents: RefCell::new(text),
        });
        append(text_node)
    }
}
impl<'arena> TreeSink for Sink<'arena> {
    type Handle = Ref<'arena>;
    type Output = Ref<'arena>;

    /// Consumes the sink and hands back the root document node.
    fn finish(self) -> Ref<'arena> {
        self.document
    }

    /// Parse errors are ignored by this sink.
    fn parse_error(&mut self, _msg: Cow<'static, str>) {}

    /// Returns the root document node.
    fn get_document(&mut self) -> Ref<'arena> {
        self.document
    }

    /// Records the quirks mode reported by the parser.
    fn set_quirks_mode(&mut self, mode: QuirksMode) {
        self.quirks_mode = mode;
    }

    /// Two handles are the same node exactly when they point at the same address.
    fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool {
        let lhs: Ref<'arena> = *x;
        let rhs: Ref<'arena> = *y;
        ptr::eq::<Node>(lhs, rhs)
    }

    /// Returns the expanded name of an element node.
    ///
    /// # Panics
    ///
    /// Panics if `target` is not an element.
    fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> {
        if let NodeData::Element { ref name, .. } = target.data {
            name.expanded()
        } else {
            panic!("not an element!")
        }
    }

    /// Returns the template-contents node of a template element.
    ///
    /// # Panics
    ///
    /// Panics if `target` is not an element that has template contents.
    fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> {
        match target.data {
            NodeData::Element {
                template_contents: Some(contents),
                ..
            } => contents,
            _ => panic!("not a template element!"),
        }
    }

    /// Reports the integration-point flag stored on an element at creation.
    ///
    /// # Panics
    ///
    /// Panics if `target` is not an element.
    fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool {
        match target.data {
            NodeData::Element {
                mathml_annotation_xml_integration_point: is_integration_point,
                ..
            } => is_integration_point,
            _ => panic!("not an element!"),
        }
    }

    /// Allocates an unlinked element node; elements flagged as templates also
    /// get a fresh `Document` node to hold their contents.
    fn create_element(
        &mut self,
        name: QualName,
        attrs: Vec<Attribute>,
        flags: ElementFlags,
    ) -> Ref<'arena> {
        let template_contents = if flags.template {
            Some(self.new_node(NodeData::Document))
        } else {
            None
        };
        let element = NodeData::Element {
            name,
            attrs: RefCell::new(attrs),
            template_contents,
            mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
        };
        self.new_node(element)
    }

    /// Allocates an unlinked comment node.
    fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> {
        let comment = NodeData::Comment { contents: text };
        self.new_node(comment)
    }

    /// Allocates an unlinked processing-instruction node.
    fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> {
        let instruction = NodeData::ProcessingInstruction {
            target,
            contents: data,
        };
        self.new_node(instruction)
    }

    /// Appends `child` as the last child of `parent`, merging text into a
    /// trailing text node when there is one.
    fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
        let parent: Ref<'arena> = *parent;
        let last_child = || parent.last_child.get();
        let attach = |new_node: Ref<'arena>| parent.append(new_node);
        self.append_common(child, last_child, attach)
    }

    /// Inserts `child` directly before `sibling`, merging text into the node
    /// preceding `sibling` when that node is a text node.
    fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) {
        let sibling: Ref<'arena> = *sibling;
        let preceding = || sibling.previous_sibling.get();
        let attach = |new_node: Ref<'arena>| sibling.insert_before(new_node);
        self.append_common(child, preceding, attach)
    }

    /// Inserts `child` before `element` when `element` has a parent, and
    /// otherwise appends it to `prev_element`.
    fn append_based_on_parent_node(
        &mut self,
        element: &Ref<'arena>,
        prev_element: &Ref<'arena>,
        child: NodeOrText<Ref<'arena>>,
    ) {
        match element.parent.get() {
            Some(_) => self.append_before_sibling(element, child),
            None => self.append(prev_element, child),
        }
    }

    /// Allocates a doctype node and appends it to the root document.
    fn append_doctype_to_document(
        &mut self,
        name: StrTendril,
        public_id: StrTendril,
        system_id: StrTendril,
    ) {
        let doctype = self.new_node(NodeData::Doctype {
            name,
            public_id,
            system_id,
        });
        let document = self.document;
        document.append(doctype)
    }

    /// Adds each attribute from `attrs` whose name the element did not
    /// already carry before this call.
    ///
    /// # Panics
    ///
    /// Panics if `target` is not an element.
    fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) {
        let mut current = match target.data {
            NodeData::Element {
                attrs: ref cell, ..
            } => cell.borrow_mut(),
            _ => panic!("not an element"),
        };

        // Snapshot of the names present before anything is added.
        let known: HashSet<QualName> = current.iter().map(|attr| attr.name.clone()).collect();

        for attr in attrs {
            if !known.contains(&attr.name) {
                current.push(attr);
            }
        }
    }

    /// Detaches `target` from its parent and siblings.
    fn remove_from_parent(&mut self, target: &Ref<'arena>) {
        target.detach()
    }

    /// Moves every child of `node`, in order, to the end of `new_parent`'s children.
    fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) {
        let mut cursor = node.first_child.get();
        loop {
            let child = match cursor {
                Some(child) => child,
                None => break,
            };
            debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node));
            // Read the successor first: appending rewires the child's sibling links.
            cursor = child.next_sibling.get();
            new_parent.append(child);
        }
    }
}