| // Copyright 2014-2017 The html5ever Project Developers. See the |
| // COPYRIGHT file at the top-level directory of this distribution. |
| // |
| // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| // option. This file may not be copied, modified, or distributed |
| // except according to those terms. |
| |
| extern crate html5ever; |
| extern crate typed_arena; |
| |
| use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; |
| use html5ever::tendril::{StrTendril, TendrilSink}; |
| use html5ever::{parse_document, Attribute, ExpandedName, QualName}; |
| use std::borrow::Cow; |
| use std::cell::{Cell, RefCell}; |
| use std::collections::HashSet; |
| use std::io::{self, Read}; |
| use std::ptr; |
| |
| fn main() { |
| let mut bytes = Vec::new(); |
| io::stdin().read_to_end(&mut bytes).unwrap(); |
| let arena = typed_arena::Arena::new(); |
| html5ever_parse_slice_into_arena(&bytes, &arena); |
| } |
| |
| fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> { |
| let sink = Sink { |
| arena, |
| document: arena.alloc(Node::new(NodeData::Document)), |
| quirks_mode: QuirksMode::NoQuirks, |
| }; |
| parse_document(sink, Default::default()) |
| .from_utf8() |
| .one(bytes) |
| } |
| |
| type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>; |
| |
| type Ref<'arena> = &'arena Node<'arena>; |
| |
| type Link<'arena> = Cell<Option<Ref<'arena>>>; |
| |
| struct Sink<'arena> { |
| arena: Arena<'arena>, |
| document: Ref<'arena>, |
| quirks_mode: QuirksMode, |
| } |
| |
| pub struct Node<'arena> { |
| parent: Link<'arena>, |
| next_sibling: Link<'arena>, |
| previous_sibling: Link<'arena>, |
| first_child: Link<'arena>, |
| last_child: Link<'arena>, |
| data: NodeData<'arena>, |
| } |
| |
| pub enum NodeData<'arena> { |
| Document, |
| Doctype { |
| name: StrTendril, |
| public_id: StrTendril, |
| system_id: StrTendril, |
| }, |
| Text { |
| contents: RefCell<StrTendril>, |
| }, |
| Comment { |
| contents: StrTendril, |
| }, |
| Element { |
| name: QualName, |
| attrs: RefCell<Vec<Attribute>>, |
| template_contents: Option<Ref<'arena>>, |
| mathml_annotation_xml_integration_point: bool, |
| }, |
| ProcessingInstruction { |
| target: StrTendril, |
| contents: StrTendril, |
| }, |
| } |
| |
| impl<'arena> Node<'arena> { |
| fn new(data: NodeData<'arena>) -> Self { |
| Node { |
| parent: Cell::new(None), |
| previous_sibling: Cell::new(None), |
| next_sibling: Cell::new(None), |
| first_child: Cell::new(None), |
| last_child: Cell::new(None), |
| data, |
| } |
| } |
| |
| fn detach(&self) { |
| let parent = self.parent.take(); |
| let previous_sibling = self.previous_sibling.take(); |
| let next_sibling = self.next_sibling.take(); |
| |
| if let Some(next_sibling) = next_sibling { |
| next_sibling.previous_sibling.set(previous_sibling); |
| } else if let Some(parent) = parent { |
| parent.last_child.set(previous_sibling); |
| } |
| |
| if let Some(previous_sibling) = previous_sibling { |
| previous_sibling.next_sibling.set(next_sibling); |
| } else if let Some(parent) = parent { |
| parent.first_child.set(next_sibling); |
| } |
| } |
| |
| fn append(&'arena self, new_child: &'arena Self) { |
| new_child.detach(); |
| new_child.parent.set(Some(self)); |
| if let Some(last_child) = self.last_child.take() { |
| new_child.previous_sibling.set(Some(last_child)); |
| debug_assert!(last_child.next_sibling.get().is_none()); |
| last_child.next_sibling.set(Some(new_child)); |
| } else { |
| debug_assert!(self.first_child.get().is_none()); |
| self.first_child.set(Some(new_child)); |
| } |
| self.last_child.set(Some(new_child)); |
| } |
| |
| fn insert_before(&'arena self, new_sibling: &'arena Self) { |
| new_sibling.detach(); |
| new_sibling.parent.set(self.parent.get()); |
| new_sibling.next_sibling.set(Some(self)); |
| if let Some(previous_sibling) = self.previous_sibling.take() { |
| new_sibling.previous_sibling.set(Some(previous_sibling)); |
| debug_assert!(ptr::eq::<Node>( |
| previous_sibling.next_sibling.get().unwrap(), |
| self |
| )); |
| previous_sibling.next_sibling.set(Some(new_sibling)); |
| } else if let Some(parent) = self.parent.get() { |
| debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self)); |
| parent.first_child.set(Some(new_sibling)); |
| } |
| self.previous_sibling.set(Some(new_sibling)); |
| } |
| } |
| |
| impl<'arena> Sink<'arena> { |
| fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> { |
| self.arena.alloc(Node::new(data)) |
| } |
| |
| fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A) |
| where |
| P: FnOnce() -> Option<Ref<'arena>>, |
| A: FnOnce(Ref<'arena>), |
| { |
| let new_node = match child { |
| NodeOrText::AppendText(text) => { |
| // Append to an existing Text node if we have one. |
| if let Some(&Node { |
| data: NodeData::Text { ref contents }, |
| .. |
| }) = previous() |
| { |
| contents.borrow_mut().push_tendril(&text); |
| return; |
| } |
| self.new_node(NodeData::Text { |
| contents: RefCell::new(text), |
| }) |
| }, |
| NodeOrText::AppendNode(node) => node, |
| }; |
| |
| append(new_node) |
| } |
| } |
| |
| impl<'arena> TreeSink for Sink<'arena> { |
| type Handle = Ref<'arena>; |
| type Output = Ref<'arena>; |
| |
| fn finish(self) -> Ref<'arena> { |
| self.document |
| } |
| |
| fn parse_error(&mut self, _: Cow<'static, str>) {} |
| |
| fn get_document(&mut self) -> Ref<'arena> { |
| self.document |
| } |
| |
| fn set_quirks_mode(&mut self, mode: QuirksMode) { |
| self.quirks_mode = mode; |
| } |
| |
| fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool { |
| ptr::eq::<Node>(*x, *y) |
| } |
| |
| fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> { |
| match target.data { |
| NodeData::Element { ref name, .. } => name.expanded(), |
| _ => panic!("not an element!"), |
| } |
| } |
| |
| fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> { |
| if let NodeData::Element { |
| template_contents: Some(contents), |
| .. |
| } = target.data |
| { |
| contents |
| } else { |
| panic!("not a template element!") |
| } |
| } |
| |
| fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool { |
| if let NodeData::Element { |
| mathml_annotation_xml_integration_point, |
| .. |
| } = target.data |
| { |
| mathml_annotation_xml_integration_point |
| } else { |
| panic!("not an element!") |
| } |
| } |
| |
| fn create_element( |
| &mut self, |
| name: QualName, |
| attrs: Vec<Attribute>, |
| flags: ElementFlags, |
| ) -> Ref<'arena> { |
| self.new_node(NodeData::Element { |
| name, |
| attrs: RefCell::new(attrs), |
| template_contents: if flags.template { |
| Some(self.new_node(NodeData::Document)) |
| } else { |
| None |
| }, |
| mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, |
| }) |
| } |
| |
| fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> { |
| self.new_node(NodeData::Comment { contents: text }) |
| } |
| |
| fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> { |
| self.new_node(NodeData::ProcessingInstruction { |
| target, |
| contents: data, |
| }) |
| } |
| |
| fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { |
| self.append_common( |
| child, |
| || parent.last_child.get(), |
| |new_node| parent.append(new_node), |
| ) |
| } |
| |
| fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { |
| self.append_common( |
| child, |
| || sibling.previous_sibling.get(), |
| |new_node| sibling.insert_before(new_node), |
| ) |
| } |
| |
| fn append_based_on_parent_node( |
| &mut self, |
| element: &Ref<'arena>, |
| prev_element: &Ref<'arena>, |
| child: NodeOrText<Ref<'arena>>, |
| ) { |
| if element.parent.get().is_some() { |
| self.append_before_sibling(element, child) |
| } else { |
| self.append(prev_element, child) |
| } |
| } |
| |
| fn append_doctype_to_document( |
| &mut self, |
| name: StrTendril, |
| public_id: StrTendril, |
| system_id: StrTendril, |
| ) { |
| self.document.append(self.new_node(NodeData::Doctype { |
| name, |
| public_id, |
| system_id, |
| })) |
| } |
| |
| fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) { |
| let mut existing = if let NodeData::Element { ref attrs, .. } = target.data { |
| attrs.borrow_mut() |
| } else { |
| panic!("not an element") |
| }; |
| |
| let existing_names = existing |
| .iter() |
| .map(|e| e.name.clone()) |
| .collect::<HashSet<_>>(); |
| existing.extend( |
| attrs |
| .into_iter() |
| .filter(|attr| !existing_names.contains(&attr.name)), |
| ); |
| } |
| |
| fn remove_from_parent(&mut self, target: &Ref<'arena>) { |
| target.detach() |
| } |
| |
| fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) { |
| let mut next_child = node.first_child.get(); |
| while let Some(child) = next_child { |
| debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node)); |
| next_child = child.next_sibling.get(); |
| new_parent.append(child) |
| } |
| } |
| } |