| //! Contains an implementation of pull-based XML parser. |
| |
| use crate::common::{is_xml10_char, is_xml11_char, is_xml11_char_not_restricted, is_name_char, is_name_start_char, is_whitespace_char}; |
| use crate::common::{Position, TextPosition, XmlVersion}; |
| use crate::name::OwnedName; |
| use crate::namespace::NamespaceStack; |
| use crate::reader::config::ParserConfig2; |
| use crate::reader::error::SyntaxError; |
| use crate::reader::events::XmlEvent; |
| use crate::reader::indexset::AttributesSet; |
| use crate::reader::lexer::{Lexer, Token}; |
| use super::{Error, ErrorKind}; |
| |
| use std::collections::HashMap; |
| use std::io::Read; |
| |
| macro_rules! gen_takes( |
| ($($field:ident -> $method:ident, $t:ty, $def:expr);+) => ( |
| $( |
| impl MarkupData { |
| #[inline] |
| #[allow(clippy::mem_replace_option_with_none)] |
| fn $method(&mut self) -> $t { |
| std::mem::replace(&mut self.$field, $def) |
| } |
| } |
| )+ |
| ) |
| ); |
| |
| gen_takes!( |
| name -> take_name, String, String::new(); |
| ref_data -> take_ref_data, String, String::new(); |
| |
| encoding -> take_encoding, Option<String>, None; |
| |
| element_name -> take_element_name, Option<OwnedName>, None; |
| |
| attr_name -> take_attr_name, Option<OwnedName>, None; |
| attributes -> take_attributes, AttributesSet, AttributesSet::new() |
| ); |
| |
| mod inside_cdata; |
| mod inside_closing_tag_name; |
| mod inside_comment; |
| mod inside_declaration; |
| mod inside_doctype; |
| mod inside_opening_tag; |
| mod inside_processing_instruction; |
| mod inside_reference; |
| mod outside_tag; |
| |
| static DEFAULT_VERSION: XmlVersion = XmlVersion::Version10; |
| static DEFAULT_STANDALONE: Option<bool> = None; |
| |
| type ElementStack = Vec<OwnedName>; |
| pub type Result = super::Result<XmlEvent>; |
| |
| /// Pull-based XML parser. |
| pub(crate) struct PullParser { |
| config: ParserConfig2, |
| lexer: Lexer, |
| st: State, |
| state_after_reference: State, |
| buf: String, |
| |
| /// From DTD internal subset |
| entities: HashMap<String, String>, |
| |
| nst: NamespaceStack, |
| |
| data: MarkupData, |
| final_result: Option<Result>, |
| next_event: Option<Result>, |
| est: ElementStack, |
| pos: Vec<TextPosition>, |
| |
| encountered: Encountered, |
| inside_whitespace: bool, |
| read_prefix_separator: bool, |
| pop_namespace: bool, |
| } |
| |
| // Keeps track when XML declaration can happen |
| #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] |
| enum Encountered { |
| None = 0, |
| AnyChars, // whitespace before <?xml is not allowed |
| Declaration, |
| Comment, |
| Doctype, |
| Element, |
| } |
| |
| impl PullParser { |
| /// Returns a new parser using the given config. |
| #[inline] |
| pub fn new(config: impl Into<ParserConfig2>) -> PullParser { |
| let config = config.into(); |
| Self::new_with_config2(config) |
| } |
| |
| #[inline] |
| fn new_with_config2(config: ParserConfig2) -> PullParser { |
| let mut lexer = Lexer::new(&config); |
| if let Some(enc) = config.override_encoding { |
| lexer.set_encoding(enc); |
| } |
| |
| let mut pos = Vec::with_capacity(16); |
| pos.push(TextPosition::new()); |
| |
| PullParser { |
| config, |
| lexer, |
| st: State::DocumentStart, |
| state_after_reference: State::OutsideTag, |
| buf: String::new(), |
| entities: HashMap::new(), |
| nst: NamespaceStack::default(), |
| |
| data: MarkupData { |
| name: String::new(), |
| version: None, |
| encoding: None, |
| standalone: None, |
| ref_data: String::new(), |
| element_name: None, |
| quote: None, |
| attr_name: None, |
| attributes: AttributesSet::new(), |
| }, |
| final_result: None, |
| next_event: None, |
| est: Vec::new(), |
| pos, |
| |
| encountered: Encountered::None, |
| inside_whitespace: true, |
| read_prefix_separator: false, |
| pop_namespace: false, |
| } |
| } |
| |
| /// Checks if this parser ignores the end of stream errors. |
| pub fn is_ignoring_end_of_stream(&self) -> bool { self.config.c.ignore_end_of_stream } |
| |
| #[inline(never)] |
| fn set_encountered(&mut self, new_encounter: Encountered) -> Option<Result> { |
| if new_encounter <= self.encountered { |
| return None; |
| } |
| let prev_enc = self.encountered; |
| self.encountered = new_encounter; |
| |
| // If declaration was not parsed and we have encountered an element, |
| // emit this declaration as the next event. |
| if prev_enc == Encountered::None { |
| self.push_pos(); |
| Some(Ok(XmlEvent::StartDocument { |
| version: DEFAULT_VERSION, |
| encoding: self.lexer.encoding().to_string(), |
| standalone: DEFAULT_STANDALONE, |
| })) |
| } else { |
| None |
| } |
| } |
| } |
| |
| impl Position for PullParser { |
| /// Returns the position of the last event produced by the parser |
| #[inline] |
| fn position(&self) -> TextPosition { |
| self.pos[0] |
| } |
| } |
| |
| #[derive(Copy, Clone, PartialEq)] |
| pub enum State { |
| OutsideTag, |
| InsideOpeningTag(OpeningTagSubstate), |
| InsideClosingTag(ClosingTagSubstate), |
| InsideProcessingInstruction(ProcessingInstructionSubstate), |
| InsideComment, |
| InsideCData, |
| InsideDeclaration(DeclarationSubstate), |
| InsideDoctype(DoctypeSubstate), |
| InsideReference, |
| DocumentStart, |
| } |
| |
| #[derive(Copy, Clone, PartialEq)] |
| pub enum DoctypeSubstate { |
| Outside, |
| String, |
| InsideName, |
| BeforeEntityName, |
| EntityName, |
| BeforeEntityValue, |
| EntityValue, |
| NumericReferenceStart, |
| NumericReference, |
| /// expansion |
| PEReferenceInValue, |
| PEReferenceInDtd, |
| /// name definition |
| PEReferenceDefinitionStart, |
| PEReferenceDefinition, |
| SkipDeclaration, |
| Comment, |
| } |
| |
| #[derive(Copy, Clone, PartialEq)] |
| pub enum OpeningTagSubstate { |
| InsideName, |
| |
| InsideTag, |
| |
| InsideAttributeName, |
| AfterAttributeName, |
| |
| InsideAttributeValue, |
| AfterAttributeValue, |
| } |
| |
| #[derive(Copy, Clone, PartialEq)] |
| pub enum ClosingTagSubstate { |
| CTInsideName, |
| CTAfterName, |
| } |
| |
| #[derive(Copy, Clone, PartialEq)] |
| pub enum ProcessingInstructionSubstate { |
| PIInsideName, |
| PIInsideData, |
| } |
| |
| #[derive(Copy, Clone, PartialEq)] |
| pub enum DeclarationSubstate { |
| BeforeVersion, |
| InsideVersion, |
| AfterVersion, |
| |
| InsideVersionValue, |
| AfterVersionValue, |
| |
| BeforeEncoding, |
| InsideEncoding, |
| AfterEncoding, |
| |
| InsideEncodingValue, |
| AfterEncodingValue, |
| |
| BeforeStandaloneDecl, |
| InsideStandaloneDecl, |
| AfterStandaloneDecl, |
| |
| InsideStandaloneDeclValue, |
| AfterStandaloneDeclValue, |
| } |
| |
| #[derive(PartialEq)] |
| enum QualifiedNameTarget { |
| AttributeNameTarget, |
| OpeningTagNameTarget, |
| ClosingTagNameTarget, |
| } |
| |
| #[derive(Copy, Clone, PartialEq, Eq)] |
| enum QuoteToken { |
| SingleQuoteToken, |
| DoubleQuoteToken, |
| } |
| |
| impl QuoteToken { |
| fn from_token(t: &Token) -> QuoteToken { |
| match *t { |
| Token::SingleQuote => QuoteToken::SingleQuoteToken, |
| Token::DoubleQuote => QuoteToken::DoubleQuoteToken, |
| _ => panic!("Unexpected token: {t}"), |
| } |
| } |
| |
| fn as_token(self) -> Token { |
| match self { |
| QuoteToken::SingleQuoteToken => Token::SingleQuote, |
| QuoteToken::DoubleQuoteToken => Token::DoubleQuote, |
| } |
| } |
| } |
| |
| struct MarkupData { |
| name: String, // used for processing instruction name |
| ref_data: String, // used for reference content |
| |
| version: Option<XmlVersion>, // used for XML declaration version |
| encoding: Option<String>, // used for XML declaration encoding |
| standalone: Option<bool>, // used for XML declaration standalone parameter |
| |
| element_name: Option<OwnedName>, // used for element name |
| |
| quote: Option<QuoteToken>, // used to hold opening quote for attribute value |
| attr_name: Option<OwnedName>, // used to hold attribute name |
| attributes: AttributesSet, // used to hold all accumulated attributes |
| } |
| |
| impl PullParser { |
| /// Returns next event read from the given buffer. |
| /// |
| /// This method should be always called with the same buffer. If you call it |
| /// providing different buffers each time, the result will be undefined. |
| pub fn next<R: Read>(&mut self, r: &mut R) -> Result { |
| if let Some(ref ev) = self.final_result { |
| return ev.clone(); |
| } |
| |
| if let Some(ev) = self.next_event.take() { |
| return ev; |
| } |
| |
| if self.pop_namespace { |
| self.pop_namespace = false; |
| self.nst.pop(); |
| } |
| |
| loop { |
| debug_assert!(self.next_event.is_none()); |
| debug_assert!(!self.pop_namespace); |
| |
| // While lexer gives us Ok(maybe_token) -- we loop. |
| // Upon having a complete XML-event -- we return from the whole function. |
| match self.lexer.next_token(r) { |
| Ok(Some(token)) => { |
| match self.dispatch_token(token) { |
| None => {} // continue |
| Some(Ok(xml_event)) => { |
| self.next_pos(); |
| return Ok(xml_event) |
| }, |
| Some(Err(xml_error)) => { |
| self.next_pos(); |
| return self.set_final_result(Err(xml_error)) |
| }, |
| } |
| }, |
| Ok(None) => break, |
| Err(lexer_error) => { |
| return self.set_final_result(Err(lexer_error)) |
| }, |
| } |
| } |
| |
| self.handle_eof() |
| } |
| |
| /// Handle end of stream |
| fn handle_eof(&mut self) -> std::result::Result<XmlEvent, super::Error> { |
| // Forward pos to the lexer head |
| self.next_pos(); |
| let ev = if self.depth() == 0 { |
| if self.encountered == Encountered::Element && self.st == State::OutsideTag { // all is ok |
| Ok(XmlEvent::EndDocument) |
| } else if self.encountered < Encountered::Element { |
| self.error(SyntaxError::NoRootElement) |
| } else { // self.st != State::OutsideTag |
| self.error(SyntaxError::UnexpectedEof) // TODO: add expected hint? |
| } |
| } else if self.config.c.ignore_end_of_stream { |
| self.final_result = None; |
| self.lexer.reset_eof_handled(); |
| return self.error(SyntaxError::UnbalancedRootElement); |
| } else { |
| self.error(SyntaxError::UnbalancedRootElement) |
| }; |
| self.set_final_result(ev) |
| } |
| |
| // This function is to be called when a terminal event is reached. |
| // The function sets up the `self.final_result` into `Some(result)` and return `result`. |
| #[inline] |
| fn set_final_result(&mut self, result: Result) -> Result { |
| self.final_result = Some(result.clone()); |
| result |
| } |
| |
| #[cold] |
| fn error(&self, e: SyntaxError) -> Result { |
| Err(Error { |
| pos: self.lexer.position(), |
| kind: ErrorKind::Syntax(e.to_cow()), |
| }) |
| } |
| |
| #[inline] |
| fn next_pos(&mut self) { |
| // unfortunately calls to next_pos will never be perfectly balanced with push_pos, |
| // at very least because parse errors and EOF can happen unexpectedly without a prior push. |
| if !self.pos.is_empty() { |
| if self.pos.len() > 1 { |
| self.pos.remove(0); |
| } else { |
| self.pos[0] = self.lexer.position(); |
| } |
| } |
| } |
| |
| #[inline] |
| #[track_caller] |
| fn push_pos(&mut self) { |
| debug_assert!(self.pos.len() != self.pos.capacity(), "You've found a bug in xml-rs, caused by calls to push_pos() in states that don't end up emitting events. |
| This case is ignored in release mode, and merely causes document positions to be out of sync. |
| Please file a bug and include the XML document that triggers this assert."); |
| |
| // it has capacity preallocated for more than it ever needs, so this reduces code size |
| if self.pos.len() != self.pos.capacity() { |
| self.pos.push(self.lexer.position()); |
| } else if self.pos.len() > 1 { |
| self.pos.remove(0); // this mitigates the excessive push_pos() call |
| } |
| } |
| |
| #[inline(never)] |
| fn dispatch_token(&mut self, t: Token) -> Option<Result> { |
| match self.st { |
| State::OutsideTag => self.outside_tag(t), |
| State::InsideOpeningTag(s) => self.inside_opening_tag(t, s), |
| State::InsideClosingTag(s) => self.inside_closing_tag_name(t, s), |
| State::InsideReference => self.inside_reference(t), |
| State::InsideComment => self.inside_comment(t), |
| State::InsideCData => self.inside_cdata(t), |
| State::InsideProcessingInstruction(s) => self.inside_processing_instruction(t, s), |
| State::InsideDoctype(s) => self.inside_doctype(t, s), |
| State::InsideDeclaration(s) => self.inside_declaration(t, s), |
| State::DocumentStart => self.document_start(t), |
| } |
| } |
| |
| #[inline] |
| fn depth(&self) -> usize { |
| self.est.len() |
| } |
| |
| #[inline] |
| fn buf_has_data(&self) -> bool { |
| !self.buf.is_empty() |
| } |
| |
| #[inline] |
| fn take_buf(&mut self) -> String { |
| std::mem::take(&mut self.buf) |
| } |
| |
| #[inline] |
| fn into_state(&mut self, st: State, ev: Option<Result>) -> Option<Result> { |
| self.st = st; |
| ev |
| } |
| |
| #[inline] |
| fn into_state_continue(&mut self, st: State) -> Option<Result> { |
| self.into_state(st, None) |
| } |
| |
| #[inline] |
| fn into_state_emit(&mut self, st: State, ev: Result) -> Option<Result> { |
| self.into_state(st, Some(ev)) |
| } |
| |
| /// Dispatches tokens in order to process qualified name. If qualified name cannot be parsed, |
| /// an error is returned. |
| /// |
| /// # Parameters |
| /// * `t` --- next token; |
| /// * `on_name` --- a callback which is executed when whitespace is encountered. |
| fn read_qualified_name<F>(&mut self, t: Token, target: QualifiedNameTarget, on_name: F) -> Option<Result> |
| where F: Fn(&mut PullParser, Token, OwnedName) -> Option<Result> { |
| // We can get here for the first time only when self.data.name contains zero or one character, |
| // but first character cannot be a colon anyway |
| if self.buf.len() <= 1 { |
| self.read_prefix_separator = false; |
| } |
| |
| let invoke_callback = move |this: &mut PullParser, t| { |
| let name = this.take_buf(); |
| match name.parse() { |
| Ok(name) => on_name(this, t, name), |
| Err(_) => Some(this.error(SyntaxError::InvalidQualifiedName(name.into()))), |
| } |
| }; |
| |
| match t { |
| // There can be only one colon, and not as the first character |
| Token::Character(':') if self.buf_has_data() && !self.read_prefix_separator => { |
| self.buf.push(':'); |
| self.read_prefix_separator = true; |
| None |
| } |
| |
| Token::Character(c) if c != ':' && (self.buf.is_empty() && is_name_start_char(c) || |
| self.buf_has_data() && is_name_char(c)) => { |
| if self.buf.len() > self.config.max_name_length { |
| return Some(self.error(SyntaxError::ExceededConfiguredLimit)); |
| } |
| self.buf.push(c); |
| None |
| }, |
| |
| Token::EqualsSign if target == QualifiedNameTarget::AttributeNameTarget => invoke_callback(self, t), |
| |
| Token::EmptyTagEnd if target == QualifiedNameTarget::OpeningTagNameTarget => invoke_callback(self, t), |
| |
| Token::TagEnd if target == QualifiedNameTarget::OpeningTagNameTarget || |
| target == QualifiedNameTarget::ClosingTagNameTarget => invoke_callback(self, t), |
| |
| Token::Character(c) if is_whitespace_char(c) => invoke_callback(self, t), |
| |
| _ => Some(self.error(SyntaxError::UnexpectedQualifiedName(t))), |
| } |
| } |
| |
| /// Dispatches tokens in order to process attribute value. |
| /// |
| /// # Parameters |
| /// * `t` --- next token; |
| /// * `on_value` --- a callback which is called when terminating quote is encountered. |
| fn read_attribute_value<F>(&mut self, t: Token, on_value: F) -> Option<Result> |
| where F: Fn(&mut PullParser, String) -> Option<Result> { |
| match t { |
| Token::Character(c) if self.data.quote.is_none() && is_whitespace_char(c) => None, // skip leading whitespace |
| |
| Token::DoubleQuote | Token::SingleQuote => match self.data.quote { |
| None => { // Entered attribute value |
| self.data.quote = Some(QuoteToken::from_token(&t)); |
| None |
| } |
| Some(q) if q.as_token() == t => { |
| self.data.quote = None; |
| let value = self.take_buf(); |
| on_value(self, value) |
| } |
| _ => { |
| if let Token::Character(c) = t { |
| if !self.is_valid_xml_char_not_restricted(c) { |
| return Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))); |
| } |
| } |
| if self.buf.len() > self.config.max_attribute_length { |
| return Some(self.error(SyntaxError::ExceededConfiguredLimit)); |
| } |
| t.push_to_string(&mut self.buf); |
| None |
| } |
| }, |
| |
| Token::ReferenceStart if self.data.quote.is_some() => { |
| self.state_after_reference = self.st; |
| self.into_state_continue(State::InsideReference) |
| }, |
| |
| Token::OpeningTagStart => Some(self.error(SyntaxError::UnexpectedOpeningTag)), |
| |
| Token::Character(c) if !self.is_valid_xml_char_not_restricted(c) => { |
| Some(self.error(SyntaxError::InvalidCharacterEntity(c as u32))) |
| }, |
| |
| // Every character except " and ' and < is okay |
| _ if self.data.quote.is_some() => { |
| if self.buf.len() > self.config.max_attribute_length { |
| return Some(self.error(SyntaxError::ExceededConfiguredLimit)); |
| } |
| t.push_to_string(&mut self.buf); |
| None |
| } |
| |
| _ => Some(self.error(SyntaxError::UnexpectedToken(t))), |
| } |
| } |
| |
| fn emit_start_element(&mut self, emit_end_element: bool) -> Option<Result> { |
| let mut name = self.data.take_element_name()?; |
| let mut attributes = self.data.take_attributes().into_vec(); |
| |
| // check whether the name prefix is bound and fix its namespace |
| match self.nst.get(name.borrow().prefix_repr()) { |
| Some("") => name.namespace = None, // default namespace |
| Some(ns) => name.namespace = Some(ns.into()), |
| None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into()))) |
| } |
| |
| // check and fix accumulated attributes prefixes |
| for attr in &mut attributes { |
| if let Some(ref pfx) = attr.name.prefix { |
| let new_ns = match self.nst.get(pfx) { |
| Some("") => None, // default namespace |
| Some(ns) => Some(ns.into()), |
| None => return Some(self.error(SyntaxError::UnboundAttribute(attr.name.to_string().into()))) |
| }; |
| attr.name.namespace = new_ns; |
| } |
| } |
| |
| if emit_end_element { |
| self.pop_namespace = true; |
| self.next_event = Some(Ok(XmlEvent::EndElement { |
| name: name.clone() |
| })); |
| } else { |
| self.est.push(name.clone()); |
| } |
| let namespace = self.nst.squash(); |
| self.into_state_emit(State::OutsideTag, Ok(XmlEvent::StartElement { |
| name, |
| attributes, |
| namespace |
| })) |
| } |
| |
| fn emit_end_element(&mut self) -> Option<Result> { |
| let mut name = self.data.take_element_name()?; |
| |
| // check whether the name prefix is bound and fix its namespace |
| match self.nst.get(name.borrow().prefix_repr()) { |
| Some("") => name.namespace = None, // default namespace |
| Some(ns) => name.namespace = Some(ns.into()), |
| None => return Some(self.error(SyntaxError::UnboundElementPrefix(name.to_string().into()))) |
| } |
| |
| let op_name = self.est.pop()?; |
| |
| if name == op_name { |
| self.pop_namespace = true; |
| self.into_state_emit(State::OutsideTag, Ok(XmlEvent::EndElement { name })) |
| } else { |
| Some(self.error(SyntaxError::UnexpectedClosingTag(format!("{name} != {op_name}").into()))) |
| } |
| } |
| |
| #[inline] |
| fn is_valid_xml_char(&self, c: char) -> bool { |
| if Some(XmlVersion::Version11) == self.data.version { |
| is_xml11_char(c) |
| } else { |
| is_xml10_char(c) |
| } |
| } |
| |
| #[inline] |
| fn is_valid_xml_char_not_restricted(&self, c: char) -> bool { |
| if Some(XmlVersion::Version11) == self.data.version { |
| is_xml11_char_not_restricted(c) |
| } else { |
| is_xml10_char(c) |
| } |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use std::io::BufReader; |
| use crate::attribute::OwnedAttribute; |
| use crate::common::TextPosition; |
| use crate::name::OwnedName; |
| use crate::reader::events::XmlEvent; |
| use crate::reader::parser::PullParser; |
| use crate::reader::ParserConfig; |
| |
| fn new_parser() -> PullParser { |
| PullParser::new(ParserConfig::new()) |
| } |
| |
| macro_rules! expect_event( |
| ($r:expr, $p:expr, $t:pat) => ( |
| match $p.next(&mut $r) { |
| $t => {} |
| e => panic!("Unexpected event: {e:?}\nExpected: {}", stringify!($t)) |
| } |
| ); |
| ($r:expr, $p:expr, $t:pat => $c:expr ) => ( |
| match $p.next(&mut $r) { |
| $t if $c => {} |
| e => panic!("Unexpected event: {e:?}\nExpected: {} if {}", stringify!($t), stringify!($c)) |
| } |
| ) |
| ); |
| |
| macro_rules! test_data( |
| ($d:expr) => ({ |
| static DATA: &'static str = $d; |
| let r = BufReader::new(DATA.as_bytes()); |
| let p = new_parser(); |
| (r, p) |
| }) |
| ); |
| |
| #[test] |
| fn issue_3_semicolon_in_attribute_value() { |
| let (mut r, mut p) = test_data!(r#" |
| <a attr="zzz;zzz" /> |
| "#); |
| |
| expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); |
| expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, ref attributes, ref namespace }) => |
| *name == OwnedName::local("a") && |
| attributes.len() == 1 && |
| attributes[0] == OwnedAttribute::new(OwnedName::local("attr"), "zzz;zzz") && |
| namespace.is_essentially_empty() |
| ); |
| expect_event!(r, p, Ok(XmlEvent::EndElement { ref name }) => *name == OwnedName::local("a")); |
| expect_event!(r, p, Ok(XmlEvent::EndDocument)); |
| } |
| |
| #[test] |
| fn issue_140_entity_reference_inside_tag() { |
| let (mut r, mut p) = test_data!(r#" |
| <bla>♫</bla> |
| "#); |
| |
| expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); |
| expect_event!(r, p, Ok(XmlEvent::StartElement { ref name, .. }) => *name == OwnedName::local("bla")); |
| expect_event!(r, p, Ok(XmlEvent::Characters(ref s)) => s == "\u{266b}"); |
| expect_event!(r, p, Ok(XmlEvent::EndElement { ref name, .. }) => *name == OwnedName::local("bla")); |
| expect_event!(r, p, Ok(XmlEvent::EndDocument)); |
| } |
| |
| #[test] |
| fn issue_220_comment() { |
| let (mut r, mut p) = test_data!(r#"<x><!-- <!--></x>"#); |
| expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); |
| expect_event!(r, p, Ok(XmlEvent::StartElement { .. })); |
| expect_event!(r, p, Ok(XmlEvent::EndElement { .. })); |
| expect_event!(r, p, Ok(XmlEvent::EndDocument)); |
| |
| let (mut r, mut p) = test_data!(r#"<x><!-- <!---></x>"#); |
| expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); |
| expect_event!(r, p, Ok(XmlEvent::StartElement { .. })); |
| expect_event!(r, p, Err(_)); // ---> is forbidden in comments |
| |
| let (mut r, mut p) = test_data!(r#"<x><!--<text&x;> <!--></x>"#); |
| p.config.c.ignore_comments = false; |
| expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); |
| expect_event!(r, p, Ok(XmlEvent::StartElement { .. })); |
| expect_event!(r, p, Ok(XmlEvent::Comment(s)) => s == "<text&x;> <!"); |
| expect_event!(r, p, Ok(XmlEvent::EndElement { .. })); |
| expect_event!(r, p, Ok(XmlEvent::EndDocument)); |
| } |
| |
| #[test] |
| fn malformed_declaration_attrs() { |
| let (mut r, mut p) = test_data!(r#"<?xml version x="1.0"?>"#); |
| expect_event!(r, p, Err(_)); |
| |
| let (mut r, mut p) = test_data!(r#"<?xml version="1.0" version="1.0"?>"#); |
| expect_event!(r, p, Err(_)); |
| |
| let (mut r, mut p) = test_data!(r#"<?xml version="1.0"encoding="utf-8"?>"#); |
| expect_event!(r, p, Err(_)); |
| |
| let (mut r, mut p) = test_data!(r#"<?xml version="1.0"standalone="yes"?>"#); |
| expect_event!(r, p, Err(_)); |
| |
| let (mut r, mut p) = test_data!(r#"<?xml version="1.0" encoding="utf-8"standalone="yes"?>"#); |
| expect_event!(r, p, Err(_)); |
| } |
| |
| #[test] |
| fn opening_tag_in_attribute_value() { |
| use crate::reader::error::{SyntaxError, Error, ErrorKind}; |
| |
| let (mut r, mut p) = test_data!(r#" |
| <a attr="zzz<zzz" /> |
| "#); |
| |
| expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); |
| expect_event!(r, p, Err(ref e) => |
| *e == Error { |
| kind: ErrorKind::Syntax(SyntaxError::UnexpectedOpeningTag.to_cow()), |
| pos: TextPosition { row: 1, column: 24 } |
| } |
| ); |
| } |
| |
| #[test] |
| fn reference_err() { |
| let (mut r, mut p) = test_data!(r#" |
| <a>&&</a> |
| "#); |
| |
| expect_event!(r, p, Ok(XmlEvent::StartDocument { .. })); |
| expect_event!(r, p, Ok(XmlEvent::StartElement { .. })); |
| expect_event!(r, p, Err(_)); |
| } |
| |
| #[test] |
| fn state_size() { |
| assert_eq!(2, std::mem::size_of::<super::State>()); |
| assert_eq!(1, std::mem::size_of::<super::DoctypeSubstate>()); |
| } |
| } |