| use std::collections::VecDeque; |
| use std::error::Error; |
| use std::{char, fmt}; |
| |
| #[derive(Clone, Copy, PartialEq, Debug, Eq)] |
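/// The character encoding of the input stream. Only UTF-8 is supported.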
| pub enum TEncoding { |
| Utf8, |
| } |
| |
| #[derive(Clone, Copy, PartialEq, Debug, Eq)] |
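/// The presentation style of a scalar: plain, quoted, or block (literal / folded).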
| pub enum TScalarStyle { |
| Any, |
| Plain, |
| SingleQuoted, |
| DoubleQuoted, |
| |
| Literal, |
    Folded,
| } |
| |
| #[derive(Clone, Copy, PartialEq, Debug, Eq)] |
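/// A position in the input: the absolute character index plus the line (1-based) and
/// column (0-based) numbers used for error reporting.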
| pub struct Marker { |
| index: usize, |
| line: usize, |
| col: usize, |
| } |
| |
| impl Marker { |
| fn new(index: usize, line: usize, col: usize) -> Marker { |
| Marker { index, line, col } |
| } |
| |
| pub fn index(&self) -> usize { |
| self.index |
| } |
| |
| pub fn line(&self) -> usize { |
| self.line |
| } |
| |
| pub fn col(&self) -> usize { |
| self.col |
| } |
| } |
| |
| #[derive(Clone, PartialEq, Debug, Eq)] |
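/// An error encountered while scanning, carrying the position at which it occurred.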
| pub struct ScanError { |
| mark: Marker, |
| info: String, |
| } |
| |
| impl ScanError { |
| pub fn new(loc: Marker, info: &str) -> ScanError { |
| ScanError { |
| mark: loc, |
| info: info.to_owned(), |
| } |
| } |
| |
| pub fn marker(&self) -> &Marker { |
| &self.mark |
| } |
| } |
| |
| impl Error for ScanError { |
| fn description(&self) -> &str { |
| self.info.as_ref() |
| } |
| |
| fn cause(&self) -> Option<&dyn Error> { |
| None |
| } |
| } |
| |
| impl fmt::Display for ScanError { |
| // col starts from 0 |
| fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { |
| write!( |
| formatter, |
| "{} at line {} column {}", |
| self.info, |
| self.mark.line, |
| self.mark.col + 1 |
| ) |
| } |
| } |
| |
| #[derive(Clone, PartialEq, Debug, Eq)] |
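/// The kind of token produced by the scanner.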
| pub enum TokenType { |
| NoToken, |
| StreamStart(TEncoding), |
| StreamEnd, |
| /// major, minor |
| VersionDirective(u32, u32), |
| /// handle, prefix |
| TagDirective(String, String), |
| DocumentStart, |
| DocumentEnd, |
| BlockSequenceStart, |
| BlockMappingStart, |
| BlockEnd, |
| FlowSequenceStart, |
| FlowSequenceEnd, |
| FlowMappingStart, |
| FlowMappingEnd, |
| BlockEntry, |
| FlowEntry, |
| Key, |
| Value, |
| Alias(String), |
| Anchor(String), |
| /// handle, suffix |
| Tag(String, String), |
| Scalar(TScalarStyle, String), |
| } |
| |
| #[derive(Clone, PartialEq, Debug, Eq)] |
| pub struct Token(pub Marker, pub TokenType); |
| |
| #[derive(Clone, PartialEq, Debug, Eq)] |
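/// Bookkeeping for a potential simple key (a mapping key written without a leading '?').
/// The scanner records where such a key could start so that a `Key` token can be inserted
/// retroactively at `token_number` once the corresponding ':' is seen.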
| struct SimpleKey { |
| possible: bool, |
| required: bool, |
| token_number: usize, |
| mark: Marker, |
| } |
| |
| impl SimpleKey { |
| fn new(mark: Marker) -> SimpleKey { |
| SimpleKey { |
| possible: false, |
| required: false, |
| token_number: 0, |
| mark, |
| } |
| } |
| } |
| |
| #[derive(Debug)] |
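/// A YAML tokenizer that turns a `char` iterator into a stream of `Token`s.
///
/// `Scanner` also implements `Iterator<Item = Token>`; iteration stops at the first
/// error, which can then be inspected with `get_error`.
///
/// A minimal usage sketch (the `scanner` import path is an assumption and depends on how
/// this module is exposed by the crate):
///
/// ```ignore
/// use scanner::{Scanner, Token, TokenType};
///
/// let mut scanner = Scanner::new("key: value".chars());
/// for Token(_mark, tok) in &mut scanner {
///     if let TokenType::Scalar(_, s) = tok {
///         println!("scalar: {}", s);
///     }
/// }
/// assert!(scanner.get_error().is_none());
/// ```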
| pub struct Scanner<T> { |
| rdr: T, |
| mark: Marker, |
| tokens: VecDeque<Token>, |
| buffer: VecDeque<char>, |
| error: Option<ScanError>, |
| |
| stream_start_produced: bool, |
| stream_end_produced: bool, |
| adjacent_value_allowed_at: usize, |
| simple_key_allowed: bool, |
| simple_keys: Vec<SimpleKey>, |
| indent: isize, |
| indents: Vec<isize>, |
| flow_level: u8, |
| tokens_parsed: usize, |
| token_available: bool, |
| } |
| |
| impl<T: Iterator<Item = char>> Iterator for Scanner<T> { |
| type Item = Token; |
| fn next(&mut self) -> Option<Token> { |
| if self.error.is_some() { |
| return None; |
| } |
| match self.next_token() { |
| Ok(tok) => tok, |
| Err(e) => { |
| self.error = Some(e); |
| None |
| } |
| } |
| } |
| } |
| |
| #[inline] |
| fn is_z(c: char) -> bool { |
| c == '\0' |
| } |
| #[inline] |
| fn is_break(c: char) -> bool { |
| c == '\n' || c == '\r' |
| } |
| #[inline] |
| fn is_breakz(c: char) -> bool { |
| is_break(c) || is_z(c) |
| } |
| #[inline] |
| fn is_blank(c: char) -> bool { |
| c == ' ' || c == '\t' |
| } |
| #[inline] |
| fn is_blankz(c: char) -> bool { |
| is_blank(c) || is_breakz(c) |
| } |
| #[inline] |
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
| #[inline] |
| fn is_alpha(c: char) -> bool { |
| match c { |
| '0'..='9' | 'a'..='z' | 'A'..='Z' => true, |
| '_' | '-' => true, |
| _ => false, |
| } |
| } |
| #[inline] |
fn is_hex(c: char) -> bool {
    c.is_ascii_hexdigit()
}
| #[inline] |
| fn as_hex(c: char) -> u32 { |
| match c { |
| '0'..='9' => (c as u32) - ('0' as u32), |
| 'a'..='f' => (c as u32) - ('a' as u32) + 10, |
| 'A'..='F' => (c as u32) - ('A' as u32) + 10, |
| _ => unreachable!(), |
| } |
| } |
| #[inline] |
| fn is_flow(c: char) -> bool { |
| match c { |
| ',' | '[' | ']' | '{' | '}' => true, |
| _ => false, |
| } |
| } |
| |
| pub type ScanResult = Result<(), ScanError>; |
| |
| impl<T: Iterator<Item = char>> Scanner<T> { |
| /// Creates the YAML tokenizer. |
| pub fn new(rdr: T) -> Scanner<T> { |
| Scanner { |
| rdr, |
| buffer: VecDeque::new(), |
| mark: Marker::new(0, 1, 0), |
| tokens: VecDeque::new(), |
| error: None, |
| |
| stream_start_produced: false, |
| stream_end_produced: false, |
| adjacent_value_allowed_at: 0, |
| simple_key_allowed: true, |
| simple_keys: Vec::new(), |
| indent: -1, |
| indents: Vec::new(), |
| flow_level: 0, |
| tokens_parsed: 0, |
| token_available: false, |
| } |
| } |
| #[inline] |
    pub fn get_error(&self) -> Option<ScanError> {
        self.error.clone()
    }
| |
| #[inline] |
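    /// Make sure at least `count` characters are buffered. Past the end of input the buffer
    /// is padded with '\0', which the rest of the scanner treats as the stream terminator.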
| fn lookahead(&mut self, count: usize) { |
| if self.buffer.len() >= count { |
| return; |
| } |
| for _ in 0..(count - self.buffer.len()) { |
| self.buffer.push_back(self.rdr.next().unwrap_or('\0')); |
| } |
| } |
| #[inline] |
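    /// Consume one buffered character, updating `mark`'s index, line and column.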
| fn skip(&mut self) { |
| let c = self.buffer.pop_front().unwrap(); |
| |
| self.mark.index += 1; |
| if c == '\n' { |
| self.mark.line += 1; |
| self.mark.col = 0; |
| } else { |
| self.mark.col += 1; |
| } |
| } |
| #[inline] |
| fn skip_line(&mut self) { |
| if self.buffer[0] == '\r' && self.buffer[1] == '\n' { |
| self.skip(); |
| self.skip(); |
| } else if is_break(self.buffer[0]) { |
| self.skip(); |
| } |
| } |
| #[inline] |
| fn ch(&self) -> char { |
| self.buffer[0] |
| } |
| #[inline] |
| fn ch_is(&self, c: char) -> bool { |
| self.buffer[0] == c |
| } |
| #[allow(dead_code)] |
| #[inline] |
| fn eof(&self) -> bool { |
| self.ch_is('\0') |
| } |
| #[inline] |
| pub fn stream_started(&self) -> bool { |
| self.stream_start_produced |
| } |
| #[inline] |
| pub fn stream_ended(&self) -> bool { |
| self.stream_end_produced |
| } |
| #[inline] |
| pub fn mark(&self) -> Marker { |
| self.mark |
| } |
| #[inline] |
| fn read_break(&mut self, s: &mut String) { |
| if self.buffer[0] == '\r' && self.buffer[1] == '\n' { |
| s.push('\n'); |
| self.skip(); |
| self.skip(); |
| } else if self.buffer[0] == '\r' || self.buffer[0] == '\n' { |
| s.push('\n'); |
| self.skip(); |
| } else { |
| unreachable!(); |
| } |
| } |
| fn insert_token(&mut self, pos: usize, tok: Token) { |
| let old_len = self.tokens.len(); |
| assert!(pos <= old_len); |
| self.tokens.push_back(tok); |
| for i in 0..old_len - pos { |
| self.tokens.swap(old_len - i, old_len - i - 1); |
| } |
| } |
| fn allow_simple_key(&mut self) { |
| self.simple_key_allowed = true; |
| } |
| fn disallow_simple_key(&mut self) { |
| self.simple_key_allowed = false; |
| } |
| |
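    /// Scan the next token from the input and push it, along with any implicitly generated
    /// tokens such as block starts and ends, onto the token queue.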
| pub fn fetch_next_token(&mut self) -> ScanResult { |
| self.lookahead(1); |
| // println!("--> fetch_next_token Cur {:?} {:?}", self.mark, self.ch()); |
| |
| if !self.stream_start_produced { |
| self.fetch_stream_start(); |
| return Ok(()); |
| } |
| self.skip_to_next_token(); |
| |
| self.stale_simple_keys()?; |
| |
| let mark = self.mark; |
| self.unroll_indent(mark.col as isize); |
| |
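        // Four characters of lookahead are enough to recognize the "---" and "..."
        // document markers followed by a blank or break.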
| self.lookahead(4); |
| |
| if is_z(self.ch()) { |
| self.fetch_stream_end()?; |
| return Ok(()); |
| } |
| |
| // Is it a directive? |
| if self.mark.col == 0 && self.ch_is('%') { |
| return self.fetch_directive(); |
| } |
| |
| if self.mark.col == 0 |
| && self.buffer[0] == '-' |
| && self.buffer[1] == '-' |
| && self.buffer[2] == '-' |
| && is_blankz(self.buffer[3]) |
| { |
| self.fetch_document_indicator(TokenType::DocumentStart)?; |
| return Ok(()); |
| } |
| |
| if self.mark.col == 0 |
| && self.buffer[0] == '.' |
| && self.buffer[1] == '.' |
| && self.buffer[2] == '.' |
| && is_blankz(self.buffer[3]) |
| { |
| self.fetch_document_indicator(TokenType::DocumentEnd)?; |
| return Ok(()); |
| } |
| |
| let c = self.buffer[0]; |
| let nc = self.buffer[1]; |
| match c { |
| '[' => self.fetch_flow_collection_start(TokenType::FlowSequenceStart), |
| '{' => self.fetch_flow_collection_start(TokenType::FlowMappingStart), |
| ']' => self.fetch_flow_collection_end(TokenType::FlowSequenceEnd), |
| '}' => self.fetch_flow_collection_end(TokenType::FlowMappingEnd), |
| ',' => self.fetch_flow_entry(), |
| '-' if is_blankz(nc) => self.fetch_block_entry(), |
| '?' if is_blankz(nc) => self.fetch_key(), |
| ':' if is_blankz(nc) |
| || (self.flow_level > 0 |
| && (is_flow(nc) || self.mark.index == self.adjacent_value_allowed_at)) => |
| { |
| self.fetch_value() |
| } |
| // Is it an alias? |
| '*' => self.fetch_anchor(true), |
| // Is it an anchor? |
| '&' => self.fetch_anchor(false), |
| '!' => self.fetch_tag(), |
| // Is it a literal scalar? |
| '|' if self.flow_level == 0 => self.fetch_block_scalar(true), |
| // Is it a folded scalar? |
| '>' if self.flow_level == 0 => self.fetch_block_scalar(false), |
| '\'' => self.fetch_flow_scalar(true), |
| '"' => self.fetch_flow_scalar(false), |
| // plain scalar |
| '-' if !is_blankz(nc) => self.fetch_plain_scalar(), |
| ':' | '?' if !is_blankz(nc) && self.flow_level == 0 => self.fetch_plain_scalar(), |
| '%' | '@' | '`' => Err(ScanError::new( |
| self.mark, |
| &format!("unexpected character: `{}'", c), |
| )), |
| _ => self.fetch_plain_scalar(), |
| } |
| } |
| |
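    /// Return the next token, scanning more input if necessary. Returns `Ok(None)` once the
    /// `StreamEnd` token has been produced.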
| pub fn next_token(&mut self) -> Result<Option<Token>, ScanError> { |
| if self.stream_end_produced { |
| return Ok(None); |
| } |
| |
| if !self.token_available { |
| self.fetch_more_tokens()?; |
| } |
| let t = self.tokens.pop_front().unwrap(); |
| self.token_available = false; |
| self.tokens_parsed += 1; |
| |
| if let TokenType::StreamEnd = t.1 { |
| self.stream_end_produced = true; |
| } |
| Ok(Some(t)) |
| } |
| |
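    /// Keep scanning until at least one token can be returned. Several tokens may have to be
    /// fetched while a potential simple key is still unresolved.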
| pub fn fetch_more_tokens(&mut self) -> ScanResult { |
| let mut need_more; |
| loop { |
| need_more = false; |
| if self.tokens.is_empty() { |
| need_more = true; |
| } else { |
| self.stale_simple_keys()?; |
| for sk in &self.simple_keys { |
| if sk.possible && sk.token_number == self.tokens_parsed { |
| need_more = true; |
| break; |
| } |
| } |
| } |
| |
| if !need_more { |
| break; |
| } |
| self.fetch_next_token()?; |
| } |
| self.token_available = true; |
| |
| Ok(()) |
| } |
| |
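    /// Invalidate simple-key candidates that can no longer be valid: a simple key must be
    /// confined to a single line and span at most 1024 characters.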
| fn stale_simple_keys(&mut self) -> ScanResult { |
| for sk in &mut self.simple_keys { |
| if sk.possible |
| && (sk.mark.line < self.mark.line || sk.mark.index + 1024 < self.mark.index) |
| { |
| if sk.required { |
                    return Err(ScanError::new(self.mark, "simple key expected ':'"));
| } |
| sk.possible = false; |
| } |
| } |
| Ok(()) |
| } |
| |
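    /// Skip over whitespace, line breaks and comments until the start of the next token.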
| fn skip_to_next_token(&mut self) { |
| loop { |
| self.lookahead(1); |
| // TODO(chenyh) BOM |
| match self.ch() { |
| ' ' => self.skip(), |
| '\t' if self.flow_level > 0 || !self.simple_key_allowed => self.skip(), |
| '\n' | '\r' => { |
| self.lookahead(2); |
| self.skip_line(); |
| if self.flow_level == 0 { |
| self.allow_simple_key(); |
| } |
| } |
| '#' => { |
| while !is_breakz(self.ch()) { |
| self.skip(); |
| self.lookahead(1); |
| } |
| } |
| _ => break, |
| } |
| } |
| } |
| |
| fn fetch_stream_start(&mut self) { |
| let mark = self.mark; |
| self.indent = -1; |
| self.stream_start_produced = true; |
| self.allow_simple_key(); |
| self.tokens |
| .push_back(Token(mark, TokenType::StreamStart(TEncoding::Utf8))); |
| self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0))); |
| } |
| |
| fn fetch_stream_end(&mut self) -> ScanResult { |
| // force new line |
| if self.mark.col != 0 { |
| self.mark.col = 0; |
| self.mark.line += 1; |
| } |
| |
| self.unroll_indent(-1); |
| self.remove_simple_key()?; |
| self.disallow_simple_key(); |
| |
| self.tokens |
| .push_back(Token(self.mark, TokenType::StreamEnd)); |
| Ok(()) |
| } |
| |
| fn fetch_directive(&mut self) -> ScanResult { |
| self.unroll_indent(-1); |
| self.remove_simple_key()?; |
| |
| self.disallow_simple_key(); |
| |
| let tok = self.scan_directive()?; |
| |
| self.tokens.push_back(tok); |
| |
| Ok(()) |
| } |
| |
| fn scan_directive(&mut self) -> Result<Token, ScanError> { |
| let start_mark = self.mark; |
| self.skip(); |
| |
| let name = self.scan_directive_name()?; |
| let tok = match name.as_ref() { |
| "YAML" => self.scan_version_directive_value(&start_mark)?, |
| "TAG" => self.scan_tag_directive_value(&start_mark)?, |
| // XXX This should be a warning instead of an error |
| _ => { |
| // skip current line |
| self.lookahead(1); |
| while !is_breakz(self.ch()) { |
| self.skip(); |
| self.lookahead(1); |
| } |
| // XXX return an empty TagDirective token |
| Token( |
| start_mark, |
| TokenType::TagDirective(String::new(), String::new()), |
| ) |
| // return Err(ScanError::new(start_mark, |
| // "while scanning a directive, found unknown directive name")) |
| } |
| }; |
| self.lookahead(1); |
| |
| while is_blank(self.ch()) { |
| self.skip(); |
| self.lookahead(1); |
| } |
| |
| if self.ch() == '#' { |
| while !is_breakz(self.ch()) { |
| self.skip(); |
| self.lookahead(1); |
| } |
| } |
| |
| if !is_breakz(self.ch()) { |
| return Err(ScanError::new( |
| start_mark, |
| "while scanning a directive, did not find expected comment or line break", |
| )); |
| } |
| |
| // Eat a line break |
| if is_break(self.ch()) { |
| self.lookahead(2); |
| self.skip_line(); |
| } |
| |
| Ok(tok) |
| } |
| |
| fn scan_version_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> { |
| self.lookahead(1); |
| |
| while is_blank(self.ch()) { |
| self.skip(); |
| self.lookahead(1); |
| } |
| |
| let major = self.scan_version_directive_number(mark)?; |
| |
| if self.ch() != '.' { |
| return Err(ScanError::new( |
| *mark, |
| "while scanning a YAML directive, did not find expected digit or '.' character", |
| )); |
| } |
| |
| self.skip(); |
| |
| let minor = self.scan_version_directive_number(mark)?; |
| |
| Ok(Token(*mark, TokenType::VersionDirective(major, minor))) |
| } |
| |
| fn scan_directive_name(&mut self) -> Result<String, ScanError> { |
| let start_mark = self.mark; |
| let mut string = String::new(); |
| self.lookahead(1); |
| while is_alpha(self.ch()) { |
| string.push(self.ch()); |
| self.skip(); |
| self.lookahead(1); |
| } |
| |
| if string.is_empty() { |
| return Err(ScanError::new( |
| start_mark, |
| "while scanning a directive, could not find expected directive name", |
| )); |
| } |
| |
| if !is_blankz(self.ch()) { |
| return Err(ScanError::new( |
| start_mark, |
| "while scanning a directive, found unexpected non-alphabetical character", |
| )); |
| } |
| |
| Ok(string) |
| } |
| |
| fn scan_version_directive_number(&mut self, mark: &Marker) -> Result<u32, ScanError> { |
| let mut val = 0u32; |
| let mut length = 0usize; |
| self.lookahead(1); |
| while is_digit(self.ch()) { |
| if length + 1 > 9 { |
| return Err(ScanError::new( |
| *mark, |
| "while scanning a YAML directive, found extremely long version number", |
| )); |
| } |
| length += 1; |
| val = val * 10 + ((self.ch() as u32) - ('0' as u32)); |
| self.skip(); |
| self.lookahead(1); |
| } |
| |
| if length == 0 { |
| return Err(ScanError::new( |
| *mark, |
| "while scanning a YAML directive, did not find expected version number", |
| )); |
| } |
| |
| Ok(val) |
| } |
| |
| fn scan_tag_directive_value(&mut self, mark: &Marker) -> Result<Token, ScanError> { |
| self.lookahead(1); |
| /* Eat whitespaces. */ |
| while is_blank(self.ch()) { |
| self.skip(); |
| self.lookahead(1); |
| } |
| let handle = self.scan_tag_handle(true, mark)?; |
| |
| self.lookahead(1); |
| /* Eat whitespaces. */ |
| while is_blank(self.ch()) { |
| self.skip(); |
| self.lookahead(1); |
| } |
| |
| let is_secondary = handle == "!!"; |
| let prefix = self.scan_tag_uri(true, is_secondary, &String::new(), mark)?; |
| |
| self.lookahead(1); |
| |
| if is_blankz(self.ch()) { |
| Ok(Token(*mark, TokenType::TagDirective(handle, prefix))) |
| } else { |
| Err(ScanError::new( |
| *mark, |
| "while scanning TAG, did not find expected whitespace or line break", |
| )) |
| } |
| } |
| |
| fn fetch_tag(&mut self) -> ScanResult { |
| self.save_simple_key()?; |
| self.disallow_simple_key(); |
| |
| let tok = self.scan_tag()?; |
| self.tokens.push_back(tok); |
| Ok(()) |
| } |
| |
| fn scan_tag(&mut self) -> Result<Token, ScanError> { |
| let start_mark = self.mark; |
| let mut handle = String::new(); |
| let mut suffix; |
| let mut secondary = false; |
| |
| // Check if the tag is in the canonical form (verbatim). |
| self.lookahead(2); |
| |
| if self.buffer[1] == '<' { |
| // Eat '!<' |
| self.skip(); |
| self.skip(); |
| suffix = self.scan_tag_uri(false, false, &String::new(), &start_mark)?; |
| |
| if self.ch() != '>' { |
| return Err(ScanError::new( |
| start_mark, |
| "while scanning a tag, did not find the expected '>'", |
| )); |
| } |
| |
| self.skip(); |
| } else { |
| // The tag has either the '!suffix' or the '!handle!suffix' |
| handle = self.scan_tag_handle(false, &start_mark)?; |
| // Check if it is, indeed, handle. |
| if handle.len() >= 2 && handle.starts_with('!') && handle.ends_with('!') { |
| if handle == "!!" { |
| secondary = true; |
| } |
| suffix = self.scan_tag_uri(false, secondary, &String::new(), &start_mark)?; |
| } else { |
| suffix = self.scan_tag_uri(false, false, &handle, &start_mark)?; |
| handle = "!".to_owned(); |
| // A special case: the '!' tag. Set the handle to '' and the |
| // suffix to '!'. |
| if suffix.is_empty() { |
| handle.clear(); |
| suffix = "!".to_owned(); |
| } |
| } |
| } |
| |
| self.lookahead(1); |
| if is_blankz(self.ch()) { |
| // XXX: ex 7.2, an empty scalar can follow a secondary tag |
| Ok(Token(start_mark, TokenType::Tag(handle, suffix))) |
| } else { |
| Err(ScanError::new( |
| start_mark, |
| "while scanning a tag, did not find expected whitespace or line break", |
| )) |
| } |
| } |
| |
| fn scan_tag_handle(&mut self, directive: bool, mark: &Marker) -> Result<String, ScanError> { |
| let mut string = String::new(); |
| self.lookahead(1); |
| if self.ch() != '!' { |
| return Err(ScanError::new( |
| *mark, |
| "while scanning a tag, did not find expected '!'", |
| )); |
| } |
| |
| string.push(self.ch()); |
| self.skip(); |
| |
| self.lookahead(1); |
| while is_alpha(self.ch()) { |
| string.push(self.ch()); |
| self.skip(); |
| self.lookahead(1); |
| } |
| |
| // Check if the trailing character is '!' and copy it. |
| if self.ch() == '!' { |
| string.push(self.ch()); |
| self.skip(); |
| } else if directive && string != "!" { |
| // It's either the '!' tag or not really a tag handle. If it's a %TAG |
| // directive, it's an error. If it's a tag token, it must be a part of |
| // URI. |
| return Err(ScanError::new( |
| *mark, |
| "while parsing a tag directive, did not find expected '!'", |
| )); |
| } |
| Ok(string) |
| } |
| |
| fn scan_tag_uri( |
| &mut self, |
| directive: bool, |
| _is_secondary: bool, |
| head: &str, |
| mark: &Marker, |
| ) -> Result<String, ScanError> { |
| let mut length = head.len(); |
| let mut string = String::new(); |
| |
| // Copy the head if needed. |
| // Note that we don't copy the leading '!' character. |
| if length > 1 { |
| string.extend(head.chars().skip(1)); |
| } |
| |
| self.lookahead(1); |
| /* |
| * The set of characters that may appear in URI is as follows: |
| * |
| * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&', |
| * '=', '+', '$', ',', '.', '!', '~', '*', '\'', '(', ')', '[', ']', |
| * '%'. |
| */ |
| while match self.ch() { |
| ';' | '/' | '?' | ':' | '@' | '&' => true, |
| '=' | '+' | '$' | ',' | '.' | '!' | '~' | '*' | '\'' | '(' | ')' | '[' | ']' => true, |
| '%' => true, |
| c if is_alpha(c) => true, |
| _ => false, |
| } { |
| // Check if it is a URI-escape sequence. |
| if self.ch() == '%' { |
| string.push(self.scan_uri_escapes(directive, mark)?); |
| } else { |
| string.push(self.ch()); |
| self.skip(); |
| } |
| |
| length += 1; |
| self.lookahead(1); |
| } |
| |
| if length == 0 { |
| return Err(ScanError::new( |
| *mark, |
| "while parsing a tag, did not find expected tag URI", |
| )); |
| } |
| |
| Ok(string) |
| } |
| |
| fn scan_uri_escapes(&mut self, _directive: bool, mark: &Marker) -> Result<char, ScanError> { |
| let mut width = 0usize; |
| let mut code = 0u32; |
| loop { |
| self.lookahead(3); |
| |
| if !(self.ch() == '%' && is_hex(self.buffer[1]) && is_hex(self.buffer[2])) { |
| return Err(ScanError::new( |
| *mark, |
| "while parsing a tag, did not find URI escaped octet", |
| )); |
| } |
| |
            let octet = (as_hex(self.buffer[1]) << 4) + as_hex(self.buffer[2]);
            if width == 0 {
                width = match octet {
                    _ if octet & 0x80 == 0x00 => 1,
                    _ if octet & 0xE0 == 0xC0 => 2,
                    _ if octet & 0xF0 == 0xE0 => 3,
                    _ if octet & 0xF8 == 0xF0 => 4,
                    _ => {
                        return Err(ScanError::new(
                            *mark,
                            "while parsing a tag, found an incorrect leading UTF-8 octet",
                        ));
                    }
                };
                // Keep only the payload bits of the leading octet.
                code = match width {
                    1 => octet,
                    2 => octet & 0x1F,
                    3 => octet & 0x0F,
                    _ => octet & 0x07,
                };
            } else {
                if octet & 0xc0 != 0x80 {
                    return Err(ScanError::new(
                        *mark,
                        "while parsing a tag, found an incorrect trailing UTF-8 octet",
                    ));
                }
                // A continuation octet contributes its low six payload bits.
                code = (code << 6) + (octet & 0x3F);
            }
| |
| self.skip(); |
| self.skip(); |
| self.skip(); |
| |
| width -= 1; |
| if width == 0 { |
| break; |
| } |
| } |
| |
| match char::from_u32(code) { |
| Some(ch) => Ok(ch), |
| None => Err(ScanError::new( |
| *mark, |
| "while parsing a tag, found an invalid UTF-8 codepoint", |
| )), |
| } |
| } |
| |
| fn fetch_anchor(&mut self, alias: bool) -> ScanResult { |
| self.save_simple_key()?; |
| self.disallow_simple_key(); |
| |
| let tok = self.scan_anchor(alias)?; |
| |
| self.tokens.push_back(tok); |
| |
| Ok(()) |
| } |
| |
| fn scan_anchor(&mut self, alias: bool) -> Result<Token, ScanError> { |
| let mut string = String::new(); |
| let start_mark = self.mark; |
| |
| self.skip(); |
| self.lookahead(1); |
| while is_alpha(self.ch()) { |
| string.push(self.ch()); |
| self.skip(); |
| self.lookahead(1); |
| } |
| |
| if string.is_empty() |
| || match self.ch() { |
| c if is_blankz(c) => false, |
| '?' | ':' | ',' | ']' | '}' | '%' | '@' | '`' => false, |
| _ => true, |
| } |
| { |
| return Err(ScanError::new(start_mark, "while scanning an anchor or alias, did not find expected alphabetic or numeric character")); |
| } |
| |
| if alias { |
| Ok(Token(start_mark, TokenType::Alias(string))) |
| } else { |
| Ok(Token(start_mark, TokenType::Anchor(string))) |
| } |
| } |
| |
| fn fetch_flow_collection_start(&mut self, tok: TokenType) -> ScanResult { |
| // The indicators '[' and '{' may start a simple key. |
| self.save_simple_key()?; |
| |
| self.increase_flow_level()?; |
| |
| self.allow_simple_key(); |
| |
| let start_mark = self.mark; |
| self.skip(); |
| |
| self.tokens.push_back(Token(start_mark, tok)); |
| Ok(()) |
| } |
| |
| fn fetch_flow_collection_end(&mut self, tok: TokenType) -> ScanResult { |
| self.remove_simple_key()?; |
| self.decrease_flow_level(); |
| |
| self.disallow_simple_key(); |
| |
| let start_mark = self.mark; |
| self.skip(); |
| |
| self.tokens.push_back(Token(start_mark, tok)); |
| Ok(()) |
| } |
| |
| fn fetch_flow_entry(&mut self) -> ScanResult { |
| self.remove_simple_key()?; |
| self.allow_simple_key(); |
| |
| let start_mark = self.mark; |
| self.skip(); |
| |
| self.tokens |
| .push_back(Token(start_mark, TokenType::FlowEntry)); |
| Ok(()) |
| } |
| |
| fn increase_flow_level(&mut self) -> ScanResult { |
| self.simple_keys.push(SimpleKey::new(Marker::new(0, 0, 0))); |
| self.flow_level = self |
| .flow_level |
| .checked_add(1) |
| .ok_or_else(|| ScanError::new(self.mark, "recursion limit exceeded"))?; |
| Ok(()) |
| } |
| fn decrease_flow_level(&mut self) { |
| if self.flow_level > 0 { |
| self.flow_level -= 1; |
| self.simple_keys.pop().unwrap(); |
| } |
| } |
| |
| fn fetch_block_entry(&mut self) -> ScanResult { |
| if self.flow_level == 0 { |
| // Check if we are allowed to start a new entry. |
| if !self.simple_key_allowed { |
| return Err(ScanError::new( |
| self.mark, |
| "block sequence entries are not allowed in this context", |
| )); |
| } |
| |
| let mark = self.mark; |
| // generate BLOCK-SEQUENCE-START if indented |
| self.roll_indent(mark.col, None, TokenType::BlockSequenceStart, mark); |
| } else { |
            // A '-' block entry indicator is only allowed in block context.
| return Err(ScanError::new( |
| self.mark, |
| r#""-" is only valid inside a block"#, |
| )); |
| } |
| self.remove_simple_key()?; |
| self.allow_simple_key(); |
| |
| let start_mark = self.mark; |
| self.skip(); |
| |
| self.tokens |
| .push_back(Token(start_mark, TokenType::BlockEntry)); |
| Ok(()) |
| } |
| |
| fn fetch_document_indicator(&mut self, t: TokenType) -> ScanResult { |
| self.unroll_indent(-1); |
| self.remove_simple_key()?; |
| self.disallow_simple_key(); |
| |
| let mark = self.mark; |
| |
| self.skip(); |
| self.skip(); |
| self.skip(); |
| |
| self.tokens.push_back(Token(mark, t)); |
| Ok(()) |
| } |
| |
| fn fetch_block_scalar(&mut self, literal: bool) -> ScanResult { |
| self.save_simple_key()?; |
| self.allow_simple_key(); |
| let tok = self.scan_block_scalar(literal)?; |
| |
| self.tokens.push_back(tok); |
| Ok(()) |
| } |
| |
| fn scan_block_scalar(&mut self, literal: bool) -> Result<Token, ScanError> { |
| let start_mark = self.mark; |
| let mut chomping: i32 = 0; |
| let mut increment: usize = 0; |
| let mut indent: usize = 0; |
| let mut trailing_blank: bool; |
| let mut leading_blank: bool = false; |
| |
| let mut string = String::new(); |
| let mut leading_break = String::new(); |
| let mut trailing_breaks = String::new(); |
| |
| // skip '|' or '>' |
| self.skip(); |
| self.lookahead(1); |
| |
| if self.ch() == '+' || self.ch() == '-' { |
| if self.ch() == '+' { |
| chomping = 1; |
| } else { |
| chomping = -1; |
| } |
| self.skip(); |
| self.lookahead(1); |
| if is_digit(self.ch()) { |
| if self.ch() == '0' { |
| return Err(ScanError::new( |
| start_mark, |
| "while scanning a block scalar, found an indentation indicator equal to 0", |
| )); |
| } |
| increment = (self.ch() as usize) - ('0' as usize); |
| self.skip(); |
| } |
| } else if is_digit(self.ch()) { |
| if self.ch() == '0' { |
| return Err(ScanError::new( |
| start_mark, |
| "while scanning a block scalar, found an indentation indicator equal to 0", |
| )); |
| } |
| |
| increment = (self.ch() as usize) - ('0' as usize); |
| self.skip(); |
| self.lookahead(1); |
| if self.ch() == '+' || self.ch() == '-' { |
| if self.ch() == '+' { |
| chomping = 1; |
| } else { |
| chomping = -1; |
| } |
| self.skip(); |
| } |
| } |
| |
| // Eat whitespaces and comments to the end of the line. |
| self.lookahead(1); |
| |
| while is_blank(self.ch()) { |
| self.skip(); |
| self.lookahead(1); |
| } |
| |
| if self.ch() == '#' { |
| while !is_breakz(self.ch()) { |
| self.skip(); |
| self.lookahead(1); |
| } |
| } |
| |
| // Check if we are at the end of the line. |
| if !is_breakz(self.ch()) { |
| return Err(ScanError::new( |
| start_mark, |
| "while scanning a block scalar, did not find expected comment or line break", |
| )); |
| } |
| |
| if is_break(self.ch()) { |
| self.lookahead(2); |
| self.skip_line(); |
| } |
| |
| if increment > 0 { |
| indent = if self.indent >= 0 { |
| (self.indent + increment as isize) as usize |
| } else { |
| increment |
| } |
| } |
| // Scan the leading line breaks and determine the indentation level if needed. |
| self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?; |
| |
| self.lookahead(1); |
| |
| let start_mark = self.mark; |
| |
| while self.mark.col == indent && !is_z(self.ch()) { |
| // We are at the beginning of a non-empty line. |
| trailing_blank = is_blank(self.ch()); |
| if !literal && !leading_break.is_empty() && !leading_blank && !trailing_blank { |
| if trailing_breaks.is_empty() { |
| string.push(' '); |
| } |
| leading_break.clear(); |
| } else { |
| string.push_str(&leading_break); |
| leading_break.clear(); |
| } |
| |
| string.push_str(&trailing_breaks); |
| trailing_breaks.clear(); |
| |
| leading_blank = is_blank(self.ch()); |
| |
| while !is_breakz(self.ch()) { |
| string.push(self.ch()); |
| self.skip(); |
| self.lookahead(1); |
| } |
| // break on EOF |
| if is_z(self.ch()) { |
| break; |
| } |
| |
| self.lookahead(2); |
| self.read_break(&mut leading_break); |
| |
| // Eat the following indentation spaces and line breaks. |
| self.block_scalar_breaks(&mut indent, &mut trailing_breaks)?; |
| } |
| |
| // Chomp the tail. |
| if chomping != -1 { |
| string.push_str(&leading_break); |
| } |
| |
| if chomping == 1 { |
| string.push_str(&trailing_breaks); |
| } |
| |
| if literal { |
| Ok(Token( |
| start_mark, |
| TokenType::Scalar(TScalarStyle::Literal, string), |
| )) |
| } else { |
| Ok(Token( |
| start_mark, |
                TokenType::Scalar(TScalarStyle::Folded, string),
| )) |
| } |
| } |
| |
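    /// Consume the leading line breaks and indentation of a block scalar, collecting the
    /// breaks in `breaks` and determining the indentation level if it was not given by an
    /// explicit indicator.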
| fn block_scalar_breaks(&mut self, indent: &mut usize, breaks: &mut String) -> ScanResult { |
| let mut max_indent = 0; |
| loop { |
| self.lookahead(1); |
| while (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == ' ' { |
| self.skip(); |
| self.lookahead(1); |
| } |
| |
| if self.mark.col > max_indent { |
| max_indent = self.mark.col; |
| } |
| |
| // Check for a tab character messing the indentation. |
| if (*indent == 0 || self.mark.col < *indent) && self.buffer[0] == '\t' { |
| return Err(ScanError::new(self.mark, |
| "while scanning a block scalar, found a tab character where an indentation space is expected")); |
| } |
| |
| if !is_break(self.ch()) { |
| break; |
| } |
| |
| self.lookahead(2); |
| // Consume the line break. |
| self.read_break(breaks); |
| } |
| |
| if *indent == 0 { |
| *indent = max_indent; |
| if *indent < (self.indent + 1) as usize { |
| *indent = (self.indent + 1) as usize; |
| } |
| if *indent < 1 { |
| *indent = 1; |
| } |
| } |
| Ok(()) |
| } |
| |
| fn fetch_flow_scalar(&mut self, single: bool) -> ScanResult { |
| self.save_simple_key()?; |
| self.disallow_simple_key(); |
| |
| let tok = self.scan_flow_scalar(single)?; |
| |
| // From spec: To ensure JSON compatibility, if a key inside a flow mapping is JSON-like, |
| // YAML allows the following value to be specified adjacent to the “:”. |
| self.adjacent_value_allowed_at = self.mark.index; |
| |
| self.tokens.push_back(tok); |
| Ok(()) |
| } |
| |
| fn scan_flow_scalar(&mut self, single: bool) -> Result<Token, ScanError> { |
| let start_mark = self.mark; |
| |
| let mut string = String::new(); |
| let mut leading_break = String::new(); |
| let mut trailing_breaks = String::new(); |
| let mut whitespaces = String::new(); |
| let mut leading_blanks; |
| |
| /* Eat the left quote. */ |
| self.skip(); |
| |
| loop { |
| /* Check for a document indicator. */ |
| self.lookahead(4); |
| |
| if self.mark.col == 0 |
| && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-')) |
| || ((self.buffer[0] == '.') |
| && (self.buffer[1] == '.') |
| && (self.buffer[2] == '.'))) |
| && is_blankz(self.buffer[3]) |
| { |
| return Err(ScanError::new( |
| start_mark, |
| "while scanning a quoted scalar, found unexpected document indicator", |
| )); |
| } |
| |
| if is_z(self.ch()) { |
| return Err(ScanError::new( |
| start_mark, |
| "while scanning a quoted scalar, found unexpected end of stream", |
| )); |
| } |
| |
| self.lookahead(2); |
| |
| leading_blanks = false; |
| // Consume non-blank characters. |
| |
| while !is_blankz(self.ch()) { |
| match self.ch() { |
| // Check for an escaped single quote. |
| '\'' if self.buffer[1] == '\'' && single => { |
| string.push('\''); |
| self.skip(); |
| self.skip(); |
| } |
| // Check for the right quote. |
| '\'' if single => break, |
| '"' if !single => break, |
| // Check for an escaped line break. |
| '\\' if !single && is_break(self.buffer[1]) => { |
| self.lookahead(3); |
| self.skip(); |
| self.skip_line(); |
| leading_blanks = true; |
| break; |
| } |
| // Check for an escape sequence. |
| '\\' if !single => { |
| let mut code_length = 0usize; |
| match self.buffer[1] { |
| '0' => string.push('\0'), |
| 'a' => string.push('\x07'), |
| 'b' => string.push('\x08'), |
| 't' | '\t' => string.push('\t'), |
| 'n' => string.push('\n'), |
| 'v' => string.push('\x0b'), |
| 'f' => string.push('\x0c'), |
| 'r' => string.push('\x0d'), |
| 'e' => string.push('\x1b'), |
| ' ' => string.push('\x20'), |
| '"' => string.push('"'), |
| '\'' => string.push('\''), |
| '\\' => string.push('\\'), |
| // NEL (#x85) |
| 'N' => string.push(char::from_u32(0x85).unwrap()), |
| // #xA0 |
| '_' => string.push(char::from_u32(0xA0).unwrap()), |
| // LS (#x2028) |
| 'L' => string.push(char::from_u32(0x2028).unwrap()), |
| // PS (#x2029) |
| 'P' => string.push(char::from_u32(0x2029).unwrap()), |
| 'x' => code_length = 2, |
| 'u' => code_length = 4, |
| 'U' => code_length = 8, |
| _ => { |
| return Err(ScanError::new( |
| start_mark, |
| "while parsing a quoted scalar, found unknown escape character", |
| )) |
| } |
| } |
| self.skip(); |
| self.skip(); |
| // Consume an arbitrary escape code. |
| if code_length > 0 { |
| self.lookahead(code_length); |
| let mut value = 0u32; |
| for i in 0..code_length { |
| if !is_hex(self.buffer[i]) { |
| return Err(ScanError::new(start_mark, |
| "while parsing a quoted scalar, did not find expected hexadecimal number")); |
| } |
| value = (value << 4) + as_hex(self.buffer[i]); |
| } |
| |
| let ch = match char::from_u32(value) { |
| Some(v) => v, |
| None => { |
| return Err(ScanError::new(start_mark, |
| "while parsing a quoted scalar, found invalid Unicode character escape code")); |
| } |
| }; |
| string.push(ch); |
| |
| for _ in 0..code_length { |
| self.skip(); |
| } |
| } |
| } |
| c => { |
| string.push(c); |
| self.skip(); |
| } |
| } |
| self.lookahead(2); |
| } |
| self.lookahead(1); |
| match self.ch() { |
| '\'' if single => break, |
| '"' if !single => break, |
| _ => {} |
| } |
| |
| // Consume blank characters. |
| while is_blank(self.ch()) || is_break(self.ch()) { |
| if is_blank(self.ch()) { |
| // Consume a space or a tab character. |
| if leading_blanks { |
| self.skip(); |
| } else { |
| whitespaces.push(self.ch()); |
| self.skip(); |
| } |
| } else { |
| self.lookahead(2); |
| // Check if it is a first line break. |
| if leading_blanks { |
| self.read_break(&mut trailing_breaks); |
| } else { |
| whitespaces.clear(); |
| self.read_break(&mut leading_break); |
| leading_blanks = true; |
| } |
| } |
| self.lookahead(1); |
| } |
| // Join the whitespaces or fold line breaks. |
| if leading_blanks { |
| if leading_break.is_empty() { |
| string.push_str(&leading_break); |
| string.push_str(&trailing_breaks); |
| trailing_breaks.clear(); |
| leading_break.clear(); |
| } else { |
| if trailing_breaks.is_empty() { |
| string.push(' '); |
| } else { |
| string.push_str(&trailing_breaks); |
| trailing_breaks.clear(); |
| } |
| leading_break.clear(); |
| } |
| } else { |
| string.push_str(&whitespaces); |
| whitespaces.clear(); |
| } |
| } // loop |
| |
| // Eat the right quote. |
| self.skip(); |
| |
| if single { |
| Ok(Token( |
| start_mark, |
| TokenType::Scalar(TScalarStyle::SingleQuoted, string), |
| )) |
| } else { |
| Ok(Token( |
| start_mark, |
| TokenType::Scalar(TScalarStyle::DoubleQuoted, string), |
| )) |
| } |
| } |
| |
| fn fetch_plain_scalar(&mut self) -> ScanResult { |
| self.save_simple_key()?; |
| self.disallow_simple_key(); |
| |
| let tok = self.scan_plain_scalar()?; |
| |
| self.tokens.push_back(tok); |
| Ok(()) |
| } |
| |
| fn scan_plain_scalar(&mut self) -> Result<Token, ScanError> { |
| let indent = self.indent + 1; |
| let start_mark = self.mark; |
| |
| let mut string = String::new(); |
| let mut leading_break = String::new(); |
| let mut trailing_breaks = String::new(); |
| let mut whitespaces = String::new(); |
| let mut leading_blanks = false; |
| |
| loop { |
| /* Check for a document indicator. */ |
| self.lookahead(4); |
| |
| if self.mark.col == 0 |
| && (((self.buffer[0] == '-') && (self.buffer[1] == '-') && (self.buffer[2] == '-')) |
| || ((self.buffer[0] == '.') |
| && (self.buffer[1] == '.') |
| && (self.buffer[2] == '.'))) |
| && is_blankz(self.buffer[3]) |
| { |
| break; |
| } |
| |
| if self.ch() == '#' { |
| break; |
| } |
| while !is_blankz(self.ch()) { |
| // indicators can end a plain scalar, see 7.3.3. Plain Style |
| match self.ch() { |
| ':' if is_blankz(self.buffer[1]) |
| || (self.flow_level > 0 && is_flow(self.buffer[1])) => |
| { |
| break; |
| } |
| ',' | '[' | ']' | '{' | '}' if self.flow_level > 0 => break, |
| _ => {} |
| } |
| |
| if leading_blanks || !whitespaces.is_empty() { |
| if leading_blanks { |
| if leading_break.is_empty() { |
| string.push_str(&leading_break); |
| string.push_str(&trailing_breaks); |
| trailing_breaks.clear(); |
| leading_break.clear(); |
| } else { |
| if trailing_breaks.is_empty() { |
| string.push(' '); |
| } else { |
| string.push_str(&trailing_breaks); |
| trailing_breaks.clear(); |
| } |
| leading_break.clear(); |
| } |
| leading_blanks = false; |
| } else { |
| string.push_str(&whitespaces); |
| whitespaces.clear(); |
| } |
| } |
| |
| string.push(self.ch()); |
| self.skip(); |
| self.lookahead(2); |
| } |
| // is the end? |
| if !(is_blank(self.ch()) || is_break(self.ch())) { |
| break; |
| } |
| self.lookahead(1); |
| |
| while is_blank(self.ch()) || is_break(self.ch()) { |
| if is_blank(self.ch()) { |
| if leading_blanks && (self.mark.col as isize) < indent && self.ch() == '\t' { |
| return Err(ScanError::new( |
| start_mark, |
| "while scanning a plain scalar, found a tab", |
| )); |
| } |
| |
| if leading_blanks { |
| self.skip(); |
| } else { |
| whitespaces.push(self.ch()); |
| self.skip(); |
| } |
| } else { |
| self.lookahead(2); |
| // Check if it is a first line break |
| if leading_blanks { |
| self.read_break(&mut trailing_breaks); |
| } else { |
| whitespaces.clear(); |
| self.read_break(&mut leading_break); |
| leading_blanks = true; |
| } |
| } |
| self.lookahead(1); |
| } |
| |
| // check indentation level |
| if self.flow_level == 0 && (self.mark.col as isize) < indent { |
| break; |
| } |
| } |
| |
| if leading_blanks { |
| self.allow_simple_key(); |
| } |
| |
| Ok(Token( |
| start_mark, |
| TokenType::Scalar(TScalarStyle::Plain, string), |
| )) |
| } |
| |
| fn fetch_key(&mut self) -> ScanResult { |
| let start_mark = self.mark; |
| if self.flow_level == 0 { |
| // Check if we are allowed to start a new key (not necessarily simple). |
| if !self.simple_key_allowed { |
| return Err(ScanError::new( |
| self.mark, |
| "mapping keys are not allowed in this context", |
| )); |
| } |
| self.roll_indent( |
| start_mark.col, |
| None, |
| TokenType::BlockMappingStart, |
| start_mark, |
| ); |
| } |
| |
| self.remove_simple_key()?; |
| |
| if self.flow_level == 0 { |
| self.allow_simple_key(); |
| } else { |
| self.disallow_simple_key(); |
| } |
| |
| self.skip(); |
| self.tokens.push_back(Token(start_mark, TokenType::Key)); |
| Ok(()) |
| } |
| |
| fn fetch_value(&mut self) -> ScanResult { |
| let sk = self.simple_keys.last().unwrap().clone(); |
| let start_mark = self.mark; |
| if sk.possible { |
| // insert simple key |
| let tok = Token(sk.mark, TokenType::Key); |
| let tokens_parsed = self.tokens_parsed; |
| self.insert_token(sk.token_number - tokens_parsed, tok); |
| |
| // Add the BLOCK-MAPPING-START token if needed. |
| self.roll_indent( |
| sk.mark.col, |
| Some(sk.token_number), |
| TokenType::BlockMappingStart, |
| start_mark, |
| ); |
| |
| self.simple_keys.last_mut().unwrap().possible = false; |
| self.disallow_simple_key(); |
| } else { |
| // The ':' indicator follows a complex key. |
| if self.flow_level == 0 { |
| if !self.simple_key_allowed { |
| return Err(ScanError::new( |
| start_mark, |
| "mapping values are not allowed in this context", |
| )); |
| } |
| |
| self.roll_indent( |
| start_mark.col, |
| None, |
| TokenType::BlockMappingStart, |
| start_mark, |
| ); |
| } |
| |
| if self.flow_level == 0 { |
| self.allow_simple_key(); |
| } else { |
| self.disallow_simple_key(); |
| } |
| } |
| self.skip(); |
| self.tokens.push_back(Token(start_mark, TokenType::Value)); |
| |
| Ok(()) |
| } |
| |
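    /// In block context, if `col` is deeper than the current indentation level, push the old
    /// level and emit (or insert at token position `number`) the given block-start token.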
| fn roll_indent(&mut self, col: usize, number: Option<usize>, tok: TokenType, mark: Marker) { |
| if self.flow_level > 0 { |
| return; |
| } |
| |
| if self.indent < col as isize { |
| self.indents.push(self.indent); |
| self.indent = col as isize; |
| let tokens_parsed = self.tokens_parsed; |
| match number { |
| Some(n) => self.insert_token(n - tokens_parsed, Token(mark, tok)), |
| None => self.tokens.push_back(Token(mark, tok)), |
| } |
| } |
| } |
| |
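    /// In block context, pop indentation levels deeper than `col`, emitting a `BlockEnd`
    /// token for each level that is closed.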
| fn unroll_indent(&mut self, col: isize) { |
| if self.flow_level > 0 { |
| return; |
| } |
| while self.indent > col { |
| self.tokens.push_back(Token(self.mark, TokenType::BlockEnd)); |
| self.indent = self.indents.pop().unwrap(); |
| } |
| } |
| |
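    /// If a simple key is allowed at the current position, record it as a candidate,
    /// replacing the previous candidate for this nesting level.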
| fn save_simple_key(&mut self) -> Result<(), ScanError> { |
| let required = self.flow_level > 0 && self.indent == (self.mark.col as isize); |
| if self.simple_key_allowed { |
| let mut sk = SimpleKey::new(self.mark); |
| sk.possible = true; |
| sk.required = required; |
| sk.token_number = self.tokens_parsed + self.tokens.len(); |
| |
| self.remove_simple_key()?; |
| |
| self.simple_keys.pop(); |
| self.simple_keys.push(sk); |
| } |
| Ok(()) |
| } |
| |
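    /// Drop the pending simple-key candidate; it is an error if a required key never got
    /// its ':'.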
| fn remove_simple_key(&mut self) -> ScanResult { |
| let last = self.simple_keys.last_mut().unwrap(); |
| if last.possible && last.required { |
| return Err(ScanError::new(self.mark, "simple key expected")); |
| } |
| |
| last.possible = false; |
| Ok(()) |
| } |
| } |
| |
| #[cfg(test)] |
| mod test { |
| use super::TokenType::*; |
| use super::*; |
| |
| macro_rules! next { |
| ($p:ident, $tk:pat) => {{ |
| let tok = $p.next().unwrap(); |
| match tok.1 { |
| $tk => {} |
| _ => panic!("unexpected token: {:?}", tok), |
| } |
| }}; |
| } |
| |
| macro_rules! next_scalar { |
| ($p:ident, $tk:expr, $v:expr) => {{ |
| let tok = $p.next().unwrap(); |
| match tok.1 { |
| Scalar(style, ref v) => { |
| assert_eq!(style, $tk); |
| assert_eq!(v, $v); |
| } |
| _ => panic!("unexpected token: {:?}", tok), |
| } |
| }}; |
| } |
| |
| macro_rules! end { |
| ($p:ident) => {{ |
| assert_eq!($p.next(), None); |
| }}; |
| } |
    /// Test cases ported from libyaml's scanner.c.
| #[test] |
| fn test_empty() { |
| let s = ""; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_scalar() { |
| let s = "a scalar"; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, Scalar(TScalarStyle::Plain, _)); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_explicit_scalar() { |
| let s = "--- |
| 'a scalar' |
| ... |
| "; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, DocumentStart); |
| next!(p, Scalar(TScalarStyle::SingleQuoted, _)); |
| next!(p, DocumentEnd); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_multiple_documents() { |
| let s = " |
| 'a scalar' |
| --- |
| 'a scalar' |
| --- |
| 'a scalar' |
| "; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, Scalar(TScalarStyle::SingleQuoted, _)); |
| next!(p, DocumentStart); |
| next!(p, Scalar(TScalarStyle::SingleQuoted, _)); |
| next!(p, DocumentStart); |
| next!(p, Scalar(TScalarStyle::SingleQuoted, _)); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_a_flow_sequence() { |
| let s = "[item 1, item 2, item 3]"; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, FlowSequenceStart); |
| next_scalar!(p, TScalarStyle::Plain, "item 1"); |
| next!(p, FlowEntry); |
| next!(p, Scalar(TScalarStyle::Plain, _)); |
| next!(p, FlowEntry); |
| next!(p, Scalar(TScalarStyle::Plain, _)); |
| next!(p, FlowSequenceEnd); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_a_flow_mapping() { |
| let s = " |
| { |
| a simple key: a value, # Note that the KEY token is produced. |
| ? a complex key: another value, |
| } |
| "; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, FlowMappingStart); |
| next!(p, Key); |
| next!(p, Scalar(TScalarStyle::Plain, _)); |
| next!(p, Value); |
| next!(p, Scalar(TScalarStyle::Plain, _)); |
| next!(p, FlowEntry); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "a complex key"); |
| next!(p, Value); |
| next!(p, Scalar(TScalarStyle::Plain, _)); |
| next!(p, FlowEntry); |
| next!(p, FlowMappingEnd); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_block_sequences() { |
| let s = " |
| - item 1 |
| - item 2 |
| - |
| - item 3.1 |
| - item 3.2 |
| - |
| key 1: value 1 |
| key 2: value 2 |
| "; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, BlockSequenceStart); |
| next!(p, BlockEntry); |
| next_scalar!(p, TScalarStyle::Plain, "item 1"); |
| next!(p, BlockEntry); |
| next_scalar!(p, TScalarStyle::Plain, "item 2"); |
| next!(p, BlockEntry); |
| next!(p, BlockSequenceStart); |
| next!(p, BlockEntry); |
| next_scalar!(p, TScalarStyle::Plain, "item 3.1"); |
| next!(p, BlockEntry); |
| next_scalar!(p, TScalarStyle::Plain, "item 3.2"); |
| next!(p, BlockEnd); |
| next!(p, BlockEntry); |
| next!(p, BlockMappingStart); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "key 1"); |
| next!(p, Value); |
| next_scalar!(p, TScalarStyle::Plain, "value 1"); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "key 2"); |
| next!(p, Value); |
| next_scalar!(p, TScalarStyle::Plain, "value 2"); |
| next!(p, BlockEnd); |
| next!(p, BlockEnd); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_block_mappings() { |
| let s = " |
| a simple key: a value # The KEY token is produced here. |
| ? a complex key |
| : another value |
| a mapping: |
| key 1: value 1 |
| key 2: value 2 |
| a sequence: |
| - item 1 |
| - item 2 |
| "; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, BlockMappingStart); |
| next!(p, Key); |
| next!(p, Scalar(_, _)); |
| next!(p, Value); |
| next!(p, Scalar(_, _)); |
| next!(p, Key); |
| next!(p, Scalar(_, _)); |
| next!(p, Value); |
| next!(p, Scalar(_, _)); |
| next!(p, Key); |
| next!(p, Scalar(_, _)); |
| next!(p, Value); // libyaml comment seems to be wrong |
| next!(p, BlockMappingStart); |
| next!(p, Key); |
| next!(p, Scalar(_, _)); |
| next!(p, Value); |
| next!(p, Scalar(_, _)); |
| next!(p, Key); |
| next!(p, Scalar(_, _)); |
| next!(p, Value); |
| next!(p, Scalar(_, _)); |
| next!(p, BlockEnd); |
| next!(p, Key); |
| next!(p, Scalar(_, _)); |
| next!(p, Value); |
| next!(p, BlockSequenceStart); |
| next!(p, BlockEntry); |
| next!(p, Scalar(_, _)); |
| next!(p, BlockEntry); |
| next!(p, Scalar(_, _)); |
| next!(p, BlockEnd); |
| next!(p, BlockEnd); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_no_block_sequence_start() { |
| let s = " |
| key: |
| - item 1 |
| - item 2 |
| "; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, BlockMappingStart); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "key"); |
| next!(p, Value); |
| next!(p, BlockEntry); |
| next_scalar!(p, TScalarStyle::Plain, "item 1"); |
| next!(p, BlockEntry); |
| next_scalar!(p, TScalarStyle::Plain, "item 2"); |
| next!(p, BlockEnd); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_collections_in_sequence() { |
| let s = " |
| - - item 1 |
| - item 2 |
| - key 1: value 1 |
| key 2: value 2 |
| - ? complex key |
| : complex value |
| "; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, BlockSequenceStart); |
| next!(p, BlockEntry); |
| next!(p, BlockSequenceStart); |
| next!(p, BlockEntry); |
| next_scalar!(p, TScalarStyle::Plain, "item 1"); |
| next!(p, BlockEntry); |
| next_scalar!(p, TScalarStyle::Plain, "item 2"); |
| next!(p, BlockEnd); |
| next!(p, BlockEntry); |
| next!(p, BlockMappingStart); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "key 1"); |
| next!(p, Value); |
| next_scalar!(p, TScalarStyle::Plain, "value 1"); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "key 2"); |
| next!(p, Value); |
| next_scalar!(p, TScalarStyle::Plain, "value 2"); |
| next!(p, BlockEnd); |
| next!(p, BlockEntry); |
| next!(p, BlockMappingStart); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "complex key"); |
| next!(p, Value); |
| next_scalar!(p, TScalarStyle::Plain, "complex value"); |
| next!(p, BlockEnd); |
| next!(p, BlockEnd); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_collections_in_mapping() { |
| let s = " |
| ? a sequence |
| : - item 1 |
| - item 2 |
| ? a mapping |
| : key 1: value 1 |
| key 2: value 2 |
| "; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, BlockMappingStart); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "a sequence"); |
| next!(p, Value); |
| next!(p, BlockSequenceStart); |
| next!(p, BlockEntry); |
| next_scalar!(p, TScalarStyle::Plain, "item 1"); |
| next!(p, BlockEntry); |
| next_scalar!(p, TScalarStyle::Plain, "item 2"); |
| next!(p, BlockEnd); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "a mapping"); |
| next!(p, Value); |
| next!(p, BlockMappingStart); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "key 1"); |
| next!(p, Value); |
| next_scalar!(p, TScalarStyle::Plain, "value 1"); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "key 2"); |
| next!(p, Value); |
| next_scalar!(p, TScalarStyle::Plain, "value 2"); |
| next!(p, BlockEnd); |
| next!(p, BlockEnd); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_spec_ex7_3() { |
| let s = " |
| { |
| ? foo :, |
| : bar, |
| } |
| "; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, FlowMappingStart); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "foo"); |
| next!(p, Value); |
| next!(p, FlowEntry); |
| next!(p, Value); |
| next_scalar!(p, TScalarStyle::Plain, "bar"); |
| next!(p, FlowEntry); |
| next!(p, FlowMappingEnd); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_plain_scalar_starting_with_indicators_in_flow() { |
| // "Plain scalars must not begin with most indicators, as this would cause ambiguity with |
| // other YAML constructs. However, the “:”, “?” and “-” indicators may be used as the first |
| // character if followed by a non-space “safe” character, as this causes no ambiguity." |
| |
| let s = "{a: :b}"; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, FlowMappingStart); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "a"); |
| next!(p, Value); |
| next_scalar!(p, TScalarStyle::Plain, ":b"); |
| next!(p, FlowMappingEnd); |
| next!(p, StreamEnd); |
| end!(p); |
| |
| let s = "{a: ?b}"; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, FlowMappingStart); |
| next!(p, Key); |
| next_scalar!(p, TScalarStyle::Plain, "a"); |
| next!(p, Value); |
| next_scalar!(p, TScalarStyle::Plain, "?b"); |
| next!(p, FlowMappingEnd); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_plain_scalar_starting_with_indicators_in_block() { |
| let s = ":a"; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next_scalar!(p, TScalarStyle::Plain, ":a"); |
| next!(p, StreamEnd); |
| end!(p); |
| |
| let s = "?a"; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next_scalar!(p, TScalarStyle::Plain, "?a"); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_plain_scalar_containing_indicators_in_block() { |
| let s = "a:,b"; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next_scalar!(p, TScalarStyle::Plain, "a:,b"); |
| next!(p, StreamEnd); |
| end!(p); |
| |
| let s = ":,b"; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next_scalar!(p, TScalarStyle::Plain, ":,b"); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_scanner_cr() { |
| let s = "---\r\n- tok1\r\n- tok2"; |
| let mut p = Scanner::new(s.chars()); |
| next!(p, StreamStart(..)); |
| next!(p, DocumentStart); |
| next!(p, BlockSequenceStart); |
| next!(p, BlockEntry); |
| next_scalar!(p, TScalarStyle::Plain, "tok1"); |
| next!(p, BlockEntry); |
| next_scalar!(p, TScalarStyle::Plain, "tok2"); |
| next!(p, BlockEnd); |
| next!(p, StreamEnd); |
| end!(p); |
| } |
| |
| #[test] |
| fn test_uri() { |
| // TODO |
| } |
| |
| #[test] |
| fn test_uri_escapes() { |
| // TODO |
| } |
| } |