| // pest. The Elegant Parser |
| // Copyright (c) 2018 Dragoș Tiselice |
| // |
| // Licensed under the Apache License, Version 2.0 |
| // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT |
| // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| // option. All files in the project carrying such notice may not be copied, |
| // modified, or distributed except according to those terms. |
| |
| use core::fmt; |
| use core::hash::{Hash, Hasher}; |
| use core::ops::{Bound, RangeBounds}; |
| use core::ptr; |
| use core::str; |
| |
| use crate::position; |
| |
/// A region of an input `&str`, delimited by a start and an end byte index.
///
/// A `Span` is obtained either from [two `Position`s] or from a [`Pair`].
///
/// [two `Position`s]: struct.Position.html#method.span
/// [`Pair`]: ../iterators/struct.Pair.html#method.span
#[derive(Copy, Clone)]
pub struct Span<'i> {
    /// The string that `start` and `end` index into.
    input: &'i str,
    /// Index into `input` at which the span begins.
    ///
    /// # Safety
    ///
    /// Must be a valid character boundary index into `input`.
    start: usize,
    /// Index into `input` at which the span ends.
    ///
    /// # Safety
    ///
    /// Must be a valid character boundary index into `input`.
    end: usize,
}
| |
| impl<'i> Span<'i> { |
| /// Create a new `Span` without checking invariants. (Checked with `debug_assertions`.) |
| /// |
| /// # Safety |
| /// |
| /// `input[start..end]` must be a valid subslice; that is, said indexing should not panic. |
| pub(crate) unsafe fn new_unchecked(input: &str, start: usize, end: usize) -> Span<'_> { |
| debug_assert!(input.get(start..end).is_some()); |
| Span { input, start, end } |
| } |
| |
| /// Attempts to create a new span. Will return `None` if `input[start..end]` is an invalid index |
| /// into `input`. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// # use pest::Span; |
| /// let input = "Hello!"; |
| /// assert_eq!(None, Span::new(input, 100, 0)); |
| /// assert!(Span::new(input, 0, input.len()).is_some()); |
| /// ``` |
| pub fn new(input: &str, start: usize, end: usize) -> Option<Span<'_>> { |
| if input.get(start..end).is_some() { |
| Some(Span { input, start, end }) |
| } else { |
| None |
| } |
| } |
| |
| /// Attempts to create a new span based on a sub-range. |
| /// |
| /// ``` |
| /// use pest::Span; |
| /// let input = "Hello World!"; |
| /// let world = Span::new(input, 6, input.len()).unwrap(); |
| /// let orl = world.get(1..=3); |
| /// assert!(orl.is_some()); |
| /// assert_eq!(orl.unwrap().as_str(), "orl"); |
| /// ``` |
| /// |
| /// # Examples |
| pub fn get(&self, range: impl RangeBounds<usize>) -> Option<Span<'i>> { |
| let start = match range.start_bound() { |
| Bound::Included(offset) => *offset, |
| Bound::Excluded(offset) => *offset + 1, |
| Bound::Unbounded => 0, |
| }; |
| let end = match range.end_bound() { |
| Bound::Included(offset) => *offset + 1, |
| Bound::Excluded(offset) => *offset, |
| Bound::Unbounded => self.as_str().len(), |
| }; |
| |
| self.as_str().get(start..end).map(|_| Span { |
| input: self.input, |
| start: self.start + start, |
| end: self.start + end, |
| }) |
| } |
| |
| /// Returns the `Span`'s start byte position as a `usize`. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// # use pest::Position; |
| /// let input = "ab"; |
| /// let start = Position::from_start(input); |
| /// let end = start.clone(); |
| /// let span = start.span(&end); |
| /// |
| /// assert_eq!(span.start(), 0); |
| /// ``` |
| #[inline] |
| pub fn start(&self) -> usize { |
| self.start |
| } |
| |
| /// Returns the `Span`'s end byte position as a `usize`. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// # use pest::Position; |
| /// let input = "ab"; |
| /// let start = Position::from_start(input); |
| /// let end = start.clone(); |
| /// let span = start.span(&end); |
| /// |
| /// assert_eq!(span.end(), 0); |
| /// ``` |
| #[inline] |
| pub fn end(&self) -> usize { |
| self.end |
| } |
| |
| /// Returns the `Span`'s start `Position`. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// # use pest::Position; |
| /// let input = "ab"; |
| /// let start = Position::from_start(input); |
| /// let end = start.clone(); |
| /// let span = start.clone().span(&end); |
| /// |
| /// assert_eq!(span.start_pos(), start); |
| /// ``` |
| #[inline] |
| pub fn start_pos(&self) -> position::Position<'i> { |
| // Span's start position is always a UTF-8 border. |
| unsafe { position::Position::new_unchecked(self.input, self.start) } |
| } |
| |
| /// Returns the `Span`'s end `Position`. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// # use pest::Position; |
| /// let input = "ab"; |
| /// let start = Position::from_start(input); |
| /// let end = start.clone(); |
| /// let span = start.span(&end); |
| /// |
| /// assert_eq!(span.end_pos(), end); |
| /// ``` |
| #[inline] |
| pub fn end_pos(&self) -> position::Position<'i> { |
| // Span's end position is always a UTF-8 border. |
| unsafe { position::Position::new_unchecked(self.input, self.end) } |
| } |
| |
| /// Splits the `Span` into a pair of `Position`s. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// # use pest::Position; |
| /// let input = "ab"; |
| /// let start = Position::from_start(input); |
| /// let end = start.clone(); |
| /// let span = start.clone().span(&end); |
| /// |
| /// assert_eq!(span.split(), (start, end)); |
| /// ``` |
| #[inline] |
| pub fn split(self) -> (position::Position<'i>, position::Position<'i>) { |
| // Span's start and end positions are always a UTF-8 borders. |
| let pos1 = unsafe { position::Position::new_unchecked(self.input, self.start) }; |
| let pos2 = unsafe { position::Position::new_unchecked(self.input, self.end) }; |
| |
| (pos1, pos2) |
| } |
| |
| /// Captures a slice from the `&str` defined by the `Span`. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// # use pest; |
| /// # #[allow(non_camel_case_types)] |
| /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| /// enum Rule {} |
| /// |
| /// let input = "abc"; |
| /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(1).unwrap(); |
| /// let start_pos = state.position().clone(); |
| /// state = state.match_string("b").unwrap(); |
| /// let span = start_pos.span(&state.position().clone()); |
| /// assert_eq!(span.as_str(), "b"); |
| /// ``` |
| #[inline] |
| pub fn as_str(&self) -> &'i str { |
| // Span's start and end positions are always a UTF-8 borders. |
| &self.input[self.start..self.end] |
| } |
| |
| /// Iterates over all lines (partially) covered by this span. Yielding a `&str` for each line. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// # use pest; |
| /// # #[allow(non_camel_case_types)] |
| /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| /// enum Rule {} |
| /// |
| /// let input = "a\nb\nc"; |
| /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap(); |
| /// let start_pos = state.position().clone(); |
| /// state = state.match_string("b\nc").unwrap(); |
| /// let span = start_pos.span(&state.position().clone()); |
| /// assert_eq!(span.lines().collect::<Vec<_>>(), vec!["b\n", "c"]); |
| /// ``` |
| #[inline] |
| pub fn lines(&self) -> Lines<'_> { |
| Lines { |
| inner: self.lines_span(), |
| } |
| } |
| |
| /// Iterates over all lines (partially) covered by this span. Yielding a `Span` for each line. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// # use pest; |
| /// # use pest::Span; |
| /// # #[allow(non_camel_case_types)] |
| /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] |
| /// enum Rule {} |
| /// |
| /// let input = "a\nb\nc"; |
| /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap(); |
| /// let start_pos = state.position().clone(); |
| /// state = state.match_string("b\nc").unwrap(); |
| /// let span = start_pos.span(&state.position().clone()); |
| /// assert_eq!(span.lines_span().collect::<Vec<_>>(), vec![Span::new(input, 2, 4).unwrap(), Span::new(input, 4, 5).unwrap()]); |
| /// ``` |
| pub fn lines_span(&self) -> LinesSpan<'_> { |
| LinesSpan { |
| span: self, |
| pos: self.start, |
| } |
| } |
| } |
| |
| impl<'i> fmt::Debug for Span<'i> { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| f.debug_struct("Span") |
| .field("str", &self.as_str()) |
| .field("start", &self.start) |
| .field("end", &self.end) |
| .finish() |
| } |
| } |
| |
| impl<'i> PartialEq for Span<'i> { |
| fn eq(&self, other: &Span<'i>) -> bool { |
| ptr::eq(self.input, other.input) && self.start == other.start && self.end == other.end |
| } |
| } |
| |
| impl<'i> Eq for Span<'i> {} |
| |
| impl<'i> Hash for Span<'i> { |
| fn hash<H: Hasher>(&self, state: &mut H) { |
| (self.input as *const str).hash(state); |
| self.start.hash(state); |
| self.end.hash(state); |
| } |
| } |
| |
| /// Line iterator for Spans, created by [`Span::lines_span()`]. |
| /// |
| /// Iterates all lines that are at least _partially_ covered by the span. Yielding a `Span` for each. |
| /// |
| /// [`Span::lines_span()`]: struct.Span.html#method.lines_span |
| pub struct LinesSpan<'i> { |
| span: &'i Span<'i>, |
| pos: usize, |
| } |
| |
| impl<'i> Iterator for LinesSpan<'i> { |
| type Item = Span<'i>; |
| fn next(&mut self) -> Option<Self::Item> { |
| if self.pos > self.span.end { |
| return None; |
| } |
| let pos = position::Position::new(self.span.input, self.pos)?; |
| if pos.at_end() { |
| return None; |
| } |
| |
| let line_start = pos.find_line_start(); |
| self.pos = pos.find_line_end(); |
| |
| Span::new(self.span.input, line_start, self.pos) |
| } |
| } |
| |
| /// Line iterator for Spans, created by [`Span::lines()`]. |
| /// |
| /// Iterates all lines that are at least _partially_ covered by the span. Yielding a `&str` for each. |
| /// |
| /// [`Span::lines()`]: struct.Span.html#method.lines |
| pub struct Lines<'i> { |
| inner: LinesSpan<'i>, |
| } |
| |
| impl<'i> Iterator for Lines<'i> { |
| type Item = &'i str; |
| fn next(&mut self) -> Option<Self::Item> { |
| self.inner.next().map(|span| span.as_str()) |
| } |
| } |
| |
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec::Vec;

    /// `Span::get` with several range kinds keeps the original input and selects the
    /// expected text relative to the span's start.
    #[test]
    fn get() {
        let input = "abc123abc";
        let span = Span::new(input, 3, input.len()).unwrap();
        assert_eq!(span.as_str(), "123abc");
        assert_eq!(span.input, input);

        let inclusive_to = span.get(..=2).unwrap();
        assert_eq!(inclusive_to.input, input);
        assert_eq!(inclusive_to.as_str(), "123");

        let full = span.get(..).unwrap();
        assert_eq!(full.input, input);
        assert_eq!(full.as_str(), "123abc");

        let tail = span.get(3..).unwrap();
        assert_eq!(tail.input, input);
        assert_eq!(tail.as_str(), "abc");

        let empty = span.get(0..0).unwrap();
        assert_eq!(empty.input, input);
        assert_eq!(empty.as_str(), "");
    }

    /// Out-of-bounds ranges are rejected by `Span::get`.
    #[test]
    fn get_fails() {
        let input = "abc";
        let span = Span::new(input, 0, input.len()).unwrap();

        assert!(span.get(0..100).is_none());
        assert!(span.get(100..200).is_none());
    }

    /// Out-of-bounds construction fails, and spans with different bounds compare unequal.
    #[test]
    fn span_comp() {
        let input = "abc\ndef\nghi";
        let inner = Span::new(input, 1, 7).unwrap();

        assert!(Span::new(input, 50, 51).is_none());

        let wider = Span::new(input, 0, 8).unwrap();
        assert_ne!(inner, wider);
    }

    /// `Span::split` gives back the two positions the span was built from.
    #[test]
    fn split() {
        let input = "a";
        let start = position::Position::from_start(input);
        let mut end = start;

        assert!(end.skip(1));

        let span = start.span(&end);

        assert_eq!(span.split(), (start, end));
    }

    /// A span starting and ending mid-line yields both touched lines in full.
    #[test]
    fn lines_mid() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let as_strs: Vec<_> = span.lines().collect();
        let via_spans: Vec<_> = span.lines_span().map(|line| line.as_str()).collect();

        assert_eq!(as_strs, ["abc\n", "def\n"]);
        // Verify parity with lines_span()
        assert_eq!(as_strs, via_spans);
    }

    /// A span reaching the end of input yields the final, newline-less line.
    #[test]
    fn lines_eof() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 5, 11).unwrap();
        assert!(span.end_pos().at_end());
        assert_eq!(span.end(), 11);

        let as_strs: Vec<_> = span.lines().collect();
        let via_spans: Vec<_> = span.lines_span().map(|line| line.as_str()).collect();

        assert_eq!(as_strs, ["def\n", "ghi"]);
        // Verify parity with lines_span()
        assert_eq!(as_strs, via_spans);
    }

    /// `lines_span` yields whole-line spans whose text matches what `lines` yields.
    #[test]
    fn lines_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let line_spans: Vec<_> = span.lines_span().collect();
        let line_strs: Vec<_> = span.lines().collect();

        assert_eq!(
            line_spans,
            [
                Span::new(input, 0, 4).unwrap(),
                Span::new(input, 4, 8).unwrap(),
            ]
        );

        let texts: Vec<_> = line_spans.iter().map(|line| line.as_str()).collect();
        assert_eq!(texts, line_strs);
    }
}