blob: 02c57268305ac56783b50d50cf8793b6e5b251d9 [file] [log] [blame]
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
//! The core functionality of parsing grammar.
//! State of parser during the process of rules handling.
use alloc::borrow::ToOwned;
use alloc::boxed::Box;
use alloc::collections::BTreeSet;
use alloc::rc::Rc;
use alloc::string::String;
use alloc::vec;
use alloc::vec::Vec;
use core::fmt::{Debug, Display, Formatter};
use core::num::NonZeroUsize;
use core::ops::Range;
use core::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use crate::error::{Error, ErrorVariant};
use crate::iterators::pairs::new;
use crate::iterators::{pairs, QueueableToken};
use crate::position::Position;
use crate::span::Span;
use crate::stack::Stack;
use crate::RuleType;
/// The current lookahead status of a [`ParserState`].
///
/// [`ParserState`]: struct.ParserState.html
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Lookahead {
/// The positive predicate, written as an ampersand &,
/// attempts to match its inner expression.
/// If the inner expression succeeds, parsing continues,
/// but at the same position as the predicate —
/// &foo ~ bar is thus a kind of "AND" statement:
/// "the input string must match foo AND bar".
/// If the inner expression fails,
/// the whole expression fails too.
Positive,
/// The negative predicate, written as an exclamation mark !,
/// attempts to match its inner expression.
/// If the inner expression fails, the predicate succeeds
/// and parsing continues at the same position as the predicate.
/// If the inner expression succeeds, the predicate fails —
/// !foo ~ bar is thus a kind of "NOT" statement:
/// "the input string must match bar but NOT foo".
Negative,
/// No lookahead (i.e. it will consume input).
None,
}
/// The current atomicity of a [`ParserState`].
///
/// [`ParserState`]: struct.ParserState.html
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Atomicity {
/// prevents implicit whitespace: inside an atomic rule,
/// the tilde ~ means "immediately followed by",
/// and repetition operators (asterisk * and plus sign +)
/// have no implicit separation. In addition, all other rules
/// called from an atomic rule are also treated as atomic.
/// (interior matching rules are silent)
Atomic,
/// The same as atomic, but inner tokens are produced as normal.
CompoundAtomic,
/// implicit whitespace is enabled
NonAtomic,
}
/// Type alias to simplify specifying the return value of chained closures.
pub type ParseResult<S> = Result<S, S>;
/// Match direction for the stack. Used in `PEEK[a..b]`/`stack_match_peek_slice`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MatchDir {
/// from the bottom to the top of the stack
BottomToTop,
/// from the top to the bottom of the stack
TopToBottom,
}
static CALL_LIMIT: AtomicUsize = AtomicUsize::new(0);
/// Sets the maximum call limit for the parser state
/// to prevent stack overflows or excessive execution times
/// in some grammars.
/// If set, the calls are tracked as a running total
/// over all non-terminal rules that can nest closures
/// (which are passed to transform the parser state).
///
/// # Arguments
///
/// * `limit` - The maximum number of calls. If None,
/// the number of calls is unlimited.
pub fn set_call_limit(limit: Option<NonZeroUsize>) {
CALL_LIMIT.store(limit.map(|f| f.get()).unwrap_or(0), Ordering::Relaxed);
}
static ERROR_DETAIL: AtomicBool = AtomicBool::new(false);
/// Sets whether information for more error details
/// should be collected. This is useful for debugging
/// parser errors (as it leads to more comprehensive
/// error messages), but it has a higher performance cost.
/// (hence, it's off by default)
///
/// # Arguments
///
/// * `enabled` - Whether to enable the collection for
/// more error details.
pub fn set_error_detail(enabled: bool) {
ERROR_DETAIL.store(enabled, Ordering::Relaxed);
}
#[derive(Debug)]
struct CallLimitTracker {
current_call_limit: Option<(usize, usize)>,
}
impl Default for CallLimitTracker {
fn default() -> Self {
let limit = CALL_LIMIT.load(Ordering::Relaxed);
let current_call_limit = if limit > 0 { Some((0, limit)) } else { None };
Self { current_call_limit }
}
}
impl CallLimitTracker {
fn limit_reached(&self) -> bool {
self.current_call_limit
.map_or(false, |(current, limit)| current >= limit)
}
fn increment_depth(&mut self) {
if let Some((current, _)) = &mut self.current_call_limit {
*current += 1;
}
}
}
/// Number of call stacks that may result from a sequence of rules parsing.
const CALL_STACK_INITIAL_CAPACITY: usize = 20;
/// Max (un)expected number of tokens that we may see on the parsing error position.
const EXPECTED_TOKENS_INITIAL_CAPACITY: usize = 30;
/// Max rule children number for which we'll extend calls stacks.
///
/// In case rule we're working with has too many children rules that failed in parsing,
/// we don't want to store long stacks for all of them. If rule has more than this number
/// of failed children, they all will be collapsed in a parent rule.
const CALL_STACK_CHILDREN_THRESHOLD: usize = 4;
/// Structure tracking errored parsing call (associated with specific `ParserState` function).
#[derive(Debug, Hash, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub enum ParseAttempt<R> {
/// Call of `rule` errored.
Rule(R),
/// Call of token element (e.g., `match_string` or `match_insensitive`) errored.
/// Works as indicator of that leaf node is not a rule. In order to get the token value we
/// can address `ParseAttempts` `(un)expected_tokens`.
Token,
}
impl<R> ParseAttempt<R> {
pub fn get_rule(&self) -> Option<&R> {
match self {
ParseAttempt::Rule(r) => Some(r),
ParseAttempt::Token => None,
}
}
}
/// Rules call stack.
/// Contains sequence of rule calls that resulted in new parsing attempt.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct RulesCallStack<R> {
/// Deepest rule caused a parsing error (ParseAttempt::Token transformed into a rule).
pub deepest: ParseAttempt<R>,
/// Most top rule covering `deepest`.
pub parent: Option<R>,
}
impl<R> RulesCallStack<R> {
fn new(deepest: ParseAttempt<R>) -> RulesCallStack<R> {
RulesCallStack {
deepest,
parent: None,
}
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum ParsingToken {
Sensitive { token: String },
Insensitive { token: String },
Range { start: char, end: char },
BuiltInRule,
}
impl Display for ParsingToken {
fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
match self {
ParsingToken::Sensitive { token } => write!(f, "{token}"),
ParsingToken::Insensitive { token } => write!(f, "{}", token.to_uppercase()),
ParsingToken::Range { start, end } => write!(f, "{start}..{end}"),
ParsingToken::BuiltInRule => write!(f, "BUILTIN_RULE"),
}
}
}
/// Structure that tracks all the parsing attempts made on the max position.
/// We want to give an error hint about parsing rules that succeeded
/// at the farthest input position.
/// The intuition is such rules will be most likely the query user initially wanted to write.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct ParseAttempts<R> {
/// Indicates whether the parsing attempts are tracked.
enabled: bool,
/// Vec of rule calls sequences awaiting tokens at the same `max_position`.
/// If there are several stacks in vec, it means all those rule stacks are "equal"
/// because their attempts occurred on the same position.
pub call_stacks: Vec<RulesCallStack<R>>,
/// Tokens that could be putted at `max_position`
/// in order to get a valid grammar query.
expected_tokens: Vec<ParsingToken>,
/// Tokens that we've prohibited to be putted at `max_position`
/// in order to get a valid grammar query.
unexpected_tokens: Vec<ParsingToken>,
/// Max position at which we were expecting to see one of `expected_tokens`.
pub max_position: usize,
}
impl<R: RuleType> ParseAttempts<R> {
/// Create new `ParseAttempts` instance with `call_stacks` and `expected_tokens`
/// initialized with capacity.
pub fn new() -> Self {
Self {
call_stacks: Vec::with_capacity(CALL_STACK_INITIAL_CAPACITY),
expected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
unexpected_tokens: Vec::with_capacity(EXPECTED_TOKENS_INITIAL_CAPACITY),
max_position: 0,
enabled: ERROR_DETAIL.load(Ordering::Relaxed),
}
}
/// Get number of currently present call stacks.
fn call_stacks_number(&self) -> usize {
self.call_stacks.len()
}
pub fn expected_tokens(&self) -> Vec<ParsingToken> {
self.expected_tokens
.iter()
.cloned()
.collect::<BTreeSet<_>>()
.into_iter()
.collect()
}
pub fn unexpected_tokens(&self) -> Vec<ParsingToken> {
self.unexpected_tokens
.iter()
.cloned()
.collect::<BTreeSet<_>>()
.into_iter()
.collect()
}
/// Retrieve call stacks.
pub fn call_stacks(&self) -> Vec<RulesCallStack<R>> {
self.call_stacks
.iter()
.cloned()
.collect::<BTreeSet<_>>()
.into_iter()
.collect()
}
/// In case we've tried to parse a rule, which start position is bigger than previous
/// `max_position` it means that we've advanced in our parsing and found better candidate.
///
/// `start_index` is:
/// * Number of call stacks present in state at the moment current `rule` was called. The idea
/// is that we'd like to update only those stacks that originated from the current `rule` and
/// not from those that were called previously.
/// * 0 in case we've successfully parsed some token since the moment `rule` was called.
fn try_add_new_stack_rule(&mut self, rule: R, start_index: usize) {
let mut non_token_call_stacks = Vec::new();
let mut token_call_stack_met = false;
for call_stack in self.call_stacks.iter().skip(start_index) {
if matches!(call_stack.deepest, ParseAttempt::Token) {
token_call_stack_met = true;
} else {
non_token_call_stacks.push(call_stack.clone())
}
}
if token_call_stack_met && non_token_call_stacks.is_empty() {
// If `non_token_call_stacks` is not empty we wouldn't like to add a new standalone
// `RulesCallStack::new(ParseAttempt::Token)` (that will later be transformed into a
// rule) as soon as it doesn't give us any useful additional info.
non_token_call_stacks.push(RulesCallStack::new(ParseAttempt::Token));
}
self.call_stacks
.splice(start_index.., non_token_call_stacks);
let children_number_over_threshold =
self.call_stacks_number() - start_index >= CALL_STACK_CHILDREN_THRESHOLD;
if children_number_over_threshold {
self.call_stacks.truncate(start_index);
self.call_stacks
.push(RulesCallStack::new(ParseAttempt::Rule(rule)));
} else {
for call_stack in self.call_stacks.iter_mut().skip(start_index) {
if matches!(call_stack.deepest, ParseAttempt::Token) {
call_stack.deepest = ParseAttempt::Rule(rule);
} else {
call_stack.parent = Some(rule);
}
}
}
}
/// If `expected` flag is set to false, it means we've successfully parsed token being in the
/// state of negative lookahead and want to track `token` in the `unexpected_tokens`. Otherwise,
/// we want to track it the `expected_tokens`. Let's call chosen vec a `target_vec`.
///
/// In case `position` is:
/// * Equal to `max_position`, add `token` to `target_vec`,
/// * Bigger than `max_position`, set `token` as the only new element of `target_vec`.
#[allow(clippy::comparison_chain)]
fn try_add_new_token(
&mut self,
token: ParsingToken,
start_position: usize,
position: usize,
negative_lookahead: bool,
) {
let target_vec_push_token = |attempts: &mut ParseAttempts<R>| {
let target_vec = if negative_lookahead {
&mut attempts.unexpected_tokens
} else {
&mut attempts.expected_tokens
};
target_vec.push(token);
};
if position > self.max_position {
if negative_lookahead && start_position > self.max_position {
// We encountered a sequence under negative lookahead.
// We would like to track only first failed token in this sequence (which
// `start_position` should be equal to `self.max_position`).
return;
}
target_vec_push_token(self);
if negative_lookahead {
// In case of successful parsing of token under negative lookahead the only
// thing we'd like to do is to track the token in the `unexpected_tokens`.
return;
}
self.max_position = position;
self.expected_tokens.clear();
self.unexpected_tokens.clear();
self.call_stacks.clear();
self.call_stacks
.push(RulesCallStack::new(ParseAttempt::Token));
} else if position == self.max_position {
target_vec_push_token(self);
self.call_stacks
.push(RulesCallStack::new(ParseAttempt::Token));
}
}
/// Reset state in case we've successfully parsed some token in
/// `match_string` or `match_insensetive`.
fn nullify_expected_tokens(&mut self, new_max_position: usize) {
self.call_stacks.clear();
self.expected_tokens.clear();
self.unexpected_tokens.clear();
self.max_position = new_max_position;
}
}
impl<R: RuleType> Default for ParseAttempts<R> {
fn default() -> Self {
Self::new()
}
}
/// The complete state of a [`Parser`].
///
/// [`Parser`]: trait.Parser.html
#[derive(Debug)]
pub struct ParserState<'i, R: RuleType> {
/// Current position from which we try to apply some parser function.
/// Initially is 0.
/// E.g., we are parsing `create user 'Bobby'` query, we parsed "create" via `match_insensitive`
/// and switched our `position` from 0 to the length of "create".
///
/// E.g., see `match_string` -> `self.position.match_string(string)` which updates `self.pos`.
position: Position<'i>,
/// Queue representing rules partially (`QueueableToken::Start`) and
/// totally (`QueueableToken::End`) parsed. When entering rule we put it in the queue in a state
/// of `Start` and after all its sublogic (subrules or strings) are parsed, we change it to
/// `End` state.
queue: Vec<QueueableToken<'i, R>>,
/// Status set in case specific lookahead logic is used in grammar.
/// See `Lookahead` for more information.
lookahead: Lookahead,
/// Rules that we HAVE expected, tried to parse, but failed.
pos_attempts: Vec<R>,
/// Rules that we have NOT expected, tried to parse, but failed.
neg_attempts: Vec<R>,
/// Max position in the query from which we've tried to parse some rule but failed.
attempt_pos: usize,
/// Current atomicity status. For more information see `Atomicity`.
atomicity: Atomicity,
/// Helper structure tracking `Stack` status (used in case grammar contains stack PUSH/POP
/// invocations).
stack: Stack<Span<'i>>,
/// Used for setting max parser calls limit.
call_tracker: CallLimitTracker,
/// Together with tracking of `pos_attempts` and `attempt_pos`
/// as a pair of (list of rules that we've tried to parse but failed, max parsed position)
/// we track those rules (which we've tried to parse at the same max pos) at this helper struct.
///
/// Note, that we may try to parse several rules on different positions. We want to track only
/// those rules, which attempt position is bigger, because we consider that it's nearer to the
/// query that user really wanted to pass.
///
/// E.g. we have a query `create user "Bobby"` and two root rules:
/// * CreateUser = { "create" ~ "user" ~ Name }
/// * CreateTable = { "create" ~ "table" ~ Name }
/// * Name = { SOME_DEFINITION }
///
/// While parsing the query we'll update tracker position to the start of "Bobby", because we'd
/// successfully parse "create" + "user" (and not "table").
parse_attempts: ParseAttempts<R>,
}
/// Creates a `ParserState` from a `&str`, supplying it to a closure `f`.
///
/// # Examples
///
/// ```
/// # use pest;
/// let input = "";
/// pest::state::<(), _>(input, |s| Ok(s)).unwrap();
/// ```
#[allow(clippy::perf)]
pub fn state<'i, R: RuleType, F>(input: &'i str, f: F) -> Result<pairs::Pairs<'i, R>, Error<R>>
where
F: FnOnce(Box<ParserState<'i, R>>) -> ParseResult<Box<ParserState<'i, R>>>,
{
let state = ParserState::new(input);
match f(state) {
Ok(state) => {
let len = state.queue.len();
Ok(new(Rc::new(state.queue), input, None, 0, len))
}
Err(mut state) => {
let variant = if state.reached_call_limit() {
ErrorVariant::CustomError {
message: "call limit reached".to_owned(),
}
} else {
state.pos_attempts.sort();
state.pos_attempts.dedup();
state.neg_attempts.sort();
state.neg_attempts.dedup();
ErrorVariant::ParsingError {
positives: state.pos_attempts.clone(),
negatives: state.neg_attempts.clone(),
}
};
if state.parse_attempts.enabled {
Err(Error::new_from_pos_with_parsing_attempts(
variant,
Position::new_internal(input, state.attempt_pos),
state.parse_attempts.clone(),
))
} else {
Err(Error::new_from_pos(
variant,
Position::new_internal(input, state.attempt_pos),
))
}
}
}
}
impl<'i, R: RuleType> ParserState<'i, R> {
/// Allocates a fresh `ParserState` object to the heap and returns the owned `Box`. This `Box`
/// will be passed from closure to closure based on the needs of the specified `Parser`.
///
/// # Examples
///
/// ```
/// # use pest;
/// let input = "";
/// let state: Box<pest::ParserState<&str>> = pest::ParserState::new(input);
/// ```
pub fn new(input: &'i str) -> Box<Self> {
Box::new(ParserState {
position: Position::from_start(input),
queue: vec![],
lookahead: Lookahead::None,
pos_attempts: vec![],
neg_attempts: vec![],
attempt_pos: 0,
atomicity: Atomicity::NonAtomic,
stack: Stack::new(),
call_tracker: Default::default(),
parse_attempts: ParseAttempts::new(),
})
}
/// Get all parse attempts after process of parsing is finished.
pub fn get_parse_attempts(&self) -> &ParseAttempts<R> {
&self.parse_attempts
}
/// Returns a reference to the current `Position` of the `ParserState`.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// ab
/// }
///
/// let input = "ab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let position = state.position();
/// assert_eq!(position.pos(), 0);
/// ```
pub fn position(&self) -> &Position<'i> {
&self.position
}
/// Returns the current atomicity of the `ParserState`.
///
/// # Examples
///
/// ```
/// # use pest;
/// # use pest::Atomicity;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// ab
/// }
///
/// let input = "ab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let atomicity = state.atomicity();
/// assert_eq!(atomicity, Atomicity::NonAtomic);
/// ```
pub fn atomicity(&self) -> Atomicity {
self.atomicity
}
#[inline]
fn inc_call_check_limit(mut self: Box<Self>) -> ParseResult<Box<Self>> {
if self.call_tracker.limit_reached() {
return Err(self);
}
self.call_tracker.increment_depth();
Ok(self)
}
#[inline]
fn reached_call_limit(&self) -> bool {
self.call_tracker.limit_reached()
}
/// Wrapper needed to generate tokens. This will associate the `R` type rule to the closure
/// meant to match the rule.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// a
/// }
///
/// let input = "a";
/// let pairs: Vec<_> = pest::state(input, |state| {
/// state.rule(Rule::a, |s| Ok(s))
/// }).unwrap().collect();
///
/// assert_eq!(pairs.len(), 1);
/// ```
#[inline]
pub fn rule<F>(mut self: Box<Self>, rule: R, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
// Position from which this `rule` starts parsing.
let actual_pos = self.position.pos();
// Remember index of the `self.queue` element that will be associated with this `rule`.
let index = self.queue.len();
let (pos_attempts_index, neg_attempts_index) = if actual_pos == self.attempt_pos {
(self.pos_attempts.len(), self.neg_attempts.len())
} else {
// Attempts have not been cleared yet since the attempt_pos is older.
(0, 0)
};
if self.lookahead == Lookahead::None && self.atomicity != Atomicity::Atomic {
// Pair's position will only be known after running the closure.
self.queue.push(QueueableToken::Start {
end_token_index: 0,
input_pos: actual_pos,
});
}
// Remember attempts number before `f` call.
// In `track` using this variable we can say, how many attempts were added
// during children rules traversal.
let attempts = self.attempts_at(actual_pos);
// Number of call stacks present in `self.parse_attempts` before `f` call.
// We need to remember this number only in case there wasn't found any farther attempt.
// E.g. we are handling rule, on start position of which may be tested two
// children rules. At the moment we'll return from `f` call below,
// there will be two more children rules in `self.parse_attempts` that we'll
// consider to be the children of current `rule`.
let mut remember_call_stacks_number = self.parse_attempts.call_stacks_number();
// Max parsing attempt position at the moment of `rule` handling.
// It case it's raised during children rules handling, it means
// we've made a parsing progress.
let remember_max_position = self.parse_attempts.max_position;
let result = f(self);
let mut try_add_rule_to_stack = |new_state: &mut Box<ParserState<'_, R>>| {
if new_state.parse_attempts.max_position > remember_max_position {
// It means that one of `match_string` or e.g. `match_insensetive` function calls
// have already erased `self.parse_attempts.call_stacks` and that previously
// remembered values are not valid anymore.
remember_call_stacks_number = 0;
}
if !matches!(new_state.atomicity, Atomicity::Atomic) {
new_state
.parse_attempts
.try_add_new_stack_rule(rule, remember_call_stacks_number);
}
};
match result {
Ok(mut new_state) => {
if new_state.lookahead == Lookahead::Negative {
new_state.track(
rule,
actual_pos,
pos_attempts_index,
neg_attempts_index,
attempts,
);
}
if new_state.lookahead == Lookahead::None
&& new_state.atomicity != Atomicity::Atomic
{
// Index of `QueueableToken::End` token added below
// that corresponds to previously added `QueueableToken::Start` token.
let new_index = new_state.queue.len();
match new_state.queue[index] {
QueueableToken::Start {
ref mut end_token_index,
..
} => *end_token_index = new_index,
_ => unreachable!(),
};
let new_pos = new_state.position.pos();
new_state.queue.push(QueueableToken::End {
start_token_index: index,
rule,
tag: None,
input_pos: new_pos,
});
}
// Note, that we need to count positive parsing results too, because we can fail in
// optional rule call inside which may lie the farthest
// parsed token.
if new_state.parse_attempts.enabled {
try_add_rule_to_stack(&mut new_state);
}
Ok(new_state)
}
Err(mut new_state) => {
if new_state.lookahead != Lookahead::Negative {
new_state.track(
rule,
actual_pos,
pos_attempts_index,
neg_attempts_index,
attempts,
);
if new_state.parse_attempts.enabled {
try_add_rule_to_stack(&mut new_state);
}
}
if new_state.lookahead == Lookahead::None
&& new_state.atomicity != Atomicity::Atomic
{
new_state.queue.truncate(index);
}
Err(new_state)
}
}
}
/// Tag current node
///
/// # Examples
///
/// Try to recognize the one specified in a set of characters
///
/// ```
/// use pest::{state, ParseResult, ParserState, iterators::Pair};
/// #[allow(non_camel_case_types)]
/// #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// character,
/// }
/// fn mark_c(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
/// state.sequence(|state| {
/// character(state)
/// .and_then(|state| character(state))
/// .and_then(|state| character(state))
/// .and_then(|state| state.tag_node("c"))
/// .and_then(|state| character(state))
/// })
/// }
/// fn character(state: Box<ParserState<Rule>>) -> ParseResult<Box<ParserState<Rule>>> {
/// state.rule(Rule::character, |state| state.match_range('a'..'z'))
/// }
///
/// let input = "abcd";
/// let pairs = state(input, mark_c).unwrap();
/// // find all node tag as `c`
/// let find: Vec<Pair<Rule>> = pairs.filter(|s| s.as_node_tag() == Some("c")).collect();
/// assert_eq!(find[0].as_str(), "c")
/// ```
#[inline]
pub fn tag_node(mut self: Box<Self>, tag: &'i str) -> ParseResult<Box<Self>> {
if let Some(QueueableToken::End { tag: old, .. }) = self.queue.last_mut() {
*old = Some(tag)
}
Ok(self)
}
/// Get number of allowed rules attempts + prohibited rules attempts.
fn attempts_at(&self, pos: usize) -> usize {
if self.attempt_pos == pos {
self.pos_attempts.len() + self.neg_attempts.len()
} else {
0
}
}
fn track(
&mut self,
rule: R,
pos: usize,
pos_attempts_index: usize,
neg_attempts_index: usize,
prev_attempts: usize,
) {
if self.atomicity == Atomicity::Atomic {
return;
}
// If nested rules made no progress, there is no use to report them; it's only useful to
// track the current rule, the exception being when only one attempt has been made during
// the children rules.
let curr_attempts = self.attempts_at(pos);
if curr_attempts > prev_attempts && curr_attempts - prev_attempts == 1 {
return;
}
if pos == self.attempt_pos {
self.pos_attempts.truncate(pos_attempts_index);
self.neg_attempts.truncate(neg_attempts_index);
}
if pos > self.attempt_pos {
self.pos_attempts.clear();
self.neg_attempts.clear();
self.attempt_pos = pos;
}
let attempts = if self.lookahead != Lookahead::Negative {
&mut self.pos_attempts
} else {
&mut self.neg_attempts
};
if pos == self.attempt_pos {
attempts.push(rule);
}
}
/// Starts a sequence of transformations provided by `f` from the `Box<ParserState>`. Returns
/// the same `Result` returned by `f` in the case of an `Ok`, or `Err` with the current
/// `Box<ParserState>` otherwise.
///
/// This method is useful to parse sequences that only match together which usually come in the
/// form of chained `Result`s with
/// [`Result::and_then`](https://doc.rust-lang.org/std/result/enum.Result.html#method.and_then).
///
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// a
/// }
///
/// let input = "a";
/// let pairs: Vec<_> = pest::state(input, |state| {
/// state.sequence(|s| {
/// s.rule(Rule::a, |s| Ok(s)).and_then(|s| {
/// s.match_string("b")
/// })
/// }).or_else(|s| {
/// Ok(s)
/// })
/// }).unwrap().collect();
///
/// assert_eq!(pairs.len(), 0);
/// ```
#[inline]
pub fn sequence<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
let token_index = self.queue.len();
let initial_pos = self.position;
let result = f(self);
match result {
Ok(new_state) => Ok(new_state),
Err(mut new_state) => {
// Restore the initial position and truncate the token queue.
new_state.position = initial_pos;
new_state.queue.truncate(token_index);
Err(new_state)
}
}
}
/// Repeatedly applies the transformation provided by `f` from the `Box<ParserState>`. Returns
/// `Ok` with the updated `Box<ParserState>` returned by `f` wrapped up in an `Err`.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// ab
/// }
///
/// let input = "aab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.repeat(|s| {
/// s.match_string("a")
/// });
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 2);
///
/// state = pest::ParserState::new(input);
/// result = state.repeat(|s| {
/// s.match_string("b")
/// });
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 0);
/// ```
#[inline]
pub fn repeat<F>(mut self: Box<Self>, mut f: F) -> ParseResult<Box<Self>>
where
F: FnMut(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
let mut result = f(self);
loop {
match result {
Ok(state) => result = f(state),
Err(state) => return Ok(state),
};
}
}
/// Optionally applies the transformation provided by `f` from the `Box<ParserState>`. Returns
/// `Ok` with the updated `Box<ParserState>` returned by `f` regardless of the `Result`.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// ab
/// }
///
/// let input = "ab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let result = state.optional(|s| {
/// s.match_string("ab")
/// });
/// assert!(result.is_ok());
///
/// state = pest::ParserState::new(input);
/// let result = state.optional(|s| {
/// s.match_string("ac")
/// });
/// assert!(result.is_ok());
/// ```
#[inline]
pub fn optional<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
match f(self) {
Ok(state) | Err(state) => Ok(state),
}
}
/// Generic function to handle result of char/string/range parsing
/// in order to track (un)expected tokens.
fn handle_token_parse_result(
&mut self,
start_position: usize,
token: ParsingToken,
parse_succeeded: bool,
) {
// New position after tracked parsed element for case of `parse_succeded` is true.
// Position of parsing failure otherwise.
let current_pos = self.position.pos();
if parse_succeeded {
if self.lookahead == Lookahead::Negative {
self.parse_attempts
.try_add_new_token(token, start_position, current_pos, true);
} else if current_pos > self.parse_attempts.max_position {
self.parse_attempts.nullify_expected_tokens(current_pos);
}
} else if self.lookahead != Lookahead::Negative {
self.parse_attempts
.try_add_new_token(token, start_position, current_pos, false);
}
}
/// Attempts to match a single character based on a filter function. Returns `Ok` with the
/// updated `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>`
/// otherwise.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "ab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let result = state.match_char_by(|c| c.is_ascii());
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 1);
///
/// let input = "❤";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let result = state.match_char_by(|c| c.is_ascii());
/// assert!(result.is_err());
/// assert_eq!(result.unwrap_err().position().pos(), 0);
/// ```
#[inline]
pub fn match_char_by<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(char) -> bool,
{
let start_position = self.position.pos();
let succeeded = self.position.match_char_by(f);
if self.parse_attempts.enabled {
let token = ParsingToken::BuiltInRule;
self.handle_token_parse_result(start_position, token, succeeded);
}
if succeeded {
Ok(self)
} else {
Err(self)
}
}
/// Attempts to match the given string. Returns `Ok` with the updated `Box<ParserState>` if
/// successful, or `Err` with the updated `Box<ParserState>` otherwise.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "ab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.match_string("ab");
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 2);
///
/// state = pest::ParserState::new(input);
/// result = state.match_string("ac");
/// assert!(result.is_err());
/// assert_eq!(result.unwrap_err().position().pos(), 0);
/// ```
#[inline]
pub fn match_string(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
let start_position = self.position.pos();
let succeeded = self.position.match_string(string);
if self.parse_attempts.enabled {
let token = ParsingToken::Sensitive {
token: String::from(string),
};
self.handle_token_parse_result(start_position, token, succeeded);
}
if succeeded {
Ok(self)
} else {
Err(self)
}
}
/// Attempts to case-insensitively match the given string. Returns `Ok` with the updated
/// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "ab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.match_insensitive("AB");
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 2);
///
/// state = pest::ParserState::new(input);
/// result = state.match_insensitive("AC");
/// assert!(result.is_err());
/// assert_eq!(result.unwrap_err().position().pos(), 0);
/// ```
#[inline]
pub fn match_insensitive(mut self: Box<Self>, string: &str) -> ParseResult<Box<Self>> {
let start_position: usize = self.position().pos();
let succeeded = self.position.match_insensitive(string);
if self.parse_attempts.enabled {
let token = ParsingToken::Insensitive {
token: String::from(string),
};
self.handle_token_parse_result(start_position, token, succeeded);
}
if succeeded {
Ok(self)
} else {
Err(self)
}
}
/// Attempts to match a single character from the given range. Returns `Ok` with the updated
/// `Box<ParserState>` if successful, or `Err` with the updated `Box<ParserState>` otherwise.
///
/// # Caution
/// The provided `range` is interpreted as inclusive.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "ab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.match_range('a'..'z');
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 1);
///
/// state = pest::ParserState::new(input);
/// result = state.match_range('A'..'Z');
/// assert!(result.is_err());
/// assert_eq!(result.unwrap_err().position().pos(), 0);
/// ```
#[inline]
pub fn match_range(mut self: Box<Self>, range: Range<char>) -> ParseResult<Box<Self>> {
let start_position = self.position().pos();
let token = ParsingToken::Range {
start: range.start,
end: range.end,
};
let succeeded = self.position.match_range(range);
if self.parse_attempts.enabled {
self.handle_token_parse_result(start_position, token, succeeded);
}
if succeeded {
Ok(self)
} else {
Err(self)
}
}
/// Attempts to skip `n` characters forward. Returns `Ok` with the updated `Box<ParserState>`
/// if successful, or `Err` with the updated `Box<ParserState>` otherwise.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "ab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.skip(1);
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 1);
///
/// state = pest::ParserState::new(input);
/// result = state.skip(3);
/// assert!(result.is_err());
/// assert_eq!(result.unwrap_err().position().pos(), 0);
/// ```
#[inline]
pub fn skip(mut self: Box<Self>, n: usize) -> ParseResult<Box<Self>> {
if self.position.skip(n) {
Ok(self)
} else {
Err(self)
}
}
/// Attempts to skip forward until one of the given strings is found. Returns `Ok` with the
/// updated `Box<ParserState>` whether or not one of the strings is found.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "abcd";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.skip_until(&["c", "d"]);
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 2);
/// ```
#[inline]
pub fn skip_until(mut self: Box<Self>, strings: &[&str]) -> ParseResult<Box<Self>> {
self.position.skip_until(strings);
Ok(self)
}
/// Attempts to match the start of the input. Returns `Ok` with the current `Box<ParserState>`
/// if the parser has not yet advanced, or `Err` with the current `Box<ParserState>` otherwise.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "ab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.start_of_input();
/// assert!(result.is_ok());
///
/// state = pest::ParserState::new(input);
/// state = state.match_string("ab").unwrap();
/// result = state.start_of_input();
/// assert!(result.is_err());
/// ```
#[inline]
pub fn start_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
if self.position.at_start() {
Ok(self)
} else {
Err(self)
}
}
/// Attempts to match the end of the input. Returns `Ok` with the current `Box<ParserState>` if
/// there is no input remaining, or `Err` with the current `Box<ParserState>` otherwise.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "ab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.end_of_input();
/// assert!(result.is_err());
///
/// state = pest::ParserState::new(input);
/// state = state.match_string("ab").unwrap();
/// result = state.end_of_input();
/// assert!(result.is_ok());
/// ```
#[inline]
pub fn end_of_input(self: Box<Self>) -> ParseResult<Box<Self>> {
if self.position.at_end() {
Ok(self)
} else {
Err(self)
}
}
/// Starts a lookahead transformation provided by `f` from the `Box<ParserState>`. It returns
/// `Ok` with the current `Box<ParserState>` if `f` also returns an `Ok`, or `Err` with the current
/// `Box<ParserState>` otherwise. If `is_positive` is `false`, it swaps the `Ok` and `Err`
/// together, negating the `Result`.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// a
/// }
///
/// let input = "a";
/// let pairs: Vec<_> = pest::state(input, |state| {
/// state.lookahead(true, |state| {
/// state.rule(Rule::a, |s| Ok(s))
/// })
/// }).unwrap().collect();
///
/// assert_eq!(pairs.len(), 0);
/// ```
#[inline]
pub fn lookahead<F>(mut self: Box<Self>, is_positive: bool, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
let initial_lookahead = self.lookahead;
self.lookahead = if is_positive {
match initial_lookahead {
Lookahead::None | Lookahead::Positive => Lookahead::Positive,
Lookahead::Negative => Lookahead::Negative,
}
} else {
match initial_lookahead {
Lookahead::None | Lookahead::Positive => Lookahead::Negative,
Lookahead::Negative => Lookahead::Positive,
}
};
let initial_pos = self.position;
let result = f(self.checkpoint());
let result_state = match result {
Ok(mut new_state) => {
new_state.position = initial_pos;
new_state.lookahead = initial_lookahead;
Ok(new_state.restore())
}
Err(mut new_state) => {
new_state.position = initial_pos;
new_state.lookahead = initial_lookahead;
Err(new_state.restore())
}
};
if is_positive {
result_state
} else {
match result_state {
Ok(state) => Err(state),
Err(state) => Ok(state),
}
}
}
/// Transformation which stops `Token`s from being generated according to `is_atomic`.
/// Used as wrapper over `rule` (or even another `atomic`) call.
///
/// # Examples
///
/// ```
/// # use pest::{self, Atomicity};
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {
/// a
/// }
///
/// let input = "a";
/// let pairs: Vec<_> = pest::state(input, |state| {
/// state.atomic(Atomicity::Atomic, |s| {
/// s.rule(Rule::a, |s| Ok(s))
/// })
/// }).unwrap().collect();
///
/// assert_eq!(pairs.len(), 0);
/// ```
#[inline]
pub fn atomic<F>(mut self: Box<Self>, atomicity: Atomicity, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
// In case child parsing call is another `atomic` it will have its own atomicity status.
let initial_atomicity = self.atomicity;
// In case child atomicity is the same as we've demanded, we shouldn't do nothing.
// E.g. we have the following rules:
// * RootRule = @{ InnerRule }
// * InnerRule = @{ ... }
let should_toggle = self.atomicity != atomicity;
// Note that we take atomicity of the top rule and not of the leaf (inner).
if should_toggle {
self.atomicity = atomicity;
}
let result = f(self);
match result {
Ok(mut new_state) => {
if should_toggle {
new_state.atomicity = initial_atomicity;
}
Ok(new_state)
}
Err(mut new_state) => {
if should_toggle {
new_state.atomicity = initial_atomicity;
}
Err(new_state)
}
}
}
/// Evaluates the result of closure `f` and pushes the span of the input consumed from before
/// `f` is called to after `f` is called to the stack. Returns `Ok(Box<ParserState>)` if `f` is
/// called successfully, or `Err(Box<ParserState>)` otherwise.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "ab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.stack_push(|state| state.match_string("a"));
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 1);
/// ```
#[inline]
pub fn stack_push<F>(mut self: Box<Self>, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
self = self.inc_call_check_limit()?;
let start = self.position;
let result = f(self);
match result {
Ok(mut state) => {
let end = state.position;
state.stack.push(start.span(&end));
Ok(state)
}
Err(state) => Err(state),
}
}
/// Peeks the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
/// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "aa";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
/// |state| state.stack_peek()
/// );
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 2);
/// ```
#[inline]
pub fn stack_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
let string = self
.stack
.peek()
.expect("peek was called on empty stack")
.as_str();
self.match_string(string)
}
/// Pops the top of the stack and attempts to match the string. Returns `Ok(Box<ParserState>)`
/// if the string is matched successfully, or `Err(Box<ParserState>)` otherwise.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "aa";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
/// |state| state.stack_pop()
/// );
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 2);
/// ```
#[inline]
pub fn stack_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
let string = self
.stack
.pop()
.expect("pop was called on empty stack")
.as_str();
self.match_string(string)
}
/// Matches part of the state of the stack.
///
/// # Examples
///
/// ```
/// # use pest::{self, MatchDir};
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "abcd cd cb";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state
/// .stack_push(|state| state.match_string("a"))
/// .and_then(|state| state.stack_push(|state| state.match_string("b")))
/// .and_then(|state| state.stack_push(|state| state.match_string("c")))
/// .and_then(|state| state.stack_push(|state| state.match_string("d")))
/// .and_then(|state| state.match_string(" "))
/// .and_then(|state| state.stack_match_peek_slice(2, None, MatchDir::BottomToTop))
/// .and_then(|state| state.match_string(" "))
/// .and_then(|state| state.stack_match_peek_slice(1, Some(-1), MatchDir::TopToBottom));
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 10);
/// ```
#[inline]
pub fn stack_match_peek_slice(
mut self: Box<Self>,
start: i32,
end: Option<i32>,
match_dir: MatchDir,
) -> ParseResult<Box<Self>> {
let range = match constrain_idxs(start, end, self.stack.len()) {
Some(r) => r,
None => return Err(self),
};
// return true if an empty sequence is requested
if range.end <= range.start {
return Ok(self);
}
let mut position = self.position;
let result = {
let mut iter_b2t = self.stack[range].iter();
let matcher = |span: &Span<'_>| position.match_string(span.as_str());
match match_dir {
MatchDir::BottomToTop => iter_b2t.all(matcher),
MatchDir::TopToBottom => iter_b2t.rev().all(matcher),
}
};
if result {
self.position = position;
Ok(self)
} else {
Err(self)
}
}
/// Matches the full state of the stack.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "abba";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state
/// .stack_push(|state| state.match_string("a"))
/// .and_then(|state| { state.stack_push(|state| state.match_string("b")) })
/// .and_then(|state| state.stack_match_peek());
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 4);
/// ```
#[inline]
pub fn stack_match_peek(self: Box<Self>) -> ParseResult<Box<Self>> {
self.stack_match_peek_slice(0, None, MatchDir::TopToBottom)
}
/// Matches the full state of the stack. This method will clear the stack as it evaluates.
///
/// # Examples
///
/// ```
/// /// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "aaaa";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.stack_push(|state| state.match_string("a")).and_then(|state| {
/// state.stack_push(|state| state.match_string("a"))
/// }).and_then(|state| state.stack_match_peek());
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 4);
/// ```
#[inline]
pub fn stack_match_pop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
let mut position = self.position;
let mut result = true;
while let Some(span) = self.stack.pop() {
result = position.match_string(span.as_str());
if !result {
break;
}
}
if result {
self.position = position;
Ok(self)
} else {
Err(self)
}
}
/// Drops the top of the stack. Returns `Ok(Box<ParserState>)` if there was a value to drop, or
/// `Err(Box<ParserState>)` otherwise.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "aa";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.stack_push(|state| state.match_string("a")).and_then(
/// |state| state.stack_drop()
/// );
/// assert!(result.is_ok());
/// assert_eq!(result.unwrap().position().pos(), 1);
/// ```
#[inline]
pub fn stack_drop(mut self: Box<Self>) -> ParseResult<Box<Self>> {
match self.stack.pop() {
Some(_) => Ok(self),
None => Err(self),
}
}
/// Restores the original state of the `ParserState` when `f` returns an `Err`. Currently,
/// this method only restores the stack.
///
/// # Examples
///
/// ```
/// # use pest;
/// # #[allow(non_camel_case_types)]
/// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
/// enum Rule {}
///
/// let input = "ab";
/// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input);
/// let mut result = state.restore_on_err(|state| state.stack_push(|state|
/// state.match_string("a")).and_then(|state| state.match_string("a"))
/// );
///
/// assert!(result.is_err());
///
/// // Since the the rule doesn't match, the "a" pushed to the stack will be removed.
/// let catch_panic = std::panic::catch_unwind(|| result.unwrap_err().stack_pop());
/// assert!(catch_panic.is_err());
/// ```
#[inline]
pub fn restore_on_err<F>(self: Box<Self>, f: F) -> ParseResult<Box<Self>>
where
F: FnOnce(Box<Self>) -> ParseResult<Box<Self>>,
{
match f(self.checkpoint()) {
Ok(state) => Ok(state.checkpoint_ok()),
Err(state) => Err(state.restore()),
}
}
// Mark the current state as a checkpoint and return the `Box`.
#[inline]
pub(crate) fn checkpoint(mut self: Box<Self>) -> Box<Self> {
self.stack.snapshot();
self
}
// The checkpoint was cleared successfully
// so remove it without touching other stack state.
#[inline]
pub(crate) fn checkpoint_ok(mut self: Box<Self>) -> Box<Self> {
self.stack.clear_snapshot();
self
}
// Restore the current state to the most recent checkpoint.
#[inline]
pub(crate) fn restore(mut self: Box<Self>) -> Box<Self> {
self.stack.restore();
self
}
}
/// Helper function used only in case stack operations (PUSH/POP) are used in grammar.
fn constrain_idxs(start: i32, end: Option<i32>, len: usize) -> Option<Range<usize>> {
let start_norm = normalize_index(start, len)?;
let end_norm = end.map_or(Some(len), |e| normalize_index(e, len))?;
Some(start_norm..end_norm)
}
/// `constrain_idxs` helper function.
/// Normalizes the index using its sequence’s length.
/// Returns `None` if the normalized index is OOB.
fn normalize_index(i: i32, len: usize) -> Option<usize> {
if i > len as i32 {
None
} else if i >= 0 {
Some(i as usize)
} else {
let real_i = len as i32 + i;
if real_i >= 0 {
Some(real_i as usize)
} else {
None
}
}
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn normalize_index_pos() {
assert_eq!(normalize_index(4, 6), Some(4));
assert_eq!(normalize_index(5, 5), Some(5));
assert_eq!(normalize_index(6, 3), None);
}
#[test]
fn normalize_index_neg() {
assert_eq!(normalize_index(-4, 6), Some(2));
assert_eq!(normalize_index(-5, 5), Some(0));
assert_eq!(normalize_index(-6, 3), None);
}
}