vendor/syntect-5.2.0/src/parsing/parser.rs - toolchain/rustc - Git at Google

 // Suppression of a false positive clippy lint. Upstream issue:
 //
 //   mutable_key_type false positive for raw pointers
 //   https://github.com/rust-lang/rust-clippy/issues/6745
 //
 // We use `*const MatchPattern` as key in our `SearchCache` hash map.
 // Clippy thinks this is a problem since `MatchPattern` has interior mutability
 // via `MatchPattern::regex::regex` which is an `AtomicLazyCell`.
 // But raw pointers are hashed via the pointer itself, not what is pointed to.
 // See https://github.com/rust-lang/rust/blob/1.54.0/library/core/src/hash/mod.rs#L717-L725
 #![allow(clippy::mutable_key_type)]

 use super::syntax_definition::*;
 use super::scope::*;
 use super::regex::Region;
 use std::usize;
 use std::collections::HashMap;
 use std::i32;
 use std::hash::BuildHasherDefault;
 use fnv::FnvHasher;
 use crate::parsing::syntax_set::{SyntaxSet, SyntaxReference};
 use crate::parsing::syntax_definition::ContextId;

 /// Errors that can occur while parsing.
 #[derive(Debug, thiserror::Error)]
 #[non_exhaustive]
 pub enum ParsingError {
     #[error("Somehow main context was popped from the stack")]
     MissingMainContext,
     /// A context is missing. Usually caused by a syntax referencing a another
     /// syntax that is not known to syntect. See e.g. <https://github.com/trishume/syntect/issues/421>
     #[error("Missing context with ID '{0:?}'")]
     MissingContext(ContextId),
     #[error("Bad index to match_at: {0}")]
     BadMatchIndex(usize),
     #[error("Tried to use a ContextReference that has not bee resolved yet: {0:?}")]
     UnresolvedContextReference(ContextReference),
 }

 /// Keeps the current parser state (the internal syntax interpreter stack) between lines of parsing.
 ///
 /// If you are parsing an entire file you create one of these at the start and use it
 /// all the way to the end.
 ///
 /// # Caching
 ///
 /// One reason this is exposed is that since it implements `Clone` you can actually cache
 /// these (probably along with a [`HighlightState`]) and only re-start parsing from the point of a change.
 /// See the docs for [`HighlightState`] for more in-depth discussion of caching.
 ///
 /// This state doesn't keep track of the current scope stack and parsing only returns changes to this stack
 /// so if you want to construct scope stacks you'll need to keep track of that as well.
 /// Note that [`HighlightState`] contains exactly this as a public field that you can use.
 ///
 /// **Note:** Caching is for advanced users who have tons of time to maximize performance or want to do so eventually.
 /// It is not recommended that you try caching the first time you implement highlighting.
 ///
 /// [`HighlightState`]: ../highlighting/struct.HighlightState.html
 #[derive(Debug, Clone, Eq, PartialEq)]
 pub struct ParseState {
     stack: Vec<StateLevel>,
     first_line: bool,
     // See issue #101. Contains indices of frames pushed by `with_prototype`s.
     // Doesn't look at `with_prototype`s below top of stack.
     proto_starts: Vec<usize>,
 }

 #[derive(Debug, Clone, Eq, PartialEq)]
 struct StateLevel {
     context: ContextId,
     prototypes: Vec<ContextId>,
     captures: Option<(Region, String)>,
 }

 #[derive(Debug)]
 struct RegexMatch<'a> {
     regions: Region,
     context: &'a Context,
     pat_index: usize,
     from_with_prototype: bool,
     would_loop: bool,
 }

 /// Maps the pattern to the start index, which is -1 if not found.
 type SearchCache = HashMap<*const MatchPattern, Option<Region>, BuildHasherDefault<FnvHasher>>;

 // To understand the implementation of this, here's an introduction to how
 // Sublime Text syntax definitions work.
 //
 // Let's say we have the following made-up syntax definition:
 //
 //     contexts:
 //       main:
 //         - match: A
 //           scope: scope.a.first
 //           push: context-a
 //         - match: b
 //           scope: scope.b
 //         - match: \w+
 //           scope: scope.other
 //       context-a:
 //         - match: a+
 //           scope: scope.a.rest
 //         - match: (?=.)
 //           pop: true
 //
 // There are two contexts, `main` and `context-a`. Each context contains a list
 // of match rules with instructions for how to proceed.
 //
 // Let's say we have the input string " Aaaabxxx". We start at position 0 in
 // the string. We keep a stack of contexts, which at the beginning is just main.
 //
 // So we start by looking at the top of the context stack (main), and look at
 // the rules in order. The rule that wins is the first one that matches
 // "earliest" in the input string. In our example:
 //
 // 1. The first one matches "A". Note that matches are not anchored, so this
 //    matches at position 1.
 // 2. The second one matches "b", so position 5. The first rule is winning.
 // 3. The third one matches "\w+", so also position 1. But because the first
 //    rule comes first, it wins.
 //
 // So now we execute the winning rule. Whenever we matched some text, we assign
 // the scope (if there is one) to the matched text and advance our position to
 // after the matched text. The scope is "scope.a.first" and our new position is
 // after the "A", so 2. The "push" means that we should change our stack by
 // pushing `context-a` on top of it.
 //
 // In the next step, we repeat the above, but now with the rules in `context-a`.
 // The result is that we match "a+" and assign "scope.a.rest" to "aaa", and our
 // new position is now after the "aaa". Note that there was no instruction for
 // changing the stack, so we stay in that context.
 //
 // In the next step, the first rule doesn't match anymore, so we go to the next
 // rule where "(?=.)" matches. The instruction is to "pop", which means we
 // pop the top of our context stack, which means we're now back in main.
 //
 // This time in main, we match "b", and in the next step we match the rest with
 // "\w+", and we're done.
 //
 //
 // ## Preventing loops
 //
 // These are the basics of how matching works. Now, you saw that you can write
 // patterns that result in an empty match and don't change the position. These
 // are called non-consuming matches. The problem with them is that they could
 // result in infinite loops. Let's look at a syntax where that is the case:
 //
 //     contexts:
 //       main:
 //         - match: (?=.)
 //           push: test
 //       test:
 //         - match: \w+
 //           scope: word
 //         - match: (?=.)
 //           pop: true
 //
 // This is a bit silly, but it's a minimal example for explaining how matching
 // works in that case.
 //
 // Let's say we have the input string " hello". In `main`, our rule matches and
 // we go into `test` and stay at position 0. Now, the best match is the rule
 // with "pop". But if we used that rule, we'd pop back to `main` and would still
 // be at the same position we started at! So this would be an infinite loop,
 // which we don't want.
 //
 // So what Sublime Text does in case a looping rule "won":
 //
 // * If there's another rule that matches at the same position and does not
 //   result in a loop, use that instead.
 // * Otherwise, go to the next position and go through all the rules in the
 //   current context again. Note that it means that the "pop" could again be the
 //   winning rule, but that's ok as it wouldn't result in a loop anymore.
 //
 // So in our input string, we'd skip one character and try to match the rules
 // again. This time, the "\w+" wins because it comes first.

 impl ParseState {
     /// Creates a state from a syntax definition, keeping its own reference-counted point to the
     /// main context of the syntax
     pub fn new(syntax: &SyntaxReference) -> ParseState {
         let start_state = StateLevel {
             context: syntax.context_ids()["__start"],
             prototypes: Vec::new(),
             captures: None,
         };
         ParseState {
             stack: vec![start_state],
             first_line: true,
             proto_starts: Vec::new(),
         }
     }

     /// Parses a single line of the file. Because of the way regex engines work you unfortunately
     /// have to pass in a single line contiguous in memory. This can be bad for really long lines.
     /// Sublime Text avoids this by just not highlighting lines that are too long (thousands of characters).
     ///
     /// For efficiency reasons this returns only the changes to the current scope at each point in the line.
     /// You can use [`ScopeStack::apply`] on each operation in succession to get the stack for a given point.
     /// Look at the code in `highlighter.rs` for an example of doing this for highlighting purposes.
     ///
     /// The returned vector is in order both by index to apply at (the `usize`) and also by order to apply them at a
     /// given index (e.g popping old scopes before pushing new scopes).
     ///
     /// The [`SyntaxSet`] has to be the one that contained the syntax that was used to construct
     /// this [`ParseState`], or an extended version of it. Otherwise the parsing would return the
     /// wrong result or even panic. The reason for this is that contexts within the [`SyntaxSet`]
     /// are referenced via indexes.
     ///
     /// [`ScopeStack::apply`]: struct.ScopeStack.html#method.apply
     /// [`SyntaxSet`]: struct.SyntaxSet.html
     /// [`ParseState`]: struct.ParseState.html
     pub fn parse_line(
         &mut self,
         line: &str,
         syntax_set: &SyntaxSet,
     ) -> Result<Vec<(usize, ScopeStackOp)>, ParsingError> {
         if self.stack.is_empty() {
             return Err(ParsingError::MissingMainContext)
         }
         let mut match_start = 0;
         let mut res = Vec::new();

         if self.first_line {
             let cur_level = &self.stack[self.stack.len() - 1];
             let context = syntax_set.get_context(&cur_level.context)?;
             if !context.meta_content_scope.is_empty() {
                 res.push((0, ScopeStackOp::Push(context.meta_content_scope[0])));
             }
             self.first_line = false;
         }

         let mut regions = Region::new();
         let fnv = BuildHasherDefault::<FnvHasher>::default();
         let mut search_cache: SearchCache = HashMap::with_capacity_and_hasher(128, fnv);
         // Used for detecting loops with push/pop, see long comment above.
         let mut non_consuming_push_at = (0, 0);

         while self.parse_next_token(
             line,
             syntax_set,
             &mut match_start,
             &mut search_cache,
             &mut regions,
             &mut non_consuming_push_at,
             &mut res
         )? {}

         Ok(res)
     }

     #[allow(clippy::too_many_arguments)]
     fn parse_next_token(
         &mut self,
         line: &str,
         syntax_set: &SyntaxSet,
         start: &mut usize,
         search_cache: &mut SearchCache,
         regions: &mut Region,
         non_consuming_push_at: &mut (usize, usize),
         ops: &mut Vec<(usize, ScopeStackOp)>,
     ) -> Result<bool, ParsingError> {
         let check_pop_loop = {
             let (pos, stack_depth) = *non_consuming_push_at;
             pos == *start && stack_depth == self.stack.len()
         };

         // Trim proto_starts that are no longer valid
         while self.proto_starts.last().map(|start| *start >= self.stack.len()).unwrap_or(false) {
             self.proto_starts.pop();
         }

         let best_match = self.find_best_match(line, *start, syntax_set, search_cache, regions, check_pop_loop)?;

         if let Some(reg_match) = best_match {
             if reg_match.would_loop {
                 // A push that doesn't consume anything (a regex that resulted
                 // in an empty match at the current position) can not be
                 // followed by a non-consuming pop. Otherwise we're back where
                 // we started and would try the same sequence of matches again,
                 // resulting in an infinite loop. In this case, Sublime Text
                 // advances one character and tries again, thus preventing the
                 // loop.

                 // println!("pop_would_loop for match {:?}, start {}", reg_match, *start);

                 // nth(1) gets the next character if there is one. Need to do
                 // this instead of just += 1 because we have byte indices and
                 // unicode characters can be more than 1 byte.
                 if let Some((i, _)) = line[*start..].char_indices().nth(1) {
                     *start += i;
                     return Ok(true);
                 } else {
                     // End of line, no character to advance and no point trying
                     // any more patterns.
                     return Ok(false);
                 }
             }

             let match_end = reg_match.regions.pos(0).unwrap().1;

             let consuming = match_end > *start;
             if !consuming {
                 // The match doesn't consume any characters. If this is a
                 // "push", remember the position and stack size so that we can
                 // check the next "pop" for loops. Otherwise leave the state,
                 // e.g. non-consuming "set" could also result in a loop.
                 let context = reg_match.context;
                 let match_pattern = context.match_at(reg_match.pat_index)?;
                 if let MatchOperation::Push(_) = match_pattern.operation {
                     *non_consuming_push_at = (match_end, self.stack.len() + 1);
                 }
             }

             *start = match_end;

             // ignore `with_prototype`s below this if a context is pushed
             if reg_match.from_with_prototype {
                 // use current height, since we're before the actual push
                 self.proto_starts.push(self.stack.len());
             }

             let level_context = {
                 let id = &self.stack[self.stack.len() - 1].context;
                 syntax_set.get_context(id)?
             };
             self.exec_pattern(line, &reg_match, level_context, syntax_set, ops)?;

             Ok(true)
         } else {
             Ok(false)
         }
     }

     fn find_best_match<'a>(
         &self,
         line: &str,
         start: usize,
         syntax_set: &'a SyntaxSet,
         search_cache: &mut SearchCache,
         regions: &mut Region,
         check_pop_loop: bool,
     ) -> Result<Option<RegexMatch<'a>>, ParsingError> {
         let cur_level = &self.stack[self.stack.len() - 1];
         let context = syntax_set.get_context(&cur_level.context)?;
         let prototype = if let Some(ref p) = context.prototype {
             Some(p)
         } else {
             None
         };

         // Build an iterator for the contexts we want to visit in order
         let context_chain = {
             let proto_start = self.proto_starts.last().cloned().unwrap_or(0);
             // Sublime applies with_prototypes from bottom to top
             let with_prototypes = self.stack[proto_start..].iter().flat_map(|lvl| lvl.prototypes.iter().map(move |ctx| (true, ctx, lvl.captures.as_ref())));
             let cur_prototype = prototype.into_iter().map(|ctx| (false, ctx, None));
             let cur_context = Some((false, &cur_level.context, cur_level.captures.as_ref())).into_iter();
             with_prototypes.chain(cur_prototype).chain(cur_context)
         };

         // println!("{:#?}", cur_level);
         // println!("token at {} on {}", start, line.trim_right());

         let mut min_start = usize::MAX;
         let mut best_match: Option<RegexMatch<'_>> = None;
         let mut pop_would_loop = false;

         for (from_with_proto, ctx, captures) in context_chain {
             for (pat_context, pat_index) in context_iter(syntax_set, syntax_set.get_context(ctx)?) {
                 let match_pat = pat_context.match_at(pat_index)?;

                 if let Some(match_region) = self.search(
                     line, start, match_pat, captures, search_cache, regions
                 ) {
                     let (match_start, match_end) = match_region.pos(0).unwrap();

                     // println!("matched pattern {:?} at start {} end {}", match_pat.regex_str, match_start, match_end);

                     if match_start < min_start || (match_start == min_start && pop_would_loop) {
                         // New match is earlier in text than old match,
                         // or old match was a looping pop at the same
                         // position.

                         // println!("setting as current match");

                         min_start = match_start;

                         let consuming = match_end > start;
                         pop_would_loop = check_pop_loop
                             && !consuming
                             && matches!(match_pat.operation, MatchOperation::Pop);

                         best_match = Some(RegexMatch {
                             regions: match_region,
                             context: pat_context,
                             pat_index,
                             from_with_prototype: from_with_proto,
                             would_loop: pop_would_loop,
                         });

                         if match_start == start && !pop_would_loop {
                             // We're not gonna find a better match after this,
                             // so as an optimization we can stop matching now.
                             return Ok(best_match);
                         }
                     }
                 }
             }
         }
         Ok(best_match)
     }

     fn search(&self,
               line: &str,
               start: usize,
               match_pat: &MatchPattern,
               captures: Option<&(Region, String)>,
               search_cache: &mut SearchCache,
               regions: &mut Region,
     ) -> Option<Region> {
         // println!("{} - {:?} - {:?}", match_pat.regex_str, match_pat.has_captures, cur_level.captures.is_some());
         let match_ptr = match_pat as *const MatchPattern;

         if let Some(maybe_region) = search_cache.get(&match_ptr) {
             if let Some(ref region) = *maybe_region {
                 let match_start = region.pos(0).unwrap().0;
                 if match_start >= start {
                     // Cached match is valid, return it. Otherwise do another
                     // search below.
                     return Some(region.clone());
                 }
             } else {
                 // Didn't find a match earlier, so no point trying to match it again
                 return None;
             }
         }

         let (matched, can_cache) = match (match_pat.has_captures, captures) {
             (true, Some(captures)) => {
                 let (region, s) = captures;
                 let regex = match_pat.regex_with_refs(region, s);
                 let matched = regex.search(line, start, line.len(), Some(regions));
                 (matched, false)
             }
             _ => {
                 let regex = match_pat.regex();
                 let matched = regex.search(line, start, line.len(), Some(regions));
                 (matched, true)
             }
         };

         if matched {
             let (match_start, match_end) = regions.pos(0).unwrap();
             // this is necessary to avoid infinite looping on dumb patterns
             let does_something = match match_pat.operation {
                 MatchOperation::None => match_start != match_end,
                 _ => true,
             };
             if can_cache && does_something {
                 search_cache.insert(match_pat, Some(regions.clone()));
             }
             if does_something {
                 // print!("catch {} at {} on {}", match_pat.regex_str, match_start, line);
                 return Some(regions.clone());
             }
         } else if can_cache {
             search_cache.insert(match_pat, None);
         }
         None
     }

     /// Returns true if the stack was changed
     fn exec_pattern<'a>(
         &mut self,
         line: &str,
         reg_match: &RegexMatch<'a>,
         level_context: &'a Context,
         syntax_set: &'a SyntaxSet,
         ops: &mut Vec<(usize, ScopeStackOp)>,
     ) -> Result<bool, ParsingError> {
         let (match_start, match_end) = reg_match.regions.pos(0).unwrap();
         let context = reg_match.context;
         let pat = context.match_at(reg_match.pat_index)?;
         // println!("running pattern {:?} on '{}' at {}, operation {:?}", pat.regex_str, line, match_start, pat.operation);

         self.push_meta_ops(true, match_start, level_context, &pat.operation, syntax_set, ops)?;
         for s in &pat.scope {
             // println!("pushing {:?} at {}", s, match_start);
             ops.push((match_start, ScopeStackOp::Push(*s)));
         }
         if let Some(ref capture_map) = pat.captures {
             // captures could appear in an arbitrary order, have to produce ops in right order
             // ex: ((bob)|(hi))* could match hibob in wrong order, and outer has to push first
             // we don't have to handle a capture matching multiple times, Sublime doesn't
             let mut map: Vec<((usize, i32), ScopeStackOp)> = Vec::new();
             for &(cap_index, ref scopes) in capture_map.iter() {
                 if let Some((cap_start, cap_end)) = reg_match.regions.pos(cap_index) {
                     // marking up empty captures causes pops to be sorted wrong
                     if cap_start == cap_end {
                         continue;
                     }
                     // println!("capture {:?} at {:?}-{:?}", scopes[0], cap_start, cap_end);
                     for scope in scopes.iter() {
                         map.push(((cap_start, -((cap_end - cap_start) as i32)),
                                   ScopeStackOp::Push(*scope)));
                     }
                     map.push(((cap_end, i32::MIN), ScopeStackOp::Pop(scopes.len())));
                 }
             }
             map.sort_by(|a, b| a.0.cmp(&b.0));
             for ((index, _), op) in map.into_iter() {
                 ops.push((index, op));
             }
         }
         if !pat.scope.is_empty() {
             // println!("popping at {}", match_end);
             ops.push((match_end, ScopeStackOp::Pop(pat.scope.len())));
         }
         self.push_meta_ops(false, match_end, level_context, &pat.operation, syntax_set, ops)?;

         self.perform_op(line, &reg_match.regions, pat, syntax_set)
     }

     fn push_meta_ops(
         &self,
         initial: bool,
         index: usize,
         cur_context: &Context,
         match_op: &MatchOperation,
         syntax_set: &SyntaxSet,
         ops: &mut Vec<(usize, ScopeStackOp)>,
     ) -> Result<(), ParsingError>{
         // println!("metas ops for {:?}, initial: {}",
         //          match_op,
         //          initial);
         // println!("{:?}", cur_context.meta_scope);
         match *match_op {
             MatchOperation::Pop => {
                 let v = if initial {
                     &cur_context.meta_content_scope
                 } else {
                     &cur_context.meta_scope
                 };
                 if !v.is_empty() {
                     ops.push((index, ScopeStackOp::Pop(v.len())));
                 }

                 // cleared scopes are restored after the scopes from match pattern that invoked the pop are applied
                 if !initial && cur_context.clear_scopes.is_some() {
                     ops.push((index, ScopeStackOp::Restore))
                 }
             },
             // for some reason the ST3 behaviour of set is convoluted and is inconsistent with the docs and other ops
             // - the meta_content_scope of the current context is applied to the matched thing, unlike pop
             // - the clear_scopes are applied after the matched token, unlike push
             // - the interaction with meta scopes means that the token has the meta scopes of both the current scope and the new scope.
             MatchOperation::Push(ref context_refs) |
             MatchOperation::Set(ref context_refs) => {
                 let is_set = matches!(*match_op, MatchOperation::Set(_));
                 // a match pattern that "set"s keeps the meta_content_scope and meta_scope from the previous context
                 if initial {
                     if is_set && cur_context.clear_scopes.is_some() {
                         // cleared scopes from the old context are restored immediately
                         ops.push((index, ScopeStackOp::Restore));
                     }
                     // add each context's meta scope
                     for r in context_refs.iter() {
                         let ctx = r.resolve(syntax_set)?;

                         if !is_set {
                             if let Some(clear_amount) = ctx.clear_scopes {
                                 ops.push((index, ScopeStackOp::Clear(clear_amount)));
                             }
                         }

                         for scope in ctx.meta_scope.iter() {
                             ops.push((index, ScopeStackOp::Push(*scope)));
                         }
                     }
                 } else {
                     let repush = (is_set && (!cur_context.meta_scope.is_empty() || !cur_context.meta_content_scope.is_empty())) || context_refs.iter().any(|r| {
                         let ctx = r.resolve(syntax_set).unwrap();

                         !ctx.meta_content_scope.is_empty() || (ctx.clear_scopes.is_some() && is_set)
                     });
                     if repush {
                         // remove previously pushed meta scopes, so that meta content scopes will be applied in the correct order
                         let mut num_to_pop : usize = context_refs.iter().map(|r| {
                             let ctx = r.resolve(syntax_set).unwrap();
                             ctx.meta_scope.len()
                         }).sum();

                         // also pop off the original context's meta scopes
                         if is_set {
                             num_to_pop += cur_context.meta_content_scope.len() + cur_context.meta_scope.len();
                         }

                         // do all the popping as one operation
                         if num_to_pop > 0 {
                             ops.push((index, ScopeStackOp::Pop(num_to_pop)));
                         }

                         // now we push meta scope and meta context scope for each context pushed
                         for r in context_refs {
                             let ctx = r.resolve(syntax_set)?;

                             // for some reason, contrary to my reading of the docs, set does this after the token
                             if is_set {
                                 if let Some(clear_amount) = ctx.clear_scopes {
                                     ops.push((index, ScopeStackOp::Clear(clear_amount)));
                                 }
                             }

                             for scope in ctx.meta_scope.iter() {
                                 ops.push((index, ScopeStackOp::Push(*scope)));
                             }
                             for scope in ctx.meta_content_scope.iter() {
                                 ops.push((index, ScopeStackOp::Push(*scope)));
                             }
                         }
                     }
                 }
             },
             MatchOperation::None => (),
         }

         Ok(())
     }

     /// Returns true if the stack was changed
     fn perform_op(
         &mut self,
         line: &str,
         regions: &Region,
         pat: &MatchPattern,
         syntax_set: &SyntaxSet
     ) -> Result<bool, ParsingError> {
         let (ctx_refs, old_proto_ids) = match pat.operation {
             MatchOperation::Push(ref ctx_refs) => (ctx_refs, None),
             MatchOperation::Set(ref ctx_refs) => {
                 // a `with_prototype` stays active when the context is `set`
                 // until the context layer in the stack (where the `with_prototype`
                 // was initially applied) is popped off.
                 (ctx_refs, self.stack.pop().map(|s| s.prototypes))
             }
             MatchOperation::Pop => {
                 self.stack.pop();
                 return Ok(true);
             }
             MatchOperation::None => return Ok(false),
         };
         for (i, r) in ctx_refs.iter().enumerate() {
             let mut proto_ids = if i == 0 {
                 // it is only necessary to preserve the old prototypes
                 // at the first stack frame pushed
                 old_proto_ids.clone().unwrap_or_else(Vec::new)
             } else {
                 Vec::new()
             };
             if i == ctx_refs.len() - 1 {
                 // if a with_prototype was specified, and multiple contexts were pushed,
                 // then the with_prototype applies only to the last context pushed, i.e.
                 // top most on the stack after all the contexts are pushed - this is also
                 // referred to as the "target" of the push by sublimehq - see
                 // https://forum.sublimetext.com/t/dev-build-3111/19240/17 for more info
                 if let Some(ref p) = pat.with_prototype {
                     proto_ids.push(p.id()?);
                 }
             }
             let context_id = r.id()?;
             let context = syntax_set.get_context(&context_id)?;
             let captures = {
                 let mut uses_backrefs = context.uses_backrefs;
                 if !proto_ids.is_empty() {
                     uses_backrefs = uses_backrefs || proto_ids.iter().any(|id| syntax_set.get_context(id).unwrap().uses_backrefs);
                 }
                 if uses_backrefs {
                     Some((regions.clone(), line.to_owned()))
                 } else {
                     None
                 }
             };
             self.stack.push(StateLevel {
                 context: context_id,
                 prototypes: proto_ids,
                 captures,
             });
         }
         Ok(true)
     }
 }

 #[cfg(feature = "yaml-load")]
 #[cfg(test)]
 mod tests {
     use super::*;
     use crate::parsing::{SyntaxSet, SyntaxSetBuilder, Scope, ScopeStack};
     use crate::parsing::ScopeStackOp::{Push, Pop, Clear, Restore};
     use crate::util::debug_print_ops;

     const TEST_SYNTAX: &str = include_str!("../../testdata/parser_tests.sublime-syntax");

     #[test]
     fn can_parse_simple() {
         let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
         let mut state = {
             let syntax = ss.find_syntax_by_name("Ruby on Rails").unwrap();
             ParseState::new(syntax)
         };

         let ops1 = ops(&mut state, "module Bob::Wow::Troll::Five; 5; end", &ss);
         let test_ops1 = vec![
             (0, Push(Scope::new("source.ruby.rails").unwrap())),
             (0, Push(Scope::new("meta.module.ruby").unwrap())),
             (0, Push(Scope::new("keyword.control.module.ruby").unwrap())),
             (6, Pop(2)),
             (6, Push(Scope::new("meta.module.ruby").unwrap())),
             (7, Pop(1)),
             (7, Push(Scope::new("meta.module.ruby").unwrap())),
             (7, Push(Scope::new("entity.name.module.ruby").unwrap())),
             (7, Push(Scope::new("support.other.namespace.ruby").unwrap())),
             (10, Pop(1)),
             (10, Push(Scope::new("punctuation.accessor.ruby").unwrap())),
         ];
         assert_eq!(&ops1[0..test_ops1.len()], &test_ops1[..]);

         let ops2 = ops(&mut state, "def lol(wow = 5)", &ss);
         let test_ops2 = vec![
             (0, Push(Scope::new("meta.function.ruby").unwrap())),
             (0, Push(Scope::new("keyword.control.def.ruby").unwrap())),
             (3, Pop(2)),
             (3, Push(Scope::new("meta.function.ruby").unwrap())),
             (4, Push(Scope::new("entity.name.function.ruby").unwrap())),
             (7, Pop(1))
         ];
         assert_eq!(&ops2[0..test_ops2.len()], &test_ops2[..]);
     }

     #[test]
     fn can_parse_yaml() {
         let ps = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
         let mut state = {
             let syntax = ps.find_syntax_by_name("YAML").unwrap();
             ParseState::new(syntax)
         };

         assert_eq!(ops(&mut state, "key: value\n", &ps), vec![
             (0, Push(Scope::new("source.yaml").unwrap())),
             (0, Push(Scope::new("string.unquoted.plain.out.yaml").unwrap())),
             (0, Push(Scope::new("entity.name.tag.yaml").unwrap())),
             (3, Pop(2)),
             (3, Push(Scope::new("punctuation.separator.key-value.mapping.yaml").unwrap())),
             (4, Pop(1)),
             (5, Push(Scope::new("string.unquoted.plain.out.yaml").unwrap())),
             (10, Pop(1)),
         ]);
     }

     #[test]
     fn can_parse_includes() {
         let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
         let mut state = {
             let syntax = ss.find_syntax_by_name("HTML (Rails)").unwrap();
             ParseState::new(syntax)
         };

         let ops = ops(&mut state, "<script>var lol = '<% def wow(", &ss);

         let mut test_stack = ScopeStack::new();
         test_stack.push(Scope::new("text.html.ruby").unwrap());
         test_stack.push(Scope::new("text.html.basic").unwrap());
         test_stack.push(Scope::new("source.js.embedded.html").unwrap());
         test_stack.push(Scope::new("source.js").unwrap());
         test_stack.push(Scope::new("string.quoted.single.js").unwrap());
         test_stack.push(Scope::new("source.ruby.rails.embedded.html").unwrap());
         test_stack.push(Scope::new("meta.function.parameters.ruby").unwrap());

         let mut stack = ScopeStack::new();
         for (_, op) in ops.iter() {
             stack.apply(op).expect("#[cfg(test)]");
         }
         assert_eq!(stack, test_stack);
     }

     #[test]
     fn can_parse_backrefs() {
         let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
         let mut state = {
             let syntax = ss.find_syntax_by_name("Ruby on Rails").unwrap();
             ParseState::new(syntax)
         };

         // For parsing HEREDOC, the "SQL" is captured at the beginning and then used in another
         // regex with a backref, to match the end of the HEREDOC. Note that there can be code
         // after the marker (`.strip`) here.
         assert_eq!(ops(&mut state, "lol = <<-SQL.strip", &ss), vec![
             (0, Push(Scope::new("source.ruby.rails").unwrap())),
             (4, Push(Scope::new("keyword.operator.assignment.ruby").unwrap())),
             (5, Pop(1)),
             (6, Push(Scope::new("string.unquoted.embedded.sql.ruby").unwrap())),
             (6, Push(Scope::new("punctuation.definition.string.begin.ruby").unwrap())),
             (12, Pop(1)),
             (12, Pop(1)),
             (12, Push(Scope::new("string.unquoted.embedded.sql.ruby").unwrap())),
             (12, Push(Scope::new("text.sql.embedded.ruby").unwrap())),
             (12, Clear(ClearAmount::TopN(2))),
             (12, Push(Scope::new("punctuation.accessor.ruby").unwrap())),
             (13, Pop(1)),
             (18, Restore),
         ]);

         assert_eq!(ops(&mut state, "wow", &ss), vec![]);

         assert_eq!(ops(&mut state, "SQL", &ss), vec![
             (0, Pop(1)),
             (0, Push(Scope::new("punctuation.definition.string.end.ruby").unwrap())),
             (3, Pop(1)),
             (3, Pop(1)),
         ]);
     }

     #[test]
     fn can_parse_preprocessor_rules() {
         let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
         let mut state = {
             let syntax = ss.find_syntax_by_name("C").unwrap();
             ParseState::new(syntax)
         };

         assert_eq!(ops(&mut state, "#ifdef FOO", &ss), vec![
             (0, Push(Scope::new("source.c").unwrap())),
             (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
             (0, Push(Scope::new("keyword.control.import.c").unwrap())),
             (6, Pop(1)),
             (10, Pop(1)),
         ]);
         assert_eq!(ops(&mut state, "{", &ss), vec![
             (0, Push(Scope::new("meta.block.c").unwrap())),
             (0, Push(Scope::new("punctuation.section.block.begin.c").unwrap())),
             (1, Pop(1)),
         ]);
         assert_eq!(ops(&mut state, "#else", &ss), vec![
             (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
             (0, Push(Scope::new("keyword.control.import.c").unwrap())),
             (5, Pop(1)),
             (5, Pop(1)),
         ]);
         assert_eq!(ops(&mut state, "{", &ss), vec![
             (0, Push(Scope::new("meta.block.c").unwrap())),
             (0, Push(Scope::new("punctuation.section.block.begin.c").unwrap())),
             (1, Pop(1)),
         ]);
         assert_eq!(ops(&mut state, "#endif", &ss), vec![
             (0, Pop(1)),
             (0, Push(Scope::new("meta.block.c").unwrap())),
             (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
             (0, Push(Scope::new("keyword.control.import.c").unwrap())),
             (6, Pop(2)),
             (6, Pop(2)),
             (6, Push(Scope::new("meta.block.c").unwrap())),
         ]);
         assert_eq!(ops(&mut state, "    foo;", &ss), vec![
             (7, Push(Scope::new("punctuation.terminator.c").unwrap())),
             (8, Pop(1)),
         ]);
         assert_eq!(ops(&mut state, "}", &ss), vec![
             (0, Push(Scope::new("punctuation.section.block.end.c").unwrap())),
             (1, Pop(1)),
             (1, Pop(1)),
         ]);
     }

     #[test]
     fn can_parse_issue25() {
         let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
         let mut state = {
             let syntax = ss.find_syntax_by_name("C").unwrap();
             ParseState::new(syntax)
         };

         // test fix for issue #25
         assert_eq!(ops(&mut state, "struct{estruct", &ss).len(), 10);
     }

     #[test]
     fn can_compare_parse_states() {
         let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
         let syntax = ss.find_syntax_by_name("Java").unwrap();
         let mut state1 = ParseState::new(syntax);
         let mut state2 = ParseState::new(syntax);

         assert_eq!(ops(&mut state1, "class Foo {", &ss).len(), 11);
         assert_eq!(ops(&mut state2, "class Fooo {", &ss).len(), 11);

         assert_eq!(state1, state2);
         ops(&mut state1, "}", &ss);
         assert_ne!(state1, state2);
     }

     #[test]
     fn can_parse_non_nested_clear_scopes() {
         let line = "'hello #simple_cleared_scopes_test world test \\n '";
         let expect = [
             "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
             "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
             "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
         ];
         expect_scope_stacks(line, &expect, TEST_SYNTAX);
     }

     #[test]
     fn can_parse_non_nested_too_many_clear_scopes() {
         let line = "'hello #too_many_cleared_scopes_test world test \\n '";
         let expect = [
             "<example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
             "<example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
             "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
         ];
         expect_scope_stacks(line, &expect, TEST_SYNTAX);
     }

     #[test]
     fn can_parse_nested_clear_scopes() {
         let line = "'hello #nested_clear_scopes_test world foo bar test \\n '";
         let expect = [
             "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
             "<source.test>, <example.meta-scope.cleared-previous-meta-scope.example>, <foo>",
             "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
             "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
         ];
         expect_scope_stacks(line, &expect, TEST_SYNTAX);
     }

     #[test]
     fn can_parse_infinite_loop() {
         let line = "#infinite_loop_test 123";
         let expect = [
             "<source.test>, <constant.numeric.test>",
         ];
         expect_scope_stacks(line, &expect, TEST_SYNTAX);
     }

     #[test]
     fn can_parse_infinite_seeming_loop() {
         // See https://github.com/SublimeTextIssues/Core/issues/1190 for an
         // explanation.
         let line = "#infinite_seeming_loop_test hello";
         let expect = [
             "<source.test>, <keyword.test>",
             "<source.test>, <test>, <string.unquoted.test>",
             "<source.test>, <test>, <keyword.control.test>",
         ];
         expect_scope_stacks(line, &expect, TEST_SYNTAX);
     }

     #[test]
     fn can_parse_prototype_that_pops_main() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   prototype:
     # This causes us to pop out of the main context. Sublime Text handles that
     # by pushing main back automatically.
     - match: (?=!)
       pop: true
   main:
     - match: foo
       scope: test.good
 "#;

         let line = "foo!";
         let expect = ["<source.test>, <test.good>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_syntax_with_newline_in_character_class() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     - match: foo[\n]
       scope: foo.end
     - match: foo
       scope: foo.any
 "#;

         let line = "foo";
         let expect = ["<source.test>, <foo.end>"];
         expect_scope_stacks(line, &expect, syntax);

         let line = "foofoofoo";
         let expect = [
             "<source.test>, <foo.any>",
             "<source.test>, <foo.any>",
             "<source.test>, <foo.end>",
         ];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_issue120() {
         let syntax = SyntaxDefinition::load_from_str(
             include_str!("../../testdata/embed_escape_test.sublime-syntax"),
             false,
             None
         ).unwrap();

         let line1 = "\"abctest\" foobar";
         let expect1 = [
             "<meta.attribute-with-value.style.html>, <string.quoted.double>, <punctuation.definition.string.begin.html>",
             "<meta.attribute-with-value.style.html>, <source.css>",
             "<meta.attribute-with-value.style.html>, <string.quoted.double>, <punctuation.definition.string.end.html>",
             "<meta.attribute-with-value.style.html>, <source.css>, <test.embedded>",
             "<top-level.test>",
         ];

         expect_scope_stacks_with_syntax(line1, &expect1, syntax.clone());

         let line2 = ">abctest</style>foobar";
         let expect2 = [
             "<meta.tag.style.begin.html>, <punctuation.definition.tag.end.html>",
             "<source.css.embedded.html>, <test.embedded>",
             "<top-level.test>",
         ];
         expect_scope_stacks_with_syntax(line2, &expect2, syntax);
     }

     #[test]
     fn can_parse_non_consuming_pop_that_would_loop() {
         // See https://github.com/trishume/syntect/issues/127
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     # This makes us go into "test" without consuming any characters
     - match: (?=hello)
       push: test
   test:
     # If we used this match, we'd go back to "main" without consuming anything,
     # and then back into "test", infinitely looping. ST detects this at this
     # point and ignores this match until at least one character matched.
     - match: (?!world)
       pop: true
     - match: \w+
       scope: test.matched
 "#;

         let line = "hello";
         let expect = ["<source.test>, <test.matched>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_non_consuming_set_and_pop_that_would_loop() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     # This makes us go into "a" without advancing
     - match: (?=test)
       push: a
   a:
     # This makes us go into "b" without advancing
     - match: (?=t)
       set: b
   b:
     # If we used this match, we'd go back to "main" without having advanced,
     # which means we'd have an infinite loop like with the previous test.
     # So even for a "set", we have to check if we're advancing or not.
     - match: (?=t)
       pop: true
     - match: \w+
       scope: test.matched
 "#;

         let line = "test";
         let expect = ["<source.test>, <test.matched>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_non_consuming_set_after_consuming_push_that_does_not_loop() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     # This makes us go into "a", but we consumed a character
     - match: t
       push: a
     - match: \w+
       scope: test.matched
   a:
     # This makes us go into "b" without consuming
     - match: (?=e)
       set: b
   b:
     # This match does not result in an infinite loop because we already consumed
     # a character to get into "a", so it's ok to pop back into "main".
     - match: (?=e)
       pop: true
 "#;

         let line = "test";
         let expect = ["<source.test>, <test.matched>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_non_consuming_set_after_consuming_set_that_does_not_loop() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     - match: (?=hello)
       push: a
     - match: \w+
       scope: test.matched
   a:
     - match: h
       set: b
   b:
     - match: (?=e)
       set: c
   c:
     # This is not an infinite loop because "a" consumed a character, so we can
     # actually pop back into main and then match the rest of the input.
     - match: (?=e)
       pop: true
 "#;

         let line = "hello";
         let expect = ["<source.test>, <test.matched>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_non_consuming_pop_that_would_loop_at_end_of_line() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     # This makes us go into "test" without consuming, even at the end of line
     - match: ""
       push: test
   test:
     - match: ""
       pop: true
     - match: \w+
       scope: test.matched
 "#;

         let line = "hello";
         let expect = ["<source.test>, <test.matched>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_empty_but_consuming_set_that_does_not_loop() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     - match: (?=hello)
       push: a
     - match: ello
       scope: test.good
   a:
     # This is an empty match, but it consumed a character (the "h")
     - match: (?=e)
       set: b
   b:
     # .. so it's ok to pop back to main from here
     - match: ""
       pop: true
     - match: ello
       scope: test.bad
 "#;

         let line = "hello";
         let expect = ["<source.test>, <test.good>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_non_consuming_pop_that_does_not_loop() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     # This is a non-consuming push, so "b" will need to check for a
     # non-consuming pop
     - match: (?=hello)
       push: [b, a]
     - match: ello
       scope: test.good
   a:
     # This pop is ok, it consumed "h"
     - match: (?=e)
       pop: true
   b:
     # This is non-consuming, and we set to "c"
     - match: (?=e)
       set: c
   c:
     # It's ok to pop back to "main" here because we consumed a character in the
     # meantime.
     - match: ""
       pop: true
     - match: ello
       scope: test.bad
 "#;

         let line = "hello";
         let expect = ["<source.test>, <test.good>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_non_consuming_pop_with_multi_push_that_does_not_loop() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     - match: (?=hello)
       push: [b, a]
     - match: ello
       scope: test.good
   a:
     # This pop is ok, as we're not popping back to "main" yet (which would loop),
     # we're popping to "b"
     - match: ""
       pop: true
     - match: \w+
       scope: test.bad
   b:
     - match: \w+
       scope: test.good
 "#;

         let line = "hello";
         let expect = ["<source.test>, <test.good>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_non_consuming_pop_of_recursive_context_that_does_not_loop() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     - match: xxx
       scope: test.good
     - include: basic-identifiers

   basic-identifiers:
     - match: '\w+::'
       scope: test.matched
       push: no-type-names

   no-type-names:
       - include: basic-identifiers
       - match: \w+
         scope: test.matched.inside
       # This is a tricky one because when this is the best match,
       # we have two instances of "no-type-names" on the stack, so we're popping
       # back from "no-type-names" to another "no-type-names".
       - match: ''
         pop: true
 "#;

         let line = "foo::bar::* xxx";
         let expect = ["<source.test>, <test.good>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_non_consuming_pop_order() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     - match: (?=hello)
       push: test
   test:
     # This matches first
     - match: (?=e)
       push: good
     # But this (looping) match replaces it, because it's an earlier match
     - match: (?=h)
       pop: true
     # And this should not replace it, as it's a later match (only matches at
     # the same position can replace looping pops).
     - match: (?=o)
       push: bad
   good:
     - match: \w+
       scope: test.good
   bad:
     - match: \w+
       scope: test.bad
 "#;

         let line = "hello";
         let expect = ["<source.test>, <test.good>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_prototype_with_embed() {
         let syntax = r#"
 name: Javadoc
 scope: text.html.javadoc
 contexts:
   prototype:
     - match: \*
       scope: punctuation.definition.comment.javadoc

   main:
     - meta_include_prototype: false
     - match: /\*\*
       scope: comment.block.documentation.javadoc punctuation.definition.comment.begin.javadoc
       embed: contents
       embed_scope: comment.block.documentation.javadoc text.html.javadoc
       escape: \*/
       escape_captures:
         0: comment.block.documentation.javadoc punctuation.definition.comment.end.javadoc

   contents:
     - match: ''
 "#;

         let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
         expect_scope_stacks_with_syntax("/** * */", &["<comment.block.documentation.javadoc>, <punctuation.definition.comment.begin.javadoc>", "<comment.block.documentation.javadoc>, <text.html.javadoc>, <punctuation.definition.comment.javadoc>", "<comment.block.documentation.javadoc>, <punctuation.definition.comment.end.javadoc>"], syntax);
     }

     #[test]
     fn can_parse_context_included_in_prototype_via_named_reference() {
         let syntax = r#"
 scope: source.test
 contexts:
   prototype:
     - match: a
       push: a
     - match: b
       scope: test.bad
   main:
     - match: unused
   # This context is included in the prototype (see `push: a`).
   # Because of that, ST doesn't apply the prototype to this context, so if
   # we're in here the "b" shouldn't match.
   a:
     - match: a
       scope: test.good
 "#;

         let stack_states = stack_states(parse("aa b", syntax));
         assert_eq!(stack_states, vec![
             "<source.test>",
             "<source.test>, <test.good>",
             "<source.test>",
         ], "Expected test.bad to not match");
     }

     #[test]
     fn can_parse_with_prototype_set() {
         let syntax = r#"%YAML 1.2
 ---
 scope: source.test-set-with-proto
 contexts:
   main:
     - match: a
       scope: a
       set: next1
       with_prototype:
         - match: '1'
           scope: '1'
         - match: '2'
           scope: '2'
         - match: '3'
           scope: '3'
         - match: '4'
           scope: '4'
     - match: '5'
       scope: '5'
       set: [next3, next2]
       with_prototype:
         - match: c
           scope: cwith
   next1:
     - match: b
       scope: b
       set: next2
   next2:
     - match: c
       scope: c
       push: next3
     - match: e
       scope: e
       pop: true
     - match: f
       scope: f
       set: [next1, next2]
   next3:
     - match: d
       scope: d
     - match: (?=e)
       pop: true
     - match: c
       scope: cwithout
 "#;

         expect_scope_stacks_with_syntax(
             "a1b2c3d4e5",
             &[
                 "<a>", "<1>", "<b>", "<2>", "<c>", "<3>", "<d>", "<4>", "<e>", "<5>"
             ], SyntaxDefinition::load_from_str(syntax, true, None).unwrap()
         );
         expect_scope_stacks_with_syntax(
             "5cfcecbedcdea",
             &[
                 "<5>", "<cwith>", "<f>", "<e>", "<b>", "<d>", "<cwithout>", "<a>"
             ], SyntaxDefinition::load_from_str(syntax, true, None).unwrap()
         );
     }

     #[test]
     fn can_parse_issue176() {
         let syntax = r#"
 scope: source.dummy
 contexts:
   main:
     - match: (test)(?=(foo))(f)
       captures:
         1: test
         2: ignored
         3: f
       push:
         - match: (oo)
           captures:
             1: keyword
 "#;

         let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
         expect_scope_stacks_with_syntax("testfoo", &["<test>", /*"<ignored>",*/ "<f>", "<keyword>"], syntax);
     }

     #[test]
     fn can_parse_two_with_prototypes_at_same_stack_level() {
         let syntax_yamlstr = r#"
 %YAML 1.2
 ---
 # See http://www.sublimetext.com/docs/3/syntax.html
 scope: source.example-wp
 contexts:
   main:
     - match: a
       scope: a
       push:
         - match: b
           scope: b
           set:
             - match: c
               scope: c
           with_prototype:
             - match: '2'
               scope: '2'
       with_prototype:
         - match: '1'
           scope: '1'
 "#;

         let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
         expect_scope_stacks_with_syntax("abc12", &["<1>", "<2>"], syntax);
     }

     #[test]
     fn can_parse_two_with_prototypes_at_same_stack_level_set_multiple() {
         let syntax_yamlstr = r#"
 %YAML 1.2
 ---
 # See http://www.sublimetext.com/docs/3/syntax.html
 scope: source.example-wp
 contexts:
   main:
     - match: a
       scope: a
       push:
         - match: b
           scope: b
           set: [context1, context2, context3]
           with_prototype:
             - match: '2'
               scope: '2'
       with_prototype:
         - match: '1'
           scope: '1'
     - match: '1'
       scope: digit1
     - match: '2'
       scope: digit2
   context1:
     - match: e
       scope: e
       pop: true
     - match: '2'
       scope: digit2
   context2:
     - match: d
       scope: d
       pop: true
     - match: '2'
       scope: digit2
   context3:
     - match: c
       scope: c
       pop: true
 "#;

         let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
         expect_scope_stacks_with_syntax("ab12", &["<1>", "<2>"], syntax.clone());
         expect_scope_stacks_with_syntax("abc12", &["<1>", "<digit2>"], syntax.clone());
         expect_scope_stacks_with_syntax("abcd12", &["<1>", "<digit2>"], syntax.clone());
         expect_scope_stacks_with_syntax("abcde12", &["<digit1>", "<digit2>"], syntax);
     }

     #[test]
     fn can_parse_two_with_prototypes_at_same_stack_level_updated_captures() {
         let syntax_yamlstr = r#"
 %YAML 1.2
 ---
 # See http://www.sublimetext.com/docs/3/syntax.html
 scope: source.example-wp
 contexts:
   main:
     - match: (a)
       scope: a
       push:
         - match: (b)
           scope: b
           set:
             - match: c
               scope: c
           with_prototype:
             - match: d
               scope: d
       with_prototype:
         - match: \1
           scope: '1'
           pop: true
 "#;

         let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
         expect_scope_stacks_with_syntax("aa", &["<a>", "<1>"], syntax.clone());
         expect_scope_stacks_with_syntax("abcdb", &["<a>", "<b>", "<c>", "<d>", "<1>"], syntax);
     }

     #[test]
     fn can_parse_two_with_prototypes_at_same_stack_level_updated_captures_ignore_unexisting() {
         let syntax_yamlstr = r#"
 %YAML 1.2
 ---
 # See http://www.sublimetext.com/docs/3/syntax.html
 scope: source.example-wp
 contexts:
   main:
     - match: (a)(-)
       scope: a
       push:
         - match: (b)
           scope: b
           set:
             - match: c
               scope: c
           with_prototype:
             - match: d
               scope: d
       with_prototype:
         - match: \2
           scope: '2'
           pop: true
         - match: \1
           scope: '1'
           pop: true
 "#;

         let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
         expect_scope_stacks_with_syntax("a--", &["<a>", "<2>"], syntax.clone());
         // it seems that when ST encounters a non existing pop backreference, it just pops back to the with_prototype's original parent context - i.e. cdb is unscoped
         // TODO: it would be useful to have syntest functionality available here for easier testing and clarity
         expect_scope_stacks_with_syntax("a-bcdba-", &["<a>", "<b>"], syntax);
     }

     #[test]
     fn can_parse_syntax_with_eol_and_newline() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     - match: foo$\n
       scope: foo.newline
 "#;

         let line = "foo";
         let expect = ["<source.test>, <foo.newline>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_syntax_with_eol_only() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     - match: foo$
       scope: foo.newline
 "#;

         let line = "foo";
         let expect = ["<source.test>, <foo.newline>"];
         expect_scope_stacks(line, &expect, syntax);
     }

     #[test]
     fn can_parse_syntax_with_beginning_of_line() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     - match: \w+
       scope: word
       push:
         # this should not match at the end of the line
         - match: ^\s*$
           pop: true
         - match: =+
           scope: heading
           pop: true
     - match: .*
       scope: other
 "#;

         let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
         let syntax_set = link(syntax_newlines);

         let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
         assert_eq!(ops(&mut state, "foo\n", &syntax_set), vec![
             (0, Push(Scope::new("source.test").unwrap())),
             (0, Push(Scope::new("word").unwrap())),
             (3, Pop(1))
         ]);
         assert_eq!(ops(&mut state, "===\n", &syntax_set), vec![
             (0, Push(Scope::new("heading").unwrap())),
             (3, Pop(1))
         ]);

         assert_eq!(ops(&mut state, "bar\n", &syntax_set), vec![
             (0, Push(Scope::new("word").unwrap())),
             (3, Pop(1))
         ]);
         // This should result in popping out of the context
         assert_eq!(ops(&mut state, "\n", &syntax_set), vec![]);
         // So now this matches other
         assert_eq!(ops(&mut state, "====\n", &syntax_set), vec![
             (0, Push(Scope::new("other").unwrap())),
             (4, Pop(1))
         ]);
     }

     #[test]
     fn can_parse_syntax_with_comment_and_eol() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     - match: (//).*$
       scope: comment.line.double-slash
 "#;

         let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
         let syntax_set = link(syntax_newlines);

         let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
         assert_eq!(ops(&mut state, "// foo\n", &syntax_set), vec![
             (0, Push(Scope::new("source.test").unwrap())),
             (0, Push(Scope::new("comment.line.double-slash").unwrap())),
             // 6 is important here, should not be 7. The pattern should *not* consume the newline,
             // but instead match before it. This is important for whitespace-sensitive syntaxes
             // where newlines terminate statements such as Scala.
             (6, Pop(1))
         ]);
     }

     #[test]
     fn can_parse_text_with_unicode_to_skip() {
         let syntax = r#"
 name: test
 scope: source.test
 contexts:
   main:
     - match: (?=.)
       push: test
   test:
     - match: (?=.)
       pop: true
     - match: x
       scope: test.good
 "#;

         // U+03C0 GREEK SMALL LETTER PI, 2 bytes in UTF-8
         expect_scope_stacks("\u{03C0}x", &["<source.test>, <test.good>"], syntax);
         // U+0800 SAMARITAN LETTER ALAF, 3 bytes in UTF-8
         expect_scope_stacks("\u{0800}x", &["<source.test>, <test.good>"], syntax);
         // U+1F600 GRINNING FACE, 4 bytes in UTF-8
         expect_scope_stacks("\u{1F600}x", &["<source.test>, <test.good>"], syntax);
     }

     #[test]
     fn can_include_backrefs() {
         let syntax = SyntaxDefinition::load_from_str(r#"
                 name: Backref Include Test
                 scope: source.backrefinc
                 contexts:
                   main:
                     - match: (a)
                       scope: a
                       push: context1
                   context1:
                     - include: context2
                   context2:
                     - match: \1
                       scope: b
                       pop: true
                 "#, true, None).unwrap();

         expect_scope_stacks_with_syntax("aa", &["<a>", "<b>"], syntax);
     }

     #[test]
     fn can_include_nested_backrefs() {
         let syntax = SyntaxDefinition::load_from_str(r#"
                 name: Backref Include Test
                 scope: source.backrefinc
                 contexts:
                   main:
                     - match: (a)
                       scope: a
                       push: context1
                   context1:
                     - include: context3
                   context3:
                     - include: context2
                   context2:
                     - match: \1
                       scope: b
                       pop: true
                 "#, true, None).unwrap();

         expect_scope_stacks_with_syntax("aa", &["<a>", "<b>"], syntax);
     }

     fn expect_scope_stacks(line_without_newline: &str, expect: &[&str], syntax: &str) {
         println!("Parsing with newlines");
         let line_with_newline = format!("{}\n", line_without_newline);
         let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
         expect_scope_stacks_with_syntax(&line_with_newline, expect, syntax_newlines);

         println!("Parsing without newlines");
         let syntax_nonewlines = SyntaxDefinition::load_from_str(syntax, false, None).unwrap();
         expect_scope_stacks_with_syntax(line_without_newline, expect, syntax_nonewlines);
     }

     fn expect_scope_stacks_with_syntax(line: &str, expect: &[&str], syntax: SyntaxDefinition) {
         // check that each expected scope stack appears at least once while parsing the given test line

         let syntax_set = link(syntax);
         let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
         let ops = ops(&mut state, line, &syntax_set);
         expect_scope_stacks_for_ops(ops, expect);
     }

     fn expect_scope_stacks_for_ops(ops: Vec<(usize, ScopeStackOp)>, expect: &[&str]) {
         let mut criteria_met = Vec::new();
         for stack_str in stack_states(ops) {
             println!("{}", stack_str);
             for expectation in expect.iter() {
                 if stack_str.contains(expectation) {
                     criteria_met.push(expectation);
                 }
             }
         }
         if let Some(missing) = expect.iter().find(|e| !criteria_met.contains(e)) {
             panic!("expected scope stack '{}' missing", missing);
         }
     }

     fn parse(line: &str, syntax: &str) -> Vec<(usize, ScopeStackOp)> {
         let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
         let syntax_set = link(syntax);

         let mut state = ParseState::new(&syntax_set.syntaxes()[0]);
         ops(&mut state, line, &syntax_set)
     }

     fn link(syntax: SyntaxDefinition) -> SyntaxSet {
         let mut builder = SyntaxSetBuilder::new();
         builder.add(syntax);
         builder.build()
     }

     fn ops(state: &mut ParseState, line: &str, syntax_set: &SyntaxSet) -> Vec<(usize, ScopeStackOp)> {
         let ops = state.parse_line(line, syntax_set).expect("#[cfg(test)]");
         debug_print_ops(line, &ops);
         ops
     }

     fn stack_states(ops: Vec<(usize, ScopeStackOp)>) -> Vec<String> {
         let mut states = Vec::new();
         let mut stack = ScopeStack::new();
         for (_, op) in ops.iter() {
             stack.apply(op).expect("#[cfg(test)]");
             let scopes: Vec<String> = stack.as_slice().iter().map(|s| format!("{:?}", s)).collect();
             let stack_str = scopes.join(", ");
             states.push(stack_str);
         }
         states
     }
 }