| /*! |
| Provides implementations of `fst::Automaton` for Aho-Corasick automata. |
| |
| This works by providing two wrapper types, [`Anchored`] and [`Unanchored`]. |
| The former executes an anchored search on an FST while the latter executes |
| an unanchored search. Building these wrappers is fallible and will fail if |
| the underlying Aho-Corasick automaton does not support the type of search it |
| represents. |
| */ |
| |
| use crate::{ |
| automaton::{Automaton, StateID}, |
| Anchored as AcAnchored, Input, MatchError, |
| }; |
| |
| /// Represents an unanchored Aho-Corasick search of a finite state transducer. |
| /// |
| /// Wrapping an Aho-Corasick automaton in `Unanchored` will fail if the |
| /// underlying automaton does not support unanchored searches. |
| /// |
| /// # Example |
| /// |
| /// This shows how to build an FST of keys and then run an unanchored search on |
| /// those keys using an Aho-Corasick automaton. |
| /// |
| /// ``` |
| /// use aho_corasick::{nfa::contiguous::NFA, transducer::Unanchored}; |
| /// use fst::{Automaton, IntoStreamer, Set, Streamer}; |
| /// |
| /// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap(); |
| /// let nfa = NFA::new(&["bcd", "x"]).unwrap(); |
| /// // NFAs always support both unanchored and anchored searches. |
| /// let searcher = Unanchored::new(&nfa).unwrap(); |
| /// |
| /// let mut stream = set.search(searcher).into_stream(); |
| /// let mut results = vec![]; |
| /// while let Some(key) = stream.next() { |
| /// results.push(std::str::from_utf8(key).unwrap().to_string()); |
| /// } |
| /// assert_eq!(vec!["abcd", "bcd", "xyz"], results); |
| /// ``` |
| #[derive(Clone, Debug)] |
| pub struct Unanchored<A>(A); |
| |
| impl<A: Automaton> Unanchored<A> { |
| /// Create a new `Unanchored` implementation of the `fst::Automaton` trait. |
| /// |
| /// If the given Aho-Corasick automaton does not support unanchored |
| /// searches, then this returns an error. |
| pub fn new(aut: A) -> Result<Unanchored<A>, MatchError> { |
| let input = Input::new("").anchored(AcAnchored::No); |
| let _ = aut.start_state(&input)?; |
| Ok(Unanchored(aut)) |
| } |
| |
| /// Returns a borrow to the underlying automaton. |
| pub fn as_ref(&self) -> &A { |
| &self.0 |
| } |
| |
| /// Unwrap this value and return the inner automaton. |
| pub fn into_inner(self) -> A { |
| self.0 |
| } |
| } |
| |
| impl<A: Automaton> fst::Automaton for Unanchored<A> { |
| type State = StateID; |
| |
| #[inline] |
| fn start(&self) -> StateID { |
| let input = Input::new("").anchored(AcAnchored::No); |
| self.0.start_state(&input).expect("support for unanchored searches") |
| } |
| |
| #[inline] |
| fn is_match(&self, state: &StateID) -> bool { |
| self.0.is_match(*state) |
| } |
| |
| #[inline] |
| fn accept(&self, state: &StateID, byte: u8) -> StateID { |
| if fst::Automaton::is_match(self, state) { |
| return *state; |
| } |
| self.0.next_state(AcAnchored::No, *state, byte) |
| } |
| |
| #[inline] |
| fn can_match(&self, state: &StateID) -> bool { |
| !self.0.is_dead(*state) |
| } |
| } |
| |
| /// Represents an anchored Aho-Corasick search of a finite state transducer. |
| /// |
| /// Wrapping an Aho-Corasick automaton in `Unanchored` will fail if the |
| /// underlying automaton does not support unanchored searches. |
| /// |
| /// # Example |
| /// |
| /// This shows how to build an FST of keys and then run an anchored search on |
| /// those keys using an Aho-Corasick automaton. |
| /// |
| /// ``` |
| /// use aho_corasick::{nfa::contiguous::NFA, transducer::Anchored}; |
| /// use fst::{Automaton, IntoStreamer, Set, Streamer}; |
| /// |
| /// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap(); |
| /// let nfa = NFA::new(&["bcd", "x"]).unwrap(); |
| /// // NFAs always support both unanchored and anchored searches. |
| /// let searcher = Anchored::new(&nfa).unwrap(); |
| /// |
| /// let mut stream = set.search(searcher).into_stream(); |
| /// let mut results = vec![]; |
| /// while let Some(key) = stream.next() { |
| /// results.push(std::str::from_utf8(key).unwrap().to_string()); |
| /// } |
| /// assert_eq!(vec!["bcd", "xyz"], results); |
| /// ``` |
| /// |
| /// This is like the example above, except we use an Aho-Corasick DFA, which |
| /// requires explicitly configuring it to support anchored searches. (NFAs |
| /// unconditionally support both unanchored and anchored searches.) |
| /// |
| /// ``` |
| /// use aho_corasick::{dfa::DFA, transducer::Anchored, StartKind}; |
| /// use fst::{Automaton, IntoStreamer, Set, Streamer}; |
| /// |
| /// let set = Set::from_iter(&["abcd", "bc", "bcd", "xyz"]).unwrap(); |
| /// let dfa = DFA::builder() |
| /// .start_kind(StartKind::Anchored) |
| /// .build(&["bcd", "x"]) |
| /// .unwrap(); |
| /// // We've explicitly configured our DFA to support anchored searches. |
| /// let searcher = Anchored::new(&dfa).unwrap(); |
| /// |
| /// let mut stream = set.search(searcher).into_stream(); |
| /// let mut results = vec![]; |
| /// while let Some(key) = stream.next() { |
| /// results.push(std::str::from_utf8(key).unwrap().to_string()); |
| /// } |
| /// assert_eq!(vec!["bcd", "xyz"], results); |
| /// ``` |
| #[derive(Clone, Debug)] |
| pub struct Anchored<A>(A); |
| |
| impl<A: Automaton> Anchored<A> { |
| /// Create a new `Anchored` implementation of the `fst::Automaton` trait. |
| /// |
| /// If the given Aho-Corasick automaton does not support anchored searches, |
| /// then this returns an error. |
| pub fn new(aut: A) -> Result<Anchored<A>, MatchError> { |
| let input = Input::new("").anchored(AcAnchored::Yes); |
| let _ = aut.start_state(&input)?; |
| Ok(Anchored(aut)) |
| } |
| |
| /// Returns a borrow to the underlying automaton. |
| pub fn as_ref(&self) -> &A { |
| &self.0 |
| } |
| |
| /// Unwrap this value and return the inner automaton. |
| pub fn into_inner(self) -> A { |
| self.0 |
| } |
| } |
| |
| impl<A: Automaton> fst::Automaton for Anchored<A> { |
| type State = StateID; |
| |
| #[inline] |
| fn start(&self) -> StateID { |
| let input = Input::new("").anchored(AcAnchored::Yes); |
| self.0.start_state(&input).expect("support for unanchored searches") |
| } |
| |
| #[inline] |
| fn is_match(&self, state: &StateID) -> bool { |
| self.0.is_match(*state) |
| } |
| |
| #[inline] |
| fn accept(&self, state: &StateID, byte: u8) -> StateID { |
| if fst::Automaton::is_match(self, state) { |
| return *state; |
| } |
| self.0.next_state(AcAnchored::Yes, *state, byte) |
| } |
| |
| #[inline] |
| fn can_match(&self, state: &StateID) -> bool { |
| !self.0.is_dead(*state) |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use alloc::{string::String, vec, vec::Vec}; |
| |
| use fst::{Automaton, IntoStreamer, Set, Streamer}; |
| |
| use crate::{ |
| dfa::DFA, |
| nfa::{contiguous, noncontiguous}, |
| StartKind, |
| }; |
| |
| use super::*; |
| |
| fn search<A: Automaton, D: AsRef<[u8]>>( |
| set: &Set<D>, |
| aut: A, |
| ) -> Vec<String> { |
| let mut stream = set.search(aut).into_stream(); |
| let mut results = vec![]; |
| while let Some(key) = stream.next() { |
| results.push(String::from(core::str::from_utf8(key).unwrap())); |
| } |
| results |
| } |
| |
| #[test] |
| fn unanchored() { |
| let set = |
| Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]) |
| .unwrap(); |
| let patterns = vec!["baz", "bax"]; |
| let expected = vec!["baz", "xbax"]; |
| |
| let aut = Unanchored(noncontiguous::NFA::new(&patterns).unwrap()); |
| let got = search(&set, &aut); |
| assert_eq!(got, expected); |
| |
| let aut = Unanchored(contiguous::NFA::new(&patterns).unwrap()); |
| let got = search(&set, &aut); |
| assert_eq!(got, expected); |
| |
| let aut = Unanchored(DFA::new(&patterns).unwrap()); |
| let got = search(&set, &aut); |
| assert_eq!(got, expected); |
| } |
| |
| #[test] |
| fn anchored() { |
| let set = |
| Set::from_iter(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]) |
| .unwrap(); |
| let patterns = vec!["baz", "bax"]; |
| let expected = vec!["baz"]; |
| |
| let aut = Anchored(noncontiguous::NFA::new(&patterns).unwrap()); |
| let got = search(&set, &aut); |
| assert_eq!(got, expected); |
| |
| let aut = Anchored(contiguous::NFA::new(&patterns).unwrap()); |
| let got = search(&set, &aut); |
| assert_eq!(got, expected); |
| |
| let aut = Anchored( |
| DFA::builder() |
| .start_kind(StartKind::Anchored) |
| .build(&patterns) |
| .unwrap(), |
| ); |
| let got = search(&set, &aut); |
| assert_eq!(got, expected); |
| } |
| } |