blob: 58b34e00a058fece59e1326df93baf47cdb57c9c [file] [log] [blame]
use crate::{
dfa::{automaton::Automaton, dense, sparse},
util::id::StateID,
};
/// Allows a dense DFA to be driven through the `fst::Automaton` interface,
/// using the DFA's own `StateID` as the automaton state.
impl<T: AsRef<[u32]>> fst::Automaton for dense::DFA<T> {
    type State = StateID;

    /// Returns the DFA's forward start state, requested with `None`, an
    /// empty byte slice and zero for both remaining arguments.
    #[inline]
    fn start(&self) -> StateID {
        self.start_state_forward(None, &[], 0, 0)
    }

    /// Reports whether `state` is a match state of this DFA.
    #[inline]
    fn is_match(&self, state: &StateID) -> bool {
        self.is_match_state(*state)
    }

    /// Advances from `state` on `byte`.
    ///
    /// A match state is returned unchanged, so once a match has been
    /// reached no later byte can move the automaton out of it.
    #[inline]
    fn accept(&self, state: &StateID, byte: u8) -> StateID {
        let current = *state;
        if self.is_match_state(current) {
            current
        } else {
            self.next_state(current, byte)
        }
    }

    /// Advances from `state` at the end of the input.
    ///
    /// A match state is returned unchanged; any other state takes the DFA's
    /// end-of-input transition. The result is always `Some`.
    #[inline]
    fn accept_eof(&self, state: &StateID) -> Option<StateID> {
        let current = *state;
        let finished = if self.is_match_state(current) {
            current
        } else {
            self.next_eoi_state(current)
        };
        Some(finished)
    }

    /// Reports whether a match is still possible, i.e. whether `state` is
    /// anything other than a dead state.
    #[inline]
    fn can_match(&self, state: &StateID) -> bool {
        let dead = self.is_dead_state(*state);
        !dead
    }
}
/// Allows a sparse DFA to be driven through the `fst::Automaton` interface,
/// using the DFA's own `StateID` as the automaton state.
impl<T: AsRef<[u8]>> fst::Automaton for sparse::DFA<T> {
    type State = StateID;

    /// Returns the DFA's forward start state, requested with `None`, an
    /// empty byte slice and zero for both remaining arguments.
    #[inline]
    fn start(&self) -> StateID {
        self.start_state_forward(None, &[], 0, 0)
    }

    /// Reports whether `state` is a match state of this DFA.
    #[inline]
    fn is_match(&self, state: &StateID) -> bool {
        self.is_match_state(*state)
    }

    /// Advances from `state` on `byte`.
    ///
    /// A match state is returned unchanged, so once a match has been
    /// reached no later byte can move the automaton out of it.
    #[inline]
    fn accept(&self, state: &StateID, byte: u8) -> StateID {
        let current = *state;
        if self.is_match_state(current) {
            current
        } else {
            self.next_state(current, byte)
        }
    }

    /// Advances from `state` at the end of the input.
    ///
    /// A match state is returned unchanged; any other state takes the DFA's
    /// end-of-input transition. The result is always `Some`.
    #[inline]
    fn accept_eof(&self, state: &StateID) -> Option<StateID> {
        let current = *state;
        let finished = if self.is_match_state(current) {
            current
        } else {
            self.next_eoi_state(current)
        };
        Some(finished)
    }

    /// Reports whether a match is still possible, i.e. whether `state` is
    /// anything other than a dead state.
    #[inline]
    fn can_match(&self, state: &StateID) -> bool {
        let dead = self.is_dead_state(*state);
        !dead
    }
}
#[cfg(test)]
mod tests {
    use bstr::BString;
    use fst::{Automaton, IntoStreamer, Set, Streamer};

    use crate::dfa::{dense, sparse};

    // Runs `aut` over `set` and collects every key the search yields, in
    // the order the stream produces them.
    fn search<A, D>(set: &Set<D>, aut: A) -> Vec<BString>
    where
        A: Automaton,
        D: AsRef<[u8]>,
    {
        let mut matched = Vec::new();
        let mut hits = set.search(aut).into_stream();
        loop {
            match hits.next() {
                Some(key) => matched.push(BString::from(key)),
                None => break,
            }
        }
        matched
    }

    // Builds an in-memory set from `keys`, panicking if construction fails.
    fn set_of(keys: &[&str]) -> Set<Vec<u8>> {
        Set::from_iter(keys).unwrap()
    }

    #[test]
    fn dense_anywhere() {
        let keys = set_of(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]);
        let re = dense::DFA::new("ba.*").unwrap();
        let found = search(&keys, &re);
        assert_eq!(found, vec!["bar", "baz", "xba", "xbax"]);
    }

    #[test]
    fn dense_anchored() {
        let keys = set_of(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]);
        let config = dense::Config::new().anchored(true);
        let re = dense::Builder::new().configure(config).build("ba.*").unwrap();
        let found = search(&keys, &re);
        assert_eq!(found, vec!["bar", "baz"]);
    }

    #[test]
    fn dense_assertions_start() {
        let keys = set_of(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]);
        let re = dense::Builder::new().build("^ba.*").unwrap();
        let found = search(&keys, &re);
        assert_eq!(found, vec!["bar", "baz"]);
    }

    #[test]
    fn dense_assertions_end() {
        let keys = set_of(&["a", "bar", "bax", "wat", "xba", "xbax", "z"]);
        let re = dense::Builder::new().build(".*x$").unwrap();
        let found = search(&keys, &re);
        assert_eq!(found, vec!["bax", "xbax"]);
    }

    #[test]
    fn dense_assertions_word() {
        let keys = set_of(&["foo", "foox", "xfoo", "zzz foo zzz"]);
        let re = dense::Builder::new().build(r"(?-u)\bfoo\b").unwrap();
        let found = search(&keys, &re);
        assert_eq!(found, vec!["foo", "zzz foo zzz"]);
    }

    #[test]
    fn sparse_anywhere() {
        let keys = set_of(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]);
        let re = sparse::DFA::new("ba.*").unwrap();
        let found = search(&keys, &re);
        assert_eq!(found, vec!["bar", "baz", "xba", "xbax"]);
    }

    #[test]
    fn sparse_anchored() {
        let keys = set_of(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]);
        let config = dense::Config::new().anchored(true);
        let dense_re =
            dense::Builder::new().configure(config).build("ba.*").unwrap();
        let re = dense_re.to_sparse().unwrap();
        let found = search(&keys, &re);
        assert_eq!(found, vec!["bar", "baz"]);
    }

    #[test]
    fn sparse_assertions_start() {
        let keys = set_of(&["a", "bar", "baz", "wat", "xba", "xbax", "z"]);
        let dense_re = dense::Builder::new().build("^ba.*").unwrap();
        let re = dense_re.to_sparse().unwrap();
        let found = search(&keys, &re);
        assert_eq!(found, vec!["bar", "baz"]);
    }

    #[test]
    fn sparse_assertions_end() {
        let keys = set_of(&["a", "bar", "bax", "wat", "xba", "xbax", "z"]);
        let dense_re = dense::Builder::new().build(".*x$").unwrap();
        let re = dense_re.to_sparse().unwrap();
        let found = search(&keys, &re);
        assert_eq!(found, vec!["bax", "xbax"]);
    }

    #[test]
    fn sparse_assertions_word() {
        let keys = set_of(&["foo", "foox", "xfoo", "zzz foo zzz"]);
        let dense_re = dense::Builder::new().build(r"(?-u)\bfoo\b").unwrap();
        let re = dense_re.to_sparse().unwrap();
        let found = search(&keys, &re);
        assert_eq!(found, vec!["foo", "zzz foo zzz"]);
    }
}