| /// Represents the four possible starting configurations of a DFA search. |
| /// |
| /// The starting configuration is determined by inspecting the the beginning of |
| /// the haystack (up to 1 byte). Ultimately, this along with a pattern ID (if |
| /// specified) is what selects the start state to use in a DFA. |
| /// |
| /// In a DFA that doesn't have starting states for each pattern, then it will |
| /// have a maximum of four DFA start states. If the DFA was compiled with start |
| /// states for each pattern, then it will have a maximum of four DFA start |
| /// states for searching for any pattern, and then another maximum of four DFA |
| /// start states for executing an anchored search for each pattern. |
| /// |
| /// This ends up being represented as a table in the DFA (whether lazy or fully |
| /// built) where the stride of that table is 4, and each entry is an index into |
| /// the state transition table. Note though that multiple entries in the table |
| /// might point to the same state if the states would otherwise be equivalent. |
| /// (This is guaranteed by DFA minimization and may even be accomplished by |
| /// normal determinization, since it attempts to reuse equivalent states too.) |
| #[derive(Clone, Copy, Debug, Eq, PartialEq)] |
| pub(crate) enum Start { |
| /// This occurs when the starting position is not any of the ones below. |
| NonWordByte = 0, |
| /// This occurs when the byte immediately preceding the start of the search |
| /// is an ASCII word byte. |
| WordByte = 1, |
| /// This occurs when the starting position of the search corresponds to the |
| /// beginning of the haystack. |
| Text = 2, |
| /// This occurs when the byte immediately preceding the start of the search |
| /// is a line terminator. Specifically, `\n`. |
| Line = 3, |
| } |
| |
| impl Start { |
| /// Return the starting state corresponding to the given integer. If no |
| /// starting state exists for the given integer, then None is returned. |
| pub(crate) fn from_usize(n: usize) -> Option<Start> { |
| match n { |
| 0 => Some(Start::NonWordByte), |
| 1 => Some(Start::WordByte), |
| 2 => Some(Start::Text), |
| 3 => Some(Start::Line), |
| _ => None, |
| } |
| } |
| |
| /// Returns the total number of starting state configurations. |
| pub(crate) fn count() -> usize { |
| 4 |
| } |
| |
| /// Returns the starting state configuration for the given search |
| /// parameters. If the given offset range is not valid, then this panics. |
| #[inline(always)] |
| pub(crate) fn from_position_fwd( |
| bytes: &[u8], |
| start: usize, |
| end: usize, |
| ) -> Start { |
| assert!( |
| bytes.get(start..end).is_some(), |
| "{}..{} is invalid", |
| start, |
| end |
| ); |
| if start == 0 { |
| Start::Text |
| } else if bytes[start - 1] == b'\n' { |
| Start::Line |
| } else if crate::util::is_word_byte(bytes[start - 1]) { |
| Start::WordByte |
| } else { |
| Start::NonWordByte |
| } |
| } |
| |
| /// Returns the starting state configuration for a reverse search with the |
| /// given search parameters. If the given offset range is not valid, then |
| /// this panics. |
| #[inline(always)] |
| pub(crate) fn from_position_rev( |
| bytes: &[u8], |
| start: usize, |
| end: usize, |
| ) -> Start { |
| assert!( |
| bytes.get(start..end).is_some(), |
| "{}..{} is invalid", |
| start, |
| end |
| ); |
| if end == bytes.len() { |
| Start::Text |
| } else if bytes[end] == b'\n' { |
| Start::Line |
| } else if crate::util::is_word_byte(bytes[end]) { |
| Start::WordByte |
| } else { |
| Start::NonWordByte |
| } |
| } |
| |
| /// Return this starting configuration as an integer. It is guaranteed to |
| /// be less than `Start::count()`. |
| #[inline(always)] |
| pub(crate) fn as_usize(&self) -> usize { |
| *self as usize |
| } |
| } |