| use std::error; |
| use std::fmt; |
| use std::io; |
| |
| use grep_matcher::LineTerminator; |
| |
| use crate::lines::LineIter; |
| use crate::searcher::{ConfigError, Searcher}; |
| |
| /// A trait that describes errors that can be reported by searchers and |
| /// implementations of `Sink`. |
| /// |
| /// Unless you have a specialized use case, you probably don't need to |
| /// implement this trait explicitly. It's likely that using `io::Error` (which |
| /// implements this trait) for your error type is good enough, largely because |
| /// most errors that occur during search will likely be an `io::Error`. |
| pub trait SinkError: Sized { |
| /// A constructor for converting any value that satisfies the |
| /// `fmt::Display` trait into an error. |
| fn error_message<T: fmt::Display>(message: T) -> Self; |
| |
| /// A constructor for converting I/O errors that occur while searching into |
| /// an error of this type. |
| /// |
| /// By default, this is implemented via the `error_message` constructor. |
| fn error_io(err: io::Error) -> Self { |
| Self::error_message(err) |
| } |
| |
| /// A constructor for converting configuration errors that occur while |
| /// building a searcher into an error of this type. |
| /// |
| /// By default, this is implemented via the `error_message` constructor. |
| fn error_config(err: ConfigError) -> Self { |
| Self::error_message(err) |
| } |
| } |
| |
| /// An `io::Error` can be used as an error for `Sink` implementations out of |
| /// the box. |
| impl SinkError for io::Error { |
| fn error_message<T: fmt::Display>(message: T) -> io::Error { |
| io::Error::new(io::ErrorKind::Other, message.to_string()) |
| } |
| |
| fn error_io(err: io::Error) -> io::Error { |
| err |
| } |
| } |
| |
| /// A `Box<std::error::Error>` can be used as an error for `Sink` |
| /// implementations out of the box. |
| impl SinkError for Box<dyn error::Error> { |
| fn error_message<T: fmt::Display>(message: T) -> Box<dyn error::Error> { |
| Box::<dyn error::Error>::from(message.to_string()) |
| } |
| } |
| |
| /// A trait that defines how results from searchers are handled. |
| /// |
| /// In this crate, a searcher follows the "push" model. What that means is that |
| /// the searcher drives execution, and pushes results back to the caller. This |
| /// is in contrast to a "pull" model where the caller drives execution and |
| /// takes results as they need them. These are also known as "internal" and |
| /// "external" iteration strategies, respectively. |
| /// |
| /// For a variety of reasons, including the complexity of the searcher |
| /// implementation, this crate chooses the "push" or "internal" model of |
| /// execution. Thus, in order to act on search results, callers must provide |
| /// an implementation of this trait to a searcher, and the searcher is then |
| /// responsible for calling the methods on this trait. |
| /// |
| /// This trait defines several behaviors: |
| /// |
| /// * What to do when a match is found. Callers must provide this. |
| /// * What to do when an error occurs. Callers must provide this via the |
| /// [`SinkError`](trait.SinkError.html) trait. Generally, callers can just |
| /// use `io::Error` for this, which already implements `SinkError`. |
| /// * What to do when a contextual line is found. By default, these are |
| /// ignored. |
| /// * What to do when a gap between contextual lines has been found. By |
| /// default, this is ignored. |
| /// * What to do when a search has started. By default, this does nothing. |
| /// * What to do when a search has finished successfully. By default, this does |
| /// nothing. |
| /// |
| /// Callers must, at minimum, specify the behavior when an error occurs and |
| /// the behavior when a match occurs. The rest is optional. For each behavior, |
| /// callers may report an error (say, if writing the result to another |
| /// location failed) or simply return `false` if they want the search to stop |
| /// (e.g., when implementing a cap on the number of search results to show). |
| /// |
| /// When errors are reported (whether in the searcher or in the implementation |
| /// of `Sink`), then searchers quit immediately without calling `finish`. |
| /// |
| /// For simpler uses of `Sink`, callers may elect to use one of |
| /// the more convenient but less flexible implementations in the |
| /// [`sinks`](sinks/index.html) module. |
| pub trait Sink { |
| /// The type of an error that should be reported by a searcher. |
| /// |
| /// Errors of this type are not only returned by the methods on this |
| /// trait, but the constructors defined in `SinkError` are also used in |
| /// the searcher implementation itself. e.g., When a I/O error occurs when |
| /// reading data from a file. |
| type Error: SinkError; |
| |
| /// This method is called whenever a match is found. |
| /// |
| /// If multi line is enabled on the searcher, then the match reported here |
| /// may span multiple lines and it may include multiple matches. When multi |
| /// line is disabled, then the match is guaranteed to span exactly one |
| /// non-empty line (where a single line is, at minimum, a line terminator). |
| /// |
| /// If this returns `true`, then searching continues. If this returns |
| /// `false`, then searching is stopped immediately and `finish` is called. |
| /// |
| /// If this returns an error, then searching is stopped immediately, |
| /// `finish` is not called and the error is bubbled back up to the caller |
| /// of the searcher. |
| fn matched( |
| &mut self, |
| _searcher: &Searcher, |
| _mat: &SinkMatch<'_>, |
| ) -> Result<bool, Self::Error>; |
| |
| /// This method is called whenever a context line is found, and is optional |
| /// to implement. By default, it does nothing and returns `true`. |
| /// |
| /// In all cases, the context given is guaranteed to span exactly one |
| /// non-empty line (where a single line is, at minimum, a line terminator). |
| /// |
| /// If this returns `true`, then searching continues. If this returns |
| /// `false`, then searching is stopped immediately and `finish` is called. |
| /// |
| /// If this returns an error, then searching is stopped immediately, |
| /// `finish` is not called and the error is bubbled back up to the caller |
| /// of the searcher. |
| #[inline] |
| fn context( |
| &mut self, |
| _searcher: &Searcher, |
| _context: &SinkContext<'_>, |
| ) -> Result<bool, Self::Error> { |
| Ok(true) |
| } |
| |
| /// This method is called whenever a break in contextual lines is found, |
| /// and is optional to implement. By default, it does nothing and returns |
| /// `true`. |
| /// |
| /// A break can only occur when context reporting is enabled (that is, |
| /// either or both of `before_context` or `after_context` are greater than |
| /// `0`). More precisely, a break occurs between non-contiguous groups of |
| /// lines. |
| /// |
| /// If this returns `true`, then searching continues. If this returns |
| /// `false`, then searching is stopped immediately and `finish` is called. |
| /// |
| /// If this returns an error, then searching is stopped immediately, |
| /// `finish` is not called and the error is bubbled back up to the caller |
| /// of the searcher. |
| #[inline] |
| fn context_break( |
| &mut self, |
| _searcher: &Searcher, |
| ) -> Result<bool, Self::Error> { |
| Ok(true) |
| } |
| |
| /// This method is called whenever binary detection is enabled and binary |
| /// data is found. If binary data is found, then this is called at least |
| /// once for the first occurrence with the absolute byte offset at which |
| /// the binary data begins. |
| /// |
| /// If this returns `true`, then searching continues. If this returns |
| /// `false`, then searching is stopped immediately and `finish` is called. |
| /// |
| /// If this returns an error, then searching is stopped immediately, |
| /// `finish` is not called and the error is bubbled back up to the caller |
| /// of the searcher. |
| /// |
| /// By default, it does nothing and returns `true`. |
| #[inline] |
| fn binary_data( |
| &mut self, |
| _searcher: &Searcher, |
| _binary_byte_offset: u64, |
| ) -> Result<bool, Self::Error> { |
| Ok(true) |
| } |
| |
| /// This method is called when a search has begun, before any search is |
| /// executed. By default, this does nothing. |
| /// |
| /// If this returns `true`, then searching continues. If this returns |
| /// `false`, then searching is stopped immediately and `finish` is called. |
| /// |
| /// If this returns an error, then searching is stopped immediately, |
| /// `finish` is not called and the error is bubbled back up to the caller |
| /// of the searcher. |
| #[inline] |
| fn begin(&mut self, _searcher: &Searcher) -> Result<bool, Self::Error> { |
| Ok(true) |
| } |
| |
| /// This method is called when a search has completed. By default, this |
| /// does nothing. |
| /// |
| /// If this returns an error, the error is bubbled back up to the caller of |
| /// the searcher. |
| #[inline] |
| fn finish( |
| &mut self, |
| _searcher: &Searcher, |
| _: &SinkFinish, |
| ) -> Result<(), Self::Error> { |
| Ok(()) |
| } |
| } |
| |
| impl<'a, S: Sink> Sink for &'a mut S { |
| type Error = S::Error; |
| |
| #[inline] |
| fn matched( |
| &mut self, |
| searcher: &Searcher, |
| mat: &SinkMatch<'_>, |
| ) -> Result<bool, S::Error> { |
| (**self).matched(searcher, mat) |
| } |
| |
| #[inline] |
| fn context( |
| &mut self, |
| searcher: &Searcher, |
| context: &SinkContext<'_>, |
| ) -> Result<bool, S::Error> { |
| (**self).context(searcher, context) |
| } |
| |
| #[inline] |
| fn context_break( |
| &mut self, |
| searcher: &Searcher, |
| ) -> Result<bool, S::Error> { |
| (**self).context_break(searcher) |
| } |
| |
| #[inline] |
| fn binary_data( |
| &mut self, |
| searcher: &Searcher, |
| binary_byte_offset: u64, |
| ) -> Result<bool, S::Error> { |
| (**self).binary_data(searcher, binary_byte_offset) |
| } |
| |
| #[inline] |
| fn begin(&mut self, searcher: &Searcher) -> Result<bool, S::Error> { |
| (**self).begin(searcher) |
| } |
| |
| #[inline] |
| fn finish( |
| &mut self, |
| searcher: &Searcher, |
| sink_finish: &SinkFinish, |
| ) -> Result<(), S::Error> { |
| (**self).finish(searcher, sink_finish) |
| } |
| } |
| |
| impl<S: Sink + ?Sized> Sink for Box<S> { |
| type Error = S::Error; |
| |
| #[inline] |
| fn matched( |
| &mut self, |
| searcher: &Searcher, |
| mat: &SinkMatch<'_>, |
| ) -> Result<bool, S::Error> { |
| (**self).matched(searcher, mat) |
| } |
| |
| #[inline] |
| fn context( |
| &mut self, |
| searcher: &Searcher, |
| context: &SinkContext<'_>, |
| ) -> Result<bool, S::Error> { |
| (**self).context(searcher, context) |
| } |
| |
| #[inline] |
| fn context_break( |
| &mut self, |
| searcher: &Searcher, |
| ) -> Result<bool, S::Error> { |
| (**self).context_break(searcher) |
| } |
| |
| #[inline] |
| fn binary_data( |
| &mut self, |
| searcher: &Searcher, |
| binary_byte_offset: u64, |
| ) -> Result<bool, S::Error> { |
| (**self).binary_data(searcher, binary_byte_offset) |
| } |
| |
| #[inline] |
| fn begin(&mut self, searcher: &Searcher) -> Result<bool, S::Error> { |
| (**self).begin(searcher) |
| } |
| |
| #[inline] |
| fn finish( |
| &mut self, |
| searcher: &Searcher, |
| sink_finish: &SinkFinish, |
| ) -> Result<(), S::Error> { |
| (**self).finish(searcher, sink_finish) |
| } |
| } |
| |
| /// Summary data reported at the end of a search. |
| /// |
| /// This reports data such as the total number of bytes searched and the |
| /// absolute offset of the first occurrence of binary data, if any were found. |
| /// |
| /// A searcher that stops early because of an error does not call `finish`. |
| /// A searcher that stops early because the `Sink` implementor instructed it |
| /// to will still call `finish`. |
| #[derive(Clone, Debug)] |
| pub struct SinkFinish { |
| pub(crate) byte_count: u64, |
| pub(crate) binary_byte_offset: Option<u64>, |
| } |
| |
| impl SinkFinish { |
| /// Return the total number of bytes searched. |
| #[inline] |
| pub fn byte_count(&self) -> u64 { |
| self.byte_count |
| } |
| |
| /// If binary detection is enabled and if binary data was found, then this |
| /// returns the absolute byte offset of the first detected byte of binary |
| /// data. |
| /// |
| /// Note that since this is an absolute byte offset, it cannot be relied |
| /// upon to index into any addressable memory. |
| #[inline] |
| pub fn binary_byte_offset(&self) -> Option<u64> { |
| self.binary_byte_offset |
| } |
| } |
| |
| /// A type that describes a match reported by a searcher. |
| #[derive(Clone, Debug)] |
| pub struct SinkMatch<'b> { |
| pub(crate) line_term: LineTerminator, |
| pub(crate) bytes: &'b [u8], |
| pub(crate) absolute_byte_offset: u64, |
| pub(crate) line_number: Option<u64>, |
| pub(crate) buffer: &'b [u8], |
| pub(crate) bytes_range_in_buffer: std::ops::Range<usize>, |
| } |
| |
| impl<'b> SinkMatch<'b> { |
| /// Returns the bytes for all matching lines, including the line |
| /// terminators, if they exist. |
| #[inline] |
| pub fn bytes(&self) -> &'b [u8] { |
| self.bytes |
| } |
| |
| /// Return an iterator over the lines in this match. |
| /// |
| /// If multi line search is enabled, then this may yield more than one |
| /// line (but always at least one line). If multi line search is disabled, |
| /// then this always reports exactly one line (but may consist of just |
| /// the line terminator). |
| /// |
| /// Lines yielded by this iterator include their terminators. |
| #[inline] |
| pub fn lines(&self) -> LineIter<'b> { |
| LineIter::new(self.line_term.as_byte(), self.bytes) |
| } |
| |
| /// Returns the absolute byte offset of the start of this match. This |
| /// offset is absolute in that it is relative to the very beginning of the |
| /// input in a search, and can never be relied upon to be a valid index |
| /// into an in-memory slice. |
| #[inline] |
| pub fn absolute_byte_offset(&self) -> u64 { |
| self.absolute_byte_offset |
| } |
| |
| /// Returns the line number of the first line in this match, if available. |
| /// |
| /// Line numbers are only available when the search builder is instructed |
| /// to compute them. |
| #[inline] |
| pub fn line_number(&self) -> Option<u64> { |
| self.line_number |
| } |
| |
| /// TODO |
| #[inline] |
| pub fn buffer(&self) -> &'b [u8] { |
| self.buffer |
| } |
| |
| /// TODO |
| #[inline] |
| pub fn bytes_range_in_buffer(&self) -> std::ops::Range<usize> { |
| self.bytes_range_in_buffer.clone() |
| } |
| } |
| |
| /// The type of context reported by a searcher. |
| #[derive(Clone, Debug, Eq, PartialEq)] |
| pub enum SinkContextKind { |
| /// The line reported occurred before a match. |
| Before, |
| /// The line reported occurred after a match. |
| After, |
| /// Any other type of context reported, e.g., as a result of a searcher's |
| /// "passthru" mode. |
| Other, |
| } |
| |
| /// A type that describes a contextual line reported by a searcher. |
| #[derive(Clone, Debug)] |
| pub struct SinkContext<'b> { |
| pub(crate) line_term: LineTerminator, |
| pub(crate) bytes: &'b [u8], |
| pub(crate) kind: SinkContextKind, |
| pub(crate) absolute_byte_offset: u64, |
| pub(crate) line_number: Option<u64>, |
| } |
| |
| impl<'b> SinkContext<'b> { |
| /// Returns the context bytes, including line terminators. |
| #[inline] |
| pub fn bytes(&self) -> &'b [u8] { |
| self.bytes |
| } |
| |
| /// Returns the type of context. |
| #[inline] |
| pub fn kind(&self) -> &SinkContextKind { |
| &self.kind |
| } |
| |
| /// Return an iterator over the lines in this match. |
| /// |
| /// This always yields exactly one line (and that one line may contain just |
| /// the line terminator). |
| /// |
| /// Lines yielded by this iterator include their terminators. |
| #[cfg(test)] |
| pub(crate) fn lines(&self) -> LineIter<'b> { |
| LineIter::new(self.line_term.as_byte(), self.bytes) |
| } |
| |
| /// Returns the absolute byte offset of the start of this context. This |
| /// offset is absolute in that it is relative to the very beginning of the |
| /// input in a search, and can never be relied upon to be a valid index |
| /// into an in-memory slice. |
| #[inline] |
| pub fn absolute_byte_offset(&self) -> u64 { |
| self.absolute_byte_offset |
| } |
| |
| /// Returns the line number of the first line in this context, if |
| /// available. |
| /// |
| /// Line numbers are only available when the search builder is instructed |
| /// to compute them. |
| #[inline] |
| pub fn line_number(&self) -> Option<u64> { |
| self.line_number |
| } |
| } |
| |
| /// A collection of convenience implementations of `Sink`. |
| /// |
| /// Each implementation in this module makes some kind of sacrifice in the name |
| /// of making common cases easier to use. Most frequently, each type is a |
| /// wrapper around a closure specified by the caller that provides limited |
| /// access to the full suite of information available to implementors of |
| /// `Sink`. |
| /// |
| /// For example, the `UTF8` sink makes the following sacrifices: |
| /// |
| /// * All matches must be UTF-8. An arbitrary `Sink` does not have this |
| /// restriction and can deal with arbitrary data. If this sink sees invalid |
| /// UTF-8, then an error is returned and searching stops. (Use the `Lossy` |
| /// sink instead to suppress this error.) |
| /// * The searcher must be configured to report line numbers. If it isn't, |
| /// an error is reported at the first match and searching stops. |
| /// * Context lines, context breaks and summary data reported at the end of |
| /// a search are all ignored. |
| /// * Implementors are forced to use `io::Error` as their error type. |
| /// |
| /// If you need more flexibility, then you're advised to implement the `Sink` |
| /// trait directly. |
| pub mod sinks { |
| use std::io; |
| use std::str; |
| |
| use super::{Sink, SinkError, SinkMatch}; |
| use crate::searcher::Searcher; |
| |
| /// A sink that provides line numbers and matches as strings while ignoring |
| /// everything else. |
| /// |
| /// This implementation will return an error if a match contains invalid |
| /// UTF-8 or if the searcher was not configured to count lines. Errors |
| /// on invalid UTF-8 can be suppressed by using the `Lossy` sink instead |
| /// of this one. |
| /// |
| /// The closure accepts two parameters: a line number and a UTF-8 string |
| /// containing the matched data. The closure returns a |
| /// `Result<bool, io::Error>`. If the `bool` is `false`, then the search |
| /// stops immediately. Otherwise, searching continues. |
| /// |
| /// If multi line mode was enabled, the line number refers to the line |
| /// number of the first line in the match. |
| #[derive(Clone, Debug)] |
| pub struct UTF8<F>(pub F) |
| where |
| F: FnMut(u64, &str) -> Result<bool, io::Error>; |
| |
| impl<F> Sink for UTF8<F> |
| where |
| F: FnMut(u64, &str) -> Result<bool, io::Error>, |
| { |
| type Error = io::Error; |
| |
| fn matched( |
| &mut self, |
| _searcher: &Searcher, |
| mat: &SinkMatch<'_>, |
| ) -> Result<bool, io::Error> { |
| let matched = match str::from_utf8(mat.bytes()) { |
| Ok(matched) => matched, |
| Err(err) => return Err(io::Error::error_message(err)), |
| }; |
| let line_number = match mat.line_number() { |
| Some(line_number) => line_number, |
| None => { |
| let msg = "line numbers not enabled"; |
| return Err(io::Error::error_message(msg)); |
| } |
| }; |
| (self.0)(line_number, &matched) |
| } |
| } |
| |
| /// A sink that provides line numbers and matches as (lossily converted) |
| /// strings while ignoring everything else. |
| /// |
| /// This is like `UTF8`, except that if a match contains invalid UTF-8, |
| /// then it will be lossily converted to valid UTF-8 by substituting |
| /// invalid UTF-8 with Unicode replacement characters. |
| /// |
| /// This implementation will return an error on the first match if the |
| /// searcher was not configured to count lines. |
| /// |
| /// The closure accepts two parameters: a line number and a UTF-8 string |
| /// containing the matched data. The closure returns a |
| /// `Result<bool, io::Error>`. If the `bool` is `false`, then the search |
| /// stops immediately. Otherwise, searching continues. |
| /// |
| /// If multi line mode was enabled, the line number refers to the line |
| /// number of the first line in the match. |
| #[derive(Clone, Debug)] |
| pub struct Lossy<F>(pub F) |
| where |
| F: FnMut(u64, &str) -> Result<bool, io::Error>; |
| |
| impl<F> Sink for Lossy<F> |
| where |
| F: FnMut(u64, &str) -> Result<bool, io::Error>, |
| { |
| type Error = io::Error; |
| |
| fn matched( |
| &mut self, |
| _searcher: &Searcher, |
| mat: &SinkMatch<'_>, |
| ) -> Result<bool, io::Error> { |
| use std::borrow::Cow; |
| |
| let matched = match str::from_utf8(mat.bytes()) { |
| Ok(matched) => Cow::Borrowed(matched), |
| // TODO: In theory, it should be possible to amortize |
| // allocation here, but `std` doesn't provide such an API. |
| // Regardless, this only happens on matches with invalid UTF-8, |
| // which should be pretty rare. |
| Err(_) => String::from_utf8_lossy(mat.bytes()), |
| }; |
| let line_number = match mat.line_number() { |
| Some(line_number) => line_number, |
| None => { |
| let msg = "line numbers not enabled"; |
| return Err(io::Error::error_message(msg)); |
| } |
| }; |
| (self.0)(line_number, &matched) |
| } |
| } |
| |
| /// A sink that provides line numbers and matches as raw bytes while |
| /// ignoring everything else. |
| /// |
| /// This implementation will return an error on the first match if the |
| /// searcher was not configured to count lines. |
| /// |
| /// The closure accepts two parameters: a line number and a raw byte string |
| /// containing the matched data. The closure returns a `Result<bool, |
| /// io::Error>`. If the `bool` is `false`, then the search stops |
| /// immediately. Otherwise, searching continues. |
| /// |
| /// If multi line mode was enabled, the line number refers to the line |
| /// number of the first line in the match. |
| #[derive(Clone, Debug)] |
| pub struct Bytes<F>(pub F) |
| where |
| F: FnMut(u64, &[u8]) -> Result<bool, io::Error>; |
| |
| impl<F> Sink for Bytes<F> |
| where |
| F: FnMut(u64, &[u8]) -> Result<bool, io::Error>, |
| { |
| type Error = io::Error; |
| |
| fn matched( |
| &mut self, |
| _searcher: &Searcher, |
| mat: &SinkMatch<'_>, |
| ) -> Result<bool, io::Error> { |
| let line_number = match mat.line_number() { |
| Some(line_number) => line_number, |
| None => { |
| let msg = "line numbers not enabled"; |
| return Err(io::Error::error_message(msg)); |
| } |
| }; |
| (self.0)(line_number, mat.bytes()) |
| } |
| } |
| } |