| //! Word splitting functionality. |
| //! |
| //! To wrap text into lines, long words sometimes need to be split |
| //! across lines. The [`WordSplitter`] trait defines this |
| //! functionality. [`HyphenSplitter`] is the default implementation of |
| //! this treat: it will simply split words on existing hyphens. |
| |
| #[cfg(feature = "hyphenation")] |
| use hyphenation::{Hyphenator, Standard}; |
| |
| /// An interface for splitting words. |
| /// |
| /// When the [`wrap_iter`] method will try to fit text into a line, it |
| /// will eventually find a word that it too large the current text |
| /// width. It will then call the currently configured `WordSplitter` to |
| /// have it attempt to split the word into smaller parts. This trait |
| /// describes that functionality via the [`split`] method. |
| /// |
| /// If the `textwrap` crate has been compiled with the `hyphenation` |
| /// feature enabled, you will find an implementation of `WordSplitter` |
| /// by the `hyphenation::language::Corpus` struct. Use this struct for |
| /// language-aware hyphenation. See the [`hyphenation` documentation] |
| /// for details. |
| /// |
| /// [`wrap_iter`]: ../struct.Wrapper.html#method.wrap_iter |
| /// [`split`]: #tymethod.split |
| /// [`hyphenation` documentation]: https://docs.rs/hyphenation/ |
| pub trait WordSplitter { |
| /// Return all possible splits of word. Each split is a triple |
| /// with a head, a hyphen, and a tail where `head + &hyphen + |
| /// &tail == word`. The hyphen can be empty if there is already a |
| /// hyphen in the head. |
| /// |
| /// The splits should go from smallest to longest and should |
| /// include no split at all. So the word "technology" could be |
| /// split into |
| /// |
| /// ```no_run |
| /// vec![("tech", "-", "nology"), |
| /// ("technol", "-", "ogy"), |
| /// ("technolo", "-", "gy"), |
| /// ("technology", "", "")]; |
| /// ``` |
| fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)>; |
| } |
| |
| /// Use this as a [`Wrapper.splitter`] to avoid any kind of |
| /// hyphenation: |
| /// |
| /// ``` |
| /// use textwrap::{Wrapper, NoHyphenation}; |
| /// |
| /// let wrapper = Wrapper::with_splitter(8, NoHyphenation); |
| /// assert_eq!(wrapper.wrap("foo bar-baz"), vec!["foo", "bar-baz"]); |
| /// ``` |
| /// |
| /// [`Wrapper.splitter`]: ../struct.Wrapper.html#structfield.splitter |
| #[derive(Clone, Debug)] |
| pub struct NoHyphenation; |
| |
| /// `NoHyphenation` implements `WordSplitter` by not splitting the |
| /// word at all. |
| impl WordSplitter for NoHyphenation { |
| fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { |
| vec![(word, "", "")] |
| } |
| } |
| |
| /// Simple and default way to split words: splitting on existing |
| /// hyphens only. |
| /// |
| /// You probably don't need to use this type since it's already used |
| /// by default by `Wrapper::new`. |
| #[derive(Clone, Debug)] |
| pub struct HyphenSplitter; |
| |
| /// `HyphenSplitter` is the default `WordSplitter` used by |
| /// `Wrapper::new`. It will split words on any existing hyphens in the |
| /// word. |
| /// |
| /// It will only use hyphens that are surrounded by alphanumeric |
| /// characters, which prevents a word like "--foo-bar" from being |
| /// split on the first or second hyphen. |
| impl WordSplitter for HyphenSplitter { |
| fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { |
| let mut triples = Vec::new(); |
| // Split on hyphens, smallest split first. We only use hyphens |
| // that are surrounded by alphanumeric characters. This is to |
| // avoid splitting on repeated hyphens, such as those found in |
| // --foo-bar. |
| let mut char_indices = word.char_indices(); |
| // Early return if the word is empty. |
| let mut prev = match char_indices.next() { |
| None => return vec![(word, "", "")], |
| Some((_, ch)) => ch, |
| }; |
| |
| // Find current word, or return early if the word only has a |
| // single character. |
| let (mut idx, mut cur) = match char_indices.next() { |
| None => return vec![(word, "", "")], |
| Some((idx, cur)) => (idx, cur), |
| }; |
| |
| for (i, next) in char_indices { |
| if prev.is_alphanumeric() && cur == '-' && next.is_alphanumeric() { |
| let (head, tail) = word.split_at(idx + 1); |
| triples.push((head, "", tail)); |
| } |
| prev = cur; |
| idx = i; |
| cur = next; |
| } |
| |
| // Finally option is no split at all. |
| triples.push((word, "", "")); |
| |
| triples |
| } |
| } |
| |
| /// A hyphenation dictionary can be used to do language-specific |
| /// hyphenation using patterns from the hyphenation crate. |
| #[cfg(feature = "hyphenation")] |
| impl WordSplitter for Standard { |
| fn split<'w>(&self, word: &'w str) -> Vec<(&'w str, &'w str, &'w str)> { |
| // Find splits based on language dictionary. |
| let mut triples = Vec::new(); |
| for n in self.hyphenate(word).breaks { |
| let (head, tail) = word.split_at(n); |
| let hyphen = if head.ends_with('-') { "" } else { "-" }; |
| triples.push((head, hyphen, tail)); |
| } |
| // Finally option is no split at all. |
| triples.push((word, "", "")); |
| |
| triples |
| } |
| } |