| //! Word splitting functionality. |
| //! |
| //! To wrap text into lines, long words sometimes need to be split |
| //! across lines. The [`WordSplitter`] enum defines this |
| //! functionality. |
| |
| use crate::core::{display_width, Word}; |
| |
/// Strategy for locating the positions where a word may be broken.
///
/// Wrapping text into lines sometimes requires breaking a long word
/// across two lines. Each variant of `WordSplitter` is one way of
/// deciding where such breaks are allowed.
///
/// If the textwrap crate has been compiled with the `hyphenation`
/// Cargo feature enabled, you will find a
/// [`WordSplitter::Hyphenation`] variant. Use this variant for
/// language-aware hyphenation:
///
/// ```
/// #[cfg(feature = "hyphenation")] {
///     use hyphenation::{Language, Load, Standard};
///     use textwrap::{wrap, Options, WordSplitter};
///
///     let text = "Oxidation is the loss of electrons.";
///     let dictionary = Standard::from_embedded(Language::EnglishUS).unwrap();
///     let options = Options::new(8).word_splitter(WordSplitter::Hyphenation(dictionary));
///     assert_eq!(wrap(text, &options), vec!["Oxida-",
///                                           "tion is",
///                                           "the loss",
///                                           "of elec-",
///                                           "trons."]);
/// }
/// ```
///
/// Please see the documentation for the [hyphenation] crate for more
/// details.
///
/// [hyphenation]: https://docs.rs/hyphenation/
#[derive(Clone)]
pub enum WordSplitter {
    /// Never split words. Use this as a [`Options.word_splitter`] to
    /// avoid any kind of hyphenation:
    ///
    /// ```
    /// use textwrap::{wrap, Options, WordSplitter};
    ///
    /// let options = Options::new(8).word_splitter(WordSplitter::NoHyphenation);
    /// assert_eq!(wrap("foo bar-baz", &options),
    ///            vec!["foo", "bar-baz"]);
    /// ```
    ///
    /// [`Options.word_splitter`]: super::Options::word_splitter
    NoHyphenation,

    /// Split words on the hyphens they already contain. This is the
    /// default `WordSplitter` used by
    /// [`Options::new`](super::Options::new).
    ///
    /// Only hyphens that are surrounded by alphanumeric characters
    /// are used, which prevents a word like `"--foo-bar"` from being
    /// split into `"--"` and `"foo-bar"`.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::WordSplitter;
    ///
    /// assert_eq!(WordSplitter::HyphenSplitter.split_points("--foo-bar"),
    ///            vec![6]);
    /// ```
    HyphenSplitter,

    /// Split words with a user-supplied function.
    ///
    /// The function is handed the word and returns the indices at
    /// which the word may be split.
    ///
    /// # Examples
    ///
    /// ```
    /// use textwrap::WordSplitter;
    ///
    /// fn split_at_underscore(word: &str) -> Vec<usize> {
    ///     word.match_indices('_').map(|(idx, _)| idx + 1).collect()
    /// }
    ///
    /// let word_splitter = WordSplitter::Custom(split_at_underscore);
    /// assert_eq!(word_splitter.split_points("a_long_identifier"),
    ///            vec![2, 7]);
    /// ```
    Custom(fn(word: &str) -> Vec<usize>),

    /// Split words using a hyphenation dictionary, which gives
    /// language-specific hyphenation based on patterns from the
    /// [hyphenation] crate.
    ///
    /// **Note:** Only available when the `hyphenation` Cargo feature is
    /// enabled.
    ///
    /// [hyphenation]: https://docs.rs/hyphenation/
    #[cfg(feature = "hyphenation")]
    Hyphenation(hyphenation::Standard),
}
| |
| impl std::fmt::Debug for WordSplitter { |
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
| match self { |
| WordSplitter::NoHyphenation => f.write_str("NoHyphenation"), |
| WordSplitter::HyphenSplitter => f.write_str("HyphenSplitter"), |
| WordSplitter::Custom(_) => f.write_str("Custom(...)"), |
| #[cfg(feature = "hyphenation")] |
| WordSplitter::Hyphenation(dict) => write!(f, "Hyphenation({})", dict.language()), |
| } |
| } |
| } |
| |
| impl PartialEq<WordSplitter> for WordSplitter { |
| fn eq(&self, other: &WordSplitter) -> bool { |
| match (self, other) { |
| (WordSplitter::NoHyphenation, WordSplitter::NoHyphenation) => true, |
| (WordSplitter::HyphenSplitter, WordSplitter::HyphenSplitter) => true, |
| #[cfg(feature = "hyphenation")] |
| (WordSplitter::Hyphenation(this_dict), WordSplitter::Hyphenation(other_dict)) => { |
| this_dict.language() == other_dict.language() |
| } |
| (_, _) => false, |
| } |
| } |
| } |
| |
| impl WordSplitter { |
| /// Return all possible indices where `word` can be split. |
| /// |
| /// The indices are in the range `0..word.len()`. They point to |
| /// the index _after_ the split point, i.e., after `-` if |
| /// splitting on hyphens. This way, `word.split_at(idx)` will |
| /// break the word into two well-formed pieces. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use textwrap::WordSplitter; |
| /// assert_eq!(WordSplitter::NoHyphenation.split_points("cannot-be-split"), vec![]); |
| /// assert_eq!(WordSplitter::HyphenSplitter.split_points("can-be-split"), vec![4, 7]); |
| /// assert_eq!(WordSplitter::Custom(|word| vec![word.len()/2]).split_points("middle"), vec![3]); |
| /// ``` |
| pub fn split_points(&self, word: &str) -> Vec<usize> { |
| match self { |
| WordSplitter::NoHyphenation => Vec::new(), |
| WordSplitter::HyphenSplitter => { |
| let mut splits = Vec::new(); |
| |
| for (idx, _) in word.match_indices('-') { |
| // We only use hyphens that are surrounded by alphanumeric |
| // characters. This is to avoid splitting on repeated hyphens, |
| // such as those found in --foo-bar. |
| let prev = word[..idx].chars().next_back(); |
| let next = word[idx + 1..].chars().next(); |
| |
| if prev.filter(|ch| ch.is_alphanumeric()).is_some() |
| && next.filter(|ch| ch.is_alphanumeric()).is_some() |
| { |
| splits.push(idx + 1); // +1 due to width of '-'. |
| } |
| } |
| |
| splits |
| } |
| WordSplitter::Custom(splitter_func) => splitter_func(word), |
| #[cfg(feature = "hyphenation")] |
| WordSplitter::Hyphenation(dictionary) => { |
| use hyphenation::Hyphenator; |
| dictionary.hyphenate(word).breaks |
| } |
| } |
| } |
| } |
| |
| /// Split words into smaller words according to the split points given |
| /// by `word_splitter`. |
| /// |
| /// Note that we split all words, regardless of their length. This is |
| /// to more cleanly separate the business of splitting (including |
| /// automatic hyphenation) from the business of word wrapping. |
| pub fn split_words<'a, I>( |
| words: I, |
| word_splitter: &'a WordSplitter, |
| ) -> impl Iterator<Item = Word<'a>> |
| where |
| I: IntoIterator<Item = Word<'a>>, |
| { |
| words.into_iter().flat_map(move |word| { |
| let mut prev = 0; |
| let mut split_points = word_splitter.split_points(&word).into_iter(); |
| std::iter::from_fn(move || { |
| if let Some(idx) = split_points.next() { |
| let need_hyphen = !word[..idx].ends_with('-'); |
| let w = Word { |
| word: &word.word[prev..idx], |
| width: display_width(&word[prev..idx]), |
| whitespace: "", |
| penalty: if need_hyphen { "-" } else { "" }, |
| }; |
| prev = idx; |
| return Some(w); |
| } |
| |
| if prev < word.word.len() || prev == 0 { |
| let w = Word { |
| word: &word.word[prev..], |
| width: display_width(&word[prev..]), |
| whitespace: word.whitespace, |
| penalty: word.penalty, |
| }; |
| prev = word.word.len() + 1; |
| return Some(w); |
| } |
| |
| None |
| }) |
| }) |
| } |
| |
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `split_words` over `words` and gathers every produced
    /// fragment, so the result can be compared with `assert_eq!`.
    fn split_all<'a, I>(words: I, word_splitter: &'a WordSplitter) -> Vec<Word<'a>>
    where
        I: IntoIterator<Item = Word<'a>>,
    {
        split_words(words, word_splitter).collect()
    }

    /// Expected three-column fragment without trailing whitespace.
    fn fragment(word: &'static str, penalty: &'static str) -> Word<'static> {
        Word {
            word,
            width: 3,
            whitespace: "",
            penalty,
        }
    }

    #[test]
    fn split_words_no_words() {
        let splitter = WordSplitter::HyphenSplitter;
        assert_eq!(split_all(vec![], &splitter), vec![]);
    }

    #[test]
    fn split_words_empty_word() {
        let splitter = WordSplitter::HyphenSplitter;
        assert_eq!(
            split_all(vec![Word::from(" ")], &splitter),
            vec![Word::from(" ")]
        );
    }

    #[test]
    fn split_words_single_word() {
        let splitter = WordSplitter::HyphenSplitter;
        assert_eq!(
            split_all(vec![Word::from("foobar")], &splitter),
            vec![Word::from("foobar")]
        );
    }

    #[test]
    fn split_words_hyphen_splitter() {
        let splitter = WordSplitter::HyphenSplitter;
        assert_eq!(
            split_all(vec![Word::from("foo-bar")], &splitter),
            vec![Word::from("foo-"), Word::from("bar")]
        );
    }

    #[test]
    fn split_words_no_hyphenation() {
        let splitter = WordSplitter::NoHyphenation;
        assert_eq!(
            split_all(vec![Word::from("foo-bar")], &splitter),
            vec![Word::from("foo-bar")]
        );
    }

    #[test]
    fn split_words_adds_penalty() {
        let fixed_split_point = |_: &str| vec![3];
        let splitter = WordSplitter::Custom(fixed_split_point);

        // A fragment that does not end in a hyphen gets a "-" penalty.
        assert_eq!(
            split_all(vec![Word::from("foobar")].into_iter(), &splitter),
            vec![fragment("foo", "-"), fragment("bar", "")]
        );

        // A fragment that already ends in a hyphen gets no penalty.
        assert_eq!(
            split_all(vec![Word::from("fo-bar")].into_iter(), &splitter),
            vec![fragment("fo-", ""), fragment("bar", "")]
        );
    }
}