| //! Building blocks for advanced wrapping functionality. |
| //! |
| //! The functions and structs in this module can be used to implement |
| //! advanced wrapping functionality when the [`wrap`](super::wrap) and |
| //! [`fill`](super::fill) function don't do what you want. |
| //! |
| //! In general, you want to follow these steps when wrapping |
| //! something: |
| //! |
| //! 1. Split your input into [`Fragment`]s. These are abstract blocks |
| //! of text or content which can be wrapped into lines. You can use |
| //! [`find_words`] to do this for text. |
| //! |
| //! 2. Potentially split your fragments into smaller pieces. This |
| //! allows you to implement things like hyphenation. If wrapping |
| //! text, [`split_words`] can help you do this. |
| //! |
| //! 3. Potentially break apart fragments that are still too large to |
| //! fit on a single line. This is implemented in [`break_words`]. |
| //! |
| //! 4. Finally take your fragments and put them into lines. There are |
| //! two algorithms for this: [`wrap_optimal_fit`] and |
| //! [`wrap_first_fit`]. The former produces better line breaks, the |
| //! latter is faster. |
| //! |
| //! 5. Iterate through the slices returned by the wrapping functions |
| //! and construct your lines of output. |
| //! |
| //! Please [open an issue](https://github.com/mgeisler/textwrap/) if |
| //! the functionality here is not sufficient or if you have ideas for |
| //! improving it. We would love to hear from you! |
| |
| use crate::{Options, WordSplitter}; |
| |
| #[cfg(feature = "smawk")] |
| mod optimal_fit; |
| #[cfg(feature = "smawk")] |
| pub use optimal_fit::wrap_optimal_fit; |
| |
| /// The CSI or “Control Sequence Introducer” introduces an ANSI escape |
| /// sequence. This is typically used for colored text and will be |
| /// ignored when computing the text width. |
| const CSI: (char, char) = ('\x1b', '['); |
| /// The final bytes of an ANSI escape sequence must be in this range. |
| const ANSI_FINAL_BYTE: std::ops::RangeInclusive<char> = '\x40'..='\x7e'; |
| |
| /// Skip ANSI escape sequences. The `ch` is the current `char`, the |
| /// `chars` provide the following characters. The `chars` will be |
| /// modified if `ch` is the start of an ANSI escape sequence. |
| #[inline] |
| fn skip_ansi_escape_sequence<I: Iterator<Item = char>>(ch: char, chars: &mut I) -> bool { |
| if ch == CSI.0 && chars.next() == Some(CSI.1) { |
| // We have found the start of an ANSI escape code, typically |
| // used for colored terminal text. We skip until we find a |
| // "final byte" in the range 0x40–0x7E. |
| for ch in chars { |
| if ANSI_FINAL_BYTE.contains(&ch) { |
| return true; |
| } |
| } |
| } |
| false |
| } |
| |
| #[cfg(feature = "unicode-width")] |
| #[inline] |
| fn ch_width(ch: char) -> usize { |
| unicode_width::UnicodeWidthChar::width(ch).unwrap_or(0) |
| } |
| |
| /// First character which [`ch_width`] will classify as double-width. |
| /// Please see [`display_width`]. |
| #[cfg(not(feature = "unicode-width"))] |
| const DOUBLE_WIDTH_CUTOFF: char = '\u{1100}'; |
| |
| #[cfg(not(feature = "unicode-width"))] |
| #[inline] |
| fn ch_width(ch: char) -> usize { |
| if ch < DOUBLE_WIDTH_CUTOFF { |
| 1 |
| } else { |
| 2 |
| } |
| } |
| |
| /// Compute the display width of `text` while skipping over ANSI |
| /// escape sequences. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use textwrap::core::display_width; |
| /// |
| /// assert_eq!(display_width("Café Plain"), 10); |
| /// assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10); |
| /// ``` |
| /// |
| /// **Note:** When the `unicode-width` Cargo feature is disabled, the |
| /// width of a `char` is determined by a crude approximation which |
| /// simply counts chars below U+1100 as 1 column wide, and all other |
| /// characters as 2 columns wide. With the feature enabled, function |
| /// will correctly deal with [combining characters] in their |
| /// decomposed form (see [Unicode equivalence]). |
| /// |
| /// An example of a decomposed character is “é”, which can be |
| /// decomposed into: “e” followed by a combining acute accent: “◌́”. |
| /// Without the `unicode-width` Cargo feature, every `char` below |
| /// U+1100 has a width of 1. This includes the combining accent: |
| /// |
| /// ``` |
| /// use textwrap::core::display_width; |
| /// |
| /// assert_eq!(display_width("Cafe Plain"), 10); |
| /// #[cfg(feature = "unicode-width")] |
| /// assert_eq!(display_width("Cafe\u{301} Plain"), 10); |
| /// #[cfg(not(feature = "unicode-width"))] |
| /// assert_eq!(display_width("Cafe\u{301} Plain"), 11); |
| /// ``` |
| /// |
| /// ## Emojis and CJK Characters |
| /// |
| /// Characters such as emojis and [CJK characters] used in the |
| /// Chinese, Japanese, and Korean langauges are seen as double-width, |
| /// even if the `unicode-width` feature is disabled: |
| /// |
| /// ``` |
| /// use textwrap::core::display_width; |
| /// |
| /// assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20); |
| /// assert_eq!(display_width("你好"), 4); // “Nǐ hǎo” or “Hello” in Chinese |
| /// ``` |
| /// |
| /// # Limitations |
| /// |
| /// The displayed width of a string cannot always be computed from the |
| /// string alone. This is because the width depends on the rendering |
| /// engine used. This is particularly visible with [emoji modifier |
| /// sequences] where a base emoji is modified with, e.g., skin tone or |
| /// hair color modifiers. It is up to the rendering engine to detect |
| /// this and to produce a suitable emoji. |
| /// |
| /// A simple example is “❤️”, which consists of “❤” (U+2764: Black |
| /// Heart Symbol) followed by U+FE0F (Variation Selector-16). By |
| /// itself, “❤” is a black heart, but if you follow it with the |
| /// variant selector, you may get a wider red heart. |
| /// |
| /// A more complex example would be “👨🦰” which should depict a man |
| /// with red hair. Here the computed width is too large — and the |
| /// width differs depending on the use of the `unicode-width` feature: |
| /// |
| /// ``` |
| /// use textwrap::core::display_width; |
| /// |
| /// assert_eq!("👨🦰".chars().collect::<Vec<char>>(), ['\u{1f468}', '\u{200d}', '\u{1f9b0}']); |
| /// #[cfg(feature = "unicode-width")] |
| /// assert_eq!(display_width("👨🦰"), 4); |
| /// #[cfg(not(feature = "unicode-width"))] |
| /// assert_eq!(display_width("👨🦰"), 6); |
| /// ``` |
| /// |
| /// This happens because the grapheme consists of three code points: |
| /// “👨” (U+1F468: Man), Zero Width Joiner (U+200D), and “🦰” |
| /// (U+1F9B0: Red Hair). You can see them above in the test. With |
| /// `unicode-width` enabled, the ZWJ is correctly seen as having zero |
| /// width, without it is counted as a double-width character. |
| /// |
| /// ## Terminal Support |
| /// |
| /// Modern browsers typically do a great job at combining characters |
| /// as shown above, but terminals often struggle more. As an example, |
| /// Gnome Terminal version 3.38.1, shows “❤️” as a big red heart, but |
| /// shows "👨🦰" as “👨🦰”. |
| /// |
| /// [combining characters]: https://en.wikipedia.org/wiki/Combining_character |
| /// [Unicode equivalence]: https://en.wikipedia.org/wiki/Unicode_equivalence |
| /// [CJK characters]: https://en.wikipedia.org/wiki/CJK_characters |
| /// [emoji modifier sequences]: https://unicode.org/emoji/charts/full-emoji-modifiers.html |
| #[inline] |
| pub fn display_width(text: &str) -> usize { |
| let mut chars = text.chars(); |
| let mut width = 0; |
| while let Some(ch) = chars.next() { |
| if skip_ansi_escape_sequence(ch, &mut chars) { |
| continue; |
| } |
| width += ch_width(ch); |
| } |
| width |
| } |
| |
| /// A (text) fragment denotes the unit which we wrap into lines. |
| /// |
| /// Fragments represent an abstract _word_ plus the _whitespace_ |
| /// following the word. In case the word falls at the end of the line, |
| /// the whitespace is dropped and a so-called _penalty_ is inserted |
| /// instead (typically `"-"` if the word was hyphenated). |
| /// |
| /// For wrapping purposes, the precise content of the word, the |
| /// whitespace, and the penalty is irrelevant. All we need to know is |
| /// the displayed width of each part, which this trait provides. |
| pub trait Fragment: std::fmt::Debug { |
| /// Displayed width of word represented by this fragment. |
| fn width(&self) -> usize; |
| |
| /// Displayed width of the whitespace that must follow the word |
| /// when the word is not at the end of a line. |
| fn whitespace_width(&self) -> usize; |
| |
| /// Displayed width of the penalty that must be inserted if the |
| /// word falls at the end of a line. |
| fn penalty_width(&self) -> usize; |
| } |
| |
| /// A piece of wrappable text, including any trailing whitespace. |
| /// |
| /// A `Word` is an example of a [`Fragment`], so it has a width, |
| /// trailing whitespace, and potentially a penalty item. |
| #[derive(Debug, Copy, Clone, PartialEq, Eq)] |
| pub struct Word<'a> { |
| word: &'a str, |
| width: usize, |
| pub(crate) whitespace: &'a str, |
| pub(crate) penalty: &'a str, |
| } |
| |
| impl std::ops::Deref for Word<'_> { |
| type Target = str; |
| |
| fn deref(&self) -> &Self::Target { |
| self.word |
| } |
| } |
| |
| impl<'a> Word<'a> { |
| /// Construct a new `Word`. |
| /// |
| /// A trailing stretch of `' '` is automatically taken to be the |
| /// whitespace part of the word. |
| pub fn from(word: &str) -> Word<'_> { |
| let trimmed = word.trim_end_matches(' '); |
| Word { |
| word: trimmed, |
| width: display_width(&trimmed), |
| whitespace: &word[trimmed.len()..], |
| penalty: "", |
| } |
| } |
| |
| /// Break this word into smaller words with a width of at most |
| /// `line_width`. The whitespace and penalty from this `Word` is |
| /// added to the last piece. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use textwrap::core::Word; |
| /// assert_eq!( |
| /// Word::from("Hello! ").break_apart(3).collect::<Vec<_>>(), |
| /// vec![Word::from("Hel"), Word::from("lo! ")] |
| /// ); |
| /// ``` |
| pub fn break_apart<'b>(&'b self, line_width: usize) -> impl Iterator<Item = Word<'a>> + 'b { |
| let mut char_indices = self.word.char_indices(); |
| let mut offset = 0; |
| let mut width = 0; |
| |
| std::iter::from_fn(move || { |
| while let Some((idx, ch)) = char_indices.next() { |
| if skip_ansi_escape_sequence(ch, &mut char_indices.by_ref().map(|(_, ch)| ch)) { |
| continue; |
| } |
| |
| if width > 0 && width + ch_width(ch) > line_width { |
| let word = Word { |
| word: &self.word[offset..idx], |
| width: width, |
| whitespace: "", |
| penalty: "", |
| }; |
| offset = idx; |
| width = ch_width(ch); |
| return Some(word); |
| } |
| |
| width += ch_width(ch); |
| } |
| |
| if offset < self.word.len() { |
| let word = Word { |
| word: &self.word[offset..], |
| width: width, |
| whitespace: self.whitespace, |
| penalty: self.penalty, |
| }; |
| offset = self.word.len(); |
| return Some(word); |
| } |
| |
| None |
| }) |
| } |
| } |
| |
| impl Fragment for Word<'_> { |
| #[inline] |
| fn width(&self) -> usize { |
| self.width |
| } |
| |
| // We assume the whitespace consist of ' ' only. This allows us to |
| // compute the display width in constant time. |
| #[inline] |
| fn whitespace_width(&self) -> usize { |
| self.whitespace.len() |
| } |
| |
| // We assume the penalty is `""` or `"-"`. This allows us to |
| // compute the display width in constant time. |
| #[inline] |
| fn penalty_width(&self) -> usize { |
| self.penalty.len() |
| } |
| } |
| |
| /// Split line into words separated by regions of `' '` characters. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use textwrap::core::{find_words, Fragment, Word}; |
| /// let words = find_words("Hello World!").collect::<Vec<_>>(); |
| /// assert_eq!(words, vec![Word::from("Hello "), Word::from("World!")]); |
| /// assert_eq!(words[0].width(), 5); |
| /// assert_eq!(words[0].whitespace_width(), 1); |
| /// assert_eq!(words[0].penalty_width(), 0); |
| /// ``` |
| pub fn find_words(line: &str) -> impl Iterator<Item = Word> { |
| let mut start = 0; |
| let mut in_whitespace = false; |
| let mut char_indices = line.char_indices(); |
| |
| std::iter::from_fn(move || { |
| // for (idx, ch) in char_indices does not work, gives this |
| // error: |
| // |
| // > cannot move out of `char_indices`, a captured variable in |
| // > an `FnMut` closure |
| #[allow(clippy::while_let_on_iterator)] |
| while let Some((idx, ch)) = char_indices.next() { |
| if in_whitespace && ch != ' ' { |
| let word = Word::from(&line[start..idx]); |
| start = idx; |
| in_whitespace = ch == ' '; |
| return Some(word); |
| } |
| |
| in_whitespace = ch == ' '; |
| } |
| |
| if start < line.len() { |
| let word = Word::from(&line[start..]); |
| start = line.len(); |
| return Some(word); |
| } |
| |
| None |
| }) |
| } |
| |
| /// Split words into smaller words according to the split points given |
| /// by `options`. |
| /// |
| /// Note that we split all words, regardless of their length. This is |
| /// to more cleanly separate the business of splitting (including |
| /// automatic hyphenation) from the business of word wrapping. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use textwrap::core::{split_words, Word}; |
| /// use textwrap::{NoHyphenation, Options}; |
| /// |
| /// // The default splitter is HyphenSplitter: |
| /// let options = Options::new(80); |
| /// assert_eq!( |
| /// split_words(vec![Word::from("foo-bar")], &options).collect::<Vec<_>>(), |
| /// vec![Word::from("foo-"), Word::from("bar")] |
| /// ); |
| /// |
| /// // The NoHyphenation splitter ignores the '-': |
| /// let options = Options::new(80).splitter(NoHyphenation); |
| /// assert_eq!( |
| /// split_words(vec![Word::from("foo-bar")], &options).collect::<Vec<_>>(), |
| /// vec![Word::from("foo-bar")] |
| /// ); |
| /// ``` |
| pub fn split_words<'a, I, S, Opt>(words: I, options: Opt) -> impl Iterator<Item = Word<'a>> |
| where |
| I: IntoIterator<Item = Word<'a>>, |
| S: WordSplitter, |
| Opt: Into<Options<'a, S>>, |
| { |
| let options = options.into(); |
| |
| words.into_iter().flat_map(move |word| { |
| let mut prev = 0; |
| let mut split_points = options.splitter.split_points(&word).into_iter(); |
| std::iter::from_fn(move || { |
| if let Some(idx) = split_points.next() { |
| let need_hyphen = !word[..idx].ends_with('-'); |
| let w = Word { |
| word: &word.word[prev..idx], |
| width: display_width(&word[prev..idx]), |
| whitespace: "", |
| penalty: if need_hyphen { "-" } else { "" }, |
| }; |
| prev = idx; |
| return Some(w); |
| } |
| |
| if prev < word.word.len() || prev == 0 { |
| let w = Word { |
| word: &word.word[prev..], |
| width: display_width(&word[prev..]), |
| whitespace: word.whitespace, |
| penalty: word.penalty, |
| }; |
| prev = word.word.len() + 1; |
| return Some(w); |
| } |
| |
| None |
| }) |
| }) |
| } |
| |
| /// Forcibly break words wider than `line_width` into smaller words. |
| /// |
| /// This simply calls [`Word::break_apart`] on words that are too |
| /// wide. This means that no extra `'-'` is inserted, the word is |
| /// simply broken into smaller pieces. |
| pub fn break_words<'a, I>(words: I, line_width: usize) -> Vec<Word<'a>> |
| where |
| I: IntoIterator<Item = Word<'a>>, |
| { |
| let mut shortened_words = Vec::new(); |
| for word in words { |
| if word.width() > line_width { |
| shortened_words.extend(word.break_apart(line_width)); |
| } else { |
| shortened_words.push(word); |
| } |
| } |
| shortened_words |
| } |
| |
| /// Wrapping algorithms. |
| /// |
| /// After a text has been broken into [`Fragment`]s, the one now has |
| /// to decide how to break the fragments into lines. The simplest |
| /// algorithm for this is implemented by [`wrap_first_fit`]: it uses |
| /// no look-ahead and simply adds fragments to the line as long as |
| /// they fit. However, this can lead to poor line breaks if a large |
| /// fragment almost-but-not-quite fits on a line. When that happens, |
| /// the fragment is moved to the next line and it will leave behind a |
| /// large gap. A more advanced algorithm, implemented by |
| /// [`wrap_optimal_fit`], will take this into account. The optimal-fit |
| /// algorithm considers all possible line breaks and will attempt to |
| /// minimize the gaps left behind by overly short lines. |
| /// |
| /// While both algorithms run in linear time, the first-fit algorithm |
| /// is about 4 times faster than the optimal-fit algorithm. |
| #[derive(Debug, Copy, Clone, Eq, PartialEq)] |
| pub enum WrapAlgorithm { |
| /// Use an advanced algorithm which considers the entire paragraph |
| /// to find optimal line breaks. Implemented by |
| /// [`wrap_optimal_fit`]. |
| /// |
| /// **Note:** Only available when the `smawk` Cargo feature is |
| /// enabled. |
| #[cfg(feature = "smawk")] |
| OptimalFit, |
| /// Use a fast and simple algorithm with no look-ahead to find |
| /// line breaks. Implemented by [`wrap_first_fit`]. |
| FirstFit, |
| } |
| |
| /// Wrap abstract fragments into lines with a first-fit algorithm. |
| /// |
| /// The `line_widths` map line numbers (starting from 0) to a target |
| /// line width. This can be used to implement hanging indentation. |
| /// |
| /// The fragments must already have been split into the desired |
| /// widths, this function will not (and cannot) attempt to split them |
| /// further when arranging them into lines. |
| /// |
| /// # First-Fit Algorithm |
| /// |
| /// This implements a simple “greedy” algorithm: accumulate fragments |
| /// one by one and when a fragment no longer fits, start a new line. |
| /// There is no look-ahead, we simply take first fit of the fragments |
| /// we find. |
| /// |
| /// While fast and predictable, this algorithm can produce poor line |
| /// breaks when a long fragment is moved to a new line, leaving behind |
| /// a large gap: |
| /// |
| /// ``` |
| /// use textwrap::core::{find_words, wrap_first_fit, Word}; |
| /// |
| /// // Helper to convert wrapped lines to a Vec<String>. |
| /// fn lines_to_strings(lines: Vec<&[Word<'_>]>) -> Vec<String> { |
| /// lines.iter().map(|line| { |
| /// line.iter().map(|word| &**word).collect::<Vec<_>>().join(" ") |
| /// }).collect::<Vec<_>>() |
| /// } |
| /// |
| /// let text = "These few words will unfortunately not wrap nicely."; |
| /// let words = find_words(text).collect::<Vec<_>>(); |
| /// assert_eq!(lines_to_strings(wrap_first_fit(&words, |_| 15)), |
| /// vec!["These few words", |
| /// "will", // <-- short line |
| /// "unfortunately", |
| /// "not wrap", |
| /// "nicely."]); |
| /// |
| /// // We can avoid the short line if we look ahead: |
| /// #[cfg(feature = "smawk")] |
| /// assert_eq!(lines_to_strings(textwrap::core::wrap_optimal_fit(&words, |_| 15)), |
| /// vec!["These few", |
| /// "words will", |
| /// "unfortunately", |
| /// "not wrap", |
| /// "nicely."]); |
| /// ``` |
| /// |
| /// The [`wrap_optimal_fit`] function was used above to get better |
| /// line breaks. It uses an advanced algorithm which tries to avoid |
| /// short lines. This function is about 4 times faster than |
| /// [`wrap_optimal_fit`]. |
| /// |
| /// # Examples |
| /// |
| /// Imagine you're building a house site and you have a number of |
| /// tasks you need to execute. Things like pour foundation, complete |
| /// framing, install plumbing, electric cabling, install insulation. |
| /// |
| /// The construction workers can only work during daytime, so they |
| /// need to pack up everything at night. Because they need to secure |
| /// their tools and move machines back to the garage, this process |
| /// takes much more time than the time it would take them to simply |
| /// switch to another task. |
| /// |
| /// You would like to make a list of tasks to execute every day based |
| /// on your estimates. You can model this with a program like this: |
| /// |
| /// ``` |
| /// use textwrap::core::{wrap_first_fit, Fragment}; |
| /// |
| /// #[derive(Debug)] |
| /// struct Task<'a> { |
| /// name: &'a str, |
| /// hours: usize, // Time needed to complete task. |
| /// sweep: usize, // Time needed for a quick sweep after task during the day. |
| /// cleanup: usize, // Time needed for full cleanup if day ends with this task. |
| /// } |
| /// |
| /// impl Fragment for Task<'_> { |
| /// fn width(&self) -> usize { self.hours } |
| /// fn whitespace_width(&self) -> usize { self.sweep } |
| /// fn penalty_width(&self) -> usize { self.cleanup } |
| /// } |
| /// |
| /// // The morning tasks |
| /// let tasks = vec![ |
| /// Task { name: "Foundation", hours: 4, sweep: 2, cleanup: 3 }, |
| /// Task { name: "Framing", hours: 3, sweep: 1, cleanup: 2 }, |
| /// Task { name: "Plumbing", hours: 2, sweep: 2, cleanup: 2 }, |
| /// Task { name: "Electrical", hours: 2, sweep: 1, cleanup: 2 }, |
| /// Task { name: "Insulation", hours: 2, sweep: 1, cleanup: 2 }, |
| /// Task { name: "Drywall", hours: 3, sweep: 1, cleanup: 2 }, |
| /// Task { name: "Floors", hours: 3, sweep: 1, cleanup: 2 }, |
| /// Task { name: "Countertops", hours: 1, sweep: 1, cleanup: 2 }, |
| /// Task { name: "Bathrooms", hours: 2, sweep: 1, cleanup: 2 }, |
| /// ]; |
| /// |
| /// // Fill tasks into days, taking `day_length` into account. The |
| /// // output shows the hours worked per day along with the names of |
| /// // the tasks for that day. |
| /// fn assign_days<'a>(tasks: &[Task<'a>], day_length: usize) -> Vec<(usize, Vec<&'a str>)> { |
| /// let mut days = Vec::new(); |
| /// // Assign tasks to days. The assignment is a vector of slices, |
| /// // with a slice per day. |
| /// let assigned_days: Vec<&[Task<'a>]> = wrap_first_fit(&tasks, |i| day_length); |
| /// for day in assigned_days.iter() { |
| /// let last = day.last().unwrap(); |
| /// let work_hours: usize = day.iter().map(|t| t.hours + t.sweep).sum(); |
| /// let names = day.iter().map(|t| t.name).collect::<Vec<_>>(); |
| /// days.push((work_hours - last.sweep + last.cleanup, names)); |
| /// } |
| /// days |
| /// } |
| /// |
| /// // With a single crew working 8 hours a day: |
| /// assert_eq!( |
| /// assign_days(&tasks, 8), |
| /// [ |
| /// (7, vec!["Foundation"]), |
| /// (8, vec!["Framing", "Plumbing"]), |
| /// (7, vec!["Electrical", "Insulation"]), |
| /// (5, vec!["Drywall"]), |
| /// (7, vec!["Floors", "Countertops"]), |
| /// (4, vec!["Bathrooms"]), |
| /// ] |
| /// ); |
| /// |
| /// // With two crews working in shifts, 16 hours a day: |
| /// assert_eq!( |
| /// assign_days(&tasks, 16), |
| /// [ |
| /// (14, vec!["Foundation", "Framing", "Plumbing"]), |
| /// (15, vec!["Electrical", "Insulation", "Drywall", "Floors"]), |
| /// (6, vec!["Countertops", "Bathrooms"]), |
| /// ] |
| /// ); |
| /// ``` |
| /// |
| /// Apologies to anyone who actually knows how to build a house and |
| /// knows how long each step takes :-) |
| pub fn wrap_first_fit<T: Fragment, F: Fn(usize) -> usize>( |
| fragments: &[T], |
| line_widths: F, |
| ) -> Vec<&[T]> { |
| let mut lines = Vec::new(); |
| let mut start = 0; |
| let mut width = 0; |
| |
| for (idx, fragment) in fragments.iter().enumerate() { |
| let line_width = line_widths(lines.len()); |
| if width + fragment.width() + fragment.penalty_width() > line_width && idx > start { |
| lines.push(&fragments[start..idx]); |
| start = idx; |
| width = 0; |
| } |
| width += fragment.width() + fragment.whitespace_width(); |
| } |
| lines.push(&fragments[start..]); |
| lines |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| |
| #[cfg(feature = "unicode-width")] |
| use unicode_width::UnicodeWidthChar; |
| |
| // Like assert_eq!, but the left expression is an iterator. |
| macro_rules! assert_iter_eq { |
| ($left:expr, $right:expr) => { |
| assert_eq!($left.collect::<Vec<_>>(), $right); |
| }; |
| } |
| |
| #[test] |
| fn skip_ansi_escape_sequence_works() { |
| let blue_text = "\u{1b}[34mHello\u{1b}[0m"; |
| let mut chars = blue_text.chars(); |
| let ch = chars.next().unwrap(); |
| assert!(skip_ansi_escape_sequence(ch, &mut chars)); |
| assert_eq!(chars.next(), Some('H')); |
| } |
| |
| #[test] |
| fn emojis_have_correct_width() { |
| use unic_emoji_char::is_emoji; |
| |
| // Emojis in the Basic Latin (ASCII) and Latin-1 Supplement |
| // blocks all have a width of 1 column. This includes |
| // characters such as '#' and '©'. |
| for ch in '\u{1}'..'\u{FF}' { |
| if is_emoji(ch) { |
| let desc = format!("{:?} U+{:04X}", ch, ch as u32); |
| |
| #[cfg(feature = "unicode-width")] |
| assert_eq!(ch.width().unwrap(), 1, "char: {}", desc); |
| |
| #[cfg(not(feature = "unicode-width"))] |
| assert_eq!(ch_width(ch), 1, "char: {}", desc); |
| } |
| } |
| |
| // Emojis in the remaining blocks of the Basic Multilingual |
| // Plane (BMP), in the Supplementary Multilingual Plane (SMP), |
| // and in the Supplementary Ideographic Plane (SIP), are all 1 |
| // or 2 columns wide when unicode-width is used, and always 2 |
| // columns wide otherwise. This includes all of our favorite |
| // emojis such as 😊. |
| for ch in '\u{FF}'..'\u{2FFFF}' { |
| if is_emoji(ch) { |
| let desc = format!("{:?} U+{:04X}", ch, ch as u32); |
| |
| #[cfg(feature = "unicode-width")] |
| assert!(ch.width().unwrap() <= 2, "char: {}", desc); |
| |
| #[cfg(not(feature = "unicode-width"))] |
| assert_eq!(ch_width(ch), 2, "char: {}", desc); |
| } |
| } |
| |
| // The remaining planes contain almost no assigned code points |
| // and thus also no emojis. |
| } |
| |
| #[test] |
| fn display_width_works() { |
| assert_eq!("Café Plain".len(), 11); // “é” is two bytes |
| assert_eq!(display_width("Café Plain"), 10); |
| assert_eq!(display_width("\u{1b}[31mCafé Rouge\u{1b}[0m"), 10); |
| } |
| |
| #[test] |
| fn display_width_narrow_emojis() { |
| #[cfg(feature = "unicode-width")] |
| assert_eq!(display_width("⁉"), 1); |
| |
| // The ⁉ character is above DOUBLE_WIDTH_CUTOFF. |
| #[cfg(not(feature = "unicode-width"))] |
| assert_eq!(display_width("⁉"), 2); |
| } |
| |
| #[test] |
| fn display_width_narrow_emojis_variant_selector() { |
| #[cfg(feature = "unicode-width")] |
| assert_eq!(display_width("⁉\u{fe0f}"), 1); |
| |
| // The variant selector-16 is also counted. |
| #[cfg(not(feature = "unicode-width"))] |
| assert_eq!(display_width("⁉\u{fe0f}"), 4); |
| } |
| |
| #[test] |
| fn display_width_emojis() { |
| assert_eq!(display_width("😂😭🥺🤣✨😍🙏🥰😊🔥"), 20); |
| } |
| |
| #[test] |
| fn find_words_empty() { |
| assert_iter_eq!(find_words(""), vec![]); |
| } |
| |
| #[test] |
| fn find_words_single_word() { |
| assert_iter_eq!(find_words("foo"), vec![Word::from("foo")]); |
| } |
| |
| #[test] |
| fn find_words_two_words() { |
| assert_iter_eq!( |
| find_words("foo bar"), |
| vec![Word::from("foo "), Word::from("bar")] |
| ); |
| } |
| |
| #[test] |
| fn find_words_multiple_words() { |
| assert_iter_eq!( |
| find_words("foo bar baz"), |
| vec![Word::from("foo "), Word::from("bar "), Word::from("baz")] |
| ); |
| } |
| |
| #[test] |
| fn find_words_whitespace() { |
| assert_iter_eq!(find_words(" "), vec![Word::from(" ")]); |
| } |
| |
| #[test] |
| fn find_words_inter_word_whitespace() { |
| assert_iter_eq!( |
| find_words("foo bar"), |
| vec![Word::from("foo "), Word::from("bar")] |
| ) |
| } |
| |
| #[test] |
| fn find_words_trailing_whitespace() { |
| assert_iter_eq!(find_words("foo "), vec![Word::from("foo ")]); |
| } |
| |
| #[test] |
| fn find_words_leading_whitespace() { |
| assert_iter_eq!( |
| find_words(" foo"), |
| vec![Word::from(" "), Word::from("foo")] |
| ); |
| } |
| |
| #[test] |
| fn find_words_multi_column_char() { |
| assert_iter_eq!( |
| find_words("\u{1f920}"), // cowboy emoji 🤠 |
| vec![Word::from("\u{1f920}")] |
| ); |
| } |
| |
| #[test] |
| fn find_words_hyphens() { |
| assert_iter_eq!(find_words("foo-bar"), vec![Word::from("foo-bar")]); |
| assert_iter_eq!( |
| find_words("foo- bar"), |
| vec![Word::from("foo- "), Word::from("bar")] |
| ); |
| assert_iter_eq!( |
| find_words("foo - bar"), |
| vec![Word::from("foo "), Word::from("- "), Word::from("bar")] |
| ); |
| assert_iter_eq!( |
| find_words("foo -bar"), |
| vec![Word::from("foo "), Word::from("-bar")] |
| ); |
| } |
| |
| #[test] |
| fn split_words_no_words() { |
| assert_iter_eq!(split_words(vec![], 80), vec![]); |
| } |
| |
| #[test] |
| fn split_words_empty_word() { |
| assert_iter_eq!( |
| split_words(vec![Word::from(" ")], 80), |
| vec![Word::from(" ")] |
| ); |
| } |
| |
| #[test] |
| fn split_words_hyphen_splitter() { |
| assert_iter_eq!( |
| split_words(vec![Word::from("foo-bar")], 80), |
| vec![Word::from("foo-"), Word::from("bar")] |
| ); |
| } |
| |
| #[test] |
| fn split_words_short_line() { |
| // Note that `split_words` does not take the line width into |
| // account, that is the job of `break_words`. |
| assert_iter_eq!( |
| split_words(vec![Word::from("foobar")], 3), |
| vec![Word::from("foobar")] |
| ); |
| } |
| |
| #[test] |
| fn split_words_adds_penalty() { |
| #[derive(Debug)] |
| struct FixedSplitPoint; |
| impl WordSplitter for FixedSplitPoint { |
| fn split_points(&self, _: &str) -> Vec<usize> { |
| vec![3] |
| } |
| } |
| |
| let options = Options::new(80).splitter(FixedSplitPoint); |
| assert_iter_eq!( |
| split_words(vec![Word::from("foobar")].into_iter(), &options), |
| vec![ |
| Word { |
| word: "foo", |
| width: 3, |
| whitespace: "", |
| penalty: "-" |
| }, |
| Word { |
| word: "bar", |
| width: 3, |
| whitespace: "", |
| penalty: "" |
| } |
| ] |
| ); |
| |
| assert_iter_eq!( |
| split_words(vec![Word::from("fo-bar")].into_iter(), &options), |
| vec![ |
| Word { |
| word: "fo-", |
| width: 3, |
| whitespace: "", |
| penalty: "" |
| }, |
| Word { |
| word: "bar", |
| width: 3, |
| whitespace: "", |
| penalty: "" |
| } |
| ] |
| ); |
| } |
| } |