blob: dc0e317efdbab2c0ea50b67b1a387d61fcad3570 [file] [log] [blame]
// Copyright 2015-2017 Daniel P. Clark & array_tool Developers
//
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.
/// A grapheme iterator that produces the bytes for each grapheme.
#[derive(Debug)]
pub struct GraphemeBytesIter<'a> {
source: &'a str,
offset: usize,
grapheme_count: usize,
}
impl<'a> GraphemeBytesIter<'a> {
/// Creates a new grapheme iterator from a string source.
pub fn new(source: &'a str) -> GraphemeBytesIter<'a> {
GraphemeBytesIter {
source: source,
offset: 0,
grapheme_count: 0,
}
}
}
impl<'a> Iterator for GraphemeBytesIter<'a> {
type Item = &'a [u8];
fn next(&mut self) -> Option<&'a [u8]> {
let mut result: Option<&[u8]> = None;
let mut idx = self.offset;
for _ in self.offset..self.source.len() {
idx += 1;
if self.offset < self.source.len() {
if self.source.is_char_boundary(idx) {
let slice: &[u8] = self.source[self.offset..idx].as_bytes();
self.grapheme_count += 1;
self.offset = idx;
result = Some(slice);
break
}
}
}
result
}
}
impl<'a> ExactSizeIterator for GraphemeBytesIter<'a> {
fn len(&self) -> usize {
self.source.chars().count()
}
}
/// ToGraphemeBytesIter - create an iterator to return bytes for each grapheme in a string.
pub trait ToGraphemeBytesIter<'a> {
/// Returns a GraphemeBytesIter which you may iterate over.
///
/// # Example
/// ```
/// use array_tool::string::ToGraphemeBytesIter;
///
/// let string = "a s—d féZ";
/// let mut graphemes = string.grapheme_bytes_iter();
/// graphemes.skip(3).next();
/// ```
///
/// # Output
/// ```text
/// [226, 128, 148]
/// ```
fn grapheme_bytes_iter(&'a self) -> GraphemeBytesIter<'a>;
}
impl<'a> ToGraphemeBytesIter<'a> for str {
fn grapheme_bytes_iter(&'a self) -> GraphemeBytesIter<'a> {
GraphemeBytesIter::new(&self)
}
}
/// Squeeze - squeezes duplicate characters down to one each
pub trait Squeeze {
/// # Example
/// ```
/// use array_tool::string::Squeeze;
///
/// "yellow moon".squeeze("");
/// ```
///
/// # Output
/// ```text
/// "yelow mon"
/// ```
fn squeeze(&self, targets: &'static str) -> String;
}
impl Squeeze for str {
fn squeeze(&self, targets: &'static str) -> String {
let mut output = Vec::<u8>::with_capacity(self.len());
let everything: bool = targets.is_empty();
let chars = targets.grapheme_bytes_iter().collect::<Vec<&[u8]>>();
let mut last: &[u8] = &[0];
for character in self.grapheme_bytes_iter() {
if last != character {
output.extend_from_slice(character);
} else if !(everything || chars.contains(&character)) {
output.extend_from_slice(character);
}
last = character;
}
String::from_utf8(output).expect("squeeze failed to render String!")
}
}
/// Justify - expand line to given width.
pub trait Justify {
/// # Example
/// ```
/// use array_tool::string::Justify;
///
/// "asd asdf asd".justify_line(14);
/// ```
///
/// # Output
/// ```text
/// "asd asdf asd"
/// ```
fn justify_line(&self, width: usize) -> String;
}
impl Justify for str {
fn justify_line(&self, width: usize) -> String {
if self.is_empty() { return format!("{}", self) };
let trimmed = self.trim() ;
let len = trimmed.chars().count();
if len >= width { return self.to_string(); };
let difference = width - len;
let iter = trimmed.split_whitespace();
let spaces = iter.count() - 1;
let mut iter = trimmed.split_whitespace().peekable();
if spaces == 0 { return self.to_string(); }
let mut obj = String::with_capacity(trimmed.len() + spaces);
let div = difference / spaces;
let mut remainder = difference % spaces;
while let Some(x) = iter.next() {
obj.push_str( x );
let val = if remainder > 0 {
remainder = remainder - 1;
div + 1
} else { div };
for _ in 0..val+1 {
if let Some(_) = iter.peek() { // Don't add spaces if last word
obj.push_str( " " );
}
}
}
obj
}
}
/// Substitute string character for each index given.
pub trait SubstMarks {
/// # Example
/// ```
/// use array_tool::string::SubstMarks;
///
/// "asdf asdf asdf".subst_marks(vec![0,5,8], "Z");
/// ```
///
/// # Output
/// ```text
/// "Zsdf ZsdZ asdf"
/// ```
fn subst_marks(&self, marks: Vec<usize>, chr: &'static str) -> String;
}
impl SubstMarks for str {
fn subst_marks(&self, marks: Vec<usize>, chr: &'static str) -> String {
let mut output = Vec::<u8>::with_capacity(self.len());
let mut count = 0;
let mut last = 0;
for i in 0..self.len() {
let idx = i + 1;
if self.is_char_boundary(idx) {
if marks.contains(&count) {
count += 1;
last = idx;
output.extend_from_slice(chr.as_bytes());
continue
}
let slice: &[u8] = self[last..idx].as_bytes();
output.extend_from_slice(slice);
count += 1;
last = idx
}
}
String::from_utf8(output).expect("subst_marks failed to render String!")
}
}
/// After whitespace
pub trait AfterWhitespace {
/// Given offset method will seek from there to end of string to find the first
/// non white space. Resulting value is counted from offset.
///
/// # Example
/// ```
/// use array_tool::string::AfterWhitespace;
///
/// assert_eq!(
/// "asdf asdf asdf".seek_end_of_whitespace(6),
/// Some(9)
/// );
/// ```
fn seek_end_of_whitespace(&self, offset: usize) -> Option<usize>;
}
impl AfterWhitespace for str {
fn seek_end_of_whitespace(&self, offset: usize) -> Option<usize> {
if self.len() < offset { return None; };
let mut seeker = self[offset..self.len()].chars();
let mut val = None;
let mut indx = 0;
while let Some(x) = seeker.next() {
if x.ne(&" ".chars().next().unwrap()) {
val = Some(indx);
break;
}
indx += 1;
}
val
}
}
/// Word wrapping
pub trait WordWrap {
/// White space is treated as valid content and new lines will only be swapped in for
/// the last white space character at the end of the given width. White space may reach beyond
/// the width you've provided. You will need to trim end of lines in your own output (e.g.
/// splitting string at each new line and printing the line with trim_right). Or just trust
/// that lines that are beyond the width are just white space and only print the width -
/// ignoring tailing white space.
///
/// # Example
/// ```
/// use array_tool::string::WordWrap;
///
/// "asd asdf asd".word_wrap(8);
/// ```
///
/// # Output
/// ```text
/// "asd asdf\nasd"
/// ```
fn word_wrap(&self, width: usize) -> String;
}
// No need to worry about character encoding since we're only checking for the
// space and new line characters.
impl WordWrap for &'static str {
fn word_wrap(&self, width: usize) -> String {
let mut markers = vec![];
fn wordwrap(t: &'static str, chunk: usize, offset: usize, mrkrs: &mut Vec<usize>) -> String {
match t[offset..*vec![offset+chunk,t.len()].iter().min().unwrap()].rfind("\n") {
None => {
match t[offset..*vec![offset+chunk,t.len()].iter().min().unwrap()].rfind(" ") {
Some(x) => {
let mut eows = x; // end of white space
if offset+chunk < t.len() { // check if white space continues
match t.seek_end_of_whitespace(offset+x) {
Some(a) => {
if a.ne(&0) {
eows = x+a-1;
}
},
None => {},
}
}
if offset+chunk < t.len() { // safe to seek ahead by 1 or not end of string
if !["\n".chars().next().unwrap(), " ".chars().next().unwrap()].contains(
&t[offset+eows+1..offset+eows+2].chars().next().unwrap()
) {
mrkrs.push(offset+eows)
}
};
wordwrap(t, chunk, offset+eows+1, mrkrs)
},
None => {
if offset+chunk < t.len() { // String may continue
wordwrap(t, chunk, offset+1, mrkrs) // Recurse + 1 until next space
} else {
use string::SubstMarks;
return t.subst_marks(mrkrs.to_vec(), "\n")
}
},
}
},
Some(x) => {
wordwrap(t, chunk, offset+x+1, mrkrs)
},
}
};
wordwrap(self, width+1, 0, &mut markers)
}
}