blob: 1f8fdc98960c10a5a67bf2f145f968b9b8f3f0b8 [file] [log] [blame] [edit]
//! Utilities for common diff related operations.
//!
//! This module provides specialized utilities and simplified diff operations
//! for common operations. It's useful when you want to work with text diffs
//! and you're interested in getting vectors of these changes directly.
//!
//! # Slice Remapping
//!
//! When working with [`TextDiff`] it's common that one takes advantage of the
//! built-in tokenization of the differ. This for instance lets you do
//! grapheme level diffs. This is implemented by the differ generating rather
//! small slices of strings and running a diff algorithm over them.
//!
//! The downside of this is that all the [`DiffOp`] objects produced by the
//! diffing algorithm encode operations on these rather small slices. For
//! a lot of use cases this is not what one wants which can make this very
//! inconvenient. This module provides a [`TextDiffRemapper`] which lets you
//! map from the ranges that the [`TextDiff`] returns to the original input
//! strings. For more information see [`TextDiffRemapper`].
//!
//! # Simple Diff Functions
//!
//! This module provides a range of common text diff functions that will
//! produce vectors of `(change_tag, value)` tuples. They will automatically
//! optimize towards returning the most useful slice that one would expect for
//! the type of diff performed.
use std::hash::Hash;
use std::ops::{Index, Range};
use crate::{
capture_diff_slices, Algorithm, ChangeTag, DiffOp, DiffableStr, DiffableStrRef, TextDiff,
};
/// Maps ranges of token indexes back to slices of the untokenized source.
///
/// `indexes[i]` is the offset range that token `i` occupies in `source`,
/// computed under the assumption that the tokens lie back to back starting
/// at offset zero.
struct SliceRemapper<'x, T: ?Sized> {
    source: &'x T,
    indexes: Vec<Range<usize>>,
}

impl<'x, T: DiffableStr + ?Sized> SliceRemapper<'x, T> {
    /// Builds a remapper for `source` from the tokens it was split into.
    ///
    /// Only the length of every token is consulted: the stored offsets are
    /// the running sum of those lengths.
    fn new(source: &'x T, slices: &[&'x T]) -> SliceRemapper<'x, T> {
        let indexes = slices
            .iter()
            .scan(0, |offset, item| {
                let start = *offset;
                let end = start + item.len();
                *offset = end;
                Some(start..end)
            })
            .collect();
        SliceRemapper { source, indexes }
    }

    /// Returns the part of the source covered by the tokens in `range`.
    ///
    /// `range` is expressed in token indexes. `None` is returned when the
    /// range is reversed or refers to a token that does not exist. An empty
    /// range that starts at an existing token yields an empty slice located
    /// at the start of that token.
    fn slice(&self, range: Range<usize>) -> Option<&'x T> {
        // A reversed range would otherwise reach `source.slice` with
        // `start > end`; report it as "not available" instead.
        if range.start > range.end {
            return None;
        }
        let start = self.indexes.get(range.start)?.start;
        // Handle empty ranges before computing `range.end - 1`, which would
        // underflow for `0..0`.
        if range.start == range.end {
            return Some(self.source.slice(start..start));
        }
        let end = self.indexes.get(range.end - 1)?.end;
        Some(self.source.slice(start..end))
    }
}
/// Indexing by a token range yields the matching part of the source.
///
/// This is the panicking counterpart of `SliceRemapper::slice`.
impl<'x, T: DiffableStr + ?Sized> Index<Range<usize>> for SliceRemapper<'x, T> {
    type Output = T;

    /// Panics with `"out of bounds"` when `slice` returns `None` for `range`.
    fn index(&self, range: Range<usize>) -> &Self::Output {
        let remapped = self.slice(range);
        remapped.expect("out of bounds")
    }
}
/// A remapper that can remap diff ops to the original slices.
///
/// The idea here is that when a [`TextDiff`](crate::TextDiff) is created from
/// two strings and the internal tokenization is used, this remapper can take
/// a range in the tokenized sequences and remap it to the original string.
/// This is particularly useful when you want to do things like character or
/// grapheme level diffs but you do not want to iterate over many small
/// sequences and would rather get large consecutive ones from the source.
///
/// ```rust
/// use similar::{ChangeTag, TextDiff};
/// use similar::utils::TextDiffRemapper;
///
/// let old = "yo! foo bar baz";
/// let new = "yo! foo bor baz";
/// let diff = TextDiff::from_words(old, new);
/// let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
/// let changes: Vec<_> = diff.ops()
///     .iter()
///     .flat_map(move |x| remapper.iter_slices(x))
///     .collect();
///
/// assert_eq!(changes, vec![
///     (ChangeTag::Equal, "yo! foo "),
///     (ChangeTag::Delete, "bar"),
///     (ChangeTag::Insert, "bor"),
///     (ChangeTag::Equal, " baz")
/// ]);
/// ```
pub struct TextDiffRemapper<'x, T: ?Sized> {
// Remapper over the tokens of the old string.
old: SliceRemapper<'x, T>,
// Remapper over the tokens of the new string.
new: SliceRemapper<'x, T>,
}
impl<'x, T: DiffableStr + ?Sized> TextDiffRemapper<'x, T> {
    /// Creates a new remapper from strings and slices.
    ///
    /// `old_slices` and `new_slices` are the tokens that `old` and `new`
    /// were split into; ranges passed to the other methods index into them.
    pub fn new(
        old_slices: &[&'x T],
        new_slices: &[&'x T],
        old: &'x T,
        new: &'x T,
    ) -> TextDiffRemapper<'x, T> {
        TextDiffRemapper {
            old: SliceRemapper::new(old, old_slices),
            new: SliceRemapper::new(new, new_slices),
        }
    }

    /// Creates a new remapper from a text diff and the original strings.
    ///
    /// The token slices are taken from `diff`; `old` and `new` are the
    /// strings the returned slices will point into.
    pub fn from_text_diff<'old, 'new, 'bufs>(
        diff: &TextDiff<'old, 'new, 'bufs, T>,
        old: &'x T,
        new: &'x T,
    ) -> TextDiffRemapper<'x, T>
    where
        'old: 'x,
        'new: 'x,
    {
        Self::new(diff.old_slices(), diff.new_slices(), old, new)
    }

    /// Slices into the old string.
    ///
    /// `range` is a range of token indexes; `None` is returned when it
    /// cannot be mapped onto the old string.
    pub fn slice_old(&self, range: Range<usize>) -> Option<&'x T> {
        self.old.slice(range)
    }

    /// Slices into the new string.
    ///
    /// `range` is a range of token indexes; `None` is returned when it
    /// cannot be mapped onto the new string.
    pub fn slice_new(&self, range: Range<usize>) -> Option<&'x T> {
        self.new.slice(range)
    }

    /// Given a diffop yields the changes it encodes against the original strings.
    ///
    /// This is the same as the [`DiffOp::iter_slices`] method.
    ///
    /// ## Panics
    ///
    /// This method can panic if the input strings passed to the constructor
    /// are incompatible with the input strings passed to the diffing algorithm.
    /// The panic is raised while the returned iterator is consumed, not when
    /// this method is called.
    pub fn iter_slices(&self, op: &DiffOp) -> impl Iterator<Item = (ChangeTag, &'x T)> {
        // note: this is equivalent to the code in `DiffOp::iter_slices`.  It is
        // a copy/paste because the slicing currently cannot be well abstracted
        // because of lifetime issues caused by the `Index` trait.
        //
        // Every op yields one leading change; only `Replace` adds a trailing
        // insert.  The slices are resolved eagerly so the returned iterator
        // does not borrow `self`.
        let (first, second) = match *op {
            DiffOp::Equal { old_index, len, .. } => (
                (ChangeTag::Equal, self.old.slice(old_index..old_index + len)),
                None,
            ),
            DiffOp::Insert {
                new_index, new_len, ..
            } => (
                (
                    ChangeTag::Insert,
                    self.new.slice(new_index..new_index + new_len),
                ),
                None,
            ),
            DiffOp::Delete {
                old_index, old_len, ..
            } => (
                (
                    ChangeTag::Delete,
                    self.old.slice(old_index..old_index + old_len),
                ),
                None,
            ),
            DiffOp::Replace {
                old_index,
                old_len,
                new_index,
                new_len,
            } => (
                (
                    ChangeTag::Delete,
                    self.old.slice(old_index..old_index + old_len),
                ),
                Some((
                    ChangeTag::Insert,
                    self.new.slice(new_index..new_index + new_len),
                )),
            ),
        };
        Some(first)
            .into_iter()
            .chain(second)
            .map(|(tag, opt_val)| (tag, opt_val.expect("slice out of bounds")))
    }
}
/// Diffs two slices and returns the changes as a vector.
///
/// The ops computed by [`capture_diff_slices`] with the given algorithm are
/// expanded through [`DiffOp::iter_slices`], so every entry pairs a
/// [`ChangeTag`] with the sub-slice of `old` or `new` it refers to.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_slices;
///
/// let old = "foo\nbar\nbaz".lines().collect::<Vec<_>>();
/// let new = "foo\nbar\nBAZ".lines().collect::<Vec<_>>();
/// assert_eq!(diff_slices(Algorithm::Myers, &old, &new), vec![
///     (ChangeTag::Equal, &["foo", "bar"][..]),
///     (ChangeTag::Delete, &["baz"][..]),
///     (ChangeTag::Insert, &["BAZ"][..]),
/// ]);
/// ```
pub fn diff_slices<'x, T: PartialEq + Hash + Ord>(
    alg: Algorithm,
    old: &'x [T],
    new: &'x [T],
) -> Vec<(ChangeTag, &'x [T])> {
    let ops = capture_diff_slices(alg, old, new);
    let mut changes = Vec::new();
    for op in ops.iter() {
        changes.extend(op.iter_slices(old, new));
    }
    changes
}
/// Character level diff of two strings, returned as a vector of changes.
///
/// The diff itself runs on single characters, but the ops are remapped
/// through a [`TextDiffRemapper`] so that each entry is a connected slice of
/// the original strings rather than a single character.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_chars;
///
/// assert_eq!(diff_chars(Algorithm::Myers, "foobarbaz", "fooBARbaz"), vec![
///     (ChangeTag::Equal, "foo"),
///     (ChangeTag::Delete, "bar"),
///     (ChangeTag::Insert, "BAR"),
///     (ChangeTag::Equal, "baz"),
/// ]);
/// ```
pub fn diff_chars<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let old = old.as_diffable_str();
    let new = new.as_diffable_str();
    let diff = TextDiff::configure().algorithm(alg).diff_chars(old, new);
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);

    // Expand every op into its one or two remapped changes.
    let mut changes = Vec::new();
    for op in diff.ops().iter() {
        changes.extend(remapper.iter_slices(op));
    }
    changes
}
/// Word level diff of two strings, returned as a vector of changes.
///
/// The diff itself runs on word tokens, but the ops are remapped through a
/// [`TextDiffRemapper`] so that each entry is a connected slice of the
/// original strings rather than a single word.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_words;
///
/// assert_eq!(diff_words(Algorithm::Myers, "foo bar baz", "foo bor baz"), vec![
///     (ChangeTag::Equal, "foo "),
///     (ChangeTag::Delete, "bar"),
///     (ChangeTag::Insert, "bor"),
///     (ChangeTag::Equal, " baz"),
/// ]);
/// ```
pub fn diff_words<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let old = old.as_diffable_str();
    let new = new.as_diffable_str();
    let diff = TextDiff::configure().algorithm(alg).diff_words(old, new);
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);

    // Expand every op into its one or two remapped changes.
    let mut changes = Vec::new();
    for op in diff.ops().iter() {
        changes.extend(remapper.iter_slices(op));
    }
    changes
}
/// Unicode word level diff of two strings, returned as a vector of changes.
///
/// The diff itself runs on unicode word tokens, but the ops are remapped
/// through a [`TextDiffRemapper`] so that each entry is a connected slice of
/// the original strings rather than a single word.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_unicode_words;
///
/// let old = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
/// let new = "The quick (\"brown\") fox can't jump 9.84 meters, right?";
/// assert_eq!(diff_unicode_words(Algorithm::Myers, old, new), vec![
///     (ChangeTag::Equal, "The quick (\"brown\") fox can\'t jump "),
///     (ChangeTag::Delete, "32.3"),
///     (ChangeTag::Insert, "9.84"),
///     (ChangeTag::Equal, " "),
///     (ChangeTag::Delete, "feet"),
///     (ChangeTag::Insert, "meters"),
///     (ChangeTag::Equal, ", right?")
/// ]);
/// ```
///
/// This requires the `unicode` feature.
#[cfg(feature = "unicode")]
pub fn diff_unicode_words<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let old = old.as_diffable_str();
    let new = new.as_diffable_str();
    let diff = TextDiff::configure()
        .algorithm(alg)
        .diff_unicode_words(old, new);
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);

    // Expand every op into its one or two remapped changes.
    let mut changes = Vec::new();
    for op in diff.ops().iter() {
        changes.extend(remapper.iter_slices(op));
    }
    changes
}
/// Grapheme level diff of two strings, returned as a vector of changes.
///
/// The diff itself runs on single graphemes, but the ops are remapped
/// through a [`TextDiffRemapper`] so that each entry is a connected slice of
/// the original strings rather than a single grapheme.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_graphemes;
///
/// let old = "The flag of Austria is 🇦🇹";
/// let new = "The flag of Albania is 🇦🇱";
/// assert_eq!(diff_graphemes(Algorithm::Myers, old, new), vec![
///     (ChangeTag::Equal, "The flag of A"),
///     (ChangeTag::Delete, "ustr"),
///     (ChangeTag::Insert, "lban"),
///     (ChangeTag::Equal, "ia is "),
///     (ChangeTag::Delete, "🇦🇹"),
///     (ChangeTag::Insert, "🇦🇱"),
/// ]);
/// ```
///
/// This requires the `unicode` feature.
#[cfg(feature = "unicode")]
pub fn diff_graphemes<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let old = old.as_diffable_str();
    let new = new.as_diffable_str();
    let diff = TextDiff::configure()
        .algorithm(alg)
        .diff_graphemes(old, new);
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);

    // Expand every op into its one or two remapped changes.
    let mut changes = Vec::new();
    for op in diff.ops().iter() {
        changes.extend(remapper.iter_slices(op));
    }
    changes
}
/// Line level diff of two strings, returned as a vector of changes.
///
/// Unlike [`diff_chars`] or [`diff_slices`], which return connected slices,
/// this function returns one `(change_tag, line)` entry for every line.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_lines;
///
/// assert_eq!(diff_lines(Algorithm::Myers, "foo\nbar\nbaz\nblah", "foo\nbar\nbaz\nblurgh"), vec![
///     (ChangeTag::Equal, "foo\n"),
///     (ChangeTag::Equal, "bar\n"),
///     (ChangeTag::Equal, "baz\n"),
///     (ChangeTag::Delete, "blah"),
///     (ChangeTag::Insert, "blurgh"),
/// ]);
/// ```
pub fn diff_lines<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let diff = TextDiff::configure().algorithm(alg).diff_lines(old, new);

    // Flatten every change into its tag and the line it carries.
    let mut changes = Vec::new();
    for change in diff.iter_all_changes() {
        changes.push((change.tag(), change.value()));
    }
    changes
}
/// Checks that token ranges over a word-tokenized string map back onto the
/// expected parts of the original string, and that a range reaching past the
/// last token is reported as `None`.
#[test]
fn test_remapper() {
    let a = "foo bar baz";
    let words = a.tokenize_words();
    let remap = SliceRemapper::new(a, &words);
    assert_eq!(remap.slice(0..3), Some("foo bar"));
    assert_eq!(remap.slice(1..3), Some(" bar"));
    assert_eq!(remap.slice(0..1), Some("foo"));
    assert_eq!(remap.slice(0..5), Some("foo bar baz"));
    assert_eq!(remap.slice(0..6), None);
}