vendor/similar-2.6.0/src/algorithms/utils.rs - toolchain/rustc - Git at Google

 use std::collections::hash_map::Entry;
 use std::collections::HashMap;
 use std::fmt::Debug;
 use std::hash::{Hash, Hasher};
 use std::ops::{Add, Index, Range};

 /// Utility function to check if a range is empty that works on older rust versions
 #[inline(always)]
 #[allow(clippy::neg_cmp_op_on_partial_ord)]
 pub fn is_empty_range<T: PartialOrd<T>>(range: &Range<T>) -> bool {
     !(range.start < range.end)
 }

 /// Represents an item in the vector returned by [`unique`].
 ///
 /// It compares like the underlying item does it was created from but
 /// carries the index it was originally created from.
 pub struct UniqueItem<'a, Idx: ?Sized> {
     lookup: &'a Idx,
     index: usize,
 }

 impl<'a, Idx: ?Sized> UniqueItem<'a, Idx>
 where
     Idx: Index<usize>,
 {
     /// Returns the value.
     #[inline(always)]
     pub fn value(&self) -> &Idx::Output {
         &self.lookup[self.index]
     }

     /// Returns the original index.
     #[inline(always)]
     pub fn original_index(&self) -> usize {
         self.index
     }
 }

 impl<'a, Idx: Index<usize> + 'a> Debug for UniqueItem<'a, Idx>
 where
     Idx::Output: Debug,
 {
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
         f.debug_struct("UniqueItem")
             .field("value", &self.value())
             .field("original_index", &self.original_index())
             .finish()
     }
 }

 impl<'a, 'b, A, B> PartialEq<UniqueItem<'a, A>> for UniqueItem<'b, B>
 where
     A: Index<usize> + 'b + ?Sized,
     B: Index<usize> + 'b + ?Sized,
     B::Output: PartialEq<A::Output>,
 {
     #[inline(always)]
     fn eq(&self, other: &UniqueItem<'a, A>) -> bool {
         self.value() == other.value()
     }
 }

 /// Returns only unique items in the sequence as vector.
 ///
 /// Each item is wrapped in a [`UniqueItem`] so that both the value and the
 /// index can be extracted.
 pub fn unique<Idx>(lookup: &Idx, range: Range<usize>) -> Vec<UniqueItem<Idx>>
 where
     Idx: Index<usize> + ?Sized,
     Idx::Output: Hash + Eq,
 {
     let mut by_item = HashMap::new();
     for index in range {
         match by_item.entry(&lookup[index]) {
             Entry::Vacant(entry) => {
                 entry.insert(Some(index));
             }
             Entry::Occupied(mut entry) => {
                 let entry = entry.get_mut();
                 if entry.is_some() {
                     *entry = None
                 }
             }
         }
     }
     let mut rv = by_item
         .into_iter()
         .filter_map(|(_, x)| x)
         .map(|index| UniqueItem { lookup, index })
         .collect::<Vec<_>>();
     rv.sort_by_key(|a| a.original_index());
     rv
 }

 /// Given two lookups and ranges calculates the length of the common prefix.
 pub fn common_prefix_len<Old, New>(
     old: &Old,
     old_range: Range<usize>,
     new: &New,
     new_range: Range<usize>,
 ) -> usize
 where
     Old: Index<usize> + ?Sized,
     New: Index<usize> + ?Sized,
     New::Output: PartialEq<Old::Output>,
 {
     if is_empty_range(&old_range) || is_empty_range(&new_range) {
         return 0;
     }
     new_range
         .zip(old_range)
         .take_while(
             #[inline(always)]
             |x| new[x.0] == old[x.1],
         )
         .count()
 }

 /// Given two lookups and ranges calculates the length of common suffix.
 pub fn common_suffix_len<Old, New>(
     old: &Old,
     old_range: Range<usize>,
     new: &New,
     new_range: Range<usize>,
 ) -> usize
 where
     Old: Index<usize> + ?Sized,
     New: Index<usize> + ?Sized,
     New::Output: PartialEq<Old::Output>,
 {
     if is_empty_range(&old_range) || is_empty_range(&new_range) {
         return 0;
     }
     new_range
         .rev()
         .zip(old_range.rev())
         .take_while(
             #[inline(always)]
             |x| new[x.0] == old[x.1],
         )
         .count()
 }

 struct OffsetLookup<Int> {
     offset: usize,
     vec: Vec<Int>,
 }

 impl<Int> Index<usize> for OffsetLookup<Int> {
     type Output = Int;

     #[inline(always)]
     fn index(&self, index: usize) -> &Self::Output {
         &self.vec[index - self.offset]
     }
 }

 /// A utility struct to convert distinct items to unique integers.
 ///
 /// This can be helpful on larger inputs to speed up the comparisons
 /// performed by doing a first pass where the data set gets reduced
 /// to (small) integers.
 ///
 /// The idea is that instead of passing two sequences to a diffling algorithm
 /// you first pass it via [`IdentifyDistinct`]:
 ///
 /// ```rust
 /// use similar::capture_diff;
 /// use similar::algorithms::{Algorithm, IdentifyDistinct};
 ///
 /// let old = &["foo", "bar", "baz"][..];
 /// let new = &["foo", "blah", "baz"][..];
 /// let h = IdentifyDistinct::<u32>::new(old, 0..old.len(), new, 0..new.len());
 /// let ops = capture_diff(
 ///     Algorithm::Myers,
 ///     h.old_lookup(),
 ///     h.old_range(),
 ///     h.new_lookup(),
 ///     h.new_range(),
 /// );
 /// ```
 ///
 /// The indexes are the same as with the passed source ranges.
 pub struct IdentifyDistinct<Int> {
     old: OffsetLookup<Int>,
     new: OffsetLookup<Int>,
 }

 impl<Int> IdentifyDistinct<Int>
 where
     Int: Add<Output = Int> + From<u8> + Default + Copy,
 {
     /// Creates an int hasher for two sequences.
     pub fn new<Old, New>(
         old: &Old,
         old_range: Range<usize>,
         new: &New,
         new_range: Range<usize>,
     ) -> Self
     where
         Old: Index<usize> + ?Sized,
         Old::Output: Eq + Hash,
         New: Index<usize> + ?Sized,
         New::Output: Eq + Hash + PartialEq<Old::Output>,
     {
         enum Key<'old, 'new, Old: ?Sized, New: ?Sized> {
             Old(&'old Old),
             New(&'new New),
         }

         impl<'old, 'new, Old, New> Hash for Key<'old, 'new, Old, New>
         where
             Old: Hash + ?Sized,
             New: Hash + ?Sized,
         {
             fn hash<H: Hasher>(&self, state: &mut H) {
                 match *self {
                     Key::Old(val) => val.hash(state),
                     Key::New(val) => val.hash(state),
                 }
             }
         }

         impl<'old, 'new, Old, New> PartialEq for Key<'old, 'new, Old, New>
         where
             Old: Eq + ?Sized,
             New: Eq + PartialEq<Old> + ?Sized,
         {
             #[inline(always)]
             fn eq(&self, other: &Self) -> bool {
                 match (self, other) {
                     (Key::Old(a), Key::Old(b)) => a == b,
                     (Key::New(a), Key::New(b)) => a == b,
                     (Key::Old(a), Key::New(b)) | (Key::New(b), Key::Old(a)) => b == a,
                 }
             }
         }

         impl<'old, 'new, Old, New> Eq for Key<'old, 'new, Old, New>
         where
             Old: Eq + ?Sized,
             New: Eq + PartialEq<Old> + ?Sized,
         {
         }

         let mut map = HashMap::new();
         let mut old_seq = Vec::new();
         let mut new_seq = Vec::new();
         let mut next_id = Int::default();
         let step = Int::from(1);
         let old_start = old_range.start;
         let new_start = new_range.start;

         for idx in old_range {
             let item = Key::Old(&old[idx]);
             let id = match map.entry(item) {
                 Entry::Occupied(o) => *o.get(),
                 Entry::Vacant(v) => {
                     let id = next_id;
                     next_id = next_id + step;
                     *v.insert(id)
                 }
             };
             old_seq.push(id);
         }

         for idx in new_range {
             let item = Key::New(&new[idx]);
             let id = match map.entry(item) {
                 Entry::Occupied(o) => *o.get(),
                 Entry::Vacant(v) => {
                     let id = next_id;
                     next_id = next_id + step;
                     *v.insert(id)
                 }
             };
             new_seq.push(id);
         }

         IdentifyDistinct {
             old: OffsetLookup {
                 offset: old_start,
                 vec: old_seq,
             },
             new: OffsetLookup {
                 offset: new_start,
                 vec: new_seq,
             },
         }
     }

     /// Returns a lookup for the old side.
     pub fn old_lookup(&self) -> &impl Index<usize, Output = Int> {
         &self.old
     }

     /// Returns a lookup for the new side.
     pub fn new_lookup(&self) -> &impl Index<usize, Output = Int> {
         &self.new
     }

     /// Convenience method to get back the old range.
     pub fn old_range(&self) -> Range<usize> {
         self.old.offset..self.old.offset + self.old.vec.len()
     }

     /// Convenience method to get back the new range.
     pub fn new_range(&self) -> Range<usize> {
         self.new.offset..self.new.offset + self.new.vec.len()
     }
 }

 #[test]
 fn test_unique() {
     let u = unique(&vec!['a', 'b', 'c', 'd', 'd', 'b'], 0..6)
         .into_iter()
         .map(|x| (*x.value(), x.original_index()))
         .collect::<Vec<_>>();
     assert_eq!(u, vec![('a', 0), ('c', 2)]);
 }

 #[test]
 fn test_int_hasher() {
     let ih = IdentifyDistinct::<u8>::new(
         &["", "foo", "bar", "baz"][..],
         1..4,
         &["", "foo", "blah", "baz"][..],
         1..4,
     );
     assert_eq!(ih.old_lookup()[1], 0);
     assert_eq!(ih.old_lookup()[2], 1);
     assert_eq!(ih.old_lookup()[3], 2);
     assert_eq!(ih.new_lookup()[1], 0);
     assert_eq!(ih.new_lookup()[2], 3);
     assert_eq!(ih.new_lookup()[3], 2);
     assert_eq!(ih.old_range(), 1..4);
     assert_eq!(ih.new_range(), 1..4);
 }

 #[test]
 fn test_common_prefix_len() {
     assert_eq!(
         common_prefix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
         0
     );
     assert_eq!(
         common_prefix_len("foobarbaz".as_bytes(), 0..9, "foobarblah".as_bytes(), 0..10),
         7
     );
     assert_eq!(
         common_prefix_len("foobarbaz".as_bytes(), 0..9, "blablabla".as_bytes(), 0..9),
         0
     );
     assert_eq!(
         common_prefix_len("foobarbaz".as_bytes(), 3..9, "foobarblah".as_bytes(), 3..10),
         4
     );
 }

 #[test]
 fn test_common_suffix_len() {
     assert_eq!(
         common_suffix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
         0
     );
     assert_eq!(
         common_suffix_len("1234".as_bytes(), 0..4, "X0001234".as_bytes(), 0..8),
         4
     );
     assert_eq!(
         common_suffix_len("1234".as_bytes(), 0..4, "Xxxx".as_bytes(), 0..4),
         0
     );
     assert_eq!(
         common_suffix_len("1234".as_bytes(), 2..4, "01234".as_bytes(), 2..5),
         2
     );
 }
	use std::collections::hash_map::Entry;
	use std::collections::HashMap;
	use std::fmt::Debug;
	use std::hash::{Hash, Hasher};
	use std::ops::{Add, Index, Range};

	/// Utility function to check if a range is empty that works on older rust versions
	#[inline(always)]
	#[allow(clippy::neg_cmp_op_on_partial_ord)]
	pub fn is_empty_range<T: PartialOrd<T>>(range: &Range<T>) -> bool {
	!(range.start < range.end)
	}

	/// Represents an item in the vector returned by [`unique`].
	///
	/// It compares like the underlying item does it was created from but
	/// carries the index it was originally created from.
	pub struct UniqueItem<'a, Idx: ?Sized> {
	lookup: &'a Idx,
	index: usize,
	}

	impl<'a, Idx: ?Sized> UniqueItem<'a, Idx>
	where
	Idx: Index<usize>,
	{
	/// Returns the value.
	#[inline(always)]
	pub fn value(&self) -> &Idx::Output {
	&self.lookup[self.index]
	}

	/// Returns the original index.
	#[inline(always)]
	pub fn original_index(&self) -> usize {
	self.index
	}
	}

	impl<'a, Idx: Index<usize> + 'a> Debug for UniqueItem<'a, Idx>
	where
	Idx::Output: Debug,
	{
	fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
	f.debug_struct("UniqueItem")
	.field("value", &self.value())
	.field("original_index", &self.original_index())
	.finish()
	}
	}

	impl<'a, 'b, A, B> PartialEq<UniqueItem<'a, A>> for UniqueItem<'b, B>
	where
	A: Index<usize> + 'b + ?Sized,
	B: Index<usize> + 'b + ?Sized,
	B::Output: PartialEq<A::Output>,
	{
	#[inline(always)]
	fn eq(&self, other: &UniqueItem<'a, A>) -> bool {
	self.value() == other.value()
	}
	}

	/// Returns only unique items in the sequence as vector.
	///
	/// Each item is wrapped in a [`UniqueItem`] so that both the value and the
	/// index can be extracted.
	pub fn unique<Idx>(lookup: &Idx, range: Range<usize>) -> Vec<UniqueItem<Idx>>
	where
	Idx: Index<usize> + ?Sized,
	Idx::Output: Hash + Eq,
	{
	let mut by_item = HashMap::new();
	for index in range {
	match by_item.entry(&lookup[index]) {
	Entry::Vacant(entry) => {
	entry.insert(Some(index));
	}
	Entry::Occupied(mut entry) => {
	let entry = entry.get_mut();
	if entry.is_some() {
	*entry = None
	}
	}
	}
	}
	let mut rv = by_item
	.into_iter()
	.filter_map(\|(_, x)\| x)
	.map(\|index\| UniqueItem { lookup, index })
	.collect::<Vec<_>>();
	rv.sort_by_key(\|a\| a.original_index());
	rv
	}

	/// Given two lookups and ranges calculates the length of the common prefix.
	pub fn common_prefix_len<Old, New>(
	old: &Old,
	old_range: Range<usize>,
	new: &New,
	new_range: Range<usize>,
	) -> usize
	where
	Old: Index<usize> + ?Sized,
	New: Index<usize> + ?Sized,
	New::Output: PartialEq<Old::Output>,
	{
	if is_empty_range(&old_range) \|\| is_empty_range(&new_range) {
	return 0;
	}
	new_range
	.zip(old_range)
	.take_while(
	#[inline(always)]
	\|x\| new[x.0] == old[x.1],
	)
	.count()
	}

	/// Given two lookups and ranges calculates the length of common suffix.
	pub fn common_suffix_len<Old, New>(
	old: &Old,
	old_range: Range<usize>,
	new: &New,
	new_range: Range<usize>,
	) -> usize
	where
	Old: Index<usize> + ?Sized,
	New: Index<usize> + ?Sized,
	New::Output: PartialEq<Old::Output>,
	{
	if is_empty_range(&old_range) \|\| is_empty_range(&new_range) {
	return 0;
	}
	new_range
	.rev()
	.zip(old_range.rev())
	.take_while(
	#[inline(always)]
	\|x\| new[x.0] == old[x.1],
	)
	.count()
	}

	struct OffsetLookup<Int> {
	offset: usize,
	vec: Vec<Int>,
	}

	impl<Int> Index<usize> for OffsetLookup<Int> {
	type Output = Int;

	#[inline(always)]
	fn index(&self, index: usize) -> &Self::Output {
	&self.vec[index - self.offset]
	}
	}

	/// A utility struct to convert distinct items to unique integers.
	///
	/// This can be helpful on larger inputs to speed up the comparisons
	/// performed by doing a first pass where the data set gets reduced
	/// to (small) integers.
	///
	/// The idea is that instead of passing two sequences to a diffling algorithm
	/// you first pass it via [`IdentifyDistinct`]:
	///
	/// ```rust
	/// use similar::capture_diff;
	/// use similar::algorithms::{Algorithm, IdentifyDistinct};
	///
	/// let old = &["foo", "bar", "baz"][..];
	/// let new = &["foo", "blah", "baz"][..];
	/// let h = IdentifyDistinct::<u32>::new(old, 0..old.len(), new, 0..new.len());
	/// let ops = capture_diff(
	/// Algorithm::Myers,
	/// h.old_lookup(),
	/// h.old_range(),
	/// h.new_lookup(),
	/// h.new_range(),
	/// );
	/// ```
	///
	/// The indexes are the same as with the passed source ranges.
	pub struct IdentifyDistinct<Int> {
	old: OffsetLookup<Int>,
	new: OffsetLookup<Int>,
	}

	impl<Int> IdentifyDistinct<Int>
	where
	Int: Add<Output = Int> + From<u8> + Default + Copy,
	{
	/// Creates an int hasher for two sequences.
	pub fn new<Old, New>(
	old: &Old,
	old_range: Range<usize>,
	new: &New,
	new_range: Range<usize>,
	) -> Self
	where
	Old: Index<usize> + ?Sized,
	Old::Output: Eq + Hash,
	New: Index<usize> + ?Sized,
	New::Output: Eq + Hash + PartialEq<Old::Output>,
	{
	enum Key<'old, 'new, Old: ?Sized, New: ?Sized> {
	Old(&'old Old),
	New(&'new New),
	}

	impl<'old, 'new, Old, New> Hash for Key<'old, 'new, Old, New>
	where
	Old: Hash + ?Sized,
	New: Hash + ?Sized,
	{
	fn hash<H: Hasher>(&self, state: &mut H) {
	match *self {
	Key::Old(val) => val.hash(state),
	Key::New(val) => val.hash(state),
	}
	}
	}

	impl<'old, 'new, Old, New> PartialEq for Key<'old, 'new, Old, New>
	where
	Old: Eq + ?Sized,
	New: Eq + PartialEq<Old> + ?Sized,
	{
	#[inline(always)]
	fn eq(&self, other: &Self) -> bool {
	match (self, other) {
	(Key::Old(a), Key::Old(b)) => a == b,
	(Key::New(a), Key::New(b)) => a == b,
	(Key::Old(a), Key::New(b)) \| (Key::New(b), Key::Old(a)) => b == a,
	}
	}
	}

	impl<'old, 'new, Old, New> Eq for Key<'old, 'new, Old, New>
	where
	Old: Eq + ?Sized,
	New: Eq + PartialEq<Old> + ?Sized,
	{
	}

	let mut map = HashMap::new();
	let mut old_seq = Vec::new();
	let mut new_seq = Vec::new();
	let mut next_id = Int::default();
	let step = Int::from(1);
	let old_start = old_range.start;
	let new_start = new_range.start;

	for idx in old_range {
	let item = Key::Old(&old[idx]);
	let id = match map.entry(item) {
	Entry::Occupied(o) => *o.get(),
	Entry::Vacant(v) => {
	let id = next_id;
	next_id = next_id + step;
	*v.insert(id)
	}
	};
	old_seq.push(id);
	}

	for idx in new_range {
	let item = Key::New(&new[idx]);
	let id = match map.entry(item) {
	Entry::Occupied(o) => *o.get(),
	Entry::Vacant(v) => {
	let id = next_id;
	next_id = next_id + step;
	*v.insert(id)
	}
	};
	new_seq.push(id);
	}

	IdentifyDistinct {
	old: OffsetLookup {
	offset: old_start,
	vec: old_seq,
	},
	new: OffsetLookup {
	offset: new_start,
	vec: new_seq,
	},
	}
	}

	/// Returns a lookup for the old side.
	pub fn old_lookup(&self) -> &impl Index<usize, Output = Int> {
	&self.old
	}

	/// Returns a lookup for the new side.
	pub fn new_lookup(&self) -> &impl Index<usize, Output = Int> {
	&self.new
	}

	/// Convenience method to get back the old range.
	pub fn old_range(&self) -> Range<usize> {
	self.old.offset..self.old.offset + self.old.vec.len()
	}

	/// Convenience method to get back the new range.
	pub fn new_range(&self) -> Range<usize> {
	self.new.offset..self.new.offset + self.new.vec.len()
	}
	}

	#[test]
	fn test_unique() {
	let u = unique(&vec!['a', 'b', 'c', 'd', 'd', 'b'], 0..6)
	.into_iter()
	.map(\|x\| (*x.value(), x.original_index()))
	.collect::<Vec<_>>();
	assert_eq!(u, vec![('a', 0), ('c', 2)]);
	}

	#[test]
	fn test_int_hasher() {
	let ih = IdentifyDistinct::<u8>::new(
	&["", "foo", "bar", "baz"][..],
	1..4,
	&["", "foo", "blah", "baz"][..],
	1..4,
	);
	assert_eq!(ih.old_lookup()[1], 0);
	assert_eq!(ih.old_lookup()[2], 1);
	assert_eq!(ih.old_lookup()[3], 2);
	assert_eq!(ih.new_lookup()[1], 0);
	assert_eq!(ih.new_lookup()[2], 3);
	assert_eq!(ih.new_lookup()[3], 2);
	assert_eq!(ih.old_range(), 1..4);
	assert_eq!(ih.new_range(), 1..4);
	}

	#[test]
	fn test_common_prefix_len() {
	assert_eq!(
	common_prefix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
	0
	);
	assert_eq!(
	common_prefix_len("foobarbaz".as_bytes(), 0..9, "foobarblah".as_bytes(), 0..10),
	7
	);
	assert_eq!(
	common_prefix_len("foobarbaz".as_bytes(), 0..9, "blablabla".as_bytes(), 0..9),
	0
	);
	assert_eq!(
	common_prefix_len("foobarbaz".as_bytes(), 3..9, "foobarblah".as_bytes(), 3..10),
	4
	);
	}

	#[test]
	fn test_common_suffix_len() {
	assert_eq!(
	common_suffix_len("".as_bytes(), 0..0, "".as_bytes(), 0..0),
	0
	);
	assert_eq!(
	common_suffix_len("1234".as_bytes(), 0..4, "X0001234".as_bytes(), 0..8),
	4
	);
	assert_eq!(
	common_suffix_len("1234".as_bytes(), 0..4, "Xxxx".as_bytes(), 0..4),
	0
	);
	assert_eq!(
	common_suffix_len("1234".as_bytes(), 2..4, "01234".as_bytes(), 2..5),
	2
	);
	}