| //! ### Deviation | 
 | //! | 
 | //! Note that the algorithm implemented here is in many ways different from what `git` does. | 
 | //! | 
 | //! - it's less sophisticated and doesn't use any ranking of candidates. Instead, it picks the first possible match. | 
 | //! - the set used for copy-detection is probably smaller by default. | 
 | use std::ops::Range; | 
 |  | 
 | use bstr::BStr; | 
 | use gix_object::tree::{EntryKind, EntryMode}; | 
 |  | 
 | use crate::{ | 
 |     blob::{platform::prepare_diff::Operation, DiffLineStats, ResourceKind}, | 
 |     rewrites::{CopySource, Outcome, Tracker}, | 
 |     Rewrites, | 
 | }; | 
 |  | 
 | /// The kind of a change. | 
 | #[derive(Debug, Copy, Clone, Ord, PartialOrd, PartialEq, Eq)] | 
 | pub enum ChangeKind { | 
 |     /// The change represents the *deletion* of an item. | 
 |     Deletion, | 
 |     /// The change represents the *modification* of an item. | 
 |     Modification, | 
 |     /// The change represents the *addition* of an item. | 
 |     Addition, | 
 | } | 
 |  | 
 | /// A trait providing all functionality to abstract over the concept of a change, as seen by the [`Tracker`]. | 
 | pub trait Change: Clone { | 
 |     /// Return the hash of this change for identification. | 
 |     /// | 
 |     /// Note that this is the id of the object as stored in `git`, i.e. it must have gone through workspace | 
 |     /// conversions. | 
 |     fn id(&self) -> &gix_hash::oid; | 
 |     /// Return the kind of this change. | 
 |     fn kind(&self) -> ChangeKind; | 
 |     /// Return more information about the kind of entry affected by this change. | 
 |     fn entry_mode(&self) -> EntryMode; | 
 |     /// Return the id of the change along with its mode. | 
 |     fn id_and_entry_mode(&self) -> (&gix_hash::oid, EntryMode); | 
 | } | 
 |  | 
 | /// A set of tracked items allows to figure out their relations by figuring out their similarity. | 
 | pub(crate) struct Item<T> { | 
 |     /// The underlying raw change | 
 |     change: T, | 
 |     /// That slice into the backing for paths. | 
 |     path: Range<usize>, | 
 |     /// If true, this item was already emitted, i.e. seen by the caller. | 
 |     emitted: bool, | 
 | } | 
 |  | 
 | impl<T: Change> Item<T> { | 
 |     fn location<'a>(&self, backing: &'a [u8]) -> &'a BStr { | 
 |         backing[self.path.clone()].as_ref() | 
 |     } | 
 |     fn entry_mode_compatible(&self, mode: EntryMode) -> bool { | 
 |         use EntryKind::*; | 
 |         matches!( | 
 |             (mode.kind(), self.change.entry_mode().kind()), | 
 |             (Blob | BlobExecutable, Blob | BlobExecutable) | (Link, Link) | 
 |         ) | 
 |     } | 
 |  | 
 |     fn is_source_for_destination_of(&self, kind: visit::SourceKind, dest_item_mode: EntryMode) -> bool { | 
 |         self.entry_mode_compatible(dest_item_mode) | 
 |             && match kind { | 
 |                 visit::SourceKind::Rename => !self.emitted && matches!(self.change.kind(), ChangeKind::Deletion), | 
 |                 visit::SourceKind::Copy => { | 
 |                     matches!(self.change.kind(), ChangeKind::Modification) | 
 |                 } | 
 |             } | 
 |     } | 
 | } | 
 |  | 
 | /// A module with types used in the user-callback in [Tracker::emit()](crate::rewrites::Tracker::emit()). | 
 | pub mod visit { | 
 |     use bstr::BStr; | 
 |     use gix_object::tree::EntryMode; | 
 |  | 
 |     use crate::blob::DiffLineStats; | 
 |  | 
 |     /// The source of a rewrite, rename or copy. | 
 |     #[derive(Debug, Clone, PartialEq, PartialOrd)] | 
 |     pub struct Source<'a, T> { | 
 |         /// The kind of entry. | 
 |         pub entry_mode: EntryMode, | 
 |         /// The hash of the state of the source as seen in the object database. | 
 |         pub id: gix_hash::ObjectId, | 
 |         /// Further specify what kind of source this is. | 
 |         pub kind: SourceKind, | 
 |         /// The repository-relative location of this entry. | 
 |         pub location: &'a BStr, | 
 |         /// The change that was registered as source. | 
 |         pub change: &'a T, | 
 |         /// If this is a rewrite, indicate how many lines would need to change to turn this source into the destination. | 
 |         pub diff: Option<DiffLineStats>, | 
 |     } | 
 |  | 
 |     /// Further identify the kind of [Source]. | 
 |     #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] | 
 |     pub enum SourceKind { | 
 |         /// This is the source of an entry that was renamed, as `source` was renamed to `destination`. | 
 |         Rename, | 
 |         /// This is the source of a copy, as `source` was copied into `destination`. | 
 |         Copy, | 
 |     } | 
 |  | 
 |     /// A change along with a location. | 
 |     #[derive(Clone)] | 
 |     pub struct Destination<'a, T: Clone> { | 
 |         /// The change at the given `location`. | 
 |         pub change: T, | 
 |         /// The repository-relative location of this destination. | 
 |         pub location: &'a BStr, | 
 |     } | 
 | } | 
 |  | 
 | /// | 
 | #[allow(clippy::empty_docs)] | 
 | pub mod emit { | 
 |     /// The error returned by [Tracker::emit()](super::Tracker::emit()). | 
 |     #[derive(Debug, thiserror::Error)] | 
 |     #[allow(missing_docs)] | 
 |     pub enum Error { | 
 |         #[error("Could not find blob for similarity checking")] | 
 |         FindExistingBlob(#[from] gix_object::find::existing_object::Error), | 
 |         #[error("Could not obtain exhaustive item set to use as possible sources for copy detection")] | 
 |         GetItemsForExhaustiveCopyDetection(#[source] Box<dyn std::error::Error + Send + Sync>), | 
 |         #[error(transparent)] | 
 |         SetResource(#[from] crate::blob::platform::set_resource::Error), | 
 |         #[error(transparent)] | 
 |         PrepareDiff(#[from] crate::blob::platform::prepare_diff::Error), | 
 |     } | 
 | } | 
 |  | 
 | /// Lifecycle | 
 | impl<T: Change> Tracker<T> { | 
 |     /// Create a new instance with `rewrites` configuration. | 
 |     pub fn new(rewrites: Rewrites) -> Self { | 
 |         Tracker { | 
 |             items: vec![], | 
 |             path_backing: vec![], | 
 |             rewrites, | 
 |         } | 
 |     } | 
 | } | 
 |  | 
 | /// build state and find matches. | 
 | impl<T: Change> Tracker<T> { | 
 |     /// We may refuse the push if that information isn't needed for what we have to track. | 
 |     pub fn try_push_change(&mut self, change: T, location: &BStr) -> Option<T> { | 
 |         if !change.entry_mode().is_blob_or_symlink() { | 
 |             return Some(change); | 
 |         } | 
 |         let keep = match (self.rewrites.copies, change.kind()) { | 
 |             (Some(_find_copies), _) => true, | 
 |             (None, ChangeKind::Modification { .. }) => false, | 
 |             (None, _) => true, | 
 |         }; | 
 |  | 
 |         if !keep { | 
 |             return Some(change); | 
 |         } | 
 |  | 
 |         let start = self.path_backing.len(); | 
 |         self.path_backing.extend_from_slice(location); | 
 |         self.items.push(Item { | 
 |             path: start..self.path_backing.len(), | 
 |             change, | 
 |             emitted: false, | 
 |         }); | 
 |         None | 
 |     } | 
 |  | 
 |     /// Can only be called once effectively as it alters its own state to assure each item is only emitted once. | 
 |     /// | 
 |     /// `cb(destination, source)` is called for each item, either with `Some(source)` if it's | 
 |     /// the destination of a copy or rename, or with `None` for source if no relation to other | 
 |     /// items in the tracked set exist, which is like saying 'no rename or rewrite or copy' happened. | 
 |     /// | 
 |     /// `objects` is used to access blob data for similarity checks if required and is taken directly from the object database. | 
 |     /// Worktree filters and text conversions will be applied afterwards automatically. Note that object-caching *should not* | 
 |     /// be enabled as caching is implemented by `diff_cache`, after all, the blob that's actually diffed is going | 
 |     /// through conversion steps. | 
 |     /// | 
 |     /// `diff_cache` is a way to retain a cache of resources that are prepared for rapid diffing, and it also controls | 
 |     /// the diff-algorithm (provided no user-algorithm is set). | 
 |     /// Note that we control a few options of `diff_cache` to assure it will ignore external commands. | 
 |     /// Note that we do not control how the `diff_cache` converts resources, it's left to the caller to decide | 
 |     /// if it should look at what's stored in `git`, or in the working tree, along with all diff-specific conversions. | 
 |     /// | 
 |     /// `push_source_tree(push_fn: push(change, location))` is a function that is called when the entire tree of the source | 
 |     /// should be added as modifications by calling `push` repeatedly to use for perfect copy tracking. Note that `push` | 
 |     /// will panic if `change` is not a modification, and it's valid to not call `push` at all. | 
 |     pub fn emit<PushSourceTreeFn, E>( | 
 |         &mut self, | 
 |         mut cb: impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> crate::tree::visit::Action, | 
 |         diff_cache: &mut crate::blob::Platform, | 
 |         objects: &impl gix_object::FindObjectOrHeader, | 
 |         mut push_source_tree: PushSourceTreeFn, | 
 |     ) -> Result<Outcome, emit::Error> | 
 |     where | 
 |         PushSourceTreeFn: FnMut(&mut dyn FnMut(T, &BStr)) -> Result<(), E>, | 
 |         E: std::error::Error + Send + Sync + 'static, | 
 |     { | 
 |         diff_cache.options.skip_internal_diff_if_external_is_configured = false; | 
 |  | 
 |         fn by_id_and_location<T: Change>(a: &Item<T>, b: &Item<T>) -> std::cmp::Ordering { | 
 |             a.change | 
 |                 .id() | 
 |                 .cmp(b.change.id()) | 
 |                 .then_with(|| a.path.start.cmp(&b.path.start).then(a.path.end.cmp(&b.path.end))) | 
 |         } | 
 |         self.items.sort_by(by_id_and_location); | 
 |  | 
 |         let mut out = Outcome { | 
 |             options: self.rewrites, | 
 |             ..Default::default() | 
 |         }; | 
 |         self.match_pairs_of_kind( | 
 |             visit::SourceKind::Rename, | 
 |             &mut cb, | 
 |             self.rewrites.percentage, | 
 |             &mut out, | 
 |             diff_cache, | 
 |             objects, | 
 |         )?; | 
 |  | 
 |         if let Some(copies) = self.rewrites.copies { | 
 |             self.match_pairs_of_kind( | 
 |                 visit::SourceKind::Copy, | 
 |                 &mut cb, | 
 |                 copies.percentage, | 
 |                 &mut out, | 
 |                 diff_cache, | 
 |                 objects, | 
 |             )?; | 
 |  | 
 |             match copies.source { | 
 |                 CopySource::FromSetOfModifiedFiles => {} | 
 |                 CopySource::FromSetOfModifiedFilesAndAllSources => { | 
 |                     push_source_tree(&mut |change, location| { | 
 |                         assert!( | 
 |                             self.try_push_change(change, location).is_none(), | 
 |                             "we must accept every change" | 
 |                         ); | 
 |                         // make sure these aren't viable to be emitted anymore. | 
 |                         self.items.last_mut().expect("just pushed").emitted = true; | 
 |                     }) | 
 |                     .map_err(|err| emit::Error::GetItemsForExhaustiveCopyDetection(Box::new(err)))?; | 
 |                     self.items.sort_by(by_id_and_location); | 
 |  | 
 |                     self.match_pairs_of_kind( | 
 |                         visit::SourceKind::Copy, | 
 |                         &mut cb, | 
 |                         copies.percentage, | 
 |                         &mut out, | 
 |                         diff_cache, | 
 |                         objects, | 
 |                     )?; | 
 |                 } | 
 |             } | 
 |         } | 
 |  | 
 |         self.items | 
 |             .sort_by(|a, b| a.location(&self.path_backing).cmp(b.location(&self.path_backing))); | 
 |         for item in self.items.drain(..).filter(|item| !item.emitted) { | 
 |             if cb( | 
 |                 visit::Destination { | 
 |                     location: item.location(&self.path_backing), | 
 |                     change: item.change, | 
 |                 }, | 
 |                 None, | 
 |             ) == crate::tree::visit::Action::Cancel | 
 |             { | 
 |                 break; | 
 |             } | 
 |         } | 
 |         Ok(out) | 
 |     } | 
 | } | 
 |  | 
 | impl<T: Change> Tracker<T> { | 
 |     fn match_pairs_of_kind( | 
 |         &mut self, | 
 |         kind: visit::SourceKind, | 
 |         cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> crate::tree::visit::Action, | 
 |         percentage: Option<f32>, | 
 |         out: &mut Outcome, | 
 |         diff_cache: &mut crate::blob::Platform, | 
 |         objects: &impl gix_object::FindObjectOrHeader, | 
 |     ) -> Result<(), emit::Error> { | 
 |         // we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively. | 
 |         let needs_second_pass = !needs_exact_match(percentage); | 
 |         if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects)? | 
 |             == crate::tree::visit::Action::Cancel | 
 |         { | 
 |             return Ok(()); | 
 |         } | 
 |         if needs_second_pass { | 
 |             let is_limited = if self.rewrites.limit == 0 { | 
 |                 false | 
 |             } else { | 
 |                 let (num_src, num_dst) = | 
 |                     estimate_involved_items(self.items.iter().map(|item| (item.emitted, item.change.kind())), kind); | 
 |                 let permutations = num_src * num_dst; | 
 |                 if permutations > self.rewrites.limit { | 
 |                     match kind { | 
 |                         visit::SourceKind::Rename => { | 
 |                             out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit = permutations; | 
 |                         } | 
 |                         visit::SourceKind::Copy => { | 
 |                             out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit = permutations; | 
 |                         } | 
 |                     } | 
 |                     true | 
 |                 } else { | 
 |                     false | 
 |                 } | 
 |             }; | 
 |             if !is_limited { | 
 |                 self.match_pairs(cb, percentage, kind, out, diff_cache, objects)?; | 
 |             } | 
 |         } | 
 |         Ok(()) | 
 |     } | 
 |  | 
 |     fn match_pairs( | 
 |         &mut self, | 
 |         cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> crate::tree::visit::Action, | 
 |         percentage: Option<f32>, | 
 |         kind: visit::SourceKind, | 
 |         stats: &mut Outcome, | 
 |         diff_cache: &mut crate::blob::Platform, | 
 |         objects: &impl gix_object::FindObjectOrHeader, | 
 |     ) -> Result<crate::tree::visit::Action, emit::Error> { | 
 |         let mut dest_ofs = 0; | 
 |         while let Some((mut dest_idx, dest)) = self.items[dest_ofs..].iter().enumerate().find_map(|(idx, item)| { | 
 |             (!item.emitted && matches!(item.change.kind(), ChangeKind::Addition)).then_some((idx, item)) | 
 |         }) { | 
 |             dest_idx += dest_ofs; | 
 |             dest_ofs = dest_idx + 1; | 
 |             let src = find_match( | 
 |                 &self.items, | 
 |                 dest, | 
 |                 dest_idx, | 
 |                 percentage, | 
 |                 kind, | 
 |                 stats, | 
 |                 objects, | 
 |                 diff_cache, | 
 |                 &self.path_backing, | 
 |             )? | 
 |             .map(|(src_idx, src, diff)| { | 
 |                 let (id, entry_mode) = src.change.id_and_entry_mode(); | 
 |                 let id = id.to_owned(); | 
 |                 let location = src.location(&self.path_backing); | 
 |                 ( | 
 |                     visit::Source { | 
 |                         entry_mode, | 
 |                         id, | 
 |                         kind, | 
 |                         location, | 
 |                         change: &src.change, | 
 |                         diff, | 
 |                     }, | 
 |                     src_idx, | 
 |                 ) | 
 |             }); | 
 |             if src.is_none() { | 
 |                 continue; | 
 |             } | 
 |             let location = dest.location(&self.path_backing); | 
 |             let change = dest.change.clone(); | 
 |             let dest = visit::Destination { change, location }; | 
 |             let src_idx = src.as_ref().map(|t| t.1); | 
 |             let res = cb(dest, src.map(|t| t.0)); | 
 |  | 
 |             self.items[dest_idx].emitted = true; | 
 |             if let Some(src_idx) = src_idx { | 
 |                 self.items[src_idx].emitted = true; | 
 |             } | 
 |  | 
 |             if res == crate::tree::visit::Action::Cancel { | 
 |                 return Ok(crate::tree::visit::Action::Cancel); | 
 |             } | 
 |         } | 
 |         Ok(crate::tree::visit::Action::Continue) | 
 |     } | 
 | } | 
 |  | 
 | /// Returns the amount of viable sources and destinations for `items` as eligible for the given `kind` of operation. | 
 | fn estimate_involved_items( | 
 |     items: impl IntoIterator<Item = (bool, ChangeKind)>, | 
 |     kind: visit::SourceKind, | 
 | ) -> (usize, usize) { | 
 |     items | 
 |         .into_iter() | 
 |         .filter(|(emitted, _)| match kind { | 
 |             visit::SourceKind::Rename => !*emitted, | 
 |             visit::SourceKind::Copy => true, | 
 |         }) | 
 |         .fold((0, 0), |(mut src, mut dest), (emitted, change_kind)| { | 
 |             match change_kind { | 
 |                 ChangeKind::Addition => { | 
 |                     if kind == visit::SourceKind::Rename || !emitted { | 
 |                         dest += 1; | 
 |                     } | 
 |                 } | 
 |                 ChangeKind::Deletion => { | 
 |                     if kind == visit::SourceKind::Rename { | 
 |                         src += 1 | 
 |                     } | 
 |                 } | 
 |                 ChangeKind::Modification => { | 
 |                     if kind == visit::SourceKind::Copy { | 
 |                         src += 1 | 
 |                     } | 
 |                 } | 
 |             } | 
 |             (src, dest) | 
 |         }) | 
 | } | 
 |  | 
 | fn needs_exact_match(percentage: Option<f32>) -> bool { | 
 |     percentage.map_or(true, |p| p >= 1.0) | 
 | } | 
 |  | 
 | /// <`src_idx`, src, possibly diff stat> | 
 | type SourceTuple<'a, T> = (usize, &'a Item<T>, Option<DiffLineStats>); | 
 |  | 
 | /// Find `item` in our set of items ignoring `item_idx` to avoid finding ourselves, by similarity indicated by `percentage`. | 
 | /// The latter can be `None` or `Some(x)` where `x>=1` for identity, and anything else for similarity. | 
 | /// We also ignore emitted items entirely. | 
 | /// Use `kind` to indicate what kind of match we are looking for, which might be deletions matching an `item` addition, or | 
 | /// any non-deletion otherwise. | 
 | /// Note that we always try to find by identity first even if a percentage is given as it's much faster and may reduce the set | 
 | /// of items to be searched. | 
 | #[allow(clippy::too_many_arguments)] | 
 | fn find_match<'a, T: Change>( | 
 |     items: &'a [Item<T>], | 
 |     item: &Item<T>, | 
 |     item_idx: usize, | 
 |     percentage: Option<f32>, | 
 |     kind: visit::SourceKind, | 
 |     stats: &mut Outcome, | 
 |     objects: &impl gix_object::FindObjectOrHeader, | 
 |     diff_cache: &mut crate::blob::Platform, | 
 |     path_backing: &[u8], | 
 | ) -> Result<Option<SourceTuple<'a, T>>, emit::Error> { | 
 |     let (item_id, item_mode) = item.change.id_and_entry_mode(); | 
 |     if needs_exact_match(percentage) || item_mode.is_link() { | 
 |         let first_idx = items.partition_point(|a| a.change.id() < item_id); | 
 |         let range = items.get(first_idx..).map(|items| { | 
 |             let end = items | 
 |                 .iter() | 
 |                 .position(|a| a.change.id() != item_id) | 
 |                 .map_or(items.len(), |idx| first_idx + idx); | 
 |             first_idx..end | 
 |         }); | 
 |         let range = match range { | 
 |             Some(range) => range, | 
 |             None => return Ok(None), | 
 |         }; | 
 |         if range.is_empty() { | 
 |             return Ok(None); | 
 |         } | 
 |         let res = items[range.clone()].iter().enumerate().find_map(|(mut src_idx, src)| { | 
 |             src_idx += range.start; | 
 |             (src_idx != item_idx && src.is_source_for_destination_of(kind, item_mode)).then_some((src_idx, src, None)) | 
 |         }); | 
 |         if let Some(src) = res { | 
 |             return Ok(Some(src)); | 
 |         } | 
 |     } else { | 
 |         let mut has_new = false; | 
 |         let percentage = percentage.expect("it's set to something below 1.0 and we assured this"); | 
 |         debug_assert!( | 
 |             item_mode.is_blob(), | 
 |             "symlinks are matched exactly, and trees aren't used here" | 
 |         ); | 
 |  | 
 |         for (can_idx, src) in items | 
 |             .iter() | 
 |             .enumerate() | 
 |             .filter(|(src_idx, src)| *src_idx != item_idx && src.is_source_for_destination_of(kind, item_mode)) | 
 |         { | 
 |             if !has_new { | 
 |                 diff_cache.set_resource( | 
 |                     item_id.to_owned(), | 
 |                     item_mode.kind(), | 
 |                     item.location(path_backing), | 
 |                     ResourceKind::NewOrDestination, | 
 |                     objects, | 
 |                 )?; | 
 |                 has_new = true; | 
 |             } | 
 |             let (src_id, src_mode) = src.change.id_and_entry_mode(); | 
 |             diff_cache.set_resource( | 
 |                 src_id.to_owned(), | 
 |                 src_mode.kind(), | 
 |                 src.location(path_backing), | 
 |                 ResourceKind::OldOrSource, | 
 |                 objects, | 
 |             )?; | 
 |             let prep = diff_cache.prepare_diff()?; | 
 |             stats.num_similarity_checks += 1; | 
 |             match prep.operation { | 
 |                 Operation::InternalDiff { algorithm } => { | 
 |                     let tokens = | 
 |                         crate::blob::intern::InternedInput::new(prep.old.intern_source(), prep.new.intern_source()); | 
 |                     let counts = crate::blob::diff( | 
 |                         algorithm, | 
 |                         &tokens, | 
 |                         crate::blob::sink::Counter::new(diff::Statistics { | 
 |                             removed_bytes: 0, | 
 |                             input: &tokens, | 
 |                         }), | 
 |                     ); | 
 |                     let old_data_len = prep.old.data.as_slice().unwrap_or_default().len(); | 
 |                     let new_data_len = prep.new.data.as_slice().unwrap_or_default().len(); | 
 |                     let similarity = (old_data_len - counts.wrapped) as f32 / old_data_len.max(new_data_len) as f32; | 
 |                     if similarity >= percentage { | 
 |                         return Ok(Some(( | 
 |                             can_idx, | 
 |                             src, | 
 |                             DiffLineStats { | 
 |                                 removals: counts.removals, | 
 |                                 insertions: counts.insertions, | 
 |                                 before: tokens.before.len().try_into().expect("interner handles only u32"), | 
 |                                 after: tokens.after.len().try_into().expect("interner handles only u32"), | 
 |                                 similarity, | 
 |                             } | 
 |                             .into(), | 
 |                         ))); | 
 |                     } | 
 |                 } | 
 |                 Operation::ExternalCommand { .. } => { | 
 |                     unreachable!("we have disabled this possibility with an option") | 
 |                 } | 
 |                 Operation::SourceOrDestinationIsBinary => { | 
 |                     // TODO: figure out if git does more here | 
 |                 } | 
 |             }; | 
 |         } | 
 |     } | 
 |     Ok(None) | 
 | } | 
 |  | 
 | mod diff { | 
 |     use std::ops::Range; | 
 |  | 
 |     pub struct Statistics<'a, 'data> { | 
 |         pub removed_bytes: usize, | 
 |         pub input: &'a crate::blob::intern::InternedInput<&'data [u8]>, | 
 |     } | 
 |  | 
 |     impl<'a, 'data> crate::blob::Sink for Statistics<'a, 'data> { | 
 |         type Out = usize; | 
 |  | 
 |         fn process_change(&mut self, before: Range<u32>, _after: Range<u32>) { | 
 |             self.removed_bytes = self.input.before[before.start as usize..before.end as usize] | 
 |                 .iter() | 
 |                 .map(|token| self.input.interner[*token].len()) | 
 |                 .sum(); | 
 |         } | 
 |  | 
 |         fn finish(self) -> Self::Out { | 
 |             self.removed_bytes | 
 |         } | 
 |     } | 
 | } | 
 |  | 
 | #[cfg(test)] | 
 | mod estimate_involved_items { | 
 |     use super::estimate_involved_items; | 
 |     use crate::rewrites::tracker::{visit::SourceKind, ChangeKind}; | 
 |  | 
 |     #[test] | 
 |     fn renames_count_unemitted_as_sources_and_destinations() { | 
 |         let items = [ | 
 |             (false, ChangeKind::Addition), | 
 |             (true, ChangeKind::Deletion), | 
 |             (true, ChangeKind::Deletion), | 
 |         ]; | 
 |         assert_eq!( | 
 |             estimate_involved_items(items, SourceKind::Rename), | 
 |             (0, 1), | 
 |             "here we only have one eligible source, hence nothing to do" | 
 |         ); | 
 |         assert_eq!( | 
 |             estimate_involved_items(items.into_iter().map(|t| (false, t.1)), SourceKind::Rename), | 
 |             (2, 1), | 
 |             "now we have more possibilities as renames count un-emitted deletions as source" | 
 |         ); | 
 |     } | 
 |  | 
 |     #[test] | 
 |     fn copies_do_not_count_additions_as_sources() { | 
 |         let items = [ | 
 |             (false, ChangeKind::Addition), | 
 |             (true, ChangeKind::Addition), | 
 |             (true, ChangeKind::Deletion), | 
 |         ]; | 
 |         assert_eq!( | 
 |             estimate_involved_items(items, SourceKind::Copy), | 
 |             (0, 1), | 
 |             "one addition as source, the other isn't counted as it's emitted, nor is it considered a copy-source.\ | 
 |             deletions don't count" | 
 |         ); | 
 |     } | 
 |  | 
 |     #[test] | 
 |     fn copies_count_modifications_as_sources() { | 
 |         let items = [ | 
 |             (false, ChangeKind::Addition), | 
 |             (true, ChangeKind::Modification), | 
 |             (false, ChangeKind::Modification), | 
 |         ]; | 
 |         assert_eq!( | 
 |             estimate_involved_items(items, SourceKind::Copy), | 
 |             (2, 1), | 
 |             "any modifications is a valid source, emitted or not" | 
 |         ); | 
 |     } | 
 | } |