| use std::ptr::NonNull; |
| |
| use crate::intern::Token; |
| use crate::myers::middle_snake::{MiddleSnakeSearch, SearchResult}; |
| use crate::myers::preprocess::PreprocessedFile; |
| use crate::myers::slice::FileSlice; |
| use crate::util::sqrt; |
| use crate::Sink; |
| |
| mod middle_snake; |
| mod preprocess; |
| mod slice; |
| |
| pub struct Myers { |
| kvec: NonNull<[i32]>, |
| kforward: NonNull<i32>, |
| kbackward: NonNull<i32>, |
| max_cost: u32, |
| } |
| |
| pub fn diff<S: Sink>( |
| before: &[Token], |
| after: &[Token], |
| _num_tokens: u32, |
| mut sink: S, |
| minimal: bool, |
| ) -> S::Out { |
| // preprocess the files by removing parts of the file that are not contained in the other file at all |
| // this process remaps the token indices and therefore requires us to track changed files in a char array |
| // PERF use a bitset? |
| let (mut before, mut after) = preprocess::preprocess(before, after); |
| |
| // Perform the actual diff |
| Myers::new(before.tokens.len(), after.tokens.len()).run( |
| FileSlice::new(&mut before), |
| FileSlice::new(&mut after), |
| minimal, |
| ); |
| |
| process_changes_with_sink(&before, &after, &mut sink); |
| sink.finish() |
| } |
| |
| const HEUR_MIN_COST: u32 = 256; |
| const MAX_COST_MIN: u32 = 256; |
| |
| impl Drop for Myers { |
| fn drop(&mut self) { |
| unsafe { drop(Box::from_raw(self.kvec.as_ptr())) } |
| } |
| } |
| |
| impl Myers { |
| fn new(len1: usize, len2: usize) -> Self { |
| let ndiags = len1 + len2 as usize + 3; |
| let kvec = Box::leak(vec![0; 2 * ndiags + 2].into_boxed_slice()); |
| let kforward = NonNull::from(&mut kvec[len2 + 1]); |
| let kbackward = NonNull::from(&mut kvec[ndiags + len2 + 1]); |
| Self { kvec: kvec.into(), kforward, kbackward, max_cost: sqrt(ndiags).max(MAX_COST_MIN) } |
| } |
| |
| fn run<'f>(&mut self, mut file1: FileSlice<'f>, mut file2: FileSlice<'f>, mut need_min: bool) { |
| loop { |
| file1.strip_common(&mut file2); |
| |
| if file1.is_empty() { |
| file2.mark_changed(); |
| return; |
| } else if file2.is_empty() { |
| file1.mark_changed(); |
| return; |
| } |
| |
| let split = self.split(&file1, &file2, need_min); |
| self.run( |
| file1.borrow().slice(..split.token_idx1 as u32), |
| file2.borrow().slice(..split.token_idx2 as u32), |
| split.minimized_lo, |
| ); |
| |
| file1 = file1.slice(split.token_idx1 as u32..); |
| file2 = file2.slice(split.token_idx2 as u32..); |
| need_min = split.minimized_hi |
| } |
| } |
| |
| /// See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers. |
| /// Basically considers a "box" (off1, off2, lim1, lim2) and scan from both |
| /// the forward diagonal starting from (off1, off2) and the backward diagonal |
| /// starting from (lim1, lim2). If the K values on the same diagonal crosses |
| /// returns the furthest point of reach. We might encounter expensive edge cases |
| /// using this algorithm, so a little bit of heuristic is needed to cut the |
| /// search and to return a suboptimal point. |
| fn split(&mut self, file1: &FileSlice, file2: &FileSlice, need_min: bool) -> Split { |
| let mut forward_search = |
| unsafe { MiddleSnakeSearch::<false>::new(self.kforward, file1, file2) }; |
| let mut backwards_search = |
| unsafe { MiddleSnakeSearch::<true>::new(self.kbackward, file1, file2) }; |
| let is_odd = (file2.len() - file2.len()) & 1 != 0; |
| |
| let mut ec = 0; |
| |
| while ec <= self.max_cost { |
| let mut found_snake = false; |
| forward_search.next_d(); |
| if is_odd { |
| if let Some(res) = forward_search.run(file1, file2, |k, token_idx1| { |
| backwards_search.contains(k) |
| && backwards_search.x_pos_at_diagonal(k) <= token_idx1 |
| }) { |
| match res { |
| SearchResult::Snake => found_snake = true, |
| SearchResult::Found { token_idx1, token_idx2 } => { |
| return Split { |
| token_idx1, |
| token_idx2, |
| minimized_lo: true, |
| minimized_hi: true, |
| }; |
| } |
| } |
| } |
| } else { |
| found_snake |= forward_search.run(file1, file2, |_, _| false).is_some() |
| }; |
| |
| backwards_search.next_d(); |
| if !is_odd { |
| if let Some(res) = backwards_search.run(file1, file2, |k, token_idx1| { |
| forward_search.contains(k) && token_idx1 <= forward_search.x_pos_at_diagonal(k) |
| }) { |
| match res { |
| SearchResult::Snake => found_snake = true, |
| SearchResult::Found { token_idx1, token_idx2 } => { |
| return Split { |
| token_idx1, |
| token_idx2, |
| minimized_lo: true, |
| minimized_hi: true, |
| }; |
| } |
| } |
| } |
| } else { |
| found_snake |= backwards_search.run(file1, file2, |_, _| false).is_some() |
| }; |
| |
| if need_min { |
| continue; |
| } |
| |
| // If the edit cost is above the heuristic trigger and if |
| // we got a good snake, we sample current diagonals to see |
| // if some of them have reached an "interesting" path. Our |
| // measure is a function of the distance from the diagonal |
| // corner (i1 + i2) penalized with the distance from the |
| // mid diagonal itself. If this value is above the current |
| // edit cost times a magic factor (XDL_K_HEUR) we consider |
| // it interesting. |
| if found_snake && ec > HEUR_MIN_COST { |
| if let Some((token_idx1, token_idx2)) = forward_search.found_snake(ec, file1, file2) |
| { |
| return Split { |
| token_idx1, |
| token_idx2, |
| minimized_lo: true, |
| minimized_hi: false, |
| }; |
| } |
| |
| if let Some((token_idx1, token_idx2)) = |
| backwards_search.found_snake(ec, file1, file2) |
| { |
| return Split { |
| token_idx1, |
| token_idx2, |
| minimized_lo: false, |
| minimized_hi: true, |
| }; |
| } |
| } |
| |
| ec += 1; |
| } |
| |
| let (distance_forward, token_idx1_forward) = forward_search.best_position(file1, file2); |
| let (distance_backwards, token_idx1_backwards) = |
| backwards_search.best_position(file1, file2); |
| if distance_forward > file1.len() as isize + file2.len() as isize - distance_backwards { |
| Split { |
| token_idx1: token_idx1_forward, |
| token_idx2: (distance_forward - token_idx1_forward as isize) as i32, |
| minimized_lo: true, |
| minimized_hi: false, |
| } |
| } else { |
| Split { |
| token_idx1: token_idx1_backwards, |
| token_idx2: (distance_backwards - token_idx1_backwards as isize) as i32, |
| minimized_lo: false, |
| minimized_hi: true, |
| } |
| } |
| } |
| } |
| |
| #[derive(Debug)] |
| struct Split { |
| token_idx1: i32, |
| token_idx2: i32, |
| minimized_lo: bool, |
| minimized_hi: bool, |
| } |
| |
| /// the mapping performed during preprocessing makes it impossible to directly call |
| /// the `sink` during the diff itself. Instead `file.changed` is set to true for all |
| /// tokens that are changed |
| /// below these arrays are used to call the sink function |
| fn process_changes_with_sink( |
| before: &PreprocessedFile, |
| after: &PreprocessedFile, |
| sink: &mut impl Sink, |
| ) { |
| let before_end = before.is_changed.len() as u32 + before.offset; |
| let after_end = after.is_changed.len() as u32 + after.offset; |
| |
| let mut before = before |
| .is_changed |
| .iter() |
| .enumerate() |
| .map(|(i, removed)| (i as u32 + before.offset, *removed)); |
| |
| let mut after = after |
| .is_changed |
| .iter() |
| .enumerate() |
| .map(|(i, inserted)| (i as u32 + after.offset, *inserted)); |
| |
| let mut next1 = before.next(); |
| let mut next2 = after.next(); |
| |
| while let (Some((before_pos, removed)), Some((after_pos, inserted))) = (next1, next2) { |
| if !(removed | inserted) { |
| next1 = before.next(); |
| next2 = after.next(); |
| continue; |
| } |
| |
| let mut hunk_before = before_pos..before_pos; |
| let mut hunk_after = after_pos..after_pos; |
| if removed { |
| let end = before.find(|(_, changed)| !changed); |
| next1 = end.map(|(end, _)| (end, false)); |
| hunk_before.end = end.map_or(before_end, |(end, _)| end); |
| }; |
| |
| if inserted { |
| let end = after.find(|(_, changed)| !changed); |
| next2 = end.map(|(end, _)| (end, false)); |
| hunk_after.end = end.map_or(after_end, |(end, _)| end); |
| } |
| |
| sink.process_change(hunk_before, hunk_after); |
| } |
| |
| if let Some((before_pos, _)) = next1 { |
| sink.process_change(before_pos..before_end, after_end..after_end); |
| } else if let Some((after_pos, _)) = next2 { |
| sink.process_change(before_end..before_end, after_pos..after_end); |
| } |
| } |