| use std::convert::Infallible; |
| use std::path::PathBuf; |
| |
| use criterion::measurement::Measurement; |
| use criterion::{ |
| black_box, criterion_group, criterion_main, BenchmarkGroup, BenchmarkId, Criterion, |
| }; |
| use git_repository::object::tree::diff::{Action, Change}; |
| use git_repository::Id; |
| use imara_diff::intern::InternedInput; |
| use imara_diff::sink::Counter; |
| use imara_diff::Algorithm; |
| |
| fn extract_diff(change: &Change) -> Option<(Vec<u8>, Vec<u8>)> { |
| use git_repository::object::tree::diff::change::Event::Modification; |
| |
| let (previous_id, id) = match change.event { |
| Modification { previous_entry_mode, previous_id, entry_mode, id } |
| if previous_entry_mode.is_blob() && entry_mode.is_blob() => |
| { |
| (previous_id, id) |
| } |
| _ => return None, |
| }; |
| |
| let old = previous_id.object().ok()?.detach().data; |
| let new = id.object().ok()?.detach().data; |
| |
| Some((new, old)) |
| } |
| |
| fn git_tree_diff(from: Id, to: Id, diffs: &mut Vec<(Vec<u8>, Vec<u8>, usize)>) { |
| let from_tree = from.object().unwrap().peel_to_tree().unwrap(); |
| let to_tree = to.object().unwrap().peel_to_tree().unwrap(); |
| from_tree |
| .changes() |
| .track_filename() |
| .for_each_to_obtain_tree(&to_tree, |change| -> Result<_, Infallible> { |
| if let Some((old, new)) = extract_diff(&change) { |
| let input = InternedInput::new(&*old, &*new); |
| let changes = |
| imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total(); |
| let complexity = changes * (old.len() + new.len()); |
| diffs.push((old, new, complexity)); |
| } |
| Ok(Action::Continue) |
| }) |
| .unwrap(); |
| } |
| |
| pub fn project_root() -> PathBuf { |
| let dir = env!("CARGO_MANIFEST_DIR"); |
| let mut res = PathBuf::from(dir); |
| while !res.join("README.md").exists() { |
| res = res.parent().expect("reached fs root without finding project root").to_owned() |
| } |
| res |
| } |
| |
| fn bench_repo(c: &mut Criterion, name: &str, tag2: &str, tag1: &str, num_commits: usize) { |
| let path = project_root().join("bench_data").join("repos").join(name); |
| let repo = git_repository::open(path).unwrap(); |
| let tag1 = repo.find_reference(tag1).unwrap().into_fully_peeled_id().unwrap(); |
| let tag2 = repo.find_reference(tag2).unwrap().into_fully_peeled_id().unwrap(); |
| let mut diffs = Vec::new(); |
| git_tree_diff(tag1, tag2, &mut diffs); |
| let mut last_commit = tag2; |
| tag2.object() |
| .unwrap() |
| .into_commit() |
| .ancestors() |
| .all() |
| .unwrap() |
| .take(num_commits as usize) |
| .for_each(|parent| { |
| let parent = parent.unwrap(); |
| git_tree_diff(last_commit, parent, &mut diffs); |
| last_commit = parent; |
| }); |
| diffs.sort_unstable_by_key(|(_, _, complexity)| *complexity); |
| |
| if std::env::var("PLOT").is_ok() { |
| let mut group = c.benchmark_group(format!("{name}_plot")); |
| group.sample_size(15); |
| bench_file_diffs(group, &diffs, 12, true); |
| } else { |
| bench_file_diffs(c.benchmark_group(name), &diffs, 2, false); |
| } |
| } |
| |
| fn bench_file_diffs<M: Measurement>( |
| mut group: BenchmarkGroup<M>, |
| files: &[(Vec<u8>, Vec<u8>, usize)], |
| num_chunks: usize, |
| compare_to_similar: bool, |
| ) { |
| let mut run = |name, f: fn(&[u8], &[u8]) -> usize| { |
| let mut i = 0; |
| for chunk in files.chunks((files.len() + num_chunks - 1) / num_chunks) { |
| let mut average_complexity: usize = chunk.iter().map(|(_, _, it)| *it).sum(); |
| average_complexity /= chunk.len(); |
| println!("benchmarking {i}..{}/{}", i + chunk.len(), files.len()); |
| i += chunk.len(); |
| group.bench_function( |
| BenchmarkId::new(name, format!("{average_complexity}::{}", chunk.len())), |
| |b| { |
| b.iter(|| { |
| for (old, new, _) in chunk { |
| // myers algorithm is O(ND) where D is the length of the (minimal) edit script and N the sum of file lengths |
| // we use that as an x axis to find a meaningful way to plot a |
| black_box(f(old, new)); |
| } |
| }); |
| }, |
| ); |
| } |
| }; |
| |
| run("imara_diff-histogram", |file1, file2| { |
| let input = InternedInput::new(file1, file2); |
| imara_diff::diff(Algorithm::Histogram, &input, Counter::default()).total() |
| }); |
| |
| run("imara_diff-myers", |file1, file2| { |
| let input = InternedInput::new(file1, file2); |
| imara_diff::diff(Algorithm::Myers, &input, Counter::default()).total() |
| }); |
| |
| if compare_to_similar { |
| run("similar", |file1, file2| { |
| let diff = similar::utils::diff_lines(similar::Algorithm::Myers, file1, file2); |
| diff.len() |
| }); |
| } |
| |
| group.finish(); |
| } |
| |
| fn rust(c: &mut Criterion) { |
| bench_repo(c, "rust", "1.64.0", "1.50.0", 30); |
| } |
| |
| fn vscode(c: &mut Criterion) { |
| bench_repo(c, "vscode", "1.72.2", "1.41.0", 30); |
| } |
| |
| fn linux(c: &mut Criterion) { |
| bench_repo(c, "linux", "v6.0", "v5.7", 30); |
| } |
| |
| fn helix(c: &mut Criterion) { |
| bench_repo(c, "helix", "22.08.1", "v0.5.0", 30); |
| } |
| |
| criterion_group!(realworld_repos, helix, rust, vscode, linux); |
| criterion_main!(realworld_repos); |