blob: a75545ef7c63cc1fb13d0e83a3b156f5b82a371b [file] [log] [blame]
use crate::diff::*;
use crate::levenshtein::distance;
use std::collections::{BTreeMap, HashSet};
pub type Mapping<'a> = BTreeMap<&'a str, &'a str>;
// TODO: Is it better to return a list of matches or a hashmap
// It might be better to do former because we may need to distinguise
// b/w full and partial match
#[derive(Debug, PartialEq)]
pub struct Matching<'a> {
pub from: &'a str,
pub to: &'a str,
}
impl<'a> Matching<'a> {
pub fn new(from: &'a str, to: &'a str) -> Matching<'a> {
Matching { from, to }
}
}
#[derive(Debug, PartialEq)]
pub enum Match<'a> {
Full(Matching<'a>),
Partial(Matching<'a>),
}
//
// impl<'a> Match<'a> {
// fn is_full(&self) -> bool {
// match self {
// Match::Full(_) => true,
// _ => false
// }
// }
// }
/// Matches both graphs and returns the mapping of nodes from g1 to g2
pub fn match_graphs<'a>(d1: &'a DiffGraph<'_>, d2: &'a DiffGraph<'_>) -> Vec<Match<'a>> {
let mut mapping: BTreeMap<&str, &str> = get_initial_mapping(d1, d2);
// TODO: This mapping may have duplicate mappings, remove them
let mut matches: Vec<Match> = mapping
.iter()
.map(|(from, to)| Match::Full(Matching { from, to }))
.collect();
// we use rev adjacency list because we are going to compare the parents
let rev_adj_list1 = d1.graph.rev_adj_list();
let rev_adj_list2 = d2.graph.rev_adj_list();
let mut matched_labels_in_2: HashSet<&str> = mapping.iter().map(|(_, v)| *v).collect();
let mut prev_mapping = mapping.clone();
loop {
let mut new_mapping: Mapping = BTreeMap::new();
for (k, v) in prev_mapping.iter() {
let parents_in_1 = rev_adj_list1.get(k).unwrap();
let parents_in_2 = rev_adj_list2.get(v).unwrap();
for n in parents_in_1.iter() {
// don't bother selecting if the node in 1 is already matched
// as we use a stricter selection for the initial matching
if mapping.contains_key(n) {
continue;
}
if let Some(lab) = select(d1, d2, n, &parents_in_2, false) {
// if the matched label is already matched to some node in 1, skip
if !matched_labels_in_2.contains(lab) {
matched_labels_in_2.insert(lab);
new_mapping.insert(n, lab);
}
}
}
}
// println!("{:?}", new_mapping);
// merge
let new_matches = get_new_matches(&mapping, &new_mapping);
if new_matches.len() == 0 {
break;
}
for mch in new_matches {
mapping.insert(mch.from, mch.to);
matches.push(Match::Partial(mch));
}
prev_mapping = new_mapping;
}
matches
}
fn get_initial_mapping<'a>(d1: &'a DiffGraph<'_>, d2: &'a DiffGraph<'_>) -> Mapping<'a> {
let mut mapping: BTreeMap<&str, &str> = BTreeMap::new();
let mut reverse_mapping: BTreeMap<&str, &str> = BTreeMap::new();
let g2_labels: Vec<&str> = d2.graph.nodes.iter().map(|n| n.label.as_str()).collect();
// TODO: this can be functional
for node in d1.graph.nodes.iter() {
if let Some(matched_lab) = select(d1, d2, &node.label, &g2_labels, true) {
if let Some(lab_in_1) = reverse_mapping.get(matched_lab) {
// matched_lab was already matched
// select the one with the lowest cost
// this is done so that no duplicate matching will occur
let dist_already = dist_bw_nodes(d1, d2, lab_in_1, matched_lab);
let dist_new = dist_bw_nodes(d1, d2, &node.label, matched_lab);
if dist_new > dist_already {
continue;
}
mapping.remove(lab_in_1);
}
mapping.insert(&node.label, matched_lab);
reverse_mapping.insert(matched_lab, &node.label);
}
}
mapping
}
fn dist_bw_nodes(d1: &DiffGraph<'_>, d2: &DiffGraph<'_>, n1: &str, n2: &str) -> Option<usize> {
let node1 = d1.graph.get_node_by_label(n1).unwrap();
let node2 = d2.graph.get_node_by_label(n2).unwrap();
let tup1 = (
d1.dist_start[n1] as i64,
d1.dist_end[n1] as i64,
node1.stmts.len() as i64,
);
let tup2 = (
d2.dist_start[n2] as i64,
d2.dist_end[n2] as i64,
node2.stmts.len() as i64,
);
let s1 = node1.stmts.join("");
let s2 = node2.stmts.join("");
let dist = distance(&s1, &s2);
Some(
((tup1.0 - tup2.0).pow(2) + (tup1.1 - tup2.1).pow(2) + (tup1.2 - tup2.2).pow(2)) as usize
+ dist,
)
}
/// Selects the most suitable match for n from L
fn select<'a>(
d1: &'a DiffGraph<'_>,
d2: &'a DiffGraph<'_>,
n: &'a str,
list_of_labs: &[&'a str],
use_text_dist_filter: bool,
) -> Option<&'a str> {
let node = d1.graph.get_node_by_label(n).unwrap();
let node_stmt_len = node.stmts.len();
let node_stmts = node.stmts.join("");
list_of_labs
.iter()
.filter(|lab| {
let other_node = d2.graph.get_node_by_label(lab).unwrap();
// filter out nodes that may differ by more than 2 lines
(other_node.stmts.len() as i64 - node.stmts.len() as i64).abs() <= 2
})
.filter(|lab| {
if !use_text_dist_filter {
return true;
}
let other_node_stmts = d2.graph.get_node_by_label(lab).unwrap().stmts.join("");
// allow bigger basic blocks have more edits
// 2 here is arbitary
distance(&node_stmts, &other_node_stmts) < 2 * node_stmt_len
})
.min_by_key(|x| dist_bw_nodes(d1, d2, n, x))
.map(|x| *x)
}
fn get_new_matches<'a>(mapping: &Mapping<'a>, new_mapping: &Mapping<'a>) -> Vec<Matching<'a>> {
let mut changed = Vec::new();
for (k, v) in new_mapping.iter() {
if !mapping.contains_key(k) {
changed.push(Matching { from: k, to: v })
}
}
changed
}