| use crate::diff::*; |
| use crate::levenshtein::distance; |
| use std::collections::{BTreeMap, HashSet}; |
| |
| pub type Mapping<'a> = BTreeMap<&'a str, &'a str>; |
| |
| // TODO: Is it better to return a list of matches or a hashmap |
| // It might be better to do former because we may need to distinguise |
| // b/w full and partial match |
| #[derive(Debug, PartialEq)] |
| pub struct Matching<'a> { |
| pub from: &'a str, |
| pub to: &'a str, |
| } |
| |
| impl<'a> Matching<'a> { |
| pub fn new(from: &'a str, to: &'a str) -> Matching<'a> { |
| Matching { from, to } |
| } |
| } |
| |
| #[derive(Debug, PartialEq)] |
| pub enum Match<'a> { |
| Full(Matching<'a>), |
| Partial(Matching<'a>), |
| } |
| // |
| // impl<'a> Match<'a> { |
| // fn is_full(&self) -> bool { |
| // match self { |
| // Match::Full(_) => true, |
| // _ => false |
| // } |
| // } |
| // } |
| |
| /// Matches both graphs and returns the mapping of nodes from g1 to g2 |
| pub fn match_graphs<'a>(d1: &'a DiffGraph<'_>, d2: &'a DiffGraph<'_>) -> Vec<Match<'a>> { |
| let mut mapping: BTreeMap<&str, &str> = get_initial_mapping(d1, d2); |
| |
| // TODO: This mapping may have duplicate mappings, remove them |
| |
| let mut matches: Vec<Match> = mapping |
| .iter() |
| .map(|(from, to)| Match::Full(Matching { from, to })) |
| .collect(); |
| |
| // we use rev adjacency list because we are going to compare the parents |
| let rev_adj_list1 = d1.graph.rev_adj_list(); |
| let rev_adj_list2 = d2.graph.rev_adj_list(); |
| |
| let mut matched_labels_in_2: HashSet<&str> = mapping.iter().map(|(_, v)| *v).collect(); |
| |
| let mut prev_mapping = mapping.clone(); |
| loop { |
| let mut new_mapping: Mapping = BTreeMap::new(); |
| for (k, v) in prev_mapping.iter() { |
| let parents_in_1 = rev_adj_list1.get(k).unwrap(); |
| let parents_in_2 = rev_adj_list2.get(v).unwrap(); |
| |
| for n in parents_in_1.iter() { |
| // don't bother selecting if the node in 1 is already matched |
| // as we use a stricter selection for the initial matching |
| if mapping.contains_key(n) { |
| continue; |
| } |
| if let Some(lab) = select(d1, d2, n, &parents_in_2, false) { |
| // if the matched label is already matched to some node in 1, skip |
| if !matched_labels_in_2.contains(lab) { |
| matched_labels_in_2.insert(lab); |
| new_mapping.insert(n, lab); |
| } |
| } |
| } |
| } |
| // println!("{:?}", new_mapping); |
| // merge |
| let new_matches = get_new_matches(&mapping, &new_mapping); |
| if new_matches.len() == 0 { |
| break; |
| } |
| for mch in new_matches { |
| mapping.insert(mch.from, mch.to); |
| matches.push(Match::Partial(mch)); |
| } |
| prev_mapping = new_mapping; |
| } |
| |
| matches |
| } |
| |
| fn get_initial_mapping<'a>(d1: &'a DiffGraph<'_>, d2: &'a DiffGraph<'_>) -> Mapping<'a> { |
| let mut mapping: BTreeMap<&str, &str> = BTreeMap::new(); |
| let mut reverse_mapping: BTreeMap<&str, &str> = BTreeMap::new(); |
| let g2_labels: Vec<&str> = d2.graph.nodes.iter().map(|n| n.label.as_str()).collect(); |
| |
| // TODO: this can be functional |
| for node in d1.graph.nodes.iter() { |
| if let Some(matched_lab) = select(d1, d2, &node.label, &g2_labels, true) { |
| if let Some(lab_in_1) = reverse_mapping.get(matched_lab) { |
| // matched_lab was already matched |
| // select the one with the lowest cost |
| // this is done so that no duplicate matching will occur |
| let dist_already = dist_bw_nodes(d1, d2, lab_in_1, matched_lab); |
| let dist_new = dist_bw_nodes(d1, d2, &node.label, matched_lab); |
| if dist_new > dist_already { |
| continue; |
| } |
| mapping.remove(lab_in_1); |
| } |
| mapping.insert(&node.label, matched_lab); |
| reverse_mapping.insert(matched_lab, &node.label); |
| } |
| } |
| |
| mapping |
| } |
| |
| fn dist_bw_nodes(d1: &DiffGraph<'_>, d2: &DiffGraph<'_>, n1: &str, n2: &str) -> Option<usize> { |
| let node1 = d1.graph.get_node_by_label(n1).unwrap(); |
| let node2 = d2.graph.get_node_by_label(n2).unwrap(); |
| |
| let tup1 = ( |
| d1.dist_start[n1] as i64, |
| d1.dist_end[n1] as i64, |
| node1.stmts.len() as i64, |
| ); |
| let tup2 = ( |
| d2.dist_start[n2] as i64, |
| d2.dist_end[n2] as i64, |
| node2.stmts.len() as i64, |
| ); |
| |
| let s1 = node1.stmts.join(""); |
| let s2 = node2.stmts.join(""); |
| let dist = distance(&s1, &s2); |
| |
| Some( |
| ((tup1.0 - tup2.0).pow(2) + (tup1.1 - tup2.1).pow(2) + (tup1.2 - tup2.2).pow(2)) as usize |
| + dist, |
| ) |
| } |
| |
| /// Selects the most suitable match for n from L |
| fn select<'a>( |
| d1: &'a DiffGraph<'_>, |
| d2: &'a DiffGraph<'_>, |
| n: &'a str, |
| list_of_labs: &[&'a str], |
| use_text_dist_filter: bool, |
| ) -> Option<&'a str> { |
| let node = d1.graph.get_node_by_label(n).unwrap(); |
| let node_stmt_len = node.stmts.len(); |
| let node_stmts = node.stmts.join(""); |
| list_of_labs |
| .iter() |
| .filter(|lab| { |
| let other_node = d2.graph.get_node_by_label(lab).unwrap(); |
| // filter out nodes that may differ by more than 2 lines |
| (other_node.stmts.len() as i64 - node.stmts.len() as i64).abs() <= 2 |
| }) |
| .filter(|lab| { |
| if !use_text_dist_filter { |
| return true; |
| } |
| let other_node_stmts = d2.graph.get_node_by_label(lab).unwrap().stmts.join(""); |
| // allow bigger basic blocks have more edits |
| // 2 here is arbitary |
| distance(&node_stmts, &other_node_stmts) < 2 * node_stmt_len |
| }) |
| .min_by_key(|x| dist_bw_nodes(d1, d2, n, x)) |
| .map(|x| *x) |
| } |
| |
| fn get_new_matches<'a>(mapping: &Mapping<'a>, new_mapping: &Mapping<'a>) -> Vec<Matching<'a>> { |
| let mut changed = Vec::new(); |
| for (k, v) in new_mapping.iter() { |
| if !mapping.contains_key(k) { |
| changed.push(Matching { from: k, to: v }) |
| } |
| } |
| |
| changed |
| } |