| use crate::histogram::{Histogram, MAX_CHAIN_LEN}; |
| use crate::intern::Token; |
| |
| pub(super) fn find_lcs( |
| before: &[Token], |
| after: &[Token], |
| histogram: &mut Histogram, |
| ) -> Option<Lcs> { |
| let mut search = |
| LcsSearch { lcs: Lcs::default(), min_occurances: MAX_CHAIN_LEN + 1, found_cs: false }; |
| search.run(before, after, histogram); |
| if search.success() { |
| Some(search.lcs) |
| } else { |
| None |
| } |
| } |
| |
| #[derive(Default, Debug)] |
| pub struct Lcs { |
| pub before_start: u32, |
| pub after_start: u32, |
| pub len: u32, |
| } |
| |
| pub struct LcsSearch { |
| lcs: Lcs, |
| min_occurances: u32, |
| found_cs: bool, |
| } |
| |
| impl LcsSearch { |
| fn run(&mut self, before: &[Token], after: &[Token], histogram: &mut Histogram) { |
| let mut pos = 0; |
| while let Some(&token) = after.get(pos as usize) { |
| if histogram.num_token_occurances(token) != 0 { |
| self.found_cs = true; |
| if histogram.num_token_occurances(token) <= self.min_occurances { |
| pos = self.update_lcs(pos, token, histogram, before, after); |
| continue; |
| } |
| } |
| |
| pos += 1; |
| } |
| |
| histogram.clear(); |
| } |
| |
| fn success(&mut self) -> bool { |
| !self.found_cs || self.min_occurances <= MAX_CHAIN_LEN |
| } |
| |
| fn update_lcs( |
| &mut self, |
| after_pos: u32, |
| token: Token, |
| histogram: &Histogram, |
| before: &[Token], |
| after: &[Token], |
| ) -> u32 { |
| let mut next_token_idx2 = after_pos + 1; |
| let mut occurances_iter = histogram.token_occurances(token).iter().copied(); |
| let mut token_idx1 = occurances_iter.next().unwrap(); |
| |
| 'occurances_iter: loop { |
| let mut occurances = histogram.num_token_occurances(token); |
| let mut start1 = token_idx1; |
| let mut start2 = after_pos; |
| loop { |
| if start1 == 0 || start2 == 0 { |
| break; |
| } |
| let token1 = before.get(start1 as usize - 1); |
| let token2 = after.get(start2 as usize - 1); |
| if matches!((token1, token2), (Some(token1), Some(token2)) if token1 == token2) { |
| start1 -= 1; |
| start2 -= 1; |
| let new_occurances = histogram.num_token_occurances(before[start1 as usize]); |
| occurances = occurances.min(new_occurances); |
| } else { |
| break; |
| } |
| } |
| |
| let mut end1 = token_idx1 + 1; |
| let mut end2 = after_pos + 1; |
| |
| loop { |
| let token1 = before.get(end1 as usize); |
| let token2 = after.get(end2 as usize); |
| if matches!((token1, token2), (Some(token1), Some(token2)) if token1 == token2) { |
| let new_occurances = histogram.num_token_occurances(before[end1 as usize]); |
| occurances = occurances.min(new_occurances); |
| end1 += 1; |
| end2 += 1; |
| } else { |
| break; |
| } |
| } |
| |
| if next_token_idx2 < end2 { |
| next_token_idx2 = end2; |
| } |
| |
| let len = end2 - start2; |
| debug_assert_eq!(len, end1 - start1); |
| if self.lcs.len < len || self.min_occurances > occurances { |
| self.min_occurances = occurances; |
| self.lcs = Lcs { before_start: start1, after_start: start2, len }; |
| } |
| |
| loop { |
| if let Some(next_token_idx) = occurances_iter.next() { |
| if next_token_idx > end2 { |
| token_idx1 = next_token_idx; |
| break; |
| } |
| } else { |
| break 'occurances_iter; |
| } |
| } |
| } |
| |
| next_token_idx2 |
| } |
| } |