blob: fc57edc77d9b605e5b736bec66189965ba55a5a8 [file] [log] [blame] [edit]
//! Common functions for [Longest common subsequences](https://en.wikipedia.org/wiki/Longest_common_subsequence_problem)
//! on slice.
cfg_prettytable! {
use crate::format_table;
use prettytable::{Cell, Row};
}
use std::cmp::max;
#[derive(Debug)]
pub struct Table<'a, T: 'a> {
x: &'a [T],
y: &'a [T],
table: Vec<Vec<usize>>,
}
/// Implements Longest Common Subsequences Table
/// Memory requirement: O(N^2)
///
/// Based on [Wikipedia article](https://en.wikipedia.org/wiki/Longest_common_subsequence_problem)
impl<'a, T> Table<'a, T>
where
T: PartialEq,
{
/// Creates new table for search common subsequences in x and y
pub fn new(x: &'a [T], y: &'a [T]) -> Table<'a, T> {
let x_len = x.len() + 1;
let y_len = y.len() + 1;
let mut table = vec![vec![0; y_len]; x_len];
for i in 1..x_len {
for j in 1..y_len {
table[i][j] = if x[i - 1] == y[j - 1] {
table[i - 1][j - 1] + 1
} else {
max(table[i][j - 1], table[i - 1][j])
};
}
}
Table { x, y, table }
}
fn seq_iter(&self) -> TableIter<T> {
TableIter {
x: self.x.len(),
y: self.y.len(),
table: self,
}
}
fn get_match(&self, x: usize, y: usize, len: usize) -> Match<T> {
Match {
x,
y,
len,
table: self,
}
}
/// Returns matches between X and Y
pub fn matches(&self) -> Vec<Match<T>> {
let mut matches: Vec<Match<T>> = Vec::new();
for (x, y) in self.seq_iter() {
if let Some(last) = matches.last_mut() {
if last.x == x + 1 && last.y == y + 1 {
last.x = x;
last.y = y;
last.len += 1;
continue;
}
}
matches.push(self.get_match(x, y, 1));
}
matches.reverse();
matches
}
/// Returns matches between X and Y with zero-len match at the end
pub fn matches_zero(&self) -> Vec<Match<T>> {
let mut matches = self.matches();
matches.push(self.get_match(self.x.len(), self.y.len(), 0));
matches
}
/// Find longest sequence
pub fn longest_seq(&self) -> Vec<&T> {
self.matches();
let mut common: Vec<_> = self.seq_iter().map(|(x, _y)| &self.x[x]).collect();
common.reverse();
common
}
}
#[cfg(feature = "prettytable-rs")]
/// Prints pretty-table for LCS
impl<'a, T> std::fmt::Display for Table<'a, T>
where
T: std::fmt::Display,
{
fn fmt(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
let mut table = format_table::new();
let mut header = vec!["".to_string(), "Ø".to_string()];
for i in self.x {
header.push(format!("{}", i));
}
table.set_titles(Row::new(
header.into_iter().map(|i| Cell::new(&i)).collect(),
));
for j in 0..=self.y.len() {
let mut row = vec![if j == 0 {
"Ø".to_string()
} else {
format!("{}", self.y[j - 1])
}];
for i in 0..=self.x.len() {
row.push(format!("{}", self.table[i][j]));
}
table.add_row(row.into_iter().map(|i| Cell::new(&i)).collect());
}
write!(formatter, "\n{}", table)
}
}
struct TableIter<'a, T: 'a> {
x: usize,
y: usize,
table: &'a Table<'a, T>,
}
impl<'a, T> Iterator for TableIter<'a, T> {
type Item = (usize, usize);
fn next(&mut self) -> Option<Self::Item> {
let table = &self.table.table;
while self.x != 0 && self.y != 0 {
let cur = table[self.x][self.y];
if cur == table[self.x - 1][self.y] {
self.x -= 1;
continue;
}
self.y -= 1;
if cur == table[self.x][self.y] {
continue;
}
self.x -= 1;
return Some((self.x, self.y));
}
None
}
}
pub struct Match<'a, T: 'a> {
pub x: usize,
pub y: usize,
pub len: usize,
table: &'a Table<'a, T>,
}
impl<'a, T> Match<'a, T> {
/// Returns matched sequence
pub fn seq(&self) -> &[T] {
&self.table.x[self.x..(self.x + self.len)]
}
}
#[test]
fn test_table() {
let x = vec!["A", "G", "C", "A", "T"];
let y = vec!["G", "A", "C"];
let table = Table::new(&x, &y);
assert_eq!(
format!("{}", table),
r#"
┌───┬───┬───┬───┬───┬───┬───┐
Ø A G C A T
├───┼───┼───┼───┼───┼───┼───┤
Ø 0 0 0 0 0 0
├───┼───┼───┼───┼───┼───┼───┤
G 0 0 1 1 1 1
├───┼───┼───┼───┼───┼───┼───┤
A 0 1 1 1 2 2
├───┼───┼───┼───┼───┼───┼───┤
C 0 1 1 2 2 2
└───┴───┴───┴───┴───┴───┴───┘
"#
);
assert_eq!(table.longest_seq(), vec![&"A", &"C"]);
}
#[test]
fn test_table_match() {
let test_v = vec![
(
"The quick brown fox jumps over the lazy dog",
"The quick brown dog leaps over the lazy cat",
"The quick brown o ps over the lazy ",
vec!["The quick brown ", "o", " ", "ps over the lazy "],
),
("ab:c", "ba:b:c", "ab:c", vec!["a", "b:c"]),
(
"The red brown fox jumped over the rolling log",
"The brown spotted fox leaped over the rolling log",
"The brown fox ped over the rolling log",
vec!["The ", "brown ", "fox ", "ped over the rolling log"],
),
];
for (x_str, y_str, exp_str, match_exp) in test_v {
let x: Vec<_> = x_str.split("").collect();
let y: Vec<_> = y_str.split("").collect();
// Trim empty elements
let table = Table::new(&x[1..(x.len() - 1)], &y[1..(y.len() - 1)]);
let seq = table
.longest_seq()
.iter()
.map(|i| i.to_string())
.collect::<Vec<String>>()
.join("");
assert_eq!(seq, exp_str);
let matches: Vec<_> = table.matches().iter().map(|m| m.seq().join("")).collect();
assert_eq!(matches, match_exp);
}
}