blob: fd780a92e8040fb1929f4b28b563cbc615e38a50 [file] [log] [blame]
use super::Result;
use alloc::vec::Vec;
use core::hash::Hash;
/// A base trait for all distance/similarity algorithms.
///
/// use textdistance::{Algorithm, Hamming};
/// let h = Hamming::default();
/// let res = h.for_str("abc", "acbd");
/// assert!(res.val() == 3);
///
pub trait Algorithm<R> {
/// Calculate distance/similarity for iterators.
///
/// use textdistance::{Algorithm, Hamming};
/// let h = Hamming::default();
/// let res = h.for_iter(1..4, 1..6);
/// assert!(res.val() == 2);
///
fn for_iter<C, E>(&self, s1: C, s2: C) -> Result<R>
where
C: Iterator<Item = E>,
E: Eq + Hash,
{
let s1: Vec<E> = s1.collect();
let s2: Vec<E> = s2.collect();
self.for_vec(&s1, &s2)
}
/// Calculate distance/similarity for vectors.
///
/// use textdistance::{Algorithm, Hamming};
/// let h = Hamming::default();
/// let res = h.for_vec(&vec![1, 2, 3], &vec![1, 3, 2, 4]);
/// assert!(res.val() == 3);
///
fn for_vec<E>(&self, s1: &[E], s2: &[E]) -> Result<R>
where
E: Eq + Hash,
{
self.for_iter(s1.iter(), s2.iter())
}
/// Calculate distance/similarity for strings.
///
/// use textdistance::{Algorithm, Hamming};
/// let h = Hamming::default();
/// let res = h.for_str("abc", "acbd");
/// assert!(res.val() == 3);
///
fn for_str(&self, s1: &str, s2: &str) -> Result<R> {
self.for_iter(s1.chars(), s2.chars())
}
/// Calculate distance/similarity for words in strings.
///
/// use textdistance::{Algorithm, Hamming};
/// let h = Hamming::default();
/// let res = h.for_words("the first edition", "the second edition");
/// assert!(res.val() == 1);
///
fn for_words(&self, s1: &str, s2: &str) -> Result<R> {
self.for_iter(s1.split_whitespace(), s2.split_whitespace())
}
/// Calculate distance/similarity for bigrams in strings.
///
/// use textdistance::{Algorithm, Hamming};
/// let h = Hamming::default();
/// let res = h.for_str("abd", "abcd");
/// assert!(res.val() == 2); // 3 bigrams (ab, bc, cd), only "ab" matches
///
fn for_bigrams(&self, s1: &str, s2: &str) -> Result<R> {
self.for_iter(bigrams(s1), bigrams(s2))
}
}
fn bigrams(s: &str) -> impl Iterator<Item = (char, char)> + '_ {
s.chars().zip(s.chars().skip(1))
}
#[cfg(test)]
mod tests {
use super::Algorithm;
use crate::Hamming;
use assert2::assert;
// use proptest::prelude::*;
use rstest::rstest;
#[rstest]
#[case(vec![], vec![], 0)]
#[case(vec![1], vec![1], 0)]
#[case(vec![1], vec![5], 1)]
#[case(vec![3], vec![5], 1)]
#[case(vec![3, 4, 5, 6], vec![1, 4, 5, 6, 7], 2)]
fn for_vec(#[case] s1: Vec<usize>, #[case] s2: Vec<usize>, #[case] exp: usize) {
let h = Hamming::default();
assert!(h.for_vec(&s1, &s2).val() == exp);
}
#[rstest]
#[case("", "", 0)]
#[case("", "\0", 1)]
#[case("", "abc", 3)]
#[case("abc", "", 3)]
#[case("sitting", "sitting", 0)]
#[case("abcdefg", "hijklmn", 7)]
#[case("karolin", "kathrin", 3)]
#[case("hello", "world", 4)]
fn for_str(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize) {
let h = Hamming::default();
assert!(h.for_str(s1, s2).val() == exp);
}
#[rstest]
#[case("", "", 0)]
#[case("", "\0", 1)]
#[case("", "abc", 1)]
#[case("abc", "", 1)]
#[case("oh hi mark", "oh hi world", 1)]
#[case("oh hi mark", "oh hi mad world", 2)]
#[case("oh hi mark", "greeting you mad world", 4)]
fn for_words(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize) {
let h = Hamming::default();
assert!(h.for_words(s1, s2).val() == exp);
}
#[rstest]
#[case("", "", 0)]
// #[case("", "a", 1)]
#[case("", "abc", 2)]
#[case("abc", "", 2)]
#[case("oh hi mark", "oh ho mark", 2)]
fn for_bigrams(#[case] s1: &str, #[case] s2: &str, #[case] exp: usize) {
let h = Hamming::default();
assert!(h.for_bigrams(s1, s2).val() == exp);
}
}