| //! Helper functions providing the default normalized implementation of distance/similarity algorithms for strings. |
| //! |
| //! See also [`textdistance::str`](super::str) for non-normalized distance. |
| use super::*; |
| |
| /// Calculate normalized unrestricted [Damerau-Levenshtein distance][1] for two strings. |
| /// |
| /// A wrapper for [`DamerauLevenshtein`]. |
| /// |
| /// use textdistance::nstr::damerau_levenshtein; |
| /// assert!(damerau_levenshtein("abc", "acbd") == 2./4.); // "bc" swapped and "d" added |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance |
| #[cfg(feature = "std")] |
| pub fn damerau_levenshtein(s1: &str, s2: &str) -> f64 { |
| DamerauLevenshtein::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized restricted [Damerau-Levenshtein distance][1] for two strings. |
| /// |
| /// A wrapper for [`DamerauLevenshtein`]. |
| /// |
| /// use textdistance::nstr::damerau_levenshtein; |
| /// assert!(damerau_levenshtein("abc", "acbd") == 2./4.); // "bc" swapped and "d" added |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance |
| #[cfg(feature = "std")] |
| pub fn damerau_levenshtein_restricted(s1: &str, s2: &str) -> f64 { |
| let a = DamerauLevenshtein { |
| restricted: true, |
| ..Default::default() |
| }; |
| a.for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Hamming distance][1] for two strings. |
| /// |
| /// A wrapper for [`Hamming`]. |
| /// |
| /// use textdistance::nstr::hamming; |
| /// assert!(hamming("abc", "acbd") == 3./4.); // only "a" matches |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Hamming_distance |
| pub fn hamming(s1: &str, s2: &str) -> f64 { |
| Hamming::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized the length of the [Longest Common SubSequence][1] for two strings. |
| /// |
| /// A wrapper for [`LCSSeq`]. |
| /// |
| /// use textdistance::nstr::lcsseq; |
| /// assert!(lcsseq("abcdef", "xbcegf") == 4./6.); // "bcef" |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Longest_common_subsequence |
| pub fn lcsseq(s1: &str, s2: &str) -> f64 { |
| LCSSeq::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized the length of the [Longest Common SubString][1] for two strings. |
| /// |
| /// A wrapper for [`LCSStr`]. |
| /// |
| /// use textdistance::nstr::lcsstr; |
| /// assert!(lcsstr("abcdef", "xbcegf") == 2./6.); // "bc" |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Longest_common_substring |
| pub fn lcsstr(s1: &str, s2: &str) -> f64 { |
| LCSStr::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Levenshtein distance][1] for two strings. |
| /// |
| /// A wrapper for [`Levenshtein`]. |
| /// |
| /// use textdistance::nstr::levenshtein; |
| /// assert!(levenshtein("abc", "acbd") == 2./4.); // add "c" at 2 and then swap "c" with "d" at 4 |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Levenshtein_distance |
| pub fn levenshtein(s1: &str, s2: &str) -> f64 { |
| Levenshtein::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Ratcliff-Obershelp normalized similarity][1] for two strings. |
| /// |
| /// A wrapper for [`RatcliffObershelp`]. |
| /// |
| /// use textdistance::nstr::ratcliff_obershelp; |
| /// assert_eq!(ratcliff_obershelp("abc", "acbd"), 0.5714285714285714); |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Gestalt_pattern_matching |
| pub fn ratcliff_obershelp(s1: &str, s2: &str) -> f64 { |
| RatcliffObershelp::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Sift4 distance][1] for two strings using the "simplest" algorithm. |
| /// |
| /// A wrapper for [`Sift4Simple`]. |
| /// |
| /// use textdistance::nstr::sift4_simple; |
| /// assert!(sift4_simple("abc", "acbd") == 2./4.); |
| /// |
| /// [1]: https://siderite.dev/blog/super-fast-and-accurate-string-distance.html |
| pub fn sift4_simple(s1: &str, s2: &str) -> f64 { |
| Sift4Simple::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Sift4 distance][1] for two strings using the "common" algorithm. |
| /// |
| /// A wrapper for [`Sift4Common`]. |
| /// |
| /// use textdistance::nstr::sift4_common; |
| /// assert!(sift4_common("abc", "acbd") == 2./4.); |
| /// |
| /// [1]: https://siderite.dev/blog/super-fast-and-accurate-string-distance.html |
| pub fn sift4_common(s1: &str, s2: &str) -> f64 { |
| Sift4Common::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Jaro normalized similarity][1] for two strings. |
| /// |
| /// A wrapper for [`Jaro`]. |
| /// |
| /// use textdistance::nstr::jaro; |
| /// assert_eq!(jaro("abc", "acbd"), 0.8055555555555555); |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance#Jaro_similarity |
| pub fn jaro(s1: &str, s2: &str) -> f64 { |
| Jaro::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Jaro-Winkler normalized similarity][1] for two strings. |
| /// |
| /// A wrapper for [`JaroWinkler`]. |
| /// |
| /// use textdistance::nstr::jaro_winkler; |
| /// assert_eq!(jaro_winkler("abc", "acbd"), 0.825); |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance |
| pub fn jaro_winkler(s1: &str, s2: &str) -> f64 { |
| JaroWinkler::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Yujian-Bo normalization][1] of [Levenshtein] for two strings. |
| /// |
| /// A wrapper for [`YujianBo`]. |
| /// |
| /// use textdistance::nstr::yujian_bo; |
| /// assert_eq!(yujian_bo("abc", "acbd"), 0.4444444444444444); |
| /// |
| /// [1]: https://ieeexplore.ieee.org/document/4160958 |
| pub fn yujian_bo(s1: &str, s2: &str) -> f64 { |
| YujianBo::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [MLIPNS normalization][1] of [Hamming] for two strings. |
| /// |
| /// A wrapper for [`MLIPNS`]. |
| /// |
| /// use textdistance::nstr::mlipns; |
| /// assert!(mlipns("abc", "acbd") == 0.); |
| /// |
| /// [1]: https://www.sial.iias.spb.su/files/386-386-1-PB.pdf |
| pub fn mlipns(s1: &str, s2: &str) -> f64 { |
| MLIPNS::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Bag distance][1] for two strings. |
| /// |
| /// A wrapper for [`Bag`]. |
| /// |
| /// use textdistance::nstr::bag; |
| /// assert!(bag("abc", "acbd") == 1./4.); |
| /// |
| /// [1]: http://www-db.disi.unibo.it/research/papers/SPIRE02.pdf |
| #[cfg(feature = "std")] |
| pub fn bag(s1: &str, s2: &str) -> f64 { |
| Bag::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [LIG3 normalization][1] of [Hamming] by [Levenshtein] for two strings. |
| /// |
| /// A wrapper for [`LIG3`]. |
| /// |
| /// use textdistance::nstr::lig3; |
| /// assert_eq!(lig3("abc", "acbd"), 0.5); |
| /// |
| /// [1]: https://github.com/chrislit/abydos/blob/master/abydos/distance/_lig3.py |
| pub fn lig3(s1: &str, s2: &str) -> f64 { |
| LIG3::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Jaccard normalized similarity][1] for two strings. |
| /// |
| /// A wrapper for [`Jaccard`]. |
| /// |
| /// use textdistance::nstr::jaccard; |
| /// assert_eq!(jaccard("abc", "acbd"), 0.75); |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Jaccard_index |
| #[cfg(feature = "std")] |
| pub fn jaccard(s1: &str, s2: &str) -> f64 { |
| Jaccard::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Sørensen–Dice normalized similarity][1] for two strings. |
| /// |
| /// A wrapper for [`SorensenDice`]. |
| /// |
| /// use textdistance::nstr::sorensen_dice; |
| /// assert_eq!(sorensen_dice("abc", "acbd"), 0.8571428571428571); |
| /// |
| /// [1]:https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient |
| #[cfg(feature = "std")] |
| pub fn sorensen_dice(s1: &str, s2: &str) -> f64 { |
| SorensenDice::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Tversky normalized similarity][1] for two strings. |
| /// |
| /// A wrapper for [`Tversky`]. |
| /// |
| /// use textdistance::nstr::tversky; |
| /// assert_eq!(tversky("abc", "acbd"), 0.75); |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Tversky_index |
| #[cfg(feature = "std")] |
| pub fn tversky(s1: &str, s2: &str) -> f64 { |
| Tversky::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Overlap normalized similarity][1] for two strings. |
| /// |
| /// A wrapper for [`Overlap`]. |
| /// |
| /// use textdistance::nstr::overlap; |
| /// assert_eq!(overlap("abc", "acbd"), 1.0); |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Overlap_coefficient |
| #[cfg(feature = "std")] |
| pub fn overlap(s1: &str, s2: &str) -> f64 { |
| Overlap::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Cosine normalized similarity][1] for two strings. |
| /// |
| /// A wrapper for [`Cosine`]. |
| /// |
| /// use textdistance::nstr::cosine; |
| /// assert_eq!(cosine("abc", "acbd"), 0.8660254037844387); |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Cosine_similarity |
| #[cfg(feature = "std")] |
| pub fn cosine(s1: &str, s2: &str) -> f64 { |
| Cosine::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized common prefix length for two strings. |
| /// |
| /// A wrapper for [`Prefix`]. |
| /// |
| /// use textdistance::nstr::prefix; |
| /// assert!(prefix("abc", "acbd") == 1./4.); // "a" |
| /// |
| pub fn prefix(s1: &str, s2: &str) -> f64 { |
| Prefix::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized common suffix length for two strings. |
| /// |
| /// A wrapper for [`Suffix`]. |
| /// |
| /// use textdistance::nstr::suffix; |
| /// assert!(suffix("abcd", "axcd") == 2./4.); // "cd" |
| /// |
| pub fn suffix(s1: &str, s2: &str) -> f64 { |
| Suffix::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized length distance for two strings. |
| /// |
| /// A wrapper for [`Length`]. |
| /// |
| /// use textdistance::nstr::length; |
| /// assert!(length("abcd", "axc") == (4. - 3.) / 4.); |
| /// |
| pub fn length(s1: &str, s2: &str) -> f64 { |
| Length::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Smith-Waterman similarity] for two strings. |
| /// |
| /// A wrapper for [`SmithWaterman`]. |
| /// |
| /// use textdistance::nstr::smith_waterman; |
| /// assert!(smith_waterman("abc", "acbd") == 1./4.); |
| /// |
| /// [Smith-Waterman similarity]: https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm |
| pub fn smith_waterman(s1: &str, s2: &str) -> f64 { |
| SmithWaterman::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Entropy]-based [normalized compression distance][1] for two strings. |
| /// |
| /// A wrapper for [`EntropyNCD`]. |
| /// |
| /// use textdistance::nstr::entropy_ncd; |
| /// assert_eq!(entropy_ncd("abc", "acbd"), 0.12174985473119697); |
| /// |
| /// [1]: https://en.wikipedia.org/wiki/Normalized_compression_distance |
| /// [Entropy]: https://en.wikipedia.org/wiki/Entropy_(information_theory) |
| #[cfg(feature = "std")] |
| pub fn entropy_ncd(s1: &str, s2: &str) -> f64 { |
| EntropyNCD::default().for_str(s1, s2).nval() |
| } |
| |
| /// Calculate normalized [Roberts similarity] for two strings. |
| /// |
| /// A wrapper for [`Roberts`]. |
| /// |
| /// use textdistance::nstr::roberts; |
| /// assert_eq!(roberts("abc", "acbd"), 0.8571428571428571); |
| /// |
| /// [Roberts similarity]: https://github.com/chrislit/abydos/blob/master/abydos/distance/_roberts.py |
| #[cfg(feature = "std")] |
| pub fn roberts(s1: &str, s2: &str) -> f64 { |
| Roberts::default().for_str(s1, s2).nval() |
| } |