| // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT |
| // file at the top-level directory of this distribution and at |
| // http://rust-lang.org/COPYRIGHT. |
| // |
| // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| // option. This file may not be copied, modified, or distributed |
| // except according to those terms. |
| |
| use std::{ |
| fs::File, |
| io::{BufRead, BufReader}, |
| }; |
| |
| use unicode_width::{UnicodeWidthChar, UnicodeWidthStr}; |
| |
| #[test] |
| fn test_str() { |
| assert_eq!("hello".width(), 10); |
| assert_eq!("hello".width_cjk(), 10); |
| assert_eq!("\0\0\0\x01\x01".width(), 5); |
| assert_eq!("\0\0\0\x01\x01".width_cjk(), 5); |
| assert_eq!("".width(), 0); |
| assert_eq!("".width_cjk(), 0); |
| assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(), 4); |
| assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width_cjk(), 8); |
| } |
| |
| #[test] |
| fn test_emoji() { |
| // Example from the README. |
| assert_eq!("👩".width(), 2); // Woman |
| assert_eq!("🔬".width(), 2); // Microscope |
| assert_eq!("👩🔬".width(), 4); // Woman scientist |
| } |
| |
| #[test] |
| fn test_char() { |
| assert_eq!('h'.width(), Some(2)); |
| assert_eq!('h'.width_cjk(), Some(2)); |
| assert_eq!('\x00'.width(), None); |
| assert_eq!('\x00'.width_cjk(), None); |
| assert_eq!('\x01'.width(), None); |
| assert_eq!('\x01'.width_cjk(), None); |
| assert_eq!('\u{2081}'.width(), Some(1)); |
| assert_eq!('\u{2081}'.width_cjk(), Some(2)); |
| } |
| |
| #[test] |
| fn test_char2() { |
| assert_eq!('\x0A'.width(), None); |
| assert_eq!('\x0A'.width_cjk(), None); |
| |
| assert_eq!('w'.width(), Some(1)); |
| assert_eq!('w'.width_cjk(), Some(1)); |
| |
| assert_eq!('h'.width(), Some(2)); |
| assert_eq!('h'.width_cjk(), Some(2)); |
| |
| assert_eq!('\u{AD}'.width(), Some(0)); |
| assert_eq!('\u{AD}'.width_cjk(), Some(0)); |
| |
| assert_eq!('\u{1160}'.width(), Some(0)); |
| assert_eq!('\u{1160}'.width_cjk(), Some(0)); |
| |
| assert_eq!('\u{a1}'.width(), Some(1)); |
| assert_eq!('\u{a1}'.width_cjk(), Some(2)); |
| |
| assert_eq!('\u{300}'.width(), Some(0)); |
| assert_eq!('\u{300}'.width_cjk(), Some(0)); |
| } |
| |
| #[test] |
| fn unicode_12() { |
| assert_eq!('\u{1F971}'.width(), Some(2)); |
| } |
| |
| #[test] |
| fn test_default_ignorable() { |
| assert_eq!('\u{E0000}'.width(), Some(0)); |
| |
| assert_eq!('\u{1160}'.width(), Some(0)); |
| assert_eq!('\u{3164}'.width(), Some(0)); |
| assert_eq!('\u{FFA0}'.width(), Some(0)); |
| } |
| |
| #[test] |
| fn test_jamo() { |
| assert_eq!('\u{1100}'.width(), Some(2)); |
| assert_eq!('\u{A97C}'.width(), Some(2)); |
| // Special case: U+115F HANGUL CHOSEONG FILLER |
| assert_eq!('\u{115F}'.width(), Some(2)); |
| assert_eq!('\u{1160}'.width(), Some(0)); |
| assert_eq!('\u{D7C6}'.width(), Some(0)); |
| assert_eq!('\u{11A8}'.width(), Some(0)); |
| assert_eq!('\u{D7FB}'.width(), Some(0)); |
| } |
| |
| #[test] |
| fn test_prepended_concatenation_marks() { |
| for c in [ |
| '\u{0600}', |
| '\u{0601}', |
| '\u{0602}', |
| '\u{0603}', |
| '\u{0604}', |
| '\u{06DD}', |
| '\u{110BD}', |
| '\u{110CD}', |
| ] { |
| assert_eq!(c.width(), Some(1), "{c:?} should have width 1"); |
| } |
| |
| for c in ['\u{0605}', '\u{070F}', '\u{0890}', '\u{0891}', '\u{08E2}'] { |
| assert_eq!(c.width(), Some(0), "{c:?} should have width 0"); |
| } |
| } |
| |
| #[test] |
| fn test_interlinear_annotation_chars() { |
| assert_eq!('\u{FFF9}'.width(), Some(1)); |
| assert_eq!('\u{FFFA}'.width(), Some(1)); |
| assert_eq!('\u{FFFB}'.width(), Some(1)); |
| } |
| |
| #[test] |
| fn test_hieroglyph_format_controls() { |
| assert_eq!('\u{13430}'.width(), Some(1)); |
| assert_eq!('\u{13436}'.width(), Some(1)); |
| assert_eq!('\u{1343C}'.width(), Some(1)); |
| } |
| |
| #[test] |
| fn test_marks() { |
| // Nonspacing marks have 0 width |
| assert_eq!('\u{0301}'.width(), Some(0)); |
| // Enclosing marks have 0 width |
| assert_eq!('\u{20DD}'.width(), Some(0)); |
| // Some spacing marks have width 1 |
| assert_eq!('\u{09CB}'.width(), Some(1)); |
| // But others have width 0 |
| assert_eq!('\u{09BE}'.width(), Some(0)); |
| } |
| |
| #[test] |
| fn test_devanagari_caret() { |
| assert_eq!('\u{A8FA}'.width(), Some(0)); |
| } |
| |
| #[test] |
| fn test_canonical_equivalence() { |
| let norm_file = BufReader::new( |
| File::open("tests/NormalizationTest.txt") |
| .expect("run `unicode.py` first to download `NormalizationTest.txt`"), |
| ); |
| for line in norm_file.lines() { |
| let line = line.unwrap(); |
| if line.is_empty() || line.starts_with('#') || line.starts_with('@') { |
| continue; |
| } |
| |
| let mut forms_iter = line.split(';').map(|substr| -> String { |
| substr |
| .split(' ') |
| .map(|s| char::try_from(u32::from_str_radix(s, 16).unwrap()).unwrap()) |
| .collect() |
| }); |
| |
| let orig = forms_iter.next().unwrap(); |
| let nfc = forms_iter.next().unwrap(); |
| let nfd = forms_iter.next().unwrap(); |
| let nfkc = forms_iter.next().unwrap(); |
| let nfkd = forms_iter.next().unwrap(); |
| |
| assert_eq!( |
| orig.width(), |
| nfc.width(), |
| "width of X == {orig:?} differs from toNFC(X) == {nfc:?}" |
| ); |
| |
| assert_eq!( |
| orig.width(), |
| nfd.width(), |
| "width of X == {orig:?} differs from toNFD(X) == {nfd:?}" |
| ); |
| |
| assert_eq!( |
| nfkc.width(), |
| nfkd.width(), |
| "width of toNFKC(X) == {nfkc:?} differs from toNFKD(X) == {nfkd:?}" |
| ); |
| |
| assert_eq!( |
| orig.width_cjk(), |
| nfc.width_cjk(), |
| "CJK width of X == {orig:?} differs from toNFC(X) == {nfc:?}" |
| ); |
| |
| assert_eq!( |
| orig.width_cjk(), |
| nfd.width_cjk(), |
| "CJK width of X == {orig:?} differs from toNFD(X) == {nfd:?}" |
| ); |
| |
| assert_eq!( |
| nfkc.width_cjk(), |
| nfkd.width_cjk(), |
| "CJK width of toNFKC(X) == {nfkc:?} differs from toNFKD(X) == {nfkd:?}" |
| ); |
| } |
| } |
| |
| #[test] |
| fn test_emoji_presentation() { |
| assert_eq!('\u{0023}'.width(), Some(1)); |
| assert_eq!('\u{FE0F}'.width(), Some(0)); |
| assert_eq!(UnicodeWidthStr::width("\u{0023}\u{FE0F}"), 2); |
| assert_eq!(UnicodeWidthStr::width("a\u{0023}\u{FE0F}a"), 4); |
| assert_eq!(UnicodeWidthStr::width("\u{0023}a\u{FE0F}"), 2); |
| assert_eq!(UnicodeWidthStr::width("a\u{FE0F}"), 1); |
| assert_eq!(UnicodeWidthStr::width("\u{0023}\u{0023}\u{FE0F}a"), 4); |
| |
| assert_eq!(UnicodeWidthStr::width("\u{002A}\u{FE0F}"), 2); |
| assert_eq!(UnicodeWidthStr::width("\u{23F9}\u{FE0F}"), 2); |
| assert_eq!(UnicodeWidthStr::width("\u{24C2}\u{FE0F}"), 2); |
| assert_eq!(UnicodeWidthStr::width("\u{1F6F3}\u{FE0F}"), 2); |
| assert_eq!(UnicodeWidthStr::width("\u{1F700}\u{FE0F}"), 1); |
| } |
| |
| #[test] |
| fn test_text_presentation() { |
| assert_eq!('\u{FE0E}'.width(), Some(0)); |
| |
| assert_eq!('\u{2648}'.width(), Some(2)); |
| assert_eq!("\u{2648}\u{FE0E}".width(), 1); |
| assert_eq!("\u{2648}\u{FE0E}".width_cjk(), 2); |
| |
| assert_eq!("\u{1F21A}\u{FE0E}".width(), 2); |
| assert_eq!("\u{1F21A}\u{FE0E}".width_cjk(), 2); |
| |
| assert_eq!("\u{0301}\u{FE0E}".width(), 0); |
| assert_eq!("\u{0301}\u{FE0E}".width_cjk(), 0); |
| |
| assert_eq!("a\u{FE0E}".width(), 1); |
| assert_eq!("a\u{FE0E}".width_cjk(), 1); |
| |
| assert_eq!("𘀀\u{FE0E}".width(), 2); |
| assert_eq!("𘀀\u{FE0E}".width_cjk(), 2); |
| } |
| |
| #[test] |
| fn test_control_line_break() { |
| assert_eq!('\u{2028}'.width(), Some(1)); |
| assert_eq!('\u{2029}'.width(), Some(1)); |
| assert_eq!("\r".width(), 1); |
| assert_eq!("\n".width(), 1); |
| assert_eq!("\r\n".width(), 1); |
| assert_eq!("\0".width(), 1); |
| assert_eq!("1\t2\r\n3\u{85}4".width(), 7); |
| } |
| |
| #[test] |
| fn char_str_consistent() { |
| let mut s = String::with_capacity(4); |
| for c in '\0'..=char::MAX { |
| s.clear(); |
| s.push(c); |
| assert_eq!(c.width().unwrap_or(1), s.width()) |
| } |
| } |
| |
| #[test] |
| fn test_lisu_tones() { |
| for c in '\u{A4F8}'..='\u{A4FD}' { |
| assert_eq!(c.width(), Some(1)); |
| assert_eq!(String::from(c).width(), 1); |
| } |
| for c1 in '\u{A4F8}'..='\u{A4FD}' { |
| for c2 in '\u{A4F8}'..='\u{A4FD}' { |
| let mut s = String::with_capacity(8); |
| s.push(c1); |
| s.push(c2); |
| match (c1, c2) { |
| ('\u{A4F8}'..='\u{A4FB}', '\u{A4FC}'..='\u{A4FD}') => assert_eq!(s.width(), 1), |
| _ => assert_eq!(s.width(), 2), |
| } |
| } |
| } |
| |
| assert_eq!("ꓪꓹ".width(), 2); |
| assert_eq!("ꓪꓹꓼ".width(), 2); |
| assert_eq!("ꓪꓹꓹ".width(), 3); |
| assert_eq!("ꓪꓼꓼ".width(), 3); |
| } |