blob: a9d2c30afd02bb5b5ab672ecef2efaf6d303cdc6 [file] [log] [blame] [edit]
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::borrow::ToOwned;
use std::collections::hash_map::{Entry, HashMap};
use tendril::StrTendril;
/// Groups the words of `input` by their first character, using owned
/// `String`s for the stored words.
///
/// Words are the pieces obtained by splitting on the space character `' '`
/// only; tabs, newlines and other whitespace remain part of a word. Empty
/// pieces (from leading, trailing or repeated spaces) are skipped. Within
/// each bucket the words keep the order in which they appear in `input`.
fn index_words_string(input: &String) -> HashMap<char, Vec<String>> {
    let mut buckets: HashMap<char, Vec<String>> = HashMap::new();
    for piece in input.split(' ') {
        // An empty piece has no first character, so it is skipped here.
        if let Some(initial) = piece.chars().next() {
            buckets
                .entry(initial)
                .or_insert_with(Vec::new)
                .push(piece.to_owned());
        }
    }
    buckets
}
/// Groups the words of `input` by their first character, storing each word
/// as a `StrTendril`.
///
/// The input is cloned first and runs of characters are popped off the front
/// of that clone until nothing is left. Runs flagged `true` for the
/// `c != ' '` predicate are recorded as words; runs flagged `false` are
/// dropped.
fn index_words_tendril(input: &StrTendril) -> HashMap<char, Vec<StrTendril>> {
    let mut index = HashMap::new();
    let mut remaining = input.clone();
    while let Some((run, is_word)) = remaining.pop_front_char_run(|c| c != ' ') {
        if !is_word {
            // Separator run: nothing to record.
            continue;
        }
        // NOTE(review): assumes a popped run is never empty — confirm against
        // the tendril documentation for `pop_front_char_run`.
        let initial = run.chars().next().unwrap();
        match index.entry(initial) {
            Entry::Occupied(mut slot) => slot.get_mut().push(run),
            Entry::Vacant(slot) => {
                slot.insert(vec![run]);
            }
        }
    }
    index
}
// Short English sample: one line of space-separated ASCII words.
// Passed to `bench!` below as one of the benchmark/test inputs.
static EN_1: &'static str = "Days turn to nights turn to paper into rocks into plastic";
// Longer English prose sample, written as one literal with `\` line
// continuations. The two paragraphs are joined by "\n\n" with no space on
// either side, so a splitter that only breaks on ' ' treats
// "neighbors.\n\nIn" as a single word.
// NOTE(review): "thinkingand" near the end looks like a lost separator
// (possibly a dash). Left untouched because editing the literal would change
// the benchmark input — confirm against the upstream text.
static EN_2: &'static str =
"Here the notes in my laboratory journal cease. I was able to write the last \
words only with great effort. By now it was already clear to me that LSD had \
been the cause of the remarkable experience of the previous Friday, for the \
altered perceptions were of the same type as before, only much more intense. I \
had to struggle to speak intelligibly. I asked my laboratory assistant, who was \
informed of the self-experiment, to escort me home. We went by bicycle, no \
automobile being available because of wartime restrictions on their use. On the \
way home, my condition began to assume threatening forms. Everything in my \
field of vision wavered and was distorted as if seen in a curved mirror. I also \
had the sensation of being unable to move from the spot. Nevertheless, my \
assistant later told me that we had traveled very rapidly. Finally, we arrived \
at home safe and sound, and I was just barely capable of asking my companion to \
summon our family doctor and request milk from the neighbors.\n\n\
In spite of my delirious, bewildered condition, I had brief periods of clear \
and effective thinkingand chose milk as a nonspecific antidote for poisoning.";
// Korean sample text with a few embedded ASCII fragments ("Rust",
// "mozilla.org", "MIT/Apache2"). Gives the word indexers input whose
// characters are mostly multi-byte in UTF-8.
static KR_1: &'static str =
"러스트(Rust)는 모질라(mozilla.org)에서 개발하고 있는, 메모리-안전하고 병렬 \
프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. 아직 \
개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.";
// Korean sample text wrapped in HTML markup: a single `<p>` element
// containing two `<a href="...">` links. Mixes multi-byte Korean characters
// with ASCII tags and URLs.
static HTML_KR_1: &'static str =
"<p>러스트(<a href=\"http://rust-lang.org\">Rust</a>)는 모질라(<a href=\"\
https://www.mozilla.org/\">mozilla.org</a>)에서 개발하고 있는, \
메모리-안전하고 병렬 프로그래밍이 쉬운 차세대 프로그래밍 언어입니다. \
아직 개발 단계이며 많은 기능이 구현 중으로, MIT/Apache2 라이선스로 배포됩니다.</p>";
mod index_words {
macro_rules! bench {
($txt:ident) => {
#[allow(non_snake_case)]
mod $txt {
const SMALL_SIZE: usize = 65536;
const LARGE_SIZE: usize = (1 << 20);
#[bench]
fn index_words_string(b: &mut ::test::Bencher) {
let mut s = String::new();
while s.len() < SMALL_SIZE {
s.push_str(::tendril::bench::$txt);
}
b.iter(|| ::tendril::bench::index_words_string(&s));
}
#[bench]
fn index_words_tendril(b: &mut ::test::Bencher) {
let mut t = ::tendril::StrTendril::new();
while t.len() < SMALL_SIZE {
t.push_slice(::tendril::bench::$txt);
}
b.iter(|| ::tendril::bench::index_words_tendril(&t));
}
#[bench]
fn index_words_big_string(b: &mut ::test::Bencher) {
let mut s = String::new();
while s.len() < LARGE_SIZE {
s.push_str(::tendril::bench::$txt);
}
b.iter(|| ::tendril::bench::index_words_string(&s));
}
#[bench]
fn index_words_big_tendril(b: &mut ::test::Bencher) {
let mut t = ::tendril::StrTendril::new();
while t.len() < LARGE_SIZE {
t.push_slice(::tendril::bench::$txt);
}
b.iter(|| ::tendril::bench::index_words_tendril(&t));
}
#[test]
fn correctness() {
use std::borrow::ToOwned;
use tendril::bench::{index_words_string, index_words_tendril};
use tendril::SliceExt;
let txt = ::tendril::bench::$txt;
let input_string = txt.to_owned();
let count_s = index_words_string(&input_string);
let mut keys: Vec<char> = count_s.keys().cloned().collect();
keys.sort();
let input_tendril = txt.to_tendril();
let count_t = index_words_tendril(&input_tendril);
let mut keys_t: Vec<char> = count_t.keys().cloned().collect();
keys_t.sort();
assert_eq!(keys, keys_t);
for k in &keys {
let vs = &count_s[k];
let vt = &count_t[k];
assert_eq!(vs.len(), vt.len());
assert!(vs.iter().zip(vt.iter()).all(|(s, t)| **s == **t));
}
}
}
};
}
bench!(EN_1);
bench!(EN_2);
bench!(KR_1);
bench!(HTML_KR_1);
}