blob: 0535b71488b37c129f77bfcc46b72bf5c5e312fb [file] [log] [blame]
use std::borrow::Cow;
use snowball::Among;
/// State of one stemming run: the text being processed plus a set of byte
/// offsets into it that the matching and rewriting methods read and update.
///
/// All offsets are byte indices into `current` (they are used directly for
/// string slicing and `is_char_boundary` checks), not character counts.
#[derive(Debug, Clone)]
pub struct SnowballEnv<'a> {
/// The text being processed. Starts out borrowed from the caller and is
/// replaced by an owned string the first time it is rewritten.
pub current: Cow<'a, str>,
/// Current position; forward operations read at it, backward operations
/// read just before it.
pub cursor: usize,
/// Forward operations refuse to match once `cursor` reaches this offset.
pub limit: usize,
/// Backward operations refuse to match once `cursor` is at or before this offset.
pub limit_backward: usize,
/// Start of the slice that `slice_from` / `slice_del` / `slice_to` operate on.
pub bra: usize,
/// End (exclusive) of the slice that `slice_from` / `slice_del` / `slice_to` operate on.
pub ket: usize,
}
impl<'a> SnowballEnv<'a> {
    /// Creates an environment over `value`.
    ///
    /// The cursor and backward limit start at offset 0, the forward limit at
    /// the end of the text, and the `bra`/`ket` slice spans the whole text.
    pub fn create(value: &'a str) -> Self {
        let len = value.len();
        SnowballEnv {
            current: Cow::from(value),
            cursor: 0,
            limit: len,
            limit_backward: 0,
            bra: 0,
            ket: len,
        }
    }

    /// Consumes the environment and returns its text.
    pub fn get_current(self) -> Cow<'a, str> {
        self.current
    }

    /// Replaces the bytes in `bra..ket` with `s` and returns the change in
    /// length (new length minus old length of the replaced span).
    ///
    /// `limit` is shifted by that amount. A cursor at or after `ket` is
    /// shifted with the text; a cursor strictly inside the replaced span is
    /// moved back to `bra`; a cursor at or before `bra` is left alone.
    ///
    /// # Panics
    ///
    /// Panics if `bra` or `ket` is out of range or not on a char boundary.
    fn replace_s(&mut self, bra: usize, ket: usize, s: &str) -> i32 {
        let adjustment = s.len() as i32 - (ket as i32 - bra as i32);

        let mut result = String::with_capacity(self.current.len() + s.len());
        result.push_str(&self.current[..bra]);
        result.push_str(s);
        result.push_str(&self.current[ket..]);

        // The offsets are kept as `usize`, so the signed adjustment is applied
        // through an `i32` round trip.
        self.limit = (self.limit as i32 + adjustment) as usize;
        if self.cursor >= ket {
            self.cursor = (self.cursor as i32 + adjustment) as usize;
        } else if self.cursor > bra {
            self.cursor = bra;
        }

        self.current = Cow::from(result);
        adjustment
    }

    /// Checks whether `s` occurs at the cursor and ends at or before `limit`.
    /// If so, moves the cursor to the end of `s` and returns `true`;
    /// otherwise leaves the cursor untouched and returns `false`.
    ///
    /// An empty `s` always matches as long as the cursor is not past `limit`.
    pub fn eq_s(&mut self, s: &str) -> bool {
        // The whole literal has to fit in front of `limit`; text beyond the
        // limit must never take part in a match.
        let end = match self.cursor.checked_add(s.len()) {
            Some(end) if end <= self.limit => end,
            _ => return false,
        };
        // Byte-wise comparison: if the bytes equal a valid `&str`, `end` is
        // necessarily a char boundary, so no boundary fix-up is needed.
        if self.current.as_bytes().get(self.cursor..end) == Some(s.as_bytes()) {
            self.cursor = end;
            true
        } else {
            false
        }
    }

    /// Checks whether `s` occurs immediately before the cursor without
    /// reaching back past `limit_backward`.
    /// If so, moves the cursor to the beginning of `s` and returns `true`.
    pub fn eq_s_b(&mut self, s: &str) -> bool {
        let start = match self.cursor.checked_sub(s.len()) {
            Some(start) if start >= self.limit_backward => start,
            _ => return false,
        };
        // If `start` falls inside a multi-byte character, `s` cannot match.
        if self.current.is_char_boundary(start) && self.current[start..].starts_with(s) {
            self.cursor = start;
            true
        } else {
            false
        }
    }

    /// Replaces the text between `bra` and `ket` with `s`. Always returns `true`.
    pub fn slice_from(&mut self, s: &str) -> bool {
        let (bra, ket) = (self.bra, self.ket);
        self.replace_s(bra, ket, s);
        true
    }

    /// Moves the cursor to the start of the next character.
    ///
    /// Does nothing when the cursor is already at (or past) the end of the
    /// text, so the scan for the next boundary always terminates.
    pub fn next_char(&mut self) {
        if self.cursor >= self.current.len() {
            return;
        }
        self.cursor += 1;
        while !self.current.is_char_boundary(self.cursor) {
            self.cursor += 1;
        }
    }

    /// Moves the cursor to the start of the previous character.
    ///
    /// Does nothing when the cursor is already at offset 0, so the cursor
    /// can never underflow.
    pub fn previous_char(&mut self) {
        if self.cursor == 0 {
            return;
        }
        self.cursor -= 1;
        while !self.current.is_char_boundary(self.cursor) {
            self.cursor -= 1;
        }
    }

    /// Returns the byte offset reached by moving `delta` characters from the
    /// cursor (forward for positive `delta`, backward for negative).
    ///
    /// The cursor itself is not moved. The result is not clamped: hopping
    /// past the end yields an offset greater than the text length, and
    /// hopping before the start yields a negative value, so callers must
    /// range-check the result.
    pub fn byte_index_for_hop(&self, delta: i32) -> i32 {
        let len = self.current.len();
        if delta > 0 {
            let mut pos = self.cursor;
            for _ in 0..delta {
                pos += 1;
                // Skip UTF-8 continuation bytes; stop scanning once off the end.
                while pos <= len && !self.current.is_char_boundary(pos) {
                    pos += 1;
                }
            }
            pos as i32
        } else if delta < 0 {
            let mut pos = self.cursor as i32;
            for _ in delta..0 {
                pos -= 1;
                while pos >= 0 && !self.current.is_char_boundary(pos as usize) {
                    pos -= 1;
                }
            }
            pos
        } else {
            self.cursor as i32
        }
    }

    // A grouping is represented by a minimum code point, a maximum code point,
    // and a bitfield of which code points in that range are in the grouping.
    // For example, in english.sbl, valid_LI is 'cdeghkmnrt'.
    // The minimum and maximum code points are 99 and 116,
    // so every time one of these grouping functions is called for g_valid_LI,
    // min must be 99 and max must be 116. There are 18 code points within that
    // range (inclusive) so the grouping is represented with 18 bits, plus 6 bits of padding:
    //
    // cdefghij klmnopqr st
    // 11101100 10110001 01000000
    //
    // The first bit is the least significant.
    // Those three bytes become &[0b00110111, 0b10001101, 0b00000010],
    // which is &[55, 141, 2], which is how g_valid_LI is defined in english.rs.

    /// Returns whether `ch` belongs to the grouping described by the bit
    /// table `chars` covering the code points `min..=max`.
    ///
    /// # Panics
    ///
    /// Panics if `chars` is too short to hold a bit for a code point that
    /// lies inside `min..=max`.
    fn grouping_contains(chars: &[u8], min: u32, max: u32, ch: char) -> bool {
        let code = ch as u32;
        if code < min || code > max {
            return false;
        }
        let offset = code - min;
        (chars[(offset >> 3) as usize] & (1u8 << (offset & 0x7))) != 0
    }

    /// Returns the character that starts at the cursor, if any.
    fn char_at_cursor(&self) -> Option<char> {
        self.current[self.cursor..].chars().next()
    }

    /// Checks whether the character at the cursor is in the grouping.
    /// If so, moves the cursor past it and returns `true`.
    pub fn in_grouping(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
        if self.cursor >= self.limit {
            return false;
        }
        match self.char_at_cursor() {
            Some(ch) if Self::grouping_contains(chars, min, max, ch) => {
                self.next_char();
                true
            }
            _ => false,
        }
    }

    /// Checks whether the character just before the cursor is in the grouping.
    /// If so, moves the cursor in front of it and returns `true`.
    pub fn in_grouping_b(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
        if self.cursor <= self.limit_backward {
            return false;
        }
        let origin = self.cursor;
        // Step back to read the preceding character; undo the step on a miss.
        self.previous_char();
        let matched = match self.char_at_cursor() {
            Some(ch) => Self::grouping_contains(chars, min, max, ch),
            None => false,
        };
        if !matched {
            self.cursor = origin;
        }
        matched
    }

    /// Checks whether the character at the cursor is *not* in the grouping.
    /// If so, moves the cursor past it and returns `true`.
    pub fn out_grouping(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
        if self.cursor >= self.limit {
            return false;
        }
        match self.char_at_cursor() {
            Some(ch) if !Self::grouping_contains(chars, min, max, ch) => {
                self.next_char();
                true
            }
            _ => false,
        }
    }

    /// Checks whether the character just before the cursor is *not* in the
    /// grouping. If so, moves the cursor in front of it and returns `true`.
    pub fn out_grouping_b(&mut self, chars: &[u8], min: u32, max: u32) -> bool {
        if self.cursor <= self.limit_backward {
            return false;
        }
        let origin = self.cursor;
        // Step back to read the preceding character; undo the step on a miss.
        self.previous_char();
        let matched = match self.char_at_cursor() {
            Some(ch) => !Self::grouping_contains(chars, min, max, ch),
            None => false,
        };
        if !matched {
            self.cursor = origin;
        }
        matched
    }

    /// Removes the text between `bra` and `ket`. Always returns `true`.
    pub fn slice_del(&mut self) -> bool {
        self.slice_from("")
    }

    /// Replaces the bytes in `bra..ket` (the arguments, not the fields) with
    /// `s`, then shifts the `bra` and `ket` fields by the resulting length
    /// change when they lie at or after the argument `bra`.
    pub fn insert(&mut self, bra: usize, ket: usize, s: &str) {
        let adjustment = self.replace_s(bra, ket, s);
        if bra <= self.bra {
            self.bra = (self.bra as i32 + adjustment) as usize;
        }
        if bra <= self.ket {
            self.ket = (self.ket as i32 + adjustment) as usize;
        }
    }

    /// Returns a copy of the text between `bra` and `ket`.
    pub fn slice_to(&mut self) -> String {
        self.current[self.bra..self.ket].to_string()
    }

    /// Looks for an entry of `amongs` that matches the text starting at the
    /// cursor, without reading at or beyond `limit`.
    ///
    /// Each entry is read as `(string, fallback index, result, optional
    /// predicate)`. The table is binary-searched by byte order (it is
    /// presumably required to be sorted that way; verify against the table
    /// generator). Starting from the candidate the search ends on, an entry
    /// is accepted when its whole string matched and its predicate, if any,
    /// returns `true`; otherwise the fallback index is followed until it is
    /// negative.
    ///
    /// Returns the accepted entry's result with the cursor placed just after
    /// the matched string, or `0` when no entry is accepted.
    ///
    /// # Panics
    ///
    /// Panics if `amongs` is empty.
    pub fn find_among<T>(&mut self, amongs: &[Among<T>], context: &mut T) -> i32 {
        use std::cmp::min;
        let mut i: i32 = 0;
        let mut j: i32 = amongs.len() as i32;
        let c = self.cursor;
        let l = self.limit;
        // Number of leading bytes already known to agree with entries `i` and `j`.
        let mut common_i = 0;
        let mut common_j = 0;
        let mut first_key_inspected = false;
        loop {
            let k = i + ((j - i) >> 1);
            let mut diff: i32 = 0;
            // Bytes shared with both bounds are shared with everything in between.
            let mut common = min(common_i, common_j);
            let w = &amongs[k as usize];
            for lvar in common..w.0.len() {
                if c + common == l {
                    // Ran into the limit: the text sorts before this entry.
                    diff = -1;
                    break;
                }
                diff = self.current.as_bytes()[c + common] as i32 - w.0.as_bytes()[lvar] as i32;
                if diff != 0 {
                    break;
                }
                common += 1;
            }
            if diff < 0 {
                j = k;
                common_j = common;
            } else {
                i = k;
                common_i = common;
            }
            if j - i <= 1 {
                if i > 0 {
                    break;
                }
                if j == i {
                    break;
                }
                // Entry 0 is only compared when `k == 0`; allow one extra
                // round so that it gets inspected before giving up.
                if first_key_inspected {
                    break;
                }
                first_key_inspected = true;
            }
        }
        loop {
            let w = &amongs[i as usize];
            if common_i >= w.0.len() {
                self.cursor = c + w.0.len();
                if let Some(ref method) = w.3 {
                    let res = method(self, context);
                    // The predicate may have moved the cursor; put it back.
                    self.cursor = c + w.0.len();
                    if res {
                        return w.2;
                    }
                } else {
                    return w.2;
                }
            }
            i = w.1;
            if i < 0 {
                return 0;
            }
        }
    }

    /// Backward counterpart of `find_among`: looks for an entry of `amongs`
    /// that matches the text ending at the cursor, without reading before
    /// `limit_backward`.
    ///
    /// Entries are compared from their last byte towards their first (the
    /// table is presumably sorted in that reversed byte order; verify
    /// against the table generator). Acceptance, predicates and fallback
    /// indices work as in `find_among`.
    ///
    /// Returns the accepted entry's result with the cursor placed at the
    /// start of the matched string, or `0` when no entry is accepted.
    ///
    /// # Panics
    ///
    /// Panics if `amongs` is empty.
    pub fn find_among_b<T>(&mut self, amongs: &[Among<T>], context: &mut T) -> i32 {
        let mut i: i32 = 0;
        let mut j: i32 = amongs.len() as i32;
        let c = self.cursor;
        let lb = self.limit_backward;
        // Number of trailing bytes already known to agree with entries `i` and `j`.
        let mut common_i = 0;
        let mut common_j = 0;
        let mut first_key_inspected = false;
        loop {
            let k = i + ((j - i) >> 1);
            let mut diff: i32 = 0;
            let mut common = std::cmp::min(common_i, common_j);
            let w = &amongs[k as usize];
            for lvar in (0..w.0.len() - common).rev() {
                if c - common == lb {
                    // Ran into the backward limit: the text sorts before this entry.
                    diff = -1;
                    break;
                }
                diff = self.current.as_bytes()[c - common - 1] as i32 - w.0.as_bytes()[lvar] as i32;
                if diff != 0 {
                    break;
                }
                common += 1;
            }
            if diff < 0 {
                j = k;
                common_j = common;
            } else {
                i = k;
                common_i = common;
            }
            if j - i <= 1 {
                if i > 0 {
                    break;
                }
                if j == i {
                    break;
                }
                // Entry 0 is only compared when `k == 0`; allow one extra
                // round so that it gets inspected before giving up.
                if first_key_inspected {
                    break;
                }
                first_key_inspected = true;
            }
        }
        loop {
            let w = &amongs[i as usize];
            if common_i >= w.0.len() {
                self.cursor = c - w.0.len();
                if let Some(ref method) = w.3 {
                    let res = method(self, context);
                    // The predicate may have moved the cursor; put it back.
                    self.cursor = c - w.0.len();
                    if res {
                        return w.2;
                    }
                } else {
                    return w.2;
                }
            }
            i = w.1;
            if i < 0 {
                return 0;
            }
        }
    }
}