| use alloc::boxed::Box; |
| use core::convert::TryInto; |
| |
| /// The Hashtable trait used by the compression to store hashed bytes to their position. |
| /// `val` can be maximum the size of the input in bytes. |
| /// |
| /// `pos` can have a maximum value of u16::MAX or 65535 |
| /// If the hashtable is smaller it needs to reduce the pos to its space, e.g. by right |
| /// shifting. |
| /// |
| /// Duplication dictionary size. |
| /// |
| /// Every four bytes is assigned an entry. When this number is lower, fewer entries exists, and |
| /// thus collisions are more likely, hurting the compression ratio. |
| |
| /// hashes and right shifts to a maximum value of 16bit, 65535 |
| /// The right shift is done in order to not exceed, the hashtables capacity |
| #[inline] |
| fn hash(sequence: u32) -> u32 { |
| (sequence.wrapping_mul(2654435761_u32)) >> 16 |
| } |
| |
| /// hashes and right shifts to a maximum value of 16bit, 65535 |
| /// The right shift is done in order to not exceed, the hashtables capacity |
| #[cfg(target_pointer_width = "64")] |
| #[inline] |
| fn hash5(sequence: usize) -> u32 { |
| let primebytes = if cfg!(target_endian = "little") { |
| 889523592379_usize |
| } else { |
| 11400714785074694791_usize |
| }; |
| (((sequence << 24).wrapping_mul(primebytes)) >> 48) as u32 |
| } |
| |
| pub trait HashTable { |
| fn get_at(&self, pos: usize) -> usize; |
| fn put_at(&mut self, pos: usize, val: usize); |
| fn clear(&mut self); |
| #[inline] |
| #[cfg(target_pointer_width = "64")] |
| fn get_hash_at(input: &[u8], pos: usize) -> usize { |
| hash5(super::compress::get_batch_arch(input, pos)) as usize |
| } |
| #[inline] |
| #[cfg(target_pointer_width = "32")] |
| fn get_hash_at(input: &[u8], pos: usize) -> usize { |
| hash(super::compress::get_batch(input, pos)) as usize |
| } |
| } |
| |
| const HASHTABLE_SIZE_4K: usize = 4 * 1024; |
| const HASHTABLE_BIT_SHIFT_4K: usize = 4; |
| |
| #[derive(Debug)] |
| #[repr(align(64))] |
| pub struct HashTable4KU16 { |
| dict: Box<[u16; HASHTABLE_SIZE_4K]>, |
| } |
| impl HashTable4KU16 { |
| #[inline] |
| pub fn new() -> Self { |
| // This generates more efficient assembly in contrast to Box::new(slice), because of an |
| // optmized call alloc_zeroed, vs. alloc + memset |
| // try_into is optimized away |
| let dict = alloc::vec![0; HASHTABLE_SIZE_4K] |
| .into_boxed_slice() |
| .try_into() |
| .unwrap(); |
| Self { dict } |
| } |
| } |
| impl HashTable for HashTable4KU16 { |
| #[inline] |
| fn get_at(&self, hash: usize) -> usize { |
| self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize |
| } |
| #[inline] |
| fn put_at(&mut self, hash: usize, val: usize) { |
| self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u16; |
| } |
| #[inline] |
| fn clear(&mut self) { |
| self.dict.fill(0); |
| } |
| #[inline] |
| fn get_hash_at(input: &[u8], pos: usize) -> usize { |
| hash(super::get_batch(input, pos)) as usize |
| } |
| } |
| |
| #[derive(Debug)] |
| pub struct HashTable4K { |
| dict: Box<[u32; HASHTABLE_SIZE_4K]>, |
| } |
| impl HashTable4K { |
| #[inline] |
| pub fn new() -> Self { |
| let dict = alloc::vec![0; HASHTABLE_SIZE_4K] |
| .into_boxed_slice() |
| .try_into() |
| .unwrap(); |
| Self { dict } |
| } |
| |
| #[cold] |
| #[allow(dead_code)] |
| pub fn reposition(&mut self, offset: u32) { |
| for i in self.dict.iter_mut() { |
| *i = i.saturating_sub(offset); |
| } |
| } |
| } |
| impl HashTable for HashTable4K { |
| #[inline] |
| fn get_at(&self, hash: usize) -> usize { |
| self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize |
| } |
| #[inline] |
| fn put_at(&mut self, hash: usize, val: usize) { |
| self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u32; |
| } |
| #[inline] |
| fn clear(&mut self) { |
| self.dict.fill(0); |
| } |
| } |
| |
| const HASHTABLE_SIZE_8K: usize = 8 * 1024; |
| const HASH_TABLE_BIT_SHIFT_8K: usize = 3; |
| |
| #[derive(Debug)] |
| pub struct HashTable8K { |
| dict: Box<[u32; HASHTABLE_SIZE_8K]>, |
| } |
| #[allow(dead_code)] |
| impl HashTable8K { |
| #[inline] |
| pub fn new() -> Self { |
| let dict = alloc::vec![0; HASHTABLE_SIZE_8K] |
| .into_boxed_slice() |
| .try_into() |
| .unwrap(); |
| |
| Self { dict } |
| } |
| } |
| impl HashTable for HashTable8K { |
| #[inline] |
| fn get_at(&self, hash: usize) -> usize { |
| self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] as usize |
| } |
| #[inline] |
| fn put_at(&mut self, hash: usize, val: usize) { |
| self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] = val as u32; |
| } |
| #[inline] |
| fn clear(&mut self) { |
| self.dict.fill(0); |
| } |
| } |