vendor/gix-bitmap-0.2.11/src/ewah.rs - toolchain/rustc - Git at Google

 ///
 #[allow(clippy::empty_docs)]
 pub mod decode {
     /// The error returned by [`decode()`][super::decode()].
     #[derive(Debug, thiserror::Error)]
     #[allow(missing_docs)]
     pub enum Error {
         #[error("{}", message)]
         Corrupt { message: &'static str },
     }
 }

 /// Decode `data` as EWAH bitmap.
 pub fn decode(data: &[u8]) -> Result<(Vec, &[u8]), decode::Error> {
     use self::decode::Error;
     use crate::decode;

     let (num_bits, data) = decode::u32(data).ok_or(Error::Corrupt {
         message: "eof reading amount of bits",
     })?;
     let (len, data) = decode::u32(data).ok_or(Error::Corrupt {
         message: "eof reading chunk length",
     })?;
     let len = len as usize;

     // NOTE: git does this by copying all bytes first, and then it will change the endianness in a separate loop.
     //       Maybe it's faster, but we can't do it without unsafe. Let's leave it to the optimizer and maybe
     //       one day somebody will find out that it's worth it to use unsafe here.
     let (mut bits, data) = decode::split_at_pos(data, len * std::mem::size_of::<u64>()).ok_or(Error::Corrupt {
         message: "eof while reading bit data",
     })?;
     let mut buf = std::vec::Vec::<u64>::with_capacity(len);
     for _ in 0..len {
         let (bit_num, rest) = bits.split_at(std::mem::size_of::<u64>());
         bits = rest;
         buf.push(u64::from_be_bytes(bit_num.try_into().unwrap()))
     }

     let (rlw, data) = decode::u32(data).ok_or(Error::Corrupt {
         message: "eof while reading run length width",
     })?;

     Ok((
         Vec {
             num_bits,
             bits: buf,
             rlw: rlw.into(),
         },
         data,
     ))
 }

 mod access {
     use super::Vec;

     impl Vec {
         /// Call `f(index)` for each bit that is true, given the index of the bit that identifies it uniquely within the bit array.
         /// If `f` returns `None` the iteration will be stopped and `None` is returned.
         ///
         /// The index is sequential like in any other vector.
         pub fn for_each_set_bit(&self, mut f: impl FnMut(usize) -> Option<()>) -> Option<()> {
             let mut index = 0usize;
             let mut iter = self.bits.iter();
             while let Some(word) = iter.next() {
                 if rlw_runbit_is_set(word) {
                     let len = rlw_running_len_bits(word);
                     for _ in 0..len {
                         f(index)?;
                         index += 1;
                     }
                 } else {
                     index += usize::try_from(rlw_running_len_bits(word)).ok()?;
                 }

                 for _ in 0..rlw_literal_words(word) {
                     let word = iter
                         .next()
                         .expect("BUG: ran out of words while going through uncompressed portion");
                     for bit_index in 0..64 {
                         if word & (1 << bit_index) != 0 {
                             f(index)?;
                         }
                         index += 1;
                     }
                 }
             }
             Some(())
         }

         /// The amount of bits we are currently holding.
         pub fn num_bits(&self) -> usize {
             self.num_bits.try_into().expect("we are not on 16 bit systems")
         }
     }

     #[inline]
     fn rlw_running_len_bits(w: &u64) -> u64 {
         rlw_running_len(w) * 64
     }

     #[inline]
     fn rlw_running_len(w: &u64) -> u64 {
         (w >> 1) & RLW_LARGEST_RUNNING_COUNT
     }

     #[inline]
     fn rlw_literal_words(w: &u64) -> u64 {
         w >> (1 + RLW_RUNNING_BITS)
     }

     #[inline]
     fn rlw_runbit_is_set(w: &u64) -> bool {
         w & 1 == 1
     }

     const RLW_RUNNING_BITS: u64 = 4 * 8;
     const RLW_LARGEST_RUNNING_COUNT: u64 = (1 << RLW_RUNNING_BITS) - 1;
 }

 /// A growable collection of u64 that are seen as stream of individual bits.
 #[allow(dead_code)]
 #[derive(Clone)]
 pub struct Vec {
     num_bits: u32,
     bits: std::vec::Vec<u64>,
     /// RLW is an offset into the `bits` buffer, so `1` translates into &bits\[1] essentially.
     rlw: u64,
 }
	///
	#[allow(clippy::empty_docs)]
	pub mod decode {
	/// The error returned by [`decode()`][super::decode()].
	#[derive(Debug, thiserror::Error)]
	#[allow(missing_docs)]
	pub enum Error {
	#[error("{}", message)]
	Corrupt { message: &'static str },
	}
	}

	/// Decode `data` as EWAH bitmap.
	pub fn decode(data: &[u8]) -> Result<(Vec, &[u8]), decode::Error> {
	use self::decode::Error;
	use crate::decode;

	let (num_bits, data) = decode::u32(data).ok_or(Error::Corrupt {
	message: "eof reading amount of bits",
	})?;
	let (len, data) = decode::u32(data).ok_or(Error::Corrupt {
	message: "eof reading chunk length",
	})?;
	let len = len as usize;

	// NOTE: git does this by copying all bytes first, and then it will change the endianness in a separate loop.
	// Maybe it's faster, but we can't do it without unsafe. Let's leave it to the optimizer and maybe
	// one day somebody will find out that it's worth it to use unsafe here.
	let (mut bits, data) = decode::split_at_pos(data, len * std::mem::size_of::<u64>()).ok_or(Error::Corrupt {
	message: "eof while reading bit data",
	})?;
	let mut buf = std::vec::Vec::<u64>::with_capacity(len);
	for _ in 0..len {
	let (bit_num, rest) = bits.split_at(std::mem::size_of::<u64>());
	bits = rest;
	buf.push(u64::from_be_bytes(bit_num.try_into().unwrap()))
	}

	let (rlw, data) = decode::u32(data).ok_or(Error::Corrupt {
	message: "eof while reading run length width",
	})?;

	Ok((
	Vec {
	num_bits,
	bits: buf,
	rlw: rlw.into(),
	},
	data,
	))
	}

	mod access {
	use super::Vec;

	impl Vec {
	/// Call `f(index)` for each bit that is true, given the index of the bit that identifies it uniquely within the bit array.
	/// If `f` returns `None` the iteration will be stopped and `None` is returned.
	///
	/// The index is sequential like in any other vector.
	pub fn for_each_set_bit(&self, mut f: impl FnMut(usize) -> Option<()>) -> Option<()> {
	let mut index = 0usize;
	let mut iter = self.bits.iter();
	while let Some(word) = iter.next() {
	if rlw_runbit_is_set(word) {
	let len = rlw_running_len_bits(word);
	for _ in 0..len {
	f(index)?;
	index += 1;
	}
	} else {
	index += usize::try_from(rlw_running_len_bits(word)).ok()?;
	}

	for _ in 0..rlw_literal_words(word) {
	let word = iter
	.next()
	.expect("BUG: ran out of words while going through uncompressed portion");
	for bit_index in 0..64 {
	if word & (1 << bit_index) != 0 {
	f(index)?;
	}
	index += 1;
	}
	}
	}
	Some(())
	}

	/// The amount of bits we are currently holding.
	pub fn num_bits(&self) -> usize {
	self.num_bits.try_into().expect("we are not on 16 bit systems")
	}
	}

	#[inline]
	fn rlw_running_len_bits(w: &u64) -> u64 {
	rlw_running_len(w) * 64
	}

	#[inline]
	fn rlw_running_len(w: &u64) -> u64 {
	(w >> 1) & RLW_LARGEST_RUNNING_COUNT
	}

	#[inline]
	fn rlw_literal_words(w: &u64) -> u64 {
	w >> (1 + RLW_RUNNING_BITS)
	}

	#[inline]
	fn rlw_runbit_is_set(w: &u64) -> bool {
	w & 1 == 1
	}

	const RLW_RUNNING_BITS: u64 = 4 * 8;
	const RLW_LARGEST_RUNNING_COUNT: u64 = (1 << RLW_RUNNING_BITS) - 1;
	}

	/// A growable collection of u64 that are seen as stream of individual bits.
	#[allow(dead_code)]
	#[derive(Clone)]
	pub struct Vec {
	num_bits: u32,
	bits: std::vec::Vec<u64>,
	/// RLW is an offset into the `bits` buffer, so `1` translates into &bits\[1] essentially.
	rlw: u64,
	}