//! Hash functions and hash utilities
//!
//! With the `fast-sha1` feature, the `Sha1` hash type uses a more elaborate implementation that takes advantage of
//! hardware support where available. Otherwise the `rustsha1` feature should be set to use a minimal yet performant
//! implementation instead, for a decent trade-off between compile times and run-time performance.
//! If both features are enabled, `fast-sha1` takes precedence.
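//!
//! # Example
//!
//! A minimal sketch of one-shot hashing with the feature-gated `Sha1` type, assuming one of the
//! `rustsha1`/`fast-sha1` features is enabled and this module is accessible as `gix_features::hash`:
//!
//! ```ignore
//! use gix_features::hash;
//!
//! // Obtain a hasher for the desired object hash kind and feed it bytes in one or more calls.
//! let mut hasher = hash::hasher(gix_hash::Kind::Sha1);
//! hasher.update(b"hello, ");
//! hasher.update(b"world");
//!
//! // Consume the hasher to obtain the raw 20-byte digest.
//! let digest: hash::Sha1Digest = hasher.digest();
//! assert_eq!(digest.len(), 20);
//! ```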
| #[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))] |
| mod _impl { |
| use super::Sha1Digest; |
| |
| /// A implementation of the Sha1 hash, which can be used once. |
| #[derive(Default, Clone)] |
| pub struct Sha1(sha1_smol::Sha1); |
| |
| impl Sha1 { |
| /// Digest the given `bytes`. |
| pub fn update(&mut self, bytes: &[u8]) { |
| self.0.update(bytes) |
| } |
| /// Finalize the hash and produce a digest. |
| pub fn digest(self) -> Sha1Digest { |
| self.0.digest().bytes() |
| } |
| } |
| } |
| |
| /// A 20 bytes digest produced by a [`Sha1`] hash implementation. |
| #[cfg(any(feature = "fast-sha1", feature = "rustsha1"))] |
| pub type Sha1Digest = [u8; 20]; |
| |
| #[cfg(feature = "fast-sha1")] |
| mod _impl { |
| use sha1::Digest; |
| |
| use super::Sha1Digest; |
| |
| /// A implementation of the Sha1 hash, which can be used once. |
| #[derive(Default, Clone)] |
| pub struct Sha1(sha1::Sha1); |
| |
| impl Sha1 { |
| /// Digest the given `bytes`. |
| pub fn update(&mut self, bytes: &[u8]) { |
| self.0.update(bytes) |
| } |
| /// Finalize the hash and produce a digest. |
| pub fn digest(self) -> Sha1Digest { |
| self.0.finalize().into() |
| } |
| } |
| } |
| |
| #[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] |
| pub use _impl::Sha1; |
| |
/// Compute a CRC32 value of the given input `bytes`, continuing from `previous_value`.
///
/// When calling this function for the first time, `previous_value` should be `0`. Otherwise it
/// should be the previous return value of this function, to hash multiple sequential
/// chunks of `bytes` as if they were one contiguous input.
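///
/// # Example
///
/// A sketch of hashing two sequential chunks, assuming the `crc32` feature is enabled:
///
/// ```ignore
/// use gix_features::hash::{crc32, crc32_update};
///
/// // Start with 0, then feed each previous return value back in for the next chunk.
/// let first = crc32_update(0, b"hello ");
/// let both = crc32_update(first, b"world");
///
/// // Chunked hashing matches hashing the concatenated input in one go.
/// assert_eq!(both, crc32(b"hello world"));
/// ```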
| #[cfg(feature = "crc32")] |
| pub fn crc32_update(previous_value: u32, bytes: &[u8]) -> u32 { |
| let mut h = crc32fast::Hasher::new_with_initial(previous_value); |
| h.update(bytes); |
| h.finalize() |
| } |
| |
/// Compute a CRC32 value of the given input `bytes`.
///
/// If the input is made up of multiple sequential chunks, use [`crc32_update()`] instead.
#[cfg(feature = "crc32")]
pub fn crc32(bytes: &[u8]) -> u32 {
    let mut h = crc32fast::Hasher::new();
    h.update(bytes);
    h.finalize()
}

/// Produce a hasher suitable for the given kind of hash.
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub fn hasher(kind: gix_hash::Kind) -> Sha1 {
    match kind {
        gix_hash::Kind::Sha1 => Sha1::default(),
    }
}

/// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start`
/// bytes, while initializing and calling `progress`.
///
/// `num_bytes_from_start` denotes the number of bytes to hash, counted from the beginning of the file.
/// This is useful to avoid hashing a trailing checksum, which is never part of the hash itself.
///
/// # Note
///
/// * Only available if the `progress` feature and one of the SHA-1 features (`rustsha1` or `fast-sha1`) are enabled.
/// * Uses the [`gix_hash::Kind`] enum and the [`gix_hash::ObjectId`] return value.
/// * [Interrupts][crate::interrupt] are supported.
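///
/// # Example
///
/// A sketch of hashing a file up to, but excluding, a trailing 20-byte checksum. The file name is
/// hypothetical, and the `Discard` progress is assumed to be available via the re-exported `prodash` types:
///
/// ```ignore
/// use std::sync::atomic::AtomicBool;
///
/// let path = std::path::Path::new("pack.idx"); // hypothetical file ending in a SHA-1 trailer
/// let num_bytes = std::fs::metadata(path).expect("file exists").len() - 20;
/// let should_interrupt = AtomicBool::new(false);
/// let id = gix_features::hash::bytes_of_file(
///     path,
///     num_bytes, // hash everything except the trailing checksum
///     gix_hash::Kind::Sha1,
///     &mut gix_features::progress::Discard,
///     &should_interrupt,
/// )
/// .expect("file can be read and hashed");
/// ```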
| #[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] |
| pub fn bytes_of_file( |
| path: &std::path::Path, |
| num_bytes_from_start: u64, |
| kind: gix_hash::Kind, |
| progress: &mut dyn crate::progress::Progress, |
| should_interrupt: &std::sync::atomic::AtomicBool, |
| ) -> std::io::Result<gix_hash::ObjectId> { |
| bytes( |
| &mut std::fs::File::open(path)?, |
| num_bytes_from_start, |
| kind, |
| progress, |
| should_interrupt, |
| ) |
| } |
| |
/// Similar to [`bytes_of_file`], but operates on a stream of bytes.
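///
/// # Example
///
/// A sketch using an in-memory reader; the `Discard` progress is assumed to be available via the
/// re-exported `prodash` types:
///
/// ```ignore
/// use std::sync::atomic::AtomicBool;
///
/// let data = b"some bytes to hash";
/// let should_interrupt = AtomicBool::new(false);
/// let id = gix_features::hash::bytes(
///     &mut std::io::Cursor::new(&data[..]),
///     data.len() as u64, // hash all of the input
///     gix_hash::Kind::Sha1,
///     &mut gix_features::progress::Discard,
///     &should_interrupt,
/// )
/// .expect("in-memory reads do not fail");
/// ```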
| #[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] |
| pub fn bytes( |
| read: &mut dyn std::io::Read, |
| num_bytes_from_start: u64, |
| kind: gix_hash::Kind, |
| progress: &mut dyn crate::progress::Progress, |
| should_interrupt: &std::sync::atomic::AtomicBool, |
| ) -> std::io::Result<gix_hash::ObjectId> { |
| bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt) |
| } |
| |
/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_with_hasher(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    mut hasher: Sha1,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let start = std::time::Instant::now();
    // Initialize progress before the first possible failure, as a convenience in case callers want to recover.
    progress.init(
        Some(num_bytes_from_start as prodash::progress::Step),
        crate::progress::bytes(),
    );

    const BUF_SIZE: usize = u16::MAX as usize;
    let mut buf = [0u8; BUF_SIZE];
    let mut bytes_left = num_bytes_from_start;

    // Read the input in fixed-size chunks, feeding each chunk to the hasher and checking for interrupts.
    while bytes_left > 0 {
        let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)];
        read.read_exact(out)?;
        bytes_left -= out.len() as u64;
        progress.inc_by(out.len());
        hasher.update(out);
        if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
            return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
        }
    }

    let id = gix_hash::ObjectId::from(hasher.digest());
    progress.show_throughput(start);
    Ok(id)
}

| #[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] |
| mod write { |
| use crate::hash::Sha1; |
| |
| /// A utility to automatically generate a hash while writing into an inner writer. |
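    ///
    /// # Example
    ///
    /// A sketch of hashing while writing to an in-memory buffer, assuming one of the SHA-1 features is enabled:
    ///
    /// ```ignore
    /// use std::io::Write as _;
    ///
    /// let mut write = gix_features::hash::Write::new(Vec::<u8>::new(), gix_hash::Kind::Sha1);
    /// write.write_all(b"content to store and hash").expect("writes to a Vec do not fail");
    ///
    /// // The inner writer owns the bytes, while `hash` saw everything that was written.
    /// assert_eq!(write.inner.as_slice(), b"content to store and hash");
    /// let digest = write.hash.digest();
    /// assert_eq!(digest.len(), 20);
    /// ```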
    pub struct Write<T> {
        /// The hash implementation.
        pub hash: Sha1,
        /// The inner writer.
        pub inner: T,
    }

    impl<T> std::io::Write for Write<T>
    where
        T: std::io::Write,
    {
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            let written = self.inner.write(buf)?;
            // Only hash the bytes that were actually written, to stay in sync with the inner writer.
            self.hash.update(&buf[..written]);
            Ok(written)
        }

        fn flush(&mut self) -> std::io::Result<()> {
            self.inner.flush()
        }
    }

    impl<T> Write<T>
    where
        T: std::io::Write,
    {
        /// Create a new hash writer which hashes all bytes written to `inner` with a hash of the given `object_hash` kind.
        pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self {
            match object_hash {
                gix_hash::Kind::Sha1 => Write {
                    inner,
                    hash: Sha1::default(),
                },
            }
        }
    }
}
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub use write::Write;