blob: 29abeac970169b55c77a9e3a09139928992de571 [file] [log] [blame]
use std::sync::atomic::AtomicBool;
use gix_features::{parallel, progress::Progress, zlib};
use crate::index;
mod reduce;
///
#[allow(clippy::empty_docs)]
pub mod with_index;
///
#[allow(clippy::empty_docs)]
pub mod with_lookup;
use reduce::Reducer;
mod error;
pub use error::Error;
use gix_features::progress::DynNestedProgress;
mod types;
pub use types::{Algorithm, ProgressId, SafetyCheck, Statistics};
/// Traversal options for [`index::File::traverse()`].
#[derive(Debug, Clone)]
pub struct Options<F> {
    /// The algorithm to employ.
    pub traversal: Algorithm,
    /// If `Some`, only use the given amount of threads. Otherwise, the amount of threads to use will be selected based on
    /// the amount of available logical cores.
    pub thread_limit: Option<usize>,
    /// The kinds of safety checks to perform.
    pub check: SafetyCheck,
    /// A function to create a pack cache; only used by [`Algorithm::Lookup`], ignored otherwise.
    pub make_pack_lookup_cache: F,
}
// Defaults to lookup-based traversal without any pack caching.
impl Default for Options<fn() -> crate::cache::Never> {
    fn default() -> Self {
        Self {
            traversal: Default::default(),
            thread_limit: None,
            check: Default::default(),
            make_pack_lookup_cache: || crate::cache::Never,
        }
    }
}
/// The outcome of the [`traverse()`][index::File::traverse()] method.
pub struct Outcome {
    /// The checksum obtained when hashing the index file, which matched the checksum contained within the file.
    pub actual_index_checksum: gix_hash::ObjectId,
    /// The statistics obtained during traversal.
    pub statistics: Statistics,
}
/// Traversal of pack data files using an index file
/// Traversal of pack data files using an index file
impl index::File {
    /// Iterate through all _decoded objects_ in the given `pack` and handle them with a `Processor`.
    /// On success, an [`Outcome`] with the verified index checksum and traversal statistics is returned,
    /// thus the pack traversal will always verify the whole pack's checksum to assure it was correct.
    /// In case of bit-rot, the operation will abort early without verifying all objects using the
    /// [interrupt mechanism][gix_features::interrupt].
    ///
    /// # Algorithms
    ///
    /// Using the [`Options::traversal`] field one can choose between two algorithms providing different tradeoffs. Both invoke
    /// `new_processor()` to create functions receiving decoded objects, their object kind, index entry and a progress instance to provide
    /// progress information.
    ///
    /// * [`Algorithm::DeltaTreeLookup`] builds an index to avoid any unnecessary computation while resolving objects, avoiding
    ///   the need for a cache entirely, rendering `new_cache()` unused.
    ///   One could also call [`traverse_with_index()`][index::File::traverse_with_index()] directly.
    /// * [`Algorithm::Lookup`] uses a cache created by `new_cache()` to avoid having to re-compute all bases of a delta-chain while
    ///   decoding objects.
    ///   One could also call [`traverse_with_lookup()`][index::File::traverse_with_lookup()] directly.
    ///
    /// Use [`thread_limit`][Options::thread_limit] to further control parallelism and [`check`][SafetyCheck] to define how much the passed
    /// objects shall be verified beforehand.
    pub fn traverse<C, Processor, E, F>(
        &self,
        pack: &crate::data::File,
        progress: &mut dyn DynNestedProgress,
        should_interrupt: &AtomicBool,
        processor: Processor,
        Options {
            traversal,
            thread_limit,
            check,
            make_pack_lookup_cache,
        }: Options<F>,
    ) -> Result<Outcome, Error<E>>
    where
        C: crate::cache::DecodeEntry,
        E: std::error::Error + Send + Sync + 'static,
        Processor: FnMut(gix_object::Kind, &[u8], &index::Entry, &dyn Progress) -> Result<(), E> + Send + Clone,
        F: Fn() -> C + Send + Clone,
    {
        // Dispatch to the implementation chosen by the caller; note that only the
        // lookup-based traversal makes use of the pack cache factory.
        match traversal {
            Algorithm::Lookup => self.traverse_with_lookup(
                processor,
                pack,
                progress,
                should_interrupt,
                with_lookup::Options {
                    thread_limit,
                    check,
                    make_pack_lookup_cache,
                },
            ),
            Algorithm::DeltaTreeLookup => self.traverse_with_index(
                pack,
                processor,
                progress,
                should_interrupt,
                with_index::Options { check, thread_limit },
            ),
        }
    }

    /// Verify pack and index checksums if `check` demands it, and return the index checksum.
    ///
    /// A cheap equality check of the pack checksum recorded in the index against the one in the pack
    /// file is performed first; only then are both files hashed, in parallel, each with its own
    /// progress instance, aborting early if `should_interrupt` is set.
    fn possibly_verify<E>(
        &self,
        pack: &crate::data::File,
        check: SafetyCheck,
        pack_progress: &mut dyn Progress,
        index_progress: &mut dyn Progress,
        should_interrupt: &AtomicBool,
    ) -> Result<gix_hash::ObjectId, Error<E>>
    where
        E: std::error::Error + Send + Sync + 'static,
    {
        Ok(if check.file_checksum() {
            // The index stores the checksum of the pack it belongs to - compare before doing any hashing work.
            if self.pack_checksum() != pack.checksum() {
                return Err(Error::PackMismatch {
                    actual: pack.checksum(),
                    expected: self.pack_checksum(),
                });
            }
            // Hash both files concurrently.
            let (pack_res, id) = parallel::join(
                move || pack.verify_checksum(pack_progress, should_interrupt),
                move || self.verify_checksum(index_progress, should_interrupt),
            );
            pack_res?;
            id?
        } else {
            // No file verification requested - report the checksum stored in the index as-is.
            self.index_checksum()
        })
    }

    /// Decode the object identified by `index_entry` from `pack` into `buf`, validate it according to
    /// `check`, and hand it to `processor`. Returns the per-entry decoding statistics.
    #[allow(clippy::too_many_arguments)]
    fn decode_and_process_entry<C, E>(
        &self,
        check: SafetyCheck,
        pack: &crate::data::File,
        cache: &mut C,
        buf: &mut Vec<u8>,
        inflate: &mut zlib::Inflate,
        progress: &mut dyn Progress,
        index_entry: &index::Entry,
        processor: &mut impl FnMut(gix_object::Kind, &[u8], &index::Entry, &dyn Progress) -> Result<(), E>,
    ) -> Result<crate::data::decode::entry::Outcome, Error<E>>
    where
        C: crate::cache::DecodeEntry,
        E: std::error::Error + Send + Sync + 'static,
    {
        let pack_entry = pack.entry(index_entry.pack_offset);
        let pack_entry_data_offset = pack_entry.data_offset;
        let entry_stats = pack
            .decode_entry(
                pack_entry,
                buf,
                inflate,
                &|id, _| {
                    // Resolve delta bases by locating their pack offset through our own index.
                    self.lookup(id).map(|index| {
                        crate::data::decode::entry::ResolvedBase::InPack(pack.entry(self.pack_offset_at_index(index)))
                    })
                },
                cache,
            )
            .map_err(|e| Error::PackDecode {
                source: e,
                id: index_entry.oid,
                offset: index_entry.pack_offset,
            })?;
        let object_kind = entry_stats.kind;
        // The entry header spans from the entry's pack offset to where its compressed data begins.
        let header_size = (pack_entry_data_offset - index_entry.pack_offset) as usize;
        let entry_len = header_size + entry_stats.compressed_size;
        process_entry(
            check,
            object_kind,
            buf,
            index_entry,
            || pack.entry_crc32(index_entry.pack_offset, entry_len),
            progress,
            processor,
        )?;
        Ok(entry_stats)
    }
}
/// Validate a decoded object according to `check` and pass it on to `processor`.
///
/// If object checksums are requested, the object id is recomputed from the decompressed bytes and
/// compared to the index entry, and - if the index stores one - the pack entry's CRC32 is recomputed
/// and compared as well. Any mismatch aborts with the respective error before `processor` runs.
#[allow(clippy::too_many_arguments)]
fn process_entry<E>(
    check: SafetyCheck,
    object_kind: gix_object::Kind,
    decompressed: &[u8],
    index_entry: &index::Entry,
    pack_entry_crc32: impl FnOnce() -> u32,
    progress: &dyn Progress,
    processor: &mut impl FnMut(gix_object::Kind, &[u8], &index::Entry, &dyn Progress) -> Result<(), E>,
) -> Result<(), Error<E>>
where
    E: std::error::Error + Send + Sync + 'static,
{
    if check.object_checksum() {
        let computed_oid = gix_object::compute_hash(index_entry.oid.kind(), object_kind, decompressed);
        if computed_oid != index_entry.oid {
            return Err(Error::PackObjectMismatch {
                actual: computed_oid,
                expected: index_entry.oid,
                offset: index_entry.pack_offset,
                kind: object_kind,
            });
        }
        // CRC32 values are optional in the index; compute ours lazily and only
        // when there actually is a stored value to compare against.
        match index_entry.crc32 {
            Some(stored_crc32) => {
                let computed_crc32 = pack_entry_crc32();
                if computed_crc32 != stored_crc32 {
                    return Err(Error::Crc32Mismatch {
                        actual: computed_crc32,
                        expected: stored_crc32,
                        offset: index_entry.pack_offset,
                        kind: object_kind,
                    });
                }
            }
            None => {}
        }
    }
    processor(object_kind, decompressed, index_entry, progress).map_err(Error::Processor)
}