blob: 72a076a851bb427ae5f8606f3cbe85660439888e [file] [log] [blame]
use std::{convert::TryInto, io, sync::atomic::AtomicBool};
pub use error::Error;
use gix_features::progress::{self, Progress};
use crate::cache::delta::{traverse, Tree};
pub(crate) mod encode;
mod error;
/// Per-entry data stored in the delta [`Tree`] while building an index: the object id and CRC of one pack entry.
pub(crate) struct TreeEntry {
    /// The id of the object; initialized to the null hash and filled in later during delta traversal.
    pub id: gix_hash::ObjectId,
    /// The CRC32 over the entry's bytes in the pack, as computed by the input iterator.
    pub crc32: u32,
}
/// Information gathered while executing [`write_data_iter_to_stream()`][crate::index::File::write_data_iter_to_stream]
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Outcome {
    /// The version of the written index
    pub index_version: crate::index::Version,
    /// The checksum of the index file that was written
    pub index_hash: gix_hash::ObjectId,
    /// The hash of the '.pack' file, also found in its trailing bytes
    pub data_hash: gix_hash::ObjectId,
    /// The amount of objects that were written, always the amount of objects in the pack.
    pub num_objects: u32,
}
/// The progress ids used in [`write_data_iter_to_stream()`][crate::index::File::write_data_iter_to_stream()].
///
/// Use this information to selectively extract the progress of interest in case the parent application has custom visualization.
#[derive(Debug, Copy, Clone)]
pub enum ProgressId {
    /// Counts the amount of objects that were indexed thus far.
    IndexObjects,
    /// The amount of bytes that were decompressed while decoding pack entries.
    ///
    /// This is done to determine entry boundaries.
    DecompressedBytes,
    /// The amount of objects whose hashes were computed.
    ///
    /// This is done by decoding them, which typically involves decoding delta objects.
    ResolveObjects,
    /// The amount of bytes that were decoded in total, as the sum of all bytes to represent all resolved objects.
    DecodedBytes,
    /// The amount of bytes written to the index file.
    IndexBytesWritten,
}
/// Map each [`ProgressId`] to a unique four-byte identifier so callers can tell the sub-progresses apart.
impl From<ProgressId> for gix_features::progress::Id {
    fn from(v: ProgressId) -> Self {
        match v {
            ProgressId::IndexObjects => *b"IWIO",
            ProgressId::DecompressedBytes => *b"IWDB",
            ProgressId::ResolveObjects => *b"IWRO",
            // Was `IWDB`, colliding with `DecompressedBytes` and making the two indistinguishable
            // when selecting progress by id — give decoded bytes its own identifier.
            ProgressId::DecodedBytes => *b"IWDC",
            ProgressId::IndexBytesWritten => *b"IWBW",
        }
    }
}
/// Various ways of writing an index file from pack entries
impl crate::index::File {
    /// Write information about `entries` as obtained from a pack data file into a pack index file via the `out` stream.
    /// The resolver produced by `make_resolver` must resolve pack entries from the same pack data file that produced the
    /// `entries` iterator.
    ///
    /// * `kind` is the version of pack index to produce, use [`crate::index::Version::default()`] if in doubt.
    /// * `thread_limit` is used for a parallel tree traversal for obtaining object hashes with optimal performance.
    /// * `root_progress` is the top-level progress to stay informed about the progress of this potentially long-running
    ///   computation.
    /// * `object_hash` defines what kind of object hash we write into the index file.
    /// * `pack_version` is the version of the underlying pack for which `entries` are read. It's used in case none of these objects are provided
    ///   to compute a pack-hash.
    ///
    /// # Remarks
    ///
    /// * neither in-pack nor out-of-pack Ref Deltas are supported here, these must have been resolved beforehand.
    /// * `make_resolver()` will only be called after the iterator stopped returning elements and produces a function that
    ///   provides all bytes belonging to a pack entry writing them to the given mutable output `Vec`.
    ///   It should return `None` if the entry cannot be resolved from the pack that produced the `entries` iterator, causing
    ///   the write operation to fail.
    #[allow(clippy::too_many_arguments)]
    pub fn write_data_iter_to_stream<F, F2, R, P>(
        version: crate::index::Version,
        make_resolver: F,
        entries: impl Iterator<Item = Result<crate::data::input::Entry, crate::data::input::Error>>,
        thread_limit: Option<usize>,
        mut root_progress: P,
        out: impl io::Write,
        should_interrupt: &AtomicBool,
        object_hash: gix_hash::Kind,
        pack_version: crate::data::Version,
    ) -> Result<Outcome, Error>
    where
        F: FnOnce() -> io::Result<(F2, R)>,
        R: Send + Sync,
        F2: for<'r> Fn(crate::data::EntryRange, &'r R) -> Option<&'r [u8]> + Send + Clone,
        P: Progress,
    {
        // Only the default index version is supported by this implementation.
        if version != crate::index::Version::default() {
            return Err(Error::Unsupported(version));
        }
        let mut num_objects: usize = 0;
        // The trailer of the last entry seen is the pack checksum, remembered as entries stream by.
        let mut last_seen_trailer = None;
        let (anticipated_num_objects, upper_bound) = entries.size_hint();
        // Thin-pack resolution can add entries beyond the lower bound, so size the tree for the worst case.
        let worst_case_num_objects_after_thin_pack_resolution = upper_bound.unwrap_or(anticipated_num_objects);
        let mut tree = Tree::with_capacity(worst_case_num_objects_after_thin_pack_resolution)?;
        let indexing_start = std::time::Instant::now();
        // Four top-level steps: indexing, resolving, sorting, writing.
        root_progress.init(Some(4), progress::steps());
        let mut objects_progress = root_progress.add_child_with_id("indexing", ProgressId::IndexObjects.into());
        objects_progress.init(Some(anticipated_num_objects), progress::count("objects"));
        let mut decompressed_progress =
            root_progress.add_child_with_id("decompressing", ProgressId::DecompressedBytes.into());
        decompressed_progress.init(None, progress::bytes());
        let mut pack_entries_end: u64 = 0;
        // Phase 1: consume all entries, building a tree of base objects and their delta children.
        for entry in entries {
            let crate::data::input::Entry {
                header,
                pack_offset,
                crc32,
                header_size,
                compressed: _,
                compressed_size,
                decompressed_size,
                trailer,
            } = entry?;
            decompressed_progress.inc_by(decompressed_size as usize);
            let entry_len = header_size as u64 + compressed_size;
            // Tracks the offset one past the last entry, needed later to bound the traversal.
            pack_entries_end = pack_offset + entry_len;
            let crc32 = crc32.expect("crc32 to be computed by the iterator. Caller assures correct configuration.");
            use crate::data::entry::Header::*;
            match header {
                Tree | Blob | Commit | Tag => {
                    // Base objects become tree roots; their ids are computed later during traversal.
                    tree.add_root(
                        pack_offset,
                        TreeEntry {
                            id: object_hash.null(),
                            crc32,
                        },
                    )?;
                }
                RefDelta { .. } => return Err(Error::IteratorInvariantNoRefDelta),
                OfsDelta { base_distance } => {
                    // Validate the delta's base offset before recording the parent-child relation.
                    let base_pack_offset =
                        crate::data::entry::Header::verified_base_pack_offset(pack_offset, base_distance).ok_or(
                            Error::IteratorInvariantBaseOffset {
                                pack_offset,
                                distance: base_distance,
                            },
                        )?;
                    tree.add_child(
                        base_pack_offset,
                        pack_offset,
                        TreeEntry {
                            id: object_hash.null(),
                            crc32,
                        },
                    )?;
                }
            };
            last_seen_trailer = trailer;
            num_objects += 1;
            objects_progress.inc();
        }
        // An index stores the object count as u32; exceeding that is an iterator invariant violation.
        let num_objects: u32 = num_objects
            .try_into()
            .map_err(|_| Error::IteratorInvariantTooManyObjects(num_objects))?;
        objects_progress.show_throughput(indexing_start);
        decompressed_progress.show_throughput(indexing_start);
        drop(objects_progress);
        drop(decompressed_progress);
        root_progress.inc();
        // Phase 2: resolve deltas (in parallel, bounded by `thread_limit`), computing each object's hash,
        // then sort all items by object id as required by the index format.
        let (resolver, pack) = make_resolver()?;
        let sorted_pack_offsets_by_oid = {
            let traverse::Outcome { roots, children } = tree.traverse(
                resolver,
                &pack,
                pack_entries_end,
                |data,
                 _progress,
                 traverse::Context {
                     entry,
                     decompressed: bytes,
                     ..
                 }| {
                    // NOTE(review): hashes are computed with `version.hash()` here, while tree entries
                    // and the traversal use `object_hash` — confirm these always agree.
                    modify_base(data, entry, bytes, version.hash());
                    Ok::<_, Error>(())
                },
                traverse::Options {
                    object_progress: root_progress.add_child_with_id("Resolving", ProgressId::ResolveObjects.into()),
                    size_progress: root_progress.add_child_with_id("Decoding", ProgressId::DecodedBytes.into()),
                    thread_limit,
                    should_interrupt,
                    object_hash,
                },
            )?;
            root_progress.inc();
            let mut items = roots;
            items.extend(children);
            {
                let _progress = root_progress.add_child_with_id("sorting by id", gix_features::progress::UNKNOWN);
                items.sort_by_key(|e| e.data.id);
            }
            root_progress.inc();
            items
        };
        // Phase 3: determine the pack checksum — taken from the last entry's trailer, or computed
        // from an empty pack header when there were no entries at all.
        let pack_hash = match last_seen_trailer {
            Some(ph) => ph,
            None if num_objects == 0 => {
                let header = crate::data::header::encode(pack_version, 0);
                let mut hasher = gix_features::hash::hasher(object_hash);
                hasher.update(&header);
                gix_hash::ObjectId::from(hasher.digest())
            }
            None => return Err(Error::IteratorInvariantTrailer),
        };
        // Phase 4: write the index file and report overall throughput.
        let index_hash = encode::write_to(
            out,
            sorted_pack_offsets_by_oid,
            &pack_hash,
            version,
            root_progress.add_child_with_id("writing index file", ProgressId::IndexBytesWritten.into()),
        )?;
        root_progress.show_throughput_with(
            indexing_start,
            num_objects as usize,
            progress::count("objects").expect("unit always set"),
            progress::MessageLevel::Success,
        );
        Ok(Outcome {
            index_version: version,
            index_hash,
            data_hash: pack_hash,
            num_objects,
        })
    }
}
/// Compute the hash of a base object from its `decompressed` bytes and store it in `entry.id`.
///
/// Panics if `pack_entry` is a delta, as only base objects carry an object kind.
fn modify_base(entry: &mut TreeEntry, pack_entry: &crate::data::Entry, decompressed: &[u8], hash: gix_hash::Kind) {
    let kind = pack_entry
        .header
        .as_kind()
        .expect("base object as source of iteration");
    entry.id = gix_object::compute_hash(hash, kind, decompressed);
}