| /// Information for the chunk about index names |
| pub mod index_names { |
| use std::path::{Path, PathBuf}; |
| |
| use gix_object::bstr::{BString, ByteSlice}; |
| |
| /// The ID used for the index-names chunk. |
| pub const ID: gix_chunk::Id = *b"PNAM"; |
| |
| /// |
| #[allow(clippy::empty_docs)] |
| pub mod decode { |
| use gix_object::bstr::BString; |
| |
| /// The error returned by [`from_bytes()`][super::from_bytes()]. |
| #[derive(Debug, thiserror::Error)] |
| #[allow(missing_docs)] |
| pub enum Error { |
| #[error("The pack names were not ordered alphabetically.")] |
| NotOrderedAlphabetically, |
| #[error("Each pack path name must be terminated with a null byte")] |
| MissingNullByte, |
| #[error("Couldn't turn path '{path}' into OS path due to encoding issues")] |
| PathEncoding { path: BString }, |
| #[error("non-padding bytes found after all paths were read.")] |
| UnknownTrailerBytes, |
| } |
| } |
| |
| /// Parse null-separated index names from the given `chunk` of bytes and the expected number of packs and indices. |
| /// Ignore padding bytes which are typically \0. |
| pub fn from_bytes(mut chunk: &[u8], num_packs: u32) -> Result<Vec<PathBuf>, decode::Error> { |
| let mut out = Vec::new(); |
| for _ in 0..num_packs { |
| let null_byte_pos = chunk.find_byte(b'\0').ok_or(decode::Error::MissingNullByte)?; |
| |
| let path = &chunk[..null_byte_pos]; |
| let path = gix_path::try_from_byte_slice(path) |
| .map_err(|_| decode::Error::PathEncoding { |
| path: BString::from(path), |
| })? |
| .to_owned(); |
| |
| if let Some(previous) = out.last() { |
| if previous >= &path { |
| return Err(decode::Error::NotOrderedAlphabetically); |
| } |
| } |
| out.push(path); |
| |
| chunk = &chunk[null_byte_pos + 1..]; |
| } |
| |
| if !chunk.is_empty() && !chunk.iter().all(|b| *b == 0) { |
| return Err(decode::Error::UnknownTrailerBytes); |
| } |
| // NOTE: git writes garbage into this chunk, usually extra \0 bytes, which we simply ignore. If we were strict |
| // about it we couldn't read this chunk data at all. |
| Ok(out) |
| } |
| |
| /// Calculate the size on disk for our chunk with the given index paths. Note that these are expected to have been processed already |
| /// to actually be file names. |
| pub fn storage_size(paths: impl IntoIterator<Item = impl AsRef<Path>>) -> u64 { |
| let mut count = 0u64; |
| for path in paths { |
| let path = path.as_ref(); |
| let ascii_path = path.to_str().expect("UTF-8 compatible paths"); |
| assert!( |
| ascii_path.is_ascii(), |
| "must use ascii bytes for correct size computation" |
| ); |
| count += (ascii_path.as_bytes().len() + 1/* null byte */) as u64 |
| } |
| |
| let needed_alignment = CHUNK_ALIGNMENT - (count % CHUNK_ALIGNMENT); |
| if needed_alignment < CHUNK_ALIGNMENT { |
| count += needed_alignment; |
| } |
| count |
| } |
| |
| /// Write all `paths` in order to `out`, including padding. |
| pub fn write( |
| paths: impl IntoIterator<Item = impl AsRef<Path>>, |
| out: &mut dyn std::io::Write, |
| ) -> std::io::Result<()> { |
| let mut written_bytes = 0; |
| for path in paths { |
| let path = path.as_ref().to_str().expect("UTF-8 path"); |
| out.write_all(path.as_bytes())?; |
| out.write_all(&[0])?; |
| written_bytes += path.as_bytes().len() as u64 + 1; |
| } |
| |
| let needed_alignment = CHUNK_ALIGNMENT - (written_bytes % CHUNK_ALIGNMENT); |
| if needed_alignment < CHUNK_ALIGNMENT { |
| let padding = [0u8; CHUNK_ALIGNMENT as usize]; |
| out.write_all(&padding[..needed_alignment as usize])?; |
| } |
| Ok(()) |
| } |
| |
| const CHUNK_ALIGNMENT: u64 = 4; |
| } |
| |
| /// Information for the chunk with the fanout table |
| pub mod fanout { |
| use crate::multi_index; |
| |
| /// The size of the fanout table |
| pub const SIZE: usize = 4 * 256; |
| |
| /// The id uniquely identifying the fanout table. |
| pub const ID: gix_chunk::Id = *b"OIDF"; |
| |
| /// Decode the fanout table contained in `chunk`, or return `None` if it didn't have the expected size. |
| pub fn from_bytes(chunk: &[u8]) -> Option<[u32; 256]> { |
| if chunk.len() != SIZE { |
| return None; |
| } |
| let mut out = [0; 256]; |
| for (c, f) in chunk.chunks_exact(4).zip(out.iter_mut()) { |
| *f = u32::from_be_bytes(c.try_into().unwrap()); |
| } |
| out.into() |
| } |
| |
| /// Write the fanout for the given entries, which must be sorted by oid |
| pub(crate) fn write( |
| sorted_entries: &[multi_index::write::Entry], |
| out: &mut dyn std::io::Write, |
| ) -> std::io::Result<()> { |
| let fanout = crate::index::encode::fanout(&mut sorted_entries.iter().map(|e| e.id.first_byte())); |
| |
| for value in fanout.iter() { |
| out.write_all(&value.to_be_bytes())?; |
| } |
| Ok(()) |
| } |
| } |
| |
| /// Information about the oid lookup table. |
| pub mod lookup { |
| use std::ops::Range; |
| |
| use crate::multi_index; |
| |
| /// The id uniquely identifying the oid lookup table. |
| pub const ID: gix_chunk::Id = *b"OIDL"; |
| |
| /// Return the amount of bytes needed to store the data on disk for the given amount of `entries` |
| pub fn storage_size(entries: usize, object_hash: gix_hash::Kind) -> u64 { |
| (entries * object_hash.len_in_bytes()) as u64 |
| } |
| |
| pub(crate) fn write( |
| sorted_entries: &[multi_index::write::Entry], |
| out: &mut dyn std::io::Write, |
| ) -> std::io::Result<()> { |
| for entry in sorted_entries { |
| out.write_all(entry.id.as_slice())?; |
| } |
| Ok(()) |
| } |
| |
| /// Return true if the size of the `offset` range seems to match for a `hash` of the given kind and the amount of objects. |
| pub fn is_valid(offset: &Range<usize>, hash: gix_hash::Kind, num_objects: u32) -> bool { |
| (offset.end - offset.start) / hash.len_in_bytes() == num_objects as usize |
| } |
| } |
| |
| /// Information about the offsets table. |
| pub mod offsets { |
| use std::ops::Range; |
| |
| use crate::multi_index; |
| |
| /// The id uniquely identifying the offsets table. |
| pub const ID: gix_chunk::Id = *b"OOFF"; |
| |
| /// Return the amount of bytes needed to offset data for `entries`. |
| pub fn storage_size(entries: usize) -> u64 { |
| (entries * (4 /*pack-id*/ + 4/* pack offset */)) as u64 |
| } |
| |
| pub(crate) fn write( |
| sorted_entries: &[multi_index::write::Entry], |
| large_offsets_needed: bool, |
| out: &mut dyn std::io::Write, |
| ) -> std::io::Result<()> { |
| use crate::index::encode::{HIGH_BIT, LARGE_OFFSET_THRESHOLD}; |
| let mut num_large_offsets = 0u32; |
| |
| for entry in sorted_entries { |
| out.write_all(&entry.pack_index.to_be_bytes())?; |
| |
| let offset: u32 = if large_offsets_needed { |
| if entry.pack_offset > LARGE_OFFSET_THRESHOLD { |
| let res = num_large_offsets | HIGH_BIT; |
| num_large_offsets += 1; |
| res |
| } else { |
| entry.pack_offset as u32 |
| } |
| } else { |
| entry |
| .pack_offset |
| .try_into() |
| .expect("without large offsets, pack-offset fits u32") |
| }; |
| out.write_all(&offset.to_be_bytes())?; |
| } |
| Ok(()) |
| } |
| |
| /// Returns true if the `offset` range seems to match the size required for `num_objects`. |
| pub fn is_valid(offset: &Range<usize>, num_objects: u32) -> bool { |
| let entry_size = 4 /* pack-id */ + 4 /* pack-offset */; |
| ((offset.end - offset.start) / num_objects as usize) == entry_size |
| } |
| } |
| |
| /// Information about the large offsets table. |
| pub mod large_offsets { |
| use std::ops::Range; |
| |
| use crate::{index::encode::LARGE_OFFSET_THRESHOLD, multi_index}; |
| |
| /// The id uniquely identifying the large offsets table (with 64 bit offsets) |
| pub const ID: gix_chunk::Id = *b"LOFF"; |
| |
| /// Returns Some(num-large-offset) if there are offsets larger than u32. |
| pub(crate) fn num_large_offsets(entries: &[multi_index::write::Entry]) -> Option<usize> { |
| let mut num_large_offsets = 0; |
| let mut needs_large_offsets = false; |
| for entry in entries { |
| if entry.pack_offset > LARGE_OFFSET_THRESHOLD { |
| num_large_offsets += 1; |
| } |
| if entry.pack_offset > u32::MAX as crate::data::Offset { |
| needs_large_offsets = true; |
| } |
| } |
| |
| needs_large_offsets.then_some(num_large_offsets) |
| } |
| /// Returns true if the `offsets` range seems to be properly aligned for the data we expect. |
| pub fn is_valid(offset: &Range<usize>) -> bool { |
| (offset.end - offset.start) % 8 == 0 |
| } |
| |
| pub(crate) fn write( |
| sorted_entries: &[multi_index::write::Entry], |
| mut num_large_offsets: usize, |
| out: &mut dyn std::io::Write, |
| ) -> std::io::Result<()> { |
| for offset in sorted_entries |
| .iter() |
| .filter_map(|e| (e.pack_offset > LARGE_OFFSET_THRESHOLD).then_some(e.pack_offset)) |
| { |
| out.write_all(&offset.to_be_bytes())?; |
| num_large_offsets = num_large_offsets |
| .checked_sub(1) |
| .expect("BUG: wrote more offsets the previously found"); |
| } |
| assert_eq!(num_large_offsets, 0, "BUG: wrote less offsets than initially counted"); |
| Ok(()) |
| } |
| |
| /// Return the amount of bytes needed to store the given amount of `large_offsets` |
| pub(crate) fn storage_size(large_offsets: usize) -> u64 { |
| 8 * large_offsets as u64 |
| } |
| } |