// vendor/gix-index-0.33.1/src/lib.rs - toolchain/rustc - Git at Google

 //! ## Feature Flags
 #![cfg_attr(
     all(doc, feature = "document-features"),
     doc = ::document_features::document_features!()
 )]
 #![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))]
 #![deny(unsafe_code, missing_docs, rust_2018_idioms)]

 use bstr::{BStr, ByteSlice};
 use std::{ops::Range, path::PathBuf};

 use filetime::FileTime;
 /// `gix_hash` is made available as it's part of the public API in various places.
 pub use gix_hash as hash;
 /// A re-export to allow calling [`State::from_tree()`].
 pub use gix_validate as validate;

 ///
 #[allow(clippy::empty_docs)]
 pub mod file;

 ///
 #[allow(clippy::empty_docs)]
 pub mod extension;

 ///
 #[allow(clippy::empty_docs)]
 pub mod entry;

 mod access;

 ///
 #[allow(clippy::empty_docs)]
 pub mod init;

 ///
 #[allow(clippy::empty_docs)]
 pub mod decode;

 ///
 #[allow(clippy::empty_docs)]
 pub mod verify;

 ///
 #[allow(clippy::empty_docs)]
 pub mod write;

 pub mod fs;

 /// All known versions of a git index file.
 ///
 /// The discriminant of each variant equals the version number it stands for, and the derived
 /// ordering therefore sorts versions from lowest to highest.
 #[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 pub enum Version {
     /// Version 2: supports entries and various extensions.
     V2 = 2,
     /// Version 3: adds support for additional flags for each entry, called extended entries.
     V3 = 3,
     /// Version 4: supports deltified entry paths.
     V4 = 4,
 }

 /// An entry in the index, identifying a non-tree item on disk.
 ///
 /// An entry does not own its path. It only remembers where the path is located within the
 /// path backing of the [`State`] it belongs to.
 #[derive(Debug, Clone, Eq, PartialEq)]
 pub struct Entry {
     /// The filesystem stat information for the file on disk.
     pub stat: entry::Stat,
     /// The object id for this entry's ODB representation (assuming it's up-to-date with it).
     pub id: gix_hash::ObjectId,
     /// Additional flags for use in algorithms and for efficiently storing stage information.
     pub flags: entry::Flags,
     /// The kind of item this entry represents - it's not all blobs in the index anymore.
     pub mode: entry::Mode,
     /// The range to lookup in the path backing to obtain the entry path relative to the repository.
     /// This costs additional memory but is probably worth it given that paths can stay in one big allocation.
     ///
     /// This field is private as the range is only meaningful together with the path backing it indexes into.
     path: Range<usize>,
 }

 /// An index file whose state was read from a file on disk.
 ///
 /// It combines the in-memory [`State`] with the location of the file and its checksum.
 #[derive(Clone)]
 pub struct File {
     /// The state containing the actual index data.
     pub(crate) state: State,
     /// The path from which the index was read or to which it is supposed to be written.
     pub(crate) path: PathBuf,
     /// The checksum of all bytes prior to the checksum itself.
     ///
     /// NOTE(review): `None` presumably means that no checksum is known (yet) - confirm against the code in `file`.
     pub(crate) checksum: Option<gix_hash::ObjectId>,
 }

 /// The type to use and store paths to all entries.
 ///
 /// It is a plain byte buffer into which each [`Entry`] points by range.
 pub type PathStorage = Vec<u8>;
 /// The type to use and store paths to all entries, as a borrowed reference.
 pub type PathStorageRef = [u8];

 /// A directory as stored in [`AccelerateLookup`], described by the first entry inside of it and
 /// the position in the path backing at which the directory portion of that entry's path ends.
 struct DirEntry<'a> {
     /// The first entry in the directory
     entry: &'a Entry,
     /// One past the last byte of the directory in the path-backing
     dir_end: usize,
 }

 impl DirEntry<'_> {
     /// Return the path of this directory, borrowed from the path backing of `state`.
     ///
     /// The returned slice begins where the path of the first entry begins and stops at `dir_end`.
     fn path<'a>(&self, state: &'a State) -> &'a BStr {
         let start = self.entry.path.start;
         state.path_backing[start..self.dir_end].as_bstr()
     }
 }

 /// A backing store for accelerating lookups of entries in a case-sensitive and case-insensitive manner.
 ///
 /// Both tables store references with lifetime `'a` instead of owning the entries they refer to.
 pub struct AccelerateLookup<'a> {
     /// The entries themselves, hashed by their full icase path.
     /// Icase-clashes are handled in order of occurrence and are all available for iteration.
     icase_entries: hashbrown::HashTable<&'a Entry>,
     /// Each hash in this table corresponds to a directory containing one or more entries.
     icase_dirs: hashbrown::HashTable<DirEntry<'a>>,
 }

 /// An in-memory cache of a fully parsed git index file.
 ///
 /// As opposed to a snapshot, it's meant to be altered and eventually be written back to disk or converted into a tree.
 /// We treat the index and its state as synonymous.
 ///
 /// # A note on safety
 ///
 /// An index (i.e. [`State`]) created by hand is not guaranteed to have valid entry paths as they are entirely controlled
 /// by the caller, without applying any level of validation.
 ///
 /// This means that before using these paths to recreate files on disk, *they must be validated*.
 ///
 /// It's notable that it's possible to manufacture tree objects which contain names like `.git/hooks/pre-commit`
 /// which then will look like `.git/hooks/pre-commit` in the index, which doesn't care that the name came from a single
 /// tree instead of from trees named `.git`, `hooks` and a blob named `pre-commit`. The effect is still the same - an invalid
 /// path is presented in the index and its consumer must validate each path component before usage.
 ///
 /// It's recommended to do that using `gix_worktree::Stack` which has it built-in if it's created `for_checkout()`. Alternatively
 /// one can validate component names with `gix_validate::path::component()`.
 #[derive(Clone)]
 pub struct State {
     /// The kind of object hash used when storing the underlying file.
     ///
     /// Empty states for example won't have a single object id, so deduction of the hash used isn't always possible.
     object_hash: gix_hash::Kind,
     /// The time at which the state was created, indicating its freshness compared to other files on disk.
     ///
     /// Note that on platforms that only have a precision of one second for this time, we will treat all entries with the
     /// same timestamp as this as potentially changed, checking more thoroughly if a change actually happened.
     timestamp: FileTime,
     /// The version of the index file format, see [`Version`].
     version: Version,
     /// All entries of the index, each of which refers to its path by range into `path_backing`.
     entries: Vec<Entry>,
     /// A memory area keeping all index paths, in full length, independently of the index version.
     ///
     /// Ranges into this storage are referred to by parts of `entries`.
     path_backing: PathStorage,
     /// True if one entry in the index has a special marker mode
     is_sparse: bool,

     // Extensions
     // NOTE(review): the two flags below presumably record whether the 'end of index entry' and
     // 'index entry offset table' extensions were seen while decoding - confirm against `decode`.
     end_of_index_at_decode_time: bool,
     offset_table_at_decode_time: bool,
     /// The data of the tree extension, if there is any.
     tree: Option<extension::Tree>,
     /// The data of the link extension, if there is any.
     link: Option<extension::Link>,
     /// The paths of the resolve-undo extension, if there are any.
     resolve_undo: Option<extension::resolve_undo::Paths>,
     /// The untracked cache extension, if there is any.
     untracked: Option<extension::UntrackedCache>,
     /// The filesystem monitor extension, if there is any.
     fs_monitor: Option<extension::FsMonitor>,
 }

 mod impls {
     use crate::entry::Stage;
     use std::fmt::{Debug, Formatter};

     use crate::State;

     impl Debug for State {
         fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
             for entry in &self.entries {
                 writeln!(
                     f,
                     "{} {}{:?} {} {}",
                     match entry.flags.stage() {
                         Stage::Unconflicted => "       ",
                         Stage::Base => "BASE   ",
                         Stage::Ours => "OURS   ",
                         Stage::Theirs => "THEIRS ",
                     },
                     if entry.flags.is_empty() {
                         "".to_string()
                     } else {
                         format!("{:?} ", entry.flags)
                     },
                     entry.mode,
                     entry.id,
                     entry.path(self)
                 )?;
             }
             Ok(())
         }
     }
 }

 pub(crate) mod util {
     /// Decode a variable-length integer from the start of `data` by means of
     /// `gix_features::decode::leb64_from_read()`.
     ///
     /// Returns the decoded number along with the bytes that follow it, or `None` if decoding failed.
     #[inline]
     pub fn var_int(data: &[u8]) -> Option<(u64, &[u8])> {
         match gix_features::decode::leb64_from_read(data) {
             Ok((value, consumed)) => Some((value, &data[consumed..])),
             Err(_) => None,
         }
     }

     /// Read a big-endian `u32` from the first 4 bytes of `data`.
     ///
     /// Returns the number along with the remaining bytes, or `None` if `data` holds less than 4 bytes.
     #[inline]
     pub fn read_u32(data: &[u8]) -> Option<(u32, &[u8])> {
         let (head, rest) = split_at_pos(data, 4)?;
         // `head` is exactly 4 bytes long, hence the conversion cannot fail.
         let value = u32::from_be_bytes(head.try_into().unwrap());
         Some((value, rest))
     }

     /// Read a big-endian `u64` from the first 8 bytes of `data`.
     ///
     /// Returns the number along with the remaining bytes, or `None` if `data` holds less than 8 bytes.
     #[inline]
     pub fn read_u64(data: &[u8]) -> Option<(u64, &[u8])> {
         let (head, rest) = split_at_pos(data, 8)?;
         // `head` is exactly 8 bytes long, hence the conversion cannot fail.
         let value = u64::from_be_bytes(head.try_into().unwrap());
         Some((value, rest))
     }

     /// Interpret `b` as a big-endian `u32`.
     ///
     /// # Panics
     ///
     /// If `b` is not exactly 4 bytes long.
     #[inline]
     pub fn from_be_u32(b: &[u8]) -> u32 {
         let bytes: [u8; 4] = b.try_into().unwrap();
         u32::from_be_bytes(bytes)
     }

     /// Split `data` at the first occurrence of `byte`, returning what's before and after it
     /// without including `byte` itself.
     ///
     /// Returns `None` if `byte` isn't contained in `data`, or if `data` is shorter than 2 bytes.
     #[inline]
     pub fn split_at_byte_exclusive(data: &[u8], byte: u8) -> Option<(&[u8], &[u8])> {
         if data.len() < 2 {
             return None;
         }
         let idx = data.iter().position(|b| *b == byte)?;
         // `idx` is a valid index, so `idx + 1` is at most `data.len()` and slicing can't panic.
         // A match at index 0 naturally yields an empty first half, no special case needed.
         Some((&data[..idx], &data[idx + 1..]))
     }

     /// Split `data` into its first `pos` bytes and the rest.
     ///
     /// Returns `None` if `data` holds less than `pos` bytes.
     #[inline]
     pub fn split_at_pos(data: &[u8], pos: usize) -> Option<(&[u8], &[u8])> {
         (pos <= data.len()).then(|| data.split_at(pos))
     }
 }

 /// Guards against accidental growth of the types that exist once per index entry.
 #[test]
 fn size_of_entry() {
     use std::mem::size_of;

     assert_eq!(size_of::<crate::Entry>(), 80);

     // We have our own time type because it is half the size of `FileTime`.
     assert_eq!(size_of::<crate::entry::stat::Time>(), 8);
     assert_eq!(size_of::<filetime::FileTime>(), 16);
 }
	//! ## Feature Flags
	#![cfg_attr(
	all(doc, feature = "document-features"),
	doc = ::document_features::document_features!()
	)]
	#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))]
	#![deny(unsafe_code, missing_docs, rust_2018_idioms)]

	use bstr::{BStr, ByteSlice};
	use std::{ops::Range, path::PathBuf};

	use filetime::FileTime;
	/// `gix_hash` is made available as it's part of the public API in various places.
	pub use gix_hash as hash;
	/// A re-export to allow calling [`State::from_tree()`].
	pub use gix_validate as validate;

	///
	#[allow(clippy::empty_docs)]
	pub mod file;

	///
	#[allow(clippy::empty_docs)]
	pub mod extension;

	///
	#[allow(clippy::empty_docs)]
	pub mod entry;

	mod access;

	///
	#[allow(clippy::empty_docs)]
	pub mod init;

	///
	#[allow(clippy::empty_docs)]
	pub mod decode;

	///
	#[allow(clippy::empty_docs)]
	pub mod verify;

	///
	#[allow(clippy::empty_docs)]
	pub mod write;

	pub mod fs;

	/// All known versions of a git index file.
	///
	/// The discriminant of each variant equals the version number it stands for, and the derived
	/// ordering therefore sorts versions from lowest to highest.
	#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
	#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
	pub enum Version {
	/// Version 2: supports entries and various extensions.
	V2 = 2,
	/// Version 3: adds support for additional flags for each entry, called extended entries.
	V3 = 3,
	/// Version 4: supports deltified entry paths.
	V4 = 4,
	}

	/// An entry in the index, identifying a non-tree item on disk.
	///
	/// An entry does not own its path. It only remembers where the path is located within the
	/// path backing of the [`State`] it belongs to.
	#[derive(Debug, Clone, Eq, PartialEq)]
	pub struct Entry {
	/// The filesystem stat information for the file on disk.
	pub stat: entry::Stat,
	/// The object id for this entry's ODB representation (assuming it's up-to-date with it).
	pub id: gix_hash::ObjectId,
	/// Additional flags for use in algorithms and for efficiently storing stage information.
	pub flags: entry::Flags,
	/// The kind of item this entry represents - it's not all blobs in the index anymore.
	pub mode: entry::Mode,
	/// The range to lookup in the path backing to obtain the entry path relative to the repository.
	/// This costs additional memory but is probably worth it given that paths can stay in one big allocation.
	///
	/// This field is private as the range is only meaningful together with the path backing it indexes into.
	path: Range<usize>,
	}

	/// An index file whose state was read from a file on disk.
	///
	/// It combines the in-memory [`State`] with the location of the file and its checksum.
	#[derive(Clone)]
	pub struct File {
	/// The state containing the actual index data.
	pub(crate) state: State,
	/// The path from which the index was read or to which it is supposed to be written.
	pub(crate) path: PathBuf,
	/// The checksum of all bytes prior to the checksum itself.
	///
	/// NOTE(review): `None` presumably means that no checksum is known (yet) - confirm against the code in `file`.
	pub(crate) checksum: Option<gix_hash::ObjectId>,
	}

	/// The type to use and store paths to all entries.
	///
	/// It is a plain byte buffer into which each [`Entry`] points by range.
	pub type PathStorage = Vec<u8>;
	/// The type to use and store paths to all entries, as a borrowed reference.
	pub type PathStorageRef = [u8];

	/// A directory as stored in [`AccelerateLookup`], described by the first entry inside of it and
	/// the position in the path backing at which the directory portion of that entry's path ends.
	struct DirEntry<'a> {
	/// The first entry in the directory
	entry: &'a Entry,
	/// One past the last byte of the directory in the path-backing
	dir_end: usize,
	}

	impl DirEntry<'_> {
	    /// Return the path of this directory, borrowed from the path backing of `state`.
	    ///
	    /// The returned slice begins where the path of the first entry begins and stops at `dir_end`.
	    fn path<'a>(&self, state: &'a State) -> &'a BStr {
	        let start = self.entry.path.start;
	        state.path_backing[start..self.dir_end].as_bstr()
	    }
	}

	/// A backing store for accelerating lookups of entries in a case-sensitive and case-insensitive manner.
	///
	/// Both tables store references with lifetime `'a` instead of owning the entries they refer to.
	pub struct AccelerateLookup<'a> {
	/// The entries themselves, hashed by their full icase path.
	/// Icase-clashes are handled in order of occurrence and are all available for iteration.
	icase_entries: hashbrown::HashTable<&'a Entry>,
	/// Each hash in this table corresponds to a directory containing one or more entries.
	icase_dirs: hashbrown::HashTable<DirEntry<'a>>,
	}

	/// An in-memory cache of a fully parsed git index file.
	///
	/// As opposed to a snapshot, it's meant to be altered and eventually be written back to disk or converted into a tree.
	/// We treat the index and its state as synonymous.
	///
	/// # A note on safety
	///
	/// An index (i.e. [`State`]) created by hand is not guaranteed to have valid entry paths as they are entirely controlled
	/// by the caller, without applying any level of validation.
	///
	/// This means that before using these paths to recreate files on disk, *they must be validated*.
	///
	/// It's notable that it's possible to manufacture tree objects which contain names like `.git/hooks/pre-commit`
	/// which then will look like `.git/hooks/pre-commit` in the index, which doesn't care that the name came from a single
	/// tree instead of from trees named `.git`, `hooks` and a blob named `pre-commit`. The effect is still the same - an invalid
	/// path is presented in the index and its consumer must validate each path component before usage.
	///
	/// It's recommended to do that using `gix_worktree::Stack` which has it built-in if it's created `for_checkout()`. Alternatively
	/// one can validate component names with `gix_validate::path::component()`.
	#[derive(Clone)]
	pub struct State {
	/// The kind of object hash used when storing the underlying file.
	///
	/// Empty states for example won't have a single object id, so deduction of the hash used isn't always possible.
	object_hash: gix_hash::Kind,
	/// The time at which the state was created, indicating its freshness compared to other files on disk.
	///
	/// Note that on platforms that only have a precision of one second for this time, we will treat all entries with the
	/// same timestamp as this as potentially changed, checking more thoroughly if a change actually happened.
	timestamp: FileTime,
	/// The version of the index file format, see [`Version`].
	version: Version,
	/// All entries of the index, each of which refers to its path by range into `path_backing`.
	entries: Vec<Entry>,
	/// A memory area keeping all index paths, in full length, independently of the index version.
	///
	/// Ranges into this storage are referred to by parts of `entries`.
	path_backing: PathStorage,
	/// True if one entry in the index has a special marker mode
	is_sparse: bool,

	// Extensions
	// NOTE(review): the two flags below presumably record whether the 'end of index entry' and
	// 'index entry offset table' extensions were seen while decoding - confirm against `decode`.
	end_of_index_at_decode_time: bool,
	offset_table_at_decode_time: bool,
	/// The data of the tree extension, if there is any.
	tree: Option<extension::Tree>,
	/// The data of the link extension, if there is any.
	link: Option<extension::Link>,
	/// The paths of the resolve-undo extension, if there are any.
	resolve_undo: Option<extension::resolve_undo::Paths>,
	/// The untracked cache extension, if there is any.
	untracked: Option<extension::UntrackedCache>,
	/// The filesystem monitor extension, if there is any.
	fs_monitor: Option<extension::FsMonitor>,
	}

	mod impls {
	    use crate::entry::Stage;
	    use std::fmt::{Debug, Formatter};

	    use crate::State;

	    /// Prints one line per entry: stage label, flags (if any), mode, object id and path.
	    impl Debug for State {
	        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
	            for entry in &self.entries {
	                writeln!(
	                    f,
	                    "{} {}{:?} {} {}",
	                    // Every label is padded to seven characters so that the columns
	                    // following it line up regardless of the stage.
	                    match entry.flags.stage() {
	                        Stage::Unconflicted => "       ",
	                        Stage::Base => "BASE   ",
	                        Stage::Ours => "OURS   ",
	                        Stage::Theirs => "THEIRS ",
	                    },
	                    // Flags are only shown if at least one of them is set.
	                    if entry.flags.is_empty() {
	                        "".to_string()
	                    } else {
	                        format!("{:?} ", entry.flags)
	                    },
	                    entry.mode,
	                    entry.id,
	                    entry.path(self)
	                )?;
	            }
	            Ok(())
	        }
	    }
	}

	pub(crate) mod util {
	/// Decode a variable-length integer from the start of `data` by means of
	/// `gix_features::decode::leb64_from_read()`.
	///
	/// Returns the decoded number along with the bytes that follow it, or `None` if decoding failed.
	#[inline]
	pub fn var_int(data: &[u8]) -> Option<(u64, &[u8])> {
	    match gix_features::decode::leb64_from_read(data) {
	        Ok((value, consumed)) => Some((value, &data[consumed..])),
	        Err(_) => None,
	    }
	}

	#[inline]
	pub fn read_u32(data: &[u8]) -> Option<(u32, &[u8])> {
	split_at_pos(data, 4).map(\|(num, data)\| (u32::from_be_bytes(num.try_into().unwrap()), data))
	}

	#[inline]
	pub fn read_u64(data: &[u8]) -> Option<(u64, &[u8])> {
	split_at_pos(data, 8).map(\|(num, data)\| (u64::from_be_bytes(num.try_into().unwrap()), data))
	}

	/// Interpret `b` as a big-endian `u32`.
	///
	/// # Panics
	///
	/// If `b` is not exactly 4 bytes long.
	#[inline]
	pub fn from_be_u32(b: &[u8]) -> u32 {
	    let bytes: [u8; 4] = b.try_into().unwrap();
	    u32::from_be_bytes(bytes)
	}

	/// Split `data` at the first occurrence of `byte`, returning what's before and after it
	/// without including `byte` itself.
	///
	/// Returns `None` if `byte` isn't contained in `data`, or if `data` is shorter than 2 bytes.
	#[inline]
	pub fn split_at_byte_exclusive(data: &[u8], byte: u8) -> Option<(&[u8], &[u8])> {
	    if data.len() < 2 {
	        return None;
	    }
	    let idx = data.iter().position(|b| *b == byte)?;
	    // `idx` is a valid index, so `idx + 1` is at most `data.len()` and slicing can't panic.
	    // A match at index 0 naturally yields an empty first half, no special case needed.
	    Some((&data[..idx], &data[idx + 1..]))
	}

	/// Split `data` into its first `pos` bytes and the rest.
	///
	/// Returns `None` if `data` holds less than `pos` bytes.
	#[inline]
	pub fn split_at_pos(data: &[u8], pos: usize) -> Option<(&[u8], &[u8])> {
	    (pos <= data.len()).then(|| data.split_at(pos))
	}
	}

	/// Guards against accidental growth of the types that exist once per index entry.
	#[test]
	fn size_of_entry() {
	    use std::mem::size_of;

	    assert_eq!(size_of::<crate::Entry>(), 80);

	    // We have our own time type because it is half the size of `FileTime`.
	    assert_eq!(size_of::<crate::entry::stat::Time>(), 8);
	    assert_eq!(size_of::<filetime::FileTime>(), 16);
	}