vendor/gix-commitgraph-0.24.3/src/verify.rs - toolchain/rustc - Git at Google

 //! Auxiliary types used by graph verification methods.
 use std::{
     cmp::{max, min},
     collections::BTreeMap,
     path::PathBuf,
 };

 use crate::{
     file::{self, commit},
     Graph, Position, GENERATION_NUMBER_MAX,
 };

 /// The error used in [`verify_integrity()`][Graph::verify_integrity].
 #[derive(thiserror::Error, Debug)]
 #[allow(missing_docs)]
 pub enum Error<E: std::error::Error + 'static> {
     #[error("'{}' should have {expected} base graphs, but claims {actual} base graphs", .path.display())]
     BaseGraphCount { actual: u8, expected: u8, path: PathBuf },
     #[error("'{}' base graph at index {index} should have ID {expected} but is {actual}", .path.display())]
     BaseGraphId {
         actual: gix_hash::ObjectId,
         expected: gix_hash::ObjectId,
         index: u8,
         path: PathBuf,
     },
     #[error(transparent)]
     Commit(#[from] commit::Error),
     #[error("{}: {err}", .path.display())]
     File {
         // Use zero-size error type. We will never return
         // `graph::verify::Error::File(file::verify::Error::Processor(...))`, because we are the
         // file's processor, and we convert`file::verify::Error::Processor<graph::verify::Error>`
         // variants into direct `graph::verify::Error` values.
         err: file::verify::Error<std::convert::Infallible>,
         path: PathBuf,
     },
     #[error("Commit {id}'s generation should be {expected} but is {actual}")]
     Generation {
         actual: u32,
         expected: u32,
         id: gix_hash::ObjectId,
     },
     #[error(
         "Commit {id} has parent position {parent_pos} that is out of range (should be in range 0-{max_valid_pos})"
     )]
     ParentOutOfRange {
         id: gix_hash::ObjectId,
         max_valid_pos: Position,
         parent_pos: Position,
     },
     #[error("{0}")]
     Processor(#[source] E),
     #[error("Commit-graph should be composed of at most 256 files but actually contains {0} files")]
     TooManyFiles(usize),
 }

 /// Statistics gathered while verifying the integrity of the graph as returned by [`Graph::verify_integrity()`].
 #[derive(Clone, Debug, Eq, PartialEq)]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
 pub struct Outcome {
     /// The length of the longest path between any two commits in this graph.
     ///
     /// For example, this will be `Some(9)` for a commit graph containing 10 linear commits.
     /// This will be `Some(0)` for a commit graph containing 0 or 1 commits.
     /// If the longest path length is too large to fit in a [u32], then this will be [None].
     pub longest_path_length: Option<u32>,
     /// The total number of commits traversed.
     pub num_commits: u32,
     /// A mapping of `N -> number of commits with N parents`.
     pub parent_counts: BTreeMap<u32, u32>,
 }

 impl Graph {
     /// Traverse all commits in the graph and call `processor(&commit) -> Result<(), E>` on it while verifying checksums.
     ///
     /// When `processor` returns an error, the entire verification is stopped and the error returned.
     pub fn verify_integrity<E>(
         &self,
         mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
     ) -> Result<Outcome, Error<E>>
     where
         E: std::error::Error + 'static,
     {
         if self.files.len() > 256 {
             // A file in a split chain can only have up to 255 base files.
             return Err(Error::TooManyFiles(self.files.len()));
         }

         let mut stats = Outcome {
             longest_path_length: None,
             num_commits: 0,
             parent_counts: BTreeMap::new(),
         };
         let mut max_generation = 0u32;

         // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
         //   a separate loop, e.g. self.iter_sorted_ids().

         let mut file_start_pos = Position(0);
         for (file_index, file) in self.files.iter().enumerate() {
             if usize::from(file.base_graph_count()) != file_index {
                 return Err(Error::BaseGraphCount {
                     actual: file.base_graph_count(),
                     expected: file_index
                         .try_into()
                         .expect("files.len() check to protect against this"),
                     path: file.path().to_owned(),
                 });
             }

             for (base_graph_index, (expected, actual)) in self.files[..file_index]
                 .iter()
                 .map(crate::File::checksum)
                 .zip(file.iter_base_graph_ids())
                 .enumerate()
             {
                 if actual != expected {
                     return Err(Error::BaseGraphId {
                         actual: actual.into(),
                         expected: expected.into(),
                         index: base_graph_index
                             .try_into()
                             .expect("files.len() check to protect against this"),
                         path: file.path().to_owned(),
                     });
                 }
             }

             let next_file_start_pos = Position(file_start_pos.0 + file.num_commits());
             let file_stats = file
                 .traverse(|commit| {
                     let mut max_parent_generation = 0u32;
                     for parent_pos in commit.iter_parents() {
                         let parent_pos = parent_pos.map_err(Error::Commit)?;
                         if parent_pos >= next_file_start_pos {
                             return Err(Error::ParentOutOfRange {
                                 parent_pos,
                                 id: commit.id().into(),
                                 max_valid_pos: Position(next_file_start_pos.0 - 1),
                             });
                         }
                         let parent = self.commit_at(parent_pos);
                         max_parent_generation = max(max_parent_generation, parent.generation());
                     }

                     // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
                     // generation should be GENERATION_NUMBER_MAX too.
                     let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
                     if commit.generation() != expected_generation {
                         return Err(Error::Generation {
                             actual: commit.generation(),
                             expected: expected_generation,
                             id: commit.id().into(),
                         });
                     }

                     processor(commit).map_err(Error::Processor)?;

                     Ok(())
                 })
                 .map_err(|err| Error::File {
                     err: match err {
                         file::verify::Error::Processor(e) => return e,
                         file::verify::Error::RootTreeId { id, root_tree_id } => {
                             file::verify::Error::RootTreeId { id, root_tree_id }
                         }
                         file::verify::Error::Mismatch { actual, expected } => {
                             file::verify::Error::Mismatch { actual, expected }
                         }
                         file::verify::Error::Generation { generation, id } => {
                             file::verify::Error::Generation { generation, id }
                         }
                         file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected),
                         file::verify::Error::Commit(err) => file::verify::Error::Commit(err),
                         file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos },
                         file::verify::Error::CommitsOutOfOrder {
                             id,
                             pos,
                             predecessor_id,
                         } => file::verify::Error::CommitsOutOfOrder {
                             id,
                             pos,
                             predecessor_id,
                         },
                     },
                     path: file.path().to_owned(),
                 })?;

             max_generation = max(max_generation, file_stats.max_generation);
             stats.num_commits += file_stats.num_commits;
             for (key, value) in file_stats.parent_counts.into_iter() {
                 *stats.parent_counts.entry(key).or_insert(0) += value;
             }
             file_start_pos = next_file_start_pos;
         }

         stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
             Some(max_generation.saturating_sub(1))
         } else {
             None
         };
         Ok(stats)
     }
 }
	//! Auxiliary types used by graph verification methods.
	use std::{
	cmp::{max, min},
	collections::BTreeMap,
	path::PathBuf,
	};

	use crate::{
	file::{self, commit},
	Graph, Position, GENERATION_NUMBER_MAX,
	};

	/// The error used in [`verify_integrity()`][Graph::verify_integrity].
	#[derive(thiserror::Error, Debug)]
	#[allow(missing_docs)]
	pub enum Error<E: std::error::Error + 'static> {
	#[error("'{}' should have {expected} base graphs, but claims {actual} base graphs", .path.display())]
	BaseGraphCount { actual: u8, expected: u8, path: PathBuf },
	#[error("'{}' base graph at index {index} should have ID {expected} but is {actual}", .path.display())]
	BaseGraphId {
	actual: gix_hash::ObjectId,
	expected: gix_hash::ObjectId,
	index: u8,
	path: PathBuf,
	},
	#[error(transparent)]
	Commit(#[from] commit::Error),
	#[error("{}: {err}", .path.display())]
	File {
	// Use zero-size error type. We will never return
	// `graph::verify::Error::File(file::verify::Error::Processor(...))`, because we are the
	// file's processor, and we convert`file::verify::Error::Processor<graph::verify::Error>`
	// variants into direct `graph::verify::Error` values.
	err: file::verify::Error<std::convert::Infallible>,
	path: PathBuf,
	},
	#[error("Commit {id}'s generation should be {expected} but is {actual}")]
	Generation {
	actual: u32,
	expected: u32,
	id: gix_hash::ObjectId,
	},
	#[error(
	"Commit {id} has parent position {parent_pos} that is out of range (should be in range 0-{max_valid_pos})"
	)]
	ParentOutOfRange {
	id: gix_hash::ObjectId,
	max_valid_pos: Position,
	parent_pos: Position,
	},
	#[error("{0}")]
	Processor(#[source] E),
	#[error("Commit-graph should be composed of at most 256 files but actually contains {0} files")]
	TooManyFiles(usize),
	}

	/// Statistics gathered while verifying the integrity of the graph as returned by [`Graph::verify_integrity()`].
	#[derive(Clone, Debug, Eq, PartialEq)]
	#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
	pub struct Outcome {
	/// The length of the longest path between any two commits in this graph.
	///
	/// For example, this will be `Some(9)` for a commit graph containing 10 linear commits.
	/// This will be `Some(0)` for a commit graph containing 0 or 1 commits.
	/// If the longest path length is too large to fit in a [u32], then this will be [None].
	pub longest_path_length: Option<u32>,
	/// The total number of commits traversed.
	pub num_commits: u32,
	/// A mapping of `N -> number of commits with N parents`.
	pub parent_counts: BTreeMap<u32, u32>,
	}

	impl Graph {
	/// Traverse all commits in the graph and call `processor(&commit) -> Result<(), E>` on it while verifying checksums.
	///
	/// When `processor` returns an error, the entire verification is stopped and the error returned.
	pub fn verify_integrity<E>(
	&self,
	mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>,
	) -> Result<Outcome, Error<E>>
	where
	E: std::error::Error + 'static,
	{
	if self.files.len() > 256 {
	// A file in a split chain can only have up to 255 base files.
	return Err(Error::TooManyFiles(self.files.len()));
	}

	let mut stats = Outcome {
	longest_path_length: None,
	num_commits: 0,
	parent_counts: BTreeMap::new(),
	};
	let mut max_generation = 0u32;

	// TODO: Detect duplicate commit IDs across different files. Not sure how to do this without
	// a separate loop, e.g. self.iter_sorted_ids().

	let mut file_start_pos = Position(0);
	for (file_index, file) in self.files.iter().enumerate() {
	if usize::from(file.base_graph_count()) != file_index {
	return Err(Error::BaseGraphCount {
	actual: file.base_graph_count(),
	expected: file_index
	.try_into()
	.expect("files.len() check to protect against this"),
	path: file.path().to_owned(),
	});
	}

	for (base_graph_index, (expected, actual)) in self.files[..file_index]
	.iter()
	.map(crate::File::checksum)
	.zip(file.iter_base_graph_ids())
	.enumerate()
	{
	if actual != expected {
	return Err(Error::BaseGraphId {
	actual: actual.into(),
	expected: expected.into(),
	index: base_graph_index
	.try_into()
	.expect("files.len() check to protect against this"),
	path: file.path().to_owned(),
	});
	}
	}

	let next_file_start_pos = Position(file_start_pos.0 + file.num_commits());
	let file_stats = file
	.traverse(\|commit\| {
	let mut max_parent_generation = 0u32;
	for parent_pos in commit.iter_parents() {
	let parent_pos = parent_pos.map_err(Error::Commit)?;
	if parent_pos >= next_file_start_pos {
	return Err(Error::ParentOutOfRange {
	parent_pos,
	id: commit.id().into(),
	max_valid_pos: Position(next_file_start_pos.0 - 1),
	});
	}
	let parent = self.commit_at(parent_pos);
	max_parent_generation = max(max_parent_generation, parent.generation());
	}

	// If the max parent generation is GENERATION_NUMBER_MAX, then this commit's
	// generation should be GENERATION_NUMBER_MAX too.
	let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX);
	if commit.generation() != expected_generation {
	return Err(Error::Generation {
	actual: commit.generation(),
	expected: expected_generation,
	id: commit.id().into(),
	});
	}

	processor(commit).map_err(Error::Processor)?;

	Ok(())
	})
	.map_err(\|err\| Error::File {
	err: match err {
	file::verify::Error::Processor(e) => return e,
	file::verify::Error::RootTreeId { id, root_tree_id } => {
	file::verify::Error::RootTreeId { id, root_tree_id }
	}
	file::verify::Error::Mismatch { actual, expected } => {
	file::verify::Error::Mismatch { actual, expected }
	}
	file::verify::Error::Generation { generation, id } => {
	file::verify::Error::Generation { generation, id }
	}
	file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected),
	file::verify::Error::Commit(err) => file::verify::Error::Commit(err),
	file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos },
	file::verify::Error::CommitsOutOfOrder {
	id,
	pos,
	predecessor_id,
	} => file::verify::Error::CommitsOutOfOrder {
	id,
	pos,
	predecessor_id,
	},
	},
	path: file.path().to_owned(),
	})?;

	max_generation = max(max_generation, file_stats.max_generation);
	stats.num_commits += file_stats.num_commits;
	for (key, value) in file_stats.parent_counts.into_iter() {
	*stats.parent_counts.entry(key).or_insert(0) += value;
	}
	file_start_pos = next_file_start_pos;
	}

	stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX {
	Some(max_generation.saturating_sub(1))
	} else {
	None
	};
	Ok(stats)
	}
	}