blob: 29f5947b37b6bbb8f714775ac4064fe6b2a2ec24 [file] [log] [blame]
use smallvec::SmallVec;
/// An iterator over the ancestors one or more starting commits
pub struct Ancestors<Find, Predicate, StateMut> {
find: Find,
cache: Option<gix_commitgraph::Graph>,
predicate: Predicate,
state: StateMut,
parents: Parents,
sorting: Sorting,
}
/// Specify how to handle commit parents during traversal.
#[derive(Default, Copy, Clone)]
pub enum Parents {
/// Traverse all parents, useful for traversing the entire ancestry.
#[default]
All,
/// Only traverse along the first parent, which commonly ignores all branches.
First,
}
/// Specify how to sort commits during traversal.
///
/// ### Sample History
///
/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp
/// (*their X-alignment doesn't matter*).
///
/// ```text
/// ---1----2----4----7 <- second parent of 8
/// \ \
/// 3----5----6----8---
/// ```
#[derive(Default, Debug, Copy, Clone)]
pub enum Sorting {
/// Commits are sorted as they are mentioned in the commit graph.
///
/// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1`
///
/// ### Note
///
/// This is not to be confused with `git log/rev-list --topo-order`, which is notably different from
/// as it avoids overlapping branches.
#[default]
BreadthFirst,
/// Commits are sorted by their commit time in descending order, that is newest first.
///
/// The sorting applies to all currently queued commit ids and thus is full.
///
/// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1`
///
/// # Performance
///
/// This mode benefits greatly from having an object_cache in `find()`
/// to avoid having to lookup each commit twice.
ByCommitTimeNewestFirst,
/// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than
/// a given time, stopping the iteration once no younger commits is queued to be traversed.
///
/// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
///
/// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`
ByCommitTimeNewestFirstCutoffOlderThan {
/// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time.
seconds: gix_date::SecondsSinceUnixEpoch,
},
}
/// The collection of parent ids we saw as part of the iteration.
///
/// Note that this list is truncated if [`Parents::First`] was used.
pub type ParentIds = SmallVec<[gix_hash::ObjectId; 1]>;
/// Information about a commit that we obtained naturally as part of the iteration.
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
pub struct Info {
/// The id of the commit.
pub id: gix_hash::ObjectId,
/// All parent ids we have encountered. Note that these will be at most one if [`Parents::First`] is enabled.
pub parent_ids: ParentIds,
/// The time at which the commit was created. It's only `Some(_)` if sorting is not [`Sorting::BreadthFirst`], as the walk
/// needs to require the commit-date.
pub commit_time: Option<gix_date::SecondsSinceUnixEpoch>,
}
///
pub mod ancestors {
use std::{
borrow::{Borrow, BorrowMut},
collections::VecDeque,
};
use gix_date::SecondsSinceUnixEpoch;
use gix_hash::{oid, ObjectId};
use gix_hashtable::HashSet;
use gix_object::CommitRefIter;
use smallvec::SmallVec;
use crate::commit::{collect_parents, Ancestors, Either, Info, ParentIds, Parents, Sorting};
/// The error is part of the item returned by the [Ancestors] iterator.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("The commit {oid} could not be found")]
FindExisting {
oid: ObjectId,
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
#[error(transparent)]
ObjectDecode(#[from] gix_object::decode::Error),
}
/// The state used and potentially shared by multiple graph traversals.
#[derive(Clone)]
pub struct State {
next: VecDeque<ObjectId>,
queue: gix_revwalk::PriorityQueue<SecondsSinceUnixEpoch, ObjectId>,
buf: Vec<u8>,
seen: HashSet<ObjectId>,
parents_buf: Vec<u8>,
parent_ids: SmallVec<[(ObjectId, SecondsSinceUnixEpoch); 2]>,
}
impl Default for State {
fn default() -> Self {
State {
next: Default::default(),
queue: gix_revwalk::PriorityQueue::new(),
buf: vec![],
seen: Default::default(),
parents_buf: vec![],
parent_ids: Default::default(),
}
}
}
impl State {
fn clear(&mut self) {
self.next.clear();
self.queue.clear();
self.buf.clear();
self.seen.clear();
}
}
/// Builder
impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
/// Set the sorting method, either topological or by author date
pub fn sorting(mut self, sorting: Sorting) -> Result<Self, Error> {
self.sorting = sorting;
match self.sorting {
Sorting::BreadthFirst => {
self.queue_to_vecdeque();
}
Sorting::ByCommitTimeNewestFirst | Sorting::ByCommitTimeNewestFirstCutoffOlderThan { .. } => {
let cutoff_time = self.sorting.cutoff_time();
let state = self.state.borrow_mut();
for commit_id in state.next.drain(..) {
let commit_iter =
(self.find)(&commit_id, &mut state.buf).map_err(|err| Error::FindExisting {
oid: commit_id,
source: err.into(),
})?;
let time = commit_iter.committer()?.time.seconds;
match cutoff_time {
Some(cutoff_time) if time >= cutoff_time => {
state.queue.insert(time, commit_id);
}
Some(_) => {}
None => {
state.queue.insert(time, commit_id);
}
}
}
}
}
Ok(self)
}
/// Change our commit parent handling mode to the given one.
pub fn parents(mut self, mode: Parents) -> Self {
self.parents = mode;
if matches!(self.parents, Parents::First) {
self.queue_to_vecdeque();
}
self
}
/// Set the commitgraph as `cache` to greatly accelerate any traversal.
///
/// The cache will be used if possible, but we will fall-back without error to using the object
/// database for commit lookup. If the cache is corrupt, we will fall back to the object database as well.
pub fn commit_graph(mut self, cache: Option<gix_commitgraph::Graph>) -> Self {
self.cache = cache;
self
}
fn queue_to_vecdeque(&mut self) {
let state = self.state.borrow_mut();
state.next.extend(
std::mem::replace(&mut state.queue, gix_revwalk::PriorityQueue::new())
.into_iter_unordered()
.map(|(_time, id)| id),
);
}
}
/// Initialization
impl<Find, StateMut, E> Ancestors<Find, fn(&oid) -> bool, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
/// Create a new instance.
///
/// * `find` - a way to lookup new object data during traversal by their `ObjectId`, writing their data into buffer and returning
/// an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function
/// as needed.
/// * `state` - all state used for the traversal. If multiple traversals are performed, allocations can be minimized by reusing
/// this state.
/// * `tips`
/// * the starting points of the iteration, usually commits
/// * each commit they lead to will only be returned once, including the tip that started it
pub fn new(tips: impl IntoIterator<Item = impl Into<ObjectId>>, state: StateMut, find: Find) -> Self {
Self::filtered(tips, state, find, |_| true)
}
}
/// Initialization
impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
Predicate: FnMut(&oid) -> bool,
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
/// Create a new instance with commit filtering enabled.
///
/// * `find` - a way to lookup new object data during traversal by their `ObjectId`, writing their data into buffer and returning
/// an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function
/// as needed.
/// * `state` - all state used for the traversal. If multiple traversals are performed, allocations can be minimized by reusing
/// this state.
/// * `tips`
/// * the starting points of the iteration, usually commits
/// * each commit they lead to will only be returned once, including the tip that started it
/// * `predicate` - indicate whether a given commit should be included in the result as well
/// as whether its parent commits should be traversed.
pub fn filtered(
tips: impl IntoIterator<Item = impl Into<ObjectId>>,
mut state: StateMut,
find: Find,
mut predicate: Predicate,
) -> Self {
let tips = tips.into_iter();
{
let state = state.borrow_mut();
state.clear();
state.next.reserve(tips.size_hint().0);
for tip in tips.map(Into::into) {
let was_inserted = state.seen.insert(tip);
if was_inserted && predicate(&tip) {
state.next.push_back(tip);
}
}
}
Self {
find,
cache: None,
predicate,
state,
parents: Default::default(),
sorting: Default::default(),
}
}
}
/// Access
impl<Find, Predicate, StateMut> Ancestors<Find, Predicate, StateMut>
where
StateMut: Borrow<State>,
{
/// Return an iterator for accessing more of the current commits data.
pub fn commit_iter(&self) -> CommitRefIter<'_> {
CommitRefIter::from_bytes(&self.state.borrow().buf)
}
/// Return the current commits data.
pub fn commit_data(&self) -> &[u8] {
&self.state.borrow().buf
}
}
impl<Find, Predicate, StateMut, E> Iterator for Ancestors<Find, Predicate, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
Predicate: FnMut(&oid) -> bool,
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
type Item = Result<Info, Error>;
fn next(&mut self) -> Option<Self::Item> {
if matches!(self.parents, Parents::First) {
self.next_by_topology()
} else {
match self.sorting {
Sorting::BreadthFirst => self.next_by_topology(),
Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None),
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => {
self.next_by_commit_date(seconds.into())
}
}
}
}
}
impl Sorting {
/// If not topo sort, provide the cutoff date if present.
fn cutoff_time(&self) -> Option<SecondsSinceUnixEpoch> {
match self {
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => Some(*seconds),
_ => None,
}
}
}
/// Utilities
impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
Predicate: FnMut(&oid) -> bool,
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
fn next_by_commit_date(
&mut self,
cutoff_older_than: Option<SecondsSinceUnixEpoch>,
) -> Option<Result<Info, Error>> {
let state = self.state.borrow_mut();
let (commit_time, oid) = state.queue.pop()?;
let mut parents: ParentIds = Default::default();
match super::find(self.cache.as_ref(), &mut self.find, &oid, &mut state.buf) {
Ok(Either::CachedCommit(commit)) => {
if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
// drop corrupt caches and try again with ODB
self.cache = None;
return self.next_by_commit_date(cutoff_older_than);
}
for (id, parent_commit_time) in state.parent_ids.drain(..) {
parents.push(id);
let was_inserted = state.seen.insert(id);
if !(was_inserted && (self.predicate)(&id)) {
continue;
}
match cutoff_older_than {
Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
Some(_) | None => state.queue.insert(parent_commit_time, id),
}
}
}
Ok(Either::CommitRefIter(commit_iter)) => {
for token in commit_iter {
match token {
Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
parents.push(id);
let was_inserted = state.seen.insert(id);
if !(was_inserted && (self.predicate)(&id)) {
continue;
}
let parent = (self.find)(id.as_ref(), &mut state.parents_buf).ok();
let parent_commit_time = parent
.and_then(|parent| parent.committer().ok().map(|committer| committer.time.seconds))
.unwrap_or_default();
match cutoff_older_than {
Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
Some(_) | None => state.queue.insert(parent_commit_time, id),
}
}
Ok(_unused_token) => break,
Err(err) => return Some(Err(err.into())),
}
}
}
Err(err) => {
return Some(Err(Error::FindExisting {
oid,
source: err.into(),
}))
}
}
Some(Ok(Info {
id: oid,
parent_ids: parents,
commit_time: Some(commit_time),
}))
}
}
/// Utilities
impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
Predicate: FnMut(&oid) -> bool,
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
fn next_by_topology(&mut self) -> Option<Result<Info, Error>> {
let state = self.state.borrow_mut();
let oid = state.next.pop_front()?;
let mut parents: ParentIds = Default::default();
match super::find(self.cache.as_ref(), &mut self.find, &oid, &mut state.buf) {
Ok(Either::CachedCommit(commit)) => {
if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
// drop corrupt caches and try again with ODB
self.cache = None;
return self.next_by_topology();
}
for (id, _commit_time) in state.parent_ids.drain(..) {
parents.push(id);
let was_inserted = state.seen.insert(id);
if was_inserted && (self.predicate)(&id) {
state.next.push_back(id);
}
if matches!(self.parents, Parents::First) {
break;
}
}
}
Ok(Either::CommitRefIter(commit_iter)) => {
for token in commit_iter {
match token {
Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
parents.push(id);
let was_inserted = state.seen.insert(id);
if was_inserted && (self.predicate)(&id) {
state.next.push_back(id);
}
if matches!(self.parents, Parents::First) {
break;
}
}
Ok(_a_token_past_the_parents) => break,
Err(err) => return Some(Err(err.into())),
}
}
}
Err(err) => {
return Some(Err(Error::FindExisting {
oid,
source: err.into(),
}))
}
}
Some(Ok(Info {
id: oid,
parent_ids: parents,
commit_time: None,
}))
}
}
}
enum Either<'buf, 'cache> {
CommitRefIter(gix_object::CommitRefIter<'buf>),
CachedCommit(gix_commitgraph::file::Commit<'cache>),
}
fn collect_parents(
dest: &mut SmallVec<[(gix_hash::ObjectId, gix_date::SecondsSinceUnixEpoch); 2]>,
cache: Option<&gix_commitgraph::Graph>,
parents: gix_commitgraph::file::commit::Parents<'_>,
) -> bool {
dest.clear();
let cache = cache.as_ref().expect("parents iter is available, backed by `cache`");
for parent_id in parents {
match parent_id {
Ok(pos) => dest.push({
let parent = cache.commit_at(pos);
(
parent.id().to_owned(),
parent.committer_timestamp() as gix_date::SecondsSinceUnixEpoch, // we can't handle errors here and trying seems overkill
)
}),
Err(_err) => return false,
}
}
true
}
fn find<'cache, 'buf, Find, E>(
cache: Option<&'cache gix_commitgraph::Graph>,
mut find: Find,
id: &gix_hash::oid,
buf: &'buf mut Vec<u8>,
) -> Result<Either<'buf, 'cache>, E>
where
Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec<u8>) -> Result<gix_object::CommitRefIter<'a>, E>,
E: std::error::Error + Send + Sync + 'static,
{
match cache.and_then(|cache| cache.commit_by_id(id).map(Either::CachedCommit)) {
Some(c) => Ok(c),
None => find(id, buf).map(Either::CommitRefIter),
}
}