blob: 6e7e0c9e7c39ebd10616e8a3db36ba2cbfd56d08 [file] [log] [blame]
/// An iterator over the ancestors one or more starting commits
pub struct Ancestors<Find, Predicate, StateMut> {
find: Find,
predicate: Predicate,
state: StateMut,
parents: Parents,
sorting: Sorting,
}
/// Specify how to handle commit parents during traversal.
#[derive(Default, Copy, Clone)]
pub enum Parents {
/// Traverse all parents, useful for traversing the entire ancestry.
#[default]
All,
/// Only traverse along the first parent, which commonly ignores all branches.
First,
}
/// Specify how to sort commits during traversal.
#[derive(Default, Debug, Copy, Clone)]
pub enum Sorting {
/// Commits are sorted as they are mentioned in the commit graph.
#[default]
Topological,
/// Commits are sorted by their commit time in descending order, that is newest first.
///
/// The sorting applies to all currently queued commit ids and thus is full.
///
/// # Performance
///
/// This mode benefits greatly from having an object_cache in `find()`
/// to avoid having to lookup each commit twice.
ByCommitTimeNewestFirst,
/// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than
/// a given time, stopping the iteration once no younger commits is queued to be traversed.
///
/// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
ByCommitTimeNewestFirstCutoffOlderThan {
/// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time.
time_in_seconds_since_epoch: u32,
},
}
///
pub mod ancestors {
use std::{
borrow::{Borrow, BorrowMut},
collections::VecDeque,
iter::FromIterator,
};
use gix_hash::{oid, ObjectId};
use gix_hashtable::HashSet;
use gix_object::CommitRefIter;
use crate::commit::{Ancestors, Parents, Sorting};
/// The error is part of the item returned by the [Ancestors] iterator.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
#[error("The commit {oid} could not be found")]
FindExisting {
oid: ObjectId,
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
#[error(transparent)]
ObjectDecode(#[from] gix_object::decode::Error),
}
type TimeInSeconds = u32;
/// The state used and potentially shared by multiple graph traversals.
#[derive(Default, Clone)]
pub struct State {
next: VecDeque<(ObjectId, TimeInSeconds)>,
buf: Vec<u8>,
seen: HashSet<ObjectId>,
parents_buf: Vec<u8>,
}
impl State {
fn clear(&mut self) {
self.next.clear();
self.buf.clear();
self.seen.clear();
}
}
/// Builder
impl<Find, Predicate, StateMut> Ancestors<Find, Predicate, StateMut> {
/// Change our commit parent handling mode to the given one.
pub fn parents(mut self, mode: Parents) -> Self {
self.parents = mode;
self
}
}
/// Builder
impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
/// Set the sorting method, either topological or by author date
pub fn sorting(mut self, sorting: Sorting) -> Result<Self, Error> {
self.sorting = sorting;
if !matches!(self.sorting, Sorting::Topological) {
let mut cutoff_time_storage = self.sorting.cutoff_time().map(|cot| (cot, Vec::new()));
let state = self.state.borrow_mut();
for (commit_id, commit_time) in state.next.iter_mut() {
let commit_iter = (self.find)(commit_id, &mut state.buf).map_err(|err| Error::FindExisting {
oid: *commit_id,
source: err.into(),
})?;
let time = commit_iter.committer()?.time.seconds_since_unix_epoch;
match &mut cutoff_time_storage {
Some((cutoff_time, storage)) if time >= *cutoff_time => {
storage.push((*commit_id, time));
}
Some(_) => {}
None => *commit_time = time,
}
}
let mut v = match cutoff_time_storage {
Some((_, storage)) => storage,
None => Vec::from_iter(std::mem::take(&mut state.next).into_iter()),
};
v.sort_by(|a, b| a.1.cmp(&b.1).reverse());
state.next = v.into();
}
Ok(self)
}
}
/// Initialization
impl<Find, StateMut, E> Ancestors<Find, fn(&oid) -> bool, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
/// Create a new instance.
///
/// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning
/// an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function
/// as needed.
/// * `state` - all state used for the traversal. If multiple traversals are performed, allocations can be minimized by reusing
/// this state.
/// * `tips`
/// * the starting points of the iteration, usually commits
/// * each commit they lead to will only be returned once, including the tip that started it
pub fn new(tips: impl IntoIterator<Item = impl Into<ObjectId>>, state: StateMut, find: Find) -> Self {
Self::filtered(tips, state, find, |_| true)
}
}
/// Initialization
impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
Predicate: FnMut(&oid) -> bool,
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
/// Create a new instance with commit filtering enabled.
///
/// * `find` - a way to lookup new object data during traversal by their ObjectId, writing their data into buffer and returning
/// an iterator over commit tokens if the object is present and is a commit. Caching should be implemented within this function
/// as needed.
/// * `state` - all state used for the traversal. If multiple traversals are performed, allocations can be minimized by reusing
/// this state.
/// * `tips`
/// * the starting points of the iteration, usually commits
/// * each commit they lead to will only be returned once, including the tip that started it
/// * `predicate` - indicate whether a given commit should be included in the result as well
/// as whether its parent commits should be traversed.
pub fn filtered(
tips: impl IntoIterator<Item = impl Into<ObjectId>>,
mut state: StateMut,
find: Find,
mut predicate: Predicate,
) -> Self {
let tips = tips.into_iter();
{
let state = state.borrow_mut();
state.clear();
state.next.reserve(tips.size_hint().0);
for tip in tips.map(Into::into) {
let was_inserted = state.seen.insert(tip);
if was_inserted && predicate(&tip) {
state.next.push_back((tip, 0));
}
}
}
Self {
find,
predicate,
state,
parents: Default::default(),
sorting: Default::default(),
}
}
}
/// Access
impl<Find, Predicate, StateMut> Ancestors<Find, Predicate, StateMut>
where
StateMut: Borrow<State>,
{
/// Return an iterator for accessing more of the current commits data.
pub fn commit_iter(&self) -> CommitRefIter<'_> {
CommitRefIter::from_bytes(&self.state.borrow().buf)
}
}
impl<Find, Predicate, StateMut, E> Iterator for Ancestors<Find, Predicate, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
Predicate: FnMut(&oid) -> bool,
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
type Item = Result<ObjectId, Error>;
fn next(&mut self) -> Option<Self::Item> {
if matches!(self.parents, Parents::First) {
self.next_by_topology()
} else {
match self.sorting {
Sorting::Topological => self.next_by_topology(),
Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None),
Sorting::ByCommitTimeNewestFirstCutoffOlderThan {
time_in_seconds_since_epoch,
} => self.next_by_commit_date(time_in_seconds_since_epoch.into()),
}
}
}
}
impl Sorting {
/// If not topo sort, provide the cutoff date if present.
fn cutoff_time(&self) -> Option<u32> {
match self {
Sorting::ByCommitTimeNewestFirstCutoffOlderThan {
time_in_seconds_since_epoch,
} => Some(*time_in_seconds_since_epoch),
_ => None,
}
}
}
/// Utilities
impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
Predicate: FnMut(&oid) -> bool,
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
fn next_by_commit_date(&mut self, cutoff_older_than: Option<TimeInSeconds>) -> Option<Result<ObjectId, Error>> {
let state = self.state.borrow_mut();
let (oid, _commit_time) = state.next.pop_front()?;
match (self.find)(&oid, &mut state.buf) {
Ok(commit_iter) => {
let mut count = 0;
for token in commit_iter {
count += 1;
let is_first = count == 1;
match token {
Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
let was_inserted = state.seen.insert(id);
if !(was_inserted && (self.predicate)(&id)) {
if is_first && matches!(self.parents, Parents::First) {
break;
} else {
continue;
}
}
let parent = (self.find)(id.as_ref(), &mut state.parents_buf).ok();
let parent_commit_time = parent
.and_then(|parent| {
parent
.committer()
.ok()
.map(|committer| committer.time.seconds_since_unix_epoch)
})
.unwrap_or_default();
let pos = match state.next.binary_search_by(|c| c.1.cmp(&parent_commit_time).reverse())
{
Ok(_) => None,
Err(pos) => Some(pos),
};
match cutoff_older_than {
Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
Some(_) | None => match pos {
Some(pos) => state.next.insert(pos, (id, parent_commit_time)),
None => state.next.push_back((id, parent_commit_time)),
},
}
if is_first && matches!(self.parents, Parents::First) {
break;
}
}
Ok(_unused_token) => break,
Err(err) => return Some(Err(err.into())),
}
}
}
Err(err) => {
return Some(Err(Error::FindExisting {
oid,
source: err.into(),
}))
}
}
Some(Ok(oid))
}
}
/// Utilities
impl<Find, Predicate, StateMut, E> Ancestors<Find, Predicate, StateMut>
where
Find: for<'a> FnMut(&oid, &'a mut Vec<u8>) -> Result<CommitRefIter<'a>, E>,
Predicate: FnMut(&oid) -> bool,
StateMut: BorrowMut<State>,
E: std::error::Error + Send + Sync + 'static,
{
fn next_by_topology(&mut self) -> Option<Result<ObjectId, Error>> {
let state = self.state.borrow_mut();
let (oid, _commit_time) = state.next.pop_front()?;
match (self.find)(&oid, &mut state.buf) {
Ok(commit_iter) => {
for token in commit_iter {
match token {
Ok(gix_object::commit::ref_iter::Token::Tree { .. }) => continue,
Ok(gix_object::commit::ref_iter::Token::Parent { id }) => {
let was_inserted = state.seen.insert(id);
if was_inserted && (self.predicate)(&id) {
state.next.push_back((id, 0));
}
if matches!(self.parents, Parents::First) {
break;
}
}
Ok(_a_token_past_the_parents) => break,
Err(err) => return Some(Err(err.into())),
}
}
}
Err(err) => {
return Some(Err(Error::FindExisting {
oid,
source: err.into(),
}))
}
}
Some(Ok(oid))
}
}
}