diff --git a/gitoxide-core/src/hours/mod.rs b/gitoxide-core/src/hours/mod.rs index 0418ef1f01e..4080c30c4a0 100644 --- a/gitoxide-core/src/hours/mod.rs +++ b/gitoxide-core/src/hours/mod.rs @@ -175,7 +175,7 @@ where } commit_idx += 1; } - Err(gix::traverse::commit::ancestors::Error::Find { .. }) => { + Err(gix::traverse::commit::simple::Error::Find { .. }) => { is_shallow = true; break; } diff --git a/gitoxide-core/src/pack/create.rs b/gitoxide-core/src/pack/create.rs index 1229ddd2761..76bacd079c3 100644 --- a/gitoxide-core/src/pack/create.rs +++ b/gitoxide-core/src/pack/create.rs @@ -130,7 +130,7 @@ where .collect::, _>>()?; let handle = repo.objects.into_shared_arc().to_cache_arc(); let iter = Box::new( - traverse::commit::Ancestors::new(tips, handle.clone()) + traverse::commit::Simple::new(tips, handle.clone()) .map(|res| res.map_err(|err| Box::new(err) as Box<_>).map(|c| c.id)) .inspect(move |_| progress.inc()), ); @@ -361,7 +361,7 @@ pub mod input_iteration { #[derive(Debug, thiserror::Error)] pub enum Error { #[error("input objects couldn't be iterated completely")] - Iteration(#[from] traverse::commit::ancestors::Error), + Iteration(#[from] traverse::commit::simple::Error), #[error("An error occurred while reading hashes from standard input")] InputLinesIo(#[from] std::io::Error), #[error("Could not decode hex hash provided on standard input")] diff --git a/gitoxide-core/src/query/engine/update.rs b/gitoxide-core/src/query/engine/update.rs index b5c6467e0d6..2e809f0e2b3 100644 --- a/gitoxide-core/src/query/engine/update.rs +++ b/gitoxide-core/src/query/engine/update.rs @@ -429,7 +429,7 @@ pub fn update( break; } } - Err(gix::traverse::commit::ancestors::Error::Find { .. }) => { + Err(gix::traverse::commit::simple::Error::Find { .. 
}) => { writeln!(err, "shallow repository - commit history is truncated").ok(); break; } diff --git a/gix-diff/tests/tree/mod.rs b/gix-diff/tests/tree/mod.rs index ace13c36c99..bd57c5fc8dd 100644 --- a/gix-diff/tests/tree/mod.rs +++ b/gix-diff/tests/tree/mod.rs @@ -133,7 +133,7 @@ mod changes { let mut buf = Vec::new(); let head = head_of(db); - commit::Ancestors::new(Some(head), &db) + commit::Simple::new(Some(head), &db) .collect::, _>>() .expect("valid iteration") .into_iter() diff --git a/gix-pack/tests/pack/data/output/count_and_entries.rs b/gix-pack/tests/pack/data/output/count_and_entries.rs index c5e7b960d98..f357f894798 100644 --- a/gix-pack/tests/pack/data/output/count_and_entries.rs +++ b/gix-pack/tests/pack/data/output/count_and_entries.rs @@ -241,7 +241,7 @@ fn traversals() -> crate::Result { .copied() { let head = hex_to_id("dfcb5e39ac6eb30179808bbab721e8a28ce1b52e"); - let mut commits = commit::Ancestors::new(Some(head), db.clone()) + let mut commits = commit::Simple::new(Some(head), db.clone()) .map(Result::unwrap) .map(|c| c.id) .collect::>(); diff --git a/gix-traverse/src/commit/mod.rs b/gix-traverse/src/commit/mod.rs index f78881614a8..26e9287ae70 100644 --- a/gix-traverse/src/commit/mod.rs +++ b/gix-traverse/src/commit/mod.rs @@ -1,67 +1,43 @@ +//! Provide multiple traversal implementations with different performance envelopes. +//! +//! Use [`Simple`] for fast walks that maintain minimal state, or [`Topo`] for a more elaborate traversal. +use gix_hash::ObjectId; use gix_object::FindExt; +use gix_revwalk::graph::IdMap; +use gix_revwalk::PriorityQueue; use smallvec::SmallVec; /// A fast iterator over the ancestors of one or more starting commits. -pub struct Ancestors { +pub struct Simple { objects: Find, cache: Option, predicate: Predicate, - state: ancestors::State, + state: simple::State, parents: Parents, - sorting: Sorting, + sorting: simple::Sorting, } -/// Specify how to sort commits during the [ancestor](Ancestors) traversal. 
-/// -/// ### Sample History -/// -/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp -/// (*their X-alignment doesn't matter*). +/// Simple ancestors traversal, without the need to keep track of graph-state. +pub mod simple; + +/// A commit walker that walks in topological order, like `git rev-list +/// --topo-order` or `--date-order` depending on the chosen [`topo::Sorting`]. /// -/// ```text -/// ---1----2----4----7 <- second parent of 8 -/// \ \ -/// 3----5----6----8--- -/// ``` -#[derive(Default, Debug, Copy, Clone)] -pub enum Sorting { - /// Commits are sorted as they are mentioned in the commit graph. - /// - /// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1` - /// - /// ### Note - /// - /// This is not to be confused with `git log/rev-list --topo-order`, which is notably different from - /// as it avoids overlapping branches. - #[default] - BreadthFirst, - /// Commits are sorted by their commit time in descending order, that is newest first. - /// - /// The sorting applies to all currently queued commit ids and thus is full. - /// - /// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1` - /// - /// # Performance - /// - /// This mode benefits greatly from having an object_cache in `find()` - /// to avoid having to lookup each commit twice. - ByCommitTimeNewestFirst, - /// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than - /// a given time, stopping the iteration once no younger commits is queued to be traversed. - /// - /// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache. 
- /// - /// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4` - ByCommitTimeNewestFirstCutoffOlderThan { - /// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time. - seconds: gix_date::SecondsSinceUnixEpoch, - }, +/// Instantiate with [`topo::Builder`]. +pub struct Topo { + commit_graph: Option, + find: Find, + predicate: Predicate, + indegrees: IdMap, + states: IdMap, + explore_queue: PriorityQueue, + indegree_queue: PriorityQueue, + topo_queue: topo::iter::Queue, + parents: Parents, + min_gen: u32, + buf: Vec, } -/// Simple ancestors traversal -pub mod ancestors; - -// Topological traversal pub mod topo; /// Specify how to handle commit parents during traversal. @@ -86,8 +62,8 @@ pub struct Info { pub id: gix_hash::ObjectId, /// All parent ids we have encountered. Note that these will be at most one if [`Parents::First`] is enabled. pub parent_ids: ParentIds, - /// The time at which the commit was created. It's only `Some(_)` if sorting is not [`Sorting::BreadthFirst`], as the walk - /// needs to require the commit-date. + /// The time at which the commit was created. It will only be `Some(_)` if the chosen traversal was + /// taking dates into consideration. pub commit_time: Option, } diff --git a/gix-traverse/src/commit/ancestors.rs b/gix-traverse/src/commit/simple.rs similarity index 85% rename from gix-traverse/src/commit/ancestors.rs rename to gix-traverse/src/commit/simple.rs index 51b647714e4..a4a3ff391c8 100644 --- a/gix-traverse/src/commit/ancestors.rs +++ b/gix-traverse/src/commit/simple.rs @@ -4,7 +4,54 @@ use gix_hashtable::HashSet; use smallvec::SmallVec; use std::collections::VecDeque; -/// The error is part of the item returned by the [Ancestors](super::Ancestors) iterator. +/// Specify how to sort commits during a [simple](super::Simple) traversal. 
+/// +/// ### Sample History +/// +/// The following history will be referred to for explaining how the sort order works, with the number denoting the commit timestamp +/// (*their X-alignment doesn't matter*). +/// +/// ```text +/// ---1----2----4----7 <- second parent of 8 +/// \ \ +/// 3----5----6----8--- +/// ``` +#[derive(Default, Debug, Copy, Clone)] +pub enum Sorting { + /// Commits are sorted as they are mentioned in the commit graph. + /// + /// In the *sample history* the order would be `8, 6, 7, 5, 4, 3, 2, 1` + /// + /// ### Note + /// + /// This is not to be confused with `git log/rev-list --topo-order`, which is notably different + /// as it avoids overlapping branches. + #[default] + BreadthFirst, + /// Commits are sorted by their commit time in descending order, that is newest first. + /// + /// The sorting applies to all currently queued commit ids and thus is full. + /// + /// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1` + /// + /// # Performance + /// + /// This mode benefits greatly from having an object_cache in `find()` + /// to avoid having to lookup each commit twice. + ByCommitTimeNewestFirst, + /// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than + /// a given time, stopping the iteration once no younger commit is queued to be traversed. + /// + /// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache. + /// + /// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4` + ByCommitTimeNewestFirstCutoffOlderThan { + /// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time. + seconds: gix_date::SecondsSinceUnixEpoch, + }, +} + +/// The error is part of the item returned by the [Simple](super::Simple) iterator. 
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { @@ -33,7 +80,7 @@ mod init { use gix_object::{CommitRefIter, FindExt}; use super::{ - super::{Ancestors, Either, Info, ParentIds, Parents, Sorting}, + super::{simple::Sorting, Either, Info, ParentIds, Parents, Simple}, collect_parents, Error, State, }; @@ -60,7 +107,7 @@ mod init { } /// Builder - impl Ancestors + impl Simple where Find: gix_object::Find, { @@ -121,7 +168,7 @@ mod init { } /// Lifecyle - impl Ancestors bool> + impl Simple bool> where Find: gix_object::Find, { @@ -139,7 +186,7 @@ mod init { } /// Lifecyle - impl Ancestors + impl Simple where Find: gix_object::Find, Predicate: FnMut(&oid) -> bool, @@ -183,7 +230,7 @@ mod init { } /// Access - impl Ancestors { + impl Simple { /// Return an iterator for accessing data of the current commit, parsed lazily. pub fn commit_iter(&self) -> CommitRefIter<'_> { CommitRefIter::from_bytes(&self.state.buf) @@ -195,7 +242,7 @@ mod init { } } - impl Iterator for Ancestors + impl Iterator for Simple where Find: gix_object::Find, Predicate: FnMut(&oid) -> bool, @@ -228,7 +275,7 @@ mod init { } /// Utilities - impl Ancestors + impl Simple where Find: gix_object::Find, Predicate: FnMut(&oid) -> bool, @@ -298,7 +345,7 @@ mod init { } /// Utilities - impl Ancestors + impl Simple where Find: gix_object::Find, Predicate: FnMut(&oid) -> bool, diff --git a/gix-traverse/src/commit/topo/init.rs b/gix-traverse/src/commit/topo/init.rs index 4506832a6bb..42972d2b871 100644 --- a/gix-traverse/src/commit/topo/init.rs +++ b/gix-traverse/src/commit/topo/init.rs @@ -1,11 +1,11 @@ use crate::commit::topo::iter::gen_and_commit_time; -use crate::commit::topo::{Error, Sorting, Walk, WalkFlags}; -use crate::commit::{find, Info, Parents}; +use crate::commit::topo::{Error, Sorting, WalkFlags}; +use crate::commit::{find, Info, Parents, Topo}; use gix_hash::{oid, ObjectId}; use gix_revwalk::graph::IdMap; use gix_revwalk::PriorityQueue; -/// Builder for [`Walk`]. 
+/// Builder for [`Topo`]. pub struct Builder { commit_graph: Option, find: Find, @@ -20,7 +20,7 @@ impl Builder bool> where Find: gix_object::Find, { - /// Create a new `Builder` for a [`Walk`] that reads commits from a repository with `find`. + /// Create a new `Builder` for a [`Topo`] that reads commits from a repository with `find`. /// starting at the `tips` and ending at the `ends`. Like `git rev-list /// --topo-order ^ends... tips...`. pub fn from_iters( @@ -87,11 +87,11 @@ where self } - /// Build a new [`Walk`] instance. + /// Build a new [`Topo`] instance. /// /// Note that merely building an instance is currently expensive. - pub fn build(self) -> Result, Error> { - let mut w = Walk { + pub fn build(self) -> Result, Error> { + let mut w = Topo { commit_graph: self.commit_graph, find: self.find, predicate: self.predicate, diff --git a/gix-traverse/src/commit/topo/iter.rs b/gix-traverse/src/commit/topo/iter.rs index 121a31860fa..09f38eb7e7a 100644 --- a/gix-traverse/src/commit/topo/iter.rs +++ b/gix-traverse/src/commit/topo/iter.rs @@ -1,17 +1,17 @@ -use crate::commit::topo::{Error, Sorting, Walk, WalkFlags}; -use crate::commit::{find, Either, Info, Parents}; +use crate::commit::topo::{Error, Sorting, WalkFlags}; +use crate::commit::{find, Either, Info, Parents, Topo}; use gix_hash::{oid, ObjectId}; use gix_revwalk::PriorityQueue; use smallvec::SmallVec; -pub(super) type GenAndCommitTime = (u32, i64); +pub(in crate::commit) type GenAndCommitTime = (u32, i64); // Git's priority queue works as a LIFO stack if no compare function is set, // which is the case for `--topo-order.` However, even in that case the initial // items of the queue are sorted according to the commit time before beginning // the walk. 
#[derive(Debug)] -pub(super) enum Queue { +pub(in crate::commit) enum Queue { Date(PriorityQueue), Topo(Vec<(i64, Info)>), } @@ -45,7 +45,7 @@ impl Queue { } } -impl Walk +impl Topo where Find: gix_object::Find, { @@ -214,7 +214,7 @@ where } } -impl Iterator for Walk +impl Iterator for Topo where Find: gix_object::Find, Predicate: FnMut(&oid) -> bool, diff --git a/gix-traverse/src/commit/topo/mod.rs b/gix-traverse/src/commit/topo/mod.rs index 7d7aa61932e..6ae543c106c 100644 --- a/gix-traverse/src/commit/topo/mod.rs +++ b/gix-traverse/src/commit/topo/mod.rs @@ -1,28 +1,7 @@ -//! Topological commit traversal, similar to `git log --topo-order`. - -use gix_hash::ObjectId; -use gix_revwalk::{graph::IdMap, PriorityQueue}; +//! Topological commit traversal, similar to `git log --topo-order`, which keeps track of graph state. use bitflags::bitflags; -use super::Parents; - -/// A commit walker that walks in topographical order, like `git rev-list -/// --topo-order` or `--date-order` depending on the chosen [`Sorting`]. -pub struct Walk { - commit_graph: Option, - find: Find, - predicate: Predicate, - indegrees: IdMap, - states: IdMap, - explore_queue: PriorityQueue, - indegree_queue: PriorityQueue, - topo_queue: iter::Queue, - parents: Parents, - min_gen: u32, - buf: Vec, -} - /// The errors that can occur during creation and iteration. #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] @@ -44,7 +23,7 @@ bitflags! 
{ // NOTE: The names correspond to the names of the flags in revision.h #[repr(transparent)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] - struct WalkFlags: u32 { + pub(super) struct WalkFlags: u32 { /// Commit has been seen const Seen = 0b000001; /// Commit has been processed by the Explore walk @@ -88,4 +67,4 @@ pub enum Sorting { mod init; pub use init::Builder; -mod iter; +pub(super) mod iter; diff --git a/gix-traverse/src/lib.rs b/gix-traverse/src/lib.rs index 3cf6d2b3af7..49776318332 100644 --- a/gix-traverse/src/lib.rs +++ b/gix-traverse/src/lib.rs @@ -2,7 +2,6 @@ #![deny(missing_docs, rust_2018_idioms)] #![forbid(unsafe_code)] -/// Commit traversal pub mod commit; /// Tree traversal diff --git a/gix-traverse/tests/commit/mod.rs b/gix-traverse/tests/commit/mod.rs index 5fda4796282..7aa327bfa53 100644 --- a/gix-traverse/tests/commit/mod.rs +++ b/gix-traverse/tests/commit/mod.rs @@ -1,2 +1,2 @@ -mod ancestor; +mod simple; mod topo; diff --git a/gix-traverse/tests/commit/ancestor.rs b/gix-traverse/tests/commit/simple.rs similarity index 95% rename from gix-traverse/tests/commit/ancestor.rs rename to gix-traverse/tests/commit/simple.rs index 6a9b2faa6c3..983941f35de 100644 --- a/gix-traverse/tests/commit/ancestor.rs +++ b/gix-traverse/tests/commit/simple.rs @@ -9,7 +9,7 @@ struct TraversalAssertion<'a> { tips: &'a [&'a str], expected: &'a [&'a str], mode: commit::Parents, - sorting: commit::Sorting, + sorting: commit::simple::Sorting, } impl<'a> TraversalAssertion<'a> { @@ -33,7 +33,7 @@ impl<'a> TraversalAssertion<'a> { self } - fn with_sorting(&mut self, sorting: commit::Sorting) -> &mut Self { + fn with_sorting(&mut self, sorting: commit::simple::Sorting) -> &mut Self { self.sorting = sorting; self } @@ -63,7 +63,7 @@ impl TraversalAssertion<'_> { let (store, tips, expected) = self.setup()?; for use_commitgraph in [false, true] { - let oids = commit::Ancestors::filtered(tips.clone(), &store, predicate.clone()) + let oids = 
commit::Simple::filtered(tips.clone(), &store, predicate.clone()) .sorting(self.sorting)? .parents(self.mode) .commit_graph(self.setup_commitgraph(store.store_ref(), use_commitgraph)) @@ -79,7 +79,7 @@ impl TraversalAssertion<'_> { let (store, tips, expected) = self.setup()?; for use_commitgraph in [false, true] { - let oids = commit::Ancestors::new(tips.clone(), &store) + let oids = commit::Simple::new(tips.clone(), &store) .sorting(self.sorting)? .parents(self.mode) .commit_graph(self.setup_commitgraph(store.store_ref(), use_commitgraph)) @@ -92,9 +92,9 @@ impl TraversalAssertion<'_> { } mod different_date_intermixed { - use gix_traverse::commit::Sorting; + use gix_traverse::commit::simple::Sorting; - use crate::commit::ancestor::TraversalAssertion; + use crate::commit::simple::TraversalAssertion; #[test] fn head_breadth_first() -> crate::Result { @@ -140,9 +140,9 @@ mod different_date_intermixed { } mod different_date { - use gix_traverse::commit::Sorting; + use gix_traverse::commit::simple::Sorting; - use crate::commit::ancestor::TraversalAssertion; + use crate::commit::simple::TraversalAssertion; #[test] fn head_breadth_first() -> crate::Result { @@ -193,9 +193,9 @@ mod different_date { /// Same dates are somewhat special as they show how sorting-details on priority queues affects ordering mod same_date { - use gix_traverse::commit::{Parents, Sorting}; + use gix_traverse::commit::{simple::Sorting, Parents}; - use crate::{commit::ancestor::TraversalAssertion, hex_to_id}; + use crate::{commit::simple::TraversalAssertion, hex_to_id}; #[test] fn c4_breadth_first() -> crate::Result { @@ -337,9 +337,9 @@ mod same_date { /// Some dates adjusted to be a year apart, but still 'c1' and 'c2' with the same date. 
mod adjusted_dates { - use gix_traverse::commit::{Ancestors, Parents, Sorting}; + use gix_traverse::commit::{simple::Sorting, Parents, Simple}; - use crate::{commit::ancestor::TraversalAssertion, hex_to_id}; + use crate::{commit::simple::TraversalAssertion, hex_to_id}; #[test] fn head_breadth_first() -> crate::Result { @@ -390,7 +390,7 @@ mod adjusted_dates { let dir = gix_testtools::scripted_fixture_read_only_standalone("make_traversal_repo_for_commits_with_dates.sh")?; let store = gix_odb::at(dir.join(".git").join("objects"))?; - let iter = Ancestors::new( + let iter = Simple::new( Some(hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7" /* c2 */)), &store, ) diff --git a/gix/src/ext/object_id.rs b/gix/src/ext/object_id.rs index 018c0ab032b..44e03bbd7c9 100644 --- a/gix/src/ext/object_id.rs +++ b/gix/src/ext/object_id.rs @@ -1,9 +1,9 @@ use gix_hash::ObjectId; -use gix_traverse::commit::Ancestors; +use gix_traverse::commit::Simple; pub trait Sealed {} -pub type AncestorsIter = Ancestors bool>; +pub type AncestorsIter = Simple bool>; /// An extension trait to add functionality to [`ObjectId`]s. 
pub trait ObjectIdExt: Sealed { @@ -23,7 +23,7 @@ impl ObjectIdExt for ObjectId { where Find: gix_object::Find, { - Ancestors::new(Some(self), find) + Simple::new(Some(self), find) } fn attach(self, repo: &crate::Repository) -> crate::Id<'_> { diff --git a/gix/src/revision/spec/parse/types.rs b/gix/src/revision/spec/parse/types.rs index fc09e13c097..9629b0b34d4 100644 --- a/gix/src/revision/spec/parse/types.rs +++ b/gix/src/revision/spec/parse/types.rs @@ -184,7 +184,7 @@ pub enum Error { next: Option>, }, #[error(transparent)] - Traverse(#[from] gix_traverse::commit::ancestors::Error), + Traverse(#[from] gix_traverse::commit::simple::Error), #[error(transparent)] Walk(#[from] crate::revision::walk::Error), #[error("Spec does not contain a single object id")] diff --git a/gix/src/revision/walk.rs b/gix/src/revision/walk.rs index a089733a479..78e7c5c7497 100644 --- a/gix/src/revision/walk.rs +++ b/gix/src/revision/walk.rs @@ -8,7 +8,7 @@ use crate::{ext::ObjectIdExt, revision, Repository}; #[allow(missing_docs)] pub enum Error { #[error(transparent)] - AncestorIter(#[from] gix_traverse::commit::ancestors::Error), + AncestorIter(#[from] gix_traverse::commit::simple::Error), #[error(transparent)] ShallowCommits(#[from] crate::shallow::open::Error), #[error(transparent)] @@ -166,7 +166,7 @@ impl<'repo> Platform<'repo> { Ok(revision::Walk { repo, inner: Box::new( - gix_traverse::commit::Ancestors::filtered(tips, &repo.objects, { + gix_traverse::commit::Simple::filtered(tips, &repo.objects, { // Note that specific shallow handling for commit-graphs isn't needed as these contain // all information there is, and exclude shallow parents to be structurally consistent. let shallow_commits = repo.shallow_commits()?; @@ -221,13 +221,12 @@ pub(crate) mod iter { /// The iterator returned by [`crate::revision::walk::Platform::all()`]. 
pub struct Walk<'repo> { pub(crate) repo: &'repo crate::Repository, - pub(crate) inner: Box< - dyn Iterator> + 'repo, - >, + pub(crate) inner: + Box> + 'repo>, } impl<'repo> Iterator for Walk<'repo> { - type Item = Result, gix_traverse::commit::ancestors::Error>; + type Item = Result, gix_traverse::commit::simple::Error>; fn next(&mut self) -> Option { self.inner