gix-commitgraph-0.24.2/.cargo_vcs_info.json0000644000000001550000000000100142210ustar { "git": { "sha1": "b050327e76f234b19be921b78b7b28e034319fdb" }, "path_in_vcs": "gix-commitgraph" }gix-commitgraph-0.24.2/Cargo.toml0000644000000033500000000000100122170ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.65" name = "gix-commitgraph" version = "0.24.2" authors = [ "Conor Davis ", "Sebastian Thiel ", ] include = [ "src/**/*", "LICENSE-*", ] description = "Read-only access to the git commitgraph file format" documentation = "https://git-scm.com/docs/commit-graph#:~:text=The%20commit-graph%20file%20is%20a%20supplemental%20data%20structure,or%20in%20the%20info%20directory%20of%20an%20alternate." 
license = "MIT OR Apache-2.0" repository = "https://github.com/Byron/gitoxide" [package.metadata.docs.rs] all-features = true features = ["document-features"] [lib] doctest = false [dependencies.bstr] version = "1.3.0" features = ["std"] default-features = false [dependencies.document-features] version = "0.2.0" optional = true [dependencies.gix-chunk] version = "^0.4.8" [dependencies.gix-features] version = "^0.38.1" features = ["rustsha1"] [dependencies.gix-hash] version = "^0.14.2" [dependencies.memmap2] version = "0.9.0" [dependencies.serde] version = "1.0.114" features = ["derive"] optional = true default-features = false [dependencies.thiserror] version = "1.0.26" [dev-dependencies] [features] serde = [ "dep:serde", "gix-hash/serde", "bstr/serde", ] gix-commitgraph-0.24.2/Cargo.toml.orig000064400000000000000000000025751046102023000157100ustar 00000000000000[package] name = "gix-commitgraph" version = "0.24.2" repository = "https://github.com/Byron/gitoxide" documentation = "https://git-scm.com/docs/commit-graph#:~:text=The%20commit-graph%20file%20is%20a%20supplemental%20data%20structure,or%20in%20the%20info%20directory%20of%20an%20alternate." 
license = "MIT OR Apache-2.0" description = "Read-only access to the git commitgraph file format" authors = ["Conor Davis ", "Sebastian Thiel "] edition = "2021" include = ["src/**/*", "LICENSE-*"] rust-version = "1.65" [lib] doctest = false [features] ## Data structures implement `serde::Serialize` and `serde::Deserialize` serde = ["dep:serde", "gix-hash/serde", "bstr/serde"] [dependencies] gix-features = { version = "^0.38.1", path = "../gix-features", features = ["rustsha1"] } gix-hash = { version = "^0.14.2", path = "../gix-hash" } gix-chunk = { version = "^0.4.8", path = "../gix-chunk" } bstr = { version = "1.3.0", default-features = false, features = ["std"] } memmap2 = "0.9.0" serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } thiserror = "1.0.26" document-features = { version = "0.2.0", optional = true } [dev-dependencies] gix-testtools = { path = "../tests/tools" } gix-date = { path = "../gix-date" } [package.metadata.docs.rs] all-features = true features = ["document-features"] gix-commitgraph-0.24.2/LICENSE-APACHE000064400000000000000000000247461046102023000147510ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
gix-commitgraph-0.24.2/LICENSE-MIT000064400000000000000000000017771046102023000144600ustar 00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. gix-commitgraph-0.24.2/src/access.rs000064400000000000000000000060101046102023000154030ustar 00000000000000use crate::{file, file::Commit, File, Graph, Position}; /// Access impl Graph { /// Returns the commit at the given position `pos`. /// /// # Panics /// If `pos` is greater or equal to [`num_commits()`][Graph::num_commits()]. pub fn commit_at(&self, pos: Position) -> Commit<'_> { let r = self.lookup_by_pos(pos); r.file.commit_at(r.pos) } /// Returns the commit matching the given `id`. pub fn commit_by_id(&self, id: impl AsRef) -> Option> { let r = self.lookup_by_id(id.as_ref())?; Some(r.file.commit_at(r.file_pos)) } /// Returns the `hash` at the given position `pos`. /// /// # Panics /// If `pos` is greater or equal to [`num_commits()`][Graph::num_commits()]. 
pub fn id_at(&self, pos: Position) -> &gix_hash::oid { let r = self.lookup_by_pos(pos); r.file.id_at(r.pos) } /// Iterate over commits in unsorted order. pub fn iter_commits(&self) -> impl Iterator> { self.files.iter().flat_map(File::iter_commits) } /// Iterate over commit IDs in unsorted order. pub fn iter_ids(&self) -> impl Iterator { self.files.iter().flat_map(File::iter_ids) } /// Translate the given `id` to its position in the file. pub fn lookup(&self, id: impl AsRef) -> Option { Some(self.lookup_by_id(id.as_ref())?.graph_pos) } /// Returns the number of commits stored in this file. pub fn num_commits(&self) -> u32 { self.files.iter().map(File::num_commits).sum() } } /// Access fundamentals impl Graph { fn lookup_by_id(&self, id: &gix_hash::oid) -> Option> { let mut current_file_start = 0; for file in &self.files { if let Some(lex_pos) = file.lookup(id) { return Some(LookupByIdResult { file, file_pos: lex_pos, graph_pos: Position(current_file_start + lex_pos.0), }); } current_file_start += file.num_commits(); } None } fn lookup_by_pos(&self, pos: Position) -> LookupByPositionResult<'_> { let mut remaining = pos.0; for (file_index, file) in self.files.iter().enumerate() { match remaining.checked_sub(file.num_commits()) { Some(v) => remaining = v, None => { return LookupByPositionResult { file, _file_index: file_index, pos: file::Position(remaining), } } } } panic!("graph position too large: {}", pos.0); } } #[derive(Clone)] struct LookupByIdResult<'a> { pub file: &'a File, pub graph_pos: Position, pub file_pos: file::Position, } #[derive(Clone)] struct LookupByPositionResult<'a> { pub file: &'a File, pub _file_index: usize, pub pos: file::Position, } gix-commitgraph-0.24.2/src/file/access.rs000064400000000000000000000114201046102023000163230ustar 00000000000000use std::{ fmt::{Debug, Formatter}, path::Path, }; use crate::{ file::{self, commit::Commit, COMMIT_DATA_ENTRY_SIZE_SANS_HASH}, File, }; /// Access impl File { /// The number of base graphs that this 
file depends on. pub fn base_graph_count(&self) -> u8 { self.base_graph_count } /// Returns the commit data for the commit located at the given lexicographical position. /// /// `pos` must range from 0 to `self.num_commits()`. /// /// # Panics /// /// Panics if `pos` is out of bounds. pub fn commit_at(&self, pos: file::Position) -> Commit<'_> { Commit::new(self, pos) } /// The kind of hash used in this File. /// /// Note that it is always conforming to the hash used in the owning repository. pub fn object_hash(&self) -> gix_hash::Kind { self.object_hash } /// Returns an object id at the given index in our list of (sorted) hashes. /// The position ranges from 0 to `self.num_commits()` // copied from gix-odb/src/pack/index/ext pub fn id_at(&self, pos: file::Position) -> &gix_hash::oid { assert!( pos.0 < self.num_commits(), "expected lexicographical position less than {}, got {}", self.num_commits(), pos.0 ); let pos: usize = pos .0 .try_into() .expect("an architecture able to hold 32 bits of integer"); let start = self.oid_lookup_offset + (pos * self.hash_len); gix_hash::oid::from_bytes_unchecked(&self.data[start..][..self.hash_len]) } /// Return an iterator over all object hashes stored in the base graph. pub fn iter_base_graph_ids(&self) -> impl Iterator { let start = self.base_graphs_list_offset.unwrap_or(0); let base_graphs_list = &self.data[start..][..self.hash_len * usize::from(self.base_graph_count)]; base_graphs_list .chunks_exact(self.hash_len) .map(gix_hash::oid::from_bytes_unchecked) } /// return an iterator over all commits in this file. pub fn iter_commits(&self) -> impl Iterator> { (0..self.num_commits()).map(move |i| self.commit_at(file::Position(i))) } /// Return an iterator over all object hashes stored in this file. pub fn iter_ids(&self) -> impl Iterator { (0..self.num_commits()).map(move |i| self.id_at(file::Position(i))) } /// Translate the given object hash to its position within this file, if present. 
// copied from gix-odb/src/pack/index/ext pub fn lookup(&self, id: impl AsRef) -> Option { self.lookup_inner(id.as_ref()) } fn lookup_inner(&self, id: &gix_hash::oid) -> Option { let first_byte = usize::from(id.first_byte()); let mut upper_bound = self.fan[first_byte]; let mut lower_bound = if first_byte != 0 { self.fan[first_byte - 1] } else { 0 }; while lower_bound < upper_bound { let mid = (lower_bound + upper_bound) / 2; let mid_sha = self.id_at(file::Position(mid)); use std::cmp::Ordering::*; match id.cmp(mid_sha) { Less => upper_bound = mid, Equal => return Some(file::Position(mid)), Greater => lower_bound = mid + 1, } } None } /// Returns the number of commits in this graph file. /// /// The maximum valid `file::Position` that can be used with this file is one less than /// `num_commits()`. pub fn num_commits(&self) -> u32 { self.fan[255] } /// Returns the path to this file. pub fn path(&self) -> &Path { &self.path } } impl File { /// Returns the byte slice for the given commit in this file's Commit Data (CDAT) chunk. pub(crate) fn commit_data_bytes(&self, pos: file::Position) -> &[u8] { assert!( pos.0 < self.num_commits(), "expected lexicographical position less than {}, got {}", self.num_commits(), pos.0 ); let pos: usize = pos .0 .try_into() .expect("an architecture able to hold 32 bits of integer"); let entry_size = self.hash_len + COMMIT_DATA_ENTRY_SIZE_SANS_HASH; let start = self.commit_data_offset + (pos * entry_size); &self.data[start..][..entry_size] } /// Returns the byte slice for this file's entire Extra Edge List (EDGE) chunk. pub(crate) fn extra_edges_data(&self) -> Option<&[u8]> { Some(&self.data[self.extra_edges_list_range.clone()?]) } } impl Debug for File { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, r#"File("{:?}")"#, self.path.display()) } } gix-commitgraph-0.24.2/src/file/commit.rs000064400000000000000000000230451046102023000163600ustar 00000000000000//! Low-level operations on individual commits. 
use std::{ fmt::{Debug, Formatter}, slice::Chunks, }; use crate::{ file::{self, EXTENDED_EDGES_MASK, LAST_EXTENDED_EDGE_MASK, NO_PARENT}, File, Position, }; /// The error used in the [`file::commit`][self] module. #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("commit {0}'s extra edges overflows the commit-graph file's extra edges list")] ExtraEdgesListOverflow(gix_hash::ObjectId), #[error("commit {0}'s first parent is an extra edge index, which is invalid")] FirstParentIsExtraEdgeIndex(gix_hash::ObjectId), #[error("commit {0} has extra edges, but commit-graph file has no extra edges list")] MissingExtraEdgesList(gix_hash::ObjectId), #[error("commit {0} has a second parent but not a first parent")] SecondParentWithoutFirstParent(gix_hash::ObjectId), } /// A commit as stored in a [`File`]. #[derive(Copy, Clone)] pub struct Commit<'a> { file: &'a File, pos: file::Position, // We can parse the below fields lazily if needed. commit_timestamp: u64, generation: u32, parent1: ParentEdge, parent2: ParentEdge, root_tree_id: &'a gix_hash::oid, } #[inline] fn read_u32(b: &[u8]) -> u32 { u32::from_be_bytes(b.try_into().unwrap()) } impl<'a> Commit<'a> { pub(crate) fn new(file: &'a File, pos: file::Position) -> Self { let bytes = file.commit_data_bytes(pos); Commit { file, pos, root_tree_id: gix_hash::oid::from_bytes_unchecked(&bytes[..file.hash_len]), parent1: ParentEdge::from_raw(read_u32(&bytes[file.hash_len..][..4])), parent2: ParentEdge::from_raw(read_u32(&bytes[file.hash_len + 4..][..4])), // TODO: Add support for corrected commit date offset overflow. // See https://github.com/git/git/commit/e8b63005c48696a26f976f5f9b0ccaf1983e439d and // https://github.com/git/git/commit/f90fca638e99a031dce8e3aca72427b2f9b4bb38 for more details and hints at a test. 
generation: read_u32(&bytes[file.hash_len + 8..][..4]) >> 2, commit_timestamp: u64::from_be_bytes(bytes[file.hash_len + 8..][..8].try_into().unwrap()) & 0x0003_ffff_ffff, } } /// Returns the committer timestamp of this commit. /// /// The value is the number of seconds since 1970-01-01 00:00:00 UTC. pub fn committer_timestamp(&self) -> u64 { self.commit_timestamp } /// Returns the generation number of this commit. /// /// Commits without parents have generation number 1. Commits with parents have a generation /// number that is the max of their parents' generation numbers + 1. pub fn generation(&self) -> u32 { self.generation } /// Returns an iterator over the parent positions for lookup in the owning [Graph][crate::Graph]. pub fn iter_parents(self) -> Parents<'a> { // I didn't find a combinator approach that a) was as strict as ParentIterator, b) supported // fuse-after-first-error behavior, and b) was significantly shorter or more understandable // than ParentIterator. So here we are. Parents { commit_data: self, state: ParentIteratorState::First, } } /// Returns the hash of this commit. pub fn id(&self) -> &'a gix_hash::oid { self.file.id_at(self.pos) } /// Returns the first parent of this commit. pub fn parent1(&self) -> Result, Error> { self.iter_parents().next().transpose() } /// Returns the position at which this commit is stored in the parent [File]. pub fn position(&self) -> file::Position { self.pos } /// Return the hash of the tree this commit points to. 
pub fn root_tree_id(&self) -> &gix_hash::oid { self.root_tree_id } } impl<'a> Debug for Commit<'a> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!( f, "Commit {{ id: {}, lex_pos: {}, generation: {}, root_tree_id: {}, parent1: {:?}, parent2: {:?} }}", self.id(), self.pos, self.generation(), self.root_tree_id(), self.parent1, self.parent2, ) } } impl<'a> Eq for Commit<'a> {} impl<'a> PartialEq for Commit<'a> { fn eq(&self, other: &Self) -> bool { std::ptr::eq(self.file, other.file) && self.pos == other.pos } } /// An iterator over parents of a [`Commit`]. pub struct Parents<'a> { commit_data: Commit<'a>, state: ParentIteratorState<'a>, } impl<'a> Iterator for Parents<'a> { type Item = Result; fn next(&mut self) -> Option { let state = std::mem::replace(&mut self.state, ParentIteratorState::Exhausted); match state { ParentIteratorState::First => match self.commit_data.parent1 { ParentEdge::None => match self.commit_data.parent2 { ParentEdge::None => None, _ => Some(Err(Error::SecondParentWithoutFirstParent(self.commit_data.id().into()))), }, ParentEdge::GraphPosition(pos) => { self.state = ParentIteratorState::Second; Some(Ok(pos)) } ParentEdge::ExtraEdgeIndex(_) => { Some(Err(Error::FirstParentIsExtraEdgeIndex(self.commit_data.id().into()))) } }, ParentIteratorState::Second => match self.commit_data.parent2 { ParentEdge::None => None, ParentEdge::GraphPosition(pos) => Some(Ok(pos)), ParentEdge::ExtraEdgeIndex(extra_edge_index) => { if let Some(extra_edges_list) = self.commit_data.file.extra_edges_data() { let start_offset: usize = extra_edge_index .try_into() .expect("an architecture able to hold 32 bits of integer"); let start_offset = start_offset .checked_mul(4) .expect("an extended edge index small enough to fit in usize"); if let Some(tail) = extra_edges_list.get(start_offset..) 
{ self.state = ParentIteratorState::Extra(tail.chunks(4)); // This recursive call is what blocks me from replacing ParentIterator // with a std::iter::from_fn closure. self.next() } else { Some(Err(Error::ExtraEdgesListOverflow(self.commit_data.id().into()))) } } else { Some(Err(Error::MissingExtraEdgesList(self.commit_data.id().into()))) } } }, ParentIteratorState::Extra(mut chunks) => { if let Some(chunk) = chunks.next() { let extra_edge = read_u32(chunk); match ExtraEdge::from_raw(extra_edge) { ExtraEdge::Internal(pos) => { self.state = ParentIteratorState::Extra(chunks); Some(Ok(pos)) } ExtraEdge::Last(pos) => Some(Ok(pos)), } } else { Some(Err(Error::ExtraEdgesListOverflow(self.commit_data.id().into()))) } } ParentIteratorState::Exhausted => None, } } fn size_hint(&self) -> (usize, Option) { match (&self.state, self.commit_data.parent1, self.commit_data.parent2) { (ParentIteratorState::First, ParentEdge::None, ParentEdge::None) => (0, Some(0)), (ParentIteratorState::First, ParentEdge::None, _) => (1, Some(1)), (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::None) => (1, Some(1)), (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::GraphPosition(_)) => (2, Some(2)), (ParentIteratorState::First, ParentEdge::GraphPosition(_), ParentEdge::ExtraEdgeIndex(_)) => (3, None), (ParentIteratorState::First, ParentEdge::ExtraEdgeIndex(_), _) => (1, Some(1)), (ParentIteratorState::Second, _, ParentEdge::None) => (0, Some(0)), (ParentIteratorState::Second, _, ParentEdge::GraphPosition(_)) => (1, Some(1)), (ParentIteratorState::Second, _, ParentEdge::ExtraEdgeIndex(_)) => (2, None), (ParentIteratorState::Extra(_), _, _) => (1, None), (ParentIteratorState::Exhausted, _, _) => (0, Some(0)), } } } #[derive(Debug)] enum ParentIteratorState<'a> { First, Second, Extra(Chunks<'a, u8>), Exhausted, } #[derive(Clone, Copy, Debug)] enum ParentEdge { None, GraphPosition(Position), ExtraEdgeIndex(u32), } impl ParentEdge { pub fn from_raw(raw: u32) 
-> ParentEdge { if raw == NO_PARENT { return ParentEdge::None; } if raw & EXTENDED_EDGES_MASK != 0 { ParentEdge::ExtraEdgeIndex(raw & !EXTENDED_EDGES_MASK) } else { ParentEdge::GraphPosition(Position(raw)) } } } enum ExtraEdge { Internal(Position), Last(Position), } impl ExtraEdge { pub fn from_raw(raw: u32) -> Self { if raw & LAST_EXTENDED_EDGE_MASK != 0 { Self::Last(Position(raw & !LAST_EXTENDED_EDGE_MASK)) } else { Self::Internal(Position(raw)) } } } gix-commitgraph-0.24.2/src/file/init.rs000064400000000000000000000233671046102023000160420ustar 00000000000000use std::path::Path; use std::path::PathBuf; use crate::{ file::{ ChunkId, BASE_GRAPHS_LIST_CHUNK_ID, COMMIT_DATA_CHUNK_ID, COMMIT_DATA_ENTRY_SIZE_SANS_HASH, EXTENDED_EDGES_LIST_CHUNK_ID, FAN_LEN, HEADER_LEN, OID_FAN_CHUNK_ID, OID_LOOKUP_CHUNK_ID, SIGNATURE, }, File, }; use bstr::ByteSlice; /// The error used in [`File::at()`]. #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("Commit-graph {:?} chunk contains {from_chunk} base graphs, but commit-graph file header claims {from_header} base graphs", BASE_GRAPHS_LIST_CHUNK_ID.as_bstr())] BaseGraphMismatch { from_header: u8, from_chunk: u32 }, #[error("Commit-graph {:?} chunk contains {chunk1_commits} commits, but {:?} chunk contains {chunk2_commits} commits", .chunk1_id.as_bstr(), .chunk2_id.as_bstr())] CommitCountMismatch { chunk1_id: ChunkId, chunk1_commits: u32, chunk2_id: ChunkId, chunk2_commits: u32, }, #[error("{0}")] Corrupt(String), // This error case is disabled, as git allows extra garbage in the extra edges list? 
// #[error("The last entry in commit-graph's extended edges list does is not marked as being terminal")] // ExtraEdgesOverflow, #[error("Could not open commit-graph file at '{}'", .path.display())] Io { #[source] err: std::io::Error, path: std::path::PathBuf, }, #[error("{0}")] Trailer(String), #[error("Commit-graph file uses unsupported hash version: {0}")] UnsupportedHashVersion(u8), #[error("Unsupported commit-graph file version: {0}")] UnsupportedVersion(u8), #[error(transparent)] ChunkFileDecode(#[from] gix_chunk::file::decode::Error), #[error(transparent)] MissingChunk(#[from] gix_chunk::file::index::offset_by_kind::Error), #[error("Commit-graph chunk {:?} has invalid size: {msg}", .id.as_bstr())] InvalidChunkSize { id: ChunkId, msg: String }, } const MIN_FILE_SIZE: usize = HEADER_LEN + gix_chunk::file::Index::size_for_entries(3 /*OIDF, OIDL, CDAT*/) + FAN_LEN * 4 /* FANOUT TABLE CHUNK OIDF */ + gix_hash::Kind::shortest().len_in_bytes(); impl File { /// Try to parse the commit graph file at `path`. pub fn at(path: impl AsRef) -> Result { Self::try_from(path.as_ref()) } /// A lower-level constructor which constructs a new instance directly from the mapping in `data`, /// assuming that it originated from `path`. /// /// Note that `path` is only used for verification of the hash its basename contains, but otherwise /// is not of importance. 
pub fn new(data: memmap2::Mmap, path: PathBuf) -> Result { let data_size = data.len(); if data_size < MIN_FILE_SIZE { return Err(Error::Corrupt( "Commit-graph file too small even for an empty graph".to_owned(), )); } let mut ofs = 0; if &data[ofs..ofs + SIGNATURE.len()] != SIGNATURE { return Err(Error::Corrupt( "Commit-graph file does not start with expected signature".to_owned(), )); } ofs += SIGNATURE.len(); match data[ofs] { 1 => (), x => { return Err(Error::UnsupportedVersion(x)); } }; ofs += 1; let object_hash = gix_hash::Kind::try_from(data[ofs]).map_err(Error::UnsupportedHashVersion)?; ofs += 1; let chunk_count = data[ofs]; // Can assert chunk_count >= MIN_CHUNKS here, but later OIDF+OIDL+CDAT presence checks make // it redundant. ofs += 1; let base_graph_count = data[ofs]; ofs += 1; let chunks = gix_chunk::file::Index::from_bytes(&data, ofs, chunk_count as u32)?; let base_graphs_list_offset = chunks .validated_usize_offset_by_id(BASE_GRAPHS_LIST_CHUNK_ID, |chunk_range| { let chunk_size = chunk_range.len(); if chunk_size % object_hash.len_in_bytes() != 0 { return Err(Error::InvalidChunkSize { id: BASE_GRAPHS_LIST_CHUNK_ID, msg: format!( "chunk size {} is not a multiple of {}", chunk_size, object_hash.len_in_bytes() ), }); } let chunk_base_graph_count: u32 = (chunk_size / object_hash.len_in_bytes()) .try_into() .expect("base graph count to fit in 32-bits"); if chunk_base_graph_count != u32::from(base_graph_count) { return Err(Error::BaseGraphMismatch { from_chunk: chunk_base_graph_count, from_header: base_graph_count, }); } Ok(chunk_range.start) }) .ok() .transpose()?; let (commit_data_offset, commit_data_count) = chunks.validated_usize_offset_by_id(COMMIT_DATA_CHUNK_ID, |chunk_range| { let chunk_size = chunk_range.len(); let entry_size = object_hash.len_in_bytes() + COMMIT_DATA_ENTRY_SIZE_SANS_HASH; if chunk_size % entry_size != 0 { return Err(Error::InvalidChunkSize { id: COMMIT_DATA_CHUNK_ID, msg: format!("chunk size {chunk_size} is not a multiple of 
{entry_size}"), }); } Ok(( chunk_range.start, (chunk_size / entry_size) .try_into() .expect("number of commits in CDAT chunk to fit in 32 bits"), )) })??; let fan_offset = chunks.validated_usize_offset_by_id(OID_FAN_CHUNK_ID, |chunk_range| { let chunk_size = chunk_range.len(); let expected_size = 4 * FAN_LEN; if chunk_size != expected_size { return Err(Error::InvalidChunkSize { id: OID_FAN_CHUNK_ID, msg: format!("expected chunk length {expected_size}, got {chunk_size}"), }); } Ok(chunk_range.start) })??; let (oid_lookup_offset, oid_lookup_count) = chunks.validated_usize_offset_by_id(OID_LOOKUP_CHUNK_ID, |chunk_range| { let chunk_size = chunk_range.len(); if chunk_size % object_hash.len_in_bytes() != 0 { return Err(Error::InvalidChunkSize { id: OID_LOOKUP_CHUNK_ID, msg: format!( "chunk size {} is not a multiple of {}", chunk_size, object_hash.len_in_bytes() ), }); } Ok(( chunk_range.start, (chunk_size / object_hash.len_in_bytes()) .try_into() .expect("number of commits in OIDL chunk to fit in 32 bits"), )) })??; let extra_edges_list_range = chunks.usize_offset_by_id(EXTENDED_EDGES_LIST_CHUNK_ID).ok(); let trailer = &data[chunks.highest_offset() as usize..]; if trailer.len() != object_hash.len_in_bytes() { return Err(Error::Trailer(format!( "Expected commit-graph trailer to contain {} bytes, got {}", object_hash.len_in_bytes(), trailer.len() ))); } if base_graph_count > 0 && base_graphs_list_offset.is_none() { return Err(gix_chunk::file::index::offset_by_kind::Error { kind: BASE_GRAPHS_LIST_CHUNK_ID, } .into()); } let (fan, _) = read_fan(&data[fan_offset..]); if oid_lookup_count != fan[255] { return Err(Error::CommitCountMismatch { chunk1_id: OID_FAN_CHUNK_ID, chunk1_commits: fan[255], chunk2_id: OID_LOOKUP_CHUNK_ID, chunk2_commits: oid_lookup_count, }); } if commit_data_count != fan[255] { return Err(Error::CommitCountMismatch { chunk1_id: OID_FAN_CHUNK_ID, chunk1_commits: fan[255], chunk2_id: COMMIT_DATA_CHUNK_ID, chunk2_commits: commit_data_count, }); } Ok(File { 
base_graph_count, base_graphs_list_offset, commit_data_offset, data, extra_edges_list_range, fan, oid_lookup_offset, path, hash_len: object_hash.len_in_bytes(), object_hash, }) } } impl TryFrom<&Path> for File { type Error = Error; fn try_from(path: &Path) -> Result { let data = std::fs::File::open(path) .and_then(|file| { // SAFETY: we have to take the risk of somebody changing the file underneath. Git never writes into the same file. #[allow(unsafe_code)] unsafe { memmap2::MmapOptions::new().map_copy_read_only(&file) } }) .map_err(|e| Error::Io { err: e, path: path.to_owned(), })?; Self::new(data, path.to_owned()) } } // Copied from gix-odb/pack/index/init.rs fn read_fan(d: &[u8]) -> ([u32; FAN_LEN], usize) { assert!(d.len() >= FAN_LEN * 4); let mut fan = [0; FAN_LEN]; for (c, f) in d.chunks_exact(4).zip(fan.iter_mut()) { *f = u32::from_be_bytes(c.try_into().unwrap()); } (fan, FAN_LEN * 4) } gix-commitgraph-0.24.2/src/file/mod.rs000064400000000000000000000034511046102023000156460ustar 00000000000000//! Operations on a single commit-graph file. use std::fmt::{Display, Formatter}; pub use self::{commit::Commit, init::Error}; mod access; pub mod commit; mod init; pub mod verify; const COMMIT_DATA_ENTRY_SIZE_SANS_HASH: usize = 16; pub(crate) const FAN_LEN: usize = 256; const HEADER_LEN: usize = 8; const SIGNATURE: &[u8] = b"CGPH"; type ChunkId = gix_chunk::Id; const BASE_GRAPHS_LIST_CHUNK_ID: ChunkId = *b"BASE"; const COMMIT_DATA_CHUNK_ID: ChunkId = *b"CDAT"; const EXTENDED_EDGES_LIST_CHUNK_ID: ChunkId = *b"EDGE"; const OID_FAN_CHUNK_ID: ChunkId = *b"OIDF"; const OID_LOOKUP_CHUNK_ID: ChunkId = *b"OIDL"; // Note that git's commit-graph-format.txt as of v2.28.0 gives an incorrect value 0x0700_0000 for // NO_PARENT. Fixed in https://github.com/git/git/commit/4d515253afcef985e94400adbfed7044959f9121 . 
/// Raw parent-slot value that `ParentEdge::from_raw` maps to `ParentEdge::None`.
const NO_PARENT: u32 = 0x7000_0000;
/// Bit that, when set in a raw parent slot, makes `ParentEdge::from_raw` treat the remaining bits
/// as an index into the extra edges list instead of as a graph position.
const EXTENDED_EDGES_MASK: u32 = 0x8000_0000;
/// Bit that, when set in a raw extra-edge entry, makes `ExtraEdge::from_raw` report the entry as
/// `ExtraEdge::Last`; the remaining bits are the parent's position.
const LAST_EXTENDED_EDGE_MASK: u32 = 0x8000_0000;

/// The position of a given commit within a graph file, starting at 0.
///
/// Commits within a graph file are sorted in lexicographical order by OID; a commit's lexicographical position
/// is its position in this ordering. If a commit graph spans multiple files, each file's commits
/// start at lexicographical position 0, so it is unique across a single file but is not unique across
/// the whole commit graph. Each commit also has a graph position ([`Position`][crate::Position]),
/// which is unique across the whole commit graph.
/// In order to avoid accidentally mixing lexicographical positions with graph positions, distinct types are used for each.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct Position(pub u32);

impl Display for Position {
    /// Writes the wrapped `u32` by forwarding to its own `fmt`, so any formatter flags passed
    /// by the caller are handled by the integer implementation.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        self.0.fmt(f)
    }
}
gix-commitgraph-0.24.2/src/file/verify.rs000064400000000000000000000156051046102023000163770ustar 00000000000000
//! Auxiliary types used in commit graph file verification methods.
use std::{
    cmp::{max, min},
    collections::HashMap,
    path::Path,
};

use crate::{file, File, GENERATION_NUMBER_INFINITY, GENERATION_NUMBER_MAX};

/// The error used in [`File::traverse()`].
#[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] Commit(#[from] file::commit::Error), #[error("commit at file position {pos} has invalid ID {id}")] CommitId { id: gix_hash::ObjectId, pos: file::Position, }, #[error("commit at file position {pos} with ID {id} is out of order relative to its predecessor with ID {predecessor_id}")] CommitsOutOfOrder { id: gix_hash::ObjectId, pos: file::Position, predecessor_id: gix_hash::ObjectId, }, #[error("commit-graph filename should be {0}")] Filename(String), #[error("commit {id} has invalid generation {generation}")] Generation { generation: u32, id: gix_hash::ObjectId }, #[error("checksum mismatch: expected {expected}, got {actual}")] Mismatch { actual: gix_hash::ObjectId, expected: gix_hash::ObjectId, }, #[error("{0}")] Processor(#[source] E), #[error("commit {id} has invalid root tree ID {root_tree_id}")] RootTreeId { id: gix_hash::ObjectId, root_tree_id: gix_hash::ObjectId, }, } /// The positive result of [`File::traverse()`] providing some statistical information. #[derive(Clone, Debug, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct Outcome { /// The largest encountered [`file::Commit`] generation number. pub max_generation: u32, /// The smallest encountered [`file::Commit`] generation number. pub min_generation: u32, /// The largest number of parents in a single [`file::Commit`]. pub max_parents: u32, /// The total number of [`commits`][file::Commit]s seen in the iteration. pub num_commits: u32, /// A mapping of `N -> number of commits with N parents`. pub parent_counts: HashMap, } /// Verification impl File { /// Returns the trailing checksum over the entire content of this file. 
pub fn checksum(&self) -> &gix_hash::oid { gix_hash::oid::from_bytes_unchecked(&self.data[self.data.len() - self.hash_len..]) } /// Traverse all [commits][file::Commit] stored in this file and call `processor(commit) -> Result<(), Error>` on it. /// /// If the `processor` fails, the iteration will be stopped and the entire call results in the respective error. pub fn traverse<'a, E, Processor>(&'a self, mut processor: Processor) -> Result> where E: std::error::Error + 'static, Processor: FnMut(&file::Commit<'a>) -> Result<(), E>, { self.verify_checksum() .map_err(|(actual, expected)| Error::Mismatch { actual, expected })?; verify_split_chain_filename_hash(&self.path, self.checksum()).map_err(Error::Filename)?; let null_id = self.object_hash().null_ref(); let mut stats = Outcome { max_generation: 0, max_parents: 0, min_generation: GENERATION_NUMBER_INFINITY, num_commits: self.num_commits(), parent_counts: HashMap::new(), }; // TODO: Verify self.fan values as we go. let mut prev_id: &gix_hash::oid = null_id; for commit in self.iter_commits() { if commit.id() <= prev_id { if commit.id() == null_id { return Err(Error::CommitId { pos: commit.position(), id: commit.id().into(), }); } return Err(Error::CommitsOutOfOrder { pos: commit.position(), id: commit.id().into(), predecessor_id: prev_id.into(), }); } if commit.root_tree_id() == null_id { return Err(Error::RootTreeId { id: commit.id().into(), root_tree_id: commit.root_tree_id().into(), }); } if commit.generation() > GENERATION_NUMBER_MAX { return Err(Error::Generation { generation: commit.generation(), id: commit.id().into(), }); } processor(&commit).map_err(Error::Processor)?; stats.max_generation = max(stats.max_generation, commit.generation()); stats.min_generation = min(stats.min_generation, commit.generation()); let parent_count = commit .iter_parents() .try_fold(0u32, |acc, pos| pos.map(|_| acc + 1)) .map_err(Error::Commit)?; *stats.parent_counts.entry(parent_count).or_insert(0) += 1; prev_id = commit.id(); } 
if stats.min_generation == GENERATION_NUMBER_INFINITY { stats.min_generation = 0; } Ok(stats) } /// Assure the [`checksum`][File::checksum()] matches the actual checksum over all content of this file, excluding the trailing /// checksum itself. /// /// Return the actual checksum on success or `(actual checksum, expected checksum)` if there is a mismatch. pub fn verify_checksum(&self) -> Result { // Even though we could use gix_features::hash::bytes_of_file(…), this would require using our own // Error type to support io::Error and Mismatch. As we only gain progress, there probably isn't much value // as these files are usually small enough to process them in less than a second, even for the large ones. // But it's possible, once a progress instance is passed. let data_len_without_trailer = self.data.len() - self.hash_len; let mut hasher = gix_features::hash::hasher(self.object_hash()); hasher.update(&self.data[..data_len_without_trailer]); let actual = gix_hash::ObjectId::from_bytes_or_panic(hasher.digest().as_ref()); let expected = self.checksum(); if actual == expected { Ok(actual) } else { Err((actual, expected.into())) } } } /// If the given path's filename matches "graph-{hash}.graph", check that `hash` matches the /// expected hash. fn verify_split_chain_filename_hash(path: &Path, expected: &gix_hash::oid) -> Result<(), String> { path.file_name() .and_then(std::ffi::OsStr::to_str) .and_then(|filename| filename.strip_suffix(".graph")) .and_then(|stem| stem.strip_prefix("graph-")) .map_or(Ok(()), |hex| match gix_hash::ObjectId::from_hex(hex.as_bytes()) { Ok(actual) if actual == expected => Ok(()), _ => Err(format!("graph-{}.graph", expected.to_hex())), }) } gix-commitgraph-0.24.2/src/init.rs000064400000000000000000000105161046102023000151130ustar 00000000000000use std::{ io::{BufRead, BufReader}, path::{Path, PathBuf}, }; use crate::{file, File, Graph, MAX_COMMITS}; /// The error returned by initializations functions like [`Graph::at()`]. 
#[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("{}", .path.display())] File { #[source] err: file::Error, path: PathBuf, }, #[error("Commit-graph files mismatch: '{}' uses hash {hash1:?}, but '{}' uses hash {hash2:?}", .path1.display(), .path2.display())] HashVersionMismatch { path1: PathBuf, hash1: gix_hash::Kind, path2: PathBuf, hash2: gix_hash::Kind, }, #[error("Did not find any files that look like commit graphs at '{}'", .0.display())] InvalidPath(PathBuf), #[error("Could not open commit-graph file at '{}'", .path.display())] Io { #[source] err: std::io::Error, path: PathBuf, }, #[error( "Commit-graph files contain {0} commits altogether, but only {} commits are allowed", MAX_COMMITS )] TooManyCommits(u64), } /// Instantiate a `Graph` from various sources. impl Graph { /// Instantiate a commit graph from `path` which may be a directory containing graph files or the graph file itself. pub fn at(path: &Path) -> Result { Self::try_from(path) } /// Instantiate a commit graph from the directory containing all of its files. pub fn from_commit_graphs_dir(path: &Path) -> Result { let commit_graphs_dir = path; let chain_file_path = commit_graphs_dir.join("commit-graph-chain"); let chain_file = std::fs::File::open(&chain_file_path).map_err(|e| Error::Io { err: e, path: chain_file_path.clone(), })?; let mut files = Vec::new(); for line in BufReader::new(chain_file).lines() { let hash = line.map_err(|e| Error::Io { err: e, path: chain_file_path.clone(), })?; let graph_file_path = commit_graphs_dir.join(format!("graph-{hash}.graph")); files.push(File::at(&graph_file_path).map_err(|e| Error::File { err: e, path: graph_file_path.clone(), })?); } Self::new(files) } /// Instantiate a commit graph from a `.git/objects/info/commit-graph` or /// `.git/objects/info/commit-graphs/graph-*.graph` file. 
pub fn from_file(path: &Path) -> Result { let file = File::at(path).map_err(|e| Error::File { err: e, path: path.to_owned(), })?; Self::new(vec![file]) } /// Instantiate a commit graph from an `.git/objects/info` directory. pub fn from_info_dir(info_dir: &Path) -> Result { Self::from_file(&info_dir.join("commit-graph")) .or_else(|_| Self::from_commit_graphs_dir(&info_dir.join("commit-graphs"))) } /// Create a new commit graph from a list of `files`. pub fn new(files: Vec) -> Result { let num_commits: u64 = files.iter().map(|f| u64::from(f.num_commits())).sum(); if num_commits > u64::from(MAX_COMMITS) { return Err(Error::TooManyCommits(num_commits)); } for window in files.windows(2) { let f1 = &window[0]; let f2 = &window[1]; if f1.object_hash() != f2.object_hash() { return Err(Error::HashVersionMismatch { path1: f1.path().to_owned(), hash1: f1.object_hash(), path2: f2.path().to_owned(), hash2: f2.object_hash(), }); } } Ok(Self { files }) } } impl TryFrom<&Path> for Graph { type Error = Error; fn try_from(path: &Path) -> Result { if path.is_file() { // Assume we are looking at `.git/objects/info/commit-graph` or // `.git/objects/info/commit-graphs/graph-*.graph`. Self::from_file(path) } else if path.is_dir() { if path.join("commit-graph-chain").is_file() { Self::from_commit_graphs_dir(path) } else { Self::from_info_dir(path) } } else { Err(Error::InvalidPath(path.to_owned())) } } } gix-commitgraph-0.24.2/src/lib.rs000064400000000000000000000054611046102023000147210ustar 00000000000000//! Read, verify, and traverse git commit graphs. //! //! A [commit graph][Graph] is an index of commits in the git commit history. //! The [Graph] stores commit data in a way that accelerates lookups considerably compared to //! traversing the git history by usual means. //! //! As generating the full commit graph from scratch can take some time, git may write new commits //! to separate [files][File] instead of overwriting the original file. //! 
Eventually, git will merge these files together as the number of files grows. //! ## Feature Flags #![cfg_attr( all(doc, feature = "document-features"), doc = ::document_features::document_features!() )] #![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))] #![deny(missing_docs, rust_2018_idioms, unsafe_code)] use std::path::Path; /// A single commit-graph file. /// /// All operations on a `File` are local to that graph file. Since a commit graph can span multiple /// files, all interesting graph operations belong on [`Graph`]. pub struct File { base_graph_count: u8, base_graphs_list_offset: Option, commit_data_offset: usize, data: memmap2::Mmap, extra_edges_list_range: Option>, fan: [u32; file::FAN_LEN], oid_lookup_offset: usize, path: std::path::PathBuf, hash_len: usize, object_hash: gix_hash::Kind, } /// A complete commit graph. /// /// The data in the commit graph may come from a monolithic `objects/info/commit-graph` file, or it /// may come from one or more `objects/info/commit-graphs/graph-*.graph` files. These files are /// generated via `git commit-graph write ...` commands. pub struct Graph { files: Vec, } /// Instantiate a commit graph from an `.git/objects/info` directory, or one of the various commit-graph files. pub fn at(path: impl AsRef) -> Result { Graph::at(path.as_ref()) } mod access; pub mod file; /// #[allow(clippy::empty_docs)] pub mod init; pub mod verify; /// The number of generations that are considered 'infinite' commit history. pub const GENERATION_NUMBER_INFINITY: u32 = 0xffff_ffff; /// The largest valid generation number. /// /// If a commit's real generation number is larger than this, the commit graph will cap the value to /// this number. /// The largest distinct generation number is `GENERATION_NUMBER_MAX - 1`. pub const GENERATION_NUMBER_MAX: u32 = 0x3fff_ffff; /// The maximum number of commits that can be stored in a commit graph. 
pub const MAX_COMMITS: u32 = (1 << 30) + (1 << 29) + (1 << 28) - 1; /// A generalized position for use in [`Graph`]. #[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd, Hash)] pub struct Position(pub u32); impl std::fmt::Display for Position { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.0.fmt(f) } } gix-commitgraph-0.24.2/src/verify.rs000064400000000000000000000207761046102023000154650ustar 00000000000000//! Auxiliary types used by graph verification methods. use std::{ cmp::{max, min}, collections::BTreeMap, path::PathBuf, }; use crate::{ file::{self, commit}, Graph, Position, GENERATION_NUMBER_MAX, }; /// The error used in [`verify_integrity()`][Graph::verify_integrity]. #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("'{}' should have {expected} base graphs, but claims {actual} base graphs", .path.display())] BaseGraphCount { actual: u8, expected: u8, path: PathBuf }, #[error("'{}' base graph at index {index} should have ID {expected} but is {actual}", .path.display())] BaseGraphId { actual: gix_hash::ObjectId, expected: gix_hash::ObjectId, index: u8, path: PathBuf, }, #[error(transparent)] Commit(#[from] commit::Error), #[error("{}: {err}", .path.display())] File { // Use zero-size error type. We will never return // `graph::verify::Error::File(file::verify::Error::Processor(...))`, because we are the // file's processor, and we convert`file::verify::Error::Processor` // variants into direct `graph::verify::Error` values. 
err: file::verify::Error, path: PathBuf, }, #[error("Commit {id}'s generation should be {expected} but is {actual}")] Generation { actual: u32, expected: u32, id: gix_hash::ObjectId, }, #[error( "Commit {id} has parent position {parent_pos} that is out of range (should be in range 0-{max_valid_pos})" )] ParentOutOfRange { id: gix_hash::ObjectId, max_valid_pos: Position, parent_pos: Position, }, #[error("{0}")] Processor(#[source] E), #[error("Commit-graph should be composed of at most 256 files but actually contains {0} files")] TooManyFiles(usize), } /// Statistics gathered while verifying the integrity of the graph as returned by [`Graph::verify_integrity()`]. #[derive(Clone, Debug, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] pub struct Outcome { /// The length of the longest path between any two commits in this graph. /// /// For example, this will be `Some(9)` for a commit graph containing 10 linear commits. /// This will be `Some(0)` for a commit graph containing 0 or 1 commits. /// If the longest path length is too large to fit in a [u32], then this will be [None]. pub longest_path_length: Option, /// The total number of commits traversed. pub num_commits: u32, /// A mapping of `N -> number of commits with N parents`. pub parent_counts: BTreeMap, } impl Graph { /// Traverse all commits in the graph and call `processor(&commit) -> Result<(), E>` on it while verifying checksums. /// /// When `processor` returns an error, the entire verification is stopped and the error returned. pub fn verify_integrity( &self, mut processor: impl FnMut(&file::Commit<'_>) -> Result<(), E>, ) -> Result> where E: std::error::Error + 'static, { if self.files.len() > 256 { // A file in a split chain can only have up to 255 base files. 
return Err(Error::TooManyFiles(self.files.len())); } let mut stats = Outcome { longest_path_length: None, num_commits: 0, parent_counts: BTreeMap::new(), }; let mut max_generation = 0u32; // TODO: Detect duplicate commit IDs across different files. Not sure how to do this without // a separate loop, e.g. self.iter_sorted_ids(). let mut file_start_pos = Position(0); for (file_index, file) in self.files.iter().enumerate() { if usize::from(file.base_graph_count()) != file_index { return Err(Error::BaseGraphCount { actual: file.base_graph_count(), expected: file_index .try_into() .expect("files.len() check to protect against this"), path: file.path().to_owned(), }); } for (base_graph_index, (expected, actual)) in self.files[..file_index] .iter() .map(crate::File::checksum) .zip(file.iter_base_graph_ids()) .enumerate() { if actual != expected { return Err(Error::BaseGraphId { actual: actual.into(), expected: expected.into(), index: base_graph_index .try_into() .expect("files.len() check to protect against this"), path: file.path().to_owned(), }); } } let next_file_start_pos = Position(file_start_pos.0 + file.num_commits()); let file_stats = file .traverse(|commit| { let mut max_parent_generation = 0u32; for parent_pos in commit.iter_parents() { let parent_pos = parent_pos.map_err(Error::Commit)?; if parent_pos >= next_file_start_pos { return Err(Error::ParentOutOfRange { parent_pos, id: commit.id().into(), max_valid_pos: Position(next_file_start_pos.0 - 1), }); } let parent = self.commit_at(parent_pos); max_parent_generation = max(max_parent_generation, parent.generation()); } // If the max parent generation is GENERATION_NUMBER_MAX, then this commit's // generation should be GENERATION_NUMBER_MAX too. 
let expected_generation = min(max_parent_generation + 1, GENERATION_NUMBER_MAX); if commit.generation() != expected_generation { return Err(Error::Generation { actual: commit.generation(), expected: expected_generation, id: commit.id().into(), }); } processor(commit).map_err(Error::Processor)?; Ok(()) }) .map_err(|err| Error::File { err: match err { file::verify::Error::Processor(e) => return e, file::verify::Error::RootTreeId { id, root_tree_id } => { file::verify::Error::RootTreeId { id, root_tree_id } } file::verify::Error::Mismatch { actual, expected } => { file::verify::Error::Mismatch { actual, expected } } file::verify::Error::Generation { generation, id } => { file::verify::Error::Generation { generation, id } } file::verify::Error::Filename(expected) => file::verify::Error::Filename(expected), file::verify::Error::Commit(err) => file::verify::Error::Commit(err), file::verify::Error::CommitId { id, pos } => file::verify::Error::CommitId { id, pos }, file::verify::Error::CommitsOutOfOrder { id, pos, predecessor_id, } => file::verify::Error::CommitsOutOfOrder { id, pos, predecessor_id, }, }, path: file.path().to_owned(), })?; max_generation = max(max_generation, file_stats.max_generation); stats.num_commits += file_stats.num_commits; for (key, value) in file_stats.parent_counts.into_iter() { *stats.parent_counts.entry(key).or_insert(0) += value; } file_start_pos = next_file_start_pos; } stats.longest_path_length = if max_generation < GENERATION_NUMBER_MAX { Some(max_generation.saturating_sub(1)) } else { None }; Ok(stats) } }