gix-odb-0.66.0/.cargo_vcs_info.json0000644000000001450000000000100124560ustar { "git": { "sha1": "beb0ea8c4ff94c64b7773772a9d388ccb403f3c1" }, "path_in_vcs": "gix-odb" }gix-odb-0.66.0/Cargo.toml0000644000000100060000000000100104510ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.65" name = "gix-odb" version = "0.66.0" authors = ["Sebastian Thiel "] build = false include = [ "src/**/*", "LICENSE-*", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "Implements various git object databases" readme = false license = "MIT OR Apache-2.0" repository = "https://github.com/GitoxideLabs/gitoxide" [package.metadata.docs.rs] features = [ "document-features", "serde", ] [lib] name = "gix_odb" path = "src/lib.rs" doctest = false [dependencies.arc-swap] version = "1.5.0" [dependencies.document-features] version = "0.2.0" optional = true [dependencies.gix-date] version = "^0.9.3" [dependencies.gix-features] version = "^0.39.1" features = [ "rustsha1", "walkdir", "zlib", "crc32", ] [dependencies.gix-fs] version = "^0.12.1" [dependencies.gix-hash] version = "^0.15.1" [dependencies.gix-hashtable] version = "^0.6.0" [dependencies.gix-object] version = "^0.46.1" [dependencies.gix-pack] version = "^0.56.0" default-features = false [dependencies.gix-path] version = "^0.10.13" [dependencies.gix-quote] version = "^0.4.14" [dependencies.parking_lot] version = "0.12.0" [dependencies.serde] version = "1.0.114" features = ["derive"] optional = true default-features = false [dependencies.tempfile] version = "3.10.0" [dependencies.thiserror] version = "2.0.0" [features] serde = [ "dep:serde", "gix-hash/serde", "gix-object/serde", "gix-pack/serde", ] [lints.clippy] bool_to_int_with_if = "allow" borrow_as_ptr = "allow" cast_lossless = "allow" cast_possible_truncation = "allow" cast_possible_wrap = "allow" cast_precision_loss = "allow" cast_sign_loss = "allow" checked_conversions = "allow" copy_iterator = "allow" default_trait_access = "allow" doc_markdown = "allow" empty_docs = "allow" enum_glob_use = "allow" explicit_deref_methods = "allow" explicit_into_iter_loop = "allow" explicit_iter_loop = "allow" filter_map_next = "allow" fn_params_excessive_bools = "allow" from_iter_instead_of_collect = "allow" if_not_else = "allow" ignored_unit_patterns = "allow" implicit_clone = "allow" inconsistent_struct_constructor = "allow" inefficient_to_string = "allow" inline_always = "allow" items_after_statements = "allow" iter_not_returning_iterator = "allow" iter_without_into_iter = "allow" manual_assert = "allow" manual_is_variant_and = "allow" manual_let_else = "allow" manual_string_new = "allow" many_single_char_names = "allow" match_bool = "allow" match_same_arms = "allow" match_wild_err_arm = "allow" match_wildcard_for_single_variants = "allow" missing_errors_doc = "allow" missing_panics_doc = "allow" module_name_repetitions = "allow" must_use_candidate = "allow" mut_mut = "allow" naive_bytecount = "allow" needless_for_each = "allow" needless_pass_by_value = "allow" needless_raw_string_hashes = "allow" 
no_effect_underscore_binding = "allow" option_option = "allow" range_plus_one = "allow" redundant_else = "allow" return_self_not_must_use = "allow" should_panic_without_expect = "allow" similar_names = "allow" single_match_else = "allow" stable_sort_primitive = "allow" struct_excessive_bools = "allow" struct_field_names = "allow" too_long_first_doc_paragraph = "allow" too_many_lines = "allow" transmute_ptr_to_ptr = "allow" trivially_copy_pass_by_ref = "allow" unnecessary_join = "allow" unnecessary_wraps = "allow" unreadable_literal = "allow" unused_self = "allow" used_underscore_binding = "allow" wildcard_imports = "allow" [lints.clippy.pedantic] level = "warn" priority = -1 [lints.rust] gix-odb-0.66.0/Cargo.toml.orig000064400000000000000000000027371046102023000141460ustar 00000000000000lints.workspace = true [package] name = "gix-odb" version = "0.66.0" repository = "https://github.com/GitoxideLabs/gitoxide" authors = ["Sebastian Thiel "] license = "MIT OR Apache-2.0" description = "Implements various git object databases" edition = "2021" include = ["src/**/*", "LICENSE-*"] rust-version = "1.65" autotests = false [lib] doctest = false [features] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. serde = ["dep:serde", "gix-hash/serde", "gix-object/serde", "gix-pack/serde"] [dependencies] gix-features = { version = "^0.39.1", path = "../gix-features", features = ["rustsha1", "walkdir", "zlib", "crc32"] } gix-hashtable = { version = "^0.6.0", path = "../gix-hashtable" } gix-hash = { version = "^0.15.1", path = "../gix-hash" } gix-date = { version = "^0.9.3", path = "../gix-date" } gix-path = { version = "^0.10.13", path = "../gix-path" } gix-quote = { version = "^0.4.14", path = "../gix-quote" } gix-object = { version = "^0.46.1", path = "../gix-object" } gix-pack = { version = "^0.56.0", path = "../gix-pack", default-features = false } gix-fs = { version = "^0.12.1", path = "../gix-fs" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } tempfile = "3.10.0" thiserror = "2.0.0" parking_lot = { version = "0.12.0" } arc-swap = "1.5.0" document-features = { version = "0.2.0", optional = true } [package.metadata.docs.rs] features = ["document-features", "serde"] gix-odb-0.66.0/LICENSE-APACHE000064400000000000000000000247461046102023000132070ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. gix-odb-0.66.0/LICENSE-MIT000064400000000000000000000017771046102023000127160ustar 00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. gix-odb-0.66.0/src/alternate/mod.rs000064400000000000000000000054121046102023000151430ustar 00000000000000//! A file with directories of other git object databases to use when reading objects. //! //! This inherently makes alternates read-only. //! //! An alternate file in `/info/alternates` can look as follows: //! //! ```text //! # a comment, empty lines are also allowed //! # relative paths resolve relative to the parent git repository //! ../path/relative/to/repo/.git //! /absolute/path/to/repo/.git //! //! "/a/ansi-c-quoted/path/with/tabs\t/.git" //! //! 
# each .git directory should indeed be a directory, and not a file //! ``` //! //! Based on the [canonical implementation](https://github.com/git/git/blob/master/sha1-file.c#L598:L609). use std::{fs, io, path::PathBuf}; use gix_path::realpath::MAX_SYMLINKS; /// pub mod parse; /// Returned by [`resolve()`] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] Io(#[from] io::Error), #[error(transparent)] Realpath(#[from] gix_path::realpath::Error), #[error(transparent)] Parse(#[from] parse::Error), #[error("Alternates form a cycle: {} -> {}", .0.iter().map(|p| format!("'{}'", p.display())).collect::>().join(" -> "), .0.first().expect("more than one directories").display())] Cycle(Vec), } /// Given an `objects_directory`, try to resolve alternate object directories possibly located in the /// `./info/alternates` file into canonical paths and resolve relative paths with the help of the `current_dir`. /// If no alternate object database was resolved, the resulting `Vec` is empty (it is not an error /// if there are no alternates). /// It is an error once a repository is seen again as it would lead to a cycle. pub fn resolve(objects_directory: PathBuf, current_dir: &std::path::Path) -> Result, Error> { let mut dirs = vec![(0, objects_directory.clone())]; let mut out = Vec::new(); let mut seen = vec![gix_path::realpath_opts(&objects_directory, current_dir, MAX_SYMLINKS)?]; while let Some((depth, dir)) = dirs.pop() { match fs::read(dir.join("info").join("alternates")) { Ok(input) => { for path in parse::content(&input)?.into_iter() { let path = objects_directory.join(path); let path_canonicalized = gix_path::realpath_opts(&path, current_dir, MAX_SYMLINKS)?; if seen.contains(&path_canonicalized) { return Err(Error::Cycle(seen)); } seen.push(path_canonicalized); dirs.push((depth + 1, path)); } } Err(err) if err.kind() == io::ErrorKind::NotFound => {} Err(err) => return Err(err.into()), }; if depth != 0 { out.push(dir); } } Ok(out) } gix-odb-0.66.0/src/alternate/parse.rs000064400000000000000000000020261046102023000154740ustar 00000000000000use std::{borrow::Cow, path::PathBuf}; use gix_object::bstr::ByteSlice; /// Returned as part of [`crate::alternate::Error::Parse`] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("Could not obtain an object path for the alternate directory '{}'", String::from_utf8_lossy(.0))] PathConversion(Vec), #[error("Could not unquote alternate path")] Unquote(#[from] gix_quote::ansi_c::undo::Error), } pub(crate) fn content(input: &[u8]) -> Result, Error> { let mut out = Vec::new(); for line in input.split(|b| *b == b'\n') { let line = line.as_bstr(); if line.is_empty() || line.starts_with(b"#") { continue; } out.push( gix_path::try_from_bstr(if line.starts_with(b"\"") { gix_quote::ansi_c::undo(line)?.0 } else { Cow::Borrowed(line) }) .map_err(|_| Error::PathConversion(line.to_vec()))? .into_owned(), ); } Ok(out) } gix-odb-0.66.0/src/cache.rs000064400000000000000000000203741046102023000134540ustar 00000000000000use std::{ cell::RefCell, ops::{Deref, DerefMut}, rc::Rc, sync::Arc, }; use crate::Cache; /// A type to store pack caches in boxes. pub type PackCache = dyn gix_pack::cache::DecodeEntry + Send + 'static; /// A constructor for boxed pack caches. pub type NewPackCacheFn = dyn Fn() -> Box + Send + Sync + 'static; /// A type to store object caches in boxes. pub type ObjectCache = dyn gix_pack::cache::Object + Send + 'static; /// A constructor for boxed object caches. 
pub type NewObjectCacheFn = dyn Fn() -> Box + Send + Sync + 'static; impl Cache>> { /// Convert this cache's handle into one that keeps its store in an arc. This creates an entirely new store, /// so should be done early to avoid unnecessary work (and mappings). pub fn into_arc(self) -> std::io::Result>>> { let inner = self.inner.into_arc()?; Ok(Cache { inner, new_pack_cache: self.new_pack_cache, new_object_cache: self.new_object_cache, pack_cache: self.pack_cache, object_cache: self.object_cache, }) } } impl Cache>> { /// No op, as we are containing an arc handle already. pub fn into_arc(self) -> std::io::Result>>> { Ok(self) } } impl Cache { /// Dissolve this instance, discard all caches, and return the inner implementation. pub fn into_inner(self) -> S { self.inner } /// Use this methods directly after creating a new instance to add a constructor for pack caches. /// /// These are used to speed up decoding objects which are located in packs, reducing long delta chains by storing /// their intermediate results. pub fn with_pack_cache(mut self, create: impl Fn() -> Box + Send + Sync + 'static) -> Self { self.pack_cache = Some(RefCell::new(create())); self.new_pack_cache = Some(Arc::new(create)); self } /// Use this methods directly after creating a new instance to add a constructor for object caches. /// /// Only use this kind of cache if the same objects are repeatedly accessed for great speedups, usually during diffing of /// trees. pub fn with_object_cache(mut self, create: impl Fn() -> Box + Send + Sync + 'static) -> Self { self.object_cache = Some(RefCell::new(create())); self.new_object_cache = Some(Arc::new(create)); self } /// Set the pack cache constructor on this instance. pub fn set_pack_cache(&mut self, create: impl Fn() -> Box + Send + Sync + 'static) { self.pack_cache = Some(RefCell::new(create())); self.new_pack_cache = Some(Arc::new(create)); } /// Set the object cache constructor on this instance. pub fn set_object_cache(&mut self, create: impl Fn() -> Box + Send + Sync + 'static) { self.object_cache = Some(RefCell::new(create())); self.new_object_cache = Some(Arc::new(create)); } /// Return true if an object cache is present. pub fn has_object_cache(&self) -> bool { self.object_cache.is_some() } /// Return true if a pack cache is present. pub fn has_pack_cache(&self) -> bool { self.pack_cache.is_some() } /// Remove the current pack cache as well as its constructor from this instance. pub fn unset_pack_cache(&mut self) { self.pack_cache = None; self.new_pack_cache = None; } /// Remove the current object cache as well as its constructor from this instance. 
pub fn unset_object_cache(&mut self) { self.object_cache = None; self.new_object_cache = None; } } impl From for Cache where S: gix_pack::Find, { fn from(store: S) -> Self { Self { inner: store, pack_cache: None, new_pack_cache: None, object_cache: None, new_object_cache: None, } } } impl Clone for Cache { fn clone(&self) -> Self { Cache { inner: self.inner.clone(), new_pack_cache: self.new_pack_cache.clone(), new_object_cache: self.new_object_cache.clone(), pack_cache: self.new_pack_cache.as_ref().map(|create| RefCell::new(create())), object_cache: self.new_object_cache.as_ref().map(|create| RefCell::new(create())), } } } impl Deref for Cache { type Target = S; fn deref(&self) -> &Self::Target { &self.inner } } impl DerefMut for Cache { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.inner } } mod impls { use std::{cell::RefCell, io::Read, ops::DerefMut}; use gix_hash::{oid, ObjectId}; use gix_object::{Data, Kind}; use gix_pack::cache::Object; use crate::{find::Header, pack::data::entry::Location, Cache}; impl gix_object::Write for Cache where S: gix_object::Write, { fn write_stream( &self, kind: Kind, size: u64, from: &mut dyn Read, ) -> Result { self.inner.write_stream(kind, size, from) } } impl gix_object::Find for Cache where S: gix_pack::Find, { fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, gix_object::find::Error> { gix_pack::Find::try_find(self, id, buffer).map(|t| t.map(|t| t.0)) } } impl gix_object::Exists for Cache where S: gix_pack::Find, { fn exists(&self, id: &oid) -> bool { self.inner.contains(id) } } impl crate::Header for Cache where S: crate::Header, { fn try_header(&self, id: &oid) -> Result, gix_object::find::Error> { self.inner.try_header(id) } } impl gix_object::FindHeader for Cache where S: gix_object::FindHeader, { fn try_header(&self, id: &oid) -> Result, gix_object::find::Error> { self.inner.try_header(id) } } impl gix_pack::Find for Cache where S: gix_pack::Find, { fn contains(&self, id: &oid) -> bool { self.inner.contains(id) } fn try_find<'a>( &self, id: &oid, buffer: &'a mut Vec, ) -> Result, Option)>, gix_object::find::Error> { match self.pack_cache.as_ref().map(RefCell::borrow_mut) { Some(mut pack_cache) => self.try_find_cached(id, buffer, pack_cache.deref_mut()), None => self.try_find_cached(id, buffer, &mut gix_pack::cache::Never), } } fn try_find_cached<'a>( &self, id: &oid, buffer: &'a mut Vec, pack_cache: &mut dyn gix_pack::cache::DecodeEntry, ) -> Result, Option)>, gix_object::find::Error> { if let Some(mut obj_cache) = self.object_cache.as_ref().map(RefCell::borrow_mut) { if let Some(kind) = obj_cache.get(&id.as_ref().to_owned(), buffer) { return Ok(Some((Data::new(kind, buffer), None))); } } let possibly_obj = self.inner.try_find_cached(id.as_ref(), buffer, pack_cache)?; if let (Some(mut obj_cache), Some((obj, _location))) = (self.object_cache.as_ref().map(RefCell::borrow_mut), &possibly_obj) { obj_cache.put(id.as_ref().to_owned(), obj.kind, obj.data); } Ok(possibly_obj) } fn location_by_oid(&self, id: &oid, buf: &mut Vec) -> Option { self.inner.location_by_oid(id, buf) } fn pack_offsets_and_oid(&self, pack_id: u32) -> Option> { self.inner.pack_offsets_and_oid(pack_id) } fn entry_by_location(&self, location: &Location) -> Option { self.inner.entry_by_location(location) } } } gix-odb-0.66.0/src/find.rs000064400000000000000000000042171046102023000133270ustar 00000000000000/// An object header informing about object properties, without it being fully decoded in the process. 
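///
/// ### Example (sketch)
///
/// A minimal illustration of inspecting a header obtained through the [`HeaderExt`](crate::HeaderExt)
/// extension trait on a store handle; the `header` value below is assumed to already exist:
///
/// ```no_run
/// # fn doc(header: gix_odb::find::Header) {
/// match header {
///     gix_odb::find::Header::Loose { kind, size } => {
///         println!("loose {kind:?} object, {size} bytes");
///     }
///     gix_odb::find::Header::Packed(info) => {
///         println!("packed {:?} object, {} bytes, {} deltas", info.kind, info.object_size, info.num_deltas);
///     }
/// }
/// # }
/// ```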
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] pub enum Header { /// The object was not packed, but is currently located in the loose object portion of the database. /// /// As packs are searched first, this means that in this very moment, the object whose header we retrieved is unique /// in the object database. Loose { /// The kind of the object. kind: gix_object::Kind, /// The size of the object's data in bytes. size: u64, }, /// The object was present in a pack. /// /// Note that this does not imply it is unique in the database, as it might be present in more than one pack and even /// as loose object. Packed(gix_pack::data::decode::header::Outcome), } mod header { use super::Header; impl Header { /// Return the object kind of the object we represent. pub fn kind(&self) -> gix_object::Kind { match self { Header::Packed(out) => out.kind, Header::Loose { kind, .. } => *kind, } } /// Return the size of the object in bytes. pub fn size(&self) -> u64 { match self { Header::Packed(out) => out.object_size, Header::Loose { size, .. } => *size, } } /// Return the amount of deltas decoded to obtain this header, if the object was packed. pub fn num_deltas(&self) -> Option { match self { Header::Packed(out) => out.num_deltas.into(), Header::Loose { .. } => None, } } } impl From for Header { fn from(packed_header: gix_pack::data::decode::header::Outcome) -> Self { Header::Packed(packed_header) } } impl From<(u64, gix_object::Kind)> for Header { fn from((object_size, kind): (u64, gix_object::Kind)) -> Self { Header::Loose { kind, size: object_size, } } } } gix-odb-0.66.0/src/lib.rs000064400000000000000000000155361046102023000131630ustar 00000000000000//! Git stores all of its data as _Objects_, which are data along with a hash over all data. Thus it's an //! object store indexed by the signature of data itself with inherent deduplication: the same data will have the same hash, //! and thus occupy the same space within the store. //! //! There is only one all-round object store, also known as the [`Store`], as it supports ~~everything~~ most of what git has to offer. //! //! * loose object reading and writing //! * access to packed objects //! * multiple loose objects and pack locations as gathered from `alternates` files. //! ## Feature Flags #![cfg_attr( all(doc, feature = "document-features"), doc = ::document_features::document_features!() )] #![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))] #![deny(missing_docs, rust_2018_idioms, unsafe_code)] use std::{ cell::RefCell, path::PathBuf, sync::{atomic::AtomicUsize, Arc}, }; use arc_swap::ArcSwap; use gix_features::{threading::OwnShared, zlib::stream::deflate}; pub use gix_pack as pack; mod store_impls; pub use store_impls::{dynamic as store, loose}; pub mod alternate; /// A way to access objects along with pre-configured thread-local caches for packed base objects as well as objects themselves. /// /// By default, no cache will be used. pub struct Cache { /// The inner provider of trait implementations we use in conjunction with our caches. /// /// For calling methods on `inner`, prefer to make use of auto-dereferencing, i.e. `cache.inner_method()` instead of `cache.inner.inner_method()`. inner: S, // TODO: have single-threaded code-paths also for pack-creation (entries from counts) so that we can use OwnShared here // instead of Arc. However, it's probably not that important as these aren't called often. 
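// These constructors are installed via `Cache::with_pack_cache()`/`with_object_cache()` (or their
// `set_*` counterparts) and are re-invoked on `Clone` so that every clone starts with fresh caches.
// A usage sketch, with a hypothetical cache choice and kept as a comment so the struct stays as-is:
//
//     let handle = gix_odb::at(".git/objects")?
//         .with_pack_cache(|| Box::new(gix_pack::cache::Never));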
new_pack_cache: Option>, new_object_cache: Option>, pack_cache: Option>>, object_cache: Option>>, } /// pub mod cache; /// /// It can optionally compress the content, similarly to what would happen when using a [`loose::Store`]. /// #[derive(Clone)] pub struct Sink { compressor: Option>>, object_hash: gix_hash::Kind, } /// Create a new [`Sink`] with compression disabled. pub fn sink(object_hash: gix_hash::Kind) -> Sink { Sink { compressor: None, object_hash, } } /// pub mod memory; mod sink; /// pub mod find; /// An object database equivalent to `/dev/null`, dropping all objects stored into it. mod traits; pub use traits::{Header, HeaderExt}; /// A thread-local handle to access any object. pub type Handle = Cache>>; /// A thread-local handle to access any object, but thread-safe and independent of the actual type of `OwnShared` or feature toggles in `gix-features`. pub type HandleArc = Cache>>; use store::types; /// The object store for use in any applications with support for auto-updates in the light of changes to the object database. /// /// ### Features /// /// - entirely lazy, creating an instance does no disk IO at all if [`Slots::Given`][store::init::Slots::Given] is used. /// - multi-threaded lazy-loading of indices and packs /// - per-thread pack and object caching avoiding cache trashing. /// - most-recently-used packs are always first for speedups if objects are stored in the same pack, typical for packs organized by /// commit graph and object age. /// - lock-free reading for perfect scaling across all cores, and changes to it don't affect readers as long as these don't want to /// enter the same branch. /// - sync with the state on disk if objects aren't found to catch up with changes if an object seems to be missing. /// - turn off the behaviour above for all handles if objects are expected to be missing due to spare checkouts. pub struct Store { /// The central write lock without which the slotmap index can't be changed. write: parking_lot::Mutex<()>, /// The source directory from which all content is loaded, and the central write lock for use when a directory refresh is needed. pub(crate) path: PathBuf, /// The current working directory at the time this store was instantiated. It becomes relevant when resolving alternate paths /// when re-reading the store configuration on updates when an object was missed. /// Keeping it here helps to assure consistency even while a process changes its CWD. pub(crate) current_dir: PathBuf, /// A set of replacements that given a source OID return a destination OID. The vector is sorted. pub(crate) replacements: Vec<(gix_hash::ObjectId, gix_hash::ObjectId)>, /// A list of indices keeping track of which slots are filled with data. These are usually, but not always, consecutive. pub(crate) index: ArcSwap, /// The below state acts like a slot-map with each slot is mutable when the write lock is held, but readable independently of it. /// This allows multiple file to be loaded concurrently if there is multiple handles requesting to load packs or additional indices. /// The map is static and cannot typically change. /// It's read often and changed rarely. pub(crate) files: Vec, /// The amount of handles that would prevent us from unloading packs or indices pub(crate) num_handles_stable: AtomicUsize, /// The amount of handles that don't affect our ability to compact our internal data structures or unload packs or indices. 
pub(crate) num_handles_unstable: AtomicUsize, /// The amount of times we re-read the disk state to consolidate our in-memory representation. pub(crate) num_disk_state_consolidation: AtomicUsize, /// If true, we are allowed to use multi-pack indices and they must have the `object_hash` or be ignored. use_multi_pack_index: bool, /// The hash kind to use for some operations object_hash: gix_hash::Kind, } /// Create a new cached handle to the object store with support for additional options. /// /// `replacements` is an iterator over pairs of old and new object ids for replacement support. /// This means that when asking for object `X`, one will receive object `X-replaced` given an iterator like `Some((X, X-replaced))`. pub fn at_opts( objects_dir: impl Into, replacements: impl IntoIterator, options: store::init::Options, ) -> std::io::Result { let handle = OwnShared::new(Store::at_opts( objects_dir.into(), &mut replacements.into_iter(), options, )?) .to_handle(); Ok(Cache::from(handle)) } /// Create a new cached handle to the object store. pub fn at(objects_dir: impl Into) -> std::io::Result { at_opts(objects_dir, Vec::new(), Default::default()) } gix-odb-0.66.0/src/memory.rs000064400000000000000000000177421046102023000137260ustar 00000000000000use crate::find::Header; use crate::Cache; use gix_object::Data; use std::cell::RefCell; use std::ops::{Deref, DerefMut}; use std::rc::Rc; use std::sync::Arc; /// An object database to read from any implementation but write to memory. /// Previously written objects can be returned from memory upon query, which makes the view of objects consistent. /// In-Memory objects can be disabled by [taking out its storage](Proxy::take_object_memory). From there in-memory /// object can also be persisted one by one. /// /// It's possible to turn off the memory by removing it from the instance. pub struct Proxy { /// The actual odb implementation inner: T, /// The kind of hash to produce when writing new objects. object_hash: gix_hash::Kind, /// The storage for in-memory objects. /// If `None`, the proxy will always read from and write-through to `inner`. memory: Option>, } /// Lifecycle impl Proxy { /// Create a new instance using `odb` as actual object provider, with an empty in-memory store for /// objects that are to be written. /// Use `object_hash` to determine the kind of hash to produce when writing new objects. pub fn new(odb: T, object_hash: gix_hash::Kind) -> Proxy { Proxy { inner: odb, object_hash, memory: Some(Default::default()), } } /// Turn ourselves into our inner object database, while deallocating objects stored in memory. pub fn into_inner(self) -> T { self.inner } /// Strip object memory off this instance, which means that writes will go through to the inner object database /// right away. /// This mode makes the proxy fully transparent. pub fn with_write_passthrough(mut self) -> Self { self.memory.take(); self } } impl Proxy>>> { /// No op, as we are containing an arc handle already. pub fn into_arc(self) -> std::io::Result>>>> { Ok(self) } } impl Proxy>>> { /// Create an entirely new instance, but with the in-memory objects moving between them. pub fn into_arc(self) -> std::io::Result>>>> { Ok(Proxy { inner: self.inner.into_arc()?, object_hash: self.object_hash, memory: self.memory, }) } } impl From for Proxy { fn from(odb: crate::Handle) -> Self { let object_hash = odb.store.object_hash; Proxy::new(odb, object_hash) } } /// Memory Access impl Proxy { /// Take all the objects in memory so far, with the memory storage itself and return it. 
/// /// The instance will remain in a state where it won't be able to store objects in memory at all, /// they will now be stored in the underlying object database. /// This mode makes the proxy fully transparent. /// /// To avoid that, use [`reset_object_memory()`](Self::reset_object_memory()) or return the storage /// using [`set_object_memory()`](Self::set_object_memory()). pub fn take_object_memory(&mut self) -> Option { self.memory.take().map(RefCell::into_inner) } /// Set the object storage to contain only `new` objects, and return whichever objects were there previously. pub fn set_object_memory(&mut self, new: Storage) -> Option { let previous = self.take_object_memory(); self.memory = Some(RefCell::new(new)); previous } /// If objects aren't written to memory yet, this will happen after the call. /// /// Otherwise, no change will be performed. pub fn enable_object_memory(&mut self) -> &mut Self { if self.memory.is_none() { self.memory = Some(Default::default()); } self } /// Reset the internal storage to be empty, and return the previous storage, with all objects /// it contained. /// /// Note that this does nothing if this instance didn't contain object memory in the first place. /// In that case, set it explicitly. pub fn reset_object_memory(&self) -> Option { self.memory.as_ref().map(|m| std::mem::take(&mut *m.borrow_mut())) } /// Return the amount of objects currently stored in memory. pub fn num_objects_in_memory(&self) -> usize { self.memory.as_ref().map_or(0, |m| m.borrow().len()) } } impl Clone for Proxy where T: Clone, { fn clone(&self) -> Self { Proxy { inner: self.inner.clone(), object_hash: self.object_hash, memory: self.memory.clone(), } } } impl gix_object::Find for Proxy where T: gix_object::Find, { fn try_find<'a>( &self, id: &gix_hash::oid, buffer: &'a mut Vec, ) -> Result>, gix_object::find::Error> { if let Some(map) = self.memory.as_ref() { let map = map.borrow(); if let Some((kind, data)) = map.get(id) { buffer.clear(); buffer.extend_from_slice(data); return Ok(Some(Data { kind: *kind, data: &*buffer, })); } } self.inner.try_find(id, buffer) } } impl gix_object::Exists for Proxy where T: gix_object::Exists, { fn exists(&self, id: &gix_hash::oid) -> bool { self.memory.as_ref().map_or(false, |map| map.borrow().contains_key(id)) || self.inner.exists(id) } } impl crate::Header for Proxy where T: crate::Header, { fn try_header(&self, id: &gix_hash::oid) -> Result, gix_object::find::Error> { if let Some(map) = self.memory.as_ref() { let map = map.borrow(); if let Some((kind, data)) = map.get(id) { return Ok(Some(Header::Loose { kind: *kind, size: data.len() as u64, })); } } self.inner.try_header(id) } } impl gix_object::FindHeader for Proxy where T: gix_object::FindHeader, { fn try_header(&self, id: &gix_hash::oid) -> Result, gix_object::find::Error> { if let Some(map) = self.memory.as_ref() { let map = map.borrow(); if let Some((kind, data)) = map.get(id) { return Ok(Some(gix_object::Header { kind: *kind, size: data.len() as u64, })); } } self.inner.try_header(id) } } impl gix_object::Write for Proxy where T: gix_object::Write, { fn write_stream( &self, kind: gix_object::Kind, size: u64, from: &mut dyn std::io::Read, ) -> Result { let Some(map) = self.memory.as_ref() else { return self.inner.write_stream(kind, size, from); }; let mut buf = Vec::new(); from.read_to_end(&mut buf)?; let id = gix_object::compute_hash(self.object_hash, kind, &buf); map.borrow_mut().insert(id, (kind, buf)); Ok(id) } } impl Deref for Proxy { type Target = T; fn deref(&self) -> 
&Self::Target { &self.inner } } impl DerefMut for Proxy { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.inner } } /// A mapping between an object id and all data corresponding to an object, acting like a `HashMap`. #[derive(Default, Debug, Clone, Eq, PartialEq)] pub struct Storage(gix_hashtable::HashMap)>); impl Deref for Storage { type Target = gix_hashtable::HashMap)>; fn deref(&self) -> &Self::Target { &self.0 } } impl DerefMut for Storage { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } gix-odb-0.66.0/src/sink.rs000064400000000000000000000033021046102023000133450ustar 00000000000000use std::{ cell::RefCell, io::{self, Write}, }; use gix_features::zlib::stream::deflate; use crate::Sink; impl Sink { /// Enable or disable compression. Compression is disabled by default pub fn compress(mut self, enable: bool) -> Self { if enable { self.compressor = Some(RefCell::new(deflate::Write::new(io::sink()))); } else { self.compressor = None; } self } } impl gix_object::Write for Sink { fn write_stream( &self, kind: gix_object::Kind, mut size: u64, from: &mut dyn io::Read, ) -> Result { let mut buf = [0u8; u16::MAX as usize]; let header = gix_object::encode::loose_header(kind, size); let possibly_compress = |buf: &[u8]| -> io::Result<()> { if let Some(compressor) = self.compressor.as_ref() { compressor.try_borrow_mut().expect("no recursion").write_all(buf)?; } Ok(()) }; let mut hasher = gix_features::hash::hasher(self.object_hash); hasher.update(&header); possibly_compress(&header).map_err(Box::new)?; while size != 0 { let bytes = (size as usize).min(buf.len()); from.read_exact(&mut buf[..bytes]).map_err(Box::new)?; hasher.update(&buf[..bytes]); possibly_compress(&buf[..bytes]).map_err(Box::new)?; size -= bytes as u64; } if let Some(compressor) = self.compressor.as_ref() { let mut c = compressor.borrow_mut(); c.flush().map_err(Box::new)?; c.reset(); } Ok(hasher.digest().into()) } } gix-odb-0.66.0/src/store_impls/dynamic/access.rs000064400000000000000000000015711046102023000176340ustar 00000000000000use crate::Store; impl Store { /// The root path at which we expect to find all objects and packs, and which is the source of the /// alternate file traversal in case there are linked repositories. pub fn path(&self) -> &std::path::Path { &self.path } /// The kind of object hash to assume when dealing with pack indices and pack data files. pub fn object_hash(&self) -> gix_hash::Kind { self.object_hash } /// Whether or not we are allowed to use multi-pack indices pub fn use_multi_pack_index(&self) -> bool { self.use_multi_pack_index } /// An iterator over replacements from object-ids `X` to `X-replaced` as `(X, X-replaced)`, sorted by the original id `X`. 
pub fn replacements(&self) -> impl Iterator + '_ { self.replacements.iter().copied() } } gix-odb-0.66.0/src/store_impls/dynamic/find.rs000064400000000000000000000601221046102023000173100ustar 00000000000000use std::ops::Deref; use gix_pack::cache::DecodeEntry; use crate::store::{handle, load_index}; pub(crate) mod error { use crate::{loose, pack}; /// Returned by [`Handle::try_find()`][gix_pack::Find::try_find()] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("An error occurred while obtaining an object from the loose object store")] Loose(#[from] loose::find::Error), #[error("An error occurred while obtaining an object from the packed object store")] Pack(#[from] pack::data::decode::Error), #[error(transparent)] LoadIndex(#[from] crate::store::load_index::Error), #[error(transparent)] LoadPack(#[from] std::io::Error), #[error(transparent)] EntryType(#[from] gix_pack::data::entry::decode::Error), #[error("Reached recursion limit of {} while resolving ref delta bases for {}", .max_depth, .id)] DeltaBaseRecursionLimit { /// the maximum recursion depth we encountered. max_depth: usize, /// The original object to lookup id: gix_hash::ObjectId, }, #[error("The base object {} could not be found but is required to decode {}", .base_id, .id)] DeltaBaseMissing { /// the id of the base object which failed to lookup base_id: gix_hash::ObjectId, /// The original object to lookup id: gix_hash::ObjectId, }, #[error("An error occurred when looking up a ref delta base object {} to decode {}", .base_id, .id)] DeltaBaseLookup { #[source] err: Box, /// the id of the base object which failed to lookup base_id: gix_hash::ObjectId, /// The original object to lookup id: gix_hash::ObjectId, }, } #[derive(Copy, Clone)] pub(crate) struct DeltaBaseRecursion<'a> { pub depth: usize, pub original_id: &'a gix_hash::oid, } impl<'a> DeltaBaseRecursion<'a> { pub fn new(id: &'a gix_hash::oid) -> Self { Self { original_id: id, depth: 0, } } pub fn inc_depth(mut self) -> Self { self.depth += 1; self } } #[cfg(test)] mod tests { use super::*; #[test] fn error_size() { let actual = std::mem::size_of::(); assert!(actual <= 88, "{actual} <= 88: should not grow without us noticing"); } } } pub use error::Error; use gix_features::zlib; use crate::store::types::PackId; impl super::Handle where S: Deref + Clone, { fn try_find_cached_inner<'a, 'b>( &'b self, mut id: &'b gix_hash::oid, buffer: &'a mut Vec, inflate: &mut zlib::Inflate, pack_cache: &mut dyn DecodeEntry, snapshot: &mut load_index::Snapshot, recursion: Option>, ) -> Result, Option)>, Error> { if let Some(r) = recursion { if r.depth >= self.max_recursion_depth { return Err(Error::DeltaBaseRecursionLimit { max_depth: self.max_recursion_depth, id: r.original_id.to_owned(), }); } } else if !self.ignore_replacements { if let Ok(pos) = self .store .replacements .binary_search_by(|(map_this, _)| map_this.as_ref().cmp(id)) { id = self.store.replacements[pos].1.as_ref(); } } 'outer: loop { { let marker = snapshot.marker; for (idx, index) in snapshot.indices.iter_mut().enumerate() { if let Some(handle::index_lookup::Outcome { object_index: handle::IndexForObjectInPack { pack_id, pack_offset }, index_file, pack: possibly_pack, }) = index.lookup(id) { let pack = match possibly_pack { Some(pack) => pack, None => match self.store.load_pack(pack_id, marker)? 
{ Some(pack) => { *possibly_pack = Some(pack); possibly_pack.as_deref().expect("just put it in") } None => { // The pack wasn't available anymore so we are supposed to try another round with a fresh index match self.store.load_one_index(self.refresh, snapshot.marker)? { Some(new_snapshot) => { *snapshot = new_snapshot; self.clear_cache(); continue 'outer; } None => { // nothing new in the index, kind of unexpected to not have a pack but to also // to have no new index yet. We set the new index before removing any slots, so // this should be observable. return Ok(None); } } } }, }; let entry = pack.entry(pack_offset)?; let header_size = entry.header_size(); let res = pack.decode_entry( entry, buffer, inflate, &|id, _out| { let pack_offset = index_file.pack_offset_by_id(id)?; pack.entry(pack_offset) .ok() .map(gix_pack::data::decode::entry::ResolvedBase::InPack) }, pack_cache, ); let res = match res { Ok(r) => Ok(( gix_object::Data { kind: r.kind, data: buffer.as_slice(), }, Some(gix_pack::data::entry::Location { pack_id: pack.id, pack_offset, entry_size: r.compressed_size + header_size, }), )), Err(gix_pack::data::decode::Error::DeltaBaseUnresolved(base_id)) => { // Only with multi-pack indices it's allowed to jump to refer to other packs within this // multi-pack. Otherwise this would constitute a thin pack which is only allowed in transit. // However, if we somehow end up with that, we will resolve it safely, even though we could // avoid handling this case and error instead. // Since this is a special case, we just allocate here to make it work. It's an actual delta-ref object // which is sent by some servers that points to an object outside of the pack we are looking // at right now. With the complexities of loading packs, we go into recursion here. Git itself // doesn't do a cycle check, and we won't either but limit the recursive depth. // The whole ordeal isn't as efficient as it could be due to memory allocation and // later mem-copying when trying again. let mut buf = Vec::new(); let obj_kind = self .try_find_cached_inner( &base_id, &mut buf, inflate, pack_cache, snapshot, recursion .map(error::DeltaBaseRecursion::inc_depth) .or_else(|| error::DeltaBaseRecursion::new(id).into()), ) .map_err(|err| Error::DeltaBaseLookup { err: Box::new(err), base_id, id: id.to_owned(), })? .ok_or_else(|| Error::DeltaBaseMissing { base_id, id: id.to_owned(), })? 
.0 .kind; let handle::index_lookup::Outcome { object_index: handle::IndexForObjectInPack { pack_id: _, pack_offset, }, index_file, pack: possibly_pack, } = match snapshot.indices[idx].lookup(id) { Some(res) => res, None => { let mut out = None; for index in &mut snapshot.indices { out = index.lookup(id); if out.is_some() { break; } } out.unwrap_or_else(|| { panic!("could not find object {id} in any index after looking up one of its base objects {base_id}" ) }) } }; let pack = possibly_pack .as_ref() .expect("pack to still be available like just now"); let entry = pack.entry(pack_offset)?; let header_size = entry.header_size(); pack.decode_entry( entry, buffer, inflate, &|id, out| { index_file .pack_offset_by_id(id) .and_then(|pack_offset| { pack.entry(pack_offset) .ok() .map(gix_pack::data::decode::entry::ResolvedBase::InPack) }) .or_else(|| { (id == base_id).then(|| { out.resize(buf.len(), 0); out.copy_from_slice(buf.as_slice()); gix_pack::data::decode::entry::ResolvedBase::OutOfPack { kind: obj_kind, end: out.len(), } }) }) }, pack_cache, ) .map(move |r| { ( gix_object::Data { kind: r.kind, data: buffer.as_slice(), }, Some(gix_pack::data::entry::Location { pack_id: pack.id, pack_offset, entry_size: r.compressed_size + header_size, }), ) }) } Err(err) => Err(err), }?; if idx != 0 { snapshot.indices.swap(0, idx); } return Ok(Some(res)); } } } for lodb in snapshot.loose_dbs.iter() { // TODO: remove this double-lookup once the borrow checker allows it. if lodb.contains(id) { return lodb .try_find(id, buffer) .map(|obj| obj.map(|obj| (obj, None))) .map_err(Into::into); } } match self.store.load_one_index(self.refresh, snapshot.marker)? { Some(new_snapshot) => { *snapshot = new_snapshot; self.clear_cache(); } None => return Ok(None), } } } pub(crate) fn clear_cache(&self) { self.packed_object_count.borrow_mut().take(); } } impl gix_pack::Find for super::Handle where S: Deref + Clone, { // TODO: probably make this method fallible, but that would mean its own error type. fn contains(&self, id: &gix_hash::oid) -> bool { let mut snapshot = self.snapshot.borrow_mut(); loop { for (idx, index) in snapshot.indices.iter().enumerate() { if index.contains(id) { if idx != 0 { snapshot.indices.swap(0, idx); } return true; } } for lodb in snapshot.loose_dbs.iter() { if lodb.contains(id) { return true; } } match self.store.load_one_index(self.refresh, snapshot.marker) { Ok(Some(new_snapshot)) => { *snapshot = new_snapshot; self.clear_cache(); } Ok(None) => return false, // nothing more to load, or our refresh mode doesn't allow disk refreshes Err(_) => return false, // something went wrong, nothing we can communicate here with this trait. TODO: Maybe that should change? } } } fn try_find_cached<'a>( &self, id: &gix_hash::oid, buffer: &'a mut Vec, pack_cache: &mut dyn DecodeEntry, ) -> Result, Option)>, gix_object::find::Error> { let mut snapshot = self.snapshot.borrow_mut(); let mut inflate = self.inflate.borrow_mut(); self.try_find_cached_inner(id, buffer, &mut inflate, pack_cache, &mut snapshot, None) .map_err(|err| Box::new(err) as _) } fn location_by_oid(&self, id: &gix_hash::oid, buf: &mut Vec) -> Option { assert!( matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)), "BUG: handle must be configured to `prevent_pack_unload()` before using this method" ); assert!(self.store_ref().replacements.is_empty() || self.ignore_replacements, "Everything related to packing must not use replacements. 
These are not used here, but it should be turned off for good measure."); let mut snapshot = self.snapshot.borrow_mut(); let mut inflate = self.inflate.borrow_mut(); 'outer: loop { { let marker = snapshot.marker; for (idx, index) in snapshot.indices.iter_mut().enumerate() { if let Some(handle::index_lookup::Outcome { object_index: handle::IndexForObjectInPack { pack_id, pack_offset }, index_file: _, pack: possibly_pack, }) = index.lookup(id) { let pack = match possibly_pack { Some(pack) => pack, None => match self.store.load_pack(pack_id, marker).ok()? { Some(pack) => { *possibly_pack = Some(pack); possibly_pack.as_deref().expect("just put it in") } None => { // The pack wasn't available anymore so we are supposed to try another round with a fresh index match self.store.load_one_index(self.refresh, snapshot.marker).ok()? { Some(new_snapshot) => { *snapshot = new_snapshot; self.clear_cache(); continue 'outer; } None => { // nothing new in the index, kind of unexpected to not have a pack but to also // to have no new index yet. We set the new index before removing any slots, so // this should be observable. return None; } } } }, }; let entry = pack.entry(pack_offset).ok()?; buf.resize(entry.decompressed_size.try_into().expect("representable size"), 0); assert_eq!(pack.id, pack_id.to_intrinsic_pack_id(), "both ids must always match"); let res = pack .decompress_entry(&entry, &mut inflate, buf) .ok() .map(|entry_size_past_header| gix_pack::data::entry::Location { pack_id: pack.id, pack_offset, entry_size: entry.header_size() + entry_size_past_header, }); if idx != 0 { snapshot.indices.swap(0, idx); } return res; } } } match self.store.load_one_index(self.refresh, snapshot.marker).ok()? { Some(new_snapshot) => { *snapshot = new_snapshot; self.clear_cache(); } None => return None, } } } fn pack_offsets_and_oid(&self, pack_id: u32) -> Option> { assert!( matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)), "BUG: handle must be configured to `prevent_pack_unload()` before using this method" ); let pack_id = PackId::from_intrinsic_pack_id(pack_id); loop { let snapshot = self.snapshot.borrow(); { for index in &snapshot.indices { if let Some(iter) = index.iter(pack_id) { return Some(iter.map(|e| (e.pack_offset, e.oid)).collect()); } } } match self.store.load_one_index(self.refresh, snapshot.marker).ok()? 
{ Some(new_snapshot) => { drop(snapshot); *self.snapshot.borrow_mut() = new_snapshot; } None => return None, } } } fn entry_by_location(&self, location: &gix_pack::data::entry::Location) -> Option { assert!( matches!(self.token.as_ref(), Some(handle::Mode::KeepDeletedPacksAvailable)), "BUG: handle must be configured to `prevent_pack_unload()` before using this method" ); let pack_id = PackId::from_intrinsic_pack_id(location.pack_id); let mut snapshot = self.snapshot.borrow_mut(); let marker = snapshot.marker; loop { { for index in &mut snapshot.indices { if let Some(possibly_pack) = index.pack(pack_id) { let pack = match possibly_pack { Some(pack) => pack, None => { let pack = self.store.load_pack(pack_id, marker).ok()?.expect( "BUG: pack must exist from previous call to location_by_oid() and must not be unloaded", ); *possibly_pack = Some(pack); possibly_pack.as_deref().expect("just put it in") } }; return pack .entry_slice(location.entry_range(location.pack_offset)) .map(|data| gix_pack::find::Entry { data: data.to_owned(), version: pack.version(), }); } } } snapshot.indices.insert( 0, self.store .index_by_id(pack_id, marker) .expect("BUG: index must always be present, must not be unloaded or overwritten"), ); } } } impl gix_object::Find for super::Handle where S: Deref + Clone, Self: gix_pack::Find, { fn try_find<'a>( &self, id: &gix_hash::oid, buffer: &'a mut Vec, ) -> Result>, gix_object::find::Error> { gix_pack::Find::try_find(self, id, buffer).map(|t| t.map(|t| t.0)) } } impl gix_object::FindHeader for super::Handle where S: Deref + Clone, { fn try_header(&self, id: &gix_hash::oid) -> Result, gix_object::find::Error> { let mut snapshot = self.snapshot.borrow_mut(); let mut inflate = self.inflate.borrow_mut(); self.try_header_inner(id, &mut inflate, &mut snapshot, None) .map(|maybe_header| { maybe_header.map(|hdr| gix_object::Header { kind: hdr.kind(), size: hdr.size(), }) }) .map_err(|err| Box::new(err) as _) } } impl gix_object::Exists for super::Handle where S: Deref + Clone, Self: gix_pack::Find, { fn exists(&self, id: &gix_hash::oid) -> bool { gix_pack::Find::contains(self, id) } } gix-odb-0.66.0/src/store_impls/dynamic/handle.rs000064400000000000000000000377101046102023000176320ustar 00000000000000use std::{ cell::RefCell, ops::Deref, rc::Rc, sync::{atomic::Ordering, Arc}, }; use gix_features::threading::OwnShared; use gix_hash::oid; use crate::store::{handle, types, RefreshMode}; pub(crate) enum SingleOrMultiIndex { Single { index: Arc, data: Option>, }, Multi { index: Arc, data: Vec>>, }, } /// A utility to allow looking up pack offsets for a particular pack pub(crate) enum IntraPackLookup<'a> { Single(&'a gix_pack::index::File), /// the internal pack-id inside of a multi-index for which the lookup is supposed to be. /// Used to prevent ref-delta OIDs to, for some reason, point to a different pack. 
Multi { index: &'a gix_pack::multi_index::File, required_pack_index: gix_pack::multi_index::PackIndex, }, } impl IntraPackLookup<'_> { pub(crate) fn pack_offset_by_id(&self, id: &oid) -> Option { match self { IntraPackLookup::Single(index) => index .lookup(id) .map(|entry_index| index.pack_offset_at_index(entry_index)), IntraPackLookup::Multi { index, required_pack_index, } => index.lookup(id).and_then(|entry_index| { let (pack_index, pack_offset) = index.pack_id_and_pack_offset_at_index(entry_index); (pack_index == *required_pack_index).then_some(pack_offset) }), } } } pub struct IndexLookup { pub(crate) file: SingleOrMultiIndex, /// The index we were found at in the slot map pub(crate) id: types::IndexId, } pub struct IndexForObjectInPack { /// The internal identifier of the pack itself, which either is referred to by an index or a multi-pack index. pub(crate) pack_id: types::PackId, /// The offset at which the object's entry can be found pub(crate) pack_offset: u64, } pub(crate) mod index_lookup { use std::{collections::HashSet, sync::Arc}; use gix_hash::oid; use crate::store::{handle, handle::IntraPackLookup, types}; pub(crate) struct Outcome<'a> { pub object_index: handle::IndexForObjectInPack, pub index_file: IntraPackLookup<'a>, pub pack: &'a mut Option>, } impl handle::IndexLookup { /// Return an iterator over the entries of the given pack. The `pack_id` is required to identify a pack uniquely within /// a potential multi-pack index. pub(crate) fn iter( &self, pack_id: types::PackId, ) -> Option + '_>> { (self.id == pack_id.index).then(|| match &self.file { handle::SingleOrMultiIndex::Single { index, .. } => index.iter(), handle::SingleOrMultiIndex::Multi { index, .. } => { let pack_index = pack_id.multipack_index.expect( "BUG: multi-pack index must be set if this is a multi-pack, pack-indices seem unstable", ); Box::new(index.iter().filter_map(move |e| { (e.pack_index == pack_index).then_some(gix_pack::index::Entry { oid: e.oid, pack_offset: e.pack_offset, crc32: None, }) })) } }) } pub(crate) fn pack(&mut self, pack_id: types::PackId) -> Option<&'_ mut Option>> { (self.id == pack_id.index).then(move || match &mut self.file { handle::SingleOrMultiIndex::Single { data, .. } => data, handle::SingleOrMultiIndex::Multi { data, .. } => { let pack_index = pack_id.multipack_index.expect( "BUG: multi-pack index must be set if this is a multi-pack, pack-indices seem unstable", ); &mut data[pack_index as usize] } }) } /// Return true if the given object id exists in this index pub(crate) fn contains(&self, object_id: &oid) -> bool { match &self.file { handle::SingleOrMultiIndex::Single { index, .. } => index.lookup(object_id).is_some(), handle::SingleOrMultiIndex::Multi { index, .. } => index.lookup(object_id).is_some(), } } /// Return true if the given object id exists in this index pub(crate) fn oid_at_index(&self, entry_index: u32) -> &gix_hash::oid { match &self.file { handle::SingleOrMultiIndex::Single { index, .. } => index.oid_at_index(entry_index), handle::SingleOrMultiIndex::Multi { index, .. } => index.oid_at_index(entry_index), } } /// Return the amount of objects contained in the index, essentially the number of object ids. pub(crate) fn num_objects(&self) -> u32 { match &self.file { handle::SingleOrMultiIndex::Single { index, .. } => index.num_objects(), handle::SingleOrMultiIndex::Multi { index, .. } => index.num_objects(), } } /// Call `lookup_prefix(…)` on either index or multi-index, and transform matches into an object id. 
pub(crate) fn lookup_prefix( &self, prefix: gix_hash::Prefix, candidates: Option<&mut HashSet>, ) -> Option { let mut candidate_entries = candidates.as_ref().map(|_| 0..0); let res = match &self.file { handle::SingleOrMultiIndex::Single { index, .. } => { index.lookup_prefix(prefix, candidate_entries.as_mut()) } handle::SingleOrMultiIndex::Multi { index, .. } => { index.lookup_prefix(prefix, candidate_entries.as_mut()) } }?; if let Some((candidates, entries)) = candidates.zip(candidate_entries) { candidates.extend(entries.map(|entry| self.oid_at_index(entry).to_owned())); } Some(res.map(|entry_index| self.oid_at_index(entry_index).to_owned())) } /// See if the oid is contained in this index, and return its full id for lookup possibly alongside its data file if already /// loaded. /// Also return the index itself as it's needed to resolve intra-pack ref-delta objects. They are a possibility even though /// they won't be used in practice as it's more efficient to store their offsets. /// If it is not loaded, ask it to be loaded and put it into the returned mutable option for safe-keeping. pub(crate) fn lookup(&mut self, object_id: &oid) -> Option> { let id = self.id; match &mut self.file { handle::SingleOrMultiIndex::Single { index, data } => index.lookup(object_id).map(move |idx| Outcome { object_index: handle::IndexForObjectInPack { pack_id: types::PackId { index: id, multipack_index: None, }, pack_offset: index.pack_offset_at_index(idx), }, index_file: IntraPackLookup::Single(index), pack: data, }), handle::SingleOrMultiIndex::Multi { index, data } => index.lookup(object_id).map(move |idx| { let (pack_index, pack_offset) = index.pack_id_and_pack_offset_at_index(idx); Outcome { object_index: handle::IndexForObjectInPack { pack_id: types::PackId { index: id, multipack_index: Some(pack_index), }, pack_offset, }, index_file: IntraPackLookup::Multi { index, required_pack_index: pack_index, }, pack: &mut data[pack_index as usize], } }), } } } } pub(crate) enum Mode { DeletedPacksAreInaccessible, /// This mode signals that we should not unload packs even after they went missing. KeepDeletedPacksAvailable, } /// Handle registration impl super::Store { pub(crate) fn register_handle(&self) -> Mode { self.num_handles_unstable.fetch_add(1, Ordering::Relaxed); Mode::DeletedPacksAreInaccessible } pub(crate) fn remove_handle(&self, mode: Mode) { match mode { Mode::KeepDeletedPacksAvailable => { let _lock = self.write.lock(); self.num_handles_stable.fetch_sub(1, Ordering::SeqCst) } Mode::DeletedPacksAreInaccessible => self.num_handles_unstable.fetch_sub(1, Ordering::Relaxed), }; } pub(crate) fn upgrade_handle(&self, mode: Mode) -> Mode { if let Mode::DeletedPacksAreInaccessible = mode { let _lock = self.write.lock(); self.num_handles_stable.fetch_add(1, Ordering::SeqCst); self.num_handles_unstable.fetch_sub(1, Ordering::SeqCst); } Mode::KeepDeletedPacksAvailable } } /// Handle creation impl super::Store { /// The amount of times a ref-delta base can be followed when multi-indices are involved. pub const INITIAL_MAX_RECURSION_DEPTH: usize = 32; /// Create a new cache filled with a handle to this store, if this store is supporting shared ownership. /// /// Note that the actual type of `OwnShared` depends on the `parallel` feature toggle of the `gix-features` crate. pub fn to_cache(self: &OwnShared) -> crate::Cache>> { self.to_handle().into() } /// Create a new cache filled with a handle to this store if this store is held in an `Arc`. 
pub fn to_cache_arc(self: &Arc) -> crate::Cache>> { self.to_handle_arc().into() } /// Create a new database handle to this store if this store is supporting shared ownership. /// /// See also, [`to_cache()`][super::Store::to_cache()] which is probably more useful. pub fn to_handle(self: &OwnShared) -> super::Handle> { let token = self.register_handle(); super::Handle { store: self.clone(), refresh: RefreshMode::default(), ignore_replacements: false, token: Some(token), inflate: RefCell::new(Default::default()), snapshot: RefCell::new(self.collect_snapshot()), max_recursion_depth: Self::INITIAL_MAX_RECURSION_DEPTH, packed_object_count: Default::default(), } } /// Create a new database handle to this store if this store is held in an `Arc`. /// /// This method is useful in applications that know they will use threads. pub fn to_handle_arc(self: &Arc) -> super::Handle> { let token = self.register_handle(); super::Handle { store: self.clone(), refresh: Default::default(), ignore_replacements: false, token: Some(token), inflate: RefCell::new(Default::default()), snapshot: RefCell::new(self.collect_snapshot()), max_recursion_depth: Self::INITIAL_MAX_RECURSION_DEPTH, packed_object_count: Default::default(), } } /// Transform the only instance into an `Arc` or panic if this is not the only Rc handle /// to the contained store. /// /// This is meant to be used when the `gix_features::threading::OwnShared` refers to an `Rc` as it was compiled without the /// `parallel` feature toggle. pub fn into_shared_arc(self: OwnShared) -> Arc { match OwnShared::try_unwrap(self) { Ok(this) => Arc::new(this), Err(_) => panic!("BUG: Must be called when there is only one owner for this RC"), } } } impl super::Handle where S: Deref + Clone, { /// Call once if pack ids are stored and later used for lookup, meaning they should always remain mapped and not be unloaded /// even if they disappear from disk. /// This must be called if there is a chance that git maintenance is happening while a pack is created. pub fn prevent_pack_unload(&mut self) { self.token = self.token.take().map(|token| self.store.upgrade_handle(token)); } /// Return a shared reference to the contained store. pub fn store_ref(&self) -> &S::Target { &self.store } /// Return an owned store with shared ownership. pub fn store(&self) -> S { self.store.clone() } /// Set the handle to never cause ODB refreshes if an object could not be found. /// /// The latter is the default, as typically all objects referenced in a git repository are contained in the local clone. /// More recently, however, this doesn't always have to be the case due to sparse checkouts and other ways to only have a /// limited amount of objects available locally. pub fn refresh_never(&mut self) { self.refresh = RefreshMode::Never; } /// Return the current refresh mode. 
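// A sketch of typical handle setup, assuming `store: gix_features::threading::OwnShared<Store>`
// (or an `Arc<Store>` for the `*_arc` variants) was opened beforehand:
//
//     let mut handle = store.to_handle();
//     handle.refresh_never();          // don't rescan the object directory on cache misses
//     let cache = store.to_cache();    // alternatively, start with the caching wrapper right away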
pub fn refresh_mode(&mut self) -> RefreshMode { self.refresh } } impl Drop for super::Handle where S: Deref + Clone, { fn drop(&mut self) { if let Some(token) = self.token.take() { self.store.remove_handle(token); } } } impl TryFrom<&super::Store> for super::Store { type Error = std::io::Error; fn try_from(s: &super::Store) -> Result { super::Store::at_opts( s.path().into(), &mut s.replacements(), crate::store::init::Options { slots: crate::store::init::Slots::Given(s.files.len().try_into().expect("BUG: too many slots")), object_hash: Default::default(), use_multi_pack_index: false, current_dir: s.current_dir.clone().into(), }, ) } } impl super::Handle> { /// Convert a ref counted store into one that is ref-counted and thread-safe, by creating a new Store. pub fn into_arc(self) -> std::io::Result>> { let store = Arc::new(super::Store::try_from(self.store_ref())?); let mut cache = store.to_handle_arc(); cache.refresh = self.refresh; cache.max_recursion_depth = self.max_recursion_depth; Ok(cache) } } impl super::Handle> { /// Convert a ref counted store into one that is ref-counted and thread-safe, by creating a new Store pub fn into_arc(self) -> std::io::Result>> { Ok(self) } } impl Clone for super::Handle where S: Deref + Clone, { fn clone(&self) -> Self { super::Handle { store: self.store.clone(), refresh: self.refresh, ignore_replacements: self.ignore_replacements, token: { let token = self.store.register_handle(); match self.token.as_ref().expect("token is always set here ") { handle::Mode::DeletedPacksAreInaccessible => token, handle::Mode::KeepDeletedPacksAvailable => self.store.upgrade_handle(token), } .into() }, inflate: RefCell::new(Default::default()), snapshot: RefCell::new(self.store.collect_snapshot()), max_recursion_depth: self.max_recursion_depth, packed_object_count: Default::default(), } } } gix-odb-0.66.0/src/store_impls/dynamic/header.rs000064400000000000000000000223221046102023000176200ustar 00000000000000use std::ops::Deref; use gix_features::zlib; use gix_hash::oid; use super::find::Error; use crate::{ find::Header, store::{find::error::DeltaBaseRecursion, handle, load_index}, }; impl super::Handle where S: Deref + Clone, { pub(crate) fn try_header_inner<'b>( &'b self, mut id: &'b gix_hash::oid, inflate: &mut zlib::Inflate, snapshot: &mut load_index::Snapshot, recursion: Option>, ) -> Result, Error> { if let Some(r) = recursion { if r.depth >= self.max_recursion_depth { return Err(Error::DeltaBaseRecursionLimit { max_depth: self.max_recursion_depth, id: r.original_id.to_owned(), }); } } else if !self.ignore_replacements { if let Ok(pos) = self .store .replacements .binary_search_by(|(map_this, _)| map_this.as_ref().cmp(id)) { id = self.store.replacements[pos].1.as_ref(); } } 'outer: loop { { let marker = snapshot.marker; for (idx, index) in snapshot.indices.iter_mut().enumerate() { if let Some(handle::index_lookup::Outcome { object_index: handle::IndexForObjectInPack { pack_id, pack_offset }, index_file, pack: possibly_pack, }) = index.lookup(id) { let pack = match possibly_pack { Some(pack) => pack, None => match self.store.load_pack(pack_id, marker)? { Some(pack) => { *possibly_pack = Some(pack); possibly_pack.as_deref().expect("just put it in") } None => { // The pack wasn't available anymore so we are supposed to try another round with a fresh index match self.store.load_one_index(self.refresh, snapshot.marker)? 
{ Some(new_snapshot) => { *snapshot = new_snapshot; self.clear_cache(); continue 'outer; } None => { // nothing new in the index, kind of unexpected to not have a pack but to also // to have no new index yet. We set the new index before removing any slots, so // this should be observable. return Ok(None); } } } }, }; let entry = pack.entry(pack_offset)?; let res = match pack.decode_header(entry, inflate, &|id| { index_file.pack_offset_by_id(id).and_then(|pack_offset| { pack.entry(pack_offset) .ok() .map(gix_pack::data::decode::header::ResolvedBase::InPack) }) }) { Ok(header) => Ok(header.into()), Err(gix_pack::data::decode::Error::DeltaBaseUnresolved(base_id)) => { // Only with multi-pack indices it's allowed to jump to refer to other packs within this // multi-pack. Otherwise this would constitute a thin pack which is only allowed in transit. // However, if we somehow end up with that, we will resolve it safely, even though we could // avoid handling this case and error instead. let hdr = self .try_header_inner( &base_id, inflate, snapshot, recursion .map(DeltaBaseRecursion::inc_depth) .or_else(|| DeltaBaseRecursion::new(id).into()), ) .map_err(|err| Error::DeltaBaseLookup { err: Box::new(err), base_id, id: id.to_owned(), })? .ok_or_else(|| Error::DeltaBaseMissing { base_id, id: id.to_owned(), })?; let handle::index_lookup::Outcome { object_index: handle::IndexForObjectInPack { pack_id: _, pack_offset, }, index_file, pack: possibly_pack, } = match snapshot.indices[idx].lookup(id) { Some(res) => res, None => { let mut out = None; for index in &mut snapshot.indices { out = index.lookup(id); if out.is_some() { break; } } out.unwrap_or_else(|| { panic!("could not find object {id} in any index after looking up one of its base objects {base_id}") }) } }; let pack = possibly_pack .as_ref() .expect("pack to still be available like just now"); let entry = pack.entry(pack_offset)?; pack.decode_header(entry, inflate, &|id| { index_file .pack_offset_by_id(id) .and_then(|pack_offset| { pack.entry(pack_offset) .ok() .map(gix_pack::data::decode::header::ResolvedBase::InPack) }) .or_else(|| { (id == base_id).then(|| { gix_pack::data::decode::header::ResolvedBase::OutOfPack { kind: hdr.kind(), num_deltas: hdr.num_deltas(), } }) }) }) .map(Into::into) } Err(err) => Err(err), }?; if idx != 0 { snapshot.indices.swap(0, idx); } return Ok(Some(res)); } } } for lodb in snapshot.loose_dbs.iter() { // TODO: remove this double-lookup once the borrow checker allows it. if lodb.contains(id) { return lodb.try_header(id).map(|opt| opt.map(Into::into)).map_err(Into::into); } } match self.store.load_one_index(self.refresh, snapshot.marker)? { Some(new_snapshot) => { *snapshot = new_snapshot; self.clear_cache(); } None => return Ok(None), } } } } impl crate::Header for super::Handle where S: Deref + Clone, { fn try_header(&self, id: &oid) -> Result, gix_object::find::Error> { let mut snapshot = self.snapshot.borrow_mut(); let mut inflate = self.inflate.borrow_mut(); self.try_header_inner(id, &mut inflate, &mut snapshot, None) .map_err(|err| Box::new(err) as _) } } gix-odb-0.66.0/src/store_impls/dynamic/init.rs000064400000000000000000000127311046102023000173360ustar 00000000000000use std::{path::PathBuf, sync::Arc}; use arc_swap::ArcSwap; use crate::{ store::types::{MutableIndexAndPack, SlotMapIndex}, Store, }; /// Options for use in [`Store::at_opts()`]. #[derive(Clone, Debug)] pub struct Options { /// How to obtain a size for the slot map. 
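// A sketch of a header-only query as provided by the `crate::Header`/`gix_object::FindHeader`
// implementations above - it yields kind and decompressed size without loading the object data
// (assumptions: `odb` is a `super::Handle`, `id` a `gix_hash::ObjectId`, in a `Result` context):
//
//     use gix_object::FindHeader;
//
//     if let Some(header) = odb.try_header(&id)? {
//         println!("{:?}, {} bytes once decoded", header.kind, header.size);
//     }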
pub slots: Slots, /// The kind of hash we expect in our packs and would use for loose object iteration and object writing. pub object_hash: gix_hash::Kind, /// If false, no multi-pack indices will be used. If true, they will be used if their hash matches `object_hash`. pub use_multi_pack_index: bool, /// The current directory of the process at the time of instantiation. /// If unset, it will be retrieved using `gix_fs::current_dir(false)`. pub current_dir: Option, } impl Default for Options { fn default() -> Self { Options { slots: Default::default(), object_hash: Default::default(), use_multi_pack_index: true, current_dir: None, } } } /// Configures the amount of slots in the index slotmap, which is fixed throughout the existence of the store. #[derive(Copy, Clone, Debug)] pub enum Slots { /// The amount of slots to use, that is the total amount of indices we can hold at a time. /// Using this has the advantage of avoiding an initial directory listing of the repository, and is recommended /// on the server side where the repository setup is controlled. /// /// Note that this won't affect their packs, as each index can have one or more packs associated with it. Given(u16), /// Compute the amount of slots needed, as probably best used on the client side where a variety of repositories is encountered. AsNeededByDiskState { /// 1.0 means no safety, 1.1 means 10% more slots than needed multiplier: f32, /// The minimum amount of slots to assume minimum: usize, }, } impl Default for Slots { fn default() -> Self { Slots::AsNeededByDiskState { multiplier: 1.1, minimum: 32, } } } impl Store { /// Open the store at `objects_dir` (containing loose objects and `packs/`), which must only be a directory for /// the store to be created without any additional work being done. /// `slots` defines how many multi-pack-indices as well as indices we can know about at a time, which includes /// the allowance for all additional object databases coming in via `alternates` as well. /// Note that the `slots` isn't used for packs, these are included with their multi-index or index respectively. /// For example, In a repository with 250m objects and geometric packing one would expect 27 index/pack pairs, /// or a single multi-pack index. /// `replacements` is an iterator over pairs of old and new object ids for replacement support. /// This means that when asking for object `X`, one will receive object `X-replaced` given an iterator like `Some((X, X-replaced))`. pub fn at_opts( objects_dir: PathBuf, replacements: &mut dyn Iterator, Options { slots, object_hash, use_multi_pack_index, current_dir, }: Options, ) -> std::io::Result { let _span = gix_features::trace::detail!("gix_odb::Store::at()"); let current_dir = current_dir.map_or_else( || { // It's only used for real-pathing alternate paths and there it just needs to be consistent (enough). 
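// A sketch of opening a store with these options (assumptions: `objects_dir: PathBuf` points at
// an existing `.git/objects` directory and no replacement objects are configured):
//
//     let store = Store::at_opts(
//         objects_dir,
//         &mut std::iter::empty::<(gix_hash::ObjectId, gix_hash::ObjectId)>(),
//         Options {
//             slots: Slots::AsNeededByDiskState { multiplier: 1.1, minimum: 32 },
//             object_hash: gix_hash::Kind::Sha1,
//             use_multi_pack_index: true,
//             current_dir: None,
//         },
//     )?;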
gix_fs::current_dir(false) }, Ok, )?; if !objects_dir.is_dir() { return Err(std::io::Error::new( std::io::ErrorKind::Other, // TODO: use NotADirectory when stabilized format!("'{}' wasn't a directory", objects_dir.display()), )); } let slot_count = match slots { Slots::Given(n) => n as usize, Slots::AsNeededByDiskState { multiplier, minimum } => { let mut db_paths = crate::alternate::resolve(objects_dir.clone(), ¤t_dir) .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))?; db_paths.insert(0, objects_dir.clone()); let num_slots = super::Store::collect_indices_and_mtime_sorted_by_size(db_paths, None, None) .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err))? .len(); ((num_slots as f32 * multiplier) as usize).max(minimum) } }; if slot_count > crate::store::types::PackId::max_indices() { return Err(std::io::Error::new( std::io::ErrorKind::Other, "Cannot use more than 1^15 slots", )); } let mut replacements: Vec<_> = replacements.collect(); replacements.sort_by(|a, b| a.0.cmp(&b.0)); Ok(Store { current_dir, write: Default::default(), replacements, path: objects_dir, files: Vec::from_iter(std::iter::repeat_with(MutableIndexAndPack::default).take(slot_count)), index: ArcSwap::new(Arc::new(SlotMapIndex::default())), use_multi_pack_index, object_hash, num_handles_stable: Default::default(), num_handles_unstable: Default::default(), num_disk_state_consolidation: Default::default(), }) } } gix-odb-0.66.0/src/store_impls/dynamic/iter.rs000064400000000000000000000205011046102023000173300ustar 00000000000000use std::{ops::Deref, option::Option::None, sync::Arc, vec::IntoIter}; use gix_hash::ObjectId; use crate::{ loose, store::{handle, handle::SingleOrMultiIndex, types::PackId}, store_impls::dynamic, }; struct EntryForOrdering { pack_offset: u64, entry_index: u32, pack_index: u16, } enum State { Pack { index_iter: IntoIter, index: handle::IndexLookup, ordered_entries: Option>, entry_index: u32, num_objects: u32, }, Loose { iter: loose::Iter, index: usize, }, Depleted, } /// Define the order in which objects are returned. #[derive(Default, Debug, Copy, Clone)] pub enum Ordering { /// Traverse packs first as sorted by their index files in lexicographical order (sorted by object id), then traverse loose objects /// as sorted by their names as well. /// /// This mode uses no memory as it's the natural ordering of objects, and is best to obtain all object ids as quickly as possible, /// while noting that these may contain duplicates. However, it's very costly to obtain object information or decode them with this /// scheme as cache-hits are unlikely with it and memory maps are less efficient when loading them in random order. #[default] PackLexicographicalThenLooseLexicographical, /// Traverse packs first yielding object ids sorted by their position in the pack, with those at the beginning of the pack file coming first. /// Then follow loose objects sorted by their names. /// /// This mode allocates and as to pre-sort objects by their offsets, delaying the start of the iteration once per pack while keeping /// memory allocated once per pack. This price is usually worth paying once querying object information is planned as pack caches /// are more efficiently used that way. PackAscendingOffsetThenLooseLexicographical, } /// An iterator over all, _possibly duplicate_, objects of an object store, which by default uses no extra memory but yields an /// order that is costly to traverse when querying object information or decoding them. 
/// /// Use [`with_ordering()`][AllObjects::with_ordering()] to choose a performance trade-off. pub struct AllObjects { state: State, num_objects: usize, loose_dbs: Arc>, order: Ordering, } /// Builder impl AllObjects { /// Set the ordering of the objects returned, trading off memory and latency for object query performance. pub fn with_ordering(mut self, order: Ordering) -> Self { self.order = order; self } } impl AllObjects { /// Create a new iterator from a dynamic store, which will be forced to load all indices eagerly and in the current thread. pub fn new(db: &dynamic::Store) -> Result { let snapshot = db.load_all_indices()?; let packed_objects = snapshot .indices .iter() .fold(0usize, |dbc, index| dbc.saturating_add(index.num_objects() as usize)); let mut index_iter = snapshot.indices.into_iter(); let loose_dbs = snapshot.loose_dbs; let order = Default::default(); let state = match index_iter.next() { Some(index) => { let num_objects = index.num_objects(); State::Pack { index_iter, ordered_entries: maybe_sort_entries(&index, order), index, entry_index: 0, num_objects, } } None => { let index = 0; State::Loose { iter: loose_dbs.get(index).expect("at least one loose db").iter(), index, } } }; Ok(AllObjects { state, loose_dbs, num_objects: packed_objects, order, }) } } fn maybe_sort_entries(index: &handle::IndexLookup, order: Ordering) -> Option> { let mut order: Vec<_> = match order { Ordering::PackLexicographicalThenLooseLexicographical => return None, Ordering::PackAscendingOffsetThenLooseLexicographical => match &index.file { // We know that we cannot have more than u32 entry indices per pack. SingleOrMultiIndex::Single { index, .. } => index .iter() .enumerate() .map(|(idx, e)| EntryForOrdering { pack_offset: e.pack_offset, entry_index: idx as u32, pack_index: 0, }) .collect(), SingleOrMultiIndex::Multi { index, .. 
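// A sketch of enumerating every object id while trading memory for better pack-cache locality
// (assumptions: `store` is a `dynamic::Store`, and this runs in a `Result` context):
//
//     let objects = AllObjects::new(&store)?
//         .with_ordering(Ordering::PackAscendingOffsetThenLooseLexicographical);
//     for oid in objects {
//         let oid = oid?;
//         // …query information about `oid`, benefiting from pack caches in offset order…
//     }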
} => index .iter() .enumerate() .map(|(idx, e)| EntryForOrdering { pack_offset: e.pack_offset, entry_index: idx as u32, pack_index: { debug_assert!( e.pack_index < PackId::max_packs_in_multi_index(), "this shows the relation between u16 and pack_index (u32) and why this is OK" ); e.pack_index as u16 }, }) .collect(), }, }; order.sort_by(|a, b| { a.pack_index .cmp(&b.pack_index) .then_with(|| a.pack_offset.cmp(&b.pack_offset)) }); Some(order) } impl Iterator for AllObjects { type Item = Result; fn next(&mut self) -> Option { match &mut self.state { State::Depleted => None, State::Pack { index_iter, ordered_entries, index, entry_index, num_objects, } => { if *entry_index < *num_objects { let oid = match ordered_entries { Some(entries) => index.oid_at_index(entries[*entry_index as usize].entry_index), None => index.oid_at_index(*entry_index), } .to_owned(); *entry_index += 1; Some(Ok(oid)) } else { match index_iter.next() { Some(new_index) => { *ordered_entries = maybe_sort_entries(&new_index, self.order); *index = new_index; *entry_index = 0; *num_objects = index.num_objects(); } None => { let index = 0; self.state = State::Loose { iter: self.loose_dbs.get(index).expect("at least one loose odb").iter(), index, } } } self.next() } } State::Loose { iter, index } => match iter.next() { Some(id) => Some(id), None => { *index += 1; match self.loose_dbs.get(*index).map(loose::Store::iter) { Some(new_iter) => { *iter = new_iter; self.next() } None => { self.state = State::Depleted; None } } } }, } } fn size_hint(&self) -> (usize, Option) { (self.num_objects, None) } } impl super::Handle where S: Deref + Clone, { /// Return an iterator over all, _possibly duplicate_, objects, first the ones in all packs of all linked databases (via alternates), /// followed by all loose objects. pub fn iter(&self) -> Result { AllObjects::new(self.store_ref()) } } impl dynamic::Store { /// Like [`Handle::iter()`][super::Handle::iter()], but accessible directly on the store. pub fn iter(&self) -> Result { AllObjects::new(self) } } gix-odb-0.66.0/src/store_impls/dynamic/load_index.rs000064400000000000000000001077141046102023000205070ustar 00000000000000use std::{ collections::{BTreeMap, VecDeque}, ffi::OsStr, ops::Deref, path::{Path, PathBuf}, sync::{ atomic::{AtomicU16, Ordering}, Arc, }, time::SystemTime, }; use crate::store::{handle, types, RefreshMode}; pub(crate) struct Snapshot { /// Indices ready for object lookup or contains checks, ordered usually by modification data, recent ones first. pub(crate) indices: Vec, /// A set of loose objects dbs to search once packed objects weren't found. pub(crate) loose_dbs: Arc>, /// remember what this state represents and to compare to other states. pub(crate) marker: types::SlotIndexMarker, } mod error { use std::path::PathBuf; use gix_pack::multi_index::PackIndex; /// Returned by [`crate::at_opts()`] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("The objects directory at '{0}' is not an accessible directory")] Inaccessible(PathBuf), #[error(transparent)] Io(#[from] std::io::Error), #[error(transparent)] Alternate(#[from] crate::alternate::Error), #[error("The slotmap turned out to be too small with {} entries, would need {} more", .current, .needed)] InsufficientSlots { current: usize, needed: usize }, /// The problem here is that some logic assumes that more recent generations are higher than previous ones. If we would overflow, /// we would break that invariant which can lead to the wrong object from being returned. 
It would probably be super rare, but… /// let's not risk it. #[error( "Would have overflown amount of max possible generations of {}", super::Generation::MAX )] GenerationOverflow, #[error("Cannot numerically handle more than {limit} packs in a single multi-pack index, got {actual} in file {index_path:?}")] TooManyPacksInMultiIndex { actual: PackIndex, limit: PackIndex, index_path: PathBuf, }, } } pub use error::Error; use crate::store::types::{Generation, IndexAndPacks, MutableIndexAndPack, PackId, SlotMapIndex}; impl super::Store { /// Load all indices, refreshing from disk only if needed. pub(crate) fn load_all_indices(&self) -> Result { let mut snapshot = self.collect_snapshot(); while let Some(new_snapshot) = self.load_one_index(RefreshMode::Never, snapshot.marker)? { snapshot = new_snapshot; } Ok(snapshot) } /// If `None` is returned, there is new indices and the caller should give up. This is a possibility even if it's allowed to refresh /// as here might be no change to pick up. pub(crate) fn load_one_index( &self, refresh_mode: RefreshMode, marker: types::SlotIndexMarker, ) -> Result, Error> { let index = self.index.load(); if !index.is_initialized() { return self.consolidate_with_disk_state(true /* needs_init */, false /*load one new index*/); } if marker.generation != index.generation || marker.state_id != index.state_id() { // We have a more recent state already, provide it. Ok(Some(self.collect_snapshot())) } else { // always compare to the latest state // Nothing changed in the meantime, try to load another index… if self.load_next_index(index) { Ok(Some(self.collect_snapshot())) } else { // …and if that didn't yield anything new consider refreshing our disk state. match refresh_mode { RefreshMode::Never => Ok(None), RefreshMode::AfterAllIndicesLoaded => { self.consolidate_with_disk_state(false /* needs init */, true /*load one new index*/) } } } } } /// load a new index (if not yet loaded), and return true if one was indeed loaded (leading to a `state_id()` change) of the current index. /// Note that interacting with the slot-map is inherently racy and we have to deal with it, being conservative in what we even try to load /// as our index might already be out-of-date as we try to use it to learn what's next. fn load_next_index(&self, mut index: arc_swap::Guard>) -> bool { 'retry_with_changed_index: loop { let previous_state_id = index.state_id(); 'retry_with_next_slot_index: loop { match index .next_index_to_load .fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| { (current != index.slot_indices.len()).then_some(current + 1) }) { Ok(slot_map_index) => { // This slot-map index is in bounds and was only given to us. let _ongoing_operation = IncOnNewAndDecOnDrop::new(&index.num_indices_currently_being_loaded); let slot = &self.files[index.slot_indices[slot_map_index]]; let _lock = slot.write.lock(); if slot.generation.load(Ordering::SeqCst) > index.generation { // There is a disk consolidation in progress which just overwrote a slot that could be disposed with some other // index, one we didn't intend to load. // Continue with the next slot index in the hope there is something else we can do… continue 'retry_with_next_slot_index; } let mut bundle = slot.files.load_full(); let bundle_mut = Arc::make_mut(&mut bundle); if let Some(files) = bundle_mut.as_mut() { // these are always expected to be set, unless somebody raced us. We handle this later by retrying. 
let res = { let res = files.load_index(self.object_hash); slot.files.store(bundle); index.loaded_indices.fetch_add(1, Ordering::SeqCst); res }; match res { Ok(_) => { break 'retry_with_next_slot_index; } Err(_err) => { gix_features::trace::error!(err=?_err, "Failed to load index file - some objects may seem to not exist"); continue 'retry_with_next_slot_index; } } } } Err(_nothing_more_to_load) => { // There can be contention as many threads start working at the same time and take all the // slots to load indices for. Some threads might just be left-over and have to wait for something // to change. // TODO: potentially hot loop - could this be a condition variable? // This is a timing-based fix for the case that the `num_indices_being_loaded` isn't yet incremented, // and we might break out here without actually waiting for the loading operation. Then we'd fail to // observe a change and the underlying handler would not have all the indices it needs at its disposal. // Yielding means we will definitely loose enough time to observe the ongoing operation, // or its effects. std::thread::yield_now(); while index.num_indices_currently_being_loaded.load(Ordering::SeqCst) != 0 { std::thread::yield_now(); } break 'retry_with_next_slot_index; } } } if previous_state_id == index.state_id() { let potentially_new_index = self.index.load(); if Arc::as_ptr(&potentially_new_index) == Arc::as_ptr(&index) { // There isn't a new index with which to retry the whole ordeal, so nothing could be done here. return false; } else { // the index changed, worth trying again index = potentially_new_index; continue 'retry_with_changed_index; } } else { // something inarguably changed, probably an index was loaded. 'probably' because we consider failed loads valid attempts, // even they don't change anything for the caller which would then do a round for nothing. return true; } } } /// refresh and possibly clear out our existing data structures, causing all pack ids to be invalidated. /// `load_new_index` is an optimization to at least provide one newly loaded pack after refreshing the slot map. pub(crate) fn consolidate_with_disk_state( &self, needs_init: bool, load_new_index: bool, ) -> Result, Error> { let index = self.index.load(); let previous_index_state = Arc::as_ptr(&index) as usize; // IMPORTANT: get a lock after we recorded the previous state. let write = self.write.lock(); let objects_directory = &self.path; // Now we know the index isn't going to change anymore, even though threads might still load indices in the meantime. let index = self.index.load(); if previous_index_state != Arc::as_ptr(&index) as usize { // Someone else took the look before and changed the index. Return it without doing any additional work. return Ok(Some(self.collect_snapshot())); } let was_uninitialized = !index.is_initialized(); // We might not be able to detect by pointer if the state changed, as this itself is racy. So we keep track of double-initialization // using a flag, which means that if `needs_init` was true we saw the index uninitialized once, but now that we are here it's // initialized meaning that somebody was faster, and we couldn't detect it by comparisons to the index. // If so, make sure we collect the snapshot instead of returning None in case nothing actually changed, which is likely with a // race like this. 
if !was_uninitialized && needs_init { return Ok(Some(self.collect_snapshot())); } self.num_disk_state_consolidation.fetch_add(1, Ordering::Relaxed); let db_paths: Vec<_> = std::iter::once(objects_directory.to_owned()) .chain(crate::alternate::resolve(objects_directory.clone(), &self.current_dir)?) .collect(); // turn db paths into loose object databases. Reuse what's there, but only if it is in the right order. let loose_dbs = if was_uninitialized || db_paths.len() != index.loose_dbs.len() || db_paths .iter() .zip(index.loose_dbs.iter().map(|ldb| &ldb.path)) .any(|(lhs, rhs)| lhs != rhs) { Arc::new( db_paths .iter() .map(|path| crate::loose::Store::at(path, self.object_hash)) .collect::>(), ) } else { Arc::clone(&index.loose_dbs) }; let indices_by_modification_time = Self::collect_indices_and_mtime_sorted_by_size( db_paths, index.slot_indices.len().into(), self.use_multi_pack_index.then_some(self.object_hash), )?; let mut idx_by_index_path: BTreeMap<_, _> = index .slot_indices .iter() .filter_map(|&idx| { let f = &self.files[idx]; Option::as_ref(&f.files.load()).map(|f| (f.index_path().to_owned(), idx)) }) .collect(); let mut new_slot_map_indices = Vec::new(); // these indices into the slot map still exist there/didn't change let mut index_paths_to_add = was_uninitialized .then(|| VecDeque::with_capacity(indices_by_modification_time.len())) .unwrap_or_default(); // Figure out this number based on what we see while handling the existing indices let mut num_loaded_indices = 0; for (index_info, mtime) in indices_by_modification_time.into_iter().map(|(a, b, _)| (a, b)) { match idx_by_index_path.remove(index_info.path()) { Some(slot_idx) => { let slot = &self.files[slot_idx]; let files_guard = slot.files.load(); let files = Option::as_ref(&files_guard).expect("slot is set or we wouldn't know it points to this file"); if index_info.is_multi_index() && files.mtime() != mtime { // we have a changed multi-pack index. We can't just change the existing slot as it may alter slot indices // that are currently available. Instead we have to move what's there into a new slot, along with the changes, // and later free the slot or dispose of the index in the slot (like we do for removed/missing files). index_paths_to_add.push_back((index_info, mtime, Some(slot_idx))); // If the current slot is loaded, the soon-to-be copied multi-index path will be loaded as well. if files.index_is_loaded() { num_loaded_indices += 1; } } else { // packs and indices are immutable, so no need to check modification times. Unchanged multi-pack indices also // are handled like this just to be sure they are in the desired state. 
For these, the only way this could happen // is if somebody deletes and then puts back if Self::assure_slot_matches_index(&write, slot, index_info, mtime, index.generation) { num_loaded_indices += 1; } new_slot_map_indices.push(slot_idx); } } None => index_paths_to_add.push_back((index_info, mtime, None)), } } let needs_stable_indices = self.maintain_stable_indices(&write); let mut next_possibly_free_index = index .slot_indices .iter() .max() .map_or(0, |idx| (idx + 1) % self.files.len()); let mut num_indices_checked = 0; let mut needs_generation_change = false; let mut slot_indices_to_remove: Vec<_> = idx_by_index_path.into_values().collect(); while let Some((mut index_info, mtime, move_from_slot_idx)) = index_paths_to_add.pop_front() { 'increment_slot_index: loop { if num_indices_checked == self.files.len() { return Err(Error::InsufficientSlots { current: self.files.len(), needed: index_paths_to_add.len() + 1, /*the one currently popped off*/ }); } let slot_index = next_possibly_free_index; let slot = &self.files[slot_index]; next_possibly_free_index = (next_possibly_free_index + 1) % self.files.len(); num_indices_checked += 1; match move_from_slot_idx { Some(move_from_slot_idx) => { debug_assert!(index_info.is_multi_index(), "only set for multi-pack indices"); if slot_index == move_from_slot_idx { // don't try to move onto ourselves continue 'increment_slot_index; } match Self::try_set_index_slot( &write, slot, index_info, mtime, index.generation, needs_stable_indices, ) { Ok(dest_was_empty) => { slot_indices_to_remove.push(move_from_slot_idx); new_slot_map_indices.push(slot_index); // To avoid handling out the wrong pack (due to reassigned pack ids), declare this a new generation. if !dest_was_empty { needs_generation_change = true; } break 'increment_slot_index; } Err(unused_index_info) => index_info = unused_index_info, } } None => { match Self::try_set_index_slot( &write, slot, index_info, mtime, index.generation, needs_stable_indices, ) { Ok(dest_was_empty) => { new_slot_map_indices.push(slot_index); if !dest_was_empty { needs_generation_change = true; } break 'increment_slot_index; } Err(unused_index_info) => index_info = unused_index_info, } } } // This isn't racy as it's only us who can change the Option::Some/None state of a slot. } } assert_eq!( index_paths_to_add.len(), 0, "By this time we have assigned all new files to slots" ); let generation = if needs_generation_change { index.generation.checked_add(1).ok_or(Error::GenerationOverflow)? } else { index.generation }; let index_unchanged = index.slot_indices == new_slot_map_indices; if generation != index.generation { assert!( !index_unchanged, "if the generation changed, the slot index must have changed for sure" ); } if !index_unchanged || loose_dbs != index.loose_dbs { let new_index = Arc::new(SlotMapIndex { slot_indices: new_slot_map_indices, loose_dbs, generation, // if there was a prior generation, some indices might already be loaded. But we deal with it by trying to load the next index then, // until we find one. next_index_to_load: index_unchanged .then(|| Arc::clone(&index.next_index_to_load)) .unwrap_or_default(), loaded_indices: index_unchanged .then(|| Arc::clone(&index.loaded_indices)) .unwrap_or_else(|| Arc::new(num_loaded_indices.into())), num_indices_currently_being_loaded: Default::default(), }); self.index.store(new_index); } // deleted items - remove their slots AFTER we have set the new index if we may alter indices, otherwise we only declare them garbage. 
// removing slots may cause pack loading to fail, and they will then reload their indices. for slot in slot_indices_to_remove.into_iter().map(|idx| &self.files[idx]) { let _lock = slot.write.lock(); let mut files = slot.files.load_full(); let files_mut = Arc::make_mut(&mut files); if needs_stable_indices { if let Some(files) = files_mut.as_mut() { files.trash(); // generation stays the same, as it's the same value still but scheduled for eventual removal. } } else { // set the generation before we actually change the value, otherwise readers of old generations could observe the new one. // We rather want them to turn around here and update their index, which, by that time, might actually already be available. // If not, they would fail unable to load a pack or index they need, but that's preferred over returning wrong objects. // Safety: can't race as we hold the lock, have to set the generation beforehand to help avoid others to observe the value. slot.generation.store(generation, Ordering::SeqCst); *files_mut = None; }; slot.files.store(files); } let new_index = self.index.load(); Ok(if index.state_id() == new_index.state_id() { // there was no change, and nothing was loaded in the meantime, reflect that in the return value to not get into loops. None } else { if load_new_index { self.load_next_index(new_index); } Some(self.collect_snapshot()) }) } pub(crate) fn collect_indices_and_mtime_sorted_by_size( db_paths: Vec, initial_capacity: Option, multi_pack_index_object_hash: Option, ) -> Result, Error> { let mut indices_by_modification_time = Vec::with_capacity(initial_capacity.unwrap_or_default()); for db_path in db_paths { let packs = db_path.join("pack"); let entries = match std::fs::read_dir(packs) { Ok(e) => e, Err(err) if err.kind() == std::io::ErrorKind::NotFound => continue, Err(err) => return Err(err.into()), }; let indices = entries .filter_map(Result::ok) .filter_map(|e| e.metadata().map(|md| (e.path(), md)).ok()) .filter(|(_, md)| md.file_type().is_file()) .filter(|(p, _)| { let ext = p.extension(); (ext == Some(OsStr::new("idx")) && p.with_extension("pack").is_file()) || (multi_pack_index_object_hash.is_some() && ext.is_none() && is_multipack_index(p)) }) .map(|(p, md)| md.modified().map_err(Error::from).map(|mtime| (p, mtime, md.len()))) .collect::, _>>()?; let multi_index_info = multi_pack_index_object_hash .and_then(|hash| { indices.iter().find_map(|(p, a, b)| { is_multipack_index(p) .then(|| { // we always open the multi-pack here to be able to remove indices gix_pack::multi_index::File::at(p) .ok() .filter(|midx| midx.object_hash() == hash) .map(|midx| (midx, *a, *b)) }) .flatten() .map(|t| { if t.0.num_indices() > PackId::max_packs_in_multi_index() { Err(Error::TooManyPacksInMultiIndex { index_path: p.to_owned(), actual: t.0.num_indices(), limit: PackId::max_packs_in_multi_index(), }) } else { Ok(t) } }) }) }) .transpose()?; if let Some((multi_index, mtime, flen)) = multi_index_info { let index_names_in_multi_index: Vec<_> = multi_index.index_names().iter().map(AsRef::as_ref).collect(); let mut indices_not_in_multi_index: Vec<(Either, _, _)> = indices .into_iter() .filter_map(|(path, a, b)| { (path != multi_index.path() && !index_names_in_multi_index .contains(&Path::new(path.file_name().expect("file name present")))) .then_some((Either::IndexPath(path), a, b)) }) .collect(); indices_not_in_multi_index.insert(0, (Either::MultiIndexFile(Arc::new(multi_index)), mtime, flen)); indices_by_modification_time.extend(indices_not_in_multi_index); } else { 
indices_by_modification_time.extend( indices .into_iter() .filter_map(|(p, a, b)| (!is_multipack_index(&p)).then_some((Either::IndexPath(p), a, b))), ); } } // Unlike libgit2, do not sort by modification date, but by size and put the biggest indices first. That way // the chance to hit an object should be higher. We leave it to the handle to sort by LRU. // Git itself doesn't change the order which may safe time, but we want it to be stable which also helps some tests. // NOTE: this will work well for well-packed repos or those using geometric repacking, but force us to open a lot // of files when dealing with new objects, as there is no notion of recency here as would be with unmaintained // repositories. Different algorithms should be provided, like newest packs first, and possibly a mix of both // with big packs first, then sorting by recency for smaller packs. // We also want to implement `fetch.unpackLimit` to alleviate this issue a little. indices_by_modification_time.sort_by(|l, r| l.2.cmp(&r.2).reverse()); Ok(indices_by_modification_time) } /// returns Ok if the copy could happen because dest-slot was actually free or disposable , and Some(true) if it was empty #[allow(clippy::too_many_arguments)] fn try_set_index_slot( lock: &parking_lot::MutexGuard<'_, ()>, dest_slot: &MutableIndexAndPack, index_info: Either, mtime: SystemTime, current_generation: Generation, needs_stable_indices: bool, ) -> Result { let (dest_slot_was_empty, generation) = match &**dest_slot.files.load() { Some(bundle) => { if bundle.index_path() == index_info.path() || (bundle.is_disposable() && needs_stable_indices) { // it might be possible to see ourselves in case all slots are taken, but there are still a few more destination // slots to look for. return Err(index_info); } // Since we overwrite an existing slot, we have to increment the generation to prevent anyone from trying to use it while // before we are replacing it with a different value. // In detail: // We need to declare this to be the future to avoid anything in that slot to be returned to people who // last saw the old state. They will then try to get a new index which by that time, might be happening // in time so they get the latest one. If not, they will probably get into the same situation again until // it finally succeeds. Alternatively, the object will be reported unobtainable, but at least it won't return // some other object. (false, current_generation + 1) } None => { // For multi-pack indices: // Do NOT copy the packs over, they need to be reopened to get the correct pack id matching the new slot map index. // If we are allowed to delete the original, and nobody has the pack referenced, it is closed which is preferred. // Thus we simply always start new with packs in multi-pack indices. // In the worst case this could mean duplicate file handle usage though as the old and the new index can't share // packs due to the intrinsic id. // Note that the ID is used for cache access, too, so it must be unique. It must also be mappable from pack-id to slotmap id. 
(true, current_generation) } }; Self::set_slot_to_index(lock, dest_slot, index_info, mtime, generation); Ok(dest_slot_was_empty) } fn set_slot_to_index( _lock: &parking_lot::MutexGuard<'_, ()>, slot: &MutableIndexAndPack, index_info: Either, mtime: SystemTime, generation: Generation, ) { let _lock = slot.write.lock(); let mut files = slot.files.load_full(); let files_mut = Arc::make_mut(&mut files); // set the generation before we actually change the value, otherwise readers of old generations could observe the new one. // We rather want them to turn around here and update their index, which, by that time, might actually already be available. // If not, they would fail unable to load a pack or index they need, but that's preferred over returning wrong objects. // Safety: can't race as we hold the lock, have to set the generation beforehand to help avoid others to observe the value. slot.generation.store(generation, Ordering::SeqCst); *files_mut = Some(index_info.into_index_and_packs(mtime)); slot.files.store(files); } /// Returns true if the index was left in a loaded state. fn assure_slot_matches_index( _lock: &parking_lot::MutexGuard<'_, ()>, slot: &MutableIndexAndPack, index_info: Either, mtime: SystemTime, current_generation: Generation, ) -> bool { match Option::as_ref(&slot.files.load()) { Some(bundle) => { assert_eq!( bundle.index_path(), index_info.path(), "Parallel writers cannot change the file the slot points to." ); if bundle.is_disposable() { // put it into the correct mode, it's now available for sure so should not be missing or garbage. // The latter can happen if files are removed and put back for some reason, but we should definitely // have them in a decent state now that we know/think they are there. let _lock = slot.write.lock(); let mut files = slot.files.load_full(); let files_mut = Arc::make_mut(&mut files) .as_mut() .expect("BUG: cannot change from something to nothing, would be race"); files_mut.put_back(); debug_assert_eq!( files_mut.mtime(), mtime, "BUG: we can only put back files that didn't obviously change" ); // Safety: can't race as we hold the lock, must be set before replacing the data. // NOTE that we don't change the generation as it's still the very same index we talk about, it doesn't change // identity. slot.generation.store(current_generation, Ordering::SeqCst); slot.files.store(files); } else { // it's already in the correct state, either loaded or unloaded. } bundle.index_is_loaded() } None => { unreachable!("BUG: a slot can never be deleted if we have it recorded in the index WHILE changing said index. There shouldn't be a race") } } } /// Stability means that indices returned by this API will remain valid. /// Without that constraint, we may unload unused packs and indices, and may rebuild the slotmap index. /// /// Note that this must be called with a lock to the relevant state held to assure these values don't change while /// we are working on said index. fn maintain_stable_indices(&self, _guard: &parking_lot::MutexGuard<'_, ()>) -> bool { self.num_handles_stable.load(Ordering::SeqCst) > 0 } pub(crate) fn collect_snapshot(&self) -> Snapshot { // We don't observe changes-on-disk in our 'wait-for-load' loop. // That loop is meant to help assure the marker (which includes the amount of loaded indices) matches // the actual amount of indices we collect. 
let index = self.index.load(); loop { if index.num_indices_currently_being_loaded.deref().load(Ordering::SeqCst) != 0 { std::thread::yield_now(); continue; } let marker = index.marker(); let indices = if index.is_initialized() { index .slot_indices .iter() .map(|idx| (*idx, &self.files[*idx])) .filter_map(|(id, file)| { let lookup = match (**file.files.load()).as_ref()? { types::IndexAndPacks::Index(bundle) => handle::SingleOrMultiIndex::Single { index: bundle.index.loaded()?.clone(), data: bundle.data.loaded().cloned(), }, types::IndexAndPacks::MultiIndex(multi) => handle::SingleOrMultiIndex::Multi { index: multi.multi_index.loaded()?.clone(), data: multi.data.iter().map(|f| f.loaded().cloned()).collect(), }, }; handle::IndexLookup { file: lookup, id }.into() }) .collect() } else { Vec::new() }; return Snapshot { indices, loose_dbs: Arc::clone(&index.loose_dbs), marker, }; } } } // Outside of this method we will never assign new slot indices. fn is_multipack_index(path: &Path) -> bool { path.file_name() == Some(OsStr::new("multi-pack-index")) } struct IncOnNewAndDecOnDrop<'a>(&'a AtomicU16); impl<'a> IncOnNewAndDecOnDrop<'a> { pub fn new(v: &'a AtomicU16) -> Self { v.fetch_add(1, Ordering::SeqCst); Self(v) } } impl Drop for IncOnNewAndDecOnDrop<'_> { fn drop(&mut self) { self.0.fetch_sub(1, Ordering::SeqCst); } } pub(crate) enum Either { IndexPath(PathBuf), MultiIndexFile(Arc), } impl Either { fn path(&self) -> &Path { match self { Either::IndexPath(p) => p, Either::MultiIndexFile(f) => f.path(), } } fn into_index_and_packs(self, mtime: SystemTime) -> IndexAndPacks { match self { Either::IndexPath(path) => IndexAndPacks::new_single(path, mtime), Either::MultiIndexFile(file) => IndexAndPacks::new_multi_from_open_file(file, mtime), } } fn is_multi_index(&self) -> bool { matches!(self, Either::MultiIndexFile(_)) } } impl Eq for Either {} impl PartialEq for Either { fn eq(&self, other: &Self) -> bool { self.path().eq(other.path()) } } impl PartialOrd for Either { fn partial_cmp(&self, other: &Self) -> Option { Some(self.path().cmp(other.path())) } } impl Ord for Either { fn cmp(&self, other: &Self) -> std::cmp::Ordering { self.path().cmp(other.path()) } } gix-odb-0.66.0/src/store_impls/dynamic/load_one.rs000064400000000000000000000177011046102023000201550ustar 00000000000000use std::{ path::Path, sync::{atomic::Ordering, Arc}, }; use crate::store::{handle, types}; impl super::Store { /// If Ok(None) is returned, the pack-id was stale and referred to an unloaded pack or a pack which couldn't be /// loaded as its file didn't exist on disk anymore. /// If the oid is known, just load indices again to continue /// (objects rarely ever removed so should be present, maybe in another pack though), /// and redo the entire lookup for a valid pack id whose pack can probably be loaded next time. pub(crate) fn load_pack( &self, id: types::PackId, marker: types::SlotIndexMarker, ) -> std::io::Result>> { let index = self.index.load(); if index.generation != marker.generation { return Ok(None); } fn load_pack( path: &Path, id: types::PackId, object_hash: gix_hash::Kind, ) -> std::io::Result> { gix_pack::data::File::at(path, object_hash) .map(|mut pack| { pack.id = id.to_intrinsic_pack_id(); Arc::new(pack) }) .map_err(|err| match err { gix_pack::data::header::decode::Error::Io { source, .. } => source, other => std::io::Error::new(std::io::ErrorKind::Other, other), }) } let slot = &self.files[id.index]; // pin the current state before loading in the generation. 
That way we won't risk seeing the wrong value later. let slot_files = &**slot.files.load(); if slot.generation.load(Ordering::SeqCst) > marker.generation { // There is a disk consolidation in progress which just overwrote a slot that could be disposed with some other // pack, one we didn't intend to load. // Hope that when the caller returns/retries the new index is set so they can fetch it and retry. return Ok(None); } match id.multipack_index { None => { match slot_files { Some(types::IndexAndPacks::Index(bundle)) => { match bundle.data.loaded() { Some(pack) => Ok(Some(pack.clone())), None => { let _lock = slot.write.lock(); let mut files = slot.files.load_full(); let files_mut = Arc::make_mut(&mut files); let pack = match files_mut { Some(types::IndexAndPacks::Index(bundle)) => bundle .data .load_with_recovery(|path| load_pack(path, id, self.object_hash))?, Some(types::IndexAndPacks::MultiIndex(_)) => { // something changed between us getting the lock, trigger a complete index refresh. None } None => { unreachable!("BUG: must set this handle to be stable to avoid slots to be cleared/changed") } }; slot.files.store(files); Ok(pack) } } } // This can also happen if they use an old index into our new and refreshed data which might have a multi-index // here. Some(types::IndexAndPacks::MultiIndex(_)) => Ok(None), None => { unreachable!("BUG: must set this handle to be stable to avoid slots to be cleared/changed") } } } Some(pack_index) => { match slot_files { Some(types::IndexAndPacks::MultiIndex(bundle)) => { match bundle.data.get(pack_index as usize) { None => Ok(None), // somewhat unexpected, data must be stale Some(on_disk_pack) => match on_disk_pack.loaded() { Some(pack) => Ok(Some(pack.clone())), None => { let _lock = slot.write.lock(); let mut files = slot.files.load_full(); let files_mut = Arc::make_mut(&mut files); let pack = match files_mut { Some(types::IndexAndPacks::Index(_)) => { // something changed between us getting the lock, trigger a complete index refresh. None } Some(types::IndexAndPacks::MultiIndex(bundle)) => bundle .data .get_mut(pack_index as usize) .expect("BUG: must set this handle to be stable") .load_with_recovery(|path| load_pack(path, id, self.object_hash))?, None => { unreachable!("BUG: must set this handle to be stable to avoid slots to be cleared/changed") } }; slot.files.store(files); Ok(pack) } }, } } // This can also happen if they use an old index into our new and refreshed data which might have a multi-index // here. Some(types::IndexAndPacks::Index(_)) => Ok(None), None => { unreachable!("BUG: must set this handle to be stable to avoid slots to be cleared/changed") } } } } } /// Similar to `.load_pack()`, but for entire indices, bypassing the index entirely and going solely by marker and id. /// Returns `None` if the index wasn't available anymore or could otherwise not be loaded, which can be considered a bug /// as we should always keep needed indices available. pub(crate) fn index_by_id(&self, id: types::PackId, marker: types::SlotIndexMarker) -> Option { let slot = self.files.get(id.index)?; // Pin this value before we check the generation to avoid seeing something newer later. let slot_files = &**slot.files.load(); if slot.generation.load(Ordering::SeqCst) > marker.generation { // This means somebody just overwrote our trashed slot with a new (or about to be stored) index, which means the slot isn't // what we need it to be. // This shouldn't as we won't overwrite slots while handles need stable indices. 
return None; } let lookup = match (slot_files).as_ref()? { types::IndexAndPacks::Index(bundle) => handle::SingleOrMultiIndex::Single { index: bundle.index.loaded()?.clone(), data: bundle.data.loaded().cloned(), }, types::IndexAndPacks::MultiIndex(multi) => handle::SingleOrMultiIndex::Multi { index: multi.multi_index.loaded()?.clone(), data: multi.data.iter().map(|f| f.loaded().cloned()).collect(), }, }; handle::IndexLookup { id: id.index, file: lookup, } .into() } } gix-odb-0.66.0/src/store_impls/dynamic/metrics.rs000064400000000000000000000056571046102023000200520ustar 00000000000000use std::sync::atomic::Ordering; use crate::store::{types, types::IndexAndPacks}; impl super::Store { /// Return metrics collected in a racy fashion, giving an idea of what's currently going on in the store. /// /// Use this to decide whether a new instance should be created to get a chance at dropping all open handles. pub fn metrics(&self) -> types::Metrics { let mut open_packs = 0; let mut open_indices = 0; let mut known_packs = 0; let mut known_indices = 0; let mut unused_slots = 0; let mut unreachable_indices = 0; let mut unreachable_packs = 0; let index = self.index.load(); for f in index.slot_indices.iter().map(|idx| &self.files[*idx]) { match &**f.files.load() { Some(IndexAndPacks::Index(bundle)) => { if bundle.index.is_loaded() { open_indices += 1; } known_indices += 1; if bundle.data.is_loaded() { open_packs += 1; } known_packs += 1; } Some(IndexAndPacks::MultiIndex(multi)) => { if multi.multi_index.is_loaded() { open_indices += 1; } known_indices += 1; for pack in &multi.data { if pack.is_loaded() { open_packs += 1; } known_packs += 1; } } None => {} } } for slot in &self.files { match slot.files.load().as_ref() { None => { unused_slots += 1; } Some(bundle) => { if bundle.is_disposable() { unreachable_indices += 1; unreachable_packs += match bundle { IndexAndPacks::Index(single) => usize::from(single.data.is_loaded()), IndexAndPacks::MultiIndex(multi) => { multi.data.iter().map(|p| usize::from(p.is_loaded())).sum() } } } } } } types::Metrics { num_handles: self.num_handles_unstable.load(Ordering::Relaxed) + self.num_handles_stable.load(Ordering::Relaxed), num_refreshes: self.num_disk_state_consolidation.load(Ordering::Relaxed), open_reachable_packs: open_packs, open_reachable_indices: open_indices, known_reachable_indices: known_indices, known_packs, unused_slots, loose_dbs: index.loose_dbs.len(), unreachable_indices, unreachable_packs, } } } gix-odb-0.66.0/src/store_impls/dynamic/mod.rs000064400000000000000000000045521046102023000171540ustar 00000000000000//! The standard object store which should fit all needs. use std::{cell::RefCell, ops::Deref}; use gix_features::zlib; use crate::Store; /// This effectively acts like a handle but exists to be usable from the actual `crate::Handle` implementation which adds caches on top. /// Each store is quickly cloned and contains thread-local state for shared packs. pub struct Handle where S: Deref + Clone, { pub(crate) store: S, /// Defines what happens when there is no more indices to load. pub refresh: RefreshMode, /// The maximum recursion depth for resolving ref-delta base objects, that is objects referring to other objects within /// a pack. /// Recursive loops are possible only in purposefully crafted packs. /// This value doesn't have to be huge as in typical scenarios, these kind of objects are rare and chains supposedly are /// even more rare. 
pub max_recursion_depth: usize, /// If true, replacements will not be performed even if these are available. pub ignore_replacements: bool, pub(crate) token: Option, snapshot: RefCell, inflate: RefCell, packed_object_count: RefCell>, } /// Decide what happens when all indices are loaded. #[derive(Default, Clone, Copy)] pub enum RefreshMode { /// Check for new or changed pack indices (and pack data files) when the last known index is loaded. /// During runtime we will keep pack indices stable by never reusing them, however, there is the option for /// clearing internal caches which is likely to change pack ids and it will trigger unloading of packs as they are missing on disk. #[default] AfterAllIndicesLoaded, /// Use this if you expect a lot of missing objects that shouldn't trigger refreshes even after all packs are loaded. /// This comes at the risk of not learning that the packs have changed in the mean time. Never, } impl RefreshMode { /// Set this refresh mode to never refresh. pub fn never(&mut self) { *self = RefreshMode::Never; } } /// pub mod find; /// pub mod prefix; mod header; /// pub mod iter; /// pub mod write; /// pub mod init; pub(crate) mod types; pub use types::Metrics; pub(crate) mod handle; /// pub mod load_index; /// pub mod verify; mod load_one; mod metrics; mod access; /// pub mod structure; gix-odb-0.66.0/src/store_impls/dynamic/prefix.rs000064400000000000000000000173711046102023000176750ustar 00000000000000use std::{collections::HashSet, ops::Deref}; use gix_object::Exists; use crate::store::{load_index, Handle}; /// pub mod lookup { use crate::loose; /// Returned by [`Handle::lookup_prefix()`][crate::store::Handle::lookup_prefix()] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("An error occurred looking up a prefix which requires iteration")] LooseWalkDir(#[from] loose::iter::Error), #[error(transparent)] LoadIndex(#[from] crate::store::load_index::Error), } /// A way to indicate if a lookup, despite successful, was ambiguous or yielded exactly /// one result in the particular index. pub type Outcome = Result; } /// pub mod disambiguate { /// A potentially ambiguous prefix for use with `Handle::disambiguate_prefix()`. #[derive(Debug, Copy, Clone)] pub struct Candidate { id: gix_hash::ObjectId, hex_len: usize, } impl Candidate { /// Create a new potentially ambiguous prefix from an `id` and the desired minimal `hex_len`. /// /// It is considered ambiguous until it's disambiguated by validating that there is only a single object /// matching this prefix. pub fn new(id: impl Into, hex_len: usize) -> Result { let id = id.into(); gix_hash::Prefix::new(&id, hex_len)?; Ok(Candidate { id, hex_len }) } /// Transform ourselves into a `Prefix` with our current hex lengths. 
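    ///
    /// A minimal usage sketch (illustrative only; the `use` path is an assumption of this example):
    ///
    /// ```ignore
    /// use gix_odb::store::prefix::disambiguate::Candidate;
    ///
    /// let id = gix_hash::ObjectId::empty_blob(gix_hash::Kind::Sha1);
    /// let candidate = Candidate::new(id, 7).expect("7 is a valid hex length for SHA-1");
    /// // The resulting prefix can be passed to `Handle::lookup_prefix()` or `Handle::disambiguate_prefix()`.
    /// let prefix: gix_hash::Prefix = candidate.to_prefix();
    /// ```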
pub fn to_prefix(&self) -> gix_hash::Prefix { gix_hash::Prefix::new(&self.id, self.hex_len).expect("our hex-len to always be in bounds") } pub(crate) fn inc_hex_len(&mut self) { self.hex_len += 1; assert!(self.hex_len <= self.id.kind().len_in_hex()); } pub(crate) fn id(&self) -> &gix_hash::oid { &self.id } pub(crate) fn hex_len(&self) -> usize { self.hex_len } } /// Returned by [`Handle::disambiguate_prefix()`][crate::store::Handle::disambiguate_prefix()] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("An error occurred while trying to determine if a full hash contained in the object database")] Contains(#[from] crate::store::find::Error), #[error(transparent)] Lookup(#[from] super::lookup::Error), } } impl Handle where S: Deref + Clone, { /// Return the exact number of packed objects after loading all currently available indices /// as last seen on disk. pub fn packed_object_count(&self) -> Result { let mut count = self.packed_object_count.borrow_mut(); match *count { Some(count) => Ok(count), None => { let _span = gix_features::trace::detail!("gix_odb::Handle::packed_object_count()"); let mut snapshot = self.snapshot.borrow_mut(); *snapshot = self.store.load_all_indices()?; let mut obj_count = 0; for index in &snapshot.indices { obj_count += u64::from(index.num_objects()); } *count = Some(obj_count); Ok(obj_count) } } } /// Given a prefix `candidate` with an object id and an initial `hex_len`, check if it only matches a single /// object within the entire object database and increment its `hex_len` by one until it is unambiguous. /// Return `Ok(None)` if no object with that prefix exists. pub fn disambiguate_prefix( &self, mut candidate: disambiguate::Candidate, ) -> Result, disambiguate::Error> { let max_hex_len = candidate.id().kind().len_in_hex(); if candidate.hex_len() == max_hex_len { return Ok(self.exists(candidate.id()).then(|| candidate.to_prefix())); } while candidate.hex_len() != max_hex_len { let res = self.lookup_prefix(candidate.to_prefix(), None)?; match res { Some(Ok(_id)) => return Ok(Some(candidate.to_prefix())), Some(Err(())) => { candidate.inc_hex_len(); continue; } None => return Ok(None), } } Ok(Some(candidate.to_prefix())) } /// Find the only object matching `prefix` and return it as `Ok(Some(Ok()))`, or return `Ok(Some(Err(()))` /// if multiple different objects with the same prefix were found. /// /// Return `Ok(None)` if no object matched the `prefix`. /// /// Pass `candidates` to obtain the set of all object ids matching `prefix`, with the same return value as /// one would have received if it remained `None`. /// /// ### Performance Note /// /// - Unless the handles refresh mode is set to `Never`, each lookup will trigger a refresh of the object databases files /// on disk if the prefix doesn't lead to ambiguous results. /// - Since all objects need to be examined to assure non-ambiguous return values, after calling this method all indices will /// be loaded. /// - If `candidates` is `Some(…)`, the traversal will continue to obtain all candidates, which takes more time /// as there is no early abort. pub fn lookup_prefix( &self, prefix: gix_hash::Prefix, mut candidates: Option<&mut HashSet>, ) -> Result, lookup::Error> { let mut candidate: Option = None; loop { let snapshot = self.snapshot.borrow(); for index in &snapshot.indices { #[allow(clippy::needless_option_as_deref)] // needed as it's the equivalent of a reborrow. 
let lookup_result = index.lookup_prefix(prefix, candidates.as_deref_mut()); if candidates.is_none() && !check_candidate(lookup_result, &mut candidate) { return Ok(Some(Err(()))); } } for lodb in snapshot.loose_dbs.iter() { #[allow(clippy::needless_option_as_deref)] // needed as it's the equivalent of a reborrow. let lookup_result = lodb.lookup_prefix(prefix, candidates.as_deref_mut())?; if candidates.is_none() && !check_candidate(lookup_result, &mut candidate) { return Ok(Some(Err(()))); } } match self.store.load_one_index(self.refresh, snapshot.marker)? { Some(new_snapshot) => { drop(snapshot); *self.snapshot.borrow_mut() = new_snapshot; } None => { return match &candidates { Some(candidates) => match candidates.len() { 0 => Ok(None), 1 => Ok(candidates.iter().copied().next().map(Ok)), _ => Ok(Some(Err(()))), }, None => Ok(candidate.map(Ok)), }; } } } fn check_candidate(lookup_result: Option, candidate: &mut Option) -> bool { match (lookup_result, &*candidate) { (Some(Ok(oid)), Some(candidate)) if *candidate != oid => false, (Some(Ok(_)) | None, Some(_)) | (None, None) => true, (Some(Err(())), _) => false, (Some(Ok(oid)), None) => { *candidate = Some(oid); true } } } } } gix-odb-0.66.0/src/store_impls/dynamic/structure.rs000064400000000000000000000106131046102023000204300ustar 00000000000000use std::path::PathBuf; use crate::{store::load_index, types::IndexAndPacks, Store}; /// A record of a structural element of an object database. #[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub enum Record { /// A loose object database. LooseObjectDatabase { /// The root of the object database. objects_directory: PathBuf, /// The amount of object files. num_objects: usize, }, /// A pack index file Index { /// The location of the index file, path: PathBuf, /// Whether or not the index is mapped into memory. state: IndexState, }, /// A multi-index file MultiIndex { /// The location of the multi-index file, path: PathBuf, /// Whether or not the index is mapped into memory. state: IndexState, }, /// An empty slot was encountered, this is possibly happening as the ODB changes during query with /// a file being removed. Empty, } #[derive(Debug, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] /// Possible stats of pack indices. pub enum IndexState { /// The index is active in memory because a mapping exists. Loaded, /// The index couldn't be unloaded as it was still in use, but that can happen another time. Disposable, /// The index isn't loaded/memory mapped. Unloaded, } impl Store { /// Return information about all files known to us as well as their loading state. /// /// Note that this call is expensive as it gathers additional information about loose object databases. /// Note that it may change as we collect information due to the highly volatile nature of the /// implementation. The likelihood of actual changes is low though as these still depend on something /// changing on disk and somebody reading at the same time. 
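    ///
    /// A hedged sketch of consuming the result (the `Record` import path and the surrounding
    /// error handling are assumptions of this example):
    ///
    /// ```ignore
    /// use gix_odb::store::structure::Record;
    ///
    /// // `store` is the dynamic `gix_odb::Store` this method is implemented on.
    /// for record in store.structure()? {
    ///     match record {
    ///         Record::LooseObjectDatabase { objects_directory, num_objects } => {
    ///             eprintln!("{num_objects} loose objects in {objects_directory:?}");
    ///         }
    ///         Record::Index { path, state } | Record::MultiIndex { path, state } => {
    ///             eprintln!("index at {path:?} is {state:?}");
    ///         }
    ///         Record::Empty => {}
    ///     }
    /// }
    /// ```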
pub fn structure(&self) -> Result, load_index::Error> { let _span = gix_features::trace::detail!("gix_odb::Store::structure()"); let index = self.index.load(); if !index.is_initialized() { self.consolidate_with_disk_state(true, false /*load one new index*/)?; } let index = self.index.load(); let mut res: Vec<_> = index .loose_dbs .iter() .map(|db| Record::LooseObjectDatabase { objects_directory: db.path.clone(), num_objects: db.iter().count(), }) .collect(); for slot in index.slot_indices.iter().map(|idx| &self.files[*idx]) { let files = slot.files.load(); let record = match &**files { Some(index) => { let state = if index.is_disposable() { IndexState::Disposable } else if index.index_is_loaded() { IndexState::Loaded } else { IndexState::Unloaded }; match index { IndexAndPacks::Index(b) => Record::Index { path: b.index.path().into(), state, }, IndexAndPacks::MultiIndex(b) => Record::MultiIndex { path: b.multi_index.path().into(), state, }, } } None => Record::Empty, }; res.push(record); } Ok(res) } /// Provide a list of all `objects` directories of `alternate` object database paths. /// This list might be empty if there are no alternates. /// /// Read more about alternates in the documentation of the [`resolve`][crate::alternate::resolve()] function. pub fn alternate_db_paths(&self) -> Result, load_index::Error> { let index = self.index.load(); if !index.is_initialized() { self.consolidate_with_disk_state(true, false /*load one new index*/)?; } let index = self.index.load(); Ok(index .loose_dbs .iter() .skip( 1, /* first odb is always the primary one, all the follows is alternates */ ) .map(|db| db.path.clone()) .collect()) } } gix-odb-0.66.0/src/store_impls/dynamic/types.rs000064400000000000000000000435161046102023000175440ustar 00000000000000use std::{ path::{Path, PathBuf}, sync::{ atomic::{AtomicU16, AtomicU32, AtomicUsize, Ordering}, Arc, }, time::SystemTime, }; use arc_swap::ArcSwap; use gix_features::hash; /// An id to refer to an index file or a multipack index file pub type IndexId = usize; pub(crate) type StateId = u32; pub(crate) type Generation = u32; pub(crate) type AtomicGeneration = AtomicU32; /// A way to indicate which pack indices we have seen already and which of them are loaded, along with an idea /// of whether stored `PackId`s are still usable. #[derive(Default, Copy, Clone, Debug)] pub struct SlotIndexMarker { /// The generation the `loaded_until_index` belongs to. Indices of different generations are completely incompatible. /// This value changes once the internal representation is compacted, something that may happen only if there is no handle /// requiring stable pack indices. pub(crate) generation: Generation, /// A unique id identifying the index state as well as all loose databases we have last observed. /// If it changes in any way, the value is different. pub(crate) state_id: StateId, } /// A way to load and refer to a pack uniquely, namespaced by their indexing mechanism, aka multi-pack or not. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub struct PackId { /// This is the index in the slot map at which the packs index is located. pub(crate) index: IndexId, /// If the pack is in a multi-pack index, this additional index is the pack-index within the multi-pack index identified by `index`. pub(crate) multipack_index: Option, } impl PackId { /// Returns the maximum of indices we can represent. pub(crate) const fn max_indices() -> usize { (1 << 15) - 1 } /// Returns the maximum of packs we can represent if stored in a multi-index. 
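    ///
    /// The limit follows from the intrinsic pack-id layout used below, which keeps the slot index in
    /// the lower 15 bits and the pack index within a multi-index in the bits above bit 15.
    /// A self-contained illustration of that layout, mirroring the encoding in `to_intrinsic_pack_id()`:
    ///
    /// ```
    /// let slot_index: u32 = 3; // stored in the lower 15 bits
    /// let pack_in_multi_index: u32 = 2; // stored in the upper bits
    /// let intrinsic = (slot_index | 1 << 15) | (pack_in_multi_index << 16);
    /// assert_ne!(intrinsic & (1 << 15), 0, "the marker bit identifies multi-index packs");
    /// assert_eq!(intrinsic & 0x7fff, slot_index);
    /// assert_eq!(intrinsic >> 16, pack_in_multi_index);
    /// ```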
pub(crate) const fn max_packs_in_multi_index() -> gix_pack::multi_index::PackIndex { (1 << 16) - 1 } /// Packs have a built-in identifier to make data structures simpler, and this method represents ourselves as such id /// to be convertible back and forth. We essentially compress ourselves into a u32. /// /// Bit 16 is a marker to tell us if it's a multi-pack or not, the ones before are the index file itself, the ones after /// are used to encode the pack index within the multi-pack. pub(crate) fn to_intrinsic_pack_id(self) -> gix_pack::data::Id { assert!(self.index < (1 << 15), "There shouldn't be more than 2^15 indices"); match self.multipack_index { None => self.index as gix_pack::data::Id, Some(midx) => { assert!( midx <= Self::max_packs_in_multi_index(), "There shouldn't be more than 2^16 packs per multi-index" ); ((self.index as gix_pack::data::Id | 1 << 15) | midx << 16) as gix_pack::data::Id } } } pub(crate) fn from_intrinsic_pack_id(pack_id: gix_pack::data::Id) -> Self { if pack_id & (1 << 15) == 0 { PackId { index: (pack_id & 0x7fff) as IndexId, multipack_index: None, } } else { PackId { index: (pack_id & 0x7fff) as IndexId, multipack_index: Some(pack_id >> 16), } } } } /// An index that changes only if the packs directory changes and its contents is re-read. #[derive(Default)] pub struct SlotMapIndex { /// The index into the slot map at which we expect an index or pack file. Neither of these might be loaded yet. pub(crate) slot_indices: Vec, /// A list of loose object databases as resolved by their alternates file in the `object_directory`. The first entry is this objects /// directory loose file database. All other entries are the loose stores of alternates. /// It's in an Arc to be shared to Handles, but not to be shared across SlotMapIndices. pub(crate) loose_dbs: Arc>, /// A static value that doesn't ever change for a particular clone of this index. pub(crate) generation: Generation, /// The number of indices loaded thus far when the index of the slot map was last examined, which can change as new indices are loaded /// in parallel. /// Shared across SlotMapIndex instances of the same generation. pub(crate) next_index_to_load: Arc, /// Incremented by one up to `slot_indices.len()` once an attempt to load an index completed. /// If a load failed, there will also be an increment. /// Shared across SlotMapIndex instances of the same generation. pub(crate) loaded_indices: Arc, /// The amount of indices that are currently being loaded. /// Zero if no loading operation is currently happening, or more otherwise. pub(crate) num_indices_currently_being_loaded: Arc, } impl SlotMapIndex { pub(crate) fn state_id(self: &Arc) -> StateId { // We let the loaded indices take part despite not being part of our own snapshot. // This is to account for indices being loaded in parallel without actually changing the snapshot itself. 
let hash = hash::crc32(&(Arc::as_ptr(self) as usize).to_be_bytes()); hash::crc32_update(hash, &self.loaded_indices.load(Ordering::SeqCst).to_be_bytes()) } pub(crate) fn marker(self: &Arc) -> SlotIndexMarker { SlotIndexMarker { generation: self.generation, state_id: self.state_id(), } } /// Returns true if we already know at least one loose object db, a sign of being initialized pub(crate) fn is_initialized(&self) -> bool { !self.loose_dbs.is_empty() } } #[derive(Clone)] pub(crate) struct OnDiskFile { /// The last known path of the file path: Arc, /// the time the file was last modified mtime: SystemTime, state: OnDiskFileState, } #[derive(Clone)] pub(crate) enum OnDiskFileState { /// The file is on disk and can be loaded from there. Unloaded, Loaded(T), /// The file was loaded, but appeared to be missing on disk after reconciling our state with what's on disk. /// As there were handles that required pack-id stability we had to keep the item to allow finding it on later /// lookups. Garbage(T), /// File is missing on disk and could not be loaded when we tried or turned missing after reconciling our state. Missing, } impl OnDiskFile { pub fn path(&self) -> &Path { &self.path } /// Return true if we hold a memory map of the file already. pub fn is_loaded(&self) -> bool { matches!(self.state, OnDiskFileState::Loaded(_) | OnDiskFileState::Garbage(_)) } /// Return true if we are to be collected as garbage pub fn is_disposable(&self) -> bool { matches!(self.state, OnDiskFileState::Garbage(_) | OnDiskFileState::Missing) } // On error, always declare the file missing and return an error. pub(crate) fn load_strict(&mut self, load: impl FnOnce(&Path) -> std::io::Result) -> std::io::Result<()> { use OnDiskFileState::*; match self.state { Unloaded | Missing => match load(&self.path) { Ok(v) => { self.state = Loaded(v); Ok(()) } Err(err) => { // TODO: Should be provide more information? We don't even know what exactly failed right now, degenerating information. self.state = Missing; Err(err) } }, Loaded(_) | Garbage(_) => Ok(()), } } /// If the file is missing, we don't consider this failure but instead return Ok(None) to allow recovery. /// when we know that loading is necessary. This also works around borrow check, which is a nice coincidence. 
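    ///
    /// A hedged sketch of the calling pattern (the closure and payload type are placeholders,
    /// not the types actually stored in the slot map):
    ///
    /// ```ignore
    /// match on_disk_file.load_with_recovery(|path| std::fs::read(path).map(std::sync::Arc::new))? {
    ///     Some(data) => { /* use the freshly loaded or already loaded data */ }
    ///     None => { /* the file vanished on disk, so refresh the slot map and retry the lookup */ }
    /// }
    /// ```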
pub fn load_with_recovery(&mut self, load: impl FnOnce(&Path) -> std::io::Result) -> std::io::Result> { use OnDiskFileState::*; match &mut self.state { Loaded(v) | Garbage(v) => Ok(Some(v.clone())), Missing => Ok(None), Unloaded => match load(&self.path) { Ok(v) => { self.state = OnDiskFileState::Loaded(v.clone()); Ok(Some(v)) } Err(err) if err.kind() == std::io::ErrorKind::NotFound => { self.state = OnDiskFileState::Missing; Ok(None) } Err(err) => Err(err), }, } } pub fn loaded(&self) -> Option<&T> { use OnDiskFileState::*; match &self.state { Loaded(v) | Garbage(v) => Some(v), Unloaded | Missing => None, } } pub fn put_back(&mut self) { match std::mem::replace(&mut self.state, OnDiskFileState::Missing) { OnDiskFileState::Garbage(v) => self.state = OnDiskFileState::Loaded(v), OnDiskFileState::Missing => self.state = OnDiskFileState::Unloaded, other @ (OnDiskFileState::Loaded(_) | OnDiskFileState::Unloaded) => self.state = other, } } pub fn trash(&mut self) { match std::mem::replace(&mut self.state, OnDiskFileState::Missing) { OnDiskFileState::Loaded(v) => self.state = OnDiskFileState::Garbage(v), other @ (OnDiskFileState::Garbage(_) | OnDiskFileState::Unloaded | OnDiskFileState::Missing) => { self.state = other; } } } } #[derive(Clone)] pub(crate) struct IndexFileBundle { pub index: OnDiskFile>, pub data: OnDiskFile>, } #[derive(Clone)] pub(crate) struct MultiIndexFileBundle { pub multi_index: OnDiskFile>, pub data: Vec>>, } #[derive(Clone)] pub(crate) enum IndexAndPacks { Index(IndexFileBundle), /// Note that there can only be one multi-pack file per repository, but thanks to git alternates, there can be multiple overall. MultiIndex(MultiIndexFileBundle), } impl IndexAndPacks { pub(crate) fn index_path(&self) -> &Path { match self { IndexAndPacks::Index(index) => &index.index.path, IndexAndPacks::MultiIndex(index) => &index.multi_index.path, } } pub(crate) fn mtime(&self) -> SystemTime { match self { IndexAndPacks::Index(index) => index.index.mtime, IndexAndPacks::MultiIndex(index) => index.multi_index.mtime, } } /// If we are garbage, put ourselves into the loaded state. Otherwise, put ourselves back to unloaded. pub(crate) fn put_back(&mut self) { match self { IndexAndPacks::Index(bundle) => { bundle.index.put_back(); bundle.data.put_back(); } IndexAndPacks::MultiIndex(bundle) => { bundle.multi_index.put_back(); for data in &mut bundle.data { data.put_back(); } } } } // The inverse of `put_back()`, by trashing the content. pub(crate) fn trash(&mut self) { match self { IndexAndPacks::Index(bundle) => { bundle.index.trash(); bundle.data.trash(); } IndexAndPacks::MultiIndex(bundle) => { bundle.multi_index.trash(); for data in &mut bundle.data { data.trash(); } } } } pub(crate) fn index_is_loaded(&self) -> bool { match self { Self::Index(bundle) => bundle.index.is_loaded(), Self::MultiIndex(bundle) => bundle.multi_index.is_loaded(), } } pub(crate) fn is_disposable(&self) -> bool { match self { Self::Index(bundle) => bundle.index.is_disposable() || bundle.data.is_disposable(), Self::MultiIndex(bundle) => { bundle.multi_index.is_disposable() || bundle.data.iter().any(OnDiskFile::is_disposable) } } } pub(crate) fn load_index(&mut self, object_hash: gix_hash::Kind) -> std::io::Result<()> { match self { IndexAndPacks::Index(bundle) => bundle.index.load_strict(|path| { gix_pack::index::File::at(path, object_hash) .map(Arc::new) .map_err(|err| match err { gix_pack::index::init::Error::Io { source, .. 
} => source, err => std::io::Error::new(std::io::ErrorKind::Other, err), }) }), IndexAndPacks::MultiIndex(bundle) => { bundle.multi_index.load_strict(|path| { gix_pack::multi_index::File::at(path) .map(Arc::new) .map_err(|err| match err { gix_pack::multi_index::init::Error::Io { source, .. } => source, err => std::io::Error::new(std::io::ErrorKind::Other, err), }) })?; if let Some(multi_index) = bundle.multi_index.loaded() { bundle.data = Self::index_names_to_pack_paths(multi_index); } Ok(()) } } } pub(crate) fn new_single(index_path: PathBuf, mtime: SystemTime) -> Self { let data_path = index_path.with_extension("pack"); Self::Index(IndexFileBundle { index: OnDiskFile { path: index_path.into(), state: OnDiskFileState::Unloaded, mtime, }, data: OnDiskFile { path: data_path.into(), state: OnDiskFileState::Unloaded, mtime, }, }) } pub(crate) fn new_multi_from_open_file(multi_index: Arc, mtime: SystemTime) -> Self { let data = Self::index_names_to_pack_paths(&multi_index); Self::MultiIndex(MultiIndexFileBundle { multi_index: OnDiskFile { path: Arc::new(multi_index.path().to_owned()), state: OnDiskFileState::Loaded(multi_index), mtime, }, data, }) } fn index_names_to_pack_paths( multi_index: &gix_pack::multi_index::File, ) -> Vec>> { let parent_dir = multi_index.path().parent().expect("parent present"); let data = multi_index .index_names() .iter() .map(|idx| OnDiskFile { path: parent_dir.join(idx.with_extension("pack")).into(), state: OnDiskFileState::Unloaded, mtime: SystemTime::UNIX_EPOCH, }) .collect(); data } } #[derive(Default)] pub(crate) struct MutableIndexAndPack { pub(crate) files: ArcSwap>, pub(crate) write: parking_lot::Mutex<()>, /// The generation required at least to read this slot. If these mismatch, the caller is likely referring to a now changed slot /// that has different content under the same id. /// Must only be changed when the write lock is held. pub(crate) generation: AtomicGeneration, } /// A snapshot about resource usage. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Metrics { /// The total amount of handles which can be used to access object information. pub num_handles: usize, /// The amount of refreshes performed to reconcile with the ODB state on disk. pub num_refreshes: usize, /// The amount of indices that are currently open and will be returned to handles. pub open_reachable_indices: usize, /// The amount of reachable, known indices, which aren't opened yet. pub known_reachable_indices: usize, /// The amount of packs which are open in memory and will be returned to handles. pub open_reachable_packs: usize, /// The amount of packs that are reachable and will be returned to handles. They aren't open yet. pub known_packs: usize, /// The amount of slots which are empty. /// /// Over time these will fill, but they can be emptied as files are removed from disk. pub unused_slots: usize, /// Unreachable indices are still using slots, but aren't returned to new handles anymore unless they still happen to /// know their id. /// /// This allows to keep files available while they are still potentially required for operations like pack generation, despite /// the file on disk being removed or changed. pub unreachable_indices: usize, /// Equivalent to `unreachable_indices`, but for mapped packed data files pub unreachable_packs: usize, /// The amount of loose object databases currently available for object retrieval. /// /// There may be more than one if 'alternates' are used. 
pub loose_dbs: usize, } #[cfg(test)] mod tests { use super::*; mod pack_id { use super::PackId; #[test] fn to_intrinsic_roundtrip() { let single = PackId { index: (1 << 15) - 1, multipack_index: None, }; let multi = PackId { index: (1 << 15) - 1, multipack_index: Some((1 << 16) - 1), }; assert_eq!(PackId::from_intrinsic_pack_id(single.to_intrinsic_pack_id()), single); assert_eq!(PackId::from_intrinsic_pack_id(multi.to_intrinsic_pack_id()), multi); } #[test] #[should_panic] fn max_supported_index_count() { PackId { index: 1 << 15, multipack_index: None, } .to_intrinsic_pack_id(); } } } gix-odb-0.66.0/src/store_impls/dynamic/verify.rs000064400000000000000000000273671046102023000177120ustar 00000000000000use std::{ ops::Deref, sync::atomic::{AtomicBool, Ordering}, time::Instant, }; use gix_features::progress::{DynNestedProgress, MessageLevel, Progress}; use crate::{ pack, store::verify::integrity::{IndexStatistics, SingleOrMultiStatistics}, types::IndexAndPacks, }; /// pub mod integrity { use std::{marker::PhantomData, path::PathBuf}; use crate::pack; /// Options for use in [`Store::verify_integrity()`][crate::Store::verify_integrity()]. pub type Options = pack::index::verify::integrity::Options; /// Returned by [`Store::verify_integrity()`][crate::Store::verify_integrity()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] MultiIndexIntegrity(#[from] pack::index::traverse::Error), #[error(transparent)] IndexIntegrity(#[from] pack::index::traverse::Error), #[error(transparent)] IndexOpen(#[from] pack::index::init::Error), #[error(transparent)] LooseObjectStoreIntegrity(#[from] crate::loose::verify::integrity::Error), #[error(transparent)] MultiIndexOpen(#[from] pack::multi_index::init::Error), #[error(transparent)] PackOpen(#[from] pack::data::init::Error), #[error(transparent)] InitializeODB(#[from] crate::store::load_index::Error), #[error("The disk on state changed while performing the operation, and we observed the change.")] NeedsRetryDueToChangeOnDisk, } #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] /// Integrity information about loose object databases pub struct LooseObjectStatistics { /// The path to the root directory of the loose objects database pub path: PathBuf, /// The statistics created after verifying the loose object database. pub statistics: crate::loose::verify::integrity::Statistics, } #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] /// Traversal statistics of packs governed by single indices or multi-pack indices. #[allow(missing_docs)] pub enum SingleOrMultiStatistics { Single(pack::index::traverse::Statistics), Multi(Vec<(PathBuf, pack::index::traverse::Statistics)>), } /// Statistics gathered when traversing packs of various kinds of indices. #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct IndexStatistics { /// The path to the index or multi-pack index for which statistics were gathered. pub path: PathBuf, /// The actual statistics for the index at `path`. pub statistics: SingleOrMultiStatistics, } /// Returned by [`Store::verify_integrity()`][crate::Store::verify_integrity()]. pub struct Outcome { /// Statistics for validated loose object stores. 
pub loose_object_stores: Vec, /// Pack traversal statistics for each index and their pack(s) pub index_statistics: Vec, } /// The progress ids used in [`Store::verify_integrity()`][crate::Store::verify_integrity()]. /// /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. #[derive(Debug, Copy, Clone)] pub enum ProgressId { /// Contains the path of the currently validated loose object database. VerifyLooseObjectDbPath, /// The root progress for all verification of an index. It doesn't contain any useful information itself. VerifyIndex(PhantomData), /// The root progress for all verification of a multi-index. It doesn't contain any useful information itself. VerifyMultiIndex(PhantomData), } impl From for gix_features::progress::Id { fn from(v: ProgressId) -> Self { match v { ProgressId::VerifyLooseObjectDbPath => *b"VISP", ProgressId::VerifyMultiIndex(_) => *b"VIMI", ProgressId::VerifyIndex(_) => *b"VISI", } } } } impl super::Store { /// Check the integrity of all objects as per the given `options`. /// /// Note that this will not force loading all indices or packs permanently, as we will only use the momentarily loaded disk state. /// This does, however, include all alternates. pub fn verify_integrity( &self, progress: &mut dyn DynNestedProgress, should_interrupt: &AtomicBool, options: integrity::Options, ) -> Result where C: pack::cache::DecodeEntry, F: Fn() -> C + Send + Clone, { let _span = gix_features::trace::coarse!("gix_odb:Store::verify_integrity()"); let mut index = self.index.load(); if !index.is_initialized() { self.consolidate_with_disk_state(true, false)?; index = self.index.load(); assert!( index.is_initialized(), "BUG: after consolidating successfully, we have an initialized index" ); } progress.init( Some(index.slot_indices.len()), gix_features::progress::count("pack indices"), ); let mut statistics = Vec::new(); let index_check_message = |path: &std::path::Path| { format!( "Checking integrity: {}", path.file_name() .map_or_else(Default::default, std::ffi::OsStr::to_string_lossy) ) }; gix_features::trace::detail!("verify indices").into_scope(|| { for slot_index in &index.slot_indices { let slot = &self.files[*slot_index]; if slot.generation.load(Ordering::SeqCst) != index.generation { return Err(integrity::Error::NeedsRetryDueToChangeOnDisk); } let files = slot.files.load(); let files = Option::as_ref(&files).ok_or(integrity::Error::NeedsRetryDueToChangeOnDisk)?; let start = Instant::now(); let (mut child_progress, num_objects, index_path) = match files { IndexAndPacks::Index(bundle) => { let index; let index = match bundle.index.loaded() { Some(index) => index.deref(), None => { index = pack::index::File::at(bundle.index.path(), self.object_hash)?; &index } }; let pack; let data = match bundle.data.loaded() { Some(pack) => pack.deref(), None => { pack = pack::data::File::at(bundle.data.path(), self.object_hash)?; &pack } }; let mut child_progress = progress.add_child_with_id( "verify index".into(), integrity::ProgressId::VerifyIndex(Default::default()).into(), ); let outcome = index.verify_integrity( Some(pack::index::verify::PackContext { data, options: options.clone(), }), &mut child_progress, should_interrupt, )?; statistics.push(IndexStatistics { path: bundle.index.path().to_owned(), statistics: SingleOrMultiStatistics::Single( outcome .pack_traverse_statistics .expect("pack provided so there are stats"), ), }); (child_progress, index.num_objects(), index.path().to_owned()) } 
IndexAndPacks::MultiIndex(bundle) => { let index; let index = match bundle.multi_index.loaded() { Some(index) => index.deref(), None => { index = pack::multi_index::File::at(bundle.multi_index.path())?; &index } }; let mut child_progress = progress.add_child_with_id( "verify multi-index".into(), integrity::ProgressId::VerifyMultiIndex(Default::default()).into(), ); let outcome = index.verify_integrity(&mut child_progress, should_interrupt, options.clone())?; let index_dir = bundle.multi_index.path().parent().expect("file in a directory"); statistics.push(IndexStatistics { path: Default::default(), statistics: SingleOrMultiStatistics::Multi( outcome .pack_traverse_statistics .into_iter() .zip(index.index_names()) .map(|(statistics, index_name)| (index_dir.join(index_name), statistics)) .collect(), ), }); (child_progress, index.num_objects(), index.path().to_owned()) } }; child_progress.set_name(index_check_message(&index_path)); child_progress.show_throughput_with( start, num_objects as usize, gix_features::progress::count("objects").expect("set"), MessageLevel::Success, ); progress.inc(); } Ok(()) })?; progress.init( Some(index.loose_dbs.len()), gix_features::progress::count("loose object stores"), ); let mut loose_object_stores = Vec::new(); gix_features::trace::detail!("verify loose ODBs").into_scope( || -> Result<_, crate::loose::verify::integrity::Error> { for loose_db in &*index.loose_dbs { let out = loose_db .verify_integrity( &mut progress.add_child_with_id( loose_db.path().display().to_string(), integrity::ProgressId::VerifyLooseObjectDbPath.into(), ), should_interrupt, ) .map(|statistics| integrity::LooseObjectStatistics { path: loose_db.path().to_owned(), statistics, })?; loose_object_stores.push(out); } Ok(()) }, )?; Ok(integrity::Outcome { loose_object_stores, index_statistics: statistics, }) } } gix-odb-0.66.0/src/store_impls/dynamic/write.rs000064400000000000000000000027021046102023000175220ustar 00000000000000use std::{io::Read, ops::Deref}; use gix_hash::ObjectId; use gix_object::Kind; use crate::store; mod error { use crate::{loose, store}; /// The error returned by the [dynamic Store's][crate::Store] [`Write`](gix_object::Write) implementation. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] LoadIndex(#[from] store::load_index::Error), #[error(transparent)] LooseWrite(#[from] loose::write::Error), #[error(transparent)] Io(#[from] std::io::Error), } } pub use error::Error; use crate::store_impls::dynamic; impl gix_object::Write for store::Handle where S: Deref + Clone, { fn write_stream(&self, kind: Kind, size: u64, from: &mut dyn Read) -> Result { let mut snapshot = self.snapshot.borrow_mut(); Ok(match snapshot.loose_dbs.first() { Some(ldb) => ldb.write_stream(kind, size, from)?, None => { let new_snapshot = self .store .load_one_index(self.refresh, snapshot.marker) .map_err(Box::new)? .expect("there is always at least one ODB, and this code runs only once for initialization"); *snapshot = new_snapshot; snapshot.loose_dbs[0].write_stream(kind, size, from)? 
} }) } } gix-odb-0.66.0/src/store_impls/loose/find.rs000064400000000000000000000237411046102023000170130ustar 00000000000000use std::{cmp::Ordering, collections::HashSet, fs, io::Read, path::PathBuf}; use gix_features::zlib; use crate::store_impls::loose::{hash_path, Store, HEADER_MAX_SIZE}; /// Returned by [`Store::try_find()`] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("decompression of loose object at '{path}' failed")] DecompressFile { source: zlib::inflate::Error, path: PathBuf, }, #[error("file at '{path}' showed invalid size of inflated data, expected {expected}, got {actual}")] SizeMismatch { actual: u64, expected: u64, path: PathBuf }, #[error(transparent)] Decode(#[from] gix_object::decode::LooseHeaderDecodeError), #[error("Cannot store {size} in memory as it's not representable")] OutOfMemory { size: u64 }, #[error("Could not {action} data at '{path}'")] Io { source: std::io::Error, action: &'static str, path: PathBuf, }, } /// Object lookup impl Store { const OPEN_ACTION: &'static str = "open"; /// Returns true if the given id is contained in our repository. pub fn contains(&self, id: &gix_hash::oid) -> bool { debug_assert_eq!(self.object_hash, id.kind()); hash_path(id, self.path.clone()).is_file() } /// Given a `prefix`, find an object that matches it uniquely within this loose object /// database as `Ok(Some(Ok()))`. /// If there is more than one object matching the object `Ok(Some(Err(()))` is returned. /// /// Finally, if no object matches, the return value is `Ok(None)`. /// /// The outer `Result` is to indicate errors during file system traversal. /// /// Pass `candidates` to obtain the set of all object ids matching `prefix`, with the same return value as /// one would have received if it remained `None`. pub fn lookup_prefix( &self, prefix: gix_hash::Prefix, mut candidates: Option<&mut HashSet>, ) -> Result, crate::loose::iter::Error> { let single_directory_iter = crate::loose::Iter { inner: gix_features::fs::walkdir_new( &self.path.join(prefix.as_oid().to_hex_with_len(2).to_string()), gix_features::fs::walkdir::Parallelism::Serial, false, ) .min_depth(1) .max_depth(1) .follow_links(false) .into_iter(), hash_hex_len: prefix.as_oid().kind().len_in_hex(), }; let mut candidate = None; for oid in single_directory_iter { let oid = match oid { Ok(oid) => oid, Err(err) => { return match err.io_error() { Some(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), None | Some(_) => Err(err), } } }; if prefix.cmp_oid(&oid) == Ordering::Equal { match &mut candidates { Some(candidates) => { candidates.insert(oid); } None => { if candidate.is_some() { return Ok(Some(Err(()))); } candidate = Some(oid); } } } } match &mut candidates { Some(candidates) => match candidates.len() { 0 => Ok(None), 1 => Ok(candidates.iter().next().copied().map(Ok)), _ => Ok(Some(Err(()))), }, None => Ok(candidate.map(Ok)), } } /// Return the object identified by the given [`ObjectId`][gix_hash::ObjectId] if present in this database, /// writing its raw data into the given `out` buffer. /// /// Returns `Err` if there was an error locating or reading the object. Returns `Ok` if /// there was no such object. 
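    ///
    /// A minimal usage sketch (illustrative only; `store` is a loose [`Store`] and `id` an existing
    /// `gix_hash::ObjectId`):
    ///
    /// ```ignore
    /// let mut buf = Vec::new();
    /// match store.try_find(&id, &mut buf)? {
    ///     Some(obj) => println!("found {:?} object of size {}", obj.kind, obj.data.len()),
    ///     None => println!("{id} is not present as a loose object"),
    /// }
    /// ```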
pub fn try_find<'a>( &self, id: &gix_hash::oid, out: &'a mut Vec, ) -> Result>, Error> { debug_assert_eq!(self.object_hash, id.kind()); match self.find_inner(id, out) { Ok(obj) => Ok(Some(obj)), Err(err) => match err { Error::Io { source: err, action, path, } => { if action == Self::OPEN_ACTION && err.kind() == std::io::ErrorKind::NotFound { Ok(None) } else { Err(Error::Io { source: err, action, path, }) } } err => Err(err), }, } } /// Return only the decompressed size of the object and its kind without fully reading it into memory as tuple of `(size, kind)`. /// Returns `None` if `id` does not exist in the database. pub fn try_header(&self, id: &gix_hash::oid) -> Result, Error> { const BUF_SIZE: usize = 256; let mut buf = [0_u8; BUF_SIZE]; let path = hash_path(id, self.path.clone()); let mut inflate = zlib::Inflate::default(); let mut istream = match fs::File::open(&path) { Ok(f) => f, Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(None), Err(err) => { return Err(Error::Io { source: err, action: Self::OPEN_ACTION, path, }) } }; let (compressed_buf, _) = buf.split_at_mut(BUF_SIZE - HEADER_MAX_SIZE); let bytes_read = istream.read(compressed_buf).map_err(|e| Error::Io { source: e, action: "read", path: path.to_owned(), })?; let (compressed_buf, header_buf) = buf.split_at_mut(bytes_read); let (status, _consumed_in, consumed_out) = inflate .once(compressed_buf, header_buf) .map_err(|e| Error::DecompressFile { source: e, path: path.to_owned(), })?; if status == zlib::Status::BufError { return Err(Error::DecompressFile { source: zlib::inflate::Error::Status(status), path, }); } let (kind, size, _header_size) = gix_object::decode::loose_header(&header_buf[..consumed_out])?; Ok(Some((size, kind))) } fn find_inner<'a>(&self, id: &gix_hash::oid, buf: &'a mut Vec) -> Result, Error> { let path = hash_path(id, self.path.clone()); let mut inflate = zlib::Inflate::default(); let ((status, consumed_in, consumed_out), bytes_read) = { let mut istream = fs::File::open(&path).map_err(|e| Error::Io { source: e, action: Self::OPEN_ACTION, path: path.to_owned(), })?; buf.clear(); let bytes_read = istream.read_to_end(buf).map_err(|e| Error::Io { source: e, action: "read", path: path.to_owned(), })?; buf.resize(bytes_read + HEADER_MAX_SIZE, 0); let (input, output) = buf.split_at_mut(bytes_read); ( inflate .once(&input[..bytes_read], output) .map_err(|e| Error::DecompressFile { source: e, path: path.to_owned(), })?, bytes_read, ) }; if status == zlib::Status::BufError { return Err(Error::DecompressFile { source: zlib::inflate::Error::Status(status), path, }); } let decompressed_start = bytes_read; let (kind, size, header_size) = gix_object::decode::loose_header(&buf[decompressed_start..decompressed_start + consumed_out])?; if status == zlib::Status::StreamEnd { let decompressed_body_bytes_sans_header = decompressed_start + header_size..decompressed_start + consumed_out; if consumed_out as u64 != size + header_size as u64 { return Err(Error::SizeMismatch { expected: size + header_size as u64, actual: consumed_out as u64, path, }); } buf.copy_within(decompressed_body_bytes_sans_header, 0); } else { let new_len = bytes_read as u64 + size + header_size as u64; buf.resize(new_len.try_into().map_err(|_| Error::OutOfMemory { size: new_len })?, 0); { let (input, output) = buf.split_at_mut(bytes_read); let num_decompressed_bytes = zlib::stream::inflate::read( &mut &input[consumed_in..], &mut inflate.state, &mut output[consumed_out..], ) .map_err(|e| Error::Io { source: e, action: "deflate", path: 
path.to_owned(), })?; if num_decompressed_bytes as u64 + consumed_out as u64 != size + header_size as u64 { return Err(Error::SizeMismatch { expected: size + header_size as u64, actual: num_decompressed_bytes as u64 + consumed_out as u64, path, }); } }; buf.copy_within(decompressed_start + header_size.., 0); } buf.resize( size.try_into() .expect("BUG: here the size is already confirmed to fit into memory"), 0, ); Ok(gix_object::Data { kind, data: buf }) } } gix-odb-0.66.0/src/store_impls/loose/iter.rs000064400000000000000000000055641046102023000170410ustar 00000000000000use gix_features::fs; use crate::store_impls::loose; /// Returned by [`loose::Store::iter()`] pub type Error = gix_features::fs::walkdir::Error; impl loose::Iter { fn path_to_id( &self, res: Result, ) -> Option> { use std::path::Component::Normal; match res { Ok(e) => { let p = e.path(); let mut ci = p.components(); let (c2, c1) = (ci.next_back(), ci.next_back()); if let (Some(Normal(c1)), Some(Normal(c2))) = (c1, c2) { if c1.len() == 2 && c2.len() == self.hash_hex_len - 2 { if let (Some(c1), Some(c2)) = (c1.to_str(), c2.to_str()) { let mut buf = gix_hash::Kind::hex_buf(); { let (first_byte, rest) = buf[..self.hash_hex_len].split_at_mut(2); first_byte.copy_from_slice(c1.as_bytes()); rest.copy_from_slice(c2.as_bytes()); } if let Ok(b) = gix_hash::ObjectId::from_hex(&buf[..self.hash_hex_len]) { return Some(Ok(b)); } } } } } Err(err) => return Some(Err(err)), }; None } } impl Iterator for loose::Iter { type Item = Result; fn next(&mut self) -> Option { while let Some(res) = self.inner.next() { if let Some(res) = self.path_to_id(res) { return Some(res); } } None } } /// Iteration and traversal impl loose::Store { /// Return an iterator over all objects contained in the database. /// /// The [`Id`][gix_hash::ObjectId]s returned by the iterator can typically be used in the [`locate(…)`][loose::Store::try_find()] method. /// _Note_ that the result is not sorted or stable, thus ordering can change between runs. /// /// # Notes /// /// [`loose::Iter`] is used instead of `impl Iterator<…>` to allow using this iterator in struct fields, as is currently /// needed if iterators need to be implemented by hand in the absence of generators. pub fn iter(&self) -> loose::Iter { loose::Iter { inner: fs::walkdir_new( &self.path, fs::walkdir::Parallelism::ThreadPoolPerTraversal { thread_name: "gix_odb::loose::Store::iter: fs-walk", }, false, ) .min_depth(2) .max_depth(3) .follow_links(false) .into_iter(), hash_hex_len: self.object_hash.len_in_hex(), } } } gix-odb-0.66.0/src/store_impls/loose/mod.rs000064400000000000000000000040141046102023000166420ustar 00000000000000//! An object database storing each object in a zlib compressed file with its hash in the path /// The maximum size that an object header can have. `git2` says 64, and `git` says 32 but also mentions it can be larger. const HEADER_MAX_SIZE: usize = 64; use std::path::{Path, PathBuf}; use gix_features::fs; /// A database for reading and writing objects to disk, one file per object. #[derive(Clone, PartialEq, Eq)] pub struct Store { /// The directory in which objects are stored, containing 256 folders representing the hashes first byte. pub(crate) path: PathBuf, /// The kind of hash we should assume during iteration and when writing new objects. pub(crate) object_hash: gix_hash::Kind, } /// Initialization impl Store { /// Initialize the Db with the `objects_directory` containing the hexadecimal first byte subdirectories, which in turn /// contain all loose objects. 
/// /// In a git repository, this would be `.git/objects`. /// /// The `object_hash` determines which hash to use when writing, finding or iterating objects. pub fn at(objects_directory: impl Into, object_hash: gix_hash::Kind) -> Store { Store { path: objects_directory.into(), object_hash, } } /// Return the path to our `objects` directory. pub fn path(&self) -> &Path { &self.path } /// Return the kind of hash we would iterate and write. pub fn object_hash(&self) -> gix_hash::Kind { self.object_hash } } fn hash_path(id: &gix_hash::oid, mut root: PathBuf) -> PathBuf { let mut hex = gix_hash::Kind::hex_buf(); let hex_len = id.hex_to_buf(hex.as_mut()); let buf = std::str::from_utf8(&hex[..hex_len]).expect("ascii only in hex"); root.push(&buf[..2]); root.push(&buf[2..]); root } /// pub mod find; /// pub mod iter; /// pub mod verify; /// The type for an iterator over `Result)` pub struct Iter { inner: fs::walkdir::DirEntryIter, hash_hex_len: usize, } /// pub mod write; gix-odb-0.66.0/src/store_impls/loose/verify.rs000064400000000000000000000071541046102023000173770ustar 00000000000000use std::{ sync::atomic::{AtomicBool, Ordering}, time::Instant, }; use gix_features::progress::{Count, DynNestedProgress, Progress}; use crate::loose::Store; /// pub mod integrity { /// The error returned by [`verify_integrity()`][super::Store::verify_integrity()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("{kind} object {id} could not be decoded")] ObjectDecode { source: gix_object::decode::Error, kind: gix_object::Kind, id: gix_hash::ObjectId, }, #[error("{kind} object {expected} wasn't re-encoded without change - new hash is {actual}")] ObjectHashMismatch { kind: gix_object::Kind, actual: gix_hash::ObjectId, expected: gix_hash::ObjectId, }, #[error("Objects were deleted during iteration - try again")] Retry, #[error("Interrupted")] Interrupted, } /// The outcome returned by [`verify_integrity()`][super::Store::verify_integrity()]. #[derive(Debug, PartialEq, Eq, Hash, Ord, PartialOrd, Clone)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Statistics { /// The amount of loose objects we checked. pub num_objects: usize, } /// The progress ids used in [`verify_integrity()`][super::Store::verify_integrity()]. /// /// Use this information to selectively extract the progress of interest in case the parent application has custom visualization. #[derive(Debug, Copy, Clone)] pub enum ProgressId { /// The amount of loose objects that have been verified. LooseObjects, } impl From for gix_features::progress::Id { fn from(v: ProgressId) -> Self { match v { ProgressId::LooseObjects => *b"VILO", } } } } impl Store { /// Check all loose objects for their integrity checking their hash matches the actual data and by decoding them fully. pub fn verify_integrity( &self, progress: &mut dyn DynNestedProgress, should_interrupt: &AtomicBool, ) -> Result { use gix_object::Write; let mut buf = Vec::new(); let sink = crate::sink(self.object_hash); let mut num_objects = 0; let start = Instant::now(); let mut progress = progress.add_child_with_id("Validating".into(), integrity::ProgressId::LooseObjects.into()); progress.init(None, gix_features::progress::count("loose objects")); for id in self.iter().filter_map(Result::ok) { let object = self .try_find(&id, &mut buf) .map_err(|_| integrity::Error::Retry)? 
.ok_or(integrity::Error::Retry)?; let actual_id = sink.write_buf(object.kind, object.data).expect("sink never fails"); if actual_id != id { return Err(integrity::Error::ObjectHashMismatch { kind: object.kind, actual: actual_id, expected: id, }); } object.decode().map_err(|err| integrity::Error::ObjectDecode { source: err, kind: object.kind, id, })?; progress.inc(); num_objects += 1; if should_interrupt.load(Ordering::SeqCst) { return Err(integrity::Error::Interrupted); } } progress.show_throughput(start); Ok(integrity::Statistics { num_objects }) } } gix-odb-0.66.0/src/store_impls/loose/write.rs000064400000000000000000000125611046102023000172230ustar 00000000000000use std::{fs, io, io::Write, path::PathBuf}; use gix_features::{hash, zlib::stream::deflate}; use gix_object::WriteTo; use tempfile::NamedTempFile; use super::Store; use crate::store_impls::loose; /// Returned by the [`gix_object::Write`] trait implementation of [`Store`] #[derive(thiserror::Error, Debug)] #[allow(missing_docs)] pub enum Error { #[error("Could not {message} '{path}'")] Io { source: io::Error, message: &'static str, path: PathBuf, }, #[error("An IO error occurred while writing an object")] IoRaw(#[from] io::Error), #[error("Could not turn temporary file into persisted file at '{target}'")] Persist { source: tempfile::PersistError, target: PathBuf, }, } impl gix_object::Write for Store { fn write(&self, object: &dyn WriteTo) -> Result { let mut to = self.dest()?; to.write_all(&object.loose_header()).map_err(|err| Error::Io { source: err, message: "write header to tempfile in", path: self.path.to_owned(), })?; object.write_to(&mut to).map_err(|err| Error::Io { source: err, message: "stream all data into tempfile in", path: self.path.to_owned(), })?; to.flush().map_err(Box::new)?; Ok(self.finalize_object(to).map_err(Box::new)?) } /// Write the given buffer in `from` to disk in one syscall at best. /// /// This will cost at least 4 IO operations. fn write_buf(&self, kind: gix_object::Kind, from: &[u8]) -> Result { let mut to = self.dest().map_err(Box::new)?; to.write_all(&gix_object::encode::loose_header(kind, from.len() as u64)) .map_err(|err| Error::Io { source: err, message: "write header to tempfile in", path: self.path.to_owned(), })?; to.write_all(from).map_err(|err| Error::Io { source: err, message: "stream all data into tempfile in", path: self.path.to_owned(), })?; to.flush()?; Ok(self.finalize_object(to)?) } /// Write the given stream in `from` to disk with at least one syscall. /// /// This will cost at least 4 IO operations. fn write_stream( &self, kind: gix_object::Kind, size: u64, mut from: &mut dyn io::Read, ) -> Result { let mut to = self.dest().map_err(Box::new)?; to.write_all(&gix_object::encode::loose_header(kind, size)) .map_err(|err| Error::Io { source: err, message: "write header to tempfile in", path: self.path.to_owned(), })?; io::copy(&mut from, &mut to) .map_err(|err| Error::Io { source: err, message: "stream all data into tempfile in", path: self.path.to_owned(), }) .map_err(Box::new)?; to.flush().map_err(Box::new)?; Ok(self.finalize_object(to)?) } } type CompressedTempfile = deflate::Write; /// Access impl Store { /// Return the path to the object with `id`. /// /// Note that is may not exist yet. 
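    ///
    /// A small sketch of the two-level fan-out this produces, using the well-known SHA-1 of the
    /// empty blob (`ends_with` compares whole path components):
    ///
    /// ```ignore
    /// let store = gix_odb::loose::Store::at("/repo/.git/objects", gix_hash::Kind::Sha1);
    /// let id = gix_hash::ObjectId::empty_blob(gix_hash::Kind::Sha1);
    /// assert!(store
    ///     .object_path(&id)
    ///     .ends_with("e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391"));
    /// ```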
pub fn object_path(&self, id: &gix_hash::oid) -> PathBuf { loose::hash_path(id, self.path.clone()) } } impl Store { fn dest(&self) -> Result, Error> { #[cfg_attr(not(unix), allow(unused_mut))] let mut builder = tempfile::Builder::new(); #[cfg(unix)] { use std::os::unix::fs::PermissionsExt; let perms = std::fs::Permissions::from_mode(0o444); builder.permissions(perms); } Ok(hash::Write::new( deflate::Write::new(builder.tempfile_in(&self.path).map_err(|err| Error::Io { source: err, message: "create named temp file in", path: self.path.to_owned(), })?), self.object_hash, )) } fn finalize_object( &self, hash::Write { hash, inner: file }: hash::Write, ) -> Result { let id = gix_hash::ObjectId::from(hash.digest()); let object_path = loose::hash_path(&id, self.path.clone()); let object_dir = object_path .parent() .expect("each object path has a 1 hex-bytes directory"); if let Err(err) = fs::create_dir(object_dir) { match err.kind() { io::ErrorKind::AlreadyExists => {} _ => return Err(err.into()), } } let file = file.into_inner(); let res = file.persist(&object_path); // On windows, we assume that such errors are due to its special filesystem semantics, // on any other platform that would be a legitimate error though. #[cfg(windows)] if let Err(err) = &res { if err.error.kind() == std::io::ErrorKind::PermissionDenied || err.error.kind() == std::io::ErrorKind::AlreadyExists { return Ok(id); } } res.map_err(|err| Error::Persist { source: err, target: object_path, })?; Ok(id) } } gix-odb-0.66.0/src/store_impls/mod.rs000064400000000000000000000000401046102023000155140ustar 00000000000000pub mod dynamic; pub mod loose; gix-odb-0.66.0/src/traits.rs000064400000000000000000000034561046102023000137210ustar 00000000000000use crate::find; /// A way to obtain object properties without fully decoding it. pub trait Header { /// Try to read the header of the object associated with `id` or return `None` if it could not be found. fn try_header(&self, id: &gix_hash::oid) -> Result, gix_object::find::Error>; } mod _impls { use std::{ops::Deref, rc::Rc, sync::Arc}; use gix_hash::oid; use crate::find::Header; impl crate::Header for &T where T: crate::Header, { fn try_header(&self, id: &oid) -> Result, gix_object::find::Error> { (*self).try_header(id) } } impl crate::Header for Rc where T: crate::Header, { fn try_header(&self, id: &oid) -> Result, gix_object::find::Error> { self.deref().try_header(id) } } impl crate::Header for Arc where T: crate::Header, { fn try_header(&self, id: &oid) -> Result, gix_object::find::Error> { self.deref().try_header(id) } } } mod ext { use crate::find; /// An extension trait with convenience functions. pub trait HeaderExt: super::Header { /// Like [`try_header(…)`][super::Header::try_header()], but flattens the `Result>` into a single `Result` making a non-existing object an error. fn header(&self, id: impl AsRef) -> Result { let id = id.as_ref(); self.try_header(id) .map_err(gix_object::find::existing::Error::Find)? .ok_or_else(|| gix_object::find::existing::Error::NotFound { oid: id.to_owned() }) } } impl HeaderExt for T {} } pub use ext::HeaderExt;
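// A hedged usage sketch of the traits above (illustrative only; `odb` is any type implementing
// `Header`, `id` is a `gix_hash::ObjectId`, and the exact shape of `find::Header` is not shown here):
//
//     use gix_odb::HeaderExt;
//
//     let header = odb.header(&id)?; // errors if the object does not exist
//     if odb.try_header(&id)?.is_none() {
//         eprintln!("{id} is not present in the object database");
//     }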