gix-index-0.32.1/.cargo_vcs_info.json0000644000000001470000000000100130150ustar { "git": { "sha1": "54ac55946bb04635cd74582a1ce2e4bee70f2e60" }, "path_in_vcs": "gix-index" }gix-index-0.32.1/Cargo.toml0000644000000043640000000000100110200ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.65" name = "gix-index" version = "0.32.1" authors = ["Sebastian Thiel "] include = [ "src/**/*", "LICENSE-*", "README.md", ] autotests = false description = "A work-in-progress crate of the gitoxide project dedicated implementing the git index file" readme = "README.md" license = "MIT OR Apache-2.0" repository = "https://github.com/Byron/gitoxide" [package.metadata.docs.rs] features = [ "document-features", "serde", ] [lib] test = true doctest = false [dependencies.bitflags] version = "2" [dependencies.bstr] version = "1.3.0" default-features = false [dependencies.document-features] version = "0.2.0" optional = true [dependencies.filetime] version = "0.2.15" [dependencies.fnv] version = "1.0.7" [dependencies.gix-bitmap] version = "^0.2.11" [dependencies.gix-features] version = "^0.38.1" features = [ "rustsha1", "progress", ] [dependencies.gix-fs] version = "^0.10.2" [dependencies.gix-hash] version = "^0.14.2" [dependencies.gix-lock] version = "^13.0.0" [dependencies.gix-object] version = "^0.42.1" [dependencies.gix-traverse] version = "^0.39.0" [dependencies.gix-utils] version = "^0.1.12" [dependencies.hashbrown] version = "0.14.3" [dependencies.itoa] version = "1.0.3" [dependencies.memmap2] version = "0.9.0" [dependencies.serde] version = "1.0.114" features = ["derive"] optional = true default-features = false [dependencies.smallvec] version = "1.7.0" [dependencies.thiserror] version = "1.0.32" [features] serde = [ "dep:serde", "smallvec/serde", "gix-hash/serde", ] [target."cfg(not(windows))".dependencies.libc] version = "0.2.149" [target."cfg(not(windows))".dependencies.rustix] version = "0.38.20" features = [ "std", "fs", ] default-features = false gix-index-0.32.1/Cargo.toml.orig000064400000000000000000000032761046102023000145020ustar 00000000000000[package] name = "gix-index" version = "0.32.1" repository = "https://github.com/Byron/gitoxide" license = "MIT OR Apache-2.0" description = "A work-in-progress crate of the gitoxide project dedicated implementing the git index file" authors = ["Sebastian Thiel "] edition = "2021" include = ["src/**/*", "LICENSE-*", "README.md"] rust-version = "1.65" autotests = false [lib] doctest = false test = true [features] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. 
serde = ["dep:serde", "smallvec/serde", "gix-hash/serde"] [dependencies] gix-features = { version = "^0.38.1", path = "../gix-features", features = [ "rustsha1", "progress", ] } gix-hash = { version = "^0.14.2", path = "../gix-hash" } gix-bitmap = { version = "^0.2.11", path = "../gix-bitmap" } gix-object = { version = "^0.42.1", path = "../gix-object" } gix-traverse = { version = "^0.39.0", path = "../gix-traverse" } gix-lock = { version = "^13.0.0", path = "../gix-lock" } gix-fs = { version = "^0.10.2", path = "../gix-fs" } gix-utils = { version = "^0.1.12", path = "../gix-utils" } hashbrown = "0.14.3" fnv = "1.0.7" thiserror = "1.0.32" memmap2 = "0.9.0" filetime = "0.2.15" bstr = { version = "1.3.0", default-features = false } serde = { version = "1.0.114", optional = true, default-features = false, features = [ "derive", ] } smallvec = "1.7.0" itoa = "1.0.3" bitflags = "2" document-features = { version = "0.2.0", optional = true } [target.'cfg(not(windows))'.dependencies] rustix = { version = "0.38.20", default-features = false, features = [ "std", "fs", ] } libc = { version = "0.2.149" } [package.metadata.docs.rs] features = ["document-features", "serde"] gix-index-0.32.1/LICENSE-APACHE000064400000000000000000000247461046102023000135440ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. gix-index-0.32.1/LICENSE-MIT000064400000000000000000000017771046102023000132530ustar 00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. gix-index-0.32.1/README.md000064400000000000000000000010301046102023000130540ustar 00000000000000 #### Test fixtures Most of the test indices are snatched directly from the unit test suite of `git` itself, usually by running something like the following ```shell ./t1700-split-index.sh -r 2 --debug ``` Then one finds all test state and the index in particular in `trash directory/t1700-split-index/.git/index` and can possibly copy it over and use as fixture. 
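Such a fixture can then be opened with `gix-index` itself, mirroring what this crate's own tests do; a minimal sketch (the fixture path is made up for illustration):

```rust
let path = std::path::Path::new("tests/fixtures/loose_index/t1700.git-index");
let file = gix_index::File::at(path, gix_hash::Kind::Sha1, false, Default::default())
    .expect("a valid index fixture");
assert!(!file.entries().is_empty());
```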
The preferred way is to find a test of interest, and use its setup code within one of our own fixture scripts that are executed once to generate the file of interest. gix-index-0.32.1/src/access/mod.rs000064400000000000000000000557411046102023000147740ustar 00000000000000use std::{cmp::Ordering, ops::Range}; use bstr::{BStr, ByteSlice, ByteVec}; use filetime::FileTime; use crate::entry::{Stage, StageRaw}; use crate::{entry, extension, AccelerateLookup, Entry, PathStorage, PathStorageRef, State, Version}; // TODO: integrate this somehow, somewhere, depending on later usage. #[allow(dead_code)] mod sparse; /// General information and entries impl State { /// Return the version used to store this state's information on disk. pub fn version(&self) -> Version { self.version } /// Returns time at which the state was created, indicating its freshness compared to other files on disk. pub fn timestamp(&self) -> FileTime { self.timestamp } /// Updates the timestamp of this state, indicating its freshness compared to other files on disk. /// /// Be careful about using this as setting a timestamp without correctly updating the index /// **will cause (file system) race conditions** see racy-git.txt in the git documentation /// for more details. pub fn set_timestamp(&mut self, timestamp: FileTime) { self.timestamp = timestamp } /// Return the kind of hashes used in this instance. pub fn object_hash(&self) -> gix_hash::Kind { self.object_hash } /// Return our entries pub fn entries(&self) -> &[Entry] { &self.entries } /// Return our path backing, the place which keeps all paths one after another, with entries storing only the range to access them. pub fn path_backing(&self) -> &PathStorageRef { &self.path_backing } /// Runs `filter_map` on all entries, returning an iterator over all paths along with the result of `filter_map`. pub fn entries_with_paths_by_filter_map<'a, T>( &'a self, mut filter_map: impl FnMut(&'a BStr, &Entry) -> Option + 'a, ) -> impl Iterator + 'a { self.entries.iter().filter_map(move |e| { let p = e.path(self); filter_map(p, e).map(|t| (p, t)) }) } /// Return mutable entries along with their path, as obtained from `backing`. pub fn entries_mut_with_paths_in<'state, 'backing>( &'state mut self, backing: &'backing PathStorageRef, ) -> impl Iterator { self.entries.iter_mut().map(move |e| { let path = backing[e.path.clone()].as_bstr(); (e, path) }) } /// Find the entry index in [`entries()`][State::entries()] matching the given repository-relative /// `path` and `stage`, or `None`. /// /// Use the index for accessing multiple stages if they exists, but at least the single matching entry. pub fn entry_index_by_path_and_stage(&self, path: &BStr, stage: entry::Stage) -> Option { let mut stage_cmp = Ordering::Equal; let idx = self .entries .binary_search_by(|e| { let res = e.path(self).cmp(path); if res.is_eq() { stage_cmp = e.stage().cmp(&stage); } res }) .ok()?; self.entry_index_by_idx_and_stage(path, idx, stage as StageRaw, stage_cmp) } /// Walk as far in `direction` as possible, with [`Ordering::Greater`] towards higher stages, and [`Ordering::Less`] /// towards lower stages, and return the lowest or highest seen stage. /// Return `None` if there is no greater or smaller stage. fn walk_entry_stages(&self, path: &BStr, base: usize, direction: Ordering) -> Option { match direction { Ordering::Greater => self .entries .get(base + 1..)? 
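                // Walk forward over the following entries for as long as they still share `path`;
                // the last such entry carries the highest stage.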
.iter() .enumerate() .take_while(|(_, e)| e.path(self) == path) .last() .map(|(idx, _)| base + 1 + idx), Ordering::Equal => Some(base), Ordering::Less => self.entries[..base] .iter() .enumerate() .rev() .take_while(|(_, e)| e.path(self) == path) .last() .map(|(idx, _)| idx), } } fn entry_index_by_idx_and_stage( &self, path: &BStr, idx: usize, wanted_stage: entry::StageRaw, stage_cmp: Ordering, ) -> Option { match stage_cmp { Ordering::Greater => self.entries[..idx] .iter() .enumerate() .rev() .take_while(|(_, e)| e.path(self) == path) .find_map(|(idx, e)| (e.stage_raw() == wanted_stage).then_some(idx)), Ordering::Equal => Some(idx), Ordering::Less => self .entries .get(idx + 1..)? .iter() .enumerate() .take_while(|(_, e)| e.path(self) == path) .find_map(|(ofs, e)| (e.stage_raw() == wanted_stage).then_some(idx + ofs + 1)), } } /// Return a data structure to help with case-insensitive lookups. /// /// It's required perform any case-insensitive lookup. /// TODO: needs multi-threaded insertion, raw-table to have multiple locks depending on bucket. pub fn prepare_icase_backing(&self) -> AccelerateLookup<'_> { let _span = gix_features::trace::detail!("prepare_icase_backing", entries = self.entries.len()); let mut out = AccelerateLookup::with_capacity(self.entries.len()); for entry in &self.entries { let entry_path = entry.path(self); let hash = AccelerateLookup::icase_hash(entry_path); out.icase_entries .insert_unique(hash, entry, |e| AccelerateLookup::icase_hash(e.path(self))); let mut last_pos = entry_path.len(); while let Some(slash_idx) = entry_path[..last_pos].rfind_byte(b'/') { let dir = entry_path[..slash_idx].as_bstr(); last_pos = slash_idx; let dir_range = entry.path.start..(entry.path.start + dir.len()); let hash = AccelerateLookup::icase_hash(dir); if out .icase_dirs .find(hash, |dir| { dir.path(self) == self.path_backing[dir_range.clone()].as_bstr() }) .is_none() { out.icase_dirs.insert_unique( hash, crate::DirEntry { entry, dir_end: dir_range.end, }, |dir| AccelerateLookup::icase_hash(dir.path(self)), ); } else { break; } } } gix_features::trace::debug!(directories = out.icase_dirs.len(), "stored directories"); out } /// Return the entry at `path` that is at the lowest available stage, using `lookup` for acceleration. /// It must have been created from this instance, and was ideally kept up-to-date with it. /// /// If `ignore_case` is `true`, a case-insensitive (ASCII-folding only) search will be performed. pub fn entry_by_path_icase<'a>( &'a self, path: &BStr, ignore_case: bool, lookup: &AccelerateLookup<'a>, ) -> Option<&'a Entry> { lookup .icase_entries .find(AccelerateLookup::icase_hash(path), |e| { let entry_path = e.path(self); if entry_path == path { return true; }; if !ignore_case { return false; } entry_path.eq_ignore_ascii_case(path) }) .copied() } /// Return the entry (at any stage) that is inside of `directory`, or `None`, /// using `lookup` for acceleration. /// Note that submodules are not detected as directories and the user should /// make another call to [`entry_by_path_icase()`](Self::entry_by_path_icase) to check for this /// possibility. Doing so might also reveal a sparse directory. 
/// /// If `ignore_case` is set pub fn entry_closest_to_directory_icase<'a>( &'a self, directory: &BStr, ignore_case: bool, lookup: &AccelerateLookup<'a>, ) -> Option<&Entry> { lookup .icase_dirs .find(AccelerateLookup::icase_hash(directory), |dir| { let dir_path = dir.path(self); if dir_path == directory { return true; }; if !ignore_case { return false; } dir_path.eq_ignore_ascii_case(directory) }) .map(|dir| dir.entry) } /// Return the entry (at any stage) that is inside of `directory`, or `None`. /// Note that submodules are not detected as directories and the user should /// make another call to [`entry_by_path_icase()`](Self::entry_by_path_icase) to check for this /// possibility. Doing so might also reveal a sparse directory. /// /// Note that this is a case-sensitive search. pub fn entry_closest_to_directory(&self, directory: &BStr) -> Option<&Entry> { let idx = self.entry_index_by_path(directory).err()?; for entry in &self.entries[idx..] { let path = entry.path(self); if path.get(..directory.len())? != directory { break; } let dir_char = path.get(directory.len())?; if *dir_char > b'/' { break; } if *dir_char < b'/' { continue; } return Some(entry); } None } /// Find the entry index in [`entries()[..upper_bound]`][State::entries()] matching the given repository-relative /// `path` and `stage`, or `None`. /// /// Use the index for accessing multiple stages if they exists, but at least the single matching entry. /// /// # Panics /// /// If `upper_bound` is out of bounds of our entries array. pub fn entry_index_by_path_and_stage_bounded( &self, path: &BStr, stage: entry::Stage, upper_bound: usize, ) -> Option { self.entries[..upper_bound] .binary_search_by(|e| e.path(self).cmp(path).then_with(|| e.stage().cmp(&stage))) .ok() } /// Like [`entry_index_by_path_and_stage()`](State::entry_index_by_path_and_stage()), /// but returns the entry instead of the index. pub fn entry_by_path_and_stage(&self, path: &BStr, stage: entry::Stage) -> Option<&Entry> { self.entry_index_by_path_and_stage(path, stage) .map(|idx| &self.entries[idx]) } /// Return the entry at `path` that is either at stage 0, or at stage 2 (ours) in case of a merge conflict. /// /// Using this method is more efficient in comparison to doing two searches, one for stage 0 and one for stage 2. pub fn entry_by_path(&self, path: &BStr) -> Option<&Entry> { let mut stage_at_index = 0; let idx = self .entries .binary_search_by(|e| { let res = e.path(self).cmp(path); if res.is_eq() { stage_at_index = e.stage_raw(); } res }) .ok()?; let idx = if stage_at_index == 0 || stage_at_index == 2 { idx } else { self.entry_index_by_idx_and_stage(path, idx, Stage::Ours as StageRaw, stage_at_index.cmp(&2))? }; Some(&self.entries[idx]) } /// Return the index at `Ok(index)` where the entry matching `path` (in any stage) can be found, or return /// `Err(index)` to indicate the insertion position at which an entry with `path` would fit in. pub fn entry_index_by_path(&self, path: &BStr) -> Result { self.entries.binary_search_by(|e| e.path(self).cmp(path)) } /// Return the slice of entries which all share the same `prefix`, or `None` if there isn't a single such entry. /// /// If `prefix` is empty, all entries are returned. pub fn prefixed_entries(&self, prefix: &BStr) -> Option<&[Entry]> { self.prefixed_entries_range(prefix).map(|range| &self.entries[range]) } /// Return the range of entries which all share the same `prefix`, or `None` if there isn't a single such entry. /// /// If `prefix` is empty, the range will include all entries. 
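    ///
    /// A sketch of listing all entries below a directory (`"src/"` is an illustrative prefix; not run as a doctest since doctests are disabled for this crate):
    ///
    /// ```no_run
    /// fn list_prefix(state: &gix_index::State) {
    ///     if let Some(range) = state.prefixed_entries_range("src/".into()) {
    ///         for entry in &state.entries()[range] {
    ///             println!("{}", entry.path(state));
    ///         }
    ///     }
    /// }
    /// ```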
pub fn prefixed_entries_range(&self, prefix: &BStr) -> Option> { if prefix.is_empty() { return Some(0..self.entries.len()); } let prefix_len = prefix.len(); let mut low = self.entries.partition_point(|e| { e.path(self) .get(..prefix_len) .map_or_else(|| e.path(self) <= &prefix[..e.path.len()], |p| p < prefix) }); let mut high = low + self.entries[low..].partition_point(|e| e.path(self).get(..prefix_len).map_or(false, |p| p <= prefix)); let low_entry = &self.entries.get(low)?; if low_entry.stage_raw() != 0 { low = self .walk_entry_stages(low_entry.path(self), low, Ordering::Less) .unwrap_or(low); } if let Some(high_entry) = self.entries.get(high) { if high_entry.stage_raw() != 0 { high = self .walk_entry_stages(high_entry.path(self), high, Ordering::Less) .unwrap_or(high); } } (low != high).then_some(low..high) } /// Return the entry at `idx` or _panic_ if the index is out of bounds. /// /// The `idx` is typically returned by [`entry_by_path_and_stage()`][State::entry_by_path_and_stage()]. pub fn entry(&self, idx: usize) -> &Entry { &self.entries[idx] } /// Returns a boolean value indicating whether the index is sparse or not. /// /// An index is sparse if it contains at least one [`Mode::DIR`][entry::Mode::DIR] entry. pub fn is_sparse(&self) -> bool { self.is_sparse } /// Return the range of entries that exactly match the given `path`, in all available stages, or `None` if no entry with such /// path exists. /// /// The range can be used to access the respective entries via [`entries()`](Self::entries()) or [`entries_mut()](Self::entries_mut()). pub fn entry_range(&self, path: &BStr) -> Option> { let mut stage_at_index = 0; let idx = self .entries .binary_search_by(|e| { let res = e.path(self).cmp(path); if res.is_eq() { stage_at_index = e.stage_raw(); } res }) .ok()?; let (start, end) = ( self.walk_entry_stages(path, idx, Ordering::Less).unwrap_or(idx), self.walk_entry_stages(path, idx, Ordering::Greater).unwrap_or(idx) + 1, ); Some(start..end) } } impl<'a> AccelerateLookup<'a> { fn with_capacity(cap: usize) -> Self { let ratio_of_entries_to_dirs_in_webkit = 20; // 400k entries and 20k dirs Self { icase_entries: hashbrown::HashTable::with_capacity(cap), icase_dirs: hashbrown::HashTable::with_capacity(cap / ratio_of_entries_to_dirs_in_webkit), } } fn icase_hash(data: &BStr) -> u64 { use std::hash::Hasher; let mut hasher = fnv::FnvHasher::default(); for b in data.as_bytes() { hasher.write_u8(b.to_ascii_lowercase()); } hasher.finish() } } /// Mutation impl State { /// After usage of the storage obtained by [`take_path_backing()`][Self::take_path_backing()], return it here. /// Note that it must not be empty. pub fn return_path_backing(&mut self, backing: PathStorage) { debug_assert!( self.path_backing.is_empty(), "BUG: return path backing only after taking it, once" ); self.path_backing = backing; } /// Return mutable entries in a slice. pub fn entries_mut(&mut self) -> &mut [Entry] { &mut self.entries } /// Return a writable slice to entries and read-access to their path storage at the same time. pub fn entries_mut_and_pathbacking(&mut self) -> (&mut [Entry], &PathStorageRef) { (&mut self.entries, &self.path_backing) } /// Return mutable entries along with their paths in an iterator. pub fn entries_mut_with_paths(&mut self) -> impl Iterator { let paths = &self.path_backing; self.entries.iter_mut().map(move |e| { let path = paths[e.path.clone()].as_bstr(); (e, path) }) } /// Return all parts that relate to entries, which includes path storage. 
/// /// This can be useful for obtaining a standalone, boxable iterator pub fn into_entries(self) -> (Vec, PathStorage) { (self.entries, self.path_backing) } /// Sometimes it's needed to remove the path backing to allow certain mutation to happen in the state while supporting reading the entry's /// path. pub fn take_path_backing(&mut self) -> PathStorage { assert_eq!( self.entries.is_empty(), self.path_backing.is_empty(), "BUG: cannot take out backing multiple times" ); std::mem::take(&mut self.path_backing) } /// Like [`entry_index_by_path_and_stage()`][State::entry_index_by_path_and_stage()], /// but returns the mutable entry instead of the index. pub fn entry_mut_by_path_and_stage(&mut self, path: &BStr, stage: entry::Stage) -> Option<&mut Entry> { self.entry_index_by_path_and_stage(path, stage) .map(|idx| &mut self.entries[idx]) } /// Push a new entry containing `stat`, `id`, `flags` and `mode` and `path` to the end of our storage, without performing /// any sanity checks. This means it's possible to push a new entry to the same path on the same stage and even after sorting /// the entries lookups may still return the wrong one of them unless the correct binary search criteria is chosen. /// /// Note that this *is likely* to break invariants that will prevent further lookups by path unless /// [`entry_index_by_path_and_stage_bounded()`][State::entry_index_by_path_and_stage_bounded()] is used with /// the `upper_bound` being the amount of entries before the first call to this method. /// /// Alternatively, make sure to call [`sort_entries()`][State::sort_entries()] before entry lookup by path to restore /// the invariant. pub fn dangerously_push_entry( &mut self, stat: entry::Stat, id: gix_hash::ObjectId, flags: entry::Flags, mode: entry::Mode, path: &BStr, ) { let path = { let path_start = self.path_backing.len(); self.path_backing.push_str(path); path_start..self.path_backing.len() }; self.entries.push(Entry { stat, id, flags, mode, path, }); } /// Unconditionally sort entries as needed to perform lookups quickly. pub fn sort_entries(&mut self) { let path_backing = &self.path_backing; self.entries.sort_by(|a, b| { Entry::cmp_filepaths(a.path_in(path_backing), b.path_in(path_backing)) .then_with(|| a.stage().cmp(&b.stage())) }); } /// Similar to [`sort_entries()`][State::sort_entries()], but applies `compare` after comparing /// by path and stage as a third criteria. pub fn sort_entries_by(&mut self, mut compare: impl FnMut(&Entry, &Entry) -> Ordering) { let path_backing = &self.path_backing; self.entries.sort_by(|a, b| { Entry::cmp_filepaths(a.path_in(path_backing), b.path_in(path_backing)) .then_with(|| a.stage().cmp(&b.stage())) .then_with(|| compare(a, b)) }); } /// Physically remove all entries for which `should_remove(idx, path, entry)` returns `true`, traversing them from first to last. /// /// Note that the memory used for the removed entries paths is not freed, as it's append-only. /// /// ### Performance /// /// To implement this operation typically, one would rather add [entry::Flags::REMOVE] to each entry to remove /// them when [writing the index](Self::write_to()). pub fn remove_entries(&mut self, mut should_remove: impl FnMut(usize, &BStr, &mut Entry) -> bool) { let mut index = 0; let paths = &self.path_backing; self.entries.retain_mut(|e| { let path = e.path_in(paths); let res = !should_remove(index, path, e); index += 1; res }); } } /// Extensions impl State { /// Access the `tree` extension. 
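    /// (In git this is known as the "cache tree" extension, which records the object ids of trees
    /// spanning ranges of index entries so that writing trees from the index can be sped up.)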
pub fn tree(&self) -> Option<&extension::Tree> { self.tree.as_ref() } /// Access the `link` extension. pub fn link(&self) -> Option<&extension::Link> { self.link.as_ref() } /// Obtain the resolve-undo extension. pub fn resolve_undo(&self) -> Option<&extension::resolve_undo::Paths> { self.resolve_undo.as_ref() } /// Obtain the untracked extension. pub fn untracked(&self) -> Option<&extension::UntrackedCache> { self.untracked.as_ref() } /// Obtain the fsmonitor extension. pub fn fs_monitor(&self) -> Option<&extension::FsMonitor> { self.fs_monitor.as_ref() } /// Return `true` if the end-of-index extension was present when decoding this index. pub fn had_end_of_index_marker(&self) -> bool { self.end_of_index_at_decode_time } /// Return `true` if the offset-table extension was present when decoding this index. pub fn had_offset_table(&self) -> bool { self.offset_table_at_decode_time } } #[cfg(test)] mod tests { use std::path::{Path, PathBuf}; #[test] fn entry_by_path_with_conflicting_file() { let file = PathBuf::from("tests") .join("fixtures") .join(Path::new("loose_index").join("conflicting-file.git-index")); let file = crate::File::at(file, gix_hash::Kind::Sha1, false, Default::default()).expect("valid file"); assert_eq!( file.entries().len(), 3, "we have a set of conflict entries for a single file" ); for idx in 0..3 { for wanted_stage in 1..=3 { let actual_idx = file .entry_index_by_idx_and_stage( "file".into(), idx, wanted_stage, (idx + 1).cmp(&(wanted_stage as usize)), ) .expect("found"); assert_eq!( actual_idx + 1, wanted_stage as usize, "the index and stage have a relation, and that is upheld if we search correctly" ); } } } } gix-index-0.32.1/src/access/sparse.rs000064400000000000000000000057761046102023000155150ustar 00000000000000/// Configuration related to sparse indexes. #[derive(Debug, Default, Clone, Copy)] pub struct Options { /// If true, certain entries in the index will be excluded / skipped for certain operations, /// based on the ignore patterns in the `.git/info/sparse-checkout` file. These entries will /// carry the [`SKIP_WORKTREE`][crate::entry::Flags::SKIP_WORKTREE] flag. /// /// This typically is the value of `core.sparseCheckout` in the git configuration. pub sparse_checkout: bool, /// Interpret the `.git/info/sparse-checkout` file using _cone mode_. /// /// If true, _cone mode_ is active and entire directories will be included in the checkout, as well as files in the root /// of the repository. /// If false, non-cone mode is active and entries to _include_ will be matched with patterns like those found in `.gitignore` files. /// /// This typically is the value of `core.sparseCheckoutCone` in the git configuration. pub directory_patterns_only: bool, /// If true, will attempt to write a sparse index file which only works in cone mode. /// /// A sparse index has [`DIR` entries][crate::entry::Mode::DIR] that represent entire directories to be skipped /// during checkout and other operations due to the added presence of /// the [`SKIP_WORKTREE`][crate::entry::Flags::SKIP_WORKTREE] flag. /// /// This is typically the value of `index.sparse` in the git configuration. pub write_sparse_index: bool, } impl Options { /// Derive a valid mode from all parameters that affect the 'sparseness' of the index. /// /// Some combinations of them degenerate to one particular mode. 
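    ///
    /// For example, `sparse_checkout = true` together with `directory_patterns_only = true` but
    /// `write_sparse_index = false` yields [`Mode::IncludeDirectoriesStoreAllEntriesSkipUnmatched`],
    /// while `sparse_checkout = false` always yields [`Mode::Disabled`], no matter the other flags.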
pub fn sparse_mode(&self) -> Mode { match ( self.sparse_checkout, self.directory_patterns_only, self.write_sparse_index, ) { (true, true, true) => Mode::IncludeDirectoriesStoreIncludedEntriesAndExcludedDirs, (true, true, false) => Mode::IncludeDirectoriesStoreAllEntriesSkipUnmatched, (true, false, _) => Mode::IncludeByIgnorePatternStoreAllEntriesSkipUnmatched, (false, _, _) => Mode::Disabled, } } } /// Describes the configuration how a sparse index should be written, or if one should be written at all. #[derive(Debug)] pub enum Mode { /// index with DIR entries for exclusion and included entries, directory-only include patterns in `.git/info/sparse-checkout` file. IncludeDirectoriesStoreIncludedEntriesAndExcludedDirs, /// index with all file entries and skip worktree flags for exclusion, directory-only include patterns in `.git/info/sparse-checkout` file. IncludeDirectoriesStoreAllEntriesSkipUnmatched, /// index with all file entries and skip-worktree flags for exclusion, `ignore` patterns to include entries in `.git/info/sparse-checkout` file. IncludeByIgnorePatternStoreAllEntriesSkipUnmatched, /// index with all entries, non is excluded, `.git/info/sparse-checkout` file is not considered, a regular index. Disabled, } gix-index-0.32.1/src/decode/entries.rs000064400000000000000000000144771046102023000156510ustar 00000000000000use std::ops::Range; use crate::{ decode::{self, header}, entry, util::{read_u32, split_at_byte_exclusive, split_at_pos, var_int}, Entry, Version, }; /// a guess directly from git sources pub const AVERAGE_V4_DELTA_PATH_LEN_IN_BYTES: usize = 80; pub struct Outcome { pub is_sparse: bool, } pub fn estimate_path_storage_requirements_in_bytes( num_entries: u32, on_disk_size: usize, offset_to_extensions: Option, object_hash: gix_hash::Kind, version: Version, ) -> usize { const fn on_disk_entry_sans_path(object_hash: gix_hash::Kind) -> usize { 8 + // ctime 8 + // mtime (4 * 6) + // various stat fields 2 + // flag, ignore extended flag as we'd rather overallocate a bit object_hash.len_in_bytes() } match version { Version::V3 | Version::V2 => { let size_of_entries_block = offset_to_extensions.unwrap_or(on_disk_size); size_of_entries_block .saturating_sub(num_entries as usize * on_disk_entry_sans_path(object_hash)) .saturating_sub(header::SIZE) } Version::V4 => num_entries as usize * AVERAGE_V4_DELTA_PATH_LEN_IN_BYTES, } } /// Note that `data` must point to the beginning of the entries, right past the header. pub fn chunk<'a>( mut data: &'a [u8], entries: &mut Vec, path_backing: &mut Vec, num_entries: u32, object_hash: gix_hash::Kind, version: Version, ) -> Result<(Outcome, &'a [u8]), decode::Error> { let mut is_sparse = false; let has_delta_paths = version == Version::V4; let mut prev_path = None; let mut delta_buf = Vec::::with_capacity(AVERAGE_V4_DELTA_PATH_LEN_IN_BYTES); for idx in 0..num_entries { let (entry, remaining) = load_one( data, path_backing, object_hash.len_in_bytes(), has_delta_paths, prev_path, ) .ok_or(decode::Error::Entry { index: idx })?; data = remaining; if entry.mode.is_sparse() { is_sparse = true; } // TODO: entries are actually in an intrusive collection, with path as key. Could be set for us. This affects 'ignore_case' which we // also don't yet handle but probably could, maybe even smartly with the collection. // For now it's unclear to me how they access the index, they could iterate quickly, and have fast access by path. 
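        // Record the path range of the entry we're about to push: in V4, the next entry's path
        // is delta-encoded against it and reconstructed via `delta_buf`.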
entries.push(entry); prev_path = entries.last().map(|e| (e.path.clone(), &mut delta_buf)); } Ok((Outcome { is_sparse }, data)) } /// Note that `prev_path` is only useful if the version is V4 fn load_one<'a>( data: &'a [u8], path_backing: &mut Vec, hash_len: usize, has_delta_paths: bool, prev_path_and_buf: Option<(Range, &mut Vec)>, ) -> Option<(Entry, &'a [u8])> { let first_byte_of_entry = data.as_ptr() as usize; let (ctime_secs, data) = read_u32(data)?; let (ctime_nsecs, data) = read_u32(data)?; let (mtime_secs, data) = read_u32(data)?; let (mtime_nsecs, data) = read_u32(data)?; let (dev, data) = read_u32(data)?; let (ino, data) = read_u32(data)?; let (mode, data) = read_u32(data)?; let (uid, data) = read_u32(data)?; let (gid, data) = read_u32(data)?; let (size, data) = read_u32(data)?; let (hash, data) = split_at_pos(data, hash_len)?; let (flags, data) = read_u16(data)?; let flags = entry::at_rest::Flags::from_bits_retain(flags); let (flags, data) = if flags.contains(entry::at_rest::Flags::EXTENDED) { let (extended_flags, data) = read_u16(data)?; let extended_flags = entry::at_rest::FlagsExtended::from_bits(extended_flags)?; let extended_flags = extended_flags.to_flags()?; (flags.to_memory() | extended_flags, data) } else { (flags.to_memory(), data) }; let start = path_backing.len(); let data = if has_delta_paths { let (strip_len, data) = var_int(data)?; if let Some((prev_path, buf)) = prev_path_and_buf { let end = prev_path.end.checked_sub(strip_len.try_into().ok()?)?; let copy_len = end.checked_sub(prev_path.start)?; if copy_len > 0 { buf.resize(copy_len, 0); buf.copy_from_slice(&path_backing[prev_path.start..end]); path_backing.extend_from_slice(buf); } } let (path, data) = split_at_byte_exclusive(data, 0)?; path_backing.extend_from_slice(path); data } else { let (path, data) = if flags.contains(entry::Flags::PATH_LEN) { split_at_byte_exclusive(data, 0)? } else { let path_len = (flags.bits() & entry::Flags::PATH_LEN.bits()) as usize; let (path, data) = split_at_pos(data, path_len)?; (path, skip_padding(data, first_byte_of_entry)) }; // TODO(perf): for some reason, this causes tremendous `memmove` time even though the backing // has enough capacity most of the time. path_backing.extend_from_slice(path); data }; let path_range = start..path_backing.len(); Some(( Entry { stat: entry::Stat { ctime: entry::stat::Time { secs: ctime_secs, nsecs: ctime_nsecs, }, mtime: entry::stat::Time { secs: mtime_secs, nsecs: mtime_nsecs, }, dev, ino, uid, gid, size, }, id: gix_hash::ObjectId::from_bytes_or_panic(hash), flags: flags & !entry::Flags::PATH_LEN, // This forces us to add the bits we need before being able to use them. mode: entry::Mode::from_bits_truncate(mode), path: path_range, }, data, )) } #[inline] fn skip_padding(data: &[u8], first_byte_of_entry: usize) -> &[u8] { let current_offset = data.as_ptr() as usize; let c_padding = (current_offset - first_byte_of_entry + 8) & !7; let skip = (first_byte_of_entry + c_padding) - current_offset; &data[skip..] } #[inline] fn read_u16(data: &[u8]) -> Option<(u16, &[u8])> { split_at_pos(data, 2).map(|(num, data)| (u16::from_be_bytes(num.try_into().unwrap()), data)) } gix-index-0.32.1/src/decode/header.rs000064400000000000000000000025551046102023000154220ustar 00000000000000pub(crate) const SIZE: usize = 4 /*signature*/ + 4 /*version*/ + 4 /* num entries */; use crate::{util::from_be_u32, Version}; pub(crate) const SIGNATURE: &[u8] = b"DIRC"; mod error { /// The error produced when failing to decode an index header. 
#[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("{0}")] Corrupt(&'static str), #[error("Index version {0} is not supported")] UnsupportedVersion(u32), } } pub use error::Error; pub(crate) fn decode(data: &[u8], object_hash: gix_hash::Kind) -> Result<(Version, u32, &[u8]), Error> { if data.len() < (3 * 4) + object_hash.len_in_bytes() { return Err(Error::Corrupt( "File is too small even for header with zero entries and smallest hash", )); } let (signature, data) = data.split_at(4); if signature != SIGNATURE { return Err(Error::Corrupt( "Signature mismatch - this doesn't claim to be a header file", )); } let (version, data) = data.split_at(4); let version = match from_be_u32(version) { 2 => Version::V2, 3 => Version::V3, 4 => Version::V4, unknown => return Err(Error::UnsupportedVersion(unknown)), }; let (entries, data) = data.split_at(4); let entries = from_be_u32(entries); Ok((version, entries, data)) } gix-index-0.32.1/src/decode/mod.rs000064400000000000000000000351541046102023000147520ustar 00000000000000use filetime::FileTime; use crate::{entry, extension, Entry, State, Version}; mod entries; /// #[allow(clippy::empty_docs)] pub mod header; mod error { use crate::{decode, extension}; /// The error returned by [`State::from_bytes()`][crate::State::from_bytes()]. #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] Header(#[from] decode::header::Error), #[error("Could not parse entry at index {index}")] Entry { index: u32 }, #[error("Mandatory extension wasn't implemented or malformed.")] Extension(#[from] extension::decode::Error), #[error("Index trailer should have been {expected} bytes long, but was {actual}")] UnexpectedTrailerLength { expected: usize, actual: usize }, #[error("Shared index checksum was {actual_checksum} but should have been {expected_checksum}")] ChecksumMismatch { actual_checksum: gix_hash::ObjectId, expected_checksum: gix_hash::ObjectId, }, } } pub use error::Error; use gix_features::parallel::InOrderIter; use crate::util::read_u32; /// Options to define how to decode an index state [from bytes][State::from_bytes()]. #[derive(Debug, Default, Clone, Copy)] pub struct Options { /// If Some(_), we are allowed to use more than one thread. If Some(N), use no more than N threads. If Some(0)|None, use as many threads /// as there are logical cores. /// /// This applies to loading extensions in parallel to entries if the common EOIE extension is available. /// It also allows to use multiple threads for loading entries if the IEOT extension is present. pub thread_limit: Option, /// The minimum size in bytes to load extensions in their own thread, assuming there is enough `num_threads` available. /// If set to 0, for example, extensions will always be read in their own thread if enough threads are available. pub min_extension_block_in_bytes_for_threading: usize, /// Set the expected hash of this index if we are read as part of a `link` extension. /// /// We will abort reading this file if it doesn't match. pub expected_checksum: Option, } impl State { /// Decode an index state from `data` and store `timestamp` in the resulting instance for pass-through, assuming `object_hash` /// to be used through the file. Also return the stored hash over all bytes in `data` or `None` if none was written due to `index.skipHash`. 
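    ///
    /// A minimal sketch of decoding an index file read from disk; in practice [`crate::File::at()`]
    /// wraps this, and the `.git/index` path below is purely illustrative:
    ///
    /// ```no_run
    /// let path = ".git/index";
    /// let data = std::fs::read(path)?;
    /// let mtime = filetime::FileTime::from_last_modification_time(&std::fs::metadata(path)?);
    /// let (_state, _checksum) =
    ///     gix_index::State::from_bytes(&data, mtime, gix_hash::Kind::Sha1, Default::default())?;
    /// # Ok::<_, Box<dyn std::error::Error>>(())
    /// ```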
pub fn from_bytes( data: &[u8], timestamp: FileTime, object_hash: gix_hash::Kind, _options @ Options { thread_limit, min_extension_block_in_bytes_for_threading, expected_checksum, }: Options, ) -> Result<(Self, Option), Error> { let _span = gix_features::trace::detail!("gix_index::State::from_bytes()", options = ?_options); let (version, num_entries, post_header_data) = header::decode(data, object_hash)?; let start_of_extensions = extension::end_of_index_entry::decode(data, object_hash); let mut num_threads = gix_features::parallel::num_threads(thread_limit); let path_backing_buffer_size = entries::estimate_path_storage_requirements_in_bytes( num_entries, data.len(), start_of_extensions, object_hash, version, ); let (entries, ext, data) = match start_of_extensions { Some(offset) if num_threads > 1 => { let extensions_data = &data[offset..]; let index_offsets_table = extension::index_entry_offset_table::find(extensions_data, object_hash); let (entries_res, ext_res) = gix_features::parallel::threads(|scope| { let extension_loading = (extensions_data.len() > min_extension_block_in_bytes_for_threading).then({ num_threads -= 1; || { gix_features::parallel::build_thread() .name("gix-index.from_bytes.load-extensions".into()) .spawn_scoped(scope, || extension::decode::all(extensions_data, object_hash)) .expect("valid name") } }); let entries_res = match index_offsets_table { Some(entry_offsets) => { let chunk_size = (entry_offsets.len() as f32 / num_threads as f32).ceil() as usize; let entry_offsets_chunked = entry_offsets.chunks(chunk_size); let num_chunks = entry_offsets_chunked.len(); let mut threads = Vec::with_capacity(num_chunks); for (id, chunks) in entry_offsets_chunked.enumerate() { let chunks = chunks.to_vec(); threads.push( gix_features::parallel::build_thread() .name(format!("gix-index.from_bytes.read-entries.{id}")) .spawn_scoped(scope, move || { let num_entries_for_chunks = chunks.iter().map(|c| c.num_entries).sum::() as usize; let mut entries = Vec::with_capacity(num_entries_for_chunks); let path_backing_buffer_size_for_chunks = entries::estimate_path_storage_requirements_in_bytes( num_entries_for_chunks as u32, data.len() / num_chunks, start_of_extensions.map(|ofs| ofs / num_chunks), object_hash, version, ); let mut path_backing = Vec::with_capacity(path_backing_buffer_size_for_chunks); let mut is_sparse = false; for offset in chunks { let ( entries::Outcome { is_sparse: chunk_is_sparse, }, _data, ) = entries::chunk( &data[offset.from_beginning_of_file as usize..], &mut entries, &mut path_backing, offset.num_entries, object_hash, version, )?; is_sparse |= chunk_is_sparse; } Ok::<_, Error>(( id, EntriesOutcome { entries, path_backing, is_sparse, }, )) }) .expect("valid name"), ); } let mut results = InOrderIter::from(threads.into_iter().map(|thread| thread.join().unwrap())); let mut acc = results.next().expect("have at least two results, one per thread"); // We explicitly don't adjust the reserve in acc and rather allow for more copying // to happens as vectors grow to keep the peak memory size low. // NOTE: one day, we might use a memory pool for paths. We could encode the block of memory // in some bytes in the path offset. That way there is more indirection/slower access // to the path, but it would save time here. // As it stands, `git` is definitely more efficient at this and probably uses less memory too. // Maybe benchmarks can tell if that is noticeable later at 200/400GB/s memory bandwidth, or maybe just // 100GB/s on a single core. 
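                        // Merge the per-chunk results in input order: append each chunk's path backing
                        // to ours and shift the incoming entries' path ranges by the accumulated offset.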
while let (Ok(lhs), Some(res)) = (acc.as_mut(), results.next()) { match res { Ok(mut rhs) => { lhs.is_sparse |= rhs.is_sparse; let ofs = lhs.path_backing.len(); lhs.path_backing.append(&mut rhs.path_backing); lhs.entries.extend(rhs.entries.into_iter().map(|mut e| { e.path.start += ofs; e.path.end += ofs; e })); } Err(err) => { acc = Err(err); } } } acc.map(|acc| (acc, &data[data.len() - object_hash.len_in_bytes()..])) } None => entries( post_header_data, path_backing_buffer_size, num_entries, object_hash, version, ), }; let ext_res = extension_loading.map_or_else( || extension::decode::all(extensions_data, object_hash), |thread| thread.join().unwrap(), ); (entries_res, ext_res) }); let (ext, data) = ext_res?; (entries_res?.0, ext, data) } None | Some(_) => { let (entries, data) = entries( post_header_data, path_backing_buffer_size, num_entries, object_hash, version, )?; let (ext, data) = extension::decode::all(data, object_hash)?; (entries, ext, data) } }; if data.len() != object_hash.len_in_bytes() { return Err(Error::UnexpectedTrailerLength { expected: object_hash.len_in_bytes(), actual: data.len(), }); } let checksum = gix_hash::ObjectId::from_bytes_or_panic(data); let checksum = (!checksum.is_null()).then_some(checksum); if let Some((expected_checksum, actual_checksum)) = expected_checksum.zip(checksum) { if actual_checksum != expected_checksum { return Err(Error::ChecksumMismatch { actual_checksum, expected_checksum, }); } } let EntriesOutcome { entries, path_backing, mut is_sparse, } = entries; let extension::decode::Outcome { tree, link, resolve_undo, untracked, fs_monitor, is_sparse: is_sparse_from_ext, // a marker is needed in case there are no directories end_of_index, offset_table, } = ext; is_sparse |= is_sparse_from_ext; Ok(( State { object_hash, timestamp, version, entries, path_backing, is_sparse, end_of_index_at_decode_time: end_of_index, offset_table_at_decode_time: offset_table, tree, link, resolve_undo, untracked, fs_monitor, }, checksum, )) } } struct EntriesOutcome { pub entries: Vec, pub path_backing: Vec, pub is_sparse: bool, } fn entries( post_header_data: &[u8], path_backing_buffer_size: usize, num_entries: u32, object_hash: gix_hash::Kind, version: Version, ) -> Result<(EntriesOutcome, &[u8]), Error> { let mut entries = Vec::with_capacity(num_entries as usize); let mut path_backing = Vec::with_capacity(path_backing_buffer_size); entries::chunk( post_header_data, &mut entries, &mut path_backing, num_entries, object_hash, version, ) .map(|(entries::Outcome { is_sparse }, data): (entries::Outcome, &[u8])| { ( EntriesOutcome { entries, path_backing, is_sparse, }, data, ) }) } pub(crate) fn stat(data: &[u8]) -> Option<(entry::Stat, &[u8])> { let (ctime_secs, data) = read_u32(data)?; let (ctime_nsecs, data) = read_u32(data)?; let (mtime_secs, data) = read_u32(data)?; let (mtime_nsecs, data) = read_u32(data)?; let (dev, data) = read_u32(data)?; let (ino, data) = read_u32(data)?; let (uid, data) = read_u32(data)?; let (gid, data) = read_u32(data)?; let (size, data) = read_u32(data)?; Some(( entry::Stat { mtime: entry::stat::Time { secs: ctime_secs, nsecs: ctime_nsecs, }, ctime: entry::stat::Time { secs: mtime_secs, nsecs: mtime_nsecs, }, dev, ino, uid, gid, size, }, data, )) } gix-index-0.32.1/src/entry/flags.rs000064400000000000000000000140531046102023000152000ustar 00000000000000use bitflags::bitflags; use crate::entry::Stage; bitflags! { /// In-memory flags. /// /// Notably, not all of these will be persisted but can be used to aid all kinds of operations. 
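    ///
    /// The lower 12 bits mirror the on-disk path length, while bits 12 and 13 hold the merge stage.
    /// A sketch of extracting the stage from raw bits:
    ///
    /// ```no_run
    /// use gix_index::entry::{Flags, Stage};
    /// assert_eq!(Flags::from_bits_retain(1 << 12).stage(), Stage::Base);
    /// ```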
    #[derive(Debug, Clone, Copy, Eq, PartialEq)]
    pub struct Flags: u32 {
        // TODO: could we use the pathlen ourselves to save 8 bytes? And how to handle longer paths than that? 0 as sentinel maybe?
        /// The mask to obtain the length of the path associated with this entry, up to 4095 characters without extension.
        const PATH_LEN = 0x0fff;
        /// The mask to apply to obtain the stage number of an entry, encoding four values: 0 = unconflicted, 1 = base, 2 = ours, 3 = theirs.
        const STAGE_MASK = 1<<12 | 1<<13;
        /// If set, additional bits need to be written to storage.
        const EXTENDED = 1<<14;
        /// If set, the entry is assumed to match the version on the working tree, as a way to avoid `lstat()` checks.
        const ASSUME_VALID = 1 << 15;
        /// Indicates that an entry needs to be updated as its in-memory representation doesn't match what's on disk.
        const UPDATE = 1 << 16;
        /// Indicates an entry should be removed - this typically happens during writing, by simply skipping over it.
        const REMOVE = 1 << 17;
        /// Indicates that an entry is known to be up-to-date.
        const UPTODATE = 1 << 18;
        /// Only temporarily used by unpack_trees() (in C).
        const ADDED = 1 << 19;
        /// Whether an up-to-date object hash exists for the entry.
        const HASHED = 1 << 20;
        /// Set if the filesystem monitor is valid.
        const FSMONITOR_VALID = 1 << 21;
        /// Remove in work directory.
        const WORKTREE_REMOVE = 1 << 22;
        /// Set to indicate the entry exists in multiple stages at once due to conflicts.
        const CONFLICTED = 1 << 23;
        /// Indicates that the entry was already turned into a tree.
        const UNPACKED = 1 << 24;
        /// Only temporarily used by unpack_trees() (in C).
        const NEW_SKIP_WORKTREE = 1 << 25;
        /// Temporarily marks paths matched by a pathspec.
        const PATHSPEC_MATCHED = 1 << 26;
        /// When the index is split, this indicates the entry is up-to-date in the shared portion of the index.
        const UPDATE_IN_BASE = 1 << 27;
        /// Indicates the entry name is present in the base/shared index, and thus doesn't have to be stored in this one.
        const STRIP_NAME = 1 << 28;
        /// Created with `git add --intent-to-add` to mark empty entries that have their counterpart in the worktree, but not
        /// yet in the object database.
        const INTENT_TO_ADD = 1 << 29;
        /// Stored at rest.
        const SKIP_WORKTREE = 1 << 30;
        /// For future extension.
        const EXTENDED_2 = 1 << 31;
    }
}

impl Flags {
    /// Return the stage as extracted from the bits of this instance.
    pub fn stage(&self) -> Stage {
        match self.stage_raw() {
            0 => Stage::Unconflicted,
            1 => Stage::Base,
            2 => Stage::Ours,
            3 => Stage::Theirs,
            _ => unreachable!("BUG: Flags::STAGE_MASK is two bits, whose 4 possible values we have covered"),
        }
    }

    /// Return an entry's stage as a raw number between 0 and 3.
    /// Possible values are:
    ///
    /// * 0 = no conflict,
    /// * 1 = base,
    /// * 2 = ours,
    /// * 3 = theirs
    pub fn stage_raw(&self) -> u32 {
        (*self & Flags::STAGE_MASK).bits() >> 12
    }

    /// Transform ourselves into a storage representation that keeps all flags which are to be persisted,
    /// skipping all extended flags. Note that the caller has to check for the `EXTENDED` bit to be present
    /// and write extended flags as well if so.
    pub fn to_storage(mut self) -> at_rest::Flags {
        at_rest::Flags::from_bits_retain(
            {
                self.remove(Self::PATH_LEN);
                self
            }
            .bits() as u16,
        )
    }
}

pub(crate) mod at_rest {
    use bitflags::bitflags;

    bitflags! {
        /// Flags as they are serialized to a storage location.
        #[derive(Copy, Clone, Debug)]
        pub struct Flags: u16 {
            /// A portion of the flags that encodes the length of the path that follows.
const PATH_LEN = 0x0fff; const STAGE_MASK = 0x3000; /// If set, there is more extended flags past this one const EXTENDED = 0x4000; /// If set, the entry be assumed to match with the version on the working tree, as a way to avoid `lstat()` checks. const ASSUME_VALID = 0x8000; } } impl Flags { pub fn to_memory(self) -> super::Flags { super::Flags::from_bits_retain(self.bits() as u32) } } bitflags! { /// Extended flags - add flags for serialization here and offset them down to u16. #[derive(Copy, Clone, Debug, PartialEq)] pub struct FlagsExtended: u16 { const INTENT_TO_ADD = 1 << (29 - 16); const SKIP_WORKTREE = 1 << (30 - 16); } } impl FlagsExtended { pub fn from_flags(flags: super::Flags) -> Self { Self::from_bits_retain( ((flags & (super::Flags::INTENT_TO_ADD | super::Flags::SKIP_WORKTREE)).bits() >> 16) as u16, ) } pub fn to_flags(self) -> Option { super::Flags::from_bits((self.bits() as u32) << 16) } } #[cfg(test)] mod tests { use super::*; #[test] fn flags_extended_conversion() { assert_eq!( FlagsExtended::all().to_flags(), Some(super::super::Flags::INTENT_TO_ADD | super::super::Flags::SKIP_WORKTREE) ); assert_eq!( FlagsExtended::from_flags(super::super::Flags::all()), FlagsExtended::all() ); } #[test] fn flags_from_bits_with_conflict() { let input = 0b1110_0010_1000_1011; assert_eq!(Flags::from_bits_retain(input).bits(), input); } } } gix-index-0.32.1/src/entry/mod.rs000064400000000000000000000076051046102023000146700ustar 00000000000000/// The stage of an entry. #[derive(Default, Copy, Clone, Debug, PartialEq, Eq, Ord, PartialOrd, Hash)] pub enum Stage { /// This is the default, and most entries are in this stage. #[default] Unconflicted = 0, /// The entry is the common base between 'our' change and 'their' change, for comparison. Base = 1, /// The entry represents our change. Ours = 2, /// The entry represents their change. Theirs = 3, } // The stage of an entry, one of… /// * 0 = no conflict, /// * 1 = base, /// * 2 = ours, /// * 3 = theirs pub type StageRaw = u32; /// #[allow(clippy::empty_docs)] pub mod mode; mod flags; pub(crate) use flags::at_rest; pub use flags::Flags; /// #[allow(clippy::empty_docs)] pub mod stat; mod write; use bitflags::bitflags; bitflags! { /// The kind of file of an entry. #[derive(Copy, Clone, Debug, PartialEq, Eq)] pub struct Mode: u32 { /// directory (only used for sparse checkouts), equivalent to a tree, which is _excluded_ from the index via /// cone-mode. const DIR = 0o040000; /// regular file const FILE = 0o100644; /// regular file, executable const FILE_EXECUTABLE = 0o100755; /// Symbolic link const SYMLINK = 0o120000; /// A git commit for submodules const COMMIT = 0o160000; } } /// An entry's filesystem stat information. #[derive(Debug, Default, PartialEq, Eq, Hash, Ord, PartialOrd, Clone, Copy)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct Stat { /// Modification time pub mtime: stat::Time, /// Creation time pub ctime: stat::Time, /// Device number pub dev: u32, /// Inode number pub ino: u32, /// User id of the owner pub uid: u32, /// Group id of the owning group pub gid: u32, /// The size of bytes on disk. Capped to u32 so files bigger than that will need thorough additional checking pub size: u32, } mod access { use bstr::{BStr, ByteSlice}; use crate::{entry, Entry, State}; impl Entry { /// Return an entry's path, relative to the repository, which is extracted from its owning `state`. 
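        ///
        /// A sketch of listing every entry's path along with its stage (the function is illustrative):
        ///
        /// ```no_run
        /// fn print_all(state: &gix_index::State) {
        ///     for entry in state.entries() {
        ///         println!("{} {:?}", entry.path(state), entry.stage());
        ///     }
        /// }
        /// ```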
mod access {
    use bstr::{BStr, ByteSlice};

    use crate::{entry, Entry, State};

    impl Entry {
        /// Return an entry's path, relative to the repository, which is extracted from its owning `state`.
        pub fn path<'a>(&self, state: &'a State) -> &'a BStr {
            state.path_backing[self.path.clone()].as_bstr()
        }

        /// Return an entry's path using the given `backing`.
        pub fn path_in<'backing>(&self, backing: &'backing crate::PathStorageRef) -> &'backing BStr {
            backing[self.path.clone()].as_bstr()
        }

        /// Return an entry's stage. See [entry::Stage] for possible values.
        pub fn stage(&self) -> entry::Stage {
            self.flags.stage()
        }

        /// Return an entry's stage as a raw number between 0 and 3.
        /// Possible values are:
        ///
        /// * 0 = no conflict,
        /// * 1 = base,
        /// * 2 = ours,
        /// * 3 = theirs
        pub fn stage_raw(&self) -> u32 {
            self.flags.stage_raw()
        }
    }
}

mod _impls {
    use std::cmp::Ordering;

    use bstr::BStr;

    use crate::{Entry, State};

    impl Entry {
        /// Compare one entry to another by their path, comparing only their common path portion byte by byte, then falling back to
        /// entry length and stage.
        pub fn cmp(&self, other: &Self, state: &State) -> Ordering {
            let lhs = self.path(state);
            let rhs = other.path(state);
            Entry::cmp_filepaths(lhs, rhs).then_with(|| self.stage().cmp(&other.stage()))
        }

        /// Compare one entry to another by their path, comparing only their common path portion byte by byte, then falling back to
        /// entry length.
        pub fn cmp_filepaths(a: &BStr, b: &BStr) -> Ordering {
            let common_len = a.len().min(b.len());
            a[..common_len]
                .cmp(&b[..common_len])
                .then_with(|| a.len().cmp(&b.len()))
        }
    }
}
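// Editor's illustration, not part of the upstream crate: a minimal sketch of the
// ordering `Entry::cmp_filepaths` implements, assuming `Entry` from the crate root.
#[cfg(test)]
mod filepath_ordering_illustration {
    use std::cmp::Ordering;

    use bstr::ByteSlice;

    use crate::Entry;

    #[test]
    fn common_prefix_first_then_length() {
        // A path sorts before any longer path that extends it byte-wise.
        assert_eq!(Entry::cmp_filepaths(b"a".as_bstr(), b"a/b".as_bstr()), Ordering::Less);
        // Otherwise the first differing byte in the common portion decides.
        assert_eq!(Entry::cmp_filepaths(b"a/b".as_bstr(), b"a/c".as_bstr()), Ordering::Less);
    }
}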
gix-index-0.32.1/src/entry/mode.rs000064400000000000000000000073571046102023000150370ustar 00000000000000
use crate::entry::Mode;

impl Mode {
    /// Return `true` if this is a sparse entry, as it points to a directory which usually isn't what an 'unsparse' index tracks.
    pub fn is_sparse(&self) -> bool {
        *self == Self::DIR
    }
    /// Return `true` if this is a submodule entry.
    pub fn is_submodule(&self) -> bool {
        *self == Self::DIR | Self::SYMLINK
    }

    /// Convert this instance to a tree's entry mode, or return `None` if for some
    /// unexpected reason the bitflags don't resemble any known entry-mode.
    pub fn to_tree_entry_mode(&self) -> Option<gix_object::tree::EntryMode> {
        gix_object::tree::EntryMode::try_from(self.bits()).ok()
    }

    /// Compares this mode to the file system version ([`std::fs::symlink_metadata`])
    /// and returns the change needed to update this mode to match the file.
    ///
    /// * if `has_symlinks` is false, symlink entries will simply check if there
    ///   is a normal file on disk
    /// * if `executable_bit` is false, the executable bit will not be compared
    ///   and `Change::ExecutableBit` will never be generated
    ///
    /// If there is a type change then we will use whatever information is
    /// present on the FS. Specifically, if `has_symlinks` is false we will
    /// never generate `Change::Type { new_mode: Mode::SYMLINK }`, and if
    /// `executable_bit` is false we will never generate
    /// `Change::Type { new_mode: Mode::FILE_EXECUTABLE }` (all files are assumed
    /// to be not executable). That means that unstaging and staging files can be
    /// a lossy operation on such file systems.
    ///
    /// If a directory replaced a normal file/symlink we assume that the
    /// directory is a submodule. Normal (non-submodule) directories would
    /// cause a file to be deleted from the index and should be handled before
    /// calling this function.
    ///
    /// If the stat information belongs to something other than a normal file/
    /// directory (like a socket) we just return an identity change (non-files
    /// cannot be committed to git).
    pub fn change_to_match_fs(
        self,
        stat: &crate::fs::Metadata,
        has_symlinks: bool,
        executable_bit: bool,
    ) -> Option<Change> {
        match self {
            Mode::FILE if !stat.is_file() => (),
            Mode::SYMLINK if has_symlinks && !stat.is_symlink() => (),
            Mode::SYMLINK if !has_symlinks && !stat.is_file() => (),
            Mode::COMMIT | Mode::DIR if !stat.is_dir() => (),
            Mode::FILE if executable_bit && stat.is_executable() => return Some(Change::ExecutableBit),
            Mode::FILE_EXECUTABLE if executable_bit && !stat.is_executable() => return Some(Change::ExecutableBit),
            _ => return None,
        };
        let new_mode = if stat.is_dir() {
            Mode::COMMIT
        } else if has_symlinks && stat.is_symlink() {
            Mode::SYMLINK
        } else if executable_bit && stat.is_executable() {
            Mode::FILE_EXECUTABLE
        } else {
            Mode::FILE
        };
        Some(Change::Type { new_mode })
    }
}

/// A change of a [`Mode`].
pub enum Change {
    /// The type of mode changed, like symlink => file.
    Type {
        /// The mode representing the new index type.
        new_mode: Mode,
    },
    /// The executable permission of this file has changed.
    ExecutableBit,
}

impl Change {
    /// Applies this change to `mode` and returns the changed one.
    pub fn apply(self, mode: Mode) -> Mode {
        match self {
            Change::Type { new_mode } => new_mode,
            Change::ExecutableBit => match mode {
                Mode::FILE => Mode::FILE_EXECUTABLE,
                Mode::FILE_EXECUTABLE => Mode::FILE,
                _ => unreachable!("invalid mode change: can't flip executable bit of {mode:?}"),
            },
        }
    }
}
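// Editor's illustration, not part of the upstream crate: a minimal sketch showing how a
// detected `Change` is applied back onto a `Mode`, here flipping the executable bit.
#[cfg(test)]
mod change_apply_illustration {
    use super::Change;
    use crate::entry::Mode;

    #[test]
    fn executable_bit_flips_between_the_two_file_modes() {
        assert_eq!(Change::ExecutableBit.apply(Mode::FILE), Mode::FILE_EXECUTABLE);
        assert_eq!(Change::ExecutableBit.apply(Mode::FILE_EXECUTABLE), Mode::FILE);
        // A type change simply substitutes the new mode.
        assert_eq!(Change::Type { new_mode: Mode::SYMLINK }.apply(Mode::FILE), Mode::SYMLINK);
    }
}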
gix-index-0.32.1/src/entry/stat.rs000064400000000000000000000152031046102023000150550ustar 00000000000000
use std::{
    cmp::Ordering,
    time::{SystemTime, SystemTimeError},
};

use filetime::FileTime;

use crate::entry::Stat;

impl Stat {
    /// Detect whether this stat entry is racy if stored in a file index with `timestamp`.
    ///
    /// An index entry is considered racy if its `mtime` is larger than or equal to the index `timestamp`.
    /// The index `timestamp` marks the point in time before which we definitely resolved the racy git problem
    /// for all index entries, so any index entries that changed afterwards will need to be examined for
    /// changes by actually reading the file from disk at least once.
    pub fn is_racy(
        &self,
        timestamp: FileTime,
        Options { check_stat, use_nsec, .. }: Options,
    ) -> bool {
        match timestamp.unix_seconds().cmp(&(self.mtime.secs as i64)) {
            Ordering::Less => true,
            Ordering::Equal if use_nsec && check_stat => timestamp.nanoseconds() <= self.mtime.nsecs,
            Ordering::Equal => true,
            Ordering::Greater => false,
        }
    }

    /// Compares the stat information of two index entries.
    ///
    /// Intuitively, this is basically equivalent to `self == other`.
    /// However, there are a lot of knobs in git that tweak whether certain stat information is used when checking
    /// equality; see [`Options`].
    /// This function respects those options while performing the stat comparison and may therefore ignore some fields.
    pub fn matches(
        &self,
        other: &Self,
        Options {
            trust_ctime,
            check_stat,
            use_nsec,
            use_stdev,
        }: Options,
    ) -> bool {
        if self.mtime.secs != other.mtime.secs {
            return false;
        }
        if check_stat && use_nsec && self.mtime.nsecs != other.mtime.nsecs {
            return false;
        }

        if self.size != other.size {
            return false;
        }

        if trust_ctime {
            if self.ctime.secs != other.ctime.secs {
                return false;
            }
            if check_stat && use_nsec && self.ctime.nsecs != other.ctime.nsecs {
                return false;
            }
        }

        if check_stat {
            if use_stdev && self.dev != other.dev {
                return false;
            }
            self.ino == other.ino && self.gid == other.gid && self.uid == other.uid
        } else {
            true
        }
    }

    /// Creates stat information from the result of `symlink_metadata`.
    pub fn from_fs(stat: &crate::fs::Metadata) -> Result<Stat, SystemTimeError> {
        let mtime = stat.modified().unwrap_or(std::time::UNIX_EPOCH);
        let ctime = stat.created().unwrap_or(std::time::UNIX_EPOCH);

        #[cfg(windows)]
        let res = Stat {
            mtime: mtime.try_into()?,
            ctime: ctime.try_into()?,
            dev: 0,
            ino: 0,
            uid: 0,
            gid: 0,
            // truncation to 32 bits is on purpose (git does the same).
            size: stat.len() as u32,
        };
        #[cfg(not(windows))]
        let res = {
            Stat {
                mtime: mtime.try_into().unwrap_or_default(),
                ctime: ctime.try_into().unwrap_or_default(),
                // truncating to 32 bits is fine here because that's what the Linux syscalls
                // return; Rust merely upcasts to 64 bits. Numbers this large are impractical
                // anyway (that's a lot of hard drives).
                dev: stat.dev() as u32,
                ino: stat.ino() as u32,
                uid: stat.uid(),
                gid: stat.gid(),
                // truncation to 32 bits is on purpose (git does the same).
                size: stat.len() as u32,
            }
        };
        Ok(res)
    }
}

impl TryFrom<SystemTime> for Time {
    type Error = SystemTimeError;
    fn try_from(s: SystemTime) -> Result<Self, Self::Error> {
        let d = s.duration_since(std::time::UNIX_EPOCH)?;
        Ok(Time {
            // truncation to 32 bits is on purpose (we only compare the low bits)
            secs: d.as_secs() as u32,
            nsecs: d.subsec_nanos(),
        })
    }
}

impl From