gix-status-0.16.0/.cargo_vcs_info.json0000644000000001500000000000100132240ustar { "git": { "sha1": "beb0ea8c4ff94c64b7773772a9d388ccb403f3c1" }, "path_in_vcs": "gix-status" }gix-status-0.16.0/Cargo.toml0000644000000102760000000000100112340ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.65" name = "gix-status" version = "0.16.0" authors = [ "Sebastian Thiel ", "Pascal Kuthe ", ] build = false include = [ "src/**/*", "LICENSE-*", ] autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "A crate of the gitoxide project dealing with 'git status'-like functionality" readme = false license = "MIT OR Apache-2.0" repository = "https://github.com/GitoxideLabs/gitoxide" [package.metadata.docs.rs] features = [ "document-features", "worktree-rewrites", ] [lib] name = "gix_status" path = "src/lib.rs" doctest = false [dependencies.bstr] version = "1.3.0" default-features = false [dependencies.document-features] version = "0.2.0" optional = true [dependencies.filetime] version = "0.2.15" [dependencies.gix-diff] version = "^0.49.0" features = ["blob"] optional = true default-features = false [dependencies.gix-dir] version = "^0.11.0" optional = true [dependencies.gix-features] version = "^0.39.1" features = ["progress"] [dependencies.gix-filter] version = "^0.16.0" [dependencies.gix-fs] version = "^0.12.1" [dependencies.gix-hash] version = "^0.15.1" [dependencies.gix-index] version = "^0.37.0" [dependencies.gix-object] version = "^0.46.1" [dependencies.gix-path] version 
= "^0.10.13" [dependencies.gix-pathspec] version = "^0.8.1" [dependencies.gix-worktree] version = "^0.38.0" features = ["attributes"] default-features = false [dependencies.thiserror] version = "2.0.0" [features] worktree-rewrites = [ "dep:gix-dir", "dep:gix-diff", ] [target.'cfg(not(target_has_atomic = "64"))'.dependencies.portable-atomic] version = "1" [lints.clippy] bool_to_int_with_if = "allow" borrow_as_ptr = "allow" cast_lossless = "allow" cast_possible_truncation = "allow" cast_possible_wrap = "allow" cast_precision_loss = "allow" cast_sign_loss = "allow" checked_conversions = "allow" copy_iterator = "allow" default_trait_access = "allow" doc_markdown = "allow" empty_docs = "allow" enum_glob_use = "allow" explicit_deref_methods = "allow" explicit_into_iter_loop = "allow" explicit_iter_loop = "allow" filter_map_next = "allow" fn_params_excessive_bools = "allow" from_iter_instead_of_collect = "allow" if_not_else = "allow" ignored_unit_patterns = "allow" implicit_clone = "allow" inconsistent_struct_constructor = "allow" inefficient_to_string = "allow" inline_always = "allow" items_after_statements = "allow" iter_not_returning_iterator = "allow" iter_without_into_iter = "allow" manual_assert = "allow" manual_is_variant_and = "allow" manual_let_else = "allow" manual_string_new = "allow" many_single_char_names = "allow" match_bool = "allow" match_same_arms = "allow" match_wild_err_arm = "allow" match_wildcard_for_single_variants = "allow" missing_errors_doc = "allow" missing_panics_doc = "allow" module_name_repetitions = "allow" must_use_candidate = "allow" mut_mut = "allow" naive_bytecount = "allow" needless_for_each = "allow" needless_pass_by_value = "allow" needless_raw_string_hashes = "allow" no_effect_underscore_binding = "allow" option_option = "allow" range_plus_one = "allow" redundant_else = "allow" return_self_not_must_use = "allow" should_panic_without_expect = "allow" similar_names = "allow" single_match_else = "allow" stable_sort_primitive = "allow" 
struct_excessive_bools = "allow" struct_field_names = "allow" too_long_first_doc_paragraph = "allow" too_many_lines = "allow" transmute_ptr_to_ptr = "allow" trivially_copy_pass_by_ref = "allow" unnecessary_join = "allow" unnecessary_wraps = "allow" unreadable_literal = "allow" unused_self = "allow" used_underscore_binding = "allow" wildcard_imports = "allow" [lints.clippy.pedantic] level = "warn" priority = -1 [lints.rust] gix-status-0.16.0/Cargo.toml.orig000064400000000000000000000033511046102023000147110ustar 00000000000000lints.workspace = true [package] name = "gix-status" version = "0.16.0" repository = "https://github.com/GitoxideLabs/gitoxide" license = "MIT OR Apache-2.0" description = "A crate of the gitoxide project dealing with 'git status'-like functionality" authors = ["Sebastian Thiel ", "Pascal Kuthe "] edition = "2021" include = ["src/**/*", "LICENSE-*"] rust-version = "1.65" autotests = false [lib] doctest = false [features] ## Add support for tracking rewrites along with checking for worktree modifications. 
worktree-rewrites = ["dep:gix-dir", "dep:gix-diff"] [dependencies] gix-index = { version = "^0.37.0", path = "../gix-index" } gix-fs = { version = "^0.12.1", path = "../gix-fs" } gix-hash = { version = "^0.15.1", path = "../gix-hash" } gix-object = { version = "^0.46.1", path = "../gix-object" } gix-path = { version = "^0.10.13", path = "../gix-path" } gix-features = { version = "^0.39.1", path = "../gix-features", features = ["progress"] } gix-filter = { version = "^0.16.0", path = "../gix-filter" } gix-worktree = { version = "^0.38.0", path = "../gix-worktree", default-features = false, features = ["attributes"] } gix-pathspec = { version = "^0.8.1", path = "../gix-pathspec" } gix-dir = { version = "^0.11.0", path = "../gix-dir", optional = true } gix-diff = { version = "^0.49.0", path = "../gix-diff", default-features = false, features = ["blob"], optional = true } thiserror = "2.0.0" filetime = "0.2.15" bstr = { version = "1.3.0", default-features = false } document-features = { version = "0.2.0", optional = true } [target.'cfg(not(target_has_atomic = "64"))'.dependencies] portable-atomic = "1" [package.metadata.docs.rs] features = ["document-features", "worktree-rewrites"] gix-status-0.16.0/LICENSE-APACHE000064400000000000000000000247461046102023000137610ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. 
For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
gix-status-0.16.0/LICENSE-MIT000064400000000000000000000017771046102023000134700ustar 00000000000000Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
gix-status-0.16.0/src/index_as_worktree/function.rs000064400000000000000000000670571046102023000205350ustar 00000000000000use std::{ io, path::Path, slice::Chunks, sync::atomic::{AtomicUsize, Ordering}, }; use bstr::BStr; use filetime::FileTime; use gix_features::parallel::{in_parallel_if, Reduce}; use gix_filter::pipeline::convert::ToGitOutcome; use gix_object::FindExt; use crate::index_as_worktree::Context; use crate::{ index_as_worktree::{ traits, traits::{read_data::Stream, CompareBlobs, SubmoduleStatus}, types::{Error, Options}, Change, Conflict, EntryStatus, Outcome, VisitEntry, }, is_dir_to_mode, AtomicU64, SymlinkCheck, }; /// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them /// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes, and /// `submodule` which can take a look at submodules in detail to produce status information (BASE version if its conflicting). /// `options` are used to configure the operation. /// /// Note `worktree` must be the root path of the worktree, not a path inside of the worktree. /// /// Note that `index` may require changes to be up-to-date with the working tree and avoid expensive computations by updating /// respective entries with stat information from the worktree, and its timestamp is adjusted to the current time for which it /// will be considered fresh. All changes that would be applied to the index are delegated to the caller, which receives these /// as [`EntryStatus`]. /// The `pathspec` is used to determine which index entries to check for status in the first place. /// /// `objects` is used to access the version of an object in the object database for direct comparison. 
/// /// **It's important to note that the `index` should have its [timestamp updated](gix_index::State::set_timestamp()) with a timestamp /// from just before making this call *if* [entries were updated](Outcome::entries_to_update)** /// /// ### Note /// /// Technically, this function does more as it also provides additional information, like whether a file has conflicts, /// and files that were added with `git add` are shown as a special as well. It also provides updates to entry filesystem /// stats like `git status` would if it had to determine the hash. /// If that happened, the index should be written back after updating the entries with these updated stats, see [Outcome::skipped]. /// /// Thus, some care has to be taken to do the right thing when letting the index match the worktree by evaluating the changes observed /// by the `collector`. #[allow(clippy::too_many_arguments)] pub fn index_as_worktree<'index, T, U, Find, E>( index: &'index gix_index::State, worktree: &Path, collector: &mut impl VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>, compare: impl CompareBlobs + Send + Clone, submodule: impl SubmoduleStatus + Send + Clone, objects: Find, progress: &mut dyn gix_features::progress::Progress, Context { pathspec, stack, filter, should_interrupt, }: Context<'_>, options: Options, ) -> Result where T: Send, U: Send, E: std::error::Error + Send + Sync + 'static, Find: gix_object::Find + Send + Clone, { // the order is absolutely critical here we use the old timestamp to detect racy index entries // (modified at or after the last index update) during the index update we then set those // entries size to 0 (see below) to ensure they keep showing up as racy and reset the timestamp. 
let timestamp = index.timestamp(); let (chunk_size, thread_limit, _) = gix_features::parallel::optimize_chunk_size_and_thread_limit( 500, // just like git index.entries().len().into(), options.thread_limit, None, ); let range = index .prefixed_entries_range(pathspec.common_prefix()) .unwrap_or(0..index.entries().len()); let (entries, path_backing) = (index.entries(), index.path_backing()); let mut num_entries = entries.len(); let entry_index_offset = range.start; let entries = &entries[range]; let _span = gix_features::trace::detail!("gix_status::index_as_worktree", num_entries = entries.len(), chunk_size = chunk_size, thread_limit = ?thread_limit); let entries_skipped_by_common_prefix = num_entries - entries.len(); let (skipped_by_pathspec, skipped_by_entry_flags, symlink_metadata_calls, entries_to_update) = Default::default(); let (worktree_bytes, worktree_reads, odb_bytes, odb_reads, racy_clean) = Default::default(); num_entries = entries.len(); progress.init(entries.len().into(), gix_features::progress::count("files")); let count = progress.counter(); let new_state = { let options = &options; let (skipped_by_pathspec, skipped_by_entry_flags) = (&skipped_by_pathspec, &skipped_by_entry_flags); let (symlink_metadata_calls, entries_to_update) = (&symlink_metadata_calls, &entries_to_update); let (racy_clean, worktree_bytes) = (&racy_clean, &worktree_bytes); let (worktree_reads, odb_bytes, odb_reads) = (&worktree_reads, &odb_bytes, &odb_reads); move |_| { ( State { buf: Vec::new(), buf2: Vec::new(), attr_stack: stack, path_stack: SymlinkCheck::new(worktree.into()), timestamp, path_backing, filter, options, skipped_by_pathspec, skipped_by_entry_flags, symlink_metadata_calls, entries_to_update, racy_clean, worktree_reads, worktree_bytes, odb_reads, odb_bytes, }, compare, submodule, objects, pathspec, ) } }; in_parallel_if( || true, // TODO: heuristic: when is parallelization not worth it? 
Git says 500 items per thread, but to 20 threads, we can be more fine-grained though. gix_features::interrupt::Iter::new( OffsetIter { inner: entries.chunks(chunk_size), offset: entry_index_offset, }, should_interrupt, ), thread_limit, new_state, |(entry_offset, chunk_entries), (state, blobdiff, submdule, objects, pathspec)| { let all_entries = index.entries(); let mut out = Vec::new(); let mut idx = 0; while let Some(entry) = chunk_entries.get(idx) { let absolute_entry_index = entry_offset + idx; if idx == 0 && entry.stage_raw() != 0 { let offset = entry_offset.checked_sub(1).and_then(|prev_idx| { let prev_entry = &all_entries[prev_idx]; let entry_path = entry.path_in(state.path_backing); if prev_entry.stage_raw() == 0 || prev_entry.path_in(state.path_backing) != entry_path { // prev_entry (in previous chunk) does not belong to our conflict return None; } Conflict::try_from_entry(all_entries, state.path_backing, absolute_entry_index, entry_path) .map(|(_conflict, offset)| offset) }); if let Some(entries_to_skip_as_conflict_originates_in_previous_chunk) = offset { // skip current entry as it's done, along with following conflict entries idx += entries_to_skip_as_conflict_originates_in_previous_chunk + 1; continue; } } let res = state.process( all_entries, entry, absolute_entry_index, pathspec, blobdiff, submdule, objects, &mut idx, ); idx += 1; count.fetch_add(1, Ordering::Relaxed); if let Some(res) = res { out.push(res); } } out }, ReduceChange { collector, entries: index.entries(), }, )?; Ok(Outcome { entries_to_process: num_entries, entries_processed: count.load(Ordering::Relaxed), entries_skipped_by_common_prefix, entries_skipped_by_pathspec: skipped_by_pathspec.load(Ordering::Relaxed), entries_skipped_by_entry_flags: skipped_by_entry_flags.load(Ordering::Relaxed), entries_to_update: entries_to_update.load(Ordering::Relaxed), symlink_metadata_calls: symlink_metadata_calls.load(Ordering::Relaxed), racy_clean: racy_clean.load(Ordering::Relaxed), 
worktree_files_read: worktree_reads.load(Ordering::Relaxed), worktree_bytes: worktree_bytes.load(Ordering::Relaxed), odb_objects_read: odb_reads.load(Ordering::Relaxed), odb_bytes: odb_bytes.load(Ordering::Relaxed), }) } struct State<'a, 'b> { buf: Vec, buf2: Vec, timestamp: FileTime, /// This is the cheap stack that only assure that we don't go through symlinks. /// It's always used to get the path to perform an lstat on. path_stack: SymlinkCheck, /// This is the expensive stack that will need to check for `.gitattributes` files each time /// it changes directory. It's only used when we know we have to read a worktree file, which in turn /// requires attributes to drive the filter configuration. attr_stack: gix_worktree::Stack, filter: gix_filter::Pipeline, path_backing: &'b gix_index::PathStorageRef, options: &'a Options, skipped_by_pathspec: &'a AtomicUsize, skipped_by_entry_flags: &'a AtomicUsize, symlink_metadata_calls: &'a AtomicUsize, entries_to_update: &'a AtomicUsize, racy_clean: &'a AtomicUsize, worktree_bytes: &'a AtomicU64, worktree_reads: &'a AtomicUsize, odb_bytes: &'a AtomicU64, odb_reads: &'a AtomicUsize, } type StatusResult<'index, T, U> = Result<(&'index gix_index::Entry, usize, &'index BStr, EntryStatus), Error>; impl<'index> State<'_, 'index> { #[allow(clippy::too_many_arguments)] fn process( &mut self, entries: &'index [gix_index::Entry], entry: &'index gix_index::Entry, entry_index: usize, pathspec: &mut gix_pathspec::Search, diff: &mut impl CompareBlobs, submodule: &mut impl SubmoduleStatus, objects: &Find, outer_entry_index: &mut usize, ) -> Option> where E: std::error::Error + Send + Sync + 'static, Find: gix_object::Find, { if entry.flags.intersects( gix_index::entry::Flags::UPTODATE | gix_index::entry::Flags::SKIP_WORKTREE | gix_index::entry::Flags::ASSUME_VALID | gix_index::entry::Flags::FSMONITOR_VALID, ) { self.skipped_by_entry_flags.fetch_add(1, Ordering::Relaxed); return None; } let path = entry.path_in(self.path_backing); let 
is_excluded = pathspec .pattern_matching_relative_path( path, Some(entry.mode.is_submodule()), &mut |relative_path, case, is_dir, out| { self.attr_stack .set_case(case) .at_entry(relative_path, Some(is_dir_to_mode(is_dir)), objects) .map_or(false, |platform| platform.matching_attributes(out)) }, ) .map_or(true, |m| m.is_excluded()); if is_excluded { self.skipped_by_pathspec.fetch_add(1, Ordering::Relaxed); return None; } let status = if entry.stage_raw() != 0 { Ok( Conflict::try_from_entry(entries, self.path_backing, entry_index, path).map(|(conflict, offset)| { *outer_entry_index += offset; // let out loop skip over entries related to the conflict EntryStatus::Conflict(conflict) }), ) } else { self.compute_status(entry, path, diff, submodule, objects) }; match status { Ok(None) => None, Ok(Some(status)) => Some(Ok((entry, entry_index, path, status))), Err(err) => Some(Err(err)), } } /// # On how racy-git is handled here /// /// Basically the racy detection is a safety mechanism that ensures we can always just compare the stat /// information between index and worktree and if they match we don't need to look at the content. /// This usually just works but if a file updates quickly we could run into the following situation: /// /// * save file version `A` from disk into worktree (git add) /// * file is changed so fast that the mtime doesn't change - *we only looks at seconds by default* /// * file contents change but file-size stays the same, so `"foo" -> "bar"` has the same size but different content /// /// Now both `mtime` and `size`, and all other stat information, is the same but the file has actually changed. /// This case is called *racily clean*. *The file should show up as changed but due to a data race it doesn't.* /// This is the racy git problem. /// /// To solve this we do the following trick: Whenever we modify the index, which includes `git status`, we save the /// current timestamp before the modification starts. 
This timestamp fundamentally represents a checkpoint of sorts. /// We "promise" ourselves that after the modification finishes all entries modified before this timestamp have the /// racy git problem resolved. /// /// So now when we modify the index we must resolve the racy git problem somehow. To do that we only need to look at /// unchanged entries. Changed entries are not interesting since they are already showing up as changed anyway so there /// isn't really a race-condition to worry about. This also explains why removing the `return` here doesn't have an apparent effect. /// This entire branch here is just the optimization of "don't even look at index entries where the stat hasn't changed". /// If we don't have this optimization the result shouldn't change, our status implementation will just be super slow :D /// /// We calculate whether this change is `racy_clean`, so if the last `timestamp` is before or the same as the `mtime` of the entry /// which is what `new_stat.is_racy(..)` does in the branch, and only if we are sure that there is no race condition /// do we `return` early. Since we don't `return` early we just do a full content comparison below, /// which always yields the correct result, there is no race condition there. /// /// If a file showed up as racily clean and didn't change then we don't need to do anything. After this status check is /// complete and the file won't show up as racily clean anymore, since it's mtime is now before the new timestamp. /// However, if the file did actually change then we really ran into one of those rare race conditions in that case we, /// and git does the same, set the size of the file in the index to 0. This will always make the file show up as changed. /// This adds the need to treat all files of size 0 in the index as changed. This is not quite right of course because 0 sized files /// could be entirely valid and unchanged. 
Therefore this only applies if the oid doesn't match the oid of an empty file, /// which is a constant. /// /// Adapted from [here](https://github.com/GitoxideLabs/gitoxide/pull/805#discussion_r1164676777). fn compute_status( &mut self, entry: &gix_index::Entry, rela_path: &BStr, diff: &mut impl CompareBlobs, submodule: &mut impl SubmoduleStatus, objects: &Find, ) -> Result>, Error> where E: std::error::Error + Send + Sync + 'static, Find: gix_object::Find, { let worktree_path = match self.path_stack.verified_path(gix_path::from_bstr(rela_path).as_ref()) { Ok(path) => path, Err(err) if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) => { return Ok(Some(Change::Removed.into())) } Err(err) => return Err(err.into()), }; self.symlink_metadata_calls.fetch_add(1, Ordering::Relaxed); let metadata = match gix_index::fs::Metadata::from_path_no_follow(worktree_path) { Ok(metadata) if metadata.is_dir() => { // index entries are normally only for files/symlinks // if a file turned into a directory it was removed // the only exception here are submodules which are // part of the index despite being directories if entry.mode.is_submodule() { let status = submodule .status(entry, rela_path) .map_err(|err| Error::SubmoduleStatus { rela_path: rela_path.into(), source: Box::new(err), })?; return Ok(status.map(|status| Change::SubmoduleModification(status).into())); } else { return Ok(Some(Change::Removed.into())); } } Ok(metadata) => metadata, Err(err) if gix_fs::io_err::is_not_found(err.kind(), err.raw_os_error()) => { return Ok(Some(Change::Removed.into())) } Err(err) => { return Err(err.into()); } }; if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) { return Ok(Some(EntryStatus::IntentToAdd)); } let new_stat = gix_index::entry::Stat::from_fs(&metadata)?; let executable_bit_changed = match entry .mode .change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit) { Some(gix_index::entry::mode::Change::Type { .. 
}) => return Ok(Some(Change::Type.into())), Some(gix_index::entry::mode::Change::ExecutableBit) => true, None => false, }; // Here we implement racy-git. See racy-git.txt in the git documentation for detailed documentation. // // A file is racy if: // 1. its `mtime` is at or after the last index timestamp and its entry stat information // matches the on-disk file but the file contents are actually modified // 2. its size is 0 (set after detecting a file was racy previously) // // The first case is detected below by checking the timestamp if the file is marked unmodified. // The second case is usually detected either because the on-disk file is not empty, hence // the basic stat match fails, or by checking whether the size doesn't fit the oid. let mut racy_clean = false; if !executable_bit_changed && new_stat.matches(&entry.stat, self.options.stat) // TODO: find a test for the following line or remove it. Is this more often hit with smudge/clean filters? && (!entry.id.is_empty_blob() || entry.stat.size == 0) { racy_clean = new_stat.is_racy(self.timestamp, self.options.stat); if !racy_clean { return Ok(None); } else { self.racy_clean.fetch_add(1, Ordering::Relaxed); } } self.buf.clear(); self.buf2.clear(); let file_size_bytes = if cfg!(windows) && metadata.is_symlink() { // symlinks on Windows seem to have a length of zero, so just pretend // they have the correct length to avoid short-cutting, and enforce a full buffer check. u64::from(entry.stat.size) } else { metadata.len() }; let fetch_data = ReadDataImpl { buf: &mut self.buf, path: worktree_path, rela_path, entry, file_len: file_size_bytes, filter: &mut self.filter, attr_stack: &mut self.attr_stack, options: self.options, id: &entry.id, objects, worktree_reads: self.worktree_reads, worktree_bytes: self.worktree_bytes, odb_reads: self.odb_reads, odb_bytes: self.odb_bytes, }; let content_change = diff.compare_blobs(entry, file_size_bytes, fetch_data, &mut self.buf2)?; // This file is racy clean!
Set the size to 0 so we keep detecting this as the file is updated. if content_change.is_some() || executable_bit_changed { let set_entry_stat_size_zero = content_change.is_some() && racy_clean; Ok(Some( Change::Modification { executable_bit_changed, content_change, set_entry_stat_size_zero, } .into(), )) } else { self.entries_to_update.fetch_add(1, Ordering::Relaxed); Ok(Some(EntryStatus::NeedsUpdate(new_stat))) } } } struct ReduceChange<'a, 'index, T: VisitEntry<'index>> { collector: &'a mut T, entries: &'index [gix_index::Entry], } impl<'index, T, U, C: VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>> Reduce for ReduceChange<'_, 'index, C> { type Input = Vec>; type FeedProduce = (); type Output = (); type Error = Error; fn feed(&mut self, items: Self::Input) -> Result { for item in items { let (entry, entry_index, path, status) = item?; self.collector .visit_entry(self.entries, entry, entry_index, path, status); } Ok(()) } fn finalize(self) -> Result { Ok(()) } } struct ReadDataImpl<'a, Find> where Find: gix_object::Find, { buf: &'a mut Vec, path: &'a Path, rela_path: &'a BStr, file_len: u64, entry: &'a gix_index::Entry, filter: &'a mut gix_filter::Pipeline, attr_stack: &'a mut gix_worktree::Stack, options: &'a Options, id: &'a gix_hash::oid, objects: Find, worktree_bytes: &'a AtomicU64, worktree_reads: &'a AtomicUsize, odb_bytes: &'a AtomicU64, odb_reads: &'a AtomicUsize, } impl<'a, Find> traits::ReadData<'a> for ReadDataImpl<'a, Find> where Find: gix_object::Find, { fn read_blob(self) -> Result<&'a [u8], Error> { Ok(self.objects.find_blob(self.id, self.buf).map(|b| { self.odb_reads.fetch_add(1, Ordering::Relaxed); self.odb_bytes.fetch_add(b.data.len() as u64, Ordering::Relaxed); b.data })?) 
}

    /// Provide the content of the worktree file at `self.path` in the form it is compared against the index entry.
    ///
    /// * Entries with symlink mode, on filesystems where `options.fs.symlink` is set, yield the link target
    ///   (passed through `to_unix_separators_on_windows`) from `self.buf`.
    /// * Everything else is opened as a file and run through `filter.convert_to_git()`, using the attributes
    ///   that match `rela_path`.
    ///
    /// # Errors
    ///
    /// Fails if the link target or the file can't be read, if the attribute lookup fails,
    /// or if the conversion to the git format fails.
    fn stream_worktree_file(self) -> Result<Stream<'a>, Error> {
        self.buf.clear();
        // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just
        // normal files with their content equal to the linked path (so can be read normally)
        //
        let is_symlink = self.entry.mode == gix_index::entry::Mode::SYMLINK;
        // TODO: what to do about precompose unicode and ignore_case for symlinks
        let out = if is_symlink && self.options.fs.symlink {
            // conversion to bstr can never fail because symlinks are only used
            // on unix (by git) so no reason to use the try version here.
            // Reading the link itself is fallible I/O though, so that error is propagated instead of panicking.
            let symlink_path =
                gix_path::to_unix_separators_on_windows(gix_path::into_bstr(std::fs::read_link(self.path)?));
            self.buf.extend_from_slice(&symlink_path);
            self.worktree_bytes.fetch_add(self.buf.len() as u64, Ordering::Relaxed);
            Stream {
                inner: ToGitOutcome::Buffer(self.buf),
                bytes: None,
                len: None,
            }
        } else {
            self.buf.clear();
            let platform = self
                .attr_stack
                .at_entry(self.rela_path, Some(self.entry.mode), &self.objects)?;
            let file = std::fs::File::open(self.path)?;
            let out = self
                .filter
                .convert_to_git(
                    file,
                    self.path,
                    &mut |_path, attrs| {
                        platform.matching_attributes(attrs);
                    },
                    &mut |buf| Ok(self.objects.find_blob(self.id, buf).map(|_| Some(()))?),
                )
                .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?;
            // The length is only reported up front when the conversion left the file unchanged.
            let len = match out {
                ToGitOutcome::Unchanged(_) => Some(self.file_len),
                ToGitOutcome::Process(_) | ToGitOutcome::Buffer(_) => None,
            };
            Stream {
                inner: out,
                bytes: Some(self.worktree_bytes),
                len,
            }
        };

        self.worktree_reads.fetch_add(1, Ordering::Relaxed);
        Ok(out)
    }
}

/// Wraps `Chunks` to yield each chunk together with a running offset,
/// which is the value of `offset` before the length of that chunk was added to it.
struct OffsetIter<'a, T> {
    inner: Chunks<'a, T>,
    offset: usize,
}

impl<'a, T> Iterator for OffsetIter<'a, T> {
    type Item = (usize, &'a [T]);

    fn next(&mut self) -> Option<Self::Item> {
        let block = self.inner.next()?;
        let offset = self.offset;
        self.offset += block.len();
        Some((offset, block))
    }
}

impl Conflict {
    /// Given `entries` and `path_backing`, both values obtained from an [index](gix_index::State), use `start_index` and
enumerate /// all conflict stages that still match `entry_path` to produce a conflict description. /// Also return the amount of extra-entries that were part of the conflict declaration (not counting the entry at `start_index`) /// /// If for some reason entry at `start_index` isn't in conflicting state, `None` is returned. pub fn try_from_entry( entries: &[gix_index::Entry], path_backing: &gix_index::PathStorageRef, start_index: usize, entry_path: &BStr, ) -> Option<(Self, usize)> { use Conflict::*; let mut mask = None::; let mut count = 0_usize; for stage in (start_index..(start_index + 3).min(entries.len())).filter_map(|idx| { let entry = &entries[idx]; let stage = entry.stage_raw(); (stage > 0 && entry.path_in(path_backing) == entry_path).then_some(stage) }) { // This could be `1 << (stage - 1)` but let's be specific. *mask.get_or_insert(0) |= match stage { 1 => 0b001, 2 => 0b010, 3 => 0b100, _ => 0, }; count += 1; } mask.map(|mask| { ( match mask { 0b001 => BothDeleted, 0b010 => AddedByUs, 0b011 => DeletedByThem, 0b100 => AddedByThem, 0b101 => DeletedByUs, 0b110 => BothAdded, 0b111 => BothModified, _ => unreachable!("BUG: bitshifts and typical entry layout doesn't allow for more"), }, count - 1, ) }) } } gix-status-0.16.0/src/index_as_worktree/mod.rs000064400000000000000000000003751046102023000174550ustar 00000000000000//! Changes between an index and a worktree. /// mod types; pub use types::{Change, Conflict, Context, EntryStatus, Error, Options, Outcome, VisitEntry}; mod recorder; pub use recorder::{Record, Recorder}; pub(super) mod function; /// pub mod traits; gix-status-0.16.0/src/index_as_worktree/recorder.rs000064400000000000000000000026341046102023000205030ustar 00000000000000use bstr::BStr; use gix_index as index; use crate::index_as_worktree::{EntryStatus, VisitEntry}; /// A record of a change. /// /// It's created either if there is a conflict or a change, or both. 
#[derive(Debug, Clone)] pub struct Record<'index, T, U> { /// The index entry that is changed. pub entry: &'index index::Entry, /// The index of the `entry` relative to all entries in the input index. pub entry_index: usize, /// The path to the entry. pub relative_path: &'index BStr, /// The status information itself. pub status: EntryStatus, } /// Convenience implementation of [`VisitEntry`] that collects all non-trivial changes into a `Vec`. #[derive(Debug, Default)] pub struct Recorder<'index, T = (), U = ()> { /// collected changes, index entries without conflicts or changes are excluded. pub records: Vec>, } impl<'index, T: Send, U: Send> VisitEntry<'index> for Recorder<'index, T, U> { type ContentChange = T; type SubmoduleStatus = U; fn visit_entry( &mut self, _entries: &'index [index::Entry], entry: &'index index::Entry, entry_index: usize, relative_path: &'index BStr, status: EntryStatus, ) { self.records.push(Record { entry, entry_index, relative_path, status, }); } } gix-status-0.16.0/src/index_as_worktree/traits.rs000064400000000000000000000140751046102023000202060ustar 00000000000000use std::{io::Read, sync::atomic::AtomicBool}; use bstr::BStr; use gix_hash::ObjectId; use gix_index as index; use index::Entry; use crate::index_as_worktree::Error; /// Compares the content of two blobs in some way. pub trait CompareBlobs { /// Output data produced by [`compare_blobs()`][CompareBlobs::compare_blobs()]. type Output; /// Providing the underlying index `entry`, allow comparing a file in the worktree of size `worktree_blob_size` /// and allow streaming its bytes using `data`. /// If this function returns `None` the `entry` and the worktree blob are assumed to be identical. /// Use `data` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself. /// `buf` can be used to store additional data, and it can be assumed to be a cleared buffer. 
fn compare_blobs<'a, 'b>( &mut self, entry: &gix_index::Entry, worktree_blob_size: u64, data: impl ReadData<'a>, buf: &mut Vec, ) -> Result, Error>; } /// Determine the status of a submodule, which always indicates that it changed if present. pub trait SubmoduleStatus { /// The status result, describing in which way the submodule changed. type Output; /// A custom error that may occur while computing the submodule status. type Error: std::error::Error + Send + Sync + 'static; /// Compute the status of the submodule at `entry` and `rela_path`, or return `None` if no change was detected. fn status(&mut self, entry: &gix_index::Entry, rela_path: &BStr) -> Result, Self::Error>; } /// Lazy borrowed access to worktree or blob data, with streaming support for worktree files. pub trait ReadData<'a> { /// Returns the contents of this blob. /// /// This potentially performs IO and other expensive operations /// and should only be called when necessary. fn read_blob(self) -> Result<&'a [u8], Error>; /// Stream a worktree file in such a manner that its content matches what would be put into git. fn stream_worktree_file(self) -> Result, Error>; } /// pub mod read_data { use std::sync::atomic::Ordering; use gix_filter::pipeline::convert::ToGitOutcome; use crate::AtomicU64; /// A stream with worktree file data. pub struct Stream<'a> { pub(crate) inner: ToGitOutcome<'a, std::fs::File>, pub(crate) bytes: Option<&'a AtomicU64>, pub(crate) len: Option, } impl<'a> Stream<'a> { /// Return the underlying byte-buffer if there is one. /// /// If `None`, read from this instance like a stream. /// Note that this method should only be called once to assure proper accounting of the amount of bytes read. pub fn as_bytes(&self) -> Option<&'a [u8]> { self.inner.as_bytes().map(|v| { if let Some(bytes) = self.bytes { bytes.fetch_add(v.len() as u64, Ordering::Relaxed); } v }) } /// Return the size of the stream in bytes if it is known in advance. 
pub fn size(&self) -> Option { self.len } } impl std::io::Read for Stream<'_> { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { let n = self.inner.read(buf)?; if let Some(bytes) = self.bytes { bytes.fetch_add(n as u64, Ordering::Relaxed); } Ok(n) } } } /// Compares two blobs by comparing their size and oid, and only looks at the file if /// the size matches, therefore it's very fast. #[derive(Clone)] pub struct FastEq; impl CompareBlobs for FastEq { type Output = (); // TODO: make all streaming IOPs interruptible. fn compare_blobs<'a, 'b>( &mut self, entry: &Entry, worktree_file_size: u64, data: impl ReadData<'a>, buf: &mut Vec, ) -> Result, Error> { // make sure to account for racily smudged entries here so that they don't always keep // showing up as modified even after their contents have changed again, to a potentially // unmodified state. That means that we want to ignore stat.size == 0 for non_empty_blobs. if u64::from(entry.stat.size) != worktree_file_size && (entry.id.is_empty_blob() || entry.stat.size != 0) { return Ok(Some(())); } HashEq .compare_blobs(entry, worktree_file_size, data, buf) .map(|opt| opt.map(|_| ())) } } /// Compares files to blobs by *always* comparing their hashes. /// /// Same as [`FastEq`] but does not contain a fast path for files with mismatched sizes and /// therefore always returns an OID that can be reused later.
#[derive(Clone)] pub struct HashEq; impl CompareBlobs for HashEq { type Output = ObjectId; fn compare_blobs<'a, 'b>( &mut self, entry: &Entry, _worktree_blob_size: u64, data: impl ReadData<'a>, buf: &mut Vec, ) -> Result, Error> { let mut stream = data.stream_worktree_file()?; match stream.as_bytes() { Some(buffer) => { let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buffer); Ok((entry.id != file_hash).then_some(file_hash)) } None => { let file_hash = match stream.size() { None => { stream.read_to_end(buf)?; gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buf) } Some(len) => gix_object::compute_stream_hash( entry.id.kind(), gix_object::Kind::Blob, &mut stream, len, &mut gix_features::progress::Discard, &AtomicBool::default(), )?, }; Ok((entry.id != file_hash).then_some(file_hash)) } } } } gix-status-0.16.0/src/index_as_worktree/types.rs000064400000000000000000000240501046102023000200360ustar 00000000000000use bstr::{BStr, BString}; use std::sync::atomic::AtomicBool; /// The error returned by [index_as_worktree()`](crate::index_as_worktree()). #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Could not convert path to UTF8")] IllformedUtf8, #[error("The clock was off when reading file related metadata after updating a file on disk")] Time(#[from] std::time::SystemTimeError), #[error("IO error while writing blob or reading file metadata or changing filetype")] Io(#[from] std::io::Error), #[error("Failed to obtain blob from object database")] Find(#[from] gix_object::find::existing_object::Error), #[error("Could not determine status for submodule at '{rela_path}'")] SubmoduleStatus { rela_path: BString, source: Box, }, } /// Options that control how the index status with a worktree is computed. #[derive(Clone, Default, Debug, PartialEq, Eq, Hash)] pub struct Options { /// Capabilities of the file system which affect the status computation. 
pub fs: gix_fs::Capabilities, /// If set, don't use more than this amount of threads. /// Otherwise, usually use as many threads as there are logical cores. /// A value of 0 is interpreted as no-limit pub thread_limit: Option, /// Options that control how stat comparisons are made when checking if a file is fresh. pub stat: gix_index::entry::stat::Options, } /// The context for [`index_as_worktree()`](crate::index_as_worktree()). #[derive(Clone)] pub struct Context<'a> { /// The pathspec to limit the amount of paths that are checked. Can be empty to allow all paths. /// /// Note that these are expected to have a [common_prefix()](gix_pathspec::Search::common_prefix()) according /// to the prefix of the repository to efficiently limit the scope of the paths we process. pub pathspec: gix_pathspec::Search, /// A stack pre-configured to allow accessing attributes for each entry, as required for `filter` /// and possibly pathspecs. pub stack: gix_worktree::Stack, /// A filter to be able to perform conversions from and to the worktree format. /// /// It is needed to potentially refresh the index with data read from the worktree, which needs to be converted back /// to the form stored in Git. /// /// Note that for this to be correct, the attribute `stack` must be configured correctly as well. pub filter: gix_filter::Pipeline, /// A flag to query to learn if cancellation is requested. pub should_interrupt: &'a AtomicBool, } /// Provide additional information collected during the runtime of [`index_as_worktree()`](crate::index_as_worktree()). #[derive(Clone, Debug, Default, Eq, PartialEq, Ord, PartialOrd)] pub struct Outcome { /// The total amount of entries that is to be processed. pub entries_to_process: usize, /// The amount of entries we actually processed. If this isn't the entire set, the operation was interrupted. pub entries_processed: usize, /// The amount of entries we didn't even traverse (and thus update with stat) due to a common prefix in pathspecs.
/// This is similar to the current working directory. pub entries_skipped_by_common_prefix: usize, /// The amount of entries that were skipped due to exclusion by *pathspecs*. pub entries_skipped_by_pathspec: usize, /// The amount of entries that were skipped as the entry flag indicated this. pub entries_skipped_by_entry_flags: usize, /// The amount of times we queried symlink-metadata for a file on disk. pub symlink_metadata_calls: usize, /// The amount of entries whose stats would need to be updated as its modification couldn't be determined without /// an expensive calculation. /// /// With these updates, this calculation will be avoided next time the status runs. /// Note that the stat updates are delegated to the caller. pub entries_to_update: usize, /// The amount of entries that were considered racy-clean - they will need thorough checking to see if they are truly clean, /// i.e. didn't change. pub racy_clean: usize, /// The amount of bytes read from the worktree in order to determine if an entry changed, across all files. pub worktree_bytes: u64, /// The amount of files read in full from the worktree (and into memory). pub worktree_files_read: usize, /// The amount of bytes read from the object database in order to determine if an entry changed, across all objects. pub odb_bytes: u64, /// The amount of objects read from the object database. pub odb_objects_read: usize, } impl Outcome { /// The total amount of skipped entries, i.e. those that weren't processed at all. pub fn skipped(&self) -> usize { self.entries_skipped_by_common_prefix + self.entries_skipped_by_pathspec + self.entries_skipped_by_entry_flags } } /// How an index entry needs to be changed to obtain the destination worktree state, i.e. `entry.apply(this_change) == worktree-entry`. #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] pub enum Change { /// This corresponding file does not exist in the worktree anymore. Removed, /// The type of file changed compared to the worktree. 
/// /// Examples include when a symlink is now a regular file, or a regular file was replaced with a named pipe. /// /// ### Deviation /// /// A change to a non-file is marked as `modification` in Git, but that's related to the content which we can't evaluate. /// Hence, a type-change is considered more appropriate. Type, /// This worktree file was modified in some form, like a permission change or content change or both, /// as compared to this entry. Modification { /// Indicates that one of the stat changes was an executable bit change /// which is a significant change itself. executable_bit_changed: bool, /// The output of the [`CompareBlobs`](crate::index_as_worktree::traits::CompareBlobs) run on this entry. /// If there is no content change and only the executable bit /// changed then this is `None`. content_change: Option, /// If true, the caller is expected to set [entry.stat.size = 0](gix_index::entry::Stat::size) to assure this /// otherwise racily clean entry can still be detected as dirty next time this is called, but this time without /// reading it from disk to hash it. It's a performance optimization and not doing so won't change the correctness /// of the operation. set_entry_stat_size_zero: bool, }, /// A submodule is initialized and checked out, and there was modification to either: /// /// * the `HEAD` as compared to the superproject's desired commit for `HEAD` /// * the worktree has at least one modified file /// * there is at least one untracked file /// /// The exact nature of the modification is handled by the caller which may retain information per submodule or /// re-compute details as needed when seeing this variant. SubmoduleModification(U), } /// Information about an entry. #[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Debug)] pub enum EntryStatus { /// The entry is in a conflicting state, and we didn't collect any more information about it. Conflict(Conflict), /// There is no conflict and a change was discovered. 
Change(Change), /// The entry didn't change, but its state caused extra work that can be avoided next time if its stats would be updated to the /// given stat. NeedsUpdate( /// The new stats which represent what's currently in the working tree. If these replace the current stats in the entry, /// next time this operation runs we can determine the actual state much faster. gix_index::entry::Stat, ), /// An index entry that corresponds to an untracked worktree file marked with `git add --intent-to-add`. /// /// This means it's not available in the object database yet even though now an entry exists that represents the worktree file. /// The entry represents the promise of adding a new file, no matter the actual stat or content. /// Effectively this means nothing changed. /// This also means the file is still present, and that no detailed change checks were performed. IntentToAdd, } impl From> for EntryStatus { fn from(value: Change) -> Self { EntryStatus::Change(value) } } /// Describes a conflicting entry as comparison between 'our' version and 'their' version of it. /// /// If one side isn't specified, it is assumed to have modified the entry. In general, there would be no conflict /// if both parties ended up in the same state. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] pub enum Conflict { /// Both deleted a different version of the entry. BothDeleted, /// We added, they modified, ending up in different states. AddedByUs, /// They deleted the entry, we modified it. DeletedByThem, /// They added the entry, we modified it, ending up in different states. AddedByThem, /// We deleted the entry, they modified it, ending up in different states. DeletedByUs, /// Both added the entry in different states. BothAdded, /// Both modified the entry, ending up in different states. BothModified, } /// Observe the status of an entry by comparing an index entry to the worktree. pub trait VisitEntry<'index> { /// Data generated by comparing an entry with a file. 
type ContentChange; /// Data obtained when checking the submodule status. type SubmoduleStatus; /// Observe the `status` of `entry` at the repository-relative `rela_path` at `entry_index` /// (for accessing `entry` and surrounding in the complete list of `entries`). fn visit_entry( &mut self, entries: &'index [gix_index::Entry], entry: &'index gix_index::Entry, entry_index: usize, rela_path: &'index BStr, status: EntryStatus, ); } gix-status-0.16.0/src/index_as_worktree_with_renames/mod.rs000064400000000000000000000664611046102023000222320ustar 00000000000000//! Changes between the index and the worktree along with optional rename tracking. mod types; pub use types::{Context, DirwalkContext, Entry, Error, Options, Outcome, RewriteSource, Sorting, Summary, VisitEntry}; mod recorder; pub use recorder::Recorder; pub(super) mod function { use crate::index_as_worktree::traits::{CompareBlobs, SubmoduleStatus}; use crate::index_as_worktree_with_renames::function::rewrite::ModificationOrDirwalkEntry; use crate::index_as_worktree_with_renames::{Context, Entry, Error, Options, Outcome, RewriteSource, VisitEntry}; use crate::is_dir_to_mode; use bstr::ByteSlice; use gix_worktree::stack::State; use std::borrow::Cow; use std::path::Path; /// Similar to [`index_as_worktree(…)`](crate::index_as_worktree()), except that it will automatically /// track renames if enabled, while additionally providing information about untracked files /// (or more, depending on the configuration). /// /// * `index` /// - used for checking modifications, and also for knowing which files are tracked during /// the working-dir traversal. /// * `worktree` /// - The root of the worktree, in a format that respects `core.precomposeUnicode`. /// * `collector` /// - A [`VisitEntry`] implementation that sees the results of this operation. /// * `compare` /// - An implementation to compare two blobs for equality, used during index modification checks. 
/// * `submodule` /// - An implementation to determine the status of a submodule when encountered during /// index modification checks. /// * `objects` /// - A way to obtain objects from the git object database. /// * `progress` /// - A way to send progress information for the index modification checks. /// * `ctx` /// - Additional information that will be accessed during index modification checks and traversal. /// * `options` /// - a way to configure both paths of the operation. #[allow(clippy::too_many_arguments)] pub fn index_as_worktree_with_renames<'index, T, U, Find, E>( index: &'index gix_index::State, worktree: &Path, collector: &mut impl VisitEntry<'index, ContentChange = T, SubmoduleStatus = U>, compare: impl CompareBlobs + Send + Clone, submodule: impl SubmoduleStatus + Send + Clone, objects: Find, progress: &mut dyn gix_features::progress::Progress, mut ctx: Context<'_>, options: Options<'_>, ) -> Result where T: Send + Clone, U: Send + Clone, E: std::error::Error + Send + Sync + 'static, Find: gix_object::Find + gix_object::FindHeader + Send + Clone, { gix_features::parallel::threads(|scope| -> Result { let (tx, rx) = std::sync::mpsc::channel(); let walk_outcome = options .dirwalk .map(|options| { gix_features::parallel::build_thread() .name("gix_status::dirwalk".into()) .spawn_scoped(scope, { let tx = tx.clone(); let mut collect = dirwalk::Delegate { tx, should_interrupt: ctx.should_interrupt, }; let dirwalk_ctx = ctx.dirwalk; let objects = objects.clone(); let mut excludes = match ctx.resource_cache.attr_stack.state() { State::CreateDirectoryAndAttributesStack { .. } | State::AttributesStack(_) => None, State::AttributesAndIgnoreStack { .. 
} | State::IgnoreStack(_) => { Some(ctx.resource_cache.attr_stack.clone()) } }; let mut pathspec_attr_stack = ctx .pathspec .patterns() .any(|p| !p.attributes.is_empty()) .then(|| ctx.resource_cache.attr_stack.clone()); let mut pathspec = ctx.pathspec.clone(); move || -> Result<_, Error> { gix_dir::walk( worktree, gix_dir::walk::Context { should_interrupt: Some(ctx.should_interrupt), git_dir_realpath: dirwalk_ctx.git_dir_realpath, current_dir: dirwalk_ctx.current_dir, index, ignore_case_index_lookup: dirwalk_ctx.ignore_case_index_lookup, pathspec: &mut pathspec, pathspec_attributes: &mut |relative_path, case, is_dir, out| { let stack = pathspec_attr_stack .as_mut() .expect("can only be called if attributes are used in patterns"); stack .set_case(case) .at_entry(relative_path, Some(is_dir_to_mode(is_dir)), &objects) .map_or(false, |platform| platform.matching_attributes(out)) }, excludes: excludes.as_mut(), objects: &objects, explicit_traversal_root: Some(worktree), }, options, &mut collect, ) .map_err(Error::DirWalk) } }) .map_err(Error::SpawnThread) }) .transpose()?; let entries = &index.entries()[index .prefixed_entries_range(ctx.pathspec.common_prefix()) .unwrap_or(0..index.entries().len())]; let filter = options.rewrites.is_some().then(|| { ( ctx.resource_cache.filter.worktree_filter.clone(), ctx.resource_cache.attr_stack.clone(), ) }); let tracked_modifications_outcome = gix_features::parallel::build_thread() .name("gix_status::index_as_worktree".into()) .spawn_scoped(scope, { let mut collect = tracked_modifications::Delegate { tx }; let objects = objects.clone(); let stack = ctx.resource_cache.attr_stack.clone(); let filter = ctx.resource_cache.filter.worktree_filter.clone(); move || -> Result<_, Error> { crate::index_as_worktree( index, worktree, &mut collect, compare, submodule, objects, progress, crate::index_as_worktree::Context { pathspec: ctx.pathspec, stack, filter, should_interrupt: ctx.should_interrupt, }, options.tracked_file_modifications, ) 
.map_err(Error::TrackedFileModifications) } }) .map_err(Error::SpawnThread)?; let tracker = options .rewrites .map(gix_diff::rewrites::Tracker::>::new) .zip(filter); let rewrite_outcome = match tracker { Some((mut tracker, (mut filter, mut attrs))) => { let mut entries_for_sorting = options.sorting.map(|_| Vec::new()); let mut buf = Vec::new(); for event in rx { let (change, location) = match event { Event::IndexEntry(record) => { let location = Cow::Borrowed(record.relative_path); (ModificationOrDirwalkEntry::Modification(record), location) } Event::DirEntry(entry, collapsed_directory_status) => { let location = Cow::Owned(entry.rela_path.clone()); ( ModificationOrDirwalkEntry::DirwalkEntry { id: rewrite::calculate_worktree_id( options.object_hash, worktree, entry.disk_kind, entry.rela_path.as_bstr(), &mut filter, &mut attrs, &objects, &mut buf, ctx.should_interrupt, )?, entry, collapsed_directory_status, }, location, ) } }; if let Some(v) = entries_for_sorting.as_mut() { v.push((change, location)); } else if let Some(change) = tracker.try_push_change(change, location.as_ref()) { collector.visit_entry(rewrite::change_to_entry(change, entries)); } } let mut entries_for_sorting = entries_for_sorting.map(|mut v| { v.sort_by(|a, b| a.1.cmp(&b.1)); let mut remaining = Vec::new(); for (change, location) in v { if let Some(change) = tracker.try_push_change(change, location.as_ref()) { remaining.push(rewrite::change_to_entry(change, entries)); } } remaining }); let outcome = tracker.emit( |dest, src| { match src { None => { let entry = rewrite::change_to_entry(dest.change, entries); if let Some(v) = entries_for_sorting.as_mut() { v.push(entry); } else { collector.visit_entry(entry); } } Some(src) => { let ModificationOrDirwalkEntry::DirwalkEntry { id, entry, collapsed_directory_status, } = dest.change else { unreachable!("BUG: only possible destinations are dirwalk entries (additions)"); }; let source = match src.change { ModificationOrDirwalkEntry::Modification(record) 
=> { RewriteSource::RewriteFromIndex { index_entries: entries, source_entry: record.entry, source_entry_index: record.entry_index, source_rela_path: record.relative_path, source_status: record.status.clone(), } } ModificationOrDirwalkEntry::DirwalkEntry { id, entry, collapsed_directory_status, } => RewriteSource::CopyFromDirectoryEntry { source_dirwalk_entry: entry.clone(), source_dirwalk_entry_collapsed_directory_status: *collapsed_directory_status, source_dirwalk_entry_id: *id, }, }; let entry = Entry::Rewrite { source, dirwalk_entry: entry, dirwalk_entry_collapsed_directory_status: collapsed_directory_status, dirwalk_entry_id: id, diff: src.diff, copy: src.kind == gix_diff::rewrites::tracker::visit::SourceKind::Copy, }; if let Some(v) = entries_for_sorting.as_mut() { v.push(entry); } else { collector.visit_entry(entry); } } } gix_diff::tree::visit::Action::Continue }, &mut ctx.resource_cache, &objects, |_cb| { // NOTE: to make this work, we'd want to wait the index modification check to complete. // Then it's possible to efficiently emit the tracked files along with what we already sent, // i.e. untracked and ignored files. 
gix_features::trace::debug!("full-tree copy tracking isn't currently supported"); Ok::<_, std::io::Error>(()) }, )?; if let Some(mut v) = entries_for_sorting { v.sort_by(|a, b| a.destination_rela_path().cmp(b.destination_rela_path())); for entry in v { collector.visit_entry(entry); } } Some(outcome) } None => { let mut entries_for_sorting = options.sorting.map(|_| Vec::new()); for event in rx { let entry = match event { Event::IndexEntry(record) => Entry::Modification { entries, entry: record.entry, entry_index: record.entry_index, rela_path: record.relative_path, status: record.status, }, Event::DirEntry(entry, collapsed_directory_status) => Entry::DirectoryContents { entry, collapsed_directory_status, }, }; if let Some(v) = entries_for_sorting.as_mut() { v.push(entry); } else { collector.visit_entry(entry); } } if let Some(mut v) = entries_for_sorting { v.sort_by(|a, b| a.destination_rela_path().cmp(b.destination_rela_path())); for entry in v { collector.visit_entry(entry); } } None } }; let walk_outcome = walk_outcome .map(|handle| handle.join().expect("no panic")) .transpose()?; let tracked_modifications_outcome = tracked_modifications_outcome.join().expect("no panic")?; Ok(Outcome { dirwalk: walk_outcome.map(|t| t.0), tracked_file_modification: tracked_modifications_outcome, rewrites: rewrite_outcome, }) }) } enum Event<'index, T, U> { IndexEntry(crate::index_as_worktree::Record<'index, T, U>), DirEntry(gix_dir::Entry, Option), } mod tracked_modifications { use crate::index_as_worktree::{EntryStatus, Record}; use crate::index_as_worktree_with_renames::function::Event; use bstr::BStr; use gix_index::Entry; pub(super) struct Delegate<'index, T, U> { pub(super) tx: std::sync::mpsc::Sender>, } impl<'index, T, U> crate::index_as_worktree::VisitEntry<'index> for Delegate<'index, T, U> { type ContentChange = T; type SubmoduleStatus = U; fn visit_entry( &mut self, _entries: &'index [Entry], entry: &'index Entry, entry_index: usize, rela_path: &'index BStr, status: 
EntryStatus<T, U>,
            ) {
                // `send` only fails once the receiver was dropped; the result is intentionally discarded.
                self.tx
                    .send(Event::IndexEntry(Record {
                        entry,
                        entry_index,
                        relative_path: rela_path,
                        status,
                    }))
                    .ok();
            }
        }
    }

    /// The directory-walk delegate which forwards walked entries as [`Event::DirEntry`] through a channel.
    mod dirwalk {
        use super::Event;
        use gix_dir::entry::Status;
        use gix_dir::walk::Action;
        use gix_dir::EntryRef;
        use std::sync::atomic::{AtomicBool, Ordering};

        /// Sends each emitted directory entry to `tx` and cancels the walk once `should_interrupt` is set.
        pub(super) struct Delegate<'index, 'a, T, U> {
            /// The sending half of the channel that receives one event per emitted entry.
            pub(super) tx: std::sync::mpsc::Sender<Event<'index, T, U>>,
            /// Checked after each emitted entry to decide if the walk should be cancelled.
            pub(super) should_interrupt: &'a AtomicBool,
        }

        impl<T, U> gix_dir::walk::Delegate for Delegate<'_, '_, T, U> {
            /// Forward `entry` as owned value unless its disk kind is untrackable, then return
            /// [`Action::Cancel`] if an interrupt was requested and [`Action::Continue`] otherwise.
            fn emit(&mut self, entry: EntryRef<'_>, collapsed_directory_status: Option<Status>) -> Action {
                // Status never shows untracked entries of untrackable type
                if entry.disk_kind != Some(gix_dir::entry::Kind::Untrackable) {
                    let entry = entry.to_owned();
                    // A send error is ignored on purpose, it only means nobody is listening anymore.
                    self.tx.send(Event::DirEntry(entry, collapsed_directory_status)).ok();
                }

                if self.should_interrupt.load(Ordering::Relaxed) {
                    Action::Cancel
                } else {
                    Action::Continue
                }
            }
        }
    }

    /// Types and utilities to feed index modifications and directory-walk entries into the rewrite tracker.
    mod rewrite {
        use crate::index_as_worktree::{Change, EntryStatus};
        use crate::index_as_worktree_with_renames::{Entry, Error};
        use bstr::BStr;
        use gix_diff::rewrites::tracker::ChangeKind;
        use gix_diff::tree::visit::Relation;
        use gix_dir::entry::Kind;
        use gix_filter::pipeline::convert::ToGitOutcome;
        use gix_hash::oid;
        use gix_object::tree::EntryMode;
        use std::io::Read;
        use std::path::Path;

        /// A change as seen by the rewrite tracker: either the record of a tracked file,
        /// or an entry of the directory walk along with its object id.
        #[derive(Clone)]
        pub enum ModificationOrDirwalkEntry<'index, T, U>
        where
            T: Clone,
            U: Clone,
        {
            /// The record of an index entry together with its computed status.
            Modification(crate::index_as_worktree::Record<'index, T, U>),
            /// An entry of the directory walk.
            DirwalkEntry {
                /// The object id associated with `entry`.
                id: gix_hash::ObjectId,
                /// The entry as produced by the directory walk.
                entry: gix_dir::Entry,
                /// The status of the collapsed directory as passed along by the directory walk, if any.
                collapsed_directory_status: Option<gix_dir::entry::Status>,
            },
        }

        impl<T, U> gix_diff::rewrites::tracker::Change for ModificationOrDirwalkEntry<'_, T, U>
        where
            T: Clone,
            U: Clone,
        {
            /// The id of the index entry for modifications, or the stored id of the dirwalk entry.
            fn id(&self) -> &oid {
                match self {
                    ModificationOrDirwalkEntry::Modification(m) => &m.entry.id,
                    ModificationOrDirwalkEntry::DirwalkEntry { id, .. } => id,
                }
            }

            /// Always `None`, as no relation between entries is provided for now.
            fn relation(&self) -> Option<Relation> {
                // TODO: figure out if index or worktree can provide containerization - worktree should be possible.
                //       index would take some processing.
None
            }

            /// Map the status of a tracked file, or the presence of a dirwalk entry, to the kind of change
            /// the tracker understands. Only removed index entries are deletions, and only dirwalk entries are additions.
            fn kind(&self) -> ChangeKind {
                match self {
                    ModificationOrDirwalkEntry::Modification(m) => match &m.status {
                        EntryStatus::Conflict(_) | EntryStatus::IntentToAdd | EntryStatus::NeedsUpdate(_) => {
                            ChangeKind::Modification
                        }
                        EntryStatus::Change(c) => match c {
                            Change::Removed => ChangeKind::Deletion,
                            Change::Type | Change::Modification { .. } | Change::SubmoduleModification(_) => {
                                ChangeKind::Modification
                            }
                        },
                    },
                    ModificationOrDirwalkEntry::DirwalkEntry { .. } => ChangeKind::Addition,
                }
            }

            /// The tree-entry mode of the change. Dirwalk entries without a known disk kind count as blobs.
            fn entry_mode(&self) -> EntryMode {
                match self {
                    ModificationOrDirwalkEntry::Modification(c) => c.entry.mode.to_tree_entry_mode(),
                    ModificationOrDirwalkEntry::DirwalkEntry { entry, .. } => entry.disk_kind.map(|kind| {
                        match kind {
                            Kind::Untrackable => {
                                // Trees are never tracked for rewrites, so we 'pretend'.
                                gix_object::tree::EntryKind::Tree
                            }
                            Kind::File => gix_object::tree::EntryKind::Blob,
                            Kind::Symlink => gix_object::tree::EntryKind::Link,
                            Kind::Repository | Kind::Directory => gix_object::tree::EntryKind::Tree,
                        }
                        .into()
                    }),
                }
                .unwrap_or(gix_object::tree::EntryKind::Blob.into())
            }

            /// Both [`id()`](Self::id) and [`entry_mode()`](Self::entry_mode) at once.
            fn id_and_entry_mode(&self) -> (&oid, EntryMode) {
                (self.id(), self.entry_mode())
            }
        }

        /// Compute the blob id of the worktree entry at `rela_path` below `worktree_root`.
        ///
        /// * Files are opened, converted with `filter` using the attributes found via `attrs`, and hashed
        ///   with `object_hash`. `buf` is scratch space for the output of filter processes.
        /// * Symlinks hash the bytes of their link target.
        /// * Everything else, including entries with unknown `disk_kind`, yields the null id as we assume
        ///   that the rename-tracking does nothing for these anyway.
        ///
        /// `should_interrupt` is passed to the streaming hash computation of unfiltered files.
        ///
        /// # Errors
        ///
        /// Fails if attributes can't be looked up, the file can't be opened or hashed, the filter conversion
        /// fails, or the symlink can't be read.
        #[allow(clippy::too_many_arguments)]
        pub(super) fn calculate_worktree_id(
            object_hash: gix_hash::Kind,
            worktree_root: &Path,
            disk_kind: Option<Kind>,
            rela_path: &BStr,
            filter: &mut gix_filter::Pipeline,
            attrs: &mut gix_worktree::Stack,
            objects: &dyn gix_object::Find,
            buf: &mut Vec<u8>,
            should_interrupt: &std::sync::atomic::AtomicBool,
        ) -> Result<gix_hash::ObjectId, Error> {
            let Some(kind) = disk_kind else {
                return Ok(object_hash.null());
            };

            Ok(match kind {
                // Go along with unreadable files, they are passed along without rename tracking.
                Kind::Untrackable => object_hash.null(),
                Kind::File => {
                    let platform = attrs
                        .at_entry(rela_path, None, objects)
                        .map_err(Error::SetAttributeContext)?;
                    let rela_path = gix_path::from_bstr(rela_path);
                    let file_path = worktree_root.join(rela_path.as_ref());
                    let file = std::fs::File::open(&file_path).map_err(Error::OpenWorktreeFile)?;
                    let out = filter.convert_to_git(
                        file,
                        rela_path.as_ref(),
                        &mut |_path, attrs| {
                            platform.matching_attributes(attrs);
                        },
                        &mut |_buf| Ok(None),
                    )?;
                    match out {
                        // No filter applied: stream the file as is.
                        // NOTE(review): the length comes from a second lookup by path, not from the open handle - confirm
                        // that a file changing in between is acceptable here.
                        ToGitOutcome::Unchanged(mut file) => gix_object::compute_stream_hash(
                            object_hash,
                            gix_object::Kind::Blob,
                            &mut file,
                            file_path.metadata().map_err(Error::OpenWorktreeFile)?.len(),
                            &mut gix_features::progress::Discard,
                            should_interrupt,
                        )
                        .map_err(Error::HashFile)?,
                        ToGitOutcome::Buffer(buf) => gix_object::compute_hash(object_hash, gix_object::Kind::Blob, buf),
                        ToGitOutcome::Process(mut stream) => {
                            // The whole output of the filter process is needed before it can be hashed.
                            buf.clear();
                            stream.read_to_end(buf).map_err(Error::HashFile)?;
                            gix_object::compute_hash(object_hash, gix_object::Kind::Blob, buf)
                        }
                    }
                }
                Kind::Symlink => {
                    let path = worktree_root.join(gix_path::from_bstr(rela_path));
                    let target = gix_path::into_bstr(std::fs::read_link(path).map_err(Error::ReadLink)?);
                    gix_object::compute_hash(object_hash, gix_object::Kind::Blob, &target)
                }
                Kind::Directory | Kind::Repository => object_hash.null(),
            })
        }

        /// Turn `change` back into an [`Entry`] for the caller, dropping the id of dirwalk entries.
        /// `entries` are all index entries, which are passed along with modifications.
        #[inline]
        pub(super) fn change_to_entry<'index, T, U>(
            change: ModificationOrDirwalkEntry<'index, T, U>,
            entries: &'index [gix_index::Entry],
        ) -> Entry<'index, T, U>
        where
            T: Clone,
            U: Clone,
        {
            match change {
                ModificationOrDirwalkEntry::Modification(r) => Entry::Modification {
                    entries,
                    entry: r.entry,
                    entry_index: r.entry_index,
                    rela_path: r.relative_path,
                    status: r.status,
                },
                ModificationOrDirwalkEntry::DirwalkEntry {
                    id: _,
                    entry,
                    collapsed_directory_status,
                } => Entry::DirectoryContents {
                    entry,
                    collapsed_directory_status,
                },
            }
        }
    }
}
gix-status-0.16.0/src/index_as_worktree_with_renames/recorder.rs000064400000000000000000000011051046102023000232400ustar 
00000000000000use crate::index_as_worktree_with_renames::{Entry, VisitEntry}; /// Convenience implementation of [`VisitEntry`] that collects all changes into a `Vec`. #[derive(Debug, Default)] pub struct Recorder<'index, T = (), U = ()> { /// The collected changes. pub records: Vec>, } impl<'index, T: Send, U: Send> VisitEntry<'index> for Recorder<'index, T, U> { type ContentChange = T; type SubmoduleStatus = U; fn visit_entry(&mut self, entry: Entry<'index, Self::ContentChange, Self::SubmoduleStatus>) { self.records.push(entry); } } gix-status-0.16.0/src/index_as_worktree_with_renames/types.rs000064400000000000000000000470511046102023000226110ustar 00000000000000use crate::index_as_worktree::{Change, EntryStatus}; use bstr::{BStr, ByteSlice}; use std::sync::atomic::AtomicBool; /// The error returned by [index_as_worktree_with_renames()`](crate::index_as_worktree_with_renames()). #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error(transparent)] TrackedFileModifications(#[from] crate::index_as_worktree::Error), #[error(transparent)] DirWalk(gix_dir::walk::Error), #[error(transparent)] SpawnThread(std::io::Error), #[error("Failed to change the context for querying gitattributes to the respective path")] SetAttributeContext(std::io::Error), #[error("Could not open worktree file for reading")] OpenWorktreeFile(std::io::Error), #[error(transparent)] HashFile(std::io::Error), #[error("Could not read worktree link content")] ReadLink(std::io::Error), #[error(transparent)] ConvertToGit(#[from] gix_filter::pipeline::convert::to_git::Error), #[error(transparent)] RewriteTracker(#[from] gix_diff::rewrites::tracker::emit::Error), } /// The way all output should be sorted. #[derive(Clone, Copy, Default, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)] pub enum Sorting { /// The entries are sorted by their path in a case-sensitive fashion. 
#[default]
    ByPathCaseSensitive,
}

/// Provide additional information collected during the runtime of [`index_as_worktree_with_renames()`](crate::index_as_worktree_with_renames()).
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Outcome {
    /// The outcome of the modification check of tracked files.
    pub tracked_file_modification: crate::index_as_worktree::Outcome,
    /// The outcome of the directory walk, or `None` if its [options](Options::dirwalk) also weren't present which means
    /// the dirwalk never ran.
    pub dirwalk: Option<gix_dir::walk::Outcome>,
    /// The result of the rewrite operation, if [rewrites were configured](Options::rewrites).
    pub rewrites: Option<gix_diff::rewrites::Outcome>,
}

/// Either an index entry for renames or another directory entry in case of copies.
#[derive(Clone, PartialEq, Debug)]
pub enum RewriteSource<'index, ContentChange, SubmoduleStatus> {
    /// The source originates in the index and is detected as missing in the working tree.
    /// This can also happen for copies.
    RewriteFromIndex {
        /// All entries in the index.
        index_entries: &'index [gix_index::Entry],
        /// The entry that is the source of the rewrite, which means it was removed on disk,
        /// equivalent to [Change::Removed](crate::index_as_worktree::Change::Removed).
        ///
        /// Note that the [entry-id](gix_index::Entry::id) is the content-id of the source of the rewrite.
        source_entry: &'index gix_index::Entry,
        /// The index of the `source_entry` for lookup in `index_entries` - useful to look at neighbors.
        source_entry_index: usize,
        /// The repository-relative path of the `source_entry`.
        source_rela_path: &'index BStr,
        /// The computed status of the `source_entry`.
        source_status: EntryStatus<ContentChange, SubmoduleStatus>,
    },
    /// This source originates in the directory tree and is always the source of copies.
    CopyFromDirectoryEntry {
        /// The source of the copy operation, which is also an entry of the directory walk.
        ///
        /// Note that its [`rela_path`](gix_dir::EntryRef::rela_path) is the source of the rewrite.
        source_dirwalk_entry: gix_dir::Entry,
        /// `collapsed_directory_status` is `Some(dir_status)` if this `source_dirwalk_entry` was part of a directory with the given
        /// `dir_status` that wasn't the same as the one of `source_dirwalk_entry` and if [gix_dir::walk::Options::emit_collapsed] was
        /// [CollapsedEntriesEmissionMode::OnStatusMismatch](gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch).
        /// It will also be `Some(dir_status)` if that option was [CollapsedEntriesEmissionMode::All](gix_dir::walk::CollapsedEntriesEmissionMode::All).
        source_dirwalk_entry_collapsed_directory_status: Option<gix_dir::entry::Status>,
        /// The object id as it would appear if the entry was written to the object database.
        /// It's the same as `dirwalk_entry_id`, or `diff` is `Some(_)` to indicate that the copy was determined by similarity.
        source_dirwalk_entry_id: gix_hash::ObjectId,
    },
}

/// An 'entry' in the sense of a merge of modified tracked files and results from a directory walk.
#[derive(Clone, PartialEq, Debug)]
pub enum Entry<'index, ContentChange, SubmoduleStatus> {
    /// A tracked file was modified, and index-specific information is passed.
    Modification {
        /// All entries in the index.
        entries: &'index [gix_index::Entry],
        /// The entry with modifications.
        entry: &'index gix_index::Entry,
        /// The index of the `entry` for lookup in `entries` - useful to look at neighbors.
        entry_index: usize,
        /// The repository-relative path of the entry.
        rela_path: &'index BStr,
        /// The computed status of the entry.
        status: EntryStatus<ContentChange, SubmoduleStatus>,
    },
    /// An entry returned by the directory walk, without any relation to the index.
    ///
    /// This can happen if ignored files are returned as well, or if rename-tracking is disabled.
    DirectoryContents {
        /// The entry found during the disk traversal.
entry: gix_dir::Entry, /// `collapsed_directory_status` is `Some(dir_status)` if this `entry` was part of a directory with the given /// `dir_status` that wasn't the same as the one of `entry` and if [gix_dir::walk::Options::emit_collapsed] was /// [CollapsedEntriesEmissionMode::OnStatusMismatch](gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch). /// It will also be `Some(dir_status)` if that option was [CollapsedEntriesEmissionMode::All](gix_dir::walk::CollapsedEntriesEmissionMode::All). collapsed_directory_status: Option, }, /// The rewrite tracking discovered a match between a deleted and added file, and considers them equal enough, /// depending on the tracker settings. /// /// Note that the source of the rewrite is always the index as it detects the absence of entries, something that /// can't be done during a directory walk. Rewrite { /// The source of the rewrite operation. source: RewriteSource<'index, ContentChange, SubmoduleStatus>, /// The untracked entry found during the disk traversal, the destination of the rewrite. /// /// Note that its [`rela_path`](gix_dir::EntryRef::rela_path) is the destination of the rewrite, and the current /// location of the entry. dirwalk_entry: gix_dir::Entry, /// `collapsed_directory_status` is `Some(dir_status)` if this `dirwalk_entry` was part of a directory with the given /// `dir_status` that wasn't the same as the one of `dirwalk_entry` and if [gix_dir::walk::Options::emit_collapsed] was /// [CollapsedEntriesEmissionMode::OnStatusMismatch](gix_dir::walk::CollapsedEntriesEmissionMode::OnStatusMismatch). /// It will also be `Some(dir_status)` if that option was [CollapsedEntriesEmissionMode::All](gix_dir::walk::CollapsedEntriesEmissionMode::All). dirwalk_entry_collapsed_directory_status: Option, /// The object id after the rename, specifically hashed in order to determine equality. 
dirwalk_entry_id: gix_hash::ObjectId, /// It's `None` if the 'source.id' is equal to `dirwalk_entry_id`, as identity made an actual diff computation unnecessary. /// Otherwise, and if enabled, it's `Some(stats)` to indicate how similar both entries were. diff: Option, /// If true, this rewrite is created by copy, and 'source.id' is pointing to its source. /// Otherwise, it's a rename, and 'source.id' points to a deleted object, /// as renames are tracked as deletions and additions of the same or similar content. copy: bool, }, } /// An easy to grasp summary of the changes of the worktree compared to the index. #[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)] pub enum Summary { /// An entry exists in the index but doesn't in the worktree. Removed, /// A file exists in the worktree but doesn't have a corresponding entry in the index. /// /// In a `git status`, this would be an untracked file. Added, /// A file or submodule was modified, compared to the state recorded in the index. /// On Unix, the change of executable bit also counts as modification. /// /// If the modification is a submodule, it could also stem from various other factors, like /// having modified or untracked files, or changes in the index. Modified, /// The type of the entry in the worktree changed compared to the index. /// /// This can happen if a file in the worktree now is a directory, or a symlink, for example. TypeChange, /// A match between an entry in the index and a differently named file in the worktree was detected, /// considering the index the source of a rename operation, and the worktree file the destination. /// /// Note that the renamed file may also have been modified, but is considered similar enough. /// /// To obtain this state, rewrite-tracking must have been enabled, as otherwise the source would be /// considered `Removed` and the destination would be considered `Added`. 
Renamed, /// A match between an entry in the index and a differently named file in the worktree was detected, /// considering the index the source of the copy of a worktree file. /// /// Note that the copied file may also have been modified, but is considered similar enough. /// /// To obtain this state, rewrite-and-copy-tracking must have been enabled, as otherwise the source would be /// considered `Removed` and the destination would be considered `Added`. Copied, /// An index entry with a corresponding worktree file that corresponds to an untracked worktree /// file marked with `git add --intent-to-add`. /// /// This means it's not available in the object database yet even though now an entry exists /// that represents the worktree file. /// The entry represents the promise of adding a new file, no matter the actual stat or content. /// Effectively this means nothing changed. /// This also means the file is still present, and that no detailed change checks were performed. IntentToAdd, /// Describes a conflicting entry in the index, which also means that /// no further comparison to the worktree file was performed. /// /// As this variant only describes the state of the index, the corresponding worktree file may /// or may not exist. Conflict, } /// Access impl RewriteSource<'_, ContentChange, SubmoduleStatus> { /// The repository-relative path of this source. pub fn rela_path(&self) -> &BStr { match self { RewriteSource::RewriteFromIndex { source_rela_path, .. } => source_rela_path, RewriteSource::CopyFromDirectoryEntry { source_dirwalk_entry, .. } => source_dirwalk_entry.rela_path.as_bstr(), } } } /// Access impl Entry<'_, ContentChange, SubmoduleStatus> { /// Return a summary of the entry as digest of its status, or `None` if this entry is /// created from the directory walk and is *not untracked*, or if it is merely to communicate /// a needed update to the index entry. 
pub fn summary(&self) -> Option { Some(match self { Entry::Modification { status: EntryStatus::Conflict(_), .. } => Summary::Conflict, Entry::Modification { status: EntryStatus::IntentToAdd, .. } => Summary::IntentToAdd, Entry::Modification { status: EntryStatus::NeedsUpdate(_), .. } => return None, Entry::Modification { status: EntryStatus::Change(change), .. } => match change { Change::SubmoduleModification(_) | Change::Modification { .. } => Summary::Modified, Change::Type => Summary::TypeChange, Change::Removed => Summary::Removed, }, Entry::DirectoryContents { entry, .. } => { if matches!(entry.status, gix_dir::entry::Status::Untracked) { Summary::Added } else { return None; } } Entry::Rewrite { copy, .. } => { if *copy { Summary::Copied } else { Summary::Renamed } } }) } /// The repository-relative path at which the source of a rewrite is located. /// /// If this isn't a rewrite, the path is the location of the entry itself. pub fn source_rela_path(&self) -> &BStr { match self { Entry::Modification { rela_path, .. } => rela_path, Entry::DirectoryContents { entry, .. } => entry.rela_path.as_bstr(), Entry::Rewrite { source, .. } => source.rela_path(), } } /// The repository-relative path at which the destination of a rewrite is located. /// /// If this isn't a rewrite, the path is the location of the entry itself. pub fn destination_rela_path(&self) -> &BStr { match self { Entry::Modification { rela_path, .. } => rela_path, Entry::DirectoryContents { entry, .. } => entry.rela_path.as_bstr(), Entry::Rewrite { dirwalk_entry, .. } => dirwalk_entry.rela_path.as_bstr(), } } } /// Options for use in [index_as_worktree_with_renames()](crate::index_as_worktree_with_renames()). #[derive(Clone, Default)] pub struct Options<'a> { /// The way all output should be sorted. /// /// If `None`, and depending on the `rewrites` field, output will be immediate but the output order /// isn't determined, and may differ between two runs. 
`rewrites` also depend on the order of entries that /// are presented to it, hence for deterministic results, sorting needs to be enabled. /// /// If `Some(_)`, all entries are collected beforehand, so they can be sorted before outputting any of them /// to the user. /// /// If immediate output of entries in any order is desired, this should be `None`, /// along with `rewrites` being `None` as well. pub sorting: Option, /// The kind of hash to create when hashing worktree entries. pub object_hash: gix_hash::Kind, /// Options to configure how modifications to tracked files should be obtained. pub tracked_file_modifications: crate::index_as_worktree::Options, /// Options to control the directory walk that informs about untracked files. /// /// Note that we forcefully disable emission of tracked files to avoid any overlap /// between emissions to indicate modifications, and those that are obtained by /// the directory walk. /// /// If `None`, the directory walk portion will not run at all, yielding data similar /// to a bare [index_as_worktree()](crate::index_as_worktree()) call. pub dirwalk: Option>, /// The configuration for the rewrite tracking. Note that if set, the [`dirwalk`](Self::dirwalk) should be configured /// to *not* collapse untracked and ignored entries, as rewrite tracking is on a file-by-file basis. /// Also note that when `Some(_)`, it will collect certain changes depending on the exact configuration, which typically increases /// the latency until the first entries are received. Note that some entries are never candidates for renames, which means /// they are forwarded to the caller right away. /// /// If `None`, no tracking will occur, which means that all output becomes visible to the delegate immediately. pub rewrites: Option, } /// The context for [index_as_worktree_with_renames()`](crate::index_as_worktree_with_renames()). pub struct Context<'a> { /// The pathspec to limit the amount of paths that are checked. Can be empty to allow all paths. 
/// /// Note that these are expected to have a [common_prefix()](gix_pathspec::Search::common_prefix()) according /// to the prefix of the repository to efficiently limit the scope of the paths we process, both for the /// index modifications as well as for the directory walk. pub pathspec: gix_pathspec::Search, /// A fully-configured platform capable of producing diffable buffers similar to what Git would do, for use /// with rewrite tracking. /// /// Note that it contains resources that are additionally used here: /// /// * `attr_stack` /// - A stack pre-configured to allow accessing attributes for each entry, as required for `filter` /// and possibly pathspecs. /// It *may* also allow accessing `.gitignore` information for use in the directory walk. /// If no excludes information is present, the directory walk will identify ignored files as untracked, which /// might be desirable under certain circumstances. /// * `filter` /// - A filter to be able to perform conversions from and to the worktree format. /// It is needed to potentially refresh the index with data read from the worktree, which needs to be converted back /// to the form stored in Git. pub resource_cache: gix_diff::blob::Platform, /// A flag to query to learn if cancellation is requested. pub should_interrupt: &'a AtomicBool, /// The context for the directory walk. pub dirwalk: DirwalkContext<'a>, } /// All information that is required to perform a [dirwalk](gix_dir::walk()). pub struct DirwalkContext<'a> { /// The `git_dir` of the parent repository, after a call to [`gix_path::realpath()`]. /// /// It's used to help us differentiate our own `.git` directory from nested unrelated repositories, /// which is needed if `core.worktree` is used to nest the `.git` directory deeper within. pub git_dir_realpath: &'a std::path::Path, /// The current working directory as returned by `gix_fs::current_dir()` to assure it respects `core.precomposeUnicode`. 
/// It's used to produce the realpath of the git-dir of a repository candidate to assure it's not our own repository. pub current_dir: &'a std::path::Path, /// A utility to lookup index entries faster, and deal with ignore-case handling. /// /// Must be set if [`ignore_case`](gix_dir::walk::Options::ignore_case) is `true`, or else some entries won't be found if their case is different. /// /// [Read more in `gix-dir`](gix_dir::walk::Context::ignore_case_index_lookup). pub ignore_case_index_lookup: Option<&'a gix_index::AccelerateLookup<'a>>, } /// Observe the status of an entry by comparing an index entry to the worktree, along /// with potential directory walk results. pub trait VisitEntry<'a> { /// Data generated by comparing an entry with a file. type ContentChange; /// Data obtained when checking the submodule status. type SubmoduleStatus; /// Observe the `status` of `entry` at the repository-relative `rela_path` at `entry_index` /// (for accessing `entry` and surrounding in the complete list of `entries`). fn visit_entry(&mut self, entry: Entry<'a, Self::ContentChange, Self::SubmoduleStatus>); } gix-status-0.16.0/src/lib.rs000064400000000000000000000027071046102023000137310ustar 00000000000000//! This crate includes the various diffs `git` can do between different representations //! of the repository state, like comparisons between… //! //! * index and working tree //! * index and tree //! * find untracked files //! //! While also being able to check check if the working tree is dirty, quickly. //! //! 
### Feature Flags #![cfg_attr( all(doc, feature = "document-features"), doc = ::document_features::document_features!() )] #![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))] #![deny(missing_docs, rust_2018_idioms, unsafe_code)] #[cfg(target_has_atomic = "64")] use std::sync::atomic::AtomicU64; #[cfg(not(target_has_atomic = "64"))] use portable_atomic::AtomicU64; pub mod index_as_worktree; pub use index_as_worktree::function::index_as_worktree; #[cfg(feature = "worktree-rewrites")] pub mod index_as_worktree_with_renames; #[cfg(feature = "worktree-rewrites")] pub use index_as_worktree_with_renames::function::index_as_worktree_with_renames; /// A stack that validates we are not going through a symlink in a way that is read-only. /// /// It can efficiently validate paths when these are queried in sort-order, which leads to each component /// to only be checked once. pub struct SymlinkCheck { inner: gix_fs::Stack, } mod stack; fn is_dir_to_mode(is_dir: bool) -> gix_index::entry::Mode { if is_dir { gix_index::entry::Mode::DIR } else { gix_index::entry::Mode::FILE } } gix-status-0.16.0/src/stack.rs000064400000000000000000000036711046102023000142710ustar 00000000000000use std::path::{Path, PathBuf}; use gix_fs::Stack; use crate::SymlinkCheck; impl SymlinkCheck { /// Create a new stack that starts operating at `root`. pub fn new(root: PathBuf) -> Self { Self { inner: gix_fs::Stack::new(root), } } /// Return a valid filesystem path located in our root by appending `relative_path`, which is guaranteed to /// not pass through a symbolic link. That way the caller can be sure to not be misled by an attacker that /// tries to make us reach outside of the repository. /// /// Note that the file pointed to by `relative_path` may still be a symbolic link, or not exist at all, /// and that an error may also be produced if directories on the path leading to the leaf /// component of `relative_path` are missing. 
/// /// ### Note /// /// On windows, no verification is performed, instead only the combined path is provided as usual. pub fn verified_path(&mut self, relative_path: &Path) -> std::io::Result<&Path> { self.inner.make_relative_path_current(relative_path, &mut Delegate)?; Ok(self.inner.current()) } } struct Delegate; impl gix_fs::stack::Delegate for Delegate { fn push_directory(&mut self, _stack: &Stack) -> std::io::Result<()> { Ok(()) } #[cfg_attr(windows, allow(unused_variables))] fn push(&mut self, is_last_component: bool, stack: &Stack) -> std::io::Result<()> { #[cfg(windows)] { Ok(()) } #[cfg(not(windows))] { if is_last_component { return Ok(()); } if stack.current().symlink_metadata()?.is_symlink() { return Err(std::io::Error::new( std::io::ErrorKind::Other, "Cannot step through symlink to perform an lstat", )); } Ok(()) } } fn pop_directory(&mut self) {} }