gix-features-0.36.1/.cargo_vcs_info.json0000644000000001520000000000100135240ustar { "git": { "sha1": "55d386a2448aba1dd22c73fb63b3fd5b3a8401c9" }, "path_in_vcs": "gix-features" }gix-features-0.36.1/Cargo.toml0000644000000071010000000000100115230ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" rust-version = "1.65" name = "gix-features" version = "0.36.1" authors = ["Sebastian Thiel "] include = [ "src/**/*", "LICENSE-*", ] description = "A crate to integrate various capabilities using compile-time feature flags" license = "MIT OR Apache-2.0" repository = "https://github.com/Byron/gitoxide" [package.metadata.docs.rs] all-features = true features = ["document-features"] [lib] test = false doctest = false [[test]] name = "hash" path = "tests/hash.rs" required-features = ["rustsha1"] [[test]] name = "parallel" path = "tests/parallel_threaded.rs" required-features = [ "parallel", "rustsha1", ] [[test]] name = "multi-threaded" path = "tests/parallel_shared_threaded.rs" required-features = [ "parallel", "rustsha1", ] [[test]] name = "single-threaded" path = "tests/parallel_shared.rs" required-features = ["rustsha1"] [[test]] name = "pipe" path = "tests/pipe.rs" required-features = ["io-pipe"] [dependencies.bytes] version = "1.0.0" optional = true [dependencies.bytesize] version = "1.0.1" optional = true [dependencies.crc32fast] version = "1.2.1" optional = true [dependencies.crossbeam-channel] version = "0.5.0" optional = true [dependencies.document-features] version = "0.2.0" optional = true [dependencies.flate2] version = 
"1.0.25" optional = true default-features = false [dependencies.gix-hash] version = "^0.13.2" [dependencies.gix-trace] version = "^0.1.4" [dependencies.jwalk] version = "0.8.1" optional = true [dependencies.once_cell] version = "1.13.0" optional = true [dependencies.parking_lot] version = "0.12.0" optional = true default-features = false [dependencies.prodash] version = "26.2.2" optional = true default-features = false [dependencies.sha1] version = "0.10.0" optional = true [dependencies.sha1_smol] version = "1.0.0" optional = true [dependencies.thiserror] version = "1.0.38" optional = true [dependencies.walkdir] version = "2.3.2" optional = true [dev-dependencies.bstr] version = "1.3.0" default-features = false [features] cache-efficiency-debug = [] crc32 = ["dep:crc32fast"] default = [] fast-sha1 = ["dep:sha1"] fs-walkdir-parallel = ["dep:jwalk"] io-pipe = ["dep:bytes"] once_cell = ["dep:once_cell"] parallel = [ "dep:crossbeam-channel", "dep:parking_lot", ] progress = ["prodash"] progress-unit-bytes = [ "dep:bytesize", "prodash?/unit-bytes", ] progress-unit-human-numbers = ["prodash?/unit-human"] rustsha1 = ["dep:sha1_smol"] tracing = ["gix-trace/tracing"] tracing-detail = ["gix-trace/tracing-detail"] walkdir = ["dep:walkdir"] zlib = [ "dep:flate2", "flate2?/rust_backend", "dep:thiserror", ] zlib-ng = [ "zlib", "flate2?/zlib-ng", ] zlib-ng-compat = [ "zlib", "flate2?/zlib-ng-compat", ] zlib-rust-backend = [ "zlib", "flate2?/rust_backend", ] zlib-stock = [ "zlib", "flate2?/zlib", ] [target."cfg(all(any(target_arch = \"aarch64\", target_arch = \"x86\", target_arch = \"x86_64\"), not(target_env = \"msvc\")))".dependencies.sha1] version = "0.10.0" features = ["asm"] optional = true [target."cfg(unix)".dependencies.libc] version = "0.2.119" gix-features-0.36.1/Cargo.toml.orig000064400000000000000000000152701046102023000152120ustar 00000000000000[package] name = "gix-features" description = "A crate to integrate various capabilities using compile-time feature flags" 
repository = "https://github.com/Byron/gitoxide" version = "0.36.1" authors = ["Sebastian Thiel "] license = "MIT OR Apache-2.0" edition = "2021" rust-version = "1.65" include = ["src/**/*", "LICENSE-*"] [lib] doctest = false test = false [features] default = [] ## Provide traits and utilities for providing progress information. These can then be rendered ## using facilities of the `prodash` crate. progress = ["prodash"] ## Provide human-readable numbers as well as easier to read byte units for progress bars. progress-unit-human-numbers = ["prodash?/unit-human"] ## Provide human readable byte units for progress bars. progress-unit-bytes = ["dep:bytesize", "prodash?/unit-bytes"] ## If set, walkdir iterators will be multi-threaded. fs-walkdir-parallel = [ "dep:jwalk" ] ## Implement `tracing` with `tracing-core`, which provides applications with valuable performance details if they opt-in to it. ## ## Note that this may have overhead as well, thus instrumentations should be used stategically, only providing coarse tracing by default and adding details ## only where needed while marking them with the appropriate level. tracing = [ "gix-trace/tracing" ] ## If enabled, detailed tracing is also emitted, which can greatly increase insights but at a cost. tracing-detail = [ "gix-trace/tracing-detail" ] ## Use scoped threads and channels to parallelize common workloads on multiple objects. If enabled, it is used everywhere ## where it makes sense. ## As caches are likely to be used and instantiated per thread, more memory will be used on top of the costs for threads. ## The `threading` module will contain thread-safe primitives for shared ownership and mutation, otherwise these will be their single threaded counterparts. ## This way, single-threaded applications don't have to pay for threaded primitives. parallel = ["dep:crossbeam-channel", "dep:parking_lot"] ## If enabled, OnceCell will be made available for interior mutability either in sync or unsync forms. 
once_cell = ["dep:once_cell"] ## Makes facilities of the `walkdir` crate partially available. ## In conjunction with the **parallel** feature, directory walking will be parallel instead behind a compatible interface. walkdir = ["dep:walkdir"] #* an in-memory unidirectional pipe using `bytes` as efficient transfer mechanism. io-pipe = ["dep:bytes"] ## provide a proven and fast `crc32` implementation. crc32 = ["dep:crc32fast"] #! ### Mutually Exclusive ZLIB ## Enable the usage of zlib related utilities to compress or decompress data. ## The base `zlib` feature uses the `flate2` Rust crate; the other mutually exclusive features select the `flate2 backend. ## Note that a competitive Zlib implementation is critical to `gitoxide's` object database performance. ## Enabling this without enabling one of the other features below will use a low-performance pure-Rust backend. zlib = ["dep:flate2", "flate2?/rust_backend", "dep:thiserror"] ## Use the C-based zlib-ng backend, which can compress and decompress significantly faster. zlib-ng = ["zlib", "flate2?/zlib-ng"] ## Use zlib-ng via its zlib-compat API. Useful if you already need zlib for C ## code elsewhere in your dependencies. Otherwise, use zlib-ng. zlib-ng-compat = ["zlib", "flate2?/zlib-ng-compat"] ## Use a slower C-based backend which can compress and decompress significantly faster than the rust version. ## Unlike `zlib-ng-compat`, this allows using dynamic linking with system `zlib` libraries and doesn't require cmake. zlib-stock = ["zlib", "flate2?/zlib"] ## Pure Rust backend, available for completeness even though it's the default ## if neither of the above options are set. Low performance, but pure Rust, so it ## may build in environments where other backends don't. zlib-rust-backend = ["zlib", "flate2?/rust_backend"] #! 
### Mutually Exclusive SHA1 ## A fast SHA1 implementation is critical to `gitoxide's` object database performance ## A multi-crate implementation that can use hardware acceleration, thus bearing the potential for up to 2Gb/s throughput on ## CPUs that support it, like AMD Ryzen or Intel Core i3, as well as Apple Silicon like M1. ## Takes precedence over `rustsha1` if both are specified. fast-sha1 = ["dep:sha1"] ## A standard and well performing pure Rust implementation of Sha1. Will significantly slow down various git operations. rustsha1 = ["dep:sha1_smol"] #! ### Other ## Count cache hits and misses and print that debug information on drop. ## Caches implement this by default, which costs nothing unless this feature is enabled cache-efficiency-debug = [] [[test]] name = "hash" path = "tests/hash.rs" required-features = ["rustsha1"] [[test]] name = "parallel" path = "tests/parallel_threaded.rs" required-features = ["parallel", "rustsha1"] [[test]] name = "multi-threaded" path = "tests/parallel_shared_threaded.rs" required-features = ["parallel", "rustsha1"] [[test]] name = "single-threaded" path = "tests/parallel_shared.rs" required-features = ["rustsha1"] [[test]] name = "pipe" path = "tests/pipe.rs" required-features = ["io-pipe"] [dependencies] gix-hash = { version = "^0.13.2", path = "../gix-hash" } gix-trace = { version = "^0.1.4", path = "../gix-trace" } # 'parallel' feature crossbeam-channel = { version = "0.5.0", optional = true } parking_lot = { version = "0.12.0", default-features = false, optional = true } jwalk = { version = "0.8.1", optional = true } walkdir = { version = "2.3.2", optional = true } # used when parallel is off # hashing and 'fast-sha1' feature sha1_smol = { version = "1.0.0", optional = true } crc32fast = { version = "1.2.1", optional = true } sha1 = { version = "0.10.0", optional = true } # progress prodash = { workspace = true, optional = true } bytesize = { version = "1.0.1", optional = true } # pipe bytes = { version = "1.0.0", 
optional = true } # zlib module flate2 = { version = "1.0.25", optional = true, default-features = false } thiserror = { version = "1.0.38", optional = true } once_cell = { version = "1.13.0", optional = true } document-features = { version = "0.2.0", optional = true } [target.'cfg(unix)'.dependencies] libc = { version = "0.2.119" } [dev-dependencies] bstr = { version = "1.3.0", default-features = false } # Assembly doesn't yet compile on MSVC on windows, but does on GNU, see https://github.com/RustCrypto/asm-hashes/issues/17 # At this time, only aarch64, x86 and x86_64 are supported. [target.'cfg(all(any(target_arch = "aarch64", target_arch = "x86", target_arch = "x86_64"), not(target_env = "msvc")))'.dependencies] sha1 = { version = "0.10.0", optional = true, features = ["asm"] } [package.metadata.docs.rs] all-features = true features = ["document-features"] gix-features-0.36.1/LICENSE-APACHE000064400000000000000000000251221046102023000142440ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS Copyright 2018-2021 Sebastian Thiel, and [contributors](https://github.com/byron/gitoxide/contributors) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
gix-features-0.36.1/LICENSE-MIT000064400000000000000000000021551046102023000137550ustar 00000000000000Copyright (c) 2018-2021 Sebastian Thiel, and [contributors](https://github.com/byron/gitoxide/contributors). Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. gix-features-0.36.1/src/cache.rs000064400000000000000000000034121046102023000145160ustar 00000000000000#[cfg(feature = "cache-efficiency-debug")] mod impl_ { /// A helper to collect useful information about cache efficiency. 
pub struct Debug { owner: String, hits: usize, puts: usize, misses: usize, } impl Debug { /// Create a new instance #[inline] pub fn new(owner: String) -> Self { Debug { owner, hits: 0, puts: 0, misses: 0, } } /// Count cache insertions #[inline] pub fn put(&mut self) { self.puts += 1; } /// Count hits #[inline] pub fn hit(&mut self) { self.hits += 1; } /// Count misses #[inline] pub fn miss(&mut self) { self.misses += 1; } } impl Drop for Debug { fn drop(&mut self) { let hits = self.hits; let misses = self.misses; let ratio = hits as f32 / misses as f32; eprintln!( "{}[{:0x}]: {} / {} (hits/misses) = {:.02}%, puts = {}", self.owner, self as *const _ as usize, hits, misses, ratio * 100.0, self.puts ); } } } #[cfg(not(feature = "cache-efficiency-debug"))] mod impl_ { /// The disabled, zero size do-nothing equivalent pub struct Debug; impl Debug { /// Create a new instance #[inline] pub fn new(_owner: String) -> Self { Debug } /// noop pub fn put(&mut self) {} /// noop pub fn hit(&mut self) {} /// noop pub fn miss(&mut self) {} } } pub use impl_::Debug; gix-features-0.36.1/src/decode.rs000064400000000000000000000017721046102023000147050ustar 00000000000000use std::io::Read; /// Decode variable int numbers from a `Read` implementation. /// /// Note: currently overflow checks are only done in debug mode. #[inline] pub fn leb64_from_read(mut r: impl Read) -> Result<(u64, usize), std::io::Error> { let mut b = [0u8; 1]; let mut i = 0; r.read_exact(&mut b)?; i += 1; let mut value = b[0] as u64 & 0x7f; while b[0] & 0x80 != 0 { r.read_exact(&mut b)?; i += 1; debug_assert!(i <= 10, "Would overflow value at 11th iteration"); value += 1; value = (value << 7) + (b[0] as u64 & 0x7f) } Ok((value, i)) } /// Decode variable int numbers. 
#[inline] pub fn leb64(d: &[u8]) -> (u64, usize) { let mut i = 0; let mut c = d[i]; i += 1; let mut value = c as u64 & 0x7f; while c & 0x80 != 0 { c = d[i]; i += 1; debug_assert!(i <= 10, "Would overflow value at 11th iteration"); value += 1; value = (value << 7) + (c as u64 & 0x7f) } (value, i) } gix-features-0.36.1/src/fs.rs000064400000000000000000000114021046102023000140610ustar 00000000000000//! Filesystem utilities //! //! These are will be parallel if the `parallel` feature is enabled, at the expense of compiling additional dependencies //! along with runtime costs for maintaining a global [`rayon`](https://docs.rs/rayon) thread pool. //! //! For information on how to use the [`WalkDir`] type, have a look at //! * [`jwalk::WalkDir`](https://docs.rs/jwalk/0.5.1/jwalk/type.WalkDir.html) if `parallel` feature is enabled //! * [walkdir::WalkDir](https://docs.rs/walkdir/2.3.1/walkdir/struct.WalkDir.html) otherwise #[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))] mod shared { /// The desired level of parallelism. pub enum Parallelism { /// Do not parallelize at all by making a serial traversal on the current thread. Serial, /// Create a new thread pool for each traversal with up to 16 threads or the amount of logical cores of the machine. ThreadPoolPerTraversal { /// The base name of the threads we create as part of the thread-pool. thread_name: &'static str, }, } } /// #[cfg(feature = "fs-walkdir-parallel")] pub mod walkdir { use std::path::Path; pub use jwalk::{DirEntry as DirEntryGeneric, DirEntryIter as DirEntryIterGeneric, Error, WalkDir}; pub use super::shared::Parallelism; /// An alias for an uncustomized directory entry to match the one of the non-parallel version offered by `walkdir`. 
pub type DirEntry = DirEntryGeneric<((), ())>; impl From for jwalk::Parallelism { fn from(v: Parallelism) -> Self { match v { Parallelism::Serial => jwalk::Parallelism::Serial, Parallelism::ThreadPoolPerTraversal { thread_name } => std::thread::available_parallelism() .map_or_else( |_| Parallelism::Serial.into(), |threads| { let pool = jwalk::rayon::ThreadPoolBuilder::new() .num_threads(threads.get().min(16)) .stack_size(128 * 1024) .thread_name(move |idx| format!("{thread_name} {idx}")) .build() .expect("we only set options that can't cause a build failure"); jwalk::Parallelism::RayonExistingPool { pool: pool.into(), busy_timeout: None, } }, ), } } } /// Instantiate a new directory iterator which will not skip hidden files, with the given level of `parallelism`. pub fn walkdir_new(root: &Path, parallelism: Parallelism) -> WalkDir { WalkDir::new(root).skip_hidden(false).parallelism(parallelism.into()) } /// Instantiate a new directory iterator which will not skip hidden files and is sorted pub fn walkdir_sorted_new(root: &Path, parallelism: Parallelism) -> WalkDir { WalkDir::new(root) .skip_hidden(false) .sort(true) .parallelism(parallelism.into()) } /// The Iterator yielding directory items pub type DirEntryIter = DirEntryIterGeneric<((), ())>; } #[cfg(all(feature = "walkdir", not(feature = "fs-walkdir-parallel")))] /// pub mod walkdir { use std::path::Path; pub use walkdir::{DirEntry, Error, WalkDir}; pub use super::shared::Parallelism; /// Instantiate a new directory iterator which will not skip hidden files, with the given level of `parallelism`. pub fn walkdir_new(root: &Path, _: Parallelism) -> WalkDir { WalkDir::new(root) } /// Instantiate a new directory iterator which will not skip hidden files and is sorted, with the given level of `parallelism`. 
pub fn walkdir_sorted_new(root: &Path, _: Parallelism) -> WalkDir { WalkDir::new(root).sort_by_file_name() } /// The Iterator yielding directory items pub type DirEntryIter = walkdir::IntoIter; } #[cfg(any(feature = "walkdir", feature = "fs-walkdir-parallel"))] pub use self::walkdir::{walkdir_new, walkdir_sorted_new, WalkDir}; /// Prepare open options which won't follow symlinks when the file is opened. /// /// Note: only effective on unix currently. pub fn open_options_no_follow() -> std::fs::OpenOptions { #[cfg_attr(not(unix), allow(unused_mut))] let mut options = std::fs::OpenOptions::new(); #[cfg(unix)] { /// Make sure that it's impossible to follow through to the target of symlinks. /// Note that this will still follow symlinks in the path, which is what we assume /// has been checked separately. use std::os::unix::fs::OpenOptionsExt; options.custom_flags(libc::O_NOFOLLOW); } options } gix-features-0.36.1/src/hash.rs000064400000000000000000000153221046102023000144010ustar 00000000000000//! Hash functions and hash utilities //! //! With the `fast-sha1` feature, the `Sha1` hash type will use a more elaborate implementation utilizing hardware support //! in case it is available. Otherwise the `rustsha1` feature should be set. `fast-sha1` will take precedence. //! Otherwise, a minimal yet performant implementation is used instead for a decent trade-off between compile times and run-time performance. #[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))] mod _impl { use super::Sha1Digest; /// A implementation of the Sha1 hash, which can be used once. #[derive(Default, Clone)] pub struct Sha1(sha1_smol::Sha1); impl Sha1 { /// Digest the given `bytes`. pub fn update(&mut self, bytes: &[u8]) { self.0.update(bytes) } /// Finalize the hash and produce a digest. pub fn digest(self) -> Sha1Digest { self.0.digest().bytes() } } } /// A 20 bytes digest produced by a [`Sha1`] hash implementation. 
#[cfg(any(feature = "fast-sha1", feature = "rustsha1"))] pub type Sha1Digest = [u8; 20]; #[cfg(feature = "fast-sha1")] mod _impl { use sha1::Digest; use super::Sha1Digest; /// A implementation of the Sha1 hash, which can be used once. #[derive(Default, Clone)] pub struct Sha1(sha1::Sha1); impl Sha1 { /// Digest the given `bytes`. pub fn update(&mut self, bytes: &[u8]) { self.0.update(bytes) } /// Finalize the hash and produce a digest. pub fn digest(self) -> Sha1Digest { self.0.finalize().into() } } } #[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] pub use _impl::Sha1; /// Compute a CRC32 hash from the given `bytes`, returning the CRC32 hash. /// /// When calling this function for the first time, `previous_value` should be `0`. Otherwise it /// should be the previous return value of this function to provide a hash of multiple sequential /// chunks of `bytes`. #[cfg(feature = "crc32")] pub fn crc32_update(previous_value: u32, bytes: &[u8]) -> u32 { let mut h = crc32fast::Hasher::new_with_initial(previous_value); h.update(bytes); h.finalize() } /// Compute a CRC32 value of the given input `bytes`. /// /// In case multiple chunks of `bytes` are present, one should use [`crc32_update()`] instead. #[cfg(feature = "crc32")] pub fn crc32(bytes: &[u8]) -> u32 { let mut h = crc32fast::Hasher::new(); h.update(bytes); h.finalize() } /// Produce a hasher suitable for the given kind of hash. #[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] pub fn hasher(kind: gix_hash::Kind) -> Sha1 { match kind { gix_hash::Kind::Sha1 => Sha1::default(), } } /// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start` /// while initializing and calling `progress`. /// /// `num_bytes_from_start` is useful to avoid reading trailing hashes, which are never part of the hash itself, /// denoting the amount of bytes to hash starting from the beginning of the file. 
/// /// # Note /// /// * Only available with the `gix-object` feature enabled due to usage of the [`gix_hash::Kind`] enum and the /// [`gix_hash::ObjectId`] return value. /// * [Interrupts][crate::interrupt] are supported. #[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] pub fn bytes_of_file( path: &std::path::Path, num_bytes_from_start: u64, kind: gix_hash::Kind, progress: &mut dyn crate::progress::Progress, should_interrupt: &std::sync::atomic::AtomicBool, ) -> std::io::Result { bytes( &mut std::fs::File::open(path)?, num_bytes_from_start, kind, progress, should_interrupt, ) } /// Similar to [`bytes_of_file`], but operates on a stream of bytes. #[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] pub fn bytes( read: &mut dyn std::io::Read, num_bytes_from_start: u64, kind: gix_hash::Kind, progress: &mut dyn crate::progress::Progress, should_interrupt: &std::sync::atomic::AtomicBool, ) -> std::io::Result { bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt) } /// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind. 
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))] pub fn bytes_with_hasher( read: &mut dyn std::io::Read, num_bytes_from_start: u64, mut hasher: Sha1, progress: &mut dyn crate::progress::Progress, should_interrupt: &std::sync::atomic::AtomicBool, ) -> std::io::Result { let start = std::time::Instant::now(); // init progress before the possibility for failure, as convenience in case people want to recover progress.init( Some(num_bytes_from_start as prodash::progress::Step), crate::progress::bytes(), ); const BUF_SIZE: usize = u16::MAX as usize; let mut buf = [0u8; BUF_SIZE]; let mut bytes_left = num_bytes_from_start; while bytes_left > 0 { let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)]; read.read_exact(out)?; bytes_left -= out.len() as u64; progress.inc_by(out.len()); hasher.update(out); if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) { return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted")); } } let id = gix_hash::ObjectId::from(hasher.digest()); progress.show_throughput(start); Ok(id) } #[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] mod write { use crate::hash::Sha1; /// A utility to automatically generate a hash while writing into an inner writer. pub struct Write { /// The hash implementation. pub hash: Sha1, /// The inner writer. pub inner: T, } impl std::io::Write for Write where T: std::io::Write, { fn write(&mut self, buf: &[u8]) -> std::io::Result { let written = self.inner.write(buf)?; self.hash.update(&buf[..written]); Ok(written) } fn flush(&mut self) -> std::io::Result<()> { self.inner.flush() } } impl Write where T: std::io::Write, { /// Create a new hash writer which hashes all bytes written to `inner` with a hash of `kind`. 
pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self { match object_hash { gix_hash::Kind::Sha1 => Write { inner, hash: Sha1::default(), }, } } } } #[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] pub use write::Write; gix-features-0.36.1/src/interrupt.rs000064400000000000000000000110021046102023000155010ustar 00000000000000//! Utilities to cause interruptions in common traits, like Read/Write and Iterator. use std::{ io, sync::atomic::{AtomicBool, Ordering}, }; /// A wrapper for an inner iterator which will check for interruptions on each iteration, stopping the iteration when /// that is requested. pub struct Iter<'a, I> { /// The actual iterator to yield elements from. pub inner: I, should_interrupt: &'a AtomicBool, } impl<'a, I> Iter<'a, I> where I: Iterator, { /// Create a new iterator over `inner` which checks for interruptions on each iteration on `should_interrupt`. /// /// Note that this means the consumer of the iterator data should also be able to access `should_interrupt` and /// consider it when producing the final result to avoid claiming success even though the operation is only partially /// complete. pub fn new(inner: I, should_interrupt: &'a AtomicBool) -> Self { Iter { inner, should_interrupt, } } } impl<'a, I> Iterator for Iter<'a, I> where I: Iterator, { type Item = I::Item; fn next(&mut self) -> Option { if self.should_interrupt.load(Ordering::Relaxed) { return None; } self.inner.next() } } /// A wrapper for an inner iterator which will check for interruptions on each iteration. pub struct IterWithErr<'a, I, EFN> { /// The actual iterator to yield elements from. pub inner: I, make_err: Option, should_interrupt: &'a AtomicBool, } impl<'a, I, EFN, E> IterWithErr<'a, I, EFN> where I: Iterator, EFN: FnOnce() -> E, { /// Create a new iterator over `inner` which checks for interruptions on each iteration and calls `make_err()` to /// signal an interruption happened, causing no further items to be iterated from that point on. 
pub fn new(inner: I, make_err: EFN, should_interrupt: &'a AtomicBool) -> Self { IterWithErr { inner, make_err: Some(make_err), should_interrupt, } } } impl<'a, I, EFN, E> Iterator for IterWithErr<'a, I, EFN> where I: Iterator, EFN: FnOnce() -> E, { type Item = Result; fn next(&mut self) -> Option { self.make_err.as_ref()?; if self.should_interrupt.load(Ordering::Relaxed) { return self.make_err.take().map(|f| Err(f())); } match self.inner.next() { Some(next) => Some(Ok(next)), None => { self.make_err = None; None } } } } /// A wrapper for implementors of [`std::io::Read`] or [`std::io::BufRead`] with interrupt support. /// /// It fails a [read][std::io::Read::read] while an interrupt was requested. pub struct Read<'a, R> { /// The actual implementor of [`std::io::Read`] to which interrupt support will be added. pub inner: R, /// The flag to trigger interruption pub should_interrupt: &'a AtomicBool, } impl<'a, R> io::Read for Read<'a, R> where R: io::Read, { fn read(&mut self, buf: &mut [u8]) -> io::Result { if self.should_interrupt.load(Ordering::Relaxed) { return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted")); } self.inner.read(buf) } } impl<'a, R> io::BufRead for Read<'a, R> where R: io::BufRead, { fn fill_buf(&mut self) -> io::Result<&[u8]> { self.inner.fill_buf() } fn consume(&mut self, amt: usize) { self.inner.consume(amt) } } /// A wrapper for implementors of [`std::io::Write`] with interrupt checks on each write call. /// /// It fails a [write][std::io::Write::write] while an interrupt was requested. pub struct Write<'a, W> { /// The actual implementor of [`std::io::Write`] to which interrupt support will be added. 
pub inner: W, /// The flag to trigger interruption pub should_interrupt: &'a AtomicBool, } impl io::Write for Write<'_, W> where W: std::io::Write, { fn write(&mut self, buf: &[u8]) -> io::Result { if self.should_interrupt.load(Ordering::Relaxed) { return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted")); } self.inner.write(buf) } fn flush(&mut self) -> io::Result<()> { // Don't interrupt here, allow flushes to happen to prefer disk consistency. self.inner.flush() } } impl io::Seek for Write<'_, W> where W: std::io::Seek, { fn seek(&mut self, pos: io::SeekFrom) -> io::Result { self.inner.seek(pos) } } gix-features-0.36.1/src/io.rs000064400000000000000000000064601046102023000140700ustar 00000000000000//!A unidirectional pipe for bytes, analogous to a unix pipe. Available with the `io-pipe` feature toggle. /// A unidirectional pipe for bytes, analogous to a unix pipe. Available with the `io-pipe` feature toggle. #[cfg(feature = "io-pipe")] pub mod pipe { use std::io; use bytes::{Buf, BufMut, BytesMut}; /// The write-end of the pipe, receiving items to become available in the [`Reader`]. /// /// It's commonly used with the [`std::io::Write`] trait it implements. pub struct Writer { /// The channel through which bytes are transferred. Useful for sending [`std::io::Error`]s instead. pub channel: std::sync::mpsc::SyncSender>, buf: BytesMut, } /// The read-end of the pipe, implementing the [`std::io::Read`] trait. 
pub struct Reader { channel: std::sync::mpsc::Receiver>, buf: BytesMut, } impl io::BufRead for Reader { fn fill_buf(&mut self) -> io::Result<&[u8]> { if self.buf.is_empty() { match self.channel.recv() { Ok(Ok(buf)) => self.buf = buf, Ok(Err(err)) => return Err(err), Err(_) => {} } }; Ok(&self.buf) } fn consume(&mut self, amt: usize) { self.buf.advance(amt.min(self.buf.len())); } } impl io::Read for Reader { fn read(&mut self, mut out: &mut [u8]) -> io::Result { let mut written = 0; while !out.is_empty() { if self.buf.is_empty() { match self.channel.recv() { Ok(Ok(buf)) => self.buf = buf, Ok(Err(err)) => return Err(err), Err(_) => break, } } let bytes_to_write = self.buf.len().min(out.len()); let (to_write, rest) = out.split_at_mut(bytes_to_write); self.buf.split_to(bytes_to_write).copy_to_slice(to_write); out = rest; written += bytes_to_write; } Ok(written) } } impl io::Write for Writer { fn write(&mut self, buf: &[u8]) -> io::Result { self.buf.put_slice(buf); self.channel .send(Ok(self.buf.split())) .map_err(|err| io::Error::new(io::ErrorKind::BrokenPipe, err))?; Ok(buf.len()) } fn flush(&mut self) -> io::Result<()> { Ok(()) } } /// Returns the _([`write`][Writer], [`read`][Reader])_ ends of a pipe for transferring bytes, analogous to a unix pipe. /// /// * `in_flight_writes` defines the amount of chunks of bytes to keep in memory until the `write` end will block when writing. /// If `0`, the `write` end will always block until the `read` end consumes the transferred bytes. pub fn unidirectional(in_flight_writes: usize) -> (Writer, Reader) { let (tx, rx) = std::sync::mpsc::sync_channel(in_flight_writes); ( Writer { channel: tx, buf: BytesMut::with_capacity(4096), }, Reader { channel: rx, buf: BytesMut::new(), }, ) } } gix-features-0.36.1/src/lib.rs000064400000000000000000000036221046102023000142240ustar 00000000000000//! A crate providing foundational capabilities to other `git-*` crates with trade-offs between compile time, binary size or speed //! 
selectable using cargo feature toggles. //! //! It's designed to allow the application level crate to configure feature toggles, affecting all other `git-*` crates using //! this one. //! //! Thus all features provided here commonly have a 'cheap' base implementation, with the option to pull in //! counterparts with higher performance. //! ## Feature Flags #![cfg_attr( all(doc, feature = "document-features"), doc = ::document_features::document_features!() )] #![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg, doc_auto_cfg))] #![deny(missing_docs, rust_2018_idioms, unsafe_code)] /// pub mod cache; /// pub mod decode; pub mod fs; pub mod hash; pub mod interrupt; #[cfg(feature = "io-pipe")] pub mod io; pub mod parallel; #[cfg(feature = "progress")] pub mod progress; pub mod threading; pub use gix_trace as trace; /// #[cfg(feature = "zlib")] pub mod zlib; /// pub mod iter { /// An iterator over chunks of input, producing `Vec` with a size of `size`, with the last chunk being the remainder and thus /// potentially smaller than `size`. pub struct Chunks { /// The inner iterator to ask for items. pub inner: I, /// The size of chunks to produce pub size: usize, } impl Iterator for Chunks where I: Iterator, { type Item = Vec; fn next(&mut self) -> Option { let mut res = Vec::with_capacity(self.size); let mut items_left = self.size; for item in &mut self.inner { res.push(item); items_left -= 1; if items_left == 0 { break; } } (!res.is_empty()).then_some(res) } } } gix-features-0.36.1/src/parallel/eager_iter.rs000064400000000000000000000100511046102023000173520ustar 00000000000000/// Evaluate any iterator in their own thread. /// /// This is particularly useful if the wrapped iterator performs IO and/or heavy computations. /// Use [`EagerIter::new()`] for instantiation. 
pub struct EagerIter { receiver: std::sync::mpsc::Receiver>, chunk: Option>, size_hint: (usize, Option), } impl EagerIter where I: Iterator + Send + 'static, ::Item: Send, { /// Return a new `EagerIter` which evaluates `iter` in its own thread, /// with a given `chunk_size` allowing a maximum `chunks_in_flight`. /// /// * `chunk_size` describes how many items returned by `iter` will be a single item of this `EagerIter`. /// This helps to reduce the overhead imposed by transferring many small items. /// If this number is 1, each item will become a single chunk. 0 is invalid. /// * `chunks_in_flight` describes how many chunks can be kept in memory in case the consumer of the `EagerIter`s items /// isn't consuming them fast enough. Setting this number to 0 effectively turns off any caching, but blocks `EagerIter` /// if its items aren't consumed fast enough. pub fn new(iter: I, chunk_size: usize, chunks_in_flight: usize) -> Self { let (sender, receiver) = std::sync::mpsc::sync_channel(chunks_in_flight); let size_hint = iter.size_hint(); assert!(chunk_size > 0, "non-zero chunk size is needed"); std::thread::spawn(move || { let mut out = Vec::with_capacity(chunk_size); for item in iter { out.push(item); if out.len() == chunk_size { if sender.send(out).is_err() { return; } out = Vec::with_capacity(chunk_size); } } if !out.is_empty() { sender.send(out).ok(); } }); EagerIter { receiver, chunk: None, size_hint, } } fn fill_buf_and_pop(&mut self) -> Option { self.chunk = self.receiver.recv().ok().map(|v| { assert!(!v.is_empty()); v.into_iter() }); self.chunk.as_mut().and_then(Iterator::next) } } impl Iterator for EagerIter where I: Iterator + Send + 'static, ::Item: Send, { type Item = I::Item; fn next(&mut self) -> Option { match self.chunk.as_mut() { Some(chunk) => chunk.next().or_else(|| self.fill_buf_and_pop()), None => self.fill_buf_and_pop(), } } fn size_hint(&self) -> (usize, Option) { self.size_hint } } /// An conditional `EagerIter`, which may become a just-in-time 
iterator running in the main thread depending on a condition. pub enum EagerIterIf { /// A separate thread will eagerly evaluate iterator `I`. Eager(EagerIter), /// The current thread evaluates `I`. OnDemand(I), } impl EagerIterIf where I: Iterator + Send + 'static, ::Item: Send, { /// Return a new `EagerIterIf` if `condition()` returns true. /// /// For all other parameters, please see [`EagerIter::new()`]. pub fn new(condition: impl FnOnce() -> bool, iter: I, chunk_size: usize, chunks_in_flight: usize) -> Self { if condition() { EagerIterIf::Eager(EagerIter::new(iter, chunk_size, chunks_in_flight)) } else { EagerIterIf::OnDemand(iter) } } } impl Iterator for EagerIterIf where I: Iterator + Send + 'static, ::Item: Send, { type Item = I::Item; fn next(&mut self) -> Option { match self { EagerIterIf::OnDemand(i) => i.next(), EagerIterIf::Eager(i) => i.next(), } } fn size_hint(&self) -> (usize, Option) { match self { EagerIterIf::OnDemand(i) => i.size_hint(), EagerIterIf::Eager(i) => i.size_hint(), } } } gix-features-0.36.1/src/parallel/in_order.rs000064400000000000000000000054641046102023000170610ustar 00000000000000use std::{cmp::Ordering, collections::BTreeMap}; /// A counter for items that are in sequence, to be able to put them back into original order later. pub type SequenceId = usize; /// An iterator which olds iterated items with a **sequential** ID starting at 0 long enough to dispense them in order. pub struct InOrderIter { /// The iterator yielding the out-of-order elements we are to yield in order. 
pub inner: I, store: BTreeMap, next_chunk: SequenceId, is_done: bool, } impl From for InOrderIter where I: Iterator>, { fn from(iter: I) -> Self { InOrderIter { inner: iter, store: Default::default(), next_chunk: 0, is_done: false, } } } impl Iterator for InOrderIter where I: Iterator>, { type Item = Result; fn next(&mut self) -> Option { if self.is_done { return None; } 'find_next_in_sequence: loop { match self.inner.next() { Some(Ok((c, v))) => match c.cmp(&self.next_chunk) { Ordering::Equal => { self.next_chunk += 1; return Some(Ok(v)); } Ordering::Less => { unreachable!("in a correctly ordered sequence we can never see keys again, got {}", c) } Ordering::Greater => { let previous = self.store.insert(c, v); assert!( previous.is_none(), "Chunks are returned only once, input is an invalid sequence" ); if let Some(v) = self.store.remove(&self.next_chunk) { self.next_chunk += 1; return Some(Ok(v)); } continue 'find_next_in_sequence; } }, Some(Err(e)) => { self.is_done = true; self.store.clear(); return Some(Err(e)); } None => match self.store.remove(&self.next_chunk) { Some(v) => { self.next_chunk += 1; return Some(Ok(v)); } None => { debug_assert!( self.store.is_empty(), "When iteration is done we should not have stored items left" ); return None; } }, } } } } gix-features-0.36.1/src/parallel/in_parallel.rs000064400000000000000000000335201046102023000175340ustar 00000000000000use std::sync::atomic::{AtomicBool, AtomicIsize, AtomicUsize, Ordering}; use crate::parallel::{num_threads, Reduce}; /// A scope to start threads within. pub type Scope<'scope, 'env> = std::thread::Scope<'scope, 'env>; /// Runs `left` and `right` in parallel, returning their output when both are done. 
pub fn join(left: impl FnOnce() -> O1 + Send, right: impl FnOnce() -> O2 + Send) -> (O1, O2) { std::thread::scope(|s| { let left = std::thread::Builder::new() .name("gitoxide.join.left".into()) .spawn_scoped(s, left) .expect("valid name"); let right = std::thread::Builder::new() .name("gitoxide.join.right".into()) .spawn_scoped(s, right) .expect("valid name"); (left.join().unwrap(), right.join().unwrap()) }) } /// Runs `f` with a scope to be used for spawning threads that will not outlive the function call. /// That way it's possible to handle threads without needing the 'static lifetime for data they interact with. /// /// Note that the threads should not rely on actual parallelism as threading might be turned off entirely, hence should not /// connect each other with channels as deadlock would occur in single-threaded mode. pub fn threads<'env, F, R>(f: F) -> R where F: for<'scope> FnOnce(&'scope std::thread::Scope<'scope, 'env>) -> R, { std::thread::scope(f) } /// Create a builder for threads which allows them to be spawned into a scope and configured prior to spawning. pub fn build_thread() -> std::thread::Builder { std::thread::Builder::new() } /// Read items from `input` and `consume` them in multiple threads, /// whose output output is collected by a `reducer`. Its task is to /// aggregate these outputs into the final result returned by this function with the benefit of not having to be thread-safe. /// /// * if `thread_limit` is `Some`, the given amount of threads will be used. If `None`, all logical cores will be used. /// * `new_thread_state(thread_number) -> State` produces thread-local state once per thread to be based to `consume` /// * `consume(Item, &mut State) -> Output` produces an output given an input obtained by `input` along with mutable state initially /// created by `new_thread_state(…)`. 
/// * For `reducer`, see the [`Reduce`] trait pub fn in_parallel( input: impl Iterator + Send, thread_limit: Option, new_thread_state: impl FnOnce(usize) -> S + Send + Clone, consume: impl FnMut(I, &mut S) -> O + Send + Clone, mut reducer: R, ) -> Result<::Output, ::Error> where R: Reduce, I: Send, O: Send, { let num_threads = num_threads(thread_limit); std::thread::scope(move |s| { let receive_result = { let (send_input, receive_input) = crossbeam_channel::bounded::(num_threads); let (send_result, receive_result) = crossbeam_channel::bounded::(num_threads); for thread_id in 0..num_threads { std::thread::Builder::new() .name(format!("gitoxide.in_parallel.produce.{thread_id}")) .spawn_scoped(s, { let send_result = send_result.clone(); let receive_input = receive_input.clone(); let new_thread_state = new_thread_state.clone(); let mut consume = consume.clone(); move || { let mut state = new_thread_state(thread_id); for item in receive_input { if send_result.send(consume(item, &mut state)).is_err() { break; } } } }) .expect("valid name"); } std::thread::Builder::new() .name("gitoxide.in_parallel.feed".into()) .spawn_scoped(s, move || { for item in input { if send_input.send(item).is_err() { break; } } }) .expect("valid name"); receive_result }; for item in receive_result { drop(reducer.feed(item)?); } reducer.finalize() }) } /// Read items from `input` and `consume` them in multiple threads, /// whose output output is collected by a `reducer`. Its task is to /// aggregate these outputs into the final result returned by this function with the benefit of not having to be thread-safe. /// Caall `finalize` to finish the computation, once per thread, if there was no error sending results earlier. /// /// * if `thread_limit` is `Some`, the given amount of threads will be used. If `None`, all logical cores will be used. 
/// * `new_thread_state(thread_number) -> State` produces thread-local state once per thread to be based to `consume` /// * `consume(Item, &mut State) -> Output` produces an output given an input obtained by `input` along with mutable state initially /// created by `new_thread_state(…)`. /// * `finalize(State) -> Output` is called to potentially process remaining work that was placed in `State`. /// * For `reducer`, see the [`Reduce`] trait pub fn in_parallel_with_finalize( input: impl Iterator + Send, thread_limit: Option, new_thread_state: impl FnOnce(usize) -> S + Send + Clone, consume: impl FnMut(I, &mut S) -> O + Send + Clone, finalize: impl FnOnce(S) -> O + Send + Clone, mut reducer: R, ) -> Result<::Output, ::Error> where R: Reduce, I: Send, O: Send, { let num_threads = num_threads(thread_limit); std::thread::scope(move |s| { let receive_result = { let (send_input, receive_input) = crossbeam_channel::bounded::(num_threads); let (send_result, receive_result) = crossbeam_channel::bounded::(num_threads); for thread_id in 0..num_threads { std::thread::Builder::new() .name(format!("gitoxide.in_parallel.produce.{thread_id}")) .spawn_scoped(s, { let send_result = send_result.clone(); let receive_input = receive_input.clone(); let new_thread_state = new_thread_state.clone(); let mut consume = consume.clone(); let finalize = finalize.clone(); move || { let mut state = new_thread_state(thread_id); let mut can_send = true; for item in receive_input { if send_result.send(consume(item, &mut state)).is_err() { can_send = false; break; } } if can_send { send_result.send(finalize(state)).ok(); } } }) .expect("valid name"); } std::thread::Builder::new() .name("gitoxide.in_parallel.feed".into()) .spawn_scoped(s, move || { for item in input { if send_input.send(item).is_err() { break; } } }) .expect("valid name"); receive_result }; for item in receive_result { drop(reducer.feed(item)?); } reducer.finalize() }) } /// An experiment to have fine-grained per-item parallelization 
with built-in aggregation via thread state. /// This is only good for operations where near-random access isn't detrimental, so it's not usually great /// for file-io as it won't make use of sorted inputs well. /// Note that `periodic` is not guaranteed to be called in case other threads come up first and finish too fast. /// `consume(&mut item, &mut stat, &Scope, &threads_available, &should_interrupt)` is called for performing the actual computation. /// Note that `threads_available` should be decremented to start a thread that can steal your own work (as stored in `item`), /// which allows callees to implement their own work-stealing in case the work is distributed unevenly. /// Work stealing should only start after having processed at least one item to give all threads naturally operating on the slice /// some time to start. Starting threads while slice-workers are still starting up would lead to over-allocation of threads, /// which is why the number of threads left may turn negative. Once threads are started and stopped, be sure to adjust /// the thread-count accordingly. 
// TODO: better docs pub fn in_parallel_with_slice( input: &mut [I], thread_limit: Option, new_thread_state: impl FnOnce(usize) -> S + Send + Clone, consume: impl FnMut(&mut I, &mut S, &AtomicIsize, &AtomicBool) -> Result<(), E> + Send + Clone, mut periodic: impl FnMut() -> Option + Send, state_to_rval: impl FnOnce(S) -> R + Send + Clone, ) -> Result, E> where I: Send, E: Send, R: Send, { let num_threads = num_threads(thread_limit); let mut results = Vec::with_capacity(num_threads); let stop_everything = &AtomicBool::default(); let index = &AtomicUsize::default(); let threads_left = &AtomicIsize::new(num_threads as isize); std::thread::scope({ move |s| { std::thread::Builder::new() .name("gitoxide.in_parallel_with_slice.watch-interrupts".into()) .spawn_scoped(s, { move || loop { if stop_everything.load(Ordering::Relaxed) { break; } match periodic() { Some(duration) => std::thread::sleep(duration), None => { stop_everything.store(true, Ordering::Relaxed); break; } } } }) .expect("valid name"); let input_len = input.len(); struct Input(*mut I) where I: Send; // SAFETY: I is Send, and we only use the pointer for creating new // pointers (within the input slice) from the threads. 
#[allow(unsafe_code)] unsafe impl Send for Input where I: Send {} let threads: Vec<_> = (0..num_threads) .map(|thread_id| { std::thread::Builder::new() .name(format!("gitoxide.in_parallel_with_slice.produce.{thread_id}")) .spawn_scoped(s, { let new_thread_state = new_thread_state.clone(); let state_to_rval = state_to_rval.clone(); let mut consume = consume.clone(); let input = Input(input.as_mut_ptr()); move || { let _ = &input; threads_left.fetch_sub(1, Ordering::SeqCst); let mut state = new_thread_state(thread_id); let res = (|| { while let Ok(input_index) = index.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |x| { (x < input_len).then_some(x + 1) }) { if stop_everything.load(Ordering::Relaxed) { break; } // SAFETY: our atomic counter for `input_index` is only ever incremented, yielding // each item exactly once. let item = { #[allow(unsafe_code)] unsafe { &mut *input.0.add(input_index) } }; if let Err(err) = consume(item, &mut state, threads_left, stop_everything) { stop_everything.store(true, Ordering::Relaxed); return Err(err); } } Ok(state_to_rval(state)) })(); threads_left.fetch_add(1, Ordering::SeqCst); res } }) .expect("valid name") }) .collect(); for thread in threads { match thread.join() { Ok(res) => { results.push(res?); } Err(err) => { // a panic happened, stop the world gracefully (even though we panic later) stop_everything.store(true, Ordering::Relaxed); std::panic::resume_unwind(err); } } } stop_everything.store(true, Ordering::Relaxed); Ok(results) } }) } gix-features-0.36.1/src/parallel/mod.rs000064400000000000000000000167461046102023000160440ustar 00000000000000//! Run computations in parallel, or not based the `parallel` feature toggle. //! //! ### `in_parallel`(…) //! //! The [`in_parallel(…)`][in_parallel()] is the typical fan-out-fan-in mode of parallelism, with thread local storage //! made available to a `consume(…)` function to process input. The result is sent to the [`Reduce`] running in the calling //! 
thread to aggregate the results into a single output, which is returned by [`in_parallel()`]. //! //! Interruptions can be achieved by letting the reducers [`feed(…)`][Reduce::feed()] method fail. //! //! It gets a boost in usability as it allows threads to borrow variables from the stack, most commonly the repository itself //! or the data to work on. //! //! This mode of operation doesn't lend itself perfectly to being wrapped for `async` as it appears like a single long-running //! operation which runs as fast as possible, which is cancellable only by merit of stopping the input or stopping the output //! aggregation. //! //! ### `reduce::Stepwise` //! //! The [`Stepwise`][reduce::Stepwise] iterator works exactly as [`in_parallel()`] except that the processing of the output produced by //! `consume(I, &mut State) -> O` is made accessible by the `Iterator` trait's `next()` method. As produced work is not //! buffered, the owner of the iterator controls the progress made. //! //! Getting the final output of the [`Reduce`] is achieved through the consuming [`Stepwise::finalize()`][reduce::Stepwise::finalize()] method, which //! is functionally equivalent to calling [`in_parallel()`]. //! //! In an `async` context this means that progress is only made each time `next()` is called on the iterator, while merely dropping //! the iterator will wind down the computation without any result. //! //! #### Maintaining Safety //! //! In order to assure that threads don't outlive the data they borrow because their handles are leaked, we enforce //! the `'static` lifetime for its inputs, making it less intuitive to use. It is, however, possible to produce //! suitable input iterators as long as they can hold something on the heap. 
#[cfg(feature = "parallel")] mod in_parallel; #[cfg(feature = "parallel")] pub use in_parallel::{ build_thread, in_parallel, in_parallel_with_finalize, in_parallel_with_slice, join, threads, Scope, }; mod serial; #[cfg(not(feature = "parallel"))] pub use serial::{build_thread, in_parallel, in_parallel_with_finalize, in_parallel_with_slice, join, threads, Scope}; mod in_order; pub use in_order::{InOrderIter, SequenceId}; mod eager_iter; pub use eager_iter::{EagerIter, EagerIterIf}; /// A no-op returning the input _(`desired_chunk_size`, `Some(thread_limit)`, `thread_limit)_ used /// when the `parallel` feature toggle is not set. #[cfg(not(feature = "parallel"))] pub fn optimize_chunk_size_and_thread_limit( desired_chunk_size: usize, _num_items: Option, thread_limit: Option, _available_threads: Option, ) -> (usize, Option, usize) { (desired_chunk_size, thread_limit, num_threads(thread_limit)) } /// Return the 'optimal' _(`size of chunks`, `amount of threads as Option`, `amount of threads`)_ to use in [`in_parallel()`] for the given /// `desired_chunk_size`, `num_items`, `thread_limit` and `available_threads`. /// /// * `desired_chunk_size` is the amount of items per chunk you think should be used. /// * `num_items` is the total amount of items in the iteration, if `Some`. /// Otherwise this knowledge will not affect the output of this function. /// * `thread_limit` is the amount of threads to use at most, if `Some`. /// Otherwise this knowledge will not affect the output of this function. /// * `available_threads` is the total amount of threads available, if `Some`. /// Otherwise the actual amount of available threads is determined by querying the system. /// /// `Note` that this implementation is available only if the `parallel` feature toggle is set. 
#[cfg(feature = "parallel")] pub fn optimize_chunk_size_and_thread_limit( desired_chunk_size: usize, num_items: Option, thread_limit: Option, available_threads: Option, ) -> (usize, Option, usize) { let available_threads = available_threads.unwrap_or_else(|| std::thread::available_parallelism().map_or(1, Into::into)); let available_threads = thread_limit.map_or(available_threads, |l| if l == 0 { available_threads } else { l }); let (lower, upper) = (50, 1000); let (chunk_size, thread_limit) = num_items.map_or( { let chunk_size = if available_threads == 1 { desired_chunk_size } else if desired_chunk_size < lower { lower } else { desired_chunk_size.min(upper) }; (chunk_size, available_threads) }, |num_items| { let desired_chunks_per_thread_at_least = 2; let items = num_items; let chunk_size = (items / (available_threads * desired_chunks_per_thread_at_least)).clamp(1, upper); let num_chunks = items / chunk_size; let thread_limit = if num_chunks <= available_threads { (num_chunks / desired_chunks_per_thread_at_least).max(1) } else { available_threads }; (chunk_size, thread_limit) }, ); (chunk_size, Some(thread_limit), thread_limit) } /// Always returns 1, available when the `parallel` feature toggle is unset. #[cfg(not(feature = "parallel"))] pub fn num_threads(_thread_limit: Option) -> usize { 1 } /// Returns the amount of threads the system can effectively use as the amount of its logical cores. /// /// Only available with the `parallel` feature toggle set. #[cfg(feature = "parallel")] pub fn num_threads(thread_limit: Option) -> usize { let logical_cores = std::thread::available_parallelism().map_or(1, Into::into); thread_limit.map_or(logical_cores, |l| if l == 0 { logical_cores } else { l }) } /// Run [`in_parallel()`] only if the given `condition()` returns true when eagerly evaluated. 
/// /// For parameters, see the documentation of [`in_parallel()`] #[cfg(feature = "parallel")] pub fn in_parallel_if( condition: impl FnOnce() -> bool, input: impl Iterator + Send, thread_limit: Option, new_thread_state: impl FnOnce(usize) -> S + Send + Clone, consume: impl FnMut(I, &mut S) -> O + Send + Clone, reducer: R, ) -> Result<::Output, ::Error> where R: Reduce, I: Send, O: Send, { if num_threads(thread_limit) > 1 && condition() { in_parallel(input, thread_limit, new_thread_state, consume, reducer) } else { serial::in_parallel(input, thread_limit, new_thread_state, consume, reducer) } } /// Run [`in_parallel()`] only if the given `condition()` returns true when eagerly evaluated. /// /// For parameters, see the documentation of [`in_parallel()`] /// /// Note that the non-parallel version is equivalent to [`in_parallel()`]. #[cfg(not(feature = "parallel"))] pub fn in_parallel_if( _condition: impl FnOnce() -> bool, input: impl Iterator, thread_limit: Option, new_thread_state: impl FnOnce(usize) -> S, consume: impl FnMut(I, &mut S) -> O, reducer: R, ) -> Result<::Output, ::Error> where R: Reduce, I: Send, O: Send, { serial::in_parallel(input, thread_limit, new_thread_state, consume, reducer) } /// pub mod reduce; pub use reduce::Reduce; gix-features-0.36.1/src/parallel/reduce.rs000064400000000000000000000246701046102023000165270ustar 00000000000000#[cfg(feature = "parallel")] mod stepped { use crate::parallel::num_threads; /// An iterator adaptor to allow running computations using [`in_parallel()`][crate::parallel::in_parallel()] in a step-wise manner, see the [module docs][crate::parallel] /// for details. pub struct Stepwise { /// This field is first to assure it's dropped first and cause threads that are dropped next to stop their loops /// as sending results fails when the receiver is dropped. receive_result: std::sync::mpsc::Receiver, /// `join()` will be called on these guards to assure every thread tries to send through a closed channel. 
When /// that happens, they break out of their loops. threads: Vec>, /// The reducer is called only in the thread using the iterator, dropping it has no side effects. reducer: Option, } impl Drop for Stepwise { fn drop(&mut self) { let (_, sink) = std::sync::mpsc::channel(); drop(std::mem::replace(&mut self.receive_result, sink)); let mut last_err = None; for handle in std::mem::take(&mut self.threads) { if let Err(err) = handle.join() { last_err = Some(err); }; } if let Some(thread_err) = last_err { std::panic::resume_unwind(thread_err); } } } impl Stepwise { /// Instantiate a new iterator and start working in threads. /// For a description of parameters, see [`in_parallel()`][crate::parallel::in_parallel()]. pub fn new( input: InputIter, thread_limit: Option, new_thread_state: ThreadStateFn, consume: ConsumeFn, reducer: Reduce, ) -> Self where InputIter: Iterator + Send + 'static, ThreadStateFn: Fn(usize) -> S + Send + Clone + 'static, ConsumeFn: Fn(I, &mut S) -> O + Send + Clone + 'static, Reduce: super::Reduce + 'static, I: Send + 'static, O: Send + 'static, { let num_threads = num_threads(thread_limit); let mut threads = Vec::with_capacity(num_threads + 1); let receive_result = { let (send_input, receive_input) = crossbeam_channel::bounded::(num_threads); let (send_result, receive_result) = std::sync::mpsc::sync_channel::(num_threads); for thread_id in 0..num_threads { let handle = std::thread::spawn({ let send_result = send_result.clone(); let receive_input = receive_input.clone(); let new_thread_state = new_thread_state.clone(); let consume = consume.clone(); move || { let mut state = new_thread_state(thread_id); for item in receive_input { if send_result.send(consume(item, &mut state)).is_err() { break; } } } }); threads.push(handle); } threads.push(std::thread::spawn(move || { for item in input { if send_input.send(item).is_err() { break; } } })); receive_result }; Stepwise { threads, receive_result, reducer: Some(reducer), } } /// Consume the iterator by 
finishing its iteration and calling [`Reduce::finalize()`][crate::parallel::Reduce::finalize()]. pub fn finalize(mut self) -> Result { for value in self.by_ref() { drop(value?); } self.reducer .take() .expect("this is the last call before consumption") .finalize() } } impl Iterator for Stepwise { type Item = Result; fn next(&mut self) -> Option<::Item> { self.receive_result .recv() .ok() .and_then(|input| self.reducer.as_mut().map(|r| r.feed(input))) } } impl super::Finalize for Stepwise { type Reduce = R; fn finalize( self, ) -> Result< <::Reduce as super::Reduce>::Output, <::Reduce as super::Reduce>::Error, > { Stepwise::finalize(self) } } } #[cfg(not(feature = "parallel"))] mod stepped { /// An iterator adaptor to allow running computations using [`in_parallel()`][crate::parallel::in_parallel()] in a step-wise manner, see the [module docs][crate::parallel] /// for details. pub struct Stepwise { input: InputIter, consume: ConsumeFn, thread_state: ThreadState, reducer: Reduce, } impl Stepwise where InputIter: Iterator, ConsumeFn: Fn(I, &mut S) -> O, Reduce: super::Reduce, { /// Instantiate a new iterator. /// For a description of parameters, see [`in_parallel()`][crate::parallel::in_parallel()]. pub fn new( input: InputIter, _thread_limit: Option, new_thread_state: ThreadStateFn, consume: ConsumeFn, reducer: Reduce, ) -> Self where ThreadStateFn: Fn(usize) -> S, { Stepwise { input, consume, thread_state: new_thread_state(0), reducer, } } /// Consume the iterator by finishing its iteration and calling [`Reduce::finalize()`][crate::parallel::Reduce::finalize()]. 
pub fn finalize(mut self) -> Result { for value in self.by_ref() { drop(value?); } self.reducer.finalize() } } impl Iterator for Stepwise where InputIter: Iterator, ConsumeFn: Fn(I, &mut ThreadState) -> O, Reduce: super::Reduce, { type Item = Result; fn next(&mut self) -> Option<::Item> { self.input .next() .map(|input| self.reducer.feed((self.consume)(input, &mut self.thread_state))) } } impl super::Finalize for Stepwise where InputIter: Iterator, ConsumeFn: Fn(I, &mut S) -> O, R: super::Reduce, { type Reduce = R; fn finalize( self, ) -> Result< <::Reduce as super::Reduce>::Output, <::Reduce as super::Reduce>::Error, > { Stepwise::finalize(self) } } } use std::marker::PhantomData; pub use stepped::Stepwise; /// An trait for aggregating items commonly produced in threads into a single result, without itself /// needing to be thread safe. pub trait Reduce { /// The type fed to the reducer in the [`feed()`][Reduce::feed()] method. /// /// It's produced by a function that may run on multiple threads. type Input; /// The type produced in Ok(…) by [`feed()`][Reduce::feed()]. /// Most reducers by nature use `()` here as the value is in the aggregation. /// However, some may use it to collect statistics only and return their Input /// in some form as a result here for [`Stepwise`] to be useful. type FeedProduce; /// The type produced once by the [`finalize()`][Reduce::finalize()] method. /// /// For traditional reducers, this is the value produced by the entire operation. /// For those made for step-wise iteration this may be aggregated statistics. type Output; /// The error type to use for all methods of this trait. type Error; /// Called each time a new `item` was produced in order to aggregate it into the final result. /// /// If an `Error` is returned, the entire operation will be stopped. fn feed(&mut self, item: Self::Input) -> Result; /// Called once once all items where passed to `feed()`, producing the final `Output` of the operation or an `Error`. 
fn finalize(self) -> Result; } /// An identity reducer for those who want to use [`Stepwise`] or [`in_parallel()`][crate::parallel::in_parallel()] /// without the use of non-threaded reduction of products created in threads. pub struct IdentityWithResult { _input: PhantomData, _error: PhantomData, } impl Default for IdentityWithResult { fn default() -> Self { IdentityWithResult { _input: Default::default(), _error: Default::default(), } } } impl Reduce for IdentityWithResult { type Input = Result; type FeedProduce = Input; type Output = (); type Error = Error; fn feed(&mut self, item: Self::Input) -> Result { item } fn finalize(self) -> Result { Ok(()) } } /// A trait reflecting the `finalize()` method of [`Reduce`] implementations pub trait Finalize { /// An implementation of [`Reduce`] type Reduce: self::Reduce; /// Similar to the [`Reduce::finalize()`] method fn finalize( self, ) -> Result<<::Reduce as self::Reduce>::Output, <::Reduce as self::Reduce>::Error>; } gix-features-0.36.1/src/parallel/serial.rs000064400000000000000000000150611046102023000165310ustar 00000000000000use crate::parallel::Reduce; #[cfg(not(feature = "parallel"))] mod not_parallel { use std::sync::atomic::{AtomicBool, AtomicIsize}; /// Runs `left` and then `right`, one after another, returning their output when both are done. pub fn join(left: impl FnOnce() -> O1, right: impl FnOnce() -> O2) -> (O1, O2) { (left(), right()) } /// A scope for spawning threads. pub struct Scope<'scope, 'env: 'scope> { _scope: std::marker::PhantomData<&'scope mut &'scope ()>, _env: std::marker::PhantomData<&'env mut &'env ()>, } pub struct ThreadBuilder; /// Create a builder for threads which allows them to be spawned into a scope and configured prior to spawning. 
pub fn build_thread() -> ThreadBuilder { ThreadBuilder } #[allow(unsafe_code)] unsafe impl Sync for Scope<'_, '_> {} impl ThreadBuilder { pub fn name(self, _new: String) -> Self { self } pub fn spawn_scoped<'scope, 'env, F, T>( &self, scope: &'scope Scope<'scope, 'env>, f: F, ) -> std::io::Result> where F: FnOnce() -> T + 'scope, T: 'scope, { Ok(scope.spawn(f)) } } impl<'scope, 'env> Scope<'scope, 'env> { /// Provided with this scope, let `f` start new threads that live within it. pub fn spawn(&'scope self, f: F) -> ScopedJoinHandle<'scope, T> where F: FnOnce() -> T + 'scope, T: 'scope, { ScopedJoinHandle { result: f(), _marker: Default::default(), } } } /// Runs `f` with a scope to be used for spawning threads that will not outlive the function call. /// Note that this implementation will run the spawned functions immediately. pub fn threads<'env, F, R>(f: F) -> R where F: for<'scope> FnOnce(&'scope Scope<'scope, 'env>) -> R, { f(&Scope { _scope: Default::default(), _env: Default::default(), }) } /// A handle that can be used to join its scoped thread. /// /// This struct is created by the [`Scope::spawn`] method and the /// [`ScopedThreadBuilder::spawn`] method. pub struct ScopedJoinHandle<'scope, T> { /// Holds the result of the inner closure. result: T, _marker: std::marker::PhantomData<&'scope mut &'scope ()>, } impl ScopedJoinHandle<'_, T> { pub fn join(self) -> std::thread::Result { Ok(self.result) } pub fn is_finished(&self) -> bool { true } } /// An experiment to have fine-grained per-item parallelization with built-in aggregation via thread state. /// This is only good for operations where near-random access isn't detrimental, so it's not usually great /// for file-io as it won't make use of sorted inputs well. 
// TODO: better docs pub fn in_parallel_with_slice( input: &mut [I], _thread_limit: Option, new_thread_state: impl FnOnce(usize) -> S + Clone, mut consume: impl FnMut(&mut I, &mut S, &AtomicIsize, &AtomicBool) -> Result<(), E> + Clone, mut periodic: impl FnMut() -> Option, state_to_rval: impl FnOnce(S) -> R + Clone, ) -> Result, E> { let mut state = new_thread_state(0); let should_interrupt = &AtomicBool::default(); let threads_left = &AtomicIsize::default(); for item in input { consume(item, &mut state, threads_left, should_interrupt)?; if periodic().is_none() { break; } } Ok(vec![state_to_rval(state)]) } } #[cfg(not(feature = "parallel"))] pub use not_parallel::{build_thread, in_parallel_with_slice, join, threads, Scope, ScopedJoinHandle}; /// Read items from `input` and `consume` them in a single thread, producing an output to be collected by a `reducer`, /// whose task is to aggregate these outputs into the final result returned by this function. /// /// * `new_thread_state(thread_number) -> State` produces thread-local state once per thread to be based to `consume` /// * `consume(Item, &mut State) -> Output` produces an output given an input along with mutable state. /// * For `reducer`, see the [`Reduce`] trait /// * if `thread_limit` has no effect as everything is run on the main thread, but is present to keep the signature /// similar to the parallel version. /// /// **This serial version performing all calculations on the current thread.** pub fn in_parallel( input: impl Iterator, _thread_limit: Option, new_thread_state: impl FnOnce(usize) -> S, mut consume: impl FnMut(I, &mut S) -> O, mut reducer: R, ) -> Result<::Output, ::Error> where R: Reduce, { let mut state = new_thread_state(0); for item in input { drop(reducer.feed(consume(item, &mut state))?); } reducer.finalize() } /// Read items from `input` and `consume` them in multiple threads, /// whose output output is collected by a `reducer`. 
Its task is to /// aggregate these outputs into the final result returned by this function with the benefit of not having to be thread-safe. /// Caall `finalize` to finish the computation, once per thread, if there was no error sending results earlier. /// /// * if `thread_limit` is `Some`, the given amount of threads will be used. If `None`, all logical cores will be used. /// * `new_thread_state(thread_number) -> State` produces thread-local state once per thread to be based to `consume` /// * `consume(Item, &mut State) -> Output` produces an output given an input obtained by `input` along with mutable state initially /// created by `new_thread_state(…)`. /// * `finalize(State) -> Output` is called to potentially process remaining work that was placed in `State`. /// * For `reducer`, see the [`Reduce`] trait #[cfg(not(feature = "parallel"))] pub fn in_parallel_with_finalize( input: impl Iterator, _thread_limit: Option, new_thread_state: impl FnOnce(usize) -> S, mut consume: impl FnMut(I, &mut S) -> O, finalize: impl FnOnce(S) -> O + Send + Clone, mut reducer: R, ) -> Result<::Output, ::Error> where R: Reduce, { let mut state = new_thread_state(0); for item in input { drop(reducer.feed(consume(item, &mut state))?); } reducer.feed(finalize(state))?; reducer.finalize() } gix-features-0.36.1/src/progress.rs000064400000000000000000000110611046102023000153160ustar 00000000000000//! Various `prodash` types along with various utilities for comfort. use std::io; #[cfg(feature = "progress-unit-bytes")] pub use bytesize; pub use prodash::{ self, messages::MessageLevel, progress::{ AtomicStep, Discard, DoOrDiscard, Either, Id, Step, StepShared, Task, ThroughputOnDrop, Value, UNKNOWN, }, unit, BoxedDynNestedProgress, Count, DynNestedProgress, DynNestedProgressToNestedProgress, NestedProgress, Progress, Unit, }; /// A stub for the portions of the `bytesize` crate that we use internally in `gitoxide`. 
#[cfg(not(feature = "progress-unit-bytes"))]
pub mod bytesize {
    /// A stub for the `ByteSize` wrapper.
    pub struct ByteSize(pub u64);

    impl std::fmt::Display for ByteSize {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            self.0.fmt(f)
        }
    }
}

/// A unit for displaying bytes with throughput and progress percentage.
#[cfg(feature = "progress-unit-bytes")]
pub fn bytes() -> Option<Unit> {
    Some(unit::dynamic_and_mode(
        unit::Bytes,
        unit::display::Mode::with_throughput().and_percentage(),
    ))
}

/// A unit for displaying bytes with throughput and progress percentage.
#[cfg(not(feature = "progress-unit-bytes"))]
pub fn bytes() -> Option<Unit> {
    Some(unit::label_and_mode(
        "B",
        unit::display::Mode::with_throughput().and_percentage(),
    ))
}

/// A unit for displaying human readable numbers with throughput and progress percentage, and a single decimal place.
pub fn count(name: &'static str) -> Option<Unit> {
    count_with_decimals(name, 1)
}

/// A unit for displaying human readable numbers with `name` suffix,
/// with throughput and progress percentage, and `decimals` decimal places.
#[cfg(feature = "progress-unit-human-numbers")]
pub fn count_with_decimals(name: &'static str, decimals: usize) -> Option<Unit> {
    Some(unit::dynamic_and_mode(
        unit::Human::new(
            {
                let mut f = unit::human::Formatter::new();
                f.with_decimals(decimals);
                f
            },
            name,
        ),
        unit::display::Mode::with_throughput().and_percentage(),
    ))
}

/// A unit for displaying human readable numbers with `name` suffix,
/// with throughput and progress percentage, and `decimals` decimal places.
#[cfg(not(feature = "progress-unit-human-numbers"))]
pub fn count_with_decimals(name: &'static str, _decimals: usize) -> Option<Unit> {
    Some(unit::label_and_mode(
        name,
        unit::display::Mode::with_throughput().and_percentage(),
    ))
}

/// A predefined unit for displaying a multi-step progress
pub fn steps() -> Option<Unit> {
    Some(unit::dynamic(unit::Range::new("steps")))
}

/// A structure passing every [`read`](std::io::Read::read()) call through to the contained Progress instance using [`inc_by(bytes_read)`](Count::inc_by()).
pub struct Read<T, P> {
    /// The implementor of [`std::io::Read`] to which progress is added
    pub inner: T,
    /// The progress instance receiving progress information on each invocation of `reader`
    pub progress: P,
}

impl<T, P> io::Read for Read<T, P>
where
    T: io::Read,
    P: Progress,
{
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let bytes_read = self.inner.read(buf)?;
        self.progress.inc_by(bytes_read);
        Ok(bytes_read)
    }
}

impl<T, P> io::BufRead for Read<T, P>
where
    T: io::BufRead,
    P: Progress,
{
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        self.inner.fill_buf()
    }

    fn consume(&mut self, amt: usize) {
        self.inner.consume(amt)
    }
}

/// A structure passing every [`write`][std::io::Write::write()] call through to the contained Progress instance using [`inc_by(bytes_written)`](Count::inc_by()).
///
/// This is particularly useful if the final size of the bytes to write is known or can be estimated precisely enough.
pub struct Write<T, P> {
    /// The implementor of [`std::io::Write`] to which progress is added
    pub inner: T,
    /// The progress instance receiving progress information on each invocation of `reader`
    pub progress: P,
}

impl<T, P> io::Write for Write<T, P>
where
    T: io::Write,
    P: Progress,
{
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        let written = self.inner.write(buf)?;
        self.progress.inc_by(written);
        Ok(written)
    }

    fn flush(&mut self) -> io::Result<()> {
        self.inner.flush()
    }
}

impl<T, P> io::Seek for Write<T, P>
where
    T: io::Seek,
{
    fn seek(&mut self, pos: io::SeekFrom) -> io::Result<u64> {
        self.inner.seek(pos)
    }
}

// ===== gix-features-0.36.1/src/threading.rs =====

//! Type definitions for putting shared ownership and synchronized mutation behind the `threading` feature toggle.
//!
//! That way, single-threaded applications will not have to use thread-safe primitives, and simply do not specify the 'threading' feature.

#[cfg(feature = "parallel")]
mod _impl {
    use std::sync::Arc;

    /// A thread-safe cell which can be written to only once.
    #[cfg(feature = "once_cell")]
    pub type OnceCell<T> = once_cell::sync::OnceCell<T>;
    /// A reference counted pointer type for shared ownership.
    pub type OwnShared<T> = Arc<T>;
    /// A synchronization primitive which can start read-only and transition to support mutation.
    pub type MutableOnDemand<T> = parking_lot::RwLock<T>;
    /// A synchronization primitive which provides read-write access right away.
    pub type Mutable<T> = parking_lot::Mutex<T>;
    /// A guarded reference suitable for safekeeping in a struct.
    pub type RefGuard<'a, T> = parking_lot::RwLockReadGuard<'a, T>;
    /// A mapped reference created from a `RefGuard`
    pub type MappedRefGuard<'a, U> = parking_lot::MappedRwLockReadGuard<'a, U>;

    /// Get a shared reference through a [`MutableOnDemand`] for read-only access.
    pub fn get_ref<T>(v: &MutableOnDemand<T>) -> RefGuard<'_, T> {
        v.read()
    }

    /// Get a mutable reference through a [`MutableOnDemand`] for read-write access.
pub fn get_mut(v: &MutableOnDemand) -> parking_lot::RwLockWriteGuard<'_, T> { v.write() } /// Get a mutable reference to the underlying data, with semantics similar to [Arc::make_mut()]. pub fn make_mut(this: &mut OwnShared) -> &mut T { OwnShared::make_mut(this) } /// Get a mutable reference through a [`Mutable`] for read-write access. pub fn lock(v: &Mutable) -> parking_lot::MutexGuard<'_, T> { v.lock() } /// Downgrade a handle previously obtained with [`get_mut()`] to drop mutation support. pub fn downgrade_mut_to_ref<'a, T>( v: parking_lot::RwLockWriteGuard<'a, T>, _orig: &'a MutableOnDemand, ) -> RefGuard<'a, T> { parking_lot::RwLockWriteGuard::downgrade(v) } /// Map a read guard into a sub-type it contains. pub fn map_ref(v: RefGuard<'_, T>, f: impl FnOnce(&T) -> &U) -> MappedRefGuard<'_, U> { parking_lot::RwLockReadGuard::map(v, f) } } #[cfg(not(feature = "parallel"))] mod _impl { use std::{ cell::{Ref, RefCell, RefMut}, rc::Rc, }; /// A thread-safe cell which can be written to only once. #[cfg(feature = "once_cell")] pub type OnceCell = once_cell::unsync::OnceCell; /// A reference counted pointer type for shared ownership. pub type OwnShared = Rc; /// A synchronization primitive which can start read-only and transition to support mutation. pub type MutableOnDemand = RefCell; /// A synchronization primitive which provides read-write access right away. pub type Mutable = RefCell; /// A guarded reference suitable for safekeeping in a struct. pub type RefGuard<'a, T> = Ref<'a, T>; /// A mapped reference created from a RefGuard pub type MappedRefGuard<'a, U> = Ref<'a, U>; /// Get a shared reference through a [`MutableOnDemand`] for read-only access. pub fn get_mut(v: &RefCell) -> RefMut<'_, T> { v.borrow_mut() } /// Get a mutable reference to the underlying data, with semantics similar to [Rc::make_mut()]. pub fn make_mut(this: &mut OwnShared) -> &mut T { OwnShared::make_mut(this) } /// Get a mutable reference through a [`Mutable`] for read-write access. 
pub fn lock(v: &Mutable) -> RefMut<'_, T> { v.borrow_mut() } /// Get a mutable reference through a [`MutableOnDemand`] for read-write access. pub fn get_ref(v: &RefCell) -> RefGuard<'_, T> { v.borrow() } /// Downgrade a handle previously obtained with [`upgrade_ref_to_mut()`] to drop mutation support. pub fn downgrade_mut_to_ref<'a, T>(v: RefMut<'a, T>, orig: &'a RefCell) -> RefGuard<'a, T> { drop(v); orig.borrow() } /// Map a read guard into a sub-type it contains. pub fn map_ref(v: RefGuard<'_, T>, f: impl FnOnce(&T) -> &U) -> MappedRefGuard<'_, U> { Ref::map(v, f) } } pub use _impl::*; gix-features-0.36.1/src/zlib/mod.rs000064400000000000000000000031321046102023000151710ustar 00000000000000pub use flate2::{Decompress, Status}; /// non-streaming interfaces for decompression pub mod inflate { /// The error returned by various [Inflate methods][super::Inflate] #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("Could not write all bytes when decompressing content")] WriteInflated(#[from] std::io::Error), #[error("Could not decode zip stream, status was '{0:?}'")] Inflate(#[from] flate2::DecompressError), #[error("The zlib status indicated an error, status was '{0:?}'")] Status(flate2::Status), } } /// Decompress a few bytes of a zlib stream without allocation pub struct Inflate { /// The actual decompressor doing all the work. pub state: Decompress, } impl Default for Inflate { fn default() -> Self { Inflate { state: Decompress::new(true), } } } impl Inflate { /// Run the decompressor exactly once. 
Cannot be run multiple times pub fn once(&mut self, input: &[u8], out: &mut [u8]) -> Result<(flate2::Status, usize, usize), inflate::Error> { let before_in = self.state.total_in(); let before_out = self.state.total_out(); let status = self.state.decompress(input, out, flate2::FlushDecompress::None)?; Ok(( status, (self.state.total_in() - before_in) as usize, (self.state.total_out() - before_out) as usize, )) } /// Ready this instance for decoding another data stream. pub fn reset(&mut self) { self.state.reset(true); } } /// pub mod stream; gix-features-0.36.1/src/zlib/stream/deflate/mod.rs000064400000000000000000000067761046102023000201110ustar 00000000000000use flate2::Compress; const BUF_SIZE: usize = 4096 * 8; /// A utility to zlib compress anything that is written via its [Write][std::io::Write] implementation. /// /// Be sure to call `flush()` when done to finalize the deflate stream. pub struct Write { compressor: Compress, inner: W, buf: [u8; BUF_SIZE], } impl Clone for Write where W: Clone, { fn clone(&self) -> Self { Write { compressor: impls::new_compress(), inner: self.inner.clone(), buf: self.buf, } } } mod impls { use std::io; use flate2::{Compress, Compression, FlushCompress, Status}; use crate::zlib::stream::deflate; pub(crate) fn new_compress() -> Compress { Compress::new(Compression::fast(), true) } impl deflate::Write where W: io::Write, { /// Create a new instance writing compressed bytes to `inner`. pub fn new(inner: W) -> deflate::Write { deflate::Write { compressor: new_compress(), inner, buf: [0; deflate::BUF_SIZE], } } /// Reset the compressor, starting a new compression stream. /// /// That way multiple streams can be written to the same inner writer. pub fn reset(&mut self) { self.compressor.reset(); } /// Consume `self` and return the inner writer. 
pub fn into_inner(self) -> W { self.inner } fn write_inner(&mut self, mut buf: &[u8], flush: FlushCompress) -> io::Result { let total_in_when_start = self.compressor.total_in(); loop { let last_total_in = self.compressor.total_in(); let last_total_out = self.compressor.total_out(); let status = self .compressor .compress(buf, &mut self.buf, flush) .map_err(|err| io::Error::new(io::ErrorKind::Other, err))?; let written = self.compressor.total_out() - last_total_out; if written > 0 { self.inner.write_all(&self.buf[..written as usize])?; } match status { Status::StreamEnd => return Ok((self.compressor.total_in() - total_in_when_start) as usize), Status::Ok | Status::BufError => { let consumed = self.compressor.total_in() - last_total_in; buf = &buf[consumed as usize..]; // output buffer still makes progress if self.compressor.total_out() > last_total_out { continue; } // input still makes progress if self.compressor.total_in() > last_total_in { continue; } // input also makes no progress anymore, need more so leave with what we have return Ok((self.compressor.total_in() - total_in_when_start) as usize); } } } } } impl io::Write for deflate::Write { fn write(&mut self, buf: &[u8]) -> io::Result { self.write_inner(buf, FlushCompress::None) } fn flush(&mut self) -> io::Result<()> { self.write_inner(&[], FlushCompress::Finish).map(|_| ()) } } } #[cfg(test)] mod tests; gix-features-0.36.1/src/zlib/stream/deflate/tests.rs000064400000000000000000000061701046102023000204600ustar 00000000000000mod deflate_stream { use std::{ io, io::{Read, Write}, }; use bstr::ByteSlice; use flate2::Decompress; use crate::zlib::stream::deflate; /// Provide streaming decompression using the `std::io::Read` trait. /// If `std::io::BufReader` is used, an allocation for the input buffer will be performed. 
struct InflateReader { inner: R, decompressor: Decompress, } impl InflateReader where R: io::BufRead, { pub fn from_read(read: R) -> InflateReader { InflateReader { decompressor: Decompress::new(true), inner: read, } } } impl io::Read for InflateReader where R: io::BufRead, { fn read(&mut self, into: &mut [u8]) -> io::Result { crate::zlib::stream::inflate::read(&mut self.inner, &mut self.decompressor, into) } } #[test] fn small_file_decompress() -> Result<(), Box> { fn fixture_path(path: &str) -> std::path::PathBuf { std::path::PathBuf::from("tests/fixtures").join(path) } let r = InflateReader::from_read(io::BufReader::new(std::fs::File::open(fixture_path( "objects/37/d4e6c5c48ba0d245164c4e10d5f41140cab980", ))?)); let mut bytes = r.bytes(); let content = bytes.by_ref().take(16).collect::, _>>()?; assert_eq!(content.as_slice().as_bstr(), b"blob 9\0hi there\n".as_bstr()); assert!(bytes.next().is_none()); Ok(()) } #[test] fn all_at_once() -> Result<(), Box> { let mut w = deflate::Write::new(Vec::new()); assert_eq!(w.write(b"hello")?, 5); w.flush()?; let out = w.inner; assert!(out.len() == 12 || out.len() == 13); assert_deflate_buffer(out, b"hello") } fn assert_deflate_buffer(out: Vec, expected: &[u8]) -> Result<(), Box> { let mut actual = Vec::new(); InflateReader::from_read(out.as_slice()).read_to_end(&mut actual)?; assert_eq!(actual, expected); Ok(()) } #[test] fn big_file_small_writes() -> Result<(), Box> { let mut w = deflate::Write::new(Vec::new()); let bytes = include_bytes!( "../../../../../gix-odb/tests/fixtures/objects/pack/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack" ); for chunk in bytes.chunks(2) { assert_eq!(w.write(chunk)?, chunk.len()); } w.flush()?; assert_deflate_buffer(w.inner, bytes) } #[test] fn big_file_a_few_big_writes() -> Result<(), Box> { let mut w = deflate::Write::new(Vec::new()); let bytes = include_bytes!( "../../../../../gix-odb/tests/fixtures/objects/pack/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack" ); for chunk in 
bytes.chunks(4096 * 9) { assert_eq!(w.write(chunk)?, chunk.len()); } w.flush()?; assert_deflate_buffer(w.inner, bytes) } } gix-features-0.36.1/src/zlib/stream/inflate.rs000064400000000000000000000035571046102023000173420ustar 00000000000000use std::{io, io::BufRead}; use flate2::{Decompress, FlushDecompress, Status}; /// Read bytes from `rd` and decompress them using `state` into a pre-allocated fitting buffer `dst`, returning the amount of bytes written. pub fn read(rd: &mut impl BufRead, state: &mut Decompress, mut dst: &mut [u8]) -> io::Result { let mut total_written = 0; loop { let (written, consumed, ret, eof); { let input = rd.fill_buf()?; eof = input.is_empty(); let before_out = state.total_out(); let before_in = state.total_in(); let flush = if eof { FlushDecompress::Finish } else { FlushDecompress::None }; ret = state.decompress(input, dst, flush); written = (state.total_out() - before_out) as usize; total_written += written; dst = &mut dst[written..]; consumed = (state.total_in() - before_in) as usize; } rd.consume(consumed); match ret { // The stream has officially ended, nothing more to do here. Ok(Status::StreamEnd) => return Ok(total_written), // Either input our output are depleted even though the stream is not depleted yet. Ok(Status::Ok | Status::BufError) if eof || dst.is_empty() => return Ok(total_written), // Some progress was made in both the input and the output, it must continue to reach the end. Ok(Status::Ok | Status::BufError) if consumed != 0 || written != 0 => continue, // A strange state, where zlib makes no progress but isn't done either. Call it out. Ok(Status::Ok | Status::BufError) => unreachable!("Definitely a bug somewhere"), Err(..) => return Err(io::Error::new(io::ErrorKind::InvalidInput, "corrupt deflate stream")), } } } gix-features-0.36.1/src/zlib/stream/mod.rs000064400000000000000000000000521046102023000164620ustar 00000000000000/// pub mod deflate; /// pub mod inflate;