bkt-0.6.1/.cargo_vcs_info.json0000644000000001360000000000100116200ustar { "git": { "sha1": "9e0f9efba1f9713f2c303f890b1d276a615b0d09" }, "path_in_vcs": "" }bkt-0.6.1/Cargo.lock0000644000000352270000000000100076040ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "aho-corasick" version = "0.7.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" dependencies = [ "memchr", ] [[package]] name = "anyhow" version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4361135be9122e0870de935d7c439aef945b9f9ddd4199a553b5270b49c82a27" [[package]] name = "bincode" version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" dependencies = [ "serde", ] [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bkt" version = "0.6.1" dependencies = [ "anyhow", "bincode", "clap", "filetime", "humantime", "rand", "regex", "serde", "serde_json", "test_dir", ] [[package]] name = "cc" version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" version = "4.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "046ae530c528f252094e4a77886ee1374437744b2bff1497aa898bbddbbb29b3" dependencies = [ "clap_builder", "clap_derive", "once_cell", ] [[package]] name = "clap_builder" version = "4.2.1" source 
= "registry+https://github.com/rust-lang/crates.io-index" checksum = "223163f58c9a40c3b0a43e1c4b50a9ce09f007ea2cb1ec258a687945b4b7929f" dependencies = [ "bitflags", "clap_lex", "terminal_size", ] [[package]] name = "clap_derive" version = "4.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9644cd56d6b87dbe899ef8b053e331c0637664e9e21a33dfcdc36093f5c5c4" dependencies = [ "heck", "proc-macro2", "quote", "syn 2.0.9", ] [[package]] name = "clap_lex" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a2dd5a6fe8c6e3502f568a6353e5273bbb15193ad9a89e457b9970798efbea1" [[package]] name = "errno" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ "errno-dragonfly", "libc", "windows-sys 0.48.0", ] [[package]] name = "errno-dragonfly" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" dependencies = [ "cc", "libc", ] [[package]] name = "filetime" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "975ccf83d8d9d0d84682850a38c8169027be83368805971cc4f238c2b245bc98" dependencies = [ "cfg-if", "libc", "redox_syscall", "winapi", ] [[package]] name = "getrandom" version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d39cd93900197114fa1fcb7ae84ca742095eed9442088988ae74fa744e930e77" dependencies = [ "cfg-if", "libc", "wasi", ] [[package]] name = "heck" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" [[package]] name = "hermit-abi" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" [[package]] name = "humantime" version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "io-lifetimes" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c66c74d2ae7e79a5a8f7ac924adbe38ee42a859c6539ad869eb51f0b52dc220" dependencies = [ "hermit-abi", "libc", "windows-sys 0.48.0", ] [[package]] name = "itoa" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" [[package]] name = "libc" version = "0.2.140" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99227334921fae1a979cf0bfdfcc6b3e5ce376ef57e16fb6fb3ea2ed6095f80c" [[package]] name = "linux-raw-sys" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d59d8c75012853d2e872fb56bc8a2e53718e2cafe1a4c823143141c6d90c322f" [[package]] name = "memchr" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" [[package]] name = "once_cell" version = "1.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" [[package]] name = "ppv-lite86" version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" [[package]] name = "proc-macro2" version = "1.0.53" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba466839c78239c09faf015484e5cc04860f88242cff4d03eb038f04b4699b73" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.26" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "4424af4bf778aae2051a77b60283332f386554255d722233d09fbfc7e30da2fc" dependencies = [ "proc-macro2", ] [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" dependencies = [ "getrandom", ] [[package]] name = "redox_syscall" version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8380fe0152551244f0747b1bf41737e0f8a74f97a14ccefd1148187271634f3c" dependencies = [ "bitflags", ] [[package]] name = "regex" version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b1f693b24f6ac912f4893ef08244d70b6067480d2f1a46e950c9691e6749d1d" dependencies = [ "aho-corasick", "memchr", "regex-syntax", ] [[package]] name = "regex-syntax" version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "rustix" version = "0.37.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2aae838e49b3d63e9274e1c01833cc8139d3fec468c3b84688c628f44b1ae11d" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys", "windows-sys 0.45.0", ] [[package]] name = "ryu" version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" [[package]] name = "serde" version = "1.0.136" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.136" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9" dependencies = [ "proc-macro2", "quote", "syn 1.0.86", ] [[package]] name = "serde_json" version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "syn" version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a65b3f4ffa0092e9887669db0eae07941f023991ab58ea44da8fe8e2d511c6b" dependencies = [ "proc-macro2", "quote", "unicode-xid", ] [[package]] name = "syn" version = "2.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0da4a3c17e109f700685ec577c0f85efd9b19bcf15c913985f14dc1ac01775aa" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "terminal_size" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e6bf6f19e9f8ed8d4048dc22981458ebcf406d67e94cd422e5ecd73d63b3237" dependencies = [ "rustix", "windows-sys 0.48.0", ] [[package]] name = "test_dir" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fc19daf9fc57fadcf740c4abaaa0cd08d9ce22a2a0629aaf6cbd9ae4b80683a" dependencies = [ "rand", ] [[package]] name = "unicode-ident" version = "1.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5464a87b239f13a63a501f2701565754bae92d243d4bb7eb12f6d57d2269bf4" [[package]] name = "unicode-xid" version = "0.2.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" [[package]] name = "wasi" version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] name = "windows-sys" version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" dependencies = [ "windows-targets 0.42.2", ] [[package]] name = "windows-sys" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ "windows-targets 0.48.0", ] [[package]] name = "windows-targets" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" dependencies = [ "windows_aarch64_gnullvm 0.42.2", "windows_aarch64_msvc 0.42.2", "windows_i686_gnu 0.42.2", "windows_i686_msvc 0.42.2", "windows_x86_64_gnu 0.42.2", "windows_x86_64_gnullvm 0.42.2", "windows_x86_64_msvc 0.42.2", ] [[package]] name = "windows-targets" version = 
"0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" dependencies = [ "windows_aarch64_gnullvm 0.48.0", "windows_aarch64_msvc 0.48.0", "windows_i686_gnu 0.48.0", "windows_i686_msvc 0.48.0", "windows_x86_64_gnu 0.48.0", "windows_x86_64_gnullvm 0.48.0", "windows_x86_64_msvc 0.48.0", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "597a5118570b68bc08d8d59125332c54f1ba9d9adeedeef5b99b02ba2b0698f8" [[package]] name = "windows_aarch64_gnullvm" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] name = "windows_aarch64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e08e8864a60f06ef0d0ff4ba04124db8b0fb3be5776a5cd47641e942e58c4d43" [[package]] name = "windows_aarch64_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" [[package]] name = "windows_i686_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c61d927d8da41da96a81f029489353e68739737d3beca43145c8afec9a31a84f" [[package]] name = "windows_i686_gnu" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" [[package]] name = "windows_i686_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "44d840b6ec649f480a41c8d80f9c65108b92d89345dd94027bfe06ac444d1060" [[package]] name = "windows_i686_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" [[package]] name = "windows_x86_64_gnu" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8de912b8b8feb55c064867cf047dda097f92d51efad5b491dfb98f6bbb70cb36" [[package]] name = "windows_x86_64_gnu" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" [[package]] name = "windows_x86_64_gnullvm" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26d41b46a36d453748aedef1486d5c7a85db22e56aff34643984ea85514e94a3" [[package]] name = "windows_x86_64_gnullvm" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" [[package]] name = "windows_x86_64_msvc" version = "0.42.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9aec5da331524158c6d1a4ac0ab1541149c0b9505fde06423b02f5ef0106b9f0" [[package]] name = "windows_x86_64_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" bkt-0.6.1/Cargo.toml0000644000000031040000000000100076140ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2018" name = "bkt" version = "0.6.1" authors = ["Michael Diamond "] include = [ "**/*.rs", "Cargo.*", "README.md", "LICENSE", ] description = "CLI and Rust library for caching subprocess invocations" homepage = "http://bkt.rs/" readme = "README.md" keywords = [ "cache", "caching", "subprocess", "cli", "shell", ] categories = [ "caching", "command-line-utilities", ] license = "MIT" repository = "http://github.com/dimo414/bkt" [dependencies.anyhow] version = "1.0" [dependencies.bincode] version = "1.3.1" [dependencies.clap] version = "4.2" features = [ "derive", "env", "error-context", "help", "std", "usage", "wrap_help", ] default-features = false [dependencies.humantime] version = "2.1.0" [dependencies.rand] version = "0.8" [dependencies.serde] version = "1.0" features = ["derive"] [dependencies.serde_json] version = "1.0" optional = true [dev-dependencies.filetime] version = "0.2" [dev-dependencies.regex] version = "1.7" [dev-dependencies.test_dir] version = "0.2" [features] debug = ["serde_json"] bkt-0.6.1/Cargo.toml.orig000075500000000000000000000022171046102023000133040ustar 00000000000000[package] name = "bkt" # When updating the version number tag the commit in git # https://rust-lang.github.io/api-guidelines/documentation.html#release-notes-document-all-significant-changes-c-relnotes version = "0.6.1" authors = ["Michael Diamond "] description = "CLI and Rust library for caching subprocess invocations" repository = "http://github.com/dimo414/bkt" homepage = "http://bkt.rs/" license = "MIT" keywords = ["cache", "caching", "subprocess", "cli", "shell"] categories = ["caching", "command-line-utilities"] edition = "2018" include = [ "**/*.rs", "Cargo.*", "README.md", "LICENSE", ] [features] # Uses JSON to cache the command and output in a human-readable format, to aid debugging. 
debug = ['serde_json'] [dependencies] anyhow = "1.0" bincode = "1.3.1" humantime = "2.1.0" rand = "0.8" serde = { version = "1.0", features = ["derive"] } [dependencies.clap] version = "4.2" default-features = false features = ["derive", "env", "error-context", "help", "std", "usage", "wrap_help"] [dependencies.serde_json] optional = true version = "1.0" [dev-dependencies] filetime = "0.2" regex = "1.7" test_dir = "0.2" bkt-0.6.1/LICENSE000075500000000000000000000020601046102023000114160ustar 00000000000000MIT License Copyright (c) 2021 Michael Diamond Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
bkt-0.6.1/README.md000075500000000000000000000275141046102023000117030ustar 00000000000000# `bkt` [![releases](https://img.shields.io/github/v/release/dimo414/bkt?sort=semver&logo=github)](https://github.com/dimo414/bkt/releases) [![crates.io](https://img.shields.io/crates/v/bkt?logo=rust)](https://crates.io/crates/bkt) [![docs.rs](https://img.shields.io/docsrs/bkt?label=docs.rs)](https://docs.rs/bkt) [![build status](https://img.shields.io/github/actions/workflow/status/dimo414/bkt/rust.yml?branch=master)](https://github.com/dimo414/bkt/actions) [![issues](https://img.shields.io/github/issues/dimo414/bkt)](https://github.com/dimo414/bkt/issues) [![license](https://img.shields.io/github/license/dimo414/bkt)](https://github.com/dimo414/bkt/blob/master/LICENSE) `bkt` (pronounced "bucket") is a subprocess caching utility written in Rust, inspired by [bash-cache](https://github.com/dimo414/bash-cache). Wrapping expensive process invocations with `bkt` allows callers to reuse recent invocations without complicating their application logic. This can be useful in shell prompts, interactive applications such as [`fzf`](#fzf), and long-running programs that poll other processes. `bkt` is available as a standalone binary as well as a [Rust library](https://crates.io/crates/bkt). See https://docs.rs/bkt/ for library documentation. This README covers the `bkt` binary. ## Installation Run `cargo install bkt` to compile and install `bkt` locally. You will need to [install `cargo`](https://doc.rust-lang.org/cargo/getting-started/installation.html) if it's not already on your system. Pre-compiled binaries for common platforms are attached to each [release](https://github.com/dimo414/bkt/releases) (starting with 0.5). Please open an issue or send a PR if you would like releases to include binaries for additional platforms. Package manager support is being tracked [here](https://github.com/dimo414/bkt/issues/12); volunteers are welcome. 
[![Packaging status](https://repology.org/badge/vertical-allrepos/bkt.svg)](https://repology.org/project/bkt/versions) ## Usage ``` bkt [--ttl=DURATION] [--stale=DURATION] [--cwd] [--env=ENV ...] [--modtime=FILE ...] [--scope=SCOPE] [--discard-failures] [--warm|--force] -- ... ``` The easiest way to use `bkt` is to simply prefix the command you intend to cache with `bkt --`, for example: ```shell # Execute and cache an invocation of 'date +%s.%N' $ bkt -- date +%s.%N 1631992417.080884000 # A subsequent invocation reuses the same cached output $ bkt -- date +%s.%N 1631992417.080884000 ``` When `bkt` is passed a command it hasn't seen before (or recently) it executes the command synchronously and caches its stdout, stderr, and exit code. Calling `bkt` again with the same command reads the data from the cache and outputs it as if the command had been run again. ### Cache Lifespan Two flags, `--ttl` and `--stale`, configure how long cached data is preserved. By default `bkt` uses a TTL (Time to Live) of 60 seconds, meaning cached data older than sixty seconds will be discarded and the backing command re-run. Passing a different value, such as `--ttl=1d`, will change how long the cached data is considered valid. The default TTL can be overriden by defining a `BKT_TTL` environment variable. When the data expires `bkt` has to re-execute the command synchronously, which can introduce unexpected slowness. To avoid this, pass `--stale` with a shorter duration than the TTL. When the cached data is older than the stale threshold this causes `bkt` to refresh the cache in the background while still promptly returning the cached data. Both flags (and `BKT_TTL`) accept duration strings such as `10s` or `1hour 30min`. The exact syntax is defined in the [humantime](https://docs.rs/humantime/2.1.0/humantime/fn.parse_duration.html) library. ### Execution Environment Some commands' behavior depends on more than just the command line arguments. 
It's possible to adjust how `bkt` caches such commands so that unrelated invocations are cached separately. #### Working Directory For example, attempting to cache `pwd` will not work as expected by default: ```shell $ $ bkt -- pwd /tmp/foo $ cd ../bar # Cached output for 'pwd' is reused even though the directory has changed $ bkt -- pwd /tmp/foo ``` To have `bkt` key off the current working directory in addition to the command line arguments pass `--cwd`: ```shell $ bkt --cwd -- pwd /tmp/foo $ cd ../bar $ bkt --cwd -- pwd /tmp/bar ``` #### Environment Variables Similarly, to specify one or more environment variables as relevant for the command being cached use `--env`, such as `--env=LANG`. This flag can be provided multiple times to key off additional variables. Invocations with different values for any of the given variables will be cached separately. #### File Modifications It is also possible to have `bkt` check the last-modified time of one or more files and include this in the cache key using `--modtime`. For instance passing `--modtime=/etc/passwd` would cause the backing command to be re-executed any time `/etc/passwd` is modified. ### Refreshing Manually It's also possible to trigger refreshes manually using `--force` or `--warm`. The former behaves exactly as if the cached data was not found, executing the process and caching the result. This is useful if you know the cached data is no longer up-to-date, e.g. because something external changed. Alternatively, it can be useful to refresh the cache asynchronously, which `--warm` provides. This triggers a refresh in the background but immediately ends the current process with no output. This is useful if you expect additional invocations in the near future and want to ensure they get a cache hit. Note that until the warming process completes concurrent calls may still see a cache miss and trigger their own invocation. 
### Setting a Cache Scope Cached data is persisted to disk (but see [below](#cache_dir)), and is available to any process that invokes `bkt`. Generally this is desirable, but certain usages may want to isolate their invocations from other potential concurrent calls. To do so pass `--scope=...` with a sufficiently unique argument, such as a fixed label for the calling program, the current process ID, or a timestamp. ```shell $ bkt -- date +%s.%N 1631992417.080884000 # Changing the scope causes the command to be cached separately $ bkt --scope=foo -- date +%s.%N 1631992418.010562000 ``` Alternatively, define a `BKT_SCOPE` environment variable to configure a consistent scope across invocations. This can be useful within a script to ensure all commands share a scope. ```shell #!/bin/bash # Set a unique scope for this script invocation using the PID and current time export BKT_SCOPE="my_script_$$_$(date -Ins)" ``` ### Discarding Failed Invocations By default, all invocations are cached regardless of their output or exit code. In situations where failures should not be cached pass `--discard-failures` to only persist successful invocations (those that return a `0` exit code). **WARNING:** Passing this flag can cause the backing command to be invoked more frequently than the `--ttl` would suggest, which in turn can create unexpected load. If the backing command is failing due to an outage or bug (such as an overloaded website) triggering additional calls can exacerbate the issue and effectively DDoS the hampered system. It is generally safer *not* to set this flag and instead make the client robust to occasional failures. ### Changing the Cache Directory By default, cached data is stored under `/tmp` or a similar temporary directory; this can be customized via the `--cache-dir` flag or by defining a `BKT_CACHE_DIR` environment variable. If a `BKT_TMPDIR` environment variable is defined it wil be used instead of the system's temporary directory. 
Although `BKT_TMPDIR` and `BKT_CACHE_DIR` have similar effects `BKT_TMPDIR` is intended to be used to configure the global cache location (e.g. by declaring it in your `.bashrc` or similar), while `--cache-dir`/`BKT_CACHE_DIR` should be used to customize the cache location for a given set of invocations that shouldn't use the default cache directory. Note that the choice of directory can affect `bkt`'s performance: if the cache is stored under a [`tmpfs`](https://en.wikipedia.org/wiki/Tmpfs) or solid-state partition it will be significantly faster than caching to a spinning disk. ## Security and Privacy The default cache directory is potentially world-readable. On Unix the cache directory is created with `700` permissions, meaning only the current user can access it, but this is not foolproof. You can customize the cache directory (see [above](#cache_dir)) to a location you trust such as `~/.bkt`, but note that your home directory may be slower than the temporary directory selected by default. In general, if you are not the only user of your system it's wise to configure your `TMPDIR` to a location only you can access. If that is not possible use `BKT_TMPDIR` to configure a custom temporary directory specifically for `bkt`. ## Patterns and Tips **Please share how you're using `bkt` on the [Discussion Board](https://github.com/dimo414/bkt/discussions/categories/show-and-tell)!** ### Speeding up `fzf` and other preview tools `bkt` works well with interactive tools like [`fzf`](https://github.com/junegunn/fzf) that execute other commands. Because `fzf` executes the `--preview` command every time an element is selected it can be slow and tedious to browse when the command takes a long time to run. Using `bkt` allows each selection's preview to be cached. 
Compare: ```shell $ printf '%s\n' 1 0.2 3 0.1 5 | \ fzf --preview="bash -c 'sleep {}; echo {}'" $ printf '%s\n' 1 0.2 3 0.1 5 | \ fzf --preview="bkt --ttl=10m --stale=10s -- bash -c 'sleep {}; echo {}'" ``` You'll generally want to use a long TTL and a short stale duration so that even if you leave `fzf` running for a while the cache remains warm and is refreshed in the background. You may also want to set a `--scope` if it's important to invalidate the cache on subsequent invocations. Note: one downside to using `bkt` is, currently, `bkt` doesn't [stream](https://github.com/junegunn/fzf/pull/2215) the backing process' output. This means when `bkt` has a cache miss the preview will be absent until the process completes, even if partial output could be displayed sooner. ### Using `bkt` only if installed You may want to distribute shell scripts that utilize `bkt` without requiring every user also install `bkt`. By wrapping `bkt` in a shell function your script can cleanly invoke `bkt` if available without complicating your users' workflow. Of course if they choose to install `bkt` they'll get a faster script as a result! ``` # Cache commands using bkt if installed if command -v bkt >&/dev/null; then bkt() { command bkt "$@"; } else # If bkt isn't installed skip its arguments and just execute directly. # Optionally write a msg to stderr suggesting users install bkt. bkt() { while [[ "$1" == --* ]]; do shift; done "$@" } fi # Now you can call bkt (the function) just like you'd call bkt (the binary): bkt -- expensive_cmd ... ``` ### Decorating commands with `bkt` in shell scripts It is sometimes helpful to cache _all_ invocations of a command in a shell script or in your shell environment. You can use a decorator function pattern similar to what bash-cache does to enable caching transparently, like so: ```shell # This is Bash syntax, but other shells support similar syntax expensive_cmd() { bkt [bkt args ...] 
-- expensive_cmd "$@" } ``` Calls to `expensive_cmd` in your shell will now go through `bkt` behind the scenes. This can be useful for brevity and consistency but obviously changing behavior like this is a double-edged-sword, so use with caution. Should you need to bypass the cache for a single invocation Bash provides the [`command` builtin](https://www.gnu.org/software/bash/manual/html_node/Bash-Builtins.html#index-command), so `command expensive_cmd ...` will invoke `expensive_cmd` directly. Other shells provide similar features. bkt-0.6.1/src/lib.rs000075500000000000000000001602111046102023000123170ustar 00000000000000//! `bkt` (pronounced "bucket") is a library for caching subprocess executions. It enables reuse of //! expensive invocations across separate processes and supports synchronous and asynchronous //! refreshing, TTLs, and other functionality. `bkt` is also a standalone binary for use by shell //! scripts and other languages, see for binary details. //! //! ```no_run //! # fn do_something(_: &str) {} //! # fn main() -> anyhow::Result<()> { //! # use std::time::Duration; //! let bkt = bkt::Bkt::in_tmp()?; //! let expensive_cmd = bkt::CommandDesc::new(["wget", "http://example.com"]); //! let (result, age) = bkt.retrieve(&expensive_cmd, Duration::from_secs(3600))?; //! do_something(result.stdout_utf8()); //! # Ok(()) } //! ``` #![warn(missing_docs)] use std::collections::{BTreeMap, BTreeSet}; use std::convert::{TryFrom, TryInto}; use std::ffi::{OsString, OsStr}; use std::fs::{File, OpenOptions}; use std::hash::{Hash, Hasher}; use std::io::{self, BufReader, ErrorKind, BufWriter, Write}; use std::path::{PathBuf, Path}; use std::time::{Duration, Instant, SystemTime}; use anyhow::{Context, Error, Result}; use serde::{Serialize, Deserialize}; use serde::de::DeserializeOwned; #[cfg(feature="debug")] macro_rules! debug_msg { ($($arg:tt)*) => { eprintln!("bkt: {}", format!($($arg)*)) } } #[cfg(not(feature="debug"))] macro_rules! 
debug_msg { ($($arg:tt)*) => { } } /// Returns the modtime of the given path. Returns Ok(None) if the file is not found, and /// otherwise returns an error if the modtime cannot be determined. fn modtime(path: &Path) -> Result> { let metadata = std::fs::metadata(path); match metadata { Ok(metadata) => { Ok(Some(metadata.modified().context("Modtime is not supported")?)) }, Err(ref err) => { match err.kind() { ErrorKind::NotFound => Ok(None), _ => { metadata?; unreachable!() }, } } } } /// A stateless description of a command to be executed and cached. It consists of a command line /// invocation and additional metadata about how the command should be cached which are configured /// via the `with_*` methods. Instances can be persisted and reused. /// /// Calling any of these methods changes how the invocation's cache key will be constructed, /// therefore two invocations with different metadata configured will be cached separately. This /// allows - for example - commands that interact with the current working directory to be cached /// dependent on the working directory even if the command line arguments are equal. /// /// # Examples /// /// ``` /// let cmd = bkt::CommandDesc::new(["echo", "Hello World!"]); /// let with_cwd = bkt::CommandDesc::new(["ls"]).with_cwd(); /// let with_env = bkt::CommandDesc::new(["date"]).with_env("TZ"); /// ``` #[derive(Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)] pub struct CommandDesc { args: Vec, use_cwd: bool, envs: BTreeSet, mod_files: BTreeSet, persist_failures: bool, } impl CommandDesc { /// Constructs a CommandDesc instance for the given command line. 
/// /// ``` /// let cmd = bkt::CommandDesc::new(["echo", "Hello World!"]); /// ``` pub fn new(command: I) -> Self where I: IntoIterator, S: Into { let ret = CommandDesc { args: command.into_iter().map(Into::into).collect(), use_cwd: false, envs: BTreeSet::new(), mod_files: BTreeSet::new(), persist_failures: true, }; assert!(!ret.args.is_empty(), "Command cannot be empty"); ret } /// Specifies that the current process' working directory should be included in the cache key. /// Commands that depend on the working directory (e.g. `ls` or `git status`) should call this /// in order to cache executions in different working directories separately. /// /// # Examples /// /// ``` /// let cmd = bkt::CommandDesc::new(["pwd"]).with_cwd(); /// ``` pub fn with_cwd(mut self) -> Self { self.use_cwd = true; self } /// Specifies that the given environment variable should be included in the cache key. Commands /// that depend on the values of certain environment variables (e.g. `LANG`, `PATH`, or `TZ`) /// should call this in order to cache such executions separately. Although it's possible to /// pass `PWD` here calling [`with_cwd()`] is generally recommended instead for clarity and /// consistency with subprocesses that don't read this environment variable. /// /// Note: If the given variable name is not found in the current process' environment at /// execution time the variable is _not_ included in the cache key, and the execution will be /// cached as if the environment variable had not been specified at all. /// /// [`with_cwd()`]: CommandDesc::with_cwd /// /// # Examples /// /// ``` /// let cmd = bkt::CommandDesc::new(["date"]).with_env("TZ"); /// ``` pub fn with_env(mut self, key: K) -> Self where K: AsRef { self.envs.insert(key.as_ref().into()); self } /// Specifies that the given environment variables should be included in the cache key. Commands /// that depend on the values of certain environment variables (e.g. 
`LANG`, `PATH`, or `TZ`) /// should call this in order to cache such executions separately. Although it's possible to /// pass `PWD` here calling [`with_cwd()`] is generally recommended instead for clarity and /// consistency with subprocesses that don't read this environment variable. /// /// Note: If a given variable name is not found in the current process' environment at execution /// time that variable is _not_ included in the cache key, and the execution will be cached as /// if the environment variable had not been specified at all. /// /// [`with_cwd()`]: CommandDesc::with_cwd /// /// # Examples /// /// ``` /// let cmd = bkt::CommandDesc::new(["date"]).with_envs(["LANG", "TZ"]); /// ``` pub fn with_envs(mut self, envs: I) -> Self where I: IntoIterator, E: AsRef, { self.envs.extend(envs.into_iter().map(|e| e.as_ref().into())); self } /// Specifies that the modification time of the given file should be included in the cache key, /// causing cached commands to be invalidated if the file is modified in the future. Commands /// that depend on the contents of certain files should call this in order to invalidate the /// cache when the file changes. /// /// It is recommended to pass absolute paths when this is used along with [`with_cwd()`] or /// [`CommandState::with_working_dir()`] to avoid any ambiguity in how relative paths are /// resolved. /// /// Note: If the given path is not found at execution time the file is _not_ included in the /// cache key, and the execution will be cached as if the file had not been specified at all. /// /// [`with_cwd()`]: CommandDesc::with_cwd /// /// # Examples /// /// ``` /// let cmd = bkt::CommandDesc::new(["..."]).with_modtime("/etc/passwd"); /// ``` pub fn with_modtime

(mut self, file: P) -> Self where P: AsRef { self.mod_files.insert(file.as_ref().into()); self } /// Specifies that the modification time of the given files should be included in the cache key, /// causing cached commands to be invalidated if the files are modified in the future. Commands /// that depend on the contents of certain files should call this in order to invalidate the /// cache when the files change. /// /// It is recommended to pass absolute paths when this is used along with [`with_cwd()`] or /// [`CommandState::with_working_dir()`] to avoid any ambiguity in how relative paths are /// resolved. /// /// Note: If a given path is not found at execution time that file is _not_ included in the /// cache key, and the execution will be cached as if the file had not been specified at all. /// /// [`with_cwd()`]: CommandDesc::with_cwd /// /// # Examples /// /// ``` /// let cmd = bkt::CommandDesc::new(["..."]).with_modtimes(["/etc/passwd", "/etc/group"]); /// ``` pub fn with_modtimes(mut self, files: I) -> Self where I: IntoIterator, P: AsRef, { self.mod_files.extend(files.into_iter().map(|f| f.as_ref().into())); self } /// Specifies this command should only be cached if it succeeds - i.e. it returns a zero exit /// code. Commands that return a non-zero exit code will not be cached, and therefore will be /// rerun on each invocation (until they succeed). /// /// **WARNING:** use this option with caution. Discarding invocations that fail can overload /// downstream resources that were protected by the caching layer limiting QPS. For example, /// if a website is rejecting a fraction of requests to shed load and then clients start /// sending _more_ requests when their attempts fail the website could be taken down outright by /// the added load. In other words, using this option can lead to accidental DDoSes. 
/// /// ``` /// let cmd = bkt::CommandDesc::new(["grep", "foo", "/var/log/syslog"]).with_discard_failures(true); /// ``` pub fn with_discard_failures(mut self, discard_failures: bool) -> Self { // Invert the boolean so it's not a double negative at usage sites self.persist_failures = !discard_failures; self } /// Constructs a [`CommandState`] instance, capturing application state that will be used in the /// cache key, such as the current working directory and any specified environment variables. /// The `CommandState` can also be further customized to change how the subprocess is invoked. /// /// Most callers should be able to pass a `CommandDesc` directly to a [`Bkt`] instance without /// needing to construct a separate `CommandState` first. /// /// Example: /// /// ```no_run /// # fn main() -> anyhow::Result<()> { /// # use std::time::Duration; /// let bkt = bkt::Bkt::in_tmp()?; /// let cmd = bkt::CommandDesc::new(["foo", "bar"]).capture_state()?.with_env("FOO", "BAR"); /// let (result, age) = bkt.retrieve(cmd, Duration::from_secs(3600))?; /// # Ok(()) } /// ``` pub fn capture_state(&self) -> Result { let cwd = if self.use_cwd { Some(std::env::current_dir()?) } else { None }; let envs = self.envs.iter() .flat_map(|e| std::env::var_os(e).map(|v| (e.clone(), v))) .collect(); let modtimes = self.mod_files.iter() .map(|f| modtime(f).map(|m| (f, m))) .collect::>>()?.into_iter() .flat_map(|(f, m)| m.map(|m| (f.clone(), m))) .collect(); let state = CommandState { args: self.args.clone(), cwd, envs, modtimes, persist_failures: self.persist_failures }; debug_msg!("state: {}", state.debug_info()); Ok(state) } } /// The stateful sibling of [`CommandDesc`] which represents a command to be executed and cached /// along with environment state (e.g. the current working directory) at the time the `CommandState` /// instance is constructed. It consists of a command line invocation and application state /// determining how the command should be cached and executed. 
Additional `with_*` methods are /// provided on this type for further modifying how the subprocess will be executed. /// /// Calling any of these methods changes how the invocation's cache key will be constructed, /// therefore two invocations with different configured state will be cached separately, in the same /// manner as the `with_*` methods on `CommandDesc`. /// /// # Examples /// /// ``` /// # fn main() -> anyhow::Result<()> { /// let cmd = bkt::CommandDesc::new(["echo", "Hello World!"]).capture_state(); /// let with_custom_wd = bkt::CommandDesc::new(["ls"]).capture_state()?.with_working_dir("/"); /// let with_env = bkt::CommandDesc::new(["date"]).capture_state()?.with_env("TZ", "UTC"); /// # Ok(()) } /// ``` #[derive(Clone, Debug, Eq, PartialEq, Hash, Serialize, Deserialize)] pub struct CommandState { // TODO Borrow> or Cow> might be better, need to validate // serialization. Or maybe just make it &Vec and add a lifetime to CommandState? args: Vec, cwd: Option, envs: BTreeMap, modtimes: BTreeMap, persist_failures: bool, } impl CommandState { /// Sets the working directory the command should be run from, and causes this working directory /// to be included in the cache key. If unset the working directory will be inherited from the /// current process' and will _not_ be used to differentiate invocations in separate working /// directories. /// /// ``` /// # fn main() -> anyhow::Result<()> { /// let cmd = bkt::CommandDesc::new(["pwd"]); /// let state = cmd.capture_state()?.with_working_dir("/tmp"); /// # Ok(()) } /// ``` pub fn with_working_dir>(mut self, cwd: P) -> Self { self.cwd = Some(cwd.as_ref().into()); self } /// Adds the given key/value pair to the environment the command should be run from, and causes /// this pair to be included in the cache key. 
/// /// ``` /// # fn main() -> anyhow::Result<()> { /// let cmd = bkt::CommandDesc::new(["pwd"]); /// let state = cmd.capture_state()?.with_env("FOO", "bar"); /// # Ok(()) } /// ``` pub fn with_env(mut self, key: K, value: V) -> Self where K: AsRef, V: AsRef { self.envs.insert(key.as_ref().into(), value.as_ref().into()); self } /// Adds the given key/value pairs to the environment the command should be run from, and causes /// these pair to be included in the cache key. /// /// ``` /// # fn main() -> anyhow::Result<()> { /// use std::env; /// use std::collections::HashMap; /// /// let important_envs : HashMap = /// env::vars().filter(|&(ref k, _)| /// k == "TERM" || k == "TZ" || k == "LANG" || k == "PATH" /// ).collect(); /// let cmd = bkt::CommandDesc::new(["..."]); /// let state = cmd.capture_state()?.with_envs(&important_envs); /// # Ok(()) } /// ``` pub fn with_envs(mut self, envs: I) -> Self where I: IntoIterator, K: AsRef, V: AsRef, { for (ref key, ref val) in envs { self.envs.insert(key.as_ref().into(), val.as_ref().into()); } self } /// Format's the CommandState's metadata (information read from the system rather than provided /// by the caller) for diagnostic purposes. 
#[cfg(feature="debug")] fn debug_info(&self) -> String { fn to_timestamp(time: &SystemTime) -> u128 { time.duration_since(SystemTime::UNIX_EPOCH).expect("Precedes epoch").as_micros() } let mut parts = Vec::new(); if let Some(ref cwd) = self.cwd { parts.push(format!("cwd:{}", cwd.to_string_lossy())); } if !self.envs.is_empty() { parts.push(self.envs.iter() .map(|(k, v)| format!("{}={}", k.to_string_lossy(), v.to_string_lossy())) .collect::>().join(",")); } if !self.modtimes.is_empty() { parts.push(self.modtimes.iter() .map(|(p, m)| format!("{}:{}", p.to_string_lossy(), to_timestamp(m))) .collect::>().join(" ")); } parts.join(" | ") } } impl TryFrom<&CommandDesc> for CommandState { type Error = anyhow::Error; fn try_from(desc: &CommandDesc) -> Result { desc.capture_state() } } impl From<&CommandState> for std::process::Command { fn from(cmd: &CommandState) -> Self { let mut command = std::process::Command::new(&cmd.args[0]); command.args(&cmd.args[1..]); if let Some(cwd) = &cmd.cwd { command.current_dir(cwd); } if !cmd.envs.is_empty() { command.envs(&cmd.envs); } command } } impl CacheKey for CommandState { fn debug_label(&self) -> Option { Some(self.args.iter() .map(|a| a.to_string_lossy()).collect::>().join("-") .chars() .map(|c| if c.is_whitespace() { '_' } else { c }) .filter(|&c| c.is_alphanumeric() || c == '-' || c == '_') .take(100).collect()) } } #[cfg(test)] mod cmd_tests { use super::*; #[test] fn debug_label() { let cmd = CommandDesc::new(["foo", "bar", "b&r _- a"]); assert_eq!(CommandState::try_from(&cmd).unwrap().debug_label(), Some("foo-bar-br__-_a".into())); } #[test] fn collisions() { std::env::set_var("FOO", "BAR"); let commands = [ CommandDesc::new(["foo"]), CommandDesc::new(["foo", "bar"]), CommandDesc::new(["foo", "b", "ar"]), CommandDesc::new(["foo", "b ar"]), CommandDesc::new(["foo"]).with_cwd(), CommandDesc::new(["foo"]).with_env("FOO"), CommandDesc::new(["foo"]).with_cwd().with_env("FOO"), ]; // 
https://old.reddit.com/r/rust/comments/2koptu/best_way_to_visit_all_pairs_in_a_vec/clnhxr5/ let mut iter = commands.iter(); for a in &commands { iter.next(); for b in iter.clone() { assert_ne!( CommandState::try_from(a).unwrap().cache_key(), CommandState::try_from(b).unwrap().cache_key(), "{:?} and {:?} have equivalent hashes", a, b); } } } } /// The outputs of a cached invocation of a [`CommandDesc`], akin to [`std::process::Output`]. #[derive(Serialize, Deserialize, Debug, Eq, PartialEq)] pub struct Invocation { stdout: Vec, stderr: Vec, exit_code: i32, runtime: Duration, } impl Invocation { /// The data that the process wrote to stdout. pub fn stdout(&self) -> &[u8] { &self.stdout } /// Helper to view stdout as a UTF-8 string. Use [`from_utf8`](std::str::from_utf8) directly if /// you need to handle output that may not be UTF-8. pub fn stdout_utf8(&self) -> &str { std::str::from_utf8(&self.stdout).expect("stdout not valid UTF-8") } /// The data that the process wrote to stderr. pub fn stderr(&self) -> &[u8] { &self.stderr } /// Helper to view stderr as a UTF-8 string. Use [`from_utf8`](std::str::from_utf8) directly if /// you need to handle output that may not be UTF-8. pub fn stderr_utf8(&self) -> &str { std::str::from_utf8(&self.stderr).expect("stderr not valid UTF-8") } /// The exit code of the program, or 126 if the program terminated without an exit status. /// See [`ExitStatus::code()`](std::process::ExitStatus::code()). This is subject to change to /// better support other termination states. pub fn exit_code(&self) -> i32 { self.exit_code } /// The time the process took to complete. pub fn runtime(&self) -> Duration { self.runtime } } /// A file-lock mechanism that holds a lock by atomically creating a file in the given directory, /// and deleting the file upon being dropped. Callers should beware that dropping is not guaranteed /// (e.g. if the program panics). 
When a conflicting lock file is found its age (mtime) is checked /// to detect stale locks leaked by a separate process that failed to properly drop its lock. #[derive(Debug)] struct FileLock { lock_file: PathBuf, } impl FileLock { fn try_acquire>(lock_dir: P, name: &str, consider_stale: Duration) -> Result> { let lock_file = lock_dir.as_ref().join(name).with_extension("lock"); match OpenOptions::new().create_new(true).write(true).open(&lock_file) { Ok(mut lock) => { write!(lock, "{}", std::process::id())?; Ok(Some(FileLock{ lock_file })) }, Err(io) => { match io.kind() { ErrorKind::AlreadyExists => { if let Ok(lock_metadata) = std::fs::metadata(&lock_file) { if let Ok(age) = lock_metadata.modified()?.elapsed() { if age > consider_stale { return Err(Error::msg(format!( "Lock {} held by PID {} appears stale and may need to be deleted manually.", lock_file.display(), std::fs::read_to_string(&lock_file).unwrap_or_else(|_| "unknown".into())))); } } } Ok(None) }, _ => { Err(Error::new(io)) } } }, } } } impl Drop for FileLock { fn drop(&mut self) { if let Err(e) = std::fs::remove_file(&self.lock_file) { eprintln!("Failed to delete lockfile {}, may need to be deleted manually. 
Reason: {:?}", self.lock_file.display(), e); } } } #[cfg(test)] mod file_lock_tests { use super::*; use test_dir::{TestDir, DirBuilder}; #[test] fn locks() { let dir = TestDir::temp(); let lock = FileLock::try_acquire(dir.root(), "test", Duration::from_secs(100)).unwrap(); let lock = lock.expect("Could not take lock"); assert!(dir.path("test.lock").exists()); std::mem::drop(lock); assert!(!dir.path("test.lock").exists()); } #[test] fn already_locked() { let dir = TestDir::temp(); let lock = FileLock::try_acquire(dir.root(), "test", Duration::from_secs(100)).unwrap(); let lock = lock.expect("Could not take lock"); let attempt = FileLock::try_acquire(dir.root(), "test", Duration::from_secs(100)).unwrap(); assert!(attempt.is_none()); std::mem::drop(lock); let attempt = FileLock::try_acquire(dir.root(), "test", Duration::from_secs(100)).unwrap(); assert!(attempt.is_some()); } } /// Trait allowing a type to be used as a cache key. It would be nice to blanket-implement /// this for all types that implement the dependent traits, but without a way for specific /// impls to opt-out of the blanket that would prevent customizing the debug_label(). /// Specialization might resolve that issue, in the meantime it's fine since Cache is a /// private type anyways. trait CacheKey: std::fmt::Debug+Hash+PartialEq { /// Label is added to the cache key when run with the debug feature, useful for diagnostics. fn debug_label(&self) -> Option { None } /// Generates a string sufficiently unique to describe the key; typically just the hex encoding /// of the key's hash code. Most impls should not need to override this. fn cache_key(&self) -> String { // The hash_map::DefaultHasher is somewhat underspecified, but it notes that "hashes should // not be relied upon over releases", which implies it is stable across multiple // invocations of the same build.... See cache_tests::stable_hash. 
let mut s = std::collections::hash_map::DefaultHasher::new(); self.hash(&mut s); let hash = s.finish(); if cfg!(feature = "debug") { if let Some(label) = self.debug_label() { if !label.is_empty() { return format!("{}_{:016X}", label, hash); } } } format!("{:016X}", hash) } } /// Container for serialized key/value pairs. #[derive(Serialize, Deserialize)] struct CacheEntry { key: K, value: V, } // See https://doc.rust-lang.org/std/fs/fn.soft_link.html #[cfg(windows)] fn symlink, Q: AsRef>(original: P, link: Q) -> Result<()> { std::os::windows::fs::symlink_file(original, link) .context("Windows prevents most programs from creating symlinks; see https://github.com/dimo414/bkt/issues/3") } #[cfg(unix)] use std::os::unix::fs::symlink; /// A file-system-backed cache for mapping keys (i.e. `CommandDesc`) to values (i.e. `Invocation`) /// for a given duration. // TODO make this a trait so we can swap out impls, such as an in-memory cache or SQLite-backed #[derive(Clone, Debug)] struct Cache { cache_dir: PathBuf, scope: Option, } impl Cache { fn new>(cache_dir: P) -> Self { Cache{ cache_dir: cache_dir.as_ref().into(), scope: None } } fn scoped(mut self, scope: String) -> Self { assert!(self.scope.is_none()); self.scope = Some(scope); self } #[cfg(not(feature = "debug"))] fn serialize(writer: W, value: &T) -> Result<()> where W: io::Write, T: Serialize + ?Sized { Ok(bincode::serialize_into(writer, value)?) } #[cfg(feature = "debug")] fn serialize(writer: W, value: &T) -> Result<()> where W: io::Write, T: Serialize + ?Sized { Ok(serde_json::to_writer_pretty(writer, value)?) } #[cfg(not(feature = "debug"))] fn deserialize(reader: R) -> Result where R: std::io::Read, T: DeserializeOwned { Ok(bincode::deserialize_from(reader)?) } #[cfg(feature = "debug")] fn deserialize(reader: R) -> Result where R: std::io::Read, T: DeserializeOwned { Ok(serde_json::from_reader(reader)?) 
} fn key_dir(&self) -> PathBuf { self.cache_dir.join("keys") } fn key_path(&self, key: &str) -> PathBuf { let file = match &self.scope { Some(scope) => format!("{}.{}", scope, key), None => key.into(), }; self.key_dir().join(file) } fn data_dir(&self) -> PathBuf { self.cache_dir.join("data") } /// Looks up the given key in the cache, returning the associated value and its age /// if the data is found and is newer than the max_age. fn lookup(&self, key: &K, max_age: Duration) -> Result> where K: CacheKey+DeserializeOwned, V: DeserializeOwned { let path = self.key_path(&key.cache_key()); let file = File::open(&path); if let Err(ref e) = file { if e.kind() == ErrorKind::NotFound { debug_msg!("lookup {} not found", path.display()); return Ok(None); } } // Missing file is OK; other errors get propagated to the caller let reader = BufReader::new(file.context("Failed to access cache file")?); // TODO consider returning OK(None) if deserialization fails, which could happen if // different types hashed to the same key let found: CacheEntry = Cache::deserialize(reader)?; // Discard data that is too old let mtime = std::fs::metadata(&path)?.modified()?; let elapsed = mtime.elapsed(); if elapsed.is_err() || elapsed.unwrap() > max_age { debug_msg!("lookup {} expired", path.display()); std::fs::remove_file(&path).context("Failed to remove expired data")?; return Ok(None); } // Ignore false-positive hits that happened to collide with the hash code if &found.key != key { debug_msg!("lookup {} hash collision", path.display()); return Ok(None); } debug_msg!("lookup {} found", path.display()); Ok(Some((found.value, mtime))) } fn seconds_ceiling(duration: Duration) -> u64 { duration.as_secs() + if duration.subsec_nanos() != 0 { 1 } else { 0 } } // https://rust-lang-nursery.github.io/rust-cookbook/algorithms/randomness.html#create-random-passwords-from-a-set-of-alphanumeric-characters fn rand_filename(dir: &Path, label: &str) -> PathBuf { use rand::{thread_rng, Rng}; use 
rand::distributions::Alphanumeric; let rand_str: String = thread_rng().sample_iter(Alphanumeric).take(16).map(char::from).collect(); dir.join(format!("{}.{}", label, rand_str)) } /// Write the given key/value pair to the cache, persisting it for at least the given TTL. /// /// Note: This method takes references to the key and value because they are serialized /// externally, therefore consuming either parameter is unhelpful. An in-memory implementation /// would need to do an internal `.clone()` which is at odds with /// [`C-CALLER-CONTROL`](https://rust-lang.github.io/api-guidelines/flexibility.html) but Cache /// is intended for serialization use cases so some overhead in the in-memory case may be /// acceptable. // TODO C-INTERMEDIATE suggests emulating HashMap::insert and returning any existing value in // the cache, though it would be expensive to construct this so perhaps should be a callback fn store(&self, key: &K, value: &V, ttl: Duration) -> Result<()> where K: CacheKey+Serialize, V: Serialize { assert!(!ttl.is_zero(), "ttl cannot be zero"); let ttl_dir = self.data_dir().join(Cache::seconds_ceiling(ttl).to_string()); std::fs::create_dir_all(&ttl_dir)?; std::fs::create_dir_all(self.key_dir())?; let data_path = Cache::rand_filename(&ttl_dir, "data"); // Note: this will fail if filename collides, could retry in a loop if that happens let file = OpenOptions::new().create_new(true).write(true).open(&data_path)?; let entry = CacheEntry{ key, value }; Cache::serialize(BufWriter::new(&file), &entry).context("Serialization failed")?; debug_msg!("store data {}", data_path.display()); // The target needs to be canonicalized as we're creating the link in a subdirectory, but I'd somewhat prefer // to fix it to be correctly relative to the link's location. Probably not worth the trouble though. 
let data_path = data_path.canonicalize()?; // Roundabout approach to an atomic symlink replacement // https://github.com/dimo414/bash-cache/issues/26 let tmp_symlink = Cache::rand_filename(&self.key_dir(), "tmp-symlink"); // Note: this call will fail if the tmp_symlink filename collides, could retry in a loop if that happens. symlink(data_path, &tmp_symlink)?; let key_path = self.key_path(&entry.key.cache_key()); debug_msg!("store key {}", key_path.display()); std::fs::rename(&tmp_symlink, key_path)?; Ok(()) } fn cleanup(&self) -> Result<()> { fn delete_stale_file(file: &Path, ttl: Duration) -> Result<()> { let age = std::fs::metadata(file)?.modified()?.elapsed()?; if age > ttl { std::fs::remove_file(file)?; } Ok(()) } // if try_acquire fails, e.g. because the directory does not exist, there's nothing to clean up if let Ok(Some(_lock)) = FileLock::try_acquire(&self.cache_dir, "cleanup", Duration::from_secs(60*10)) { // Don't bother if cleanup has been attempted recently let last_attempt_file = self.cache_dir.join("last_cleanup"); if let Ok(metadata) = last_attempt_file.metadata() { if metadata.modified()?.elapsed()? < Duration::from_secs(30) { debug_msg!("cleanup skip recent"); return Ok(()); } } File::create(&last_attempt_file)?; // resets mtime if already exists // First delete stale data files debug_msg!("cleanup data {}", &self.data_dir().display()); if let Ok(data_dir_iter) = std::fs::read_dir(&self.data_dir()) { for entry in data_dir_iter { let ttl_dir = entry?.path(); let ttl = Duration::from_secs( ttl_dir.file_name().and_then(|s| s.to_str()).and_then(|s| s.parse().ok()) .ok_or_else(|| Error::msg(format!("Invalid ttl directory {}", ttl_dir.display())))?); for entry in std::fs::read_dir(&ttl_dir)? { let file = entry?.path(); // Disregard errors on individual files; typically due to concurrent deletion // or other changes we don't care about. 
let _ = delete_stale_file(&file, ttl); } } } // Then delete broken symlinks debug_msg!("cleanup keys {}", &self.key_dir().display()); if let Ok(key_dir_iter) = std::fs::read_dir(&self.key_dir()) { for entry in key_dir_iter { let symlink = entry?.path(); // This reads as if we're deleting files that no longer exist, but what it really // means is "if the symlink is broken, try to delete _the symlink_." It would also // try to delete a symlink that happened to be deleted concurrently, but this is // harmless since we ignore the error. // std::fs::symlink_metadata() could be used to check that the symlink itself exists // if needed, but this could still have false-positives due to a TOCTOU race. if !symlink.exists() { let _ = std::fs::remove_file(symlink); } } } } Ok(()) } } #[cfg(test)] mod cache_tests { use super::*; use test_dir::{TestDir, DirBuilder}; impl CacheKey for i32 {} impl CacheKey for String { fn debug_label(&self) -> Option { Some(self.clone()) } } fn modtime>(path: P) -> SystemTime { std::fs::metadata(&path).expect("No metadata").modified().expect("No modtime") } fn make_dir_stale>(dir: P, age: Duration) -> Result<()> { let desired_time = SystemTime::now() - age; let stale_time = filetime::FileTime::from_system_time(desired_time); for entry in std::fs::read_dir(dir)? { let path = entry?.path(); let last_modified = modtime(&path); if path.is_file() && last_modified > desired_time { filetime::set_file_mtime(&path, stale_time)?; } else if path.is_dir() { make_dir_stale(&path, age)?; } } Ok(()) } fn dir_contents>(dir: P) -> Vec { fn contents(dir: &Path, ret: &mut Vec) -> Result<()> { for entry in std::fs::read_dir(dir)? { let path = entry?.path(); if path.is_dir() { contents(&path, ret)?; } else { ret.push(path); } } Ok(()) } let mut paths = vec![]; contents(dir.as_ref(), &mut paths).unwrap(); paths.iter().map(|p| p.strip_prefix(dir.as_ref()).unwrap().display().to_string()).collect() } // Sanity-checking that cache_key's behavior is stable over time. 
This test may need to be // updated when changing Rust versions / editions. #[test] fn stable_hash() { assert_eq!(100.cache_key(), "7D208C81E8236995"); if cfg!(feature = "debug") { assert_eq!("FooBar".to_string().cache_key(), "FooBar_2C8878C07E3ADA57"); } else { assert_eq!("FooBar".to_string().cache_key(), "2C8878C07E3ADA57"); } } #[test] fn cache() { let dir = TestDir::temp(); let key = "foo".to_string(); let val = "A".to_string(); let cache = Cache::new(dir.root()); let absent = cache.lookup::<_, String>(&key, Duration::from_secs(100)).unwrap(); assert!(absent.is_none()); cache.store(&key, &val, Duration::from_secs(100)).unwrap(); let present = cache.lookup::<_, String>(&key, Duration::from_secs(100)).unwrap(); assert_eq!(present.unwrap().0, val); } #[test] fn lookup_ttls() { let dir = TestDir::temp(); let key = "foo".to_string(); let val = "A".to_string(); let cache = Cache::new(dir.root()); cache.store(&key, &val, Duration::from_secs(5)).unwrap(); // store duration doesn't affect lookups make_dir_stale(dir.root(), Duration::from_secs(15)).unwrap(); // data is still present until a cleanup iteration runs, or a lookup() invalidates it let present = cache.lookup::<_, String>(&key, Duration::from_secs(20)).unwrap(); assert_eq!(present.unwrap().0, "A"); // lookup() finds stale data, deletes it let absent = cache.lookup::<_, String>(&key, Duration::from_secs(10)).unwrap(); assert!(absent.is_none()); // now data is gone, even though this lookup() would have accepted it let absent = cache.lookup::<_, String>(&key, Duration::from_secs(20)).unwrap(); assert!(absent.is_none()); } #[test] fn scoped() { let dir = TestDir::temp(); let key = "foo".to_string(); let val_a = "A".to_string(); let val_b = "B".to_string(); let cache = Cache::new(dir.root()); let cache_scoped = Cache::new(dir.root()).scoped("scope".into()); cache.store(&key, &val_a, Duration::from_secs(100)).unwrap(); cache_scoped.store(&key, &val_b, Duration::from_secs(100)).unwrap(); let present = 
cache.lookup::<_, String>(&key, Duration::from_secs(20)).unwrap(); assert_eq!(present.unwrap().0, val_a); let present_scoped = cache_scoped.lookup::<_, String>(&key, Duration::from_secs(20)).unwrap(); assert_eq!(present_scoped.unwrap().0, val_b); } #[test] fn cleanup() { let dir = TestDir::temp(); let key = "foo".to_string(); let val = "A".to_string(); let cache = Cache::new(dir.root()); cache.store(&key, &val, Duration::from_secs(5)).unwrap(); make_dir_stale(dir.root(), Duration::from_secs(10)).unwrap(); cache.cleanup().unwrap(); assert_eq!(dir_contents(dir.root()), ["last_cleanup"]); // keys and data dirs are now empty let absent = cache.lookup::<_, String>(&key, Duration::from_secs(20)).unwrap(); assert!(absent.is_none()); } } /// Holds information about the cache status of a given command. #[derive(Debug, Copy, Clone)] pub enum CacheStatus { /// Command was found in the cache. Contains the time the returned invocation was cached. Hit(Instant), /// Command was not found in the cache and was executed. Contains the execution time of the /// subprocess. Miss(Duration), } #[cfg(test)] impl CacheStatus { // Note these functions are intentionally not public for now. They're only currently needed to // make assertions shorter, and should be able to be removed once assert_matches #82775 is // stable. Can be made public if other use-cases arise. fn is_hit(&self) -> bool { match self { CacheStatus::Hit(_) => true, CacheStatus::Miss(_) => false, } } fn is_miss(&self) -> bool { match self { CacheStatus::Hit(_) => false, CacheStatus::Miss(_) => true, } } } /// This struct is the main API entry point for the `bkt` library, allowing callers to invoke and /// cache subprocesses for later reuse. 
/// /// Example: /// /// ```no_run /// # fn main() -> anyhow::Result<()> { /// # use std::time::Duration; /// let bkt = bkt::Bkt::in_tmp()?; /// let cmd = bkt::CommandDesc::new(["curl", "http://expensive.api/foo"]); /// let (result, age) = bkt.retrieve(&cmd, Duration::from_secs(60*60))?; /// println!("Retrieved: {:?}\nAge: {:?}", result, age); /// # Ok(()) } /// ``` #[derive(Clone, Debug)] pub struct Bkt { cache: Cache, cleanup_on_refresh: bool, } impl Bkt { fn temp_dir() -> PathBuf { std::env::var_os("BKT_TMPDIR").map(PathBuf::from).unwrap_or_else(std::env::temp_dir) } /// Creates a new Bkt instance using the [`std::env::temp_dir`] as the cache location. If a /// `BKT_TMPDIR` environment variable is set that value will be preferred. /// /// # Errors /// /// If preparing the tmp cache directory fails. pub fn in_tmp() -> Result { Bkt::create(Bkt::temp_dir()) } /// Creates a new Bkt instance. /// /// The given `root_dir` will be used as the parent directory of the cache. It's recommended /// this directory be in a tmpfs partition, on an SSD, or similar, so operations are fast. /// /// # Errors /// /// If preparing the cache directory under `root_dir` fails. pub fn create(root_dir: PathBuf) -> Result { // Note the cache is invalidated when the minor version changes // TODO use separate directories per user, like bash-cache // See https://stackoverflow.com/q/57951893/113632 let cache_dir = root_dir .join(format!("bkt-{}.{}-cache", env!("CARGO_PKG_VERSION_MAJOR"), env!("CARGO_PKG_VERSION_MINOR"))); Bkt::restrict_dir(&cache_dir)?; Ok(Bkt { cache: Cache::new(&cache_dir), cleanup_on_refresh: true, }) } /// Associates a scope with this Bkt instance, causing it to namespace its cache keys so that /// they do not collide with other instances using the same cache directory. This is useful when /// separate applications could potentially invoke the same commands but should not share a /// cache. 
Consider using the application's name, PID, and/or a timestamp in order to create a /// sufficiently unique namespace. pub fn scoped(mut self, scope: String) -> Self { self.cache = self.cache.scoped(scope); self } /// By default a background cleanup thread runs on cache misses and calls to [`Bkt::refresh()`] /// to remove stale data. You may prefer to manage cleanup yourself if you expect frequent cache /// misses and want to minimize the number of threads being created. See [`Bkt::cleanup_once()`] /// and [`Bkt::cleanup_thread()`] if you set this to `false`. pub fn cleanup_on_refresh(mut self, cleanup: bool) -> Self { self.cleanup_on_refresh = cleanup; self } #[cfg(not(unix))] fn restrict_dir(_cache_dir: &Path) -> Result<()> { Ok(()) } #[cfg(unix)] fn restrict_dir(cache_dir: &Path) -> Result<()> { use std::os::unix::fs::PermissionsExt; if !cache_dir.exists() { std::fs::create_dir_all(cache_dir)?; let metadata = std::fs::metadata(cache_dir)?; let mut permissions = metadata.permissions(); permissions.set_mode(0o700); // Only accessible to current user std::fs::set_permissions(cache_dir, permissions)?; } Ok(()) } fn execute_subprocess(cmd: impl Into) -> Result { let mut command: std::process::Command = cmd.into(); let start = Instant::now(); // TODO write to stdout/stderr while running, rather than after the process completes? // See https://stackoverflow.com/q/66060139 let result = command.output() .with_context(|| format!("Failed to run command {}", command.get_args().next().expect("Executable missing").to_string_lossy()))?; let runtime = start.elapsed(); Ok(Invocation { stdout: result.stdout, stderr: result.stderr, // TODO handle signals, see https://stackoverflow.com/q/66272686 exit_code: result.status.code().unwrap_or(126), runtime, }) } /// Looks up the given command in Bkt's cache. If found (and newer than the given TTL) returns /// the cached invocation. If stale or not found the command is executed and the result is /// cached and then returned. 
/// /// The second element in the returned tuple reports whether or not the invocation was cached /// and includes information such as the cached data's age or the executed subprocess' runtime. /// /// # Errors /// /// If looking up, deserializing, executing, or serializing the command fails. This generally /// reflects a user error such as an invalid command. pub fn retrieve(&self, command: T, ttl: Duration) -> Result<(Invocation, CacheStatus)> where T: TryInto, anyhow::Error: From, // https://stackoverflow.com/a/72627328 { let command = command.try_into()?; let cached = self.cache.lookup(&command, ttl).context("Cache lookup failed")?; let result = match cached { Some((cached, mtime)) => (cached, CacheStatus::Hit(Instant::now() - mtime.elapsed()?)), None => { let cleanup_hook = self.maybe_cleanup_once(); let start = Instant::now(); let result = Bkt::execute_subprocess(&command).context("Subprocess execution failed")?; let runtime = Instant::now() - start; if command.persist_failures || result.exit_code == 0 { self.cache.store(&command, &result, ttl).context("Cache write failed")?; } Bkt::join_cleanup_thread(cleanup_hook); (result, CacheStatus::Miss(runtime)) } }; Ok(result) } /// Unconditionally executes the given command and caches the invocation for the given TTL. /// This can be used to "warm" the cache so that subsequent calls to `execute` are fast. /// /// The second element in the returned tuple is the subprocess' execution time. /// /// # Errors /// /// If executing or serializing the command fails. This generally reflects a user error such as /// an invalid command. 
pub fn refresh(&self, command: T, ttl: Duration) -> Result<(Invocation, Duration)> where T: TryInto, anyhow::Error: From, // https://stackoverflow.com/a/72627328 { let command = command.try_into()?; let cleanup_hook = self.maybe_cleanup_once(); let start = Instant::now(); let result = Bkt::execute_subprocess(&command).context("Subprocess execution failed")?; let elapsed = Instant::now() - start; if command.persist_failures || result.exit_code == 0 { self.cache.store(&command, &result, ttl).context("Cache write failed")?; } Bkt::join_cleanup_thread(cleanup_hook); Ok((result, elapsed)) } /// Clean the cache in the background on a cache-miss; this will usually /// be much faster than the actual background process. fn maybe_cleanup_once(&self) -> Option>> { if self.cleanup_on_refresh { Some(self.cleanup_once()) } else { None } } fn join_cleanup_thread(cleanup_hook: Option>>) { if let Some(cleanup_hook) = cleanup_hook { if let Err(e) = cleanup_hook.join().expect("cleanup thread panicked") { eprintln!("bkt: cache cleanup failed: {:?}", e); } } } /// Initiates a single cleanup cycle of the cache, removing stale data in the background. This /// should be invoked by short-lived applications early in their lifecycle and then joined /// before exiting. `execute_and_cleanup` can be used instead to only trigger a cleanup on a /// cache miss, avoiding the extra work on cache hits. Long-running applications should /// typically prefer `cleanup_thread` which triggers periodic cleanups. /// /// # Errors /// /// The Result returned by joining indicates whether there were any unexpected errors while /// cleaning up. It should be Ok in all normal circumstances. // TODO if cleanup should always succeed (or no-op) why return Result? 
pub fn cleanup_once(&self) -> std::thread::JoinHandle> { let cache = self.cache.clone(); std::thread::spawn(move || { cache.cleanup() }) } /// Initiates an infinite-loop thread that triggers periodic cleanups of the cache, removing /// stale data in the background. It is not necessary to `join()` this thread, it will /// be terminated when the main thread exits. pub fn cleanup_thread(&self) -> std::thread::JoinHandle<()> { let cache = self.cache.clone(); std::thread::spawn(move || { // Hard-coded for now, could be made configurable if needed let poll_duration = Duration::from_secs(60); loop { if let Err(e) = cache.cleanup() { eprintln!("Bkt: cache cleanup failed: {:?}", e); } std::thread::sleep(poll_duration); } }) } } // Note: most functionality of Bkt is tested via cli.rs #[cfg(test)] mod bkt_tests { use super::*; use test_dir::{TestDir, DirBuilder, FileType}; // Just validating that Bkt can be cloned to create siblings with different settings. #[test] #[allow(clippy::redundant_clone)] fn cloneable() { let dir = TestDir::temp(); let bkt = Bkt::create(dir.path("cache")).unwrap(); let _scoped = bkt.clone().scoped("scope".into()); let _no_cleanup = bkt.clone().cleanup_on_refresh(false); } #[test] fn cached() { let dir = TestDir::temp(); let file = dir.path("file"); let cmd = CommandDesc::new( ["bash", "-c", r#"echo "$RANDOM" > "${1:?}"; cat "${1:?}""#, "arg0", file.to_str().unwrap()]); let bkt = Bkt::create(dir.path("cache")).unwrap(); let (first_inv, first_status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); assert!(first_status.is_miss()); for _ in 1..3 { let (subsequent_inv, subsequent_status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); assert_eq!(first_inv, subsequent_inv); assert!(subsequent_status.is_hit()); } } #[test] fn discard_failures() { let dir = TestDir::temp(); let output = dir.path("output"); let code = dir.path("code"); let cmd = CommandDesc::new( ["bash", "-c", r#"cat "${1:?}"; exit "$(< "${2:?}")""#, "arg0", 
output.to_str().unwrap(), code.to_str().unwrap()]) .with_discard_failures(true); let bkt = Bkt::create(dir.path("cache")).unwrap(); write!(File::create(&output).unwrap(), "A").unwrap(); write!(File::create(&code).unwrap(), "10").unwrap(); let (first_inv, first_status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); assert_eq!(first_inv.exit_code, 10, "{:?}\nstderr:{}", first_inv, first_inv.stderr_utf8()); assert_eq!(first_inv.stdout_utf8(), "A"); assert!(first_status.is_miss()); write!(File::create(&output).unwrap(), "B").unwrap(); let (subsequent_inv, subsequent_status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); // call is not cached assert_eq!(subsequent_inv.stdout_utf8(), "B"); assert!(subsequent_status.is_miss()); write!(File::create(&output).unwrap(), "C").unwrap(); write!(File::create(&code).unwrap(), "0").unwrap(); let (success_inv, success_status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); assert_eq!(success_inv.exit_code, 0); assert_eq!(success_inv.stdout_utf8(), "C"); assert!(success_status.is_miss()); write!(File::create(&output).unwrap(), "D").unwrap(); let (cached_inv, cached_status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); assert_eq!(success_inv, cached_inv); assert!(cached_status.is_hit()); } #[test] fn with_working_dir() { let dir = TestDir::temp().create("wd", FileType::Dir); let work_dir = dir.path("wd"); let cmd = CommandDesc::new(["bash", "-c", "echo Hello World > file"]); let state = cmd.capture_state().unwrap().with_working_dir(&work_dir); let bkt = Bkt::create(dir.path("cache")).unwrap(); let (result, status) = bkt.retrieve(state, Duration::from_secs(10)).unwrap(); assert_eq!(result.stderr_utf8(), ""); assert_eq!(result.exit_code(), 0); assert_eq!(std::fs::read_to_string(work_dir.join("file")).unwrap(), "Hello World\n"); assert!(status.is_miss()); } #[test] fn cwd_and_working_dir_share_cache() { let old_cwd = std::env::current_dir().unwrap(); let dir = TestDir::temp().create("wd", 
FileType::Dir); let wd = dir.path("wd"); let bkt = Bkt::create(dir.path("cache")).unwrap(); // Note we haven't changed the cwd yet - use_cwd() shouldn't read it let cmd = CommandDesc::new(["bash", "-c", "pwd; echo '.' > file"]).with_cwd(); // Now the cwd is captured, but overwritten by with_working_dir() let state = cmd.capture_state().unwrap().with_working_dir(&wd); let (result, status) = bkt.retrieve(state, Duration::from_secs(10)).unwrap(); assert_eq!(result.stdout_utf8(), format!("{}\n", wd.to_str().unwrap())); assert_eq!(result.stderr_utf8(), ""); assert_eq!(result.exit_code(), 0); assert!(status.is_miss()); // now change the cwd and see it get captured lazily std::env::set_current_dir(&wd).unwrap(); let (result, status) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); assert_eq!(result.stdout_utf8(), format!("{}\n", wd.to_str().unwrap())); assert_eq!(result.stderr_utf8(), ""); assert_eq!(result.exit_code(), 0); assert!(status.is_hit()); // and the file was only written to once, hence the cache was shared assert_eq!(std::fs::read_to_string(wd.join("file")).unwrap(), ".\n"); // Restore the original cwd // NB this could fail to be reached if the test fails, which could cause other confusing // errors. An RAII pattern using Drop, similar to absl::Cleanup, would be nicer but I'm not // aware of a standard pattern for this atm. 
std::env::set_current_dir(old_cwd).unwrap(); } #[test] // TODO the JSON serializer doesn't support OsString keys, CommandState needs a custom // Serializer (for feature="debug", at least) - see https://stackoverflow.com/q/51276896 // and https://github.com/serde-rs/json/issues/809 #[cfg(not(feature = "debug"))] fn with_env() { let dir = TestDir::temp().create("dir", FileType::Dir); let cmd = CommandDesc::new(["bash", "-c", r#"echo "FOO:${FOO:?}""#]).capture_state().unwrap() .with_env("FOO", "bar"); let bkt = Bkt::create(dir.path("cache")).unwrap(); let (result, status) = bkt.retrieve(cmd, Duration::from_secs(10)).unwrap(); assert_eq!(result.stderr_utf8(), ""); assert_eq!(result.exit_code(), 0); assert_eq!(result.stdout_utf8(), "FOO:bar\n"); assert!(status.is_miss()); } #[test] fn with_modtime() { let dir = TestDir::temp().create("dir", FileType::Dir); let file = dir.path("file"); let cmd = CommandDesc::new(["cat", file.to_str().unwrap()]); let cmd_modtime = cmd.clone().with_modtime(&file); let bkt = Bkt::create(dir.path("cache")).unwrap(); write!(File::create(&file).unwrap(), "A").unwrap(); let (result_a, status_a) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); let (result_mod_a, status_mod_a) = bkt.retrieve(&cmd_modtime, Duration::from_secs(10)).unwrap(); assert!(status_a.is_miss()); assert!(status_mod_a.is_miss()); // Update the file _and_ reset its modtime because modtime is not consistently updated e.g. // if writes are too close together. 
write!(File::create(&file).unwrap(), "B").unwrap(); filetime::set_file_mtime(&file, filetime::FileTime::from_system_time(SystemTime::now() - Duration::from_secs(15))).unwrap(); let (result_b, status_b) = bkt.retrieve(&cmd, Duration::from_secs(10)).unwrap(); let (result_mod_b, status_mod_b) = bkt.retrieve(&cmd_modtime, Duration::from_secs(10)).unwrap(); assert_eq!(result_a.stdout_utf8(), result_b.stdout_utf8()); // cached assert!(status_b.is_hit()); assert_eq!(result_mod_a.stdout_utf8(), "A"); assert_eq!(result_mod_b.stdout_utf8(), "B"); assert!(status_mod_b.is_miss()); } } bkt-0.6.1/src/main.rs000075500000000000000000000135531046102023000125030ustar 00000000000000use std::ffi::OsString; use std::io::{self, Write}; use std::path::PathBuf; use std::process::{Command, exit, Stdio}; use std::time::{Duration, Instant}; use anyhow::{Context, Result}; use clap::Parser; use bkt::{CommandDesc, Bkt}; // Re-invokes bkt with --force and then discards the subprocess, causing the cache // to be refreshed asynchronously. fn force_update_async() -> Result<()> { let mut args = std::env::args_os(); let arg0 = args.next().expect("Must always be a 0th argument"); let mut command = match std::env::current_exe() { Ok(path) => Command::new(path), Err(_) => Command::new(arg0), }; // Discard stdout/err so the calling process doesn't wait for them to close. // Intentionally drop the returned Child; after this process exits the // child process will continue running in the background. 
command.arg("--force").args(args.filter(|a| a != "--warm")) .stdout(Stdio::null()).stderr(Stdio::null()) .spawn().context("Failed to start background process")?; Ok(()) } // Runs bkt after main() handles flag parsing fn run(cli: Cli) -> Result { let ttl: Duration = cli.ttl.into(); let stale: Option = cli.stale.map(Into::into); assert!(!ttl.is_zero(), "--ttl cannot be zero"); if let Some(stale) = stale { assert!(!stale.is_zero(), "--stale cannot be zero"); assert!(stale < ttl, "--stale must be less than --ttl"); } let mut bkt = match cli.cache_dir { Some(cache_dir) => Bkt::create(cache_dir)?, None => Bkt::in_tmp()?, }; if let Some(scope) = cli.scope { bkt = bkt.scoped(scope); } let mut command = CommandDesc::new(cli.command); if cli.cwd { command = command.with_cwd(); } let envs = cli.env; if !envs.is_empty() { command = command.with_envs(&envs); } let files = cli.modtime; if !files.is_empty() { command = command.with_modtimes(&files); } if cli.discard_failures { command = command.with_discard_failures(true); } if cli.warm && !cli.force { force_update_async()?; return Ok(0); } let invocation = if cli.force { bkt.refresh(&command, ttl)?.0 } else { let (invocation, status) = bkt.retrieve(&command, ttl)?; if let Some(stale) = stale { if let bkt::CacheStatus::Hit(cached_at) = status { if (Instant::now() - cached_at) > stale { force_update_async()?; } } } invocation }; // BrokenPipe errors are uninteresting for command line applications; just stop writing to that // descriptor and, if appropriate, exit. 
Rust doesn't have good support for this presently, see // https://github.com/rust-lang/rust/issues/46016 fn disregard_broken_pipe(result: std::io::Result<()>) -> std::io::Result<()> { use std::io::ErrorKind::*; if let Err(e) = &result { if let BrokenPipe = e.kind() { return Ok(()); } } result } disregard_broken_pipe(io::stdout().write_all(invocation.stdout())) .context("error writing to stdout")?; disregard_broken_pipe(io::stderr().write_all(invocation.stderr())) .context("error writing to stderr")?; Ok(invocation.exit_code()) } #[derive(Debug, Parser)] #[command(about, version)] struct Cli { /// The command to run #[arg(required = true, last = true)] command: Vec, /// Duration the cached result will be valid for #[arg(long, value_name = "DURATION", default_value = "60s", visible_alias = "time-to-live", env = "BKT_TTL")] ttl: humantime::Duration, /// Duration after which the result will be asynchronously refreshed #[arg(long, value_name = "DURATION", conflicts_with = "warm")] stale: Option, /// Asynchronously execute and cache the given command, even if it's already cached #[arg(long)] warm: bool, /// Execute and cache the given command, even if it's already cached #[arg(long, conflicts_with = "warm")] force: bool, /// Includes the current working directory in the cache key, /// so that the same command run in different directories caches separately #[arg(long, visible_alias = "use-working-dir")] cwd: bool, /// Includes the given environment variable in the cache key, /// so that the same command run with different values for the given variables caches separately #[arg(long, value_name = "NAME", visible_alias = "use-environment")] env: Vec, /// Includes the last modification time of the given file(s) in the cache key, /// so that the same command run with different modtimes for the given files caches separately #[arg(long, value_name = "FILE", visible_alias = "use-file-modtime")] modtime: Vec, /// Don't cache invocations that fail (non-zero exit code). 
/// USE CAUTION when passing this flag, as unexpected failures can lead to a spike in invocations /// which can exacerbate ongoing issues, effectively a DDoS. #[arg(long)] discard_failures: bool, /// If set, all cached data will be scoped to this value, /// preventing collisions with commands cached with different scopes #[arg(long, value_name = "NAME", env = "BKT_SCOPE")] scope: Option, /// The directory under which to persist cached invocations; /// defaults to the system's temp directory. /// Setting this to a directory backed by RAM or an SSD, such as a tmpfs partition, /// will significantly reduce caching overhead. #[arg(long, value_name = "DIR", env = "BKT_CACHE_DIR")] cache_dir: Option, } fn main() { let cli = Cli::parse(); match run(cli) { Ok(code) => exit(code), Err(msg) => { eprintln!("bkt: {:#}", msg); exit(127); } } } bkt-0.6.1/tests/cli.rs000075500000000000000000000561221046102023000127000ustar 00000000000000mod cli { use std::path::Path; use std::process::{Command, Stdio}; use std::time::{SystemTime, Duration}; use anyhow::Result; use test_dir::{TestDir, DirBuilder, FileType}; use std::fs::File; // Bash scripts to pass to -c. // Avoid depending on external programs. const COUNT_INVOCATIONS: &str = r#"file=${1:?} lines=0; \ printf '%s' '.' 
>> "$file"; \ read < "$file"; \ printf '%s' "${#REPLY}";"#; const PRINT_ARGS: &str = r#"args=("$@"); declare -p args;"#; const EXIT_WITH: &str = r#"exit "${1:?}";"#; const EXIT_WITH_ENV: &str = r#"exit "${EXIT_WITH:?}";"#; const AWAIT_AND_TOUCH: &str = r#"echo awaiting; \ until [[ -e "${1:?}" ]]; do sleep .1; done; \ echo > "${2:?}";"#; fn bkt>(cache_dir: P) -> Command { let test_exe = std::env::current_exe().expect("Could not resolve test location"); let dir = test_exe .parent().expect("Could not resolve test directory") .parent().expect("Could not resolve binary directory"); let mut path = dir.join("bkt"); if !path.exists() { path.set_extension("exe"); } assert!(path.exists(), "Could not find bkt binary in {:?}", dir); let mut bkt = Command::new(&path); bkt.env("BKT_TMPDIR", cache_dir.as_ref().as_os_str()); bkt } #[derive(Eq, PartialEq, Debug)] struct CmdResult { out: String, err: String, status: Option, } impl From for CmdResult { fn from(output: std::process::Output) -> Self { CmdResult { out: std::str::from_utf8(&output.stdout).unwrap().into(), err: std::str::from_utf8(&output.stderr).unwrap().into(), status: output.status.code() } } } fn run(cmd: &mut Command) -> CmdResult { cmd.output().unwrap().into() } fn succeed(cmd: &mut Command) -> String { let result = run(cmd); if cfg!(feature="debug") { if !result.err.is_empty() { eprintln!("stderr:\n{}", result.err); } } else { // debug writes to stderr, so don't bother checking it in that mode assert_eq!(result.err, ""); } assert_eq!(result.status, Some(0)); result.out } // Returns once the given file contains different contents than those provided. Panics if the // file does not change after ~5s. // // Note this could return immediately if the file already doesn't contain initial_contents // (e.g. if the given contents were wrong) because such a check could race. Do additional // checks prior to waiting if needed. 
fn wait_for_contents_to_change>(file: P, initial_contents: &str) { for _ in 1..50 { if std::fs::read_to_string(&file).unwrap() != initial_contents { return; } std::thread::sleep(Duration::from_millis(100)); } panic!("Contents of {} did not change", file.as_ref().to_string_lossy()); } fn make_dir_stale>(dir: P, age: Duration) -> Result<()> { debug_assert!(dir.as_ref().is_dir()); let desired_time = SystemTime::now() - age; let stale_time = filetime::FileTime::from_system_time(desired_time); for entry in std::fs::read_dir(dir)? { let path = entry?.path(); let last_modified = std::fs::metadata(&path)?.modified()?; if path.is_file() && last_modified > desired_time { filetime::set_file_mtime(&path, stale_time)?; } else if path.is_dir() { make_dir_stale(&path, age)?; } } Ok(()) } fn make_file_stale>(file: P, age: Duration) -> Result<()> { debug_assert!(file.as_ref().is_file()); let desired_time = SystemTime::now() - age; let stale_time = filetime::FileTime::from_system_time(desired_time); filetime::set_file_mtime(&file, stale_time)?; Ok(()) } fn join(beg: &[A], tail: &[A]) -> Vec { beg.iter().chain(tail).cloned().collect() } #[test] fn help() { let dir = TestDir::temp(); let out = succeed(bkt(dir.path("cache")).arg("--help")); assert!(out.contains("bkt [OPTIONS] -- ...")); } #[test] fn cached() { let dir = TestDir::temp(); let file = dir.path("file"); let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; let first_result = run(bkt(dir.path("cache")).args(args)); for _ in 1..3 { let subsequent_result = run(bkt(dir.path("cache")).args(args)); if cfg!(feature="debug") { assert_eq!(first_result.status, subsequent_result.status); assert_eq!(first_result.out, subsequent_result.out); } else { assert_eq!(first_result, subsequent_result); } } } #[test] fn cache_expires() { let dir = TestDir::temp(); let file = dir.path("file"); let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; let first_result = 
succeed(bkt(dir.path("cache")).args(args)); assert_eq!(first_result, "1"); // Slightly stale is still cached make_dir_stale(dir.path("cache"), Duration::from_secs(10)).unwrap(); let subsequent_result = succeed(bkt(dir.path("cache")).args(args)); assert_eq!(first_result, subsequent_result); make_dir_stale(dir.path("cache"), Duration::from_secs(120)).unwrap(); let after_stale_result = succeed(bkt(dir.path("cache")).args(args)); assert_eq!(after_stale_result, "2"); // Respects BKT_TTL env var (other tests cover --ttl) make_dir_stale(dir.path("cache"), Duration::from_secs(10)).unwrap(); let env_result = succeed(bkt(dir.path("cache")).env("BKT_TTL", "5s").args(args)); assert_eq!(env_result, "3"); } #[test] fn cache_expires_separately() { let dir = TestDir::temp(); let file1 = dir.path("file1"); let file2 = dir.path("file2"); let args1 = ["--ttl=10s", "--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file1.to_str().unwrap()]; let args2 = ["--ttl=20s", "--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file2.to_str().unwrap()]; // first invocation assert_eq!(succeed(bkt(dir.path("cache")).args(args1)), "1"); assert_eq!(succeed(bkt(dir.path("cache")).args(args2)), "1"); // second invocation, cached assert_eq!(succeed(bkt(dir.path("cache")).args(args1)), "1"); assert_eq!(succeed(bkt(dir.path("cache")).args(args2)), "1"); // only shorter TTL is invalidated make_dir_stale(dir.path("cache"), Duration::from_secs(15)).unwrap(); assert_eq!(succeed(bkt(dir.path("cache")).args(args1)), "2"); assert_eq!(succeed(bkt(dir.path("cache")).args(args2)), "1"); } #[test] fn cache_hits_with_different_settings() { let dir = TestDir::temp(); let file = dir.path("file"); let args1 = ["--ttl=10s", "--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; let args2 = ["--ttl=20s", "--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; // despite different TTLs the invocation is still cached assert_eq!(succeed(bkt(dir.path("cache")).args(args1)), "1"); 
assert_eq!(succeed(bkt(dir.path("cache")).args(args2)), "1"); // the provided TTL is respected, though it was cached with a smaller TTL make_dir_stale(dir.path("cache"), Duration::from_secs(15)).unwrap(); assert_eq!(succeed(bkt(dir.path("cache")).args(args2)), "1"); // However the cache can be invalidated in the background using the older TTL make_dir_stale(dir.path("cache"), Duration::from_secs(60)).unwrap(); // ensure the following call triggers a cleanup succeed(bkt(dir.path("cache")).args(["--", "bash", "-c", "sleep 1"])); // trigger cleanup via a different command assert_eq!(succeed(bkt(dir.path("cache")).args(args1)), "2"); } #[test] fn cache_refreshes_in_background() { let dir = TestDir::temp(); let file = dir.path("file"); let args = ["--stale=10s", "--ttl=20s", "--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()]; assert_eq!(succeed(bkt(dir.path("cache")).args(args)), "1"); make_dir_stale(dir.path("cache"), Duration::from_secs(15)).unwrap(); assert_eq!(succeed(bkt(dir.path("cache")).args(args)), "1"); wait_for_contents_to_change(&file, "."); assert_eq!(std::fs::read_to_string(&file).unwrap(), ".."); assert_eq!(succeed(bkt(dir.path("cache")).args(args)), "2"); } #[test] fn discard_failures() { let dir = TestDir::temp(); let file = dir.path("file"); let cmd = format!("{} false;", COUNT_INVOCATIONS); let args = ["--discard-failures", "--", "bash", "-c", &cmd, "arg0", file.to_str().unwrap()]; let result = run(bkt(dir.path("cache")).args(args)); assert_eq!(result.out, "1"); assert_eq!(result.status, Some(1)); // Not cached let result = run(bkt(dir.path("cache")).args(args)); assert_eq!(result.out, "2"); assert_eq!(result.status, Some(1)); } #[test] fn discard_failure_cached_separately() { let dir = TestDir::temp(); let allow_args = ["--", "bash", "-c", EXIT_WITH_ENV, "arg0"]; let discard_args = join(&["--discard-failures"], &allow_args); // without separate caches a --discard-failures invocation could return a previously-cached // failed 
// result. In 0.5.4 and earlier this would mean result2.status == 14.
    let result1 = run(bkt(dir.path("cache")).args(allow_args).env("EXIT_WITH", "14"));
    assert_eq!(result1.status, Some(14));
    let result2 = run(bkt(dir.path("cache")).args(discard_args).env("EXIT_WITH", "0"));
    assert_eq!(result2.status, Some(0));
}

// With --discard-failures a stale cached success is served while the background
// warm attempt fails; the failure must not replace the cached success.
#[test]
fn discard_failures_in_background() {
    let dir = TestDir::temp();
    let file = dir.path("file");
    let cmd = format!("{} ! \"${{FAIL:-false}}\";", COUNT_INVOCATIONS);
    let args = ["--ttl=20s", "--discard-failures", "--", "bash", "-c", &cmd, "arg0", file.to_str().unwrap()];
    let stale_args = join(&["--stale=10s"], &args);

    // Cache result normally
    assert_eq!(succeed(bkt(dir.path("cache")).args(args)), "1");

    // Cause cmd to fail and not be cached
    std::env::set_var("FAIL", "true");

    // returns cached result, but attempts to warm in the background
    make_dir_stale(dir.path("cache"), Duration::from_secs(15)).unwrap();
    assert_eq!(succeed(bkt(dir.path("cache")).args(&stale_args)), "1");

    // Verify command ran
    wait_for_contents_to_change(&file, ".");
    assert_eq!(std::fs::read_to_string(&file).unwrap(), "..");

    // But cached success is still returned
    assert_eq!(succeed(bkt(dir.path("cache")).args(args)), "1");
}

// --cache-dir (flag) and BKT_CACHE_DIR (env var) each select a distinct cache
// location, so switching either causes a fresh invocation.
#[test]
fn respects_cache_dir() {
    let dir = TestDir::temp();
    let file = dir.path("file");
    let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()];

    let first_call = succeed(bkt(dir.path("cache"))
        .arg(format!("--cache-dir={}", dir.path("cache").display())).args(args));
    assert_eq!(first_call, "1");
    assert_eq!(first_call, succeed(bkt(dir.path("cache"))
        .arg(format!("--cache-dir={}", dir.path("cache").display())).args(args)));

    let diff_cache = succeed(bkt(dir.path("cache"))
        .arg(format!("--cache-dir={}", dir.path("new-cache").display())).args(args));
    assert_eq!(diff_cache, "2");

    let env_cache = succeed(bkt(dir.path("cache"))
        .env("BKT_CACHE_DIR", dir.path("env-cache").as_os_str()).args(args));
    assert_eq!(env_cache, "3");
}

// A relative --cache-dir is resolved against the process' working directory.
// https://github.com/dimo414/bkt/issues/9
#[test]
fn respects_relative_cache() {
    let dir = TestDir::temp();
    let cwd = dir.path("cwd");
    std::fs::create_dir(&cwd).unwrap();
    let file = dir.path("file");
    let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()];

    let first_call = succeed(bkt(dir.path("unused"))
        .arg("--cache-dir=cache").args(args).current_dir(&cwd));
    assert_eq!(first_call, "1");
    assert_eq!(first_call, succeed(bkt(dir.path("unused"))
        .arg("--cache-dir=cache").args(args).current_dir(&cwd)));
}

// --scope (flag) and BKT_SCOPE (env var) partition the cache: a different scope
// misses, while the same scope (however specified) hits.
#[test]
fn respects_cache_scope() {
    let dir = TestDir::temp();
    let file = dir.path("file");
    let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()];

    let first_call = succeed(bkt(dir.path("cache")).args(args));
    assert_eq!(first_call, "1");
    assert_eq!(first_call, succeed(bkt(dir.path("cache")).args(args)));

    let diff_scope = succeed(bkt(dir.path("cache"))
        .arg("--scope=foo").args(args));
    assert_eq!(diff_scope, "2");
    assert_eq!(diff_scope, succeed(bkt(dir.path("cache"))
        .arg("--scope=foo").args(args)));
    assert_eq!(diff_scope, succeed(bkt(dir.path("cache"))
        .env("BKT_SCOPE", "foo").args(args)));
}

// The exact argv keys the cache: "A B" as one argument and "A", "B" as two are
// distinct cache entries.
#[test]
fn respects_args() {
    let dir = TestDir::temp();
    let file = dir.path("file");
    let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()];

    let first_call = succeed(bkt(dir.path("cache")).args(args));
    assert_eq!(first_call, "1");
    assert_eq!(first_call, succeed(bkt(dir.path("cache")).args(args)));

    let diff_args = succeed(bkt(dir.path("cache")).args(args).arg("A B"));
    assert_eq!(diff_args, "2");

    let split_args = succeed(bkt(dir.path("cache")).args(args).args(["A", "B"]));
    assert_eq!(split_args, "3");
}

// Without --cwd the working directory is NOT part of the cache key (dir2 gets
// dir1's cached output); with --cwd each directory gets its own entry.
#[test]
fn respects_cwd() {
    let dir = TestDir::temp()
        .create("dir1", FileType::Dir)
        .create("dir2", FileType::Dir);
    let args = ["--", "bash", "-c", "pwd"];
    let cwd_args = join(&["--cwd"], &args);

    let without_cwd_dir1 = succeed(bkt(dir.path("cache")).args(args).current_dir(dir.path("dir1")));
    let without_cwd_dir2 = succeed(bkt(dir.path("cache")).args(args).current_dir(dir.path("dir2")));
    assert!(without_cwd_dir1.trim().ends_with("/dir1"));
    assert!(without_cwd_dir2.trim().ends_with("/dir1")); // incorrect! cached too eagerly

    let cwd_dir1 = succeed(bkt(dir.path("cache")).args(&cwd_args).current_dir(dir.path("dir1")));
    let cwd_dir2 = succeed(bkt(dir.path("cache")).args(&cwd_args).current_dir(dir.path("dir2")));
    assert!(cwd_dir1.trim().ends_with("/dir1"));
    assert!(cwd_dir2.trim().ends_with("/dir2"));
}

// Only environment variables named via --env participate in the cache key;
// unlisted vars (BAZ here) never invalidate a cached result.
#[test]
#[cfg(not(feature = "debug"))] // See lib's bkt_tests::with_env
fn respects_env() {
    let dir = TestDir::temp();
    let args = ["--", "bash", "-c", r#"printf 'foo:%s bar:%s baz:%s' "$FOO" "$BAR" "$BAZ""#];
    let env_args = join(&["--env=FOO", "--env=BAR"], &args);

    let without_env = succeed(bkt(dir.path("cache")).args(args)
        .env("FOO", "1").env("BAR", "1").env("BAZ", "1"));
    assert_eq!(without_env, succeed(bkt(dir.path("cache")).args(args)));
    // even if --env is set, if the vars are absent cache still hits earlier call
    assert_eq!(without_env, succeed(bkt(dir.path("cache")).args(&env_args)));

    let env = succeed(bkt(dir.path("cache")).args(&env_args)
        .env("FOO", "2").env("BAR", "2").env("BAZ", "2"));
    assert_eq!(env, "foo:2 bar:2 baz:2");
    let env = succeed(bkt(dir.path("cache")).args(&env_args)
        .env("FOO", "3").env("BAR", "2").env("BAZ", "3"));
    assert_eq!(env, "foo:3 bar:2 baz:3");
    let env = succeed(bkt(dir.path("cache")).args(&env_args)
        .env("FOO", "4").env("BAR", "4").env("BAZ", "4"));
    assert_eq!(env, "foo:4 bar:4 baz:4");
    let env = succeed(bkt(dir.path("cache")).args(&env_args)
        .env("FOO", "2").env("BAR", "2").env("BAZ", "5"));
    assert_eq!(env, "foo:2 bar:2 baz:2"); // BAZ doesn't invalidate cache
}

// --modtime invalidates the cache whenever the watched file's mtime changes
// (file created, or mtime moved), while an unchanged/absent file stays cached.
#[test]
fn respects_modtime() {
    let dir = TestDir::temp();
    let file = dir.path("file");
    let watch_file = dir.path("watch");
    let args = ["--modtime", watch_file.to_str().unwrap(),
        "--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()];

    let no_file_result = succeed(bkt(dir.path("cache")).args(args));
    // File absent is cached
    assert_eq!(no_file_result, "1");
    assert_eq!(no_file_result, succeed(bkt(dir.path("cache")).args(args)));

    // create a new file, invalidating cache
    File::create(&watch_file).unwrap();
    let new_file_result = succeed(bkt(dir.path("cache")).args(args));
    assert_eq!(new_file_result, "2");
    assert_eq!(new_file_result, succeed(bkt(dir.path("cache")).args(args)));

    // update the modtime, again invalidating the cache
    make_file_stale(&watch_file, Duration::from_secs(10)).unwrap();
    let old_file_result = succeed(bkt(dir.path("cache")).args(args));
    assert_eq!(old_file_result, "3");
    assert_eq!(old_file_result, succeed(bkt(dir.path("cache")).args(args)));
}

// Without the debug feature bkt emits nothing of its own on stdout/stderr,
// whether the call is a cache miss or a hit.
#[test]
#[cfg(not(feature="debug"))]
fn no_debug_output() {
    let dir = TestDir::temp();
    let args = ["--", "bash", "-c", "true"];

    // Not cached
    assert_eq!(run(bkt(dir.path("cache")).args(args)),
        CmdResult { out: "".into(), err: "".into(), status: Some(0) });
    // Cached
    assert_eq!(run(bkt(dir.path("cache")).args(args)),
        CmdResult { out: "".into(), err: "".into(), status: Some(0) });
}

// With the debug feature every stderr line is "bkt: "-prefixed, and miss/hit
// paths each emit their expected sequence of diagnostics.
#[test]
#[cfg(feature="debug")]
fn debug_output() {
    fn starts_with_bkt(s: &str) -> bool { s.lines().all(|l| l.starts_with("bkt: ")) }
    let miss_debug_re = regex::Regex::new(
        "bkt: state: \nbkt: lookup .* not found\nbkt: cleanup data .*\nbkt: cleanup keys .*\nbkt: store data .*\nbkt: store key .*\n").unwrap();
    let hit_debug_re = regex::Regex::new("bkt: lookup .* found\n").unwrap();

    let dir = TestDir::temp();
    let args = ["--", "bash", "-c", PRINT_ARGS, "arg0"];

    let miss = run(bkt(dir.path("cache")).args(args));
    assert!(starts_with_bkt(&miss.err), "{}", miss.err);
    assert!(miss_debug_re.is_match(&miss.err), "{}", miss.err);

    let hit = run(bkt(dir.path("cache")).args(args));
    assert!(starts_with_bkt(&hit.err), "{}", hit.err);
    assert!(hit_debug_re.is_match(&hit.err), "{}", hit.err);
}

// A cached (second) invocation reproduces the live (first) invocation's output
// exactly, across a variety of argument shapes.
#[test]
fn output_preserved() {
    let dir = TestDir::temp();

    // Runs the same command twice and asserts the cached output matches the live one.
    fn same_output(dir: &TestDir, args: &[&str]) {
        let bkt_args = ["--", "bash", "-c", PRINT_ARGS, "arg0"];
        // Second call will be cached
        assert_eq!(
            succeed(bkt(dir.path("cache")).args(bkt_args).args(args)),
            succeed(bkt(dir.path("cache")).args(bkt_args).args(args)));
    }

    same_output(&dir, &[]);
    same_output(&dir, &[""]);
    same_output(&dir, &["a", "b"]);
    same_output(&dir, &["a b"]);
    same_output(&dir, &["a b", "c"]);
}

// Output containing NUL bytes round-trips through the cache byte-for-byte on
// both stdout and stderr.
#[test]
#[cfg(not(feature="debug"))]
fn sensitive_output() {
    let dir = TestDir::temp();
    let args = ["--", "bash", "-c", r"printf 'foo\0bar'; printf 'bar\0baz\n' >&2"];

    // Not cached
    let output = run(bkt(dir.path("cache")).args(args));
    assert_eq!(output,
        CmdResult { out: "foo\u{0}bar".into(), err: "bar\u{0}baz\n".into(), status: Some(0) });
    // Cached
    assert_eq!(run(bkt(dir.path("cache")).args(args)), output);
}

// The subcommand's exit code is preserved both live and from the cache.
#[test]
fn exit_code_preserved() {
    let dir = TestDir::temp();
    let args = ["--", "bash", "-c", EXIT_WITH, "arg0"];

    assert_eq!(run(bkt(dir.path("cache")).args(args).arg("14")).status, Some(14));
    assert_eq!(run(bkt(dir.path("cache")).args(args).arg("14")).status, Some(14));
}

// --warm returns immediately while the command runs in the background; once it
// completes, a later call is served from the warmed cache without re-running.
#[test]
fn warm() {
    let dir = TestDir::temp();
    let await_file = dir.path("await");
    let touch_file = dir.path("touch");
    let args = ["--", "bash", "-c", AWAIT_AND_TOUCH, "arg0",
        await_file.to_str().unwrap(), touch_file.to_str().unwrap()];
    let warm_args = join(&["--warm"], &args);

    let output = succeed(bkt(dir.path("cache")).args(warm_args));
    assert_eq!(output, "");
    assert!(!touch_file.exists());

    File::create(&await_file).unwrap(); // allow the bash process to terminate
    for _ in 0..10 {
        if touch_file.exists() { break; }
        std::thread::sleep(Duration::from_millis(200));
    }
    // This ensures the bash process has almost-completed, but it could still race with bkt actually
    // caching the result and creating a key file. If this proves flaky a more robust check would be
    // to inspect the keys directory.
    assert!(touch_file.exists());
    std::fs::remove_file(&await_file).unwrap(); // process would not terminate if run again

    let output = succeed(bkt(dir.path("cache")).args(args));
    assert_eq!(output, "awaiting\n");
}

// --force re-runs the command despite a warm cache, and the forced result
// replaces the prior cached value.
#[test]
fn force() {
    let dir = TestDir::temp();
    let file = dir.path("file");
    let args = ["--", "bash", "-c", COUNT_INVOCATIONS, "arg0", file.to_str().unwrap()];
    let args_force = join(&["--force"], &args);

    let output = succeed(bkt(dir.path("cache")).args(args));
    assert_eq!(output, "1");
    let output = succeed(bkt(dir.path("cache")).args(args));
    assert_eq!(output, "1");

    let output = succeed(bkt(dir.path("cache")).args(args_force));
    assert_eq!(output, "2");
    let output = succeed(bkt(dir.path("cache")).args(args));
    assert_eq!(output, "2");
}

// Two concurrent invocations of the same uncached command both succeed; the
// command itself runs twice (no cross-process locking is asserted), and at
// least one caller observes the second invocation's output.
#[test]
fn concurrent_call_race() {
    let dir = TestDir::temp();
    let file = dir.path("file");
    let slow_count_invocations = format!(r#"sleep "0.5$RANDOM"; {}"#, COUNT_INVOCATIONS);
    let args = ["--", "bash", "-c", &slow_count_invocations, "arg0", file.to_str().unwrap()];
    println!("{:?}", args);

    let proc1 = bkt(dir.path("cache")).args(args)
        .stdout(Stdio::piped()).stderr(Stdio::piped()).spawn().unwrap();
    let proc2 = bkt(dir.path("cache")).args(args)
        .stdout(Stdio::piped()).stderr(Stdio::piped()).spawn().unwrap();

    let result1: CmdResult = proc1.wait_with_output().unwrap().into();
    if !cfg!(feature="debug") { assert_eq!(result1.err, ""); }
    assert_eq!(result1.status, Some(0));
    let result2: CmdResult = proc2.wait_with_output().unwrap().into();
    if !cfg!(feature="debug") { assert_eq!(result2.err, ""); }
    assert_eq!(result2.status, Some(0));

    assert_eq!(std::fs::read_to_string(&file).unwrap(), "..");
    assert!(result1.out == "2" || result2.out == "2"); // arbitrary which completes first
}

}