b3sum-1.2.0/.cargo_vcs_info.json0000644000000001120000000000100120570ustar { "git": { "sha1": "c61c663ec5dc581a449ed03a69397c698efc4cb5" } } b3sum-1.2.0/Cargo.lock0000644000000301400000000000100100360ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "ansi_term" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" dependencies = [ "winapi", ] [[package]] name = "anyhow" version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ee10e43ae4a853c0a3591d4e2ada1719e553be18199d9da9d4a83f5927c2f5c7" [[package]] name = "arrayref" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544" [[package]] name = "arrayvec" version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" [[package]] name = "atty" version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" dependencies = [ "hermit-abi", "libc", "winapi", ] [[package]] name = "autocfg" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" [[package]] name = "b3sum" version = "1.2.0" dependencies = [ "anyhow", "blake3", "clap", "duct", "hex", "memmap", "rayon", "tempfile", "wild", ] [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "blake3" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"526c210b4520e416420759af363083471656e819a75e831b8d2c9d5a584f2413" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", "digest", "rayon", ] [[package]] name = "cc" version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79c2681d6594606957bbb8631c4b90a7fcaaa72cdb714743a437b156d6a7eedd" [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" version = "2.33.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" dependencies = [ "ansi_term", "atty", "bitflags", "strsim", "textwrap", "unicode-width", "vec_map", ] [[package]] name = "constant_time_eq" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" [[package]] name = "crossbeam-channel" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" dependencies = [ "cfg-if", "crossbeam-utils", ] [[package]] name = "crossbeam-deque" version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" dependencies = [ "cfg-if", "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" dependencies = [ "cfg-if", "crossbeam-utils", "lazy_static", "memoffset", "scopeguard", ] [[package]] name = "crossbeam-utils" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" dependencies = [ "cfg-if", "lazy_static", ] [[package]] name = "digest" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3dd60d1080a57a05ab032377049e0591415d2b31afd7028356dbf3cc6dcb066" dependencies = [ "generic-array", ] [[package]] name = "duct" version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fc6a0a59ed0888e0041cf708e66357b7ae1a82f1c67247e1f93b5e0818f7d8d" dependencies = [ "libc", "once_cell", "os_pipe", "shared_child", ] [[package]] name = "either" version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" [[package]] name = "generic-array" version = "0.14.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "501466ecc8a30d1d3b7fc9229b122b2ce8ed6e9d9223f1138d4babb253e51817" dependencies = [ "typenum", "version_check", ] [[package]] name = "getrandom" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" dependencies = [ "cfg-if", "libc", "wasi", ] [[package]] name = "glob" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] name = "hermit-abi" version = "0.1.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" dependencies = [ "libc", ] [[package]] name = "hex" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.106" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a60553f9a9e039a333b4e9b20573b9e9b9c0bb3a11e201ccc48ef4283456d673" [[package]] name = "memmap" version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" dependencies = [ "libc", "winapi", ] [[package]] name = "memoffset" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9" dependencies = [ "autocfg", ] [[package]] name = "num_cpus" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" dependencies = [ "hermit-abi", "libc", ] [[package]] name = "once_cell" version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" [[package]] name = "os_pipe" version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb233f06c2307e1f5ce2ecad9f8121cffbbee2c95428f44ea85222e460d0d213" dependencies = [ "libc", "winapi", ] [[package]] name = "ppv-lite86" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba" [[package]] name = "rand" version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" dependencies = [ "libc", "rand_chacha", "rand_core", "rand_hc", ] [[package]] name = "rand_chacha" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" 
dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" dependencies = [ "getrandom", ] [[package]] name = "rand_hc" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" dependencies = [ "rand_core", ] [[package]] name = "rayon" version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" dependencies = [ "autocfg", "crossbeam-deque", "either", "rayon-core", ] [[package]] name = "rayon-core" version = "1.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" dependencies = [ "crossbeam-channel", "crossbeam-deque", "crossbeam-utils", "lazy_static", "num_cpus", ] [[package]] name = "redox_syscall" version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" dependencies = [ "bitflags", ] [[package]] name = "remove_dir_all" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" dependencies = [ "winapi", ] [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" [[package]] name = "shared_child" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6be9f7d5565b1483af3e72975e2dee33879b3b86bd48c0929fccf6585d79e65a" dependencies = [ "libc", "winapi", ] [[package]] name = "strsim" version = "0.8.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" [[package]] name = "tempfile" version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" dependencies = [ "cfg-if", "libc", "rand", "redox_syscall", "remove_dir_all", "winapi", ] [[package]] name = "textwrap" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" dependencies = [ "unicode-width", ] [[package]] name = "typenum" version = "1.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" [[package]] name = "unicode-width" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" [[package]] name = "vec_map" version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" [[package]] name = "version_check" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" [[package]] name = "wasi" version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" [[package]] name = "wild" version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "035793abb854745033f01a07647a79831eba29ec0be377205f2a25b0aa830020" dependencies = [ "glob", ] [[package]] name = "winapi" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" dependencies = [ "winapi-i686-pc-windows-gnu", "winapi-x86_64-pc-windows-gnu", ] [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" b3sum-1.2.0/Cargo.toml0000644000000023460000000000100100700ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2018" name = "b3sum" version = "1.2.0" authors = ["Jack O'Connor "] description = "a command line implementation of the BLAKE3 hash function" readme = "README.md" license = "CC0-1.0 OR Apache-2.0" repository = "https://github.com/BLAKE3-team/BLAKE3" [dependencies.anyhow] version = "1.0.25" [dependencies.blake3] version = "1" features = ["rayon"] [dependencies.clap] version = "2.33.1" [dependencies.hex] version = "0.4.0" [dependencies.memmap] version = "0.7.0" [dependencies.rayon] version = "1.2.1" [dependencies.wild] version = "2.0.3" [dev-dependencies.duct] version = "0.13.3" [dev-dependencies.tempfile] version = "3.1.0" [features] neon = ["blake3/neon"] prefer_intrinsics = ["blake3/prefer_intrinsics"] pure = ["blake3/pure"] b3sum-1.2.0/Cargo.toml.orig000064400000000000000000000011650072674642500135770ustar 00000000000000[package] name = "b3sum" version = "1.2.0" authors = ["Jack O'Connor "] description = "a command line implementation of the BLAKE3 hash function" repository = "https://github.com/BLAKE3-team/BLAKE3" license = "CC0-1.0 OR Apache-2.0" readme = "README.md" edition = "2018" [features] neon = ["blake3/neon"] prefer_intrinsics = ["blake3/prefer_intrinsics"] pure = ["blake3/pure"] [dependencies] anyhow = "1.0.25" blake3 = { version = "1", path = "..", features = ["rayon"] } clap = "2.33.1" hex = "0.4.0" memmap = "0.7.0" rayon = "1.2.1" wild = "2.0.3" [dev-dependencies] duct = "0.13.3" tempfile = "3.1.0" b3sum-1.2.0/README.md000064400000000000000000000060230072674642500121650ustar 00000000000000# b3sum A command line utility for calculating [BLAKE3](https://github.com/BLAKE3-team/BLAKE3) hashes, similar to Coreutils tools like `b2sum` or `md5sum`. ``` b3sum 1.2.0 USAGE: b3sum [FLAGS] [OPTIONS] [FILE]... FLAGS: -c, --check Reads BLAKE3 sums from the [file]s and checks them -h, --help Prints help information --keyed Uses the keyed mode. The secret key is read from standard input, and it must be exactly 32 raw bytes. 
        --no-mmap     Disables memory mapping. Currently this also disables
                      multithreading.
        --no-names    Omits filenames in the output
        --quiet       Skips printing OK for each successfully verified file.
                      Must be used with --check.
        --raw         Writes raw output bytes to stdout, rather than hex.
                      --no-names is implied. In this case, only a single
                      input is allowed.
    -V, --version     Prints version information

OPTIONS:
        --derive-key <CONTEXT>    Uses the key derivation mode, with the given
                                  context string. Cannot be used with --keyed.
    -l, --length <LEN>            The number of output bytes, prior to hex
                                  encoding (default 32)
        --num-threads <NUM>       The maximum number of threads to use. By
                                  default, this is the number of logical cores.
                                  If this flag is omitted, or if its value is 0,
                                  RAYON_NUM_THREADS is also respected.

ARGS:
    <FILE>...    Files to hash, or checkfiles to check. When no file is given,
                 or when - is given, read standard input.
```

See also [this document about how the `--check` flag
works](https://github.com/BLAKE3-team/BLAKE3/blob/master/b3sum/what_does_check_do.md).

# Example

Hash the file `foo.txt`:

```bash
b3sum foo.txt
```

Time hashing a gigabyte of data, to see how fast it is:

```bash
# Create a 1 GB file.
head -c 1000000000 /dev/zero > /tmp/bigfile
# Hash it with SHA-256.
time openssl sha256 /tmp/bigfile
# Hash it with BLAKE3.
time b3sum /tmp/bigfile
```

# Installation

Prebuilt binaries are available for Linux, Windows, and macOS (requiring the
[unidentified developer
workaround](https://support.apple.com/guide/mac-help/open-a-mac-app-from-an-unidentified-developer-mh40616/mac))
on the [releases page](https://github.com/BLAKE3-team/BLAKE3/releases).
If you've [installed Rust and
Cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html),
you can also build `b3sum` yourself with:

```
cargo install b3sum
```

On Linux for example, Cargo will put the compiled binary in
`~/.cargo/bin`. You might want to add that directory to your `$PATH`, or
`rustup` might have done it for you when you installed Cargo.
If you want to install directly from this directory, you can run `cargo install --path .`. Or you can just build with `cargo build --release`, which puts the binary at `./target/release/b3sum`. b3sum-1.2.0/src/main.rs000064400000000000000000000532260072674642500127760ustar 00000000000000use anyhow::{bail, ensure, Context, Result}; use clap::{App, Arg}; use std::cmp; use std::convert::TryInto; use std::fs::File; use std::io; use std::io::prelude::*; use std::path::{Path, PathBuf}; #[cfg(test)] mod unit_tests; const NAME: &str = "b3sum"; const FILE_ARG: &str = "FILE"; const DERIVE_KEY_ARG: &str = "derive-key"; const KEYED_ARG: &str = "keyed"; const LENGTH_ARG: &str = "length"; const NO_MMAP_ARG: &str = "no-mmap"; const NO_NAMES_ARG: &str = "no-names"; const NUM_THREADS_ARG: &str = "num-threads"; const RAW_ARG: &str = "raw"; const CHECK_ARG: &str = "check"; const QUIET_ARG: &str = "quiet"; struct Args { inner: clap::ArgMatches<'static>, file_args: Vec, base_hasher: blake3::Hasher, } impl Args { fn parse() -> Result { let inner = App::new(NAME) .version(env!("CARGO_PKG_VERSION")) .arg(Arg::with_name(FILE_ARG).multiple(true).help( "Files to hash, or checkfiles to check. When no file is given,\n\ or when - is given, read standard input.", )) .arg( Arg::with_name(LENGTH_ARG) .long(LENGTH_ARG) .short("l") .takes_value(true) .value_name("LEN") .help( "The number of output bytes, prior to hex\n\ encoding (default 32)", ), ) .arg( Arg::with_name(NUM_THREADS_ARG) .long(NUM_THREADS_ARG) .takes_value(true) .value_name("NUM") .help( "The maximum number of threads to use. By\n\ default, this is the number of logical cores.\n\ If this flag is omitted, or if its value is 0,\n\ RAYON_NUM_THREADS is also respected.", ), ) .arg( Arg::with_name(KEYED_ARG) .long(KEYED_ARG) .requires(FILE_ARG) .help( "Uses the keyed mode. 
The secret key is read from standard\n\ input, and it must be exactly 32 raw bytes.", ), ) .arg( Arg::with_name(DERIVE_KEY_ARG) .long(DERIVE_KEY_ARG) .conflicts_with(KEYED_ARG) .takes_value(true) .value_name("CONTEXT") .help( "Uses the key derivation mode, with the given\n\ context string. Cannot be used with --keyed.", ), ) .arg(Arg::with_name(NO_MMAP_ARG).long(NO_MMAP_ARG).help( "Disables memory mapping. Currently this also disables\n\ multithreading.", )) .arg( Arg::with_name(NO_NAMES_ARG) .long(NO_NAMES_ARG) .help("Omits filenames in the output"), ) .arg(Arg::with_name(RAW_ARG).long(RAW_ARG).help( "Writes raw output bytes to stdout, rather than hex.\n\ --no-names is implied. In this case, only a single\n\ input is allowed.", )) .arg( Arg::with_name(CHECK_ARG) .long(CHECK_ARG) .short("c") .conflicts_with(DERIVE_KEY_ARG) .conflicts_with(KEYED_ARG) .conflicts_with(LENGTH_ARG) .conflicts_with(RAW_ARG) .conflicts_with(NO_NAMES_ARG) .help("Reads BLAKE3 sums from the [file]s and checks them"), ) .arg( Arg::with_name(QUIET_ARG) .long(QUIET_ARG) .requires(CHECK_ARG) .help( "Skips printing OK for each successfully verified file.\n\ Must be used with --check.", ), ) // wild::args_os() is equivalent to std::env::args_os() on Unix, // but on Windows it adds support for globbing. .get_matches_from(wild::args_os()); let file_args = if let Some(iter) = inner.values_of_os(FILE_ARG) { iter.map(|s| s.into()).collect() } else { vec!["-".into()] }; if inner.is_present(RAW_ARG) && file_args.len() > 1 { bail!("Only one filename can be provided when using --raw"); } let base_hasher = if inner.is_present(KEYED_ARG) { // In keyed mode, since stdin is used for the key, we can't handle // `-` arguments. Input::open handles that case below. blake3::Hasher::new_keyed(&read_key_from_stdin()?) 
} else if let Some(context) = inner.value_of(DERIVE_KEY_ARG) { blake3::Hasher::new_derive_key(context) } else { blake3::Hasher::new() }; Ok(Self { inner, file_args, base_hasher, }) } fn num_threads(&self) -> Result> { if let Some(num_threads_str) = self.inner.value_of(NUM_THREADS_ARG) { Ok(Some( num_threads_str .parse() .context("Failed to parse num threads.")?, )) } else { Ok(None) } } fn check(&self) -> bool { self.inner.is_present(CHECK_ARG) } fn raw(&self) -> bool { self.inner.is_present(RAW_ARG) } fn no_mmap(&self) -> bool { self.inner.is_present(NO_MMAP_ARG) } fn no_names(&self) -> bool { self.inner.is_present(NO_NAMES_ARG) } fn len(&self) -> Result { if let Some(length) = self.inner.value_of(LENGTH_ARG) { length.parse::().context("Failed to parse length.") } else { Ok(blake3::OUT_LEN as u64) } } fn keyed(&self) -> bool { self.inner.is_present(KEYED_ARG) } fn quiet(&self) -> bool { self.inner.is_present(QUIET_ARG) } } enum Input { Mmap(io::Cursor), File(File), Stdin, } impl Input { // Open an input file, using mmap if appropriate. "-" means stdin. Note // that this convention applies both to command line arguments, and to // filepaths that appear in a checkfile. fn open(path: &Path, args: &Args) -> Result { if path == Path::new("-") { if args.keyed() { bail!("Cannot open `-` in keyed mode"); } return Ok(Self::Stdin); } let file = File::open(path)?; if !args.no_mmap() { if let Some(mmap) = maybe_memmap_file(&file)? { return Ok(Self::Mmap(io::Cursor::new(mmap))); } } Ok(Self::File(file)) } fn hash(&mut self, args: &Args) -> Result { let mut hasher = args.base_hasher.clone(); match self { // The fast path: If we mmapped the file successfully, hash using // multiple threads. This doesn't work on stdin, or on some files, // and it can also be disabled with --no-mmap. Self::Mmap(cursor) => { hasher.update_rayon(cursor.get_ref()); } // The slower paths, for stdin or files we didn't/couldn't mmap. // This is currently all single-threaded. 
Doing multi-threaded // hashing without memory mapping is tricky, since all your worker // threads have to stop every time you refill the buffer, and that // ends up being a lot of overhead. To solve that, we need a more // complicated double-buffering strategy where a background thread // fills one buffer while the worker threads are hashing the other // one. We might implement that in the future, but since this is // the slow path anyway, it's not high priority. Self::File(file) => { copy_wide(file, &mut hasher)?; } Self::Stdin => { let stdin = io::stdin(); let lock = stdin.lock(); copy_wide(lock, &mut hasher)?; } } Ok(hasher.finalize_xof()) } } impl Read for Input { fn read(&mut self, buf: &mut [u8]) -> io::Result { match self { Self::Mmap(cursor) => cursor.read(buf), Self::File(file) => file.read(buf), Self::Stdin => io::stdin().read(buf), } } } // A 16 KiB buffer is enough to take advantage of all the SIMD instruction sets // that we support, but `std::io::copy` currently uses 8 KiB. Most platforms // can support at least 64 KiB, and there's some performance benefit to using // bigger reads, so that's what we use here. fn copy_wide(mut reader: impl Read, hasher: &mut blake3::Hasher) -> io::Result { let mut buffer = [0; 65536]; let mut total = 0; loop { match reader.read(&mut buffer) { Ok(0) => return Ok(total), Ok(n) => { hasher.update(&buffer[..n]); total += n as u64; } Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => return Err(e), } } } // Mmap a file, if it looks like a good idea. Return None in cases where we // know mmap will fail, or if the file is short enough that mmapping isn't // worth it. However, if we do try to mmap and it fails, return the error. fn maybe_memmap_file(file: &File) -> Result> { let metadata = file.metadata()?; let file_size = metadata.len(); Ok(if !metadata.is_file() { // Not a real file. None } else if file_size > isize::max_value() as u64 { // Too long to safely map. 
// https://github.com/danburkert/memmap-rs/issues/69 None } else if file_size == 0 { // Mapping an empty file currently fails. // https://github.com/danburkert/memmap-rs/issues/72 None } else if file_size < 16 * 1024 { // Mapping small files is not worth it. None } else { // Explicitly set the length of the memory map, so that filesystem // changes can't race to violate the invariants we just checked. let map = unsafe { memmap::MmapOptions::new() .len(file_size as usize) .map(&file)? }; Some(map) }) } fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> Result<()> { // Encoding multiples of the block size is most efficient. let mut len = args.len()?; let mut block = [0; blake3::guts::BLOCK_LEN]; while len > 0 { output.fill(&mut block); let hex_str = hex::encode(&block[..]); let take_bytes = cmp::min(len, block.len() as u64); print!("{}", &hex_str[..2 * take_bytes as usize]); len -= take_bytes; } Ok(()) } fn write_raw_output(output: blake3::OutputReader, args: &Args) -> Result<()> { let mut output = output.take(args.len()?); let stdout = std::io::stdout(); let mut handler = stdout.lock(); std::io::copy(&mut output, &mut handler)?; Ok(()) } fn read_key_from_stdin() -> Result<[u8; blake3::KEY_LEN]> { let mut bytes = Vec::with_capacity(blake3::KEY_LEN + 1); let n = std::io::stdin() .lock() .take(blake3::KEY_LEN as u64 + 1) .read_to_end(&mut bytes)?; if n < 32 { bail!( "expected {} key bytes from stdin, found {}", blake3::KEY_LEN, n, ) } else if n > 32 { bail!("read more than {} key bytes from stdin", blake3::KEY_LEN) } else { Ok(bytes[..blake3::KEY_LEN].try_into().unwrap()) } } struct FilepathString { filepath_string: String, is_escaped: bool, } // returns (string, did_escape) fn filepath_to_string(filepath: &Path) -> FilepathString { let unicode_cow = filepath.to_string_lossy(); let mut filepath_string = unicode_cow.to_string(); // If we're on Windows, normalize backslashes to forward slashes. 
This // avoids a lot of ugly escaping in the common case, and it makes // checkfiles created on Windows more likely to be portable to Unix. It // also allows us to set a blanket "no backslashes allowed in checkfiles on // Windows" rule, rather than allowing a Unix backslash to potentially get // interpreted as a directory separator on Windows. if cfg!(windows) { filepath_string = filepath_string.replace('\\', "/"); } let mut is_escaped = false; if filepath_string.contains('\\') || filepath_string.contains('\n') { filepath_string = filepath_string.replace('\\', "\\\\").replace('\n', "\\n"); is_escaped = true; } FilepathString { filepath_string, is_escaped, } } fn hex_half_byte(c: char) -> Result { // The hex characters in the hash must be lowercase for now, though we // could support uppercase too if we wanted to. if '0' <= c && c <= '9' { return Ok(c as u8 - '0' as u8); } if 'a' <= c && c <= 'f' { return Ok(c as u8 - 'a' as u8 + 10); } bail!("Invalid hex"); } // The `check` command is a security tool. That means it's much better for a // check to fail more often than it should (a false negative), than for a check // to ever succeed when it shouldn't (a false positive). By forbidding certain // characters in checked filepaths, we avoid a class of false positives where // two different filepaths can get confused with each other. fn check_for_invalid_characters(utf8_path: &str) -> Result<()> { // Null characters in paths should never happen, but they can result in a // path getting silently truncated on Unix. if utf8_path.contains('\0') { bail!("Null character in path"); } // Because we convert invalid UTF-8 sequences in paths to the Unicode // replacement character, multiple different invalid paths can map to the // same UTF-8 string. 
if utf8_path.contains('�') { bail!("Unicode replacement character in path"); } // We normalize all Windows backslashes to forward slashes in our output, // so the only natural way to get a backslash in a checkfile on Windows is // to construct it on Unix and copy it over. (Or of course you could just // doctor it by hand.) To avoid confusing this with a directory separator, // we forbid backslashes entirely on Windows. Note that this check comes // after unescaping has been done. if cfg!(windows) && utf8_path.contains('\\') { bail!("Backslash in path"); } Ok(()) } fn unescape(mut path: &str) -> Result { let mut unescaped = String::with_capacity(2 * path.len()); while let Some(i) = path.find('\\') { ensure!(i < path.len() - 1, "Invalid backslash escape"); unescaped.push_str(&path[..i]); match path[i + 1..].chars().next().unwrap() { // Anything other than a recognized escape sequence is an error. 'n' => unescaped.push_str("\n"), '\\' => unescaped.push_str("\\"), _ => bail!("Invalid backslash escape"), } path = &path[i + 2..]; } unescaped.push_str(path); Ok(unescaped) } #[derive(Debug)] struct ParsedCheckLine { file_string: String, is_escaped: bool, file_path: PathBuf, expected_hash: blake3::Hash, } fn parse_check_line(mut line: &str) -> Result { // Trim off the trailing newline, if any. line = line.trim_end_matches('\n'); // If there's a backslash at the front of the line, that means we need to // unescape the path below. This matches the behavior of e.g. md5sum. let first = if let Some(c) = line.chars().next() { c } else { bail!("Empty line"); }; let mut is_escaped = false; if first == '\\' { is_escaped = true; line = &line[1..]; } // The front of the line must be a hash of the usual length, followed by // two spaces. The hex characters in the hash must be lowercase for now, // though we could support uppercase too if we wanted to. 
let hash_hex_len = 2 * blake3::OUT_LEN; let num_spaces = 2; let prefix_len = hash_hex_len + num_spaces; ensure!(line.len() > prefix_len, "Short line"); ensure!( line.chars().take(prefix_len).all(|c| c.is_ascii()), "Non-ASCII prefix" ); ensure!(&line[hash_hex_len..][..2] == " ", "Invalid space"); // Decode the hash hex. let mut hash_bytes = [0; blake3::OUT_LEN]; let mut hex_chars = line[..hash_hex_len].chars(); for byte in &mut hash_bytes { let high_char = hex_chars.next().unwrap(); let low_char = hex_chars.next().unwrap(); *byte = 16 * hex_half_byte(high_char)? + hex_half_byte(low_char)?; } let expected_hash: blake3::Hash = hash_bytes.into(); let file_string = line[prefix_len..].to_string(); let file_path_string = if is_escaped { // If we detected a backslash at the start of the line earlier, now we // need to unescape backslashes and newlines. unescape(&file_string)? } else { file_string.clone().into() }; check_for_invalid_characters(&file_path_string)?; Ok(ParsedCheckLine { file_string, is_escaped, file_path: file_path_string.into(), expected_hash, }) } fn hash_one_input(path: &Path, args: &Args) -> Result<()> { let mut input = Input::open(path, args)?; let output = input.hash(args)?; if args.raw() { write_raw_output(output, args)?; return Ok(()); } if args.no_names() { write_hex_output(output, args)?; println!(); return Ok(()); } let FilepathString { filepath_string, is_escaped, } = filepath_to_string(path); if is_escaped { print!("\\"); } write_hex_output(output, args)?; println!(" {}", filepath_string); Ok(()) } // Returns true for success. Having a boolean return value here, instead of // passing down the some_file_failed reference, makes it less likely that we // might forget to set it in some error condition. 
fn check_one_line(line: &str, args: &Args) -> bool {
    let ParsedCheckLine {
        file_string,
        is_escaped,
        file_path,
        expected_hash,
    } = match parse_check_line(line) {
        Ok(parsed) => parsed,
        Err(e) => {
            // A malformed line is reported on stderr, unlike per-file
            // failures below, which go to stdout.
            eprintln!("{}: {}", NAME, e);
            return false;
        }
    };

    // Report the path the same way it appeared in the checkfile, including
    // the leading backslash marker for escaped paths.
    let file_string = if is_escaped {
        "\\".to_string() + &file_string
    } else {
        file_string
    };

    let hash_result: Result<blake3::Hash> = Input::open(&file_path, args)
        .and_then(|mut input| input.hash(args))
        .map(|mut hash_output| {
            let mut found_hash_bytes = [0; blake3::OUT_LEN];
            hash_output.fill(&mut found_hash_bytes);
            found_hash_bytes.into()
        });
    let found_hash: blake3::Hash = match hash_result {
        Ok(hash) => hash,
        Err(e) => {
            println!("{}: FAILED ({})", file_string, e);
            return false;
        }
    };

    // This is a constant-time comparison.
    if expected_hash == found_hash {
        if !args.quiet() {
            println!("{}: OK", file_string);
        }
        true
    } else {
        println!("{}: FAILED", file_string);
        false
    }
}

/// Check every line of the checkfile at `path`, setting `*some_file_failed`
/// to true if any line fails. The flag is never reset to false here.
///
/// Only errors opening or reading the checkfile itself are returned; per-line
/// failures are printed by `check_one_line`.
fn check_one_checkfile(path: &Path, args: &Args, some_file_failed: &mut bool) -> Result<()> {
    let checkfile_input = Input::open(path, args)?;
    let mut bufreader = io::BufReader::new(checkfile_input);
    let mut line = String::new();
    loop {
        line.clear();
        let n = bufreader.read_line(&mut line)?;
        // Zero bytes read means end of input.
        if n == 0 {
            return Ok(());
        }
        // check_one_line() prints errors and turns them into a success=false
        // return, so it doesn't return a Result.
        let success = check_one_line(&line, args);
        if !success {
            *some_file_failed = true;
        }
    }
}

/// Entry point: parse arguments, then hash or check every file argument
/// inside a rayon thread pool. Exits with status 1 if any file failed.
fn main() -> Result<()> {
    let args = Args::parse()?;
    let mut thread_pool_builder = rayon::ThreadPoolBuilder::new();
    if let Some(num_threads) = args.num_threads()? {
        thread_pool_builder = thread_pool_builder.num_threads(num_threads);
    }
    let thread_pool = thread_pool_builder.build()?;
    thread_pool.install(|| {
        let mut some_file_failed = false;
        // Note that file_args automatically includes `-` if nothing is given.
        for path in &args.file_args {
            if args.check() {
                // A hash mismatch or a failure to read a hashed file will be
                // printed in the checkfile loop, and will not propagate here.
                // This is similar to the explicit error handling we do in the
                // hashing case immediately below. In these cases,
                // some_file_failed will be set to true.
                check_one_checkfile(path, &args, &mut some_file_failed)?;
            } else {
                // Errors encountered in hashing are tolerated and printed to
                // stderr. This allows e.g. `b3sum *` to print errors for
                // non-files and keep going. However, if we encounter any
                // errors we'll still return non-zero at the end.
                let result = hash_one_input(path, &args);
                if let Err(e) = result {
                    some_file_failed = true;
                    eprintln!("{}: {}: {}", NAME, path.to_string_lossy(), e);
                }
            }
        }
        std::process::exit(if some_file_failed { 1 } else { 0 });
    })
}
b3sum-1.2.0/src/unit_tests.rs000064400000000000000000000136040072674642500142470ustar 00000000000000use std::path::Path; #[test] fn test_parse_check_line() { // ========================= // ===== Success Cases ===== // ========================= // the basic case let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "0909090909090909090909090909090909090909090909090909090909090909 foo", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x09; 32])); assert!(!is_escaped); assert_eq!(file_string, "foo"); assert_eq!(file_path, Path::new("foo")); // regular whitespace let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "fafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafa fo \to\n\n\n", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0xfa; 32])); assert!(!is_escaped); assert_eq!(file_string, "fo \to"); assert_eq!(file_path, Path::new("fo \to")); // path is one space let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line(
"4242424242424242424242424242424242424242424242424242424242424242 ", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x42; 32])); assert!(!is_escaped); assert_eq!(file_string, " "); assert_eq!(file_path, Path::new(" ")); // *Unescaped* backslashes. Note that this line does *not* start with a // backslash, so something like "\" + "n" is interpreted as *two* // characters. We forbid all backslashes on Windows, so this test is // Unix-only. if cfg!(not(windows)) { let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "4343434343434343434343434343434343434343434343434343434343434343 fo\\a\\no", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x43; 32])); assert!(!is_escaped); assert_eq!(file_string, "fo\\a\\no"); assert_eq!(file_path, Path::new("fo\\a\\no")); } // escaped newline let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "\\4444444444444444444444444444444444444444444444444444444444444444 fo\\n\\no", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x44; 32])); assert!(is_escaped); assert_eq!(file_string, "fo\\n\\no"); assert_eq!(file_path, Path::new("fo\n\no")); // Escaped newline and backslash. Again because backslash is not allowed on // Windows, this test is Unix-only. 
if cfg!(not(windows)) { let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "\\4545454545454545454545454545454545454545454545454545454545454545 fo\\n\\\\o", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x45; 32])); assert!(is_escaped); assert_eq!(file_string, "fo\\n\\\\o"); assert_eq!(file_path, Path::new("fo\n\\o")); } // non-ASCII path let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "4646464646464646464646464646464646464646464646464646464646464646 否认", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x46; 32])); assert!(!is_escaped); assert_eq!(file_string, "否认"); assert_eq!(file_path, Path::new("否认")); // ========================= // ===== Failure Cases ===== // ========================= // too short crate::parse_check_line("").unwrap_err(); crate::parse_check_line("0").unwrap_err(); crate::parse_check_line("00").unwrap_err(); crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000") .unwrap_err(); crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000 ") .unwrap_err(); // not enough spaces crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000 foo") .unwrap_err(); // capital letter hex crate::parse_check_line( "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA foo", ) .unwrap_err(); // non-hex hex crate::parse_check_line( "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx foo", ) .unwrap_err(); // non-ASCII hex crate::parse_check_line("你好, 我叫杰克. 认识你很高兴. 要不要吃个香蕉? 
foo").unwrap_err(); // invalid escape sequence crate::parse_check_line( "\\0000000000000000000000000000000000000000000000000000000000000000 fo\\o", ) .unwrap_err(); // truncated escape sequence crate::parse_check_line( "\\0000000000000000000000000000000000000000000000000000000000000000 foo\\", ) .unwrap_err(); // null char crate::parse_check_line( "0000000000000000000000000000000000000000000000000000000000000000 fo\0o", ) .unwrap_err(); // Unicode replacement char crate::parse_check_line( "0000000000000000000000000000000000000000000000000000000000000000 fo�o", ) .unwrap_err(); // On Windows only, backslashes are not allowed, escaped or otherwise. if cfg!(windows) { crate::parse_check_line( "0000000000000000000000000000000000000000000000000000000000000000 fo\\o", ) .unwrap_err(); crate::parse_check_line( "\\0000000000000000000000000000000000000000000000000000000000000000 fo\\\\o", ) .unwrap_err(); } } b3sum-1.2.0/tests/cli_tests.rs000064400000000000000000000430340072674642500144120ustar 00000000000000use duct::cmd; use std::ffi::OsString; use std::fs; use std::io::prelude::*; use std::path::PathBuf; pub fn b3sum_exe() -> PathBuf { env!("CARGO_BIN_EXE_b3sum").into() } #[test] fn test_hash_one() { let expected = format!("{} -", blake3::hash(b"foo").to_hex()); let output = cmd!(b3sum_exe()).stdin_bytes("foo").read().unwrap(); assert_eq!(&*expected, output); } #[test] fn test_hash_one_raw() { let expected = blake3::hash(b"foo").as_bytes().to_owned(); let output = cmd!(b3sum_exe(), "--raw") .stdin_bytes("foo") .stdout_capture() .run() .unwrap() .stdout; assert_eq!(expected, output.as_slice()); } #[test] fn test_hash_many() { let dir = tempfile::tempdir().unwrap(); let file1 = dir.path().join("file1"); fs::write(&file1, b"foo").unwrap(); let file2 = dir.path().join("file2"); fs::write(&file2, b"bar").unwrap(); let output = cmd!(b3sum_exe(), &file1, &file2).read().unwrap(); let foo_hash = blake3::hash(b"foo"); let bar_hash = blake3::hash(b"bar"); let expected = format!( 
"{} {}\n{} {}", foo_hash.to_hex(), // account for slash normalization on Windows file1.to_string_lossy().replace("\\", "/"), bar_hash.to_hex(), file2.to_string_lossy().replace("\\", "/"), ); assert_eq!(expected, output); let output_no_names = cmd!(b3sum_exe(), "--no-names", &file1, &file2) .read() .unwrap(); let expected_no_names = format!("{}\n{}", foo_hash.to_hex(), bar_hash.to_hex(),); assert_eq!(expected_no_names, output_no_names); } #[test] fn test_missing_files() { let dir = tempfile::tempdir().unwrap(); let file1 = dir.path().join("file1"); fs::write(&file1, b"foo").unwrap(); let file2 = dir.path().join("file2"); fs::write(&file2, b"bar").unwrap(); let output = cmd!(b3sum_exe(), "file1", "missing_file", "file2") .dir(dir.path()) .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); assert!(!output.status.success()); let foo_hash = blake3::hash(b"foo"); let bar_hash = blake3::hash(b"bar"); let expected_stdout = format!( "{} file1\n{} file2\n", foo_hash.to_hex(), bar_hash.to_hex(), ); assert_eq!(expected_stdout.as_bytes(), &output.stdout[..]); let bing_error = fs::File::open(dir.path().join("missing_file")).unwrap_err(); let expected_stderr = format!("b3sum: missing_file: {}\n", bing_error.to_string()); assert_eq!(expected_stderr.as_bytes(), &output.stderr[..]); } #[test] fn test_hash_length() { let mut buf = [0; 100]; blake3::Hasher::new() .update(b"foo") .finalize_xof() .fill(&mut buf); let expected = format!("{} -", hex::encode(&buf[..])); let output = cmd!(b3sum_exe(), "--length=100") .stdin_bytes("foo") .read() .unwrap(); assert_eq!(&*expected, &*output); } #[test] fn test_keyed() { let key = [42; blake3::KEY_LEN]; let f = tempfile::NamedTempFile::new().unwrap(); f.as_file().write_all(b"foo").unwrap(); f.as_file().flush().unwrap(); let expected = blake3::keyed_hash(&key, b"foo").to_hex(); let output = cmd!(b3sum_exe(), "--keyed", "--no-names", f.path()) .stdin_bytes(&key[..]) .read() .unwrap(); assert_eq!(&*expected, &*output); } #[test] fn 
test_derive_key() { let context = "BLAKE3 2019-12-28 10:28:41 example context"; let f = tempfile::NamedTempFile::new().unwrap(); f.as_file().write_all(b"key material").unwrap(); f.as_file().flush().unwrap(); let expected = hex::encode(blake3::derive_key(context, b"key material")); let output = cmd!(b3sum_exe(), "--derive-key", context, "--no-names", f.path()) .read() .unwrap(); assert_eq!(&*expected, &*output); } #[test] fn test_no_mmap() { let f = tempfile::NamedTempFile::new().unwrap(); f.as_file().write_all(b"foo").unwrap(); f.as_file().flush().unwrap(); let expected = blake3::hash(b"foo").to_hex(); let output = cmd!(b3sum_exe(), "--no-mmap", "--no-names", f.path()) .read() .unwrap(); assert_eq!(&*expected, &*output); } #[test] fn test_length_without_value_is_an_error() { let result = cmd!(b3sum_exe(), "--length") .stdin_bytes("foo") .stderr_capture() .run(); assert!(result.is_err()); } #[test] fn test_raw_with_multi_files_is_an_error() { let f1 = tempfile::NamedTempFile::new().unwrap(); let f2 = tempfile::NamedTempFile::new().unwrap(); // Make sure it doesn't error with just one file let result = cmd!(b3sum_exe(), "--raw", f1.path()).stdout_capture().run(); assert!(result.is_ok()); // Make sure it errors when both file are passed let result = cmd!(b3sum_exe(), "--raw", f1.path(), f2.path()) .stderr_capture() .run(); assert!(result.is_err()); } #[test] #[cfg(unix)] fn test_newline_and_backslash_escaping_on_unix() { let empty_hash = blake3::hash(b"").to_hex(); let dir = tempfile::tempdir().unwrap(); fs::create_dir(dir.path().join("subdir")).unwrap(); let names = [ "abcdef", "abc\ndef", "abc\\def", "abc\rdef", "abc\r\ndef", "subdir/foo", ]; let mut paths = Vec::new(); for name in &names { let path = dir.path().join(name); println!("creating file at {:?}", path); fs::write(&path, b"").unwrap(); paths.push(path); } let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap(); let expected = format!( "\ {0} abcdef \\{0} abc\\ndef \\{0} abc\\\\def {0} 
abc\rdef \\{0} abc\r\\ndef {0} subdir/foo", empty_hash, ); println!("output"); println!("======"); println!("{}", output); println!(); println!("expected"); println!("========"); println!("{}", expected); println!(); assert_eq!(expected, output); } #[test] #[cfg(windows)] fn test_slash_normalization_on_windows() { let empty_hash = blake3::hash(b"").to_hex(); let dir = tempfile::tempdir().unwrap(); fs::create_dir(dir.path().join("subdir")).unwrap(); // Note that filenames can't contain newlines or backslashes on Windows, so // we don't test escaping here. We only test forward slash and backslash as // directory separators. let names = ["abcdef", "subdir/foo", "subdir\\bar"]; let mut paths = Vec::new(); for name in &names { let path = dir.path().join(name); println!("creating file at {:?}", path); fs::write(&path, b"").unwrap(); paths.push(path); } let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap(); let expected = format!( "\ {0} abcdef {0} subdir/foo {0} subdir/bar", empty_hash, ); println!("output"); println!("======"); println!("{}", output); println!(); println!("expected"); println!("========"); println!("{}", expected); println!(); assert_eq!(expected, output); } #[test] #[cfg(unix)] fn test_invalid_unicode_on_unix() { use std::os::unix::ffi::OsStringExt; let empty_hash = blake3::hash(b"").to_hex(); let dir = tempfile::tempdir().unwrap(); let names = ["abcdef".into(), OsString::from_vec(b"abc\xffdef".to_vec())]; let mut paths = Vec::new(); for name in &names { let path = dir.path().join(name); println!("creating file at {:?}", path); // Note: Some operating systems, macOS in particular, simply don't // allow invalid Unicode in filenames. On those systems, this write // will fail. That's fine, we'll just short-circuit this test in that // case. But assert that at least Linux allows this. 
let write_result = fs::write(&path, b""); if cfg!(target_os = "linux") { write_result.expect("Linux should allow invalid Unicode"); } else if write_result.is_err() { return; } paths.push(path); } let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap(); let expected = format!( "\ {0} abcdef {0} abc�def", empty_hash, ); println!("output"); println!("======"); println!("{}", output); println!(); println!("expected"); println!("========"); println!("{}", expected); println!(); assert_eq!(expected, output); } #[test] #[cfg(windows)] fn test_invalid_unicode_on_windows() { use std::os::windows::ffi::OsStringExt; let empty_hash = blake3::hash(b"").to_hex(); let dir = tempfile::tempdir().unwrap(); let surrogate_char = 0xDC00; let bad_unicode_wchars = [ 'a' as u16, 'b' as u16, 'c' as u16, surrogate_char, 'd' as u16, 'e' as u16, 'f' as u16, ]; let bad_osstring = OsString::from_wide(&bad_unicode_wchars); let names = ["abcdef".into(), bad_osstring]; let mut paths = Vec::new(); for name in &names { let path = dir.path().join(name); println!("creating file at {:?}", path); fs::write(&path, b"").unwrap(); paths.push(path); } let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap(); let expected = format!( "\ {0} abcdef {0} abc�def", empty_hash, ); println!("output"); println!("======"); println!("{}", output); println!(); println!("expected"); println!("========"); println!("{}", expected); println!(); assert_eq!(expected, output); } #[test] fn test_check() { // Make a directory full of files, and make sure the b3sum output in that // directory is what we expect. 
let a_hash = blake3::hash(b"a").to_hex(); let b_hash = blake3::hash(b"b").to_hex(); let cd_hash = blake3::hash(b"cd").to_hex(); let dir = tempfile::tempdir().unwrap(); fs::write(dir.path().join("a"), b"a").unwrap(); fs::write(dir.path().join("b"), b"b").unwrap(); fs::create_dir(dir.path().join("c")).unwrap(); fs::write(dir.path().join("c/d"), b"cd").unwrap(); let output = cmd!(b3sum_exe(), "a", "b", "c/d") .dir(dir.path()) .stdout_capture() .stderr_capture() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_checkfile = format!( "{} a\n\ {} b\n\ {} c/d\n", a_hash, b_hash, cd_hash, ); assert_eq!(expected_checkfile, stdout); assert_eq!("", stderr); // Now use the output we just validated as a checkfile, passed to stdin. let output = cmd!(b3sum_exe(), "--check") .stdin_bytes(expected_checkfile.as_bytes()) .dir(dir.path()) .stdout_capture() .stderr_capture() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_check_output = "\ a: OK\n\ b: OK\n\ c/d: OK\n"; assert_eq!(expected_check_output, stdout); assert_eq!("", stderr); // Now pass the same checkfile twice on the command line just for fun. let checkfile_path = dir.path().join("checkfile"); fs::write(&checkfile_path, &expected_checkfile).unwrap(); let output = cmd!(b3sum_exe(), "--check", &checkfile_path, &checkfile_path) .dir(dir.path()) .stdout_capture() .stderr_capture() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let mut double_check_output = String::new(); double_check_output.push_str(&expected_check_output); double_check_output.push_str(&expected_check_output); assert_eq!(double_check_output, stdout); assert_eq!("", stderr); // Corrupt one of the files and check again. 
fs::write(dir.path().join("b"), b"CORRUPTION").unwrap(); let output = cmd!(b3sum_exe(), "--check", &checkfile_path) .dir(dir.path()) .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_check_failure = "\ a: OK\n\ b: FAILED\n\ c/d: OK\n"; assert!(!output.status.success()); assert_eq!(expected_check_failure, stdout); assert_eq!("", stderr); // Delete one of the files and check again. fs::remove_file(dir.path().join("b")).unwrap(); let open_file_error = fs::File::open(dir.path().join("b")).unwrap_err(); let output = cmd!(b3sum_exe(), "--check", &checkfile_path) .dir(dir.path()) .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_check_failure = format!( "a: OK\n\ b: FAILED ({})\n\ c/d: OK\n", open_file_error, ); assert!(!output.status.success()); assert_eq!(expected_check_failure, stdout); assert_eq!("", stderr); // Confirm that --quiet suppresses the OKs but not the FAILEDs. let output = cmd!(b3sum_exe(), "--check", "--quiet", &checkfile_path) .dir(dir.path()) .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_check_failure = format!("b: FAILED ({})\n", open_file_error); assert!(!output.status.success()); assert_eq!(expected_check_failure, stdout); assert_eq!("", stderr); } #[test] fn test_check_invalid_characters() { // Check that a null character in the path fails. 
let output = cmd!(b3sum_exe(), "--check") .stdin_bytes("0000000000000000000000000000000000000000000000000000000000000000 \0") .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); assert!(!output.status.success()); assert_eq!("", stdout); assert_eq!("b3sum: Null character in path\n", stderr); // Check that a Unicode replacement character in the path fails. let output = cmd!(b3sum_exe(), "--check") .stdin_bytes("0000000000000000000000000000000000000000000000000000000000000000 �") .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); assert!(!output.status.success()); assert_eq!("", stdout); assert_eq!("b3sum: Unicode replacement character in path\n", stderr); // Check that an invalid escape sequence in the path fails. let output = cmd!(b3sum_exe(), "--check") .stdin_bytes("\\0000000000000000000000000000000000000000000000000000000000000000 \\a") .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); assert!(!output.status.success()); assert_eq!("", stdout); assert_eq!("b3sum: Invalid backslash escape\n", stderr); // Windows also forbids literal backslashes. Check for that if and only if // we're on Windows. 
if cfg!(windows) { let output = cmd!(b3sum_exe(), "--check") .stdin_bytes("0000000000000000000000000000000000000000000000000000000000000000 \\") .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); assert!(!output.status.success()); assert_eq!("", stdout); assert_eq!("b3sum: Backslash in path\n", stderr); } } #[test] fn test_globbing() { // On Unix, globbing is provided by the shell. On Windows, globbing is // provided by us, using the `wild` crate. let dir = tempfile::tempdir().unwrap(); let file1 = dir.path().join("file1"); fs::write(&file1, b"foo").unwrap(); let file2 = dir.path().join("file2"); fs::write(&file2, b"bar").unwrap(); let foo_hash = blake3::hash(b"foo"); let bar_hash = blake3::hash(b"bar"); // NOTE: This assumes that the glob will be expanded in alphabetical order, // to "file1 file2" rather than "file2 file1". So far, this seems to // be true (guaranteed?) of Unix shell behavior, and true in practice // with the `wild` crate on Windows. It's possible that this could // start failing in the future, though, or on some unknown platform. // If that ever happens, we'll need to relax this test somehow, // probably by just testing for both possible outputs. I'm not // handling that case in advance, though, because I'd prefer to hear // about it if it comes up. let expected = format!("{} file1\n{} file2", foo_hash.to_hex(), bar_hash.to_hex()); let star_command = format!("{} *", b3sum_exe().to_str().unwrap()); let (exe, c_flag) = if cfg!(windows) { ("cmd.exe", "/C") } else { ("/bin/sh", "-c") }; let output = cmd!(exe, c_flag, star_command) .dir(dir.path()) .read() .unwrap(); assert_eq!(expected, output); } b3sum-1.2.0/what_does_check_do.md000064400000000000000000000155440072674642500150340ustar 00000000000000# How does `b3sum --check` behave exactly?
or: Are filepaths...text?

Most of the time, `b3sum --check` is a drop-in replacement for `md5sum --check` and other Coreutils hashing tools. It consumes a checkfile (the output of a regular `b3sum` command), re-hashes all the files listed there, and returns success if all of those hashes are still correct. What makes this more complicated than it might seem is that representing filepaths as text means we need to consider many possible edge cases of unrepresentable filepaths. This document describes all of these edge cases in detail.

## The simple case

Here's the result of running `b3sum a b c/d` in a directory that contains those three files:

```bash
$ echo hi > a
$ echo lo > b
$ mkdir c
$ echo stuff > c/d
$ b3sum a b c/d
0b8b60248fad7ac6dfac221b7e01a8b91c772421a15b387dd1fb2d6a94aee438  a
6ae4a57bbba24f79c461d30bcb4db973b9427d9207877e34d2d74528daa84115  b
2d477356c962e54784f1c5dc5297718d92087006f6ee96b08aeaf7f3cd252377  c/d
```

If we pipe that output into `b3sum --check`, it will exit with status zero (success) and print:

```bash
$ b3sum a b c/d | b3sum --check
a: OK
b: OK
c/d: OK
```

If we delete `b` and change the contents of `c/d`, and then use the same checkfile as above, `b3sum --check` will exit with a non-zero status (failure) and print:

```bash
$ b3sum a b c/d > checkfile
$ rm b
$ echo more stuff >> c/d
$ b3sum --check checkfile
a: OK
b: FAILED (No such file or directory (os error 2))
c/d: FAILED
```

In these typical cases, `b3sum` and `md5sum` have identical output for success and very similar output for failure.

## Escaping newlines and backslashes

Since the checkfile format (the regular output format of `b3sum`) is newline-separated text, we need to worry about what happens when a filepath contains a newline, or worse. Suppose we create a file named `x[newline]x` (3 characters).
One way to create such a file is with a Python one-liner like this:

```python
>>> open("x\nx", "w")
```

Here's what happens when we hash that file with `b3sum`:

```bash
$ b3sum x*
\af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262  x\nx
```

Notice two things. First, `b3sum` puts a single `\` character at the front of the line. This indicates that the filepath contains escape sequences that `b3sum --check` will need to unescape. Then, `b3sum` replaces the newline character in the filepath with the two-character escape sequence `\n`. Similarly, if the filepath contained a backslash, `b3sum` would escape it as `\\` in the output. So far, all of this behavior is still identical to `md5sum`.

## Invalid Unicode

This is where `b3sum` and `md5sum` diverge. Apart from the newline and backslash escapes described above, `md5sum` copies all other filepath bytes verbatim to its output. That means its output encoding is "ASCII plus whatever bytes we got from the command line". This creates two problems:

1. Printing something that isn't UTF-8 is kind of gross.
2. Windows support.

What's the deal with Windows? To start with, there's a fundamental difference in how Unix and Windows represent filepaths. Unix filepaths are "usually UTF-8" and Windows filepaths are "usually UTF-16". That means that a file named `abc` is typically represented as the bytes `[97, 98, 99]` on Unix and as the bytes `[97, 0, 98, 0, 99, 0]` on Windows. The `md5sum` approach won't work if we plan on creating a checkfile on Unix and checking it on Windows, or vice versa.

A more portable approach is to convert platform-specific bytes into some consistent Unicode encoding. (In practice this is going to be UTF-8, but in theory it could be anything.) Then when `--check` needs to open a file, we convert the Unicode representation back into platform-specific bytes. This makes important common cases like `abc`, and in fact even `abc[newline]def`, work as expected. Great!
But...what did we mean above when we said *usually* UTF-8 and *usually* UTF-16? It turns out that not every possible sequence of bytes is valid UTF-8, and not every possible sequence of 16-bit wide chars is valid UTF-16. For example, the byte 0xFF (255) can never appear in any UTF-8 string. If we ask Python to decode it, it yells at us:

```python
>>> b"\xFF".decode("UTF-8")
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte
```

However, tragically, we *can* create a file with that byte in its name (on Linux at least, though not usually on macOS):

```python
>>> open(b"y\xFFy", "w")
```

So some filepaths aren't representable in Unicode at all. Our plan to "convert platform-specific bytes into some consistent Unicode encoding" isn't going to work for everything. What does `b3sum` do with the file above?

```bash
$ b3sum y*
af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262  y�y
```

That � in there is a "Unicode replacement character". When we run into filepaths that we can't represent in Unicode, we replace the unrepresentable parts with these characters. On the checking side, to avoid any possible confusion between two different invalid filepaths, we automatically fail if we see a replacement character. Together with a few more details covered in the next section, this gives us an important set of properties:

1. Any file can be hashed locally.
2. Any file with a valid Unicode name not containing the � character can be checked.
3. Checking ambiguous or unrepresentable filepaths always fails.
4. Checkfiles are always valid UTF-8.
5. Checkfiles are portable between Unix and Windows.

## Formal Rules

1. When hashing, filepaths are represented in a platform-specific encoding, which can accommodate any filepath on the current platform. In Rust, this is `OsStr`/`OsString`.
2. In output, filepaths are first converted to UTF-8. Any non-Unicode segments are replaced with Unicode replacement characters (U+FFFD).
In Rust, this is `OsStr::to_string_lossy`.
3. Then, if a filepath contains any backslashes (U+005C) or newlines (U+000A), these characters are escaped as `\\` and `\n` respectively.
4. Finally, any output line containing an escape sequence is prefixed with a single backslash.
5. When checking, each line is parsed as UTF-8, separated by a newline (U+000A). Invalid UTF-8 is an error.
6. Then, if a line begins with a backslash, the filepath component is unescaped. Any escape sequence other than `\\` or `\n` is an error. If a line does not begin with a backslash, unescaping is not performed, and any backslashes in the filepath component are interpreted literally. (`b3sum` output never contains unescaped backslashes, but they can occur in checkfiles assembled by hand.)
7. Finally, if a filepath contains a Unicode replacement character (U+FFFD) or a null character (U+0000), it is an error.

**Additionally, on Windows only:**

8. In output, all backslashes (U+005C) are replaced with forward slashes (U+002F).
9. When checking, after unescaping, if a filepath contains a backslash, it is an error.