b3sum-1.8.1/.cargo_vcs_info.json0000644000000001430000000000100120720ustar { "git": { "sha1": "ad639b126ef9b5f3b131093363cc3bb6bba4c3bf" }, "path_in_vcs": "b3sum" }b3sum-1.8.1/.gitignore000064400000000000000000000000141046102023000126470ustar 00000000000000!Cargo.lock b3sum-1.8.1/Cargo.lock0000644000000326170000000000100100600ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 4 [[package]] name = "anstream" version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ "anstyle", "anstyle-parse", "anstyle-query", "anstyle-wincon", "colorchoice", "is_terminal_polyfill", "utf8parse", ] [[package]] name = "anstyle" version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ "windows-sys", ] [[package]] name = "anstyle-wincon" version = "3.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" dependencies = [ "anstyle", "once_cell", "windows-sys", ] [[package]] name = "anyhow" version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dcfed56ad506cb2c684a14971b8861fdc3baaaae314b9e5f9bb532cbe3ba7a4f" [[package]] name = "arrayref" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" [[package]] name = "arrayvec" version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "b3sum" version = "1.8.1" dependencies = [ "anyhow", "blake3", "clap", "duct", "hex", "rayon-core", "tempfile", "wild", ] [[package]] name = "bitflags" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" [[package]] name = "blake3" version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "389a099b34312839e16420d499a9cad9650541715937ffbdd40d36f49e77eeb3" dependencies = [ "arrayref", "arrayvec", "cc", "cfg-if", "constant_time_eq", "memmap2", "rayon-core", ] [[package]] name = "cc" version = "1.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fcb57c740ae1daf453ae85f16e37396f672b039e00d9d866e07ddb24e328e3a" dependencies = [ "shlex", ] [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" version = "4.5.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8aa86934b44c19c50f87cc2790e19f54f7a67aedb64101c2e1a2e5ecfb73944" dependencies = [ "clap_builder", "clap_derive", ] [[package]] name = "clap_builder" version = "4.5.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2414dbb2dd0695280da6ea9261e327479e9d37b0630f6b53ba2a11c60c679fd9" dependencies = [ "anstream", "anstyle", "clap_lex", "strsim", "terminal_size", ] [[package]] name = "clap_derive" version = "4.5.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" 
dependencies = [ "heck", "proc-macro2", "quote", "syn", ] [[package]] name = "clap_lex" version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "colorchoice" version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "constant_time_eq" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" [[package]] name = "crossbeam-deque" version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" dependencies = [ "crossbeam-epoch", "crossbeam-utils", ] [[package]] name = "crossbeam-epoch" version = "0.9.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" dependencies = [ "crossbeam-utils", ] [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" [[package]] name = "duct" version = "0.13.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4ab5718d1224b63252cd0c6f74f6480f9ffeb117438a2e0f5cf6d9a4798929c" dependencies = [ "libc", "once_cell", "os_pipe", "shared_child", ] [[package]] name = "errno" version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", "windows-sys", ] [[package]] name = "fastrand" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "getrandom" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" dependencies = [ "cfg-if", "libc", "r-efi", "wasi", ] [[package]] name = "glob" version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" [[package]] name = "heck" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hex" version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] name = "libc" version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "linux-raw-sys" version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fe7db12097d22ec582439daf8618b8fdd1a7bef6270e9af3b1ebcd30893cf413" [[package]] name = "memmap2" version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f" dependencies = [ "libc", ] [[package]] name = "once_cell" version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "os_pipe" version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5ffd2b0a5634335b135d5728d84c5e0fd726954b87111f7506a61c502280d982" dependencies = [ "libc", "windows-sys", ] [[package]] name = "proc-macro2" version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" dependencies = [ "unicode-ident", ] [[package]] name = "quote" version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] [[package]] name = "r-efi" version = "5.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" [[package]] name = "rayon-core" version = "1.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" dependencies = [ "crossbeam-deque", "crossbeam-utils", ] [[package]] name = "rustix" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d97817398dd4bb2e6da002002db259209759911da105da92bec29ccb12cf58bf" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", "windows-sys", ] [[package]] name = "shared_child" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09fa9338aed9a1df411814a5b2252f7cd206c55ae9bf2fa763f8de84603aa60c" dependencies = [ "libc", "windows-sys", ] [[package]] name = "shlex" version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "strsim" version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "tempfile" version = "3.19.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7437ac7763b9b123ccf33c338a5cc1bac6f69b45a136c19bdd8a65e3916435bf" dependencies = [ "fastrand", "getrandom", "once_cell", "rustix", "windows-sys", ] [[package]] name = "terminal_size" version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45c6481c4829e4cc63825e62c49186a34538b7b2750b73b266581ffb612fb5ed" dependencies = [ "rustix", "windows-sys", ] [[package]] name = "unicode-ident" version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "utf8parse" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "wasi" version = "0.14.2+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" dependencies = [ "wit-bindgen-rt", ] [[package]] name = "wild" version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3131afc8c575281e1e80f36ed6a092aa502c08b18ed7524e86fbbb12bb410e1" dependencies = [ "glob", ] [[package]] name = "windows-sys" version = "0.59.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", 
"windows_i686_gnu", "windows_i686_gnullvm", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_i686_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_x86_64_gnu" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "wit-bindgen-rt" version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ "bitflags", ] 
b3sum-1.8.1/Cargo.toml0000644000000027730000000000100101030ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "b3sum" version = "1.8.1" authors = ["Jack O'Connor "] build = false autolib = false autobins = false autoexamples = false autotests = false autobenches = false description = "a command line implementation of the BLAKE3 hash function" readme = "README.md" license = "CC0-1.0 OR Apache-2.0 OR Apache-2.0 WITH LLVM-exception" repository = "https://github.com/BLAKE3-team/BLAKE3" [features] neon = ["blake3/neon"] prefer_intrinsics = ["blake3/prefer_intrinsics"] pure = ["blake3/pure"] [[bin]] name = "b3sum" path = "src/main.rs" [[test]] name = "cli_tests" path = "tests/cli_tests.rs" [dependencies.anyhow] version = "1.0.25" [dependencies.blake3] version = "1.8.1" features = [ "mmap", "rayon", ] [dependencies.clap] version = "4.0.8" features = [ "derive", "wrap_help", ] [dependencies.hex] version = "0.4.0" [dependencies.rayon-core] version = "1.12.1" [dependencies.wild] version = "2.0.3" [dev-dependencies.duct] version = "0.13.3" [dev-dependencies.tempfile] version = "3.1.0" b3sum-1.8.1/Cargo.toml.orig000064400000000000000000000013111046102023000135470ustar 00000000000000[package] name = "b3sum" version = "1.8.1" authors = ["Jack O'Connor "] description = "a command line implementation of the BLAKE3 hash function" repository = "https://github.com/BLAKE3-team/BLAKE3" license = "CC0-1.0 OR Apache-2.0 OR Apache-2.0 WITH LLVM-exception" readme = "README.md" edition = "2021" [features] neon = ["blake3/neon"] 
prefer_intrinsics = ["blake3/prefer_intrinsics"] pure = ["blake3/pure"] [dependencies] anyhow = "1.0.25" blake3 = { version = "1.8.1", path = "..", features = ["mmap", "rayon"] } clap = { version = "4.0.8", features = ["derive", "wrap_help"] } hex = "0.4.0" rayon-core = "1.12.1" wild = "2.0.3" [dev-dependencies] duct = "0.13.3" tempfile = "3.1.0" b3sum-1.8.1/LICENSE_A2000064400000000000000000000261411046102023000122170ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2019 Jack O'Connor and Samuel Neves Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. b3sum-1.8.1/LICENSE_A2LLVM000064400000000000000000000277531046102023000127240ustar 00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright 2019 Jack O'Connor and Samuel Neves Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ---- LLVM Exceptions to the Apache 2.0 License ---- As an exception, if, as a result of your compiling your source code, portions of this Software are embedded into an Object form of such source code, you may redistribute such embedded portions in such Object form without complying with the conditions of Sections 4(a), 4(b) and 4(d) of the License. In addition, if you combine or link compiled forms of this Software with software that is licensed under the GPLv2 ("Combined Software") and if a court of competent jurisdiction determines that the patent provision (Section 3), the indemnity provision (Section 9) or other Section of the License conflicts with the conditions of the GPLv2, you may retroactively and prospectively choose to deem waived or otherwise exclude such Section(s) of the License, but only in their entirety and only with respect to the Combined Software. b3sum-1.8.1/LICENSE_CC0000064400000000000000000000156101046102023000123210ustar 00000000000000Creative Commons Legal Code CC0 1.0 Universal CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. 
CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER. Statement of Purpose The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 1. Copyright and Related Rights. A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: i. 
the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; ii. moral rights retained by the original author(s) and/or performer(s); iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; v. rights protecting the extraction, dissemination, use and reuse of data in a Work; vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 2. Waiver. To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). 
Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 3. Public License Fallback. Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 4. Limitations and Disclaimers. a. 
No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. b3sum-1.8.1/README.md000064400000000000000000000047661046102023000121600ustar 00000000000000# b3sum A command line utility for calculating [BLAKE3](https://github.com/BLAKE3-team/BLAKE3) hashes, similar to Coreutils tools like `b2sum` or `md5sum`. ``` Usage: b3sum [OPTIONS] [FILE]... Arguments: [FILE]... 
Files to hash, or checkfiles to check Options: --keyed Use the keyed mode, reading the 32-byte key from stdin --derive-key <CONTEXT> Use the key derivation mode, with the given context string -l, --length <LEN> The number of output bytes, before hex encoding [default: 32] --seek <SEEK> The starting output byte offset, before hex encoding [default: 0] --num-threads <NUM> The maximum number of threads to use --no-mmap Disable memory mapping --no-names Omit filenames in the output --raw Write raw output bytes to stdout, rather than hex --tag Output BSD-style checksums: BLAKE3 ([FILE]) = [HASH] -c, --check Read BLAKE3 sums from the [FILE]s and check them --quiet Skip printing OK for each checked file -h, --help Print help (see more with '--help') -V, --version Print version ``` See also [this document about how the `--check` flag works](https://github.com/BLAKE3-team/BLAKE3/blob/master/b3sum/what_does_check_do.md). # Example Hash the file `foo.txt`: ```bash b3sum foo.txt ``` Time hashing a gigabyte of data, to see how fast it is: ```bash # Create a 1 GB file. head -c 1000000000 /dev/zero > /tmp/bigfile # Hash it with SHA-256. time openssl sha256 /tmp/bigfile # Hash it with BLAKE3. time b3sum /tmp/bigfile ``` # Installation Prebuilt binaries are available for Linux, Windows, and macOS (requiring the [unidentified developer workaround](https://support.apple.com/guide/mac-help/open-a-mac-app-from-an-unidentified-developer-mh40616/mac)) on the [releases page](https://github.com/BLAKE3-team/BLAKE3/releases). If you've [installed Rust and Cargo](https://doc.rust-lang.org/cargo/getting-started/installation.html), you can also build `b3sum` yourself with: ``` cargo install b3sum ``` On Linux for example, Cargo will put the compiled binary in `~/.cargo/bin`. You might want to add that directory to your `$PATH`, or `rustup` might have done it for you when you installed Cargo. If you want to install directly from this directory, you can run `cargo install --path .`.
Or you can just build with `cargo build --release`, which puts the binary at `./target/release/b3sum`. b3sum-1.8.1/src/main.rs000064400000000000000000000425761046102023000127630ustar 00000000000000use anyhow::{bail, ensure}; use clap::Parser; use std::cmp; use std::fs::File; use std::io; use std::io::prelude::*; use std::path::{Path, PathBuf}; #[cfg(test)] mod unit_tests; const NAME: &str = "b3sum"; const DERIVE_KEY_ARG: &str = "derive_key"; const KEYED_ARG: &str = "keyed"; const LENGTH_ARG: &str = "length"; const NO_NAMES_ARG: &str = "no_names"; const RAW_ARG: &str = "raw"; const TAG_ARG: &str = "tag"; const CHECK_ARG: &str = "check"; #[derive(Parser)] #[command(version, max_term_width(100))] struct Inner { /// Files to hash, or checkfiles to check /// /// When no file is given, or when - is given, read standard input. file: Vec, /// Use the keyed mode, reading the 32-byte key from stdin #[arg(long, requires("file"))] keyed: bool, /// Use the key derivation mode, with the given context string /// /// Cannot be used with --keyed. #[arg(long, value_name("CONTEXT"), conflicts_with(KEYED_ARG))] derive_key: Option, /// The number of output bytes, before hex encoding #[arg( short, long, default_value_t = blake3::OUT_LEN as u64, value_name("LEN") )] length: u64, /// The starting output byte offset, before hex encoding #[arg(long, default_value_t = 0, value_name("SEEK"))] seek: u64, /// The maximum number of threads to use /// /// By default, this is the number of logical cores. If this flag is /// omitted, or if its value is 0, RAYON_NUM_THREADS is also respected. #[arg(long, value_name("NUM"))] num_threads: Option, /// Disable memory mapping /// /// Currently this also disables multithreading. #[arg(long)] no_mmap: bool, /// Omit filenames in the output #[arg(long)] no_names: bool, /// Write raw output bytes to stdout, rather than hex /// /// --no-names is implied. In this case, only a single input is allowed. 
#[arg(long)] raw: bool, /// Output BSD-style checksums: BLAKE3 ([FILE]) = [HASH] #[arg(long)] tag: bool, /// Read BLAKE3 sums from the [FILE]s and check them #[arg( short, long, conflicts_with(DERIVE_KEY_ARG), conflicts_with(KEYED_ARG), conflicts_with(LENGTH_ARG), conflicts_with(RAW_ARG), conflicts_with(TAG_ARG), conflicts_with(NO_NAMES_ARG) )] check: bool, /// Skip printing OK for each checked file /// /// Must be used with --check. #[arg(long, requires(CHECK_ARG))] quiet: bool, } struct Args { inner: Inner, file_args: Vec, base_hasher: blake3::Hasher, } impl Args { fn parse() -> anyhow::Result { // wild::args_os() is equivalent to std::env::args_os() on Unix, // but on Windows it adds support for globbing. let inner = Inner::parse_from(wild::args_os()); let file_args = if !inner.file.is_empty() { inner.file.clone() } else { vec!["-".into()] }; if inner.raw && file_args.len() > 1 { bail!("Only one filename can be provided when using --raw"); } let base_hasher = if inner.keyed { // In keyed mode, since stdin is used for the key, we can't handle // `-` arguments. Input::open handles that case below. blake3::Hasher::new_keyed(&read_key_from_stdin()?) 
} else if let Some(ref context) = inner.derive_key { blake3::Hasher::new_derive_key(context) } else { blake3::Hasher::new() }; Ok(Self { inner, file_args, base_hasher, }) } fn num_threads(&self) -> Option { self.inner.num_threads } fn check(&self) -> bool { self.inner.check } fn raw(&self) -> bool { self.inner.raw } fn tag(&self) -> bool { self.inner.tag } fn no_mmap(&self) -> bool { self.inner.no_mmap } fn no_names(&self) -> bool { self.inner.no_names } fn len(&self) -> u64 { self.inner.length } fn seek(&self) -> u64 { self.inner.seek } fn keyed(&self) -> bool { self.inner.keyed } fn quiet(&self) -> bool { self.inner.quiet } } fn hash_path(args: &Args, path: &Path) -> anyhow::Result { let mut hasher = args.base_hasher.clone(); if path == Path::new("-") { if args.keyed() { bail!("Cannot open `-` in keyed mode"); } hasher.update_reader(io::stdin().lock())?; } else if args.no_mmap() { hasher.update_reader(File::open(path)?)?; } else { // The fast path: Try to mmap the file and hash it with multiple threads. hasher.update_mmap_rayon(path)?; } let mut output_reader = hasher.finalize_xof(); output_reader.set_position(args.seek()); Ok(output_reader) } fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> anyhow::Result<()> { // Encoding multiples of the 64 bytes is most efficient. // TODO: This computes each output block twice when the --seek argument isn't a multiple of 64. // We'll refactor all of this soon anyway, once SIMD optimizations are available for the XOF. 
let mut len = args.len(); let mut block = [0; blake3::BLOCK_LEN]; while len > 0 { output.fill(&mut block); let hex_str = hex::encode(&block[..]); let take_bytes = cmp::min(len, block.len() as u64); print!("{}", &hex_str[..2 * take_bytes as usize]); len -= take_bytes; } Ok(()) } fn write_raw_output(output: blake3::OutputReader, args: &Args) -> anyhow::Result<()> { let mut output = output.take(args.len()); let stdout = std::io::stdout(); let mut handler = stdout.lock(); std::io::copy(&mut output, &mut handler)?; Ok(()) } fn read_key_from_stdin() -> anyhow::Result<[u8; blake3::KEY_LEN]> { let mut bytes = Vec::with_capacity(blake3::KEY_LEN + 1); let n = std::io::stdin() .lock() .take(blake3::KEY_LEN as u64 + 1) .read_to_end(&mut bytes)?; if n < blake3::KEY_LEN { bail!( "expected {} key bytes from stdin, found {}", blake3::KEY_LEN, n, ) } else if n > blake3::KEY_LEN { bail!("read more than {} key bytes from stdin", blake3::KEY_LEN) } else { Ok(bytes[..blake3::KEY_LEN].try_into().unwrap()) } } struct FilepathString { filepath_string: String, is_escaped: bool, } // returns (string, did_escape) fn filepath_to_string(filepath: &Path) -> FilepathString { let unicode_cow = filepath.to_string_lossy(); let mut filepath_string = unicode_cow.to_string(); // If we're on Windows, normalize backslashes to forward slashes. This // avoids a lot of ugly escaping in the common case, and it makes // checkfiles created on Windows more likely to be portable to Unix. It // also allows us to set a blanket "no backslashes allowed in checkfiles on // Windows" rule, rather than allowing a Unix backslash to potentially get // interpreted as a directory separator on Windows. 
if cfg!(windows) { filepath_string = filepath_string.replace('\\', "/"); } let mut is_escaped = false; if filepath_string.contains(['\\', '\n', '\r']) { filepath_string = filepath_string .replace('\\', "\\\\") .replace('\n', "\\n") .replace('\r', "\\r"); is_escaped = true; } FilepathString { filepath_string, is_escaped, } } fn hex_half_byte(c: char) -> anyhow::Result { // The hex characters in the hash must be lowercase for now, though we // could support uppercase too if we wanted to. if '0' <= c && c <= '9' { return Ok(c as u8 - '0' as u8); } if 'a' <= c && c <= 'f' { return Ok(c as u8 - 'a' as u8 + 10); } bail!("Invalid hex"); } // The `check` command is a security tool. That means it's much better for a // check to fail more often than it should (a false negative), than for a check // to ever succeed when it shouldn't (a false positive). By forbidding certain // characters in checked filepaths, we avoid a class of false positives where // two different filepaths can get confused with each other. fn check_for_invalid_characters(utf8_path: &str) -> anyhow::Result<()> { // Null characters in paths should never happen, but they can result in a // path getting silently truncated on Unix. if utf8_path.contains('\0') { bail!("Null character in path"); } // Because we convert invalid UTF-8 sequences in paths to the Unicode // replacement character, multiple different invalid paths can map to the // same UTF-8 string. if utf8_path.contains('�') { bail!("Unicode replacement character in path"); } // We normalize all Windows backslashes to forward slashes in our output, // so the only natural way to get a backslash in a checkfile on Windows is // to construct it on Unix and copy it over. (Or of course you could just // doctor it by hand.) To avoid confusing this with a directory separator, // we forbid backslashes entirely on Windows. Note that this check comes // after unescaping has been done. 
if cfg!(windows) && utf8_path.contains('\\') { bail!("Backslash in path"); } Ok(()) } fn unescape(mut path: &str) -> anyhow::Result { let mut unescaped = String::with_capacity(2 * path.len()); while let Some(i) = path.find('\\') { ensure!(i < path.len() - 1, "Invalid backslash escape"); unescaped.push_str(&path[..i]); match path[i + 1..].chars().next().unwrap() { // Anything other than a recognized escape sequence is an error. 'n' => unescaped.push_str("\n"), 'r' => unescaped.push_str("\r"), '\\' => unescaped.push_str("\\"), _ => bail!("Invalid backslash escape"), } path = &path[i + 2..]; } unescaped.push_str(path); Ok(unescaped) } #[derive(Debug)] struct ParsedCheckLine { file_string: String, is_escaped: bool, file_path: PathBuf, expected_hash: blake3::Hash, } fn split_untagged_check_line(line_after_slash: &str) -> Option<(&str, &str)> { // Of the form " ". The file might contain " ", so we need to split from the // left. line_after_slash.split_once(" ") } fn split_tagged_check_line(line_after_slash: &str) -> Option<(&str, &str)> { // Of the form "BLAKE3 () = ". The file might contain ") = ", so we need to split // from the *right*. let prefix = "BLAKE3 ("; if !line_after_slash.starts_with(prefix) { return None; } line_after_slash[prefix.len()..].rsplit_once(") = ") } fn parse_check_line(mut line: &str) -> anyhow::Result { // Trim off the trailing newlines, if any. line = line.trim_end_matches(['\r', '\n']); // If there's a backslash at the front of the line, that means we need to // unescape the path below. This matches the behavior of e.g. md5sum. let Some(first) = line.chars().next() else { bail!("Empty line"); }; let line_after_slash; let is_escaped; if first == '\\' { is_escaped = true; line_after_slash = &line[1..]; } else { is_escaped = false; line_after_slash = line; } // Split the line. It might be " " or "BLAKE3 () = ". The latter comes // from the --tag flag. 
let hash_hex; let file_str; if let Some((left, right)) = split_untagged_check_line(line_after_slash) { hash_hex = left; file_str = right; } else if let Some((left, right)) = split_tagged_check_line(line_after_slash) { file_str = left; hash_hex = right; } else { bail!("Invalid check line format"); } // Decode the hex hash. ensure!(hash_hex.len() == 2 * blake3::OUT_LEN, "Invalid hash length"); let mut hex_chars = hash_hex.chars(); let mut hash_bytes = [0; blake3::OUT_LEN]; for byte in &mut hash_bytes { let high_char = hex_chars.next().unwrap(); let low_char = hex_chars.next().unwrap(); *byte = 16 * hex_half_byte(high_char)? + hex_half_byte(low_char)?; } let expected_hash: blake3::Hash = hash_bytes.into(); // Unescape and validate the filepath. let file_path_string = if is_escaped { unescape(file_str)? } else { file_str.to_string() }; ensure!(!file_path_string.is_empty(), "empty file path"); check_for_invalid_characters(&file_path_string)?; Ok(ParsedCheckLine { file_string: file_str.to_string(), is_escaped, file_path: file_path_string.into(), expected_hash, }) } fn hash_one_input(path: &Path, args: &Args) -> anyhow::Result<()> { let output = hash_path(args, path)?; if args.raw() { write_raw_output(output, args)?; return Ok(()); } if args.no_names() { write_hex_output(output, args)?; println!(); return Ok(()); } let FilepathString { filepath_string, is_escaped, } = filepath_to_string(path); if is_escaped { print!("\\"); } if args.tag() { print!("BLAKE3 ({}) = ", filepath_string); write_hex_output(output, args)?; println!(); return Ok(()); } write_hex_output(output, args)?; println!(" {}", filepath_string); Ok(()) } // Returns true for success. Having a boolean return value here, instead of // passing down the files_failed reference, makes it less likely that we might // forget to set it in some error condition. 
fn check_one_line(line: &str, args: &Args) -> bool { let parse_result = parse_check_line(&line); let ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = match parse_result { Ok(parsed) => parsed, Err(e) => { eprintln!("{}: {}", NAME, e); return false; } }; let file_string = if is_escaped { "\\".to_string() + &file_string } else { file_string }; let found_hash: blake3::Hash; match hash_path(args, &file_path) { Ok(mut output) => { let mut found_hash_bytes = [0; blake3::OUT_LEN]; output.fill(&mut found_hash_bytes); found_hash = found_hash_bytes.into(); } Err(e) => { println!("{}: FAILED ({})", file_string, e); return false; } }; // This is a constant-time comparison. if expected_hash == found_hash { if !args.quiet() { println!("{}: OK", file_string); } true } else { println!("{}: FAILED", file_string); false } } fn check_one_checkfile(path: &Path, args: &Args, files_failed: &mut u64) -> anyhow::Result<()> { let mut file; let stdin; let mut stdin_lock; let mut bufreader: io::BufReader<&mut dyn Read>; if path == Path::new("-") { stdin = io::stdin(); stdin_lock = stdin.lock(); bufreader = io::BufReader::new(&mut stdin_lock); } else { file = File::open(path)?; bufreader = io::BufReader::new(&mut file); } let mut line = String::new(); loop { line.clear(); let n = bufreader.read_line(&mut line)?; if n == 0 { return Ok(()); } // check_one_line() prints errors and turns them into a success=false // return, so it doesn't return a Result. let success = check_one_line(&line, args); if !success { // We use `files_failed > 0` to indicate a mismatch, so it's important for correctness // that it's impossible for this counter to overflow. 
*files_failed = files_failed.saturating_add(1); } } } fn main() -> anyhow::Result<()> { let args = Args::parse()?; let mut thread_pool_builder = rayon_core::ThreadPoolBuilder::new(); if let Some(num_threads) = args.num_threads() { thread_pool_builder = thread_pool_builder.num_threads(num_threads); } let thread_pool = thread_pool_builder.build()?; thread_pool.install(|| { let mut files_failed = 0u64; // Note that file_args automatically includes `-` if nothing is given. for path in &args.file_args { if args.check() { check_one_checkfile(path, &args, &mut files_failed)?; } else { // Errors encountered in hashing are tolerated and printed to // stderr. This allows e.g. `b3sum *` to print errors for // non-files and keep going. However, if we encounter any // errors we'll still return non-zero at the end. let result = hash_one_input(path, &args); if let Err(e) = result { files_failed = files_failed.saturating_add(1); eprintln!("{}: {}: {}", NAME, path.to_string_lossy(), e); } } } if args.check() && files_failed > 0 { eprintln!( "{}: WARNING: {} computed checksum{} did NOT match", NAME, files_failed, if files_failed == 1 { "" } else { "s" }, ); } std::process::exit(if files_failed > 0 { 1 } else { 0 }); }) } #[cfg(test)] mod test { use clap::CommandFactory; #[test] fn test_args() { crate::Inner::command().debug_assert(); } } b3sum-1.8.1/src/unit_tests.rs000064400000000000000000000166231046102023000142320ustar 00000000000000use std::path::Path; #[test] fn test_parse_check_line() { // ========================= // ===== Success Cases ===== // ========================= // the basic case let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "0909090909090909090909090909090909090909090909090909090909090909 foo", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x09; 32])); assert!(!is_escaped); assert_eq!(file_string, "foo"); assert_eq!(file_path, Path::new("foo")); // regular whitespace let 
crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "fafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafafa \t\r\n\n\r \t\r\n\n\r", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0xfa; 32])); assert!(!is_escaped); assert_eq!(file_string, " \t\r\n\n\r \t"); assert_eq!(file_path, Path::new(" \t\r\n\n\r \t")); // path is one space let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "4242424242424242424242424242424242424242424242424242424242424242 ", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x42; 32])); assert!(!is_escaped); assert_eq!(file_string, " "); assert_eq!(file_path, Path::new(" ")); // *Unescaped* backslashes. Note that this line does *not* start with a // backslash, so something like "\" + "n" is interpreted as *two* // characters. We forbid all backslashes on Windows, so this test is // Unix-only. if cfg!(not(windows)) { let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "4343434343434343434343434343434343434343434343434343434343434343 fo\\a\\no", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x43; 32])); assert!(!is_escaped); assert_eq!(file_string, "fo\\a\\no"); assert_eq!(file_path, Path::new("fo\\a\\no")); } // escaped newlines let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "\\4444444444444444444444444444444444444444444444444444444444444444 fo\\r\\n\\n\\ro", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x44; 32])); assert!(is_escaped); assert_eq!(file_string, "fo\\r\\n\\n\\ro"); assert_eq!(file_path, Path::new("fo\r\n\n\ro")); // Escaped newline and backslash. Again because backslash is not allowed on // Windows, this test is Unix-only. 
if cfg!(not(windows)) { let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "\\4545454545454545454545454545454545454545454545454545454545454545 fo\\n\\\\o", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x45; 32])); assert!(is_escaped); assert_eq!(file_string, "fo\\n\\\\o"); assert_eq!(file_path, Path::new("fo\n\\o")); } // non-ASCII path let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "4646464646464646464646464646464646464646464646464646464646464646 否认", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x46; 32])); assert!(!is_escaped); assert_eq!(file_string, "否认"); assert_eq!(file_path, Path::new("否认")); // untagged separator " " in the file name let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "4747474747474747474747474747474747474747474747474747474747474747 foo bar", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x47; 32])); assert!(!is_escaped); assert_eq!(file_string, "foo bar"); assert_eq!(file_path, Path::new("foo bar")); // tagged separator ") = " in the file name let crate::ParsedCheckLine { file_string, is_escaped, file_path, expected_hash, } = crate::parse_check_line( "BLAKE3 (foo) = bar) = 4848484848484848484848484848484848484848484848484848484848484848", ) .unwrap(); assert_eq!(expected_hash, blake3::Hash::from([0x48; 32])); assert!(!is_escaped); assert_eq!(file_string, "foo) = bar"); assert_eq!(file_path, Path::new("foo) = bar")); // ========================= // ===== Failure Cases ===== // ========================= // too short crate::parse_check_line("").unwrap_err(); crate::parse_check_line("0").unwrap_err(); crate::parse_check_line("00").unwrap_err(); crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000") .unwrap_err(); 
crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000 ") .unwrap_err(); // not enough spaces crate::parse_check_line("0000000000000000000000000000000000000000000000000000000000000000 foo") .unwrap_err(); // capital letter hex crate::parse_check_line( "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA foo", ) .unwrap_err(); // non-hex hex crate::parse_check_line( "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx foo", ) .unwrap_err(); // non-ASCII hex crate::parse_check_line("你好, 我叫杰克. 认识你很高兴. 要不要吃个香蕉? foo").unwrap_err(); // invalid escape sequence crate::parse_check_line( "\\0000000000000000000000000000000000000000000000000000000000000000 fo\\o", ) .unwrap_err(); // truncated escape sequence crate::parse_check_line( "\\0000000000000000000000000000000000000000000000000000000000000000 foo\\", ) .unwrap_err(); // null char crate::parse_check_line( "0000000000000000000000000000000000000000000000000000000000000000 fo\0o", ) .unwrap_err(); // Unicode replacement char crate::parse_check_line( "0000000000000000000000000000000000000000000000000000000000000000 fo�o", ) .unwrap_err(); // On Windows only, backslashes are not allowed, escaped or otherwise. if cfg!(windows) { crate::parse_check_line( "0000000000000000000000000000000000000000000000000000000000000000 fo\\o", ) .unwrap_err(); crate::parse_check_line( "\\0000000000000000000000000000000000000000000000000000000000000000 fo\\\\o", ) .unwrap_err(); } } #[test] fn test_filepath_to_string() { let output = crate::filepath_to_string(Path::new("foo")); assert_eq!(output.filepath_string, "foo"); assert!(!output.is_escaped); let output = crate::filepath_to_string(Path::new("f\\ \t\r\noo")); if cfg!(windows) { // We normalize backslashes to forward slashes on Windows. 
assert_eq!(output.filepath_string, "f/ \t\\r\\noo"); } else { assert_eq!(output.filepath_string, "f\\\\ \t\\r\\noo"); } assert!(output.is_escaped); } b3sum-1.8.1/tests/cli_tests.rs000064400000000000000000000537771046102023000144100ustar 00000000000000use duct::cmd; use std::ffi::OsString; use std::fs; use std::io::prelude::*; use std::path::PathBuf; pub fn b3sum_exe() -> PathBuf { env!("CARGO_BIN_EXE_b3sum").into() } #[test] fn test_hash_one() { let expected = format!("{} -", blake3::hash(b"foo").to_hex()); let output = cmd!(b3sum_exe()).stdin_bytes("foo").read().unwrap(); assert_eq!(&*expected, output); } #[test] fn test_hash_one_tag() { let expected = format!("BLAKE3 (-) = {}", blake3::hash(b"foo").to_hex()); let output = cmd!(b3sum_exe(), "--tag") .stdin_bytes("foo") .read() .unwrap(); assert_eq!(&*expected, output); } #[test] fn test_hash_one_raw() { let expected = blake3::hash(b"foo").as_bytes().to_owned(); let output = cmd!(b3sum_exe(), "--raw") .stdin_bytes("foo") .stdout_capture() .run() .unwrap() .stdout; assert_eq!(expected, output.as_slice()); } #[test] fn test_hash_many() { let dir = tempfile::tempdir().unwrap(); let file1 = dir.path().join("file1"); fs::write(&file1, b"foo").unwrap(); let file2 = dir.path().join("file2"); fs::write(&file2, b"bar").unwrap(); let output = cmd!(b3sum_exe(), &file1, &file2).read().unwrap(); let foo_hash = blake3::hash(b"foo"); let bar_hash = blake3::hash(b"bar"); let expected = format!( "{} {}\n{} {}", foo_hash.to_hex(), // account for slash normalization on Windows file1.to_string_lossy().replace("\\", "/"), bar_hash.to_hex(), file2.to_string_lossy().replace("\\", "/"), ); assert_eq!(expected, output); let output_no_names = cmd!(b3sum_exe(), "--no-names", &file1, &file2) .read() .unwrap(); let expected_no_names = format!("{}\n{}", foo_hash.to_hex(), bar_hash.to_hex(),); assert_eq!(expected_no_names, output_no_names); } #[test] fn test_hash_many_tag() { let dir = tempfile::tempdir().unwrap(); let file1 = 
dir.path().join("file1"); fs::write(&file1, b"foo").unwrap(); let file2 = dir.path().join("file2"); fs::write(&file2, b"bar").unwrap(); let output = cmd!(b3sum_exe(), "--tag", &file1, &file2).read().unwrap(); let foo_hash = blake3::hash(b"foo"); let bar_hash = blake3::hash(b"bar"); let expected = format!( "BLAKE3 ({}) = {}\nBLAKE3 ({}) = {}", // account for slash normalization on Windows file1.to_string_lossy().replace("\\", "/"), foo_hash.to_hex(), file2.to_string_lossy().replace("\\", "/"), bar_hash.to_hex(), ); assert_eq!(expected, output); } #[test] fn test_missing_files() { let dir = tempfile::tempdir().unwrap(); let file1 = dir.path().join("file1"); fs::write(&file1, b"foo").unwrap(); let file2 = dir.path().join("file2"); fs::write(&file2, b"bar").unwrap(); let output = cmd!(b3sum_exe(), "file1", "missing_file", "file2") .dir(dir.path()) .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); assert!(!output.status.success()); let foo_hash = blake3::hash(b"foo"); let bar_hash = blake3::hash(b"bar"); let expected_stdout = format!( "{} file1\n{} file2\n", foo_hash.to_hex(), bar_hash.to_hex(), ); assert_eq!(expected_stdout.as_bytes(), &output.stdout[..]); let bing_error = fs::File::open(dir.path().join("missing_file")).unwrap_err(); let expected_stderr = format!("b3sum: missing_file: {}\n", bing_error.to_string()); assert_eq!(expected_stderr.as_bytes(), &output.stderr[..]); } #[test] fn test_hash_length_and_seek() { let mut expected = [0; 100]; blake3::Hasher::new() .update(b"foo") .finalize_xof() .fill(&mut expected); let output = cmd!(b3sum_exe(), "--raw", "--length=100") .stdin_bytes("foo") .stdout_capture() .run() .unwrap() .stdout; assert_eq!(expected[..], output); let short_output = cmd!(b3sum_exe(), "--raw", "--length=99") .stdin_bytes("foo") .stdout_capture() .run() .unwrap() .stdout; assert_eq!(expected[..99], short_output); let seek1_output = cmd!(b3sum_exe(), "--raw", "--length=99", "--seek=1") .stdin_bytes("foo") .stdout_capture() .run() 
.unwrap() .stdout; assert_eq!(expected[1..], seek1_output); let seek99_output = cmd!(b3sum_exe(), "--raw", "--length=1", "--seek=99") .stdin_bytes("foo") .stdout_capture() .run() .unwrap() .stdout; assert_eq!(expected[99..], seek99_output); } #[test] fn test_keyed() { let key = [42; blake3::KEY_LEN]; let f = tempfile::NamedTempFile::new().unwrap(); f.as_file().write_all(b"foo").unwrap(); f.as_file().flush().unwrap(); let expected = blake3::keyed_hash(&key, b"foo").to_hex(); let output = cmd!(b3sum_exe(), "--keyed", "--no-names", f.path()) .stdin_bytes(&key[..]) .read() .unwrap(); assert_eq!(&*expected, &*output); // Make sure that keys of the wrong length lead to errors. for bad_length in [0, 1, blake3::KEY_LEN - 1, blake3::KEY_LEN + 1] { dbg!(bad_length); let output = cmd!(b3sum_exe(), "--keyed", f.path()) .stdin_bytes(vec![0; bad_length]) .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); assert!(!output.status.success()); assert!(output.stdout.is_empty()); // Make sure the error message is relevant. 
let stderr = std::str::from_utf8(&output.stderr).unwrap(); assert!(stderr.contains("key bytes")); } } #[test] fn test_derive_key() { let context = "BLAKE3 2019-12-28 10:28:41 example context"; let f = tempfile::NamedTempFile::new().unwrap(); f.as_file().write_all(b"key material").unwrap(); f.as_file().flush().unwrap(); let expected = hex::encode(blake3::derive_key(context, b"key material")); let output = cmd!(b3sum_exe(), "--derive-key", context, "--no-names", f.path()) .read() .unwrap(); assert_eq!(&*expected, &*output); } #[test] fn test_no_mmap() { let f = tempfile::NamedTempFile::new().unwrap(); f.as_file().write_all(b"foo").unwrap(); f.as_file().flush().unwrap(); let expected = blake3::hash(b"foo").to_hex(); let output = cmd!(b3sum_exe(), "--no-mmap", "--no-names", f.path()) .read() .unwrap(); assert_eq!(&*expected, &*output); } #[test] fn test_length_without_value_is_an_error() { let result = cmd!(b3sum_exe(), "--length") .stdin_bytes("foo") .stderr_capture() .run(); assert!(result.is_err()); } #[test] fn test_raw_with_multi_files_is_an_error() { let f1 = tempfile::NamedTempFile::new().unwrap(); let f2 = tempfile::NamedTempFile::new().unwrap(); // Make sure it doesn't error with just one file let result = cmd!(b3sum_exe(), "--raw", f1.path()).stdout_capture().run(); assert!(result.is_ok()); // Make sure it errors when both file are passed let result = cmd!(b3sum_exe(), "--raw", f1.path(), f2.path()) .stderr_capture() .run(); assert!(result.is_err()); } #[test] #[cfg(unix)] fn test_newline_and_backslash_escaping_on_unix() { let empty_hash = blake3::hash(b"").to_hex(); let dir = tempfile::tempdir().unwrap(); fs::create_dir(dir.path().join("subdir")).unwrap(); let names = [ "abcdef", "abc\ndef", "abc\\def", "abc\rdef", "abc\r\ndef", "subdir/foo", ]; let mut paths = Vec::new(); for name in &names { let path = dir.path().join(name); println!("creating file at {:?}", path); fs::write(&path, b"").unwrap(); paths.push(path); } let output = cmd(b3sum_exe(), 
&names).dir(dir.path()).read().unwrap(); let expected = format!( "\ {0} abcdef \\{0} abc\\ndef \\{0} abc\\\\def \\{0} abc\\rdef \\{0} abc\\r\\ndef {0} subdir/foo", empty_hash, ); println!("output"); println!("======"); println!("{}", output); println!(); println!("expected"); println!("========"); println!("{}", expected); println!(); assert_eq!(expected, output); } #[test] #[cfg(windows)] fn test_slash_normalization_on_windows() { let empty_hash = blake3::hash(b"").to_hex(); let dir = tempfile::tempdir().unwrap(); fs::create_dir(dir.path().join("subdir")).unwrap(); // Note that filenames can't contain newlines or backslashes on Windows, so // we don't test escaping here. We only test forward slash and backslash as // directory separators. let names = ["abcdef", "subdir/foo", "subdir\\bar"]; let mut paths = Vec::new(); for name in &names { let path = dir.path().join(name); println!("creating file at {:?}", path); fs::write(&path, b"").unwrap(); paths.push(path); } let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap(); let expected = format!( "\ {0} abcdef {0} subdir/foo {0} subdir/bar", empty_hash, ); println!("output"); println!("======"); println!("{}", output); println!(); println!("expected"); println!("========"); println!("{}", expected); println!(); assert_eq!(expected, output); } #[test] #[cfg(unix)] fn test_invalid_unicode_on_unix() { use std::os::unix::ffi::OsStringExt; let empty_hash = blake3::hash(b"").to_hex(); let dir = tempfile::tempdir().unwrap(); let names = ["abcdef".into(), OsString::from_vec(b"abc\xffdef".to_vec())]; let mut paths = Vec::new(); for name in &names { let path = dir.path().join(name); println!("creating file at {:?}", path); // Note: Some operating systems, macOS in particular, simply don't // allow invalid Unicode in filenames. On those systems, this write // will fail. That's fine, we'll just short-circuit this test in that // case. But assert that at least Linux allows this. 
let write_result = fs::write(&path, b""); if cfg!(target_os = "linux") { write_result.expect("Linux should allow invalid Unicode"); } else if write_result.is_err() { return; } paths.push(path); } let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap(); let expected = format!( "\ {0} abcdef {0} abc�def", empty_hash, ); println!("output"); println!("======"); println!("{}", output); println!(); println!("expected"); println!("========"); println!("{}", expected); println!(); assert_eq!(expected, output); } #[test] #[cfg(windows)] fn test_invalid_unicode_on_windows() { use std::os::windows::ffi::OsStringExt; let empty_hash = blake3::hash(b"").to_hex(); let dir = tempfile::tempdir().unwrap(); let surrogate_char = 0xDC00; let bad_unicode_wchars = [ 'a' as u16, 'b' as u16, 'c' as u16, surrogate_char, 'd' as u16, 'e' as u16, 'f' as u16, ]; let bad_osstring = OsString::from_wide(&bad_unicode_wchars); let names = ["abcdef".into(), bad_osstring]; let mut paths = Vec::new(); for name in &names { let path = dir.path().join(name); println!("creating file at {:?}", path); fs::write(&path, b"").unwrap(); paths.push(path); } let output = cmd(b3sum_exe(), &names).dir(dir.path()).read().unwrap(); let expected = format!( "\ {0} abcdef {0} abc�def", empty_hash, ); println!("output"); println!("======"); println!("{}", output); println!(); println!("expected"); println!("========"); println!("{}", expected); println!(); assert_eq!(expected, output); } #[test] fn test_check() { // Make a directory full of files, and make sure the b3sum output in that // directory is what we expect. 
let a_hash = blake3::hash(b"a").to_hex(); let b_hash = blake3::hash(b"b").to_hex(); let cd_hash = blake3::hash(b"cd").to_hex(); for tagged in [false, true] { let dir = tempfile::tempdir().unwrap(); fs::write(dir.path().join("a"), b"a").unwrap(); fs::write(dir.path().join("b"), b"b").unwrap(); fs::create_dir(dir.path().join("c")).unwrap(); fs::write(dir.path().join("c/d"), b"cd").unwrap(); dbg!(tagged); let mut args = vec!["a", "b", "c/d"]; if tagged { args.push("--tag"); } let output = cmd(b3sum_exe(), args) .dir(dir.path()) .stdout_capture() .stderr_capture() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_checkfile = if tagged { format!( "BLAKE3 (a) = {}\n\ BLAKE3 (b) = {}\n\ BLAKE3 (c/d) = {}\n", a_hash, b_hash, cd_hash, ) } else { format!( "{} a\n\ {} b\n\ {} c/d\n", a_hash, b_hash, cd_hash, ) }; dbg!(&expected_checkfile); assert_eq!(expected_checkfile, stdout); assert_eq!("", stderr); // Now use the output we just validated as a checkfile, passed to stdin. let output = cmd!(b3sum_exe(), "--check") .stdin_bytes(expected_checkfile.as_bytes()) .dir(dir.path()) .stdout_capture() .stderr_capture() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_check_output = "\ a: OK\n\ b: OK\n\ c/d: OK\n"; assert_eq!(expected_check_output, stdout); assert_eq!("", stderr); // Check the same file, but with Windows-style newlines. 
let windows_style = expected_checkfile.replace("\n", "\r\n"); let output = cmd!(b3sum_exe(), "--check") .stdin_bytes(windows_style.as_bytes()) .dir(dir.path()) .stdout_capture() .stderr_capture() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_check_output = "\ a: OK\n\ b: OK\n\ c/d: OK\n"; assert_eq!(expected_check_output, stdout); assert_eq!("", stderr); // Now pass the same checkfile twice on the command line just for fun. let checkfile_path = dir.path().join("checkfile"); fs::write(&checkfile_path, &expected_checkfile).unwrap(); let output = cmd!(b3sum_exe(), "--check", &checkfile_path, &checkfile_path) .dir(dir.path()) .stdout_capture() .stderr_capture() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let mut double_check_output = String::new(); double_check_output.push_str(&expected_check_output); double_check_output.push_str(&expected_check_output); assert_eq!(double_check_output, stdout); assert_eq!("", stderr); // Corrupt one of the files and check again. fs::write(dir.path().join("b"), b"CORRUPTION").unwrap(); let output = cmd!(b3sum_exe(), "--check", &checkfile_path) .dir(dir.path()) .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_check_failure = "\ a: OK\n\ b: FAILED\n\ c/d: OK\n"; assert!(!output.status.success()); assert_eq!(expected_check_failure, stdout); assert_eq!( "b3sum: WARNING: 1 computed checksum did NOT match\n", stderr, ); // Delete one of the files and check again. 
fs::remove_file(dir.path().join("b")).unwrap(); let open_file_error = fs::File::open(dir.path().join("b")).unwrap_err(); let output = cmd!(b3sum_exe(), "--check", &checkfile_path) .dir(dir.path()) .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_check_failure = format!( "a: OK\n\ b: FAILED ({})\n\ c/d: OK\n", open_file_error, ); assert!(!output.status.success()); assert_eq!(expected_check_failure, stdout); assert_eq!( "b3sum: WARNING: 1 computed checksum did NOT match\n", stderr, ); // Confirm that --quiet suppresses the OKs but not the FAILEDs. let output = cmd!(b3sum_exe(), "--check", "--quiet", &checkfile_path) .dir(dir.path()) .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_check_failure = format!("b: FAILED ({})\n", open_file_error); assert!(!output.status.success()); assert_eq!(expected_check_failure, stdout); assert_eq!( "b3sum: WARNING: 1 computed checksum did NOT match\n", stderr, ); } } #[test] fn test_check_invalid_characters() { // Check that a null character in the path fails. let output = cmd!(b3sum_exe(), "--check") .stdin_bytes("0000000000000000000000000000000000000000000000000000000000000000 \0") .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_stderr = "\ b3sum: Null character in path\n\ b3sum: WARNING: 1 computed checksum did NOT match\n"; assert!(!output.status.success()); assert_eq!("", stdout); assert_eq!(expected_stderr, stderr); // Check that a Unicode replacement character in the path fails. 
let output = cmd!(b3sum_exe(), "--check") .stdin_bytes("0000000000000000000000000000000000000000000000000000000000000000 �") .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_stderr = "\ b3sum: Unicode replacement character in path\n\ b3sum: WARNING: 1 computed checksum did NOT match\n"; assert!(!output.status.success()); assert_eq!("", stdout); assert_eq!(expected_stderr, stderr); // Check that an invalid escape sequence in the path fails. let output = cmd!(b3sum_exe(), "--check") .stdin_bytes("\\0000000000000000000000000000000000000000000000000000000000000000 \\a") .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_stderr = "\ b3sum: Invalid backslash escape\n\ b3sum: WARNING: 1 computed checksum did NOT match\n"; assert!(!output.status.success()); assert_eq!("", stdout); assert_eq!(expected_stderr, stderr); // Windows also forbids literal backslashes. Check for that if and only if // we're on Windows. if cfg!(windows) { let output = cmd!(b3sum_exe(), "--check") .stdin_bytes("0000000000000000000000000000000000000000000000000000000000000000 \\") .stdout_capture() .stderr_capture() .unchecked() .run() .unwrap(); let stdout = std::str::from_utf8(&output.stdout).unwrap(); let stderr = std::str::from_utf8(&output.stderr).unwrap(); let expected_stderr = "\ b3sum: Backslash in path\n\ b3sum: WARNING: 1 computed checksum did NOT match\n"; assert!(!output.status.success()); assert_eq!("", stdout); assert_eq!(expected_stderr, stderr); } } #[test] fn test_globbing() { // On Unix, globbing is provided by the shell. On Windows, globbing is // provided by us, using the `wild` crate. 
let dir = tempfile::tempdir().unwrap(); let file1 = dir.path().join("file1"); fs::write(&file1, b"foo").unwrap(); let file2 = dir.path().join("file2"); fs::write(&file2, b"bar").unwrap(); let foo_hash = blake3::hash(b"foo"); let bar_hash = blake3::hash(b"bar"); // NOTE: This assumes that the glob will be expanded in alphabetical order, // to "file1 file2" rather than "file2 file1". So far, this seems to // be true (guaranteed?) of Unix shell behavior, and true in practice // with the `wild` crate on Windows. It's possible that this could // start failing in the future, though, or on some unknown platform. // If that ever happens, we'll need to relax this test somehow, // probably by just testing for both possible outputs. I'm not // handling that case in advance, though, because I'd prefer to hear // about it if it comes up. let expected = format!("{} file1\n{} file2", foo_hash.to_hex(), bar_hash.to_hex()); let star_command = format!("{} *", b3sum_exe().to_str().unwrap()); let (exe, c_flag) = if cfg!(windows) { ("cmd.exe", "/C") } else { ("/bin/sh", "-c") }; let output = cmd!(exe, c_flag, star_command) .dir(dir.path()) .read() .unwrap(); assert_eq!(expected, output); } b3sum-1.8.1/what_does_check_do.md000064400000000000000000000155451046102023000150140ustar 00000000000000# How does `b3sum --check` behave exactly?
or: Are filepaths...text? Most of the time, `b3sum --check` is a drop-in replacement for `md5sum --check` and other Coreutils hashing tools. It consumes a checkfile (the output of a regular `b3sum` command), re-hashes all the files listed there, and returns success if all of those hashes are still correct. What makes this more complicated than it might seem, is that representing filepaths as text means we need to consider many possible edge cases of unrepresentable filepaths. This document describes all of these edge cases in detail. ## The simple case Here's the result of running `b3sum a b c/d` in a directory that contains those three files: ```bash $ echo hi > a $ echo lo > b $ mkdir c $ echo stuff > c/d $ b3sum a b c/d 0b8b60248fad7ac6dfac221b7e01a8b91c772421a15b387dd1fb2d6a94aee438 a 6ae4a57bbba24f79c461d30bcb4db973b9427d9207877e34d2d74528daa84115 b 2d477356c962e54784f1c5dc5297718d92087006f6ee96b08aeaf7f3cd252377 c/d ``` If we pipe that output into `b3sum --check`, it will exit with status zero (success) and print: ```bash $ b3sum a b c/d | b3sum --check a: OK b: OK c/d: OK ``` If we delete `b` and change the contents of `c/d`, and then use the same checkfile as above, `b3sum --check` will exit with a non-zero status (failure) and print: ```bash $ b3sum a b c/d > checkfile $ rm b $ echo more stuff >> c/d $ b3sum --check checkfile a: OK b: FAILED (No such file or directory (os error 2)) c/d: FAILED ``` In these typical cases, `b3sum` and `md5sum` have identical output for success and very similar output for failure. ## Escaping newlines and backslashes Since the checkfile format (the regular output format of `b3sum`) is newline-separated text, we need to worry about what happens when a filepath contains a newline, or worse. Suppose we create a file named `x[newline]x` (3 characters). 
One way to create such a file is with a Python one-liner like this: ```python >>> open("x\nx", "w") ``` Here's what happens when we hash that file with `b3sum`: ```bash $ b3sum x* \af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 x\nx ``` Notice two things. First, `b3sum` puts a single `\` character at the front of the line. This indicates that the filepath contains escape sequences that `b3sum --check` will need to unescape. Then, `b3sum` replaces the newline character in the filepath with the two-character escape sequence `\n`. Similarly, if the filepath contained a backslash, `b3sum` would escape it as `\\` in the output. So far, all of this behavior is still identical to `md5sum`. ## Invalid Unicode This is where `b3sum` and `md5sum` diverge. Apart from the newline and backslash escapes described above, `md5sum` copies all other filepath bytes verbatim to its output. That means its output encoding is "ASCII plus whatever bytes we got from the command line". This creates two problems: 1. Printing something that isn't UTF-8 is kind of gross. 2. Windows support. What's the deal with Windows? To start with, there's a fundamental difference in how Unix and Windows represent filepaths. Unix filepaths are "usually UTF-8" and Windows filepaths are "usually UTF-16". That means that a file named `abc` is typically represented as the bytes `[97, 98, 99]` on Unix and as the bytes `[97, 0, 98, 0, 99, 0]` on Windows. The `md5sum` approach won't work if we plan on creating a checkfile on Unix and checking it on Windows, or vice versa. A more portable approach is to convert platform-specific bytes into some consistent Unicode encoding. (In practice this is going to be UTF-8, but in theory it could be anything.) Then when `--check` needs to open a file, we convert the Unicode representation back into platform-specific bytes. This makes important common cases like `abc`, and in fact even `abc[newline]def`, work as expected. Great! 
But...what did we mean above when we said *usually* UTF-8 and *usually* UTF-16? It turns out that not every possible sequence of bytes is valid UTF-8, and not every possible sequence of 16-bit wide chars is valid UTF-16. For example, the byte 0xFF (255) can never appear in any UTF-8 string. If we ask Python to decode it, it yells at us: ```python >>> b"\xFF".decode("UTF-8") UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 0: invalid start byte ``` However, tragically, we *can* create a file with that byte in its name (on Linux at least, though not usually on macOS): ```python >>> open(b"y\xFFy", "w") ``` So some filepaths aren't representable in Unicode at all. Our plan to "convert platform-specific bytes into some consistent Unicode encoding" isn't going to work for everything. What does `b3sum` do with the file above? ```bash $ b3sum y* af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 y�y ``` That � in there is a "Unicode replacement character". When we run into filepaths that we can't represent in Unicode, we replace the unrepresentable parts with these characters. On the checking side, to avoid any possible confusion between two different invalid filepaths, we automatically fail if we see a replacement character. Together with a few more details covered in the next section, this gives us an important set of properties: 1. Any file can be hashed locally. 2. Any file with a valid Unicode name not containing the � character can be checked. 3. Checking ambiguous or unrepresentable filepaths always fails. 4. Checkfiles are always valid UTF-8. 5. Checkfiles are portable between Unix and Windows. ## Formal Rules 1. When hashing, filepaths are represented in a platform-specific encoding, which can accommodate any filepath on the current platform. In Rust, this is `OsStr`/`OsString`. 2. In output, filepaths are first converted to UTF-8. Any non-Unicode segments are replaced with Unicode replacement characters (U+FFFD). 
In Rust, this is `OsStr::to_string_lossy`. 3. Then, if a filepath contains any backslashes (U+005C), newlines (U+000A), or carriage returns (U+000D), these characters are escaped as `\\`, `\n`, and `\r` respectively. 4. Finally, any output line containing an escape sequence is prefixed with a single backslash. 5. When checking, each line is parsed as UTF-8, separated by a newline (U+000A); a carriage return (U+000D) immediately before the newline, as in Windows-style line endings, is also accepted. Invalid UTF-8 is an error. 6. Then, if a line begins with a backslash, the filepath component is unescaped. Any escape sequence other than `\\`, `\n`, or `\r` is an error. If a line does not begin with a backslash, unescaping is not performed, and any backslashes in the filepath component are interpreted literally. (`b3sum` output never contains unescaped backslashes, but they can occur in checkfiles assembled by hand.) 7. Finally, if a filepath contains a Unicode replacement character (U+FFFD) or a null character (U+0000), it is an error. **Additionally, on Windows only:** 8. In output, all backslashes (U+005C) are replaced with forward slashes (U+002F). 9. When checking, after unescaping, if a filepath contains a backslash, it is an error.