lz4_flex-0.11.3/.cargo_vcs_info.json0000644000000001360000000000100126450ustar { "git": { "sha1": "b5fde256c2c58365c0c465b8e124b382a6c15bdb" }, "path_in_vcs": "" }lz4_flex-0.11.3/Cargo.lock0000644000000302410000000000100106200ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "autocfg" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80" [[package]] name = "bit-set" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" dependencies = [ "bit-vec", ] [[package]] name = "bit-vec" version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" [[package]] name = "bitflags" version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" [[package]] name = "byteorder" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fc10e8cc6b2580fda3f36eb6dc5316657f812a3df879a44a66fc9f0fdbc4855" [[package]] name = "cc" version = "1.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8cd6604a82acf3039f1144f54b8eb34e91ffba622051189e71b781822d5ee1f5" dependencies = [ "jobserver", "libc", ] [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "errno" version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" dependencies = [ "libc", "windows-sys", ] [[package]] name = "fastrand" version = "2.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "658bd65b1cf4c852a3cc96f18a8ce7b5640f6b703f905c7d74532294c2a63984" [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "getrandom" version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ "cfg-if", "libc", "wasi", ] [[package]] name = "itoa" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jobserver" version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" dependencies = [ "libc", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "libm" version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" [[package]] name = "linux-raw-sys" version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" [[package]] name = "lz4-compress" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f966533a922a9bba9e95e594c1fdb3b9bf5fdcdb11e37e51ad84cd76e468b91" dependencies = [ "byteorder", "quick-error", ] [[package]] name = "lz4_flex" version = "0.11.3" dependencies = [ "lz4-compress", "lzzzz", "more-asserts", "proptest", "serde_json", "snap", "twox-hash", ] [[package]] name = "lzzzz" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f57d7cf4e29ef4e4898212846052a28deca85e571b9e8f3d9c46d6fa27feb6e" dependencies = [ "cc", ] [[package]] name = "more-asserts" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fafa6961cabd9c63bcd77a45d7e3b7f3b552b70417831fb0f56db717e72407e" [[package]] name = "num-traits" version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", "libm", ] [[package]] name = "ppv-lite86" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] [[package]] name = "proptest" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31b476131c3c86cb68032fdc5cb6d5a1045e3e42d96b69fa599fd77701e1f5bf" dependencies = [ "bit-set", "bit-vec", "bitflags", "lazy_static", "num-traits", "rand", "rand_chacha", "rand_xorshift", "regex-syntax", "rusty-fork", "tempfile", "unarray", ] [[package]] name = "quick-error" version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quote" version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", ] [[package]] name = "rand_xorshift" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" dependencies = [ "rand_core", ] [[package]] name = "regex-syntax" version = "0.8.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "rustix" version = "0.38.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "65e04861e65f21776e67888bfbea442b3642beaa0138fdb1dd7a84a52dffdb89" dependencies = [ "bitflags", "errno", "libc", "linux-raw-sys", "windows-sys", ] [[package]] name = "rusty-fork" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" dependencies = [ "fnv", "quick-error", "tempfile", "wait-timeout", ] [[package]] name = "ryu" version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" [[package]] name = "serde" version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", "syn", ] [[package]] name = "serde_json" version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "snap" version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "static_assertions" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" version = "2.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "11a6ae1e52eb25aab8f3fb9fca13be982a373b8f1157ca14b897a825ba4a2d35" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] [[package]] name = "tempfile" version = "3.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", "fastrand", "rustix", "windows-sys", ] [[package]] name = "twox-hash" version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ "cfg-if", "static_assertions", ] [[package]] name = "unarray" version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "unicode-ident" version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" [[package]] name = "wait-timeout" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" dependencies = [ "libc", ] [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "windows-sys" 
version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] name = "windows_x86_64_gnullvm" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" lz4_flex-0.11.3/Cargo.toml0000644000000036400000000000100106460ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "lz4_flex" version = "0.11.3" authors = [ "Pascal Seitz ", "Arthur Silva ", "ticki ", ] include = [ "src/*.rs", "src/frame/**/*", "src/block/**/*", "README.md", "LICENSE", ] description = "Fastest LZ4 implementation in Rust, no unsafe by default." 
homepage = "https://github.com/pseitz/lz4_flex" readme = "README.md" keywords = [ "compression", "lz4", "compress", "decompression", "decompress", ] license = "MIT" repository = "https://github.com/pseitz/lz4_flex" [package.metadata.docs.rs] all-features = true rustdoc-args = [ "--cfg", "docsrs", ] [profile.bench] opt-level = 3 lto = true codegen-units = 1 [profile.release] opt-level = 3 codegen-units = 1 panic = "unwind" [[bench]] name = "crit_bench" path = "benches/crit_bench.rs" harness = false [dependencies.twox-hash] version = "1.6.3" optional = true default-features = false [dev-dependencies.lz4-compress] version = "0.1.1" [dev-dependencies.lzzzz] version = "1.0.4" [dev-dependencies.more-asserts] version = "0.3.1" [dev-dependencies.proptest] version = "1.0.0" [dev-dependencies.serde_json] version = "1.0.91" [dev-dependencies.snap] version = "1.1.0" [features] default = [ "std", "safe-encode", "safe-decode", "frame", ] frame = [ "std", "dep:twox-hash", ] nightly = [] safe-decode = [] safe-encode = [] std = [] lz4_flex-0.11.3/Cargo.toml.orig000064400000000000000000000042761046102023000143350ustar 00000000000000[package] authors = ["Pascal Seitz ", "Arthur Silva ", "ticki "] description = "Fastest LZ4 implementation in Rust, no unsafe by default." edition = "2021" keywords = ["compression", "lz4", "compress", "decompression", "decompress"] name = "lz4_flex" homepage = "https://github.com/pseitz/lz4_flex" repository = "https://github.com/pseitz/lz4_flex" readme = "README.md" license = "MIT" version = "0.11.3" include = ["src/*.rs", "src/frame/**/*", "src/block/**/*", "README.md", "LICENSE"] [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] [[bench]] harness = false name = "crit_bench" path = "benches/crit_bench.rs" [dev-dependencies] criterion = { git = "https://github.com/PSeitz/criterion.rs/", rev = "cf60ffc"} lzzzz = "1.0.4" lz4-compress = "0.1.1" more-asserts = "0.3.1" snap = "1.1.0" serde_json = "1.0.91" proptest = "1.0.0" [dev-dependencies.lz-fear] git = "https://github.com/main--/rust-lz-fear" #Uncomment to make lz4_flex master available as lz4_flex_master #[dev-dependencies.lz4_flex_master] #rev= "a122673" # v10 #git = "https://github.com/PSeitz/lz4_flex" #package = "lz4_flex" #default-features=false #features = ["std", "safe-encode", "safe-decode", "frame"] [features] default = ["std", "safe-encode", "safe-decode", "frame"] safe-decode = [] safe-encode = [] #unchecked-decode = [] # Removes some checks for additional performance. Only enable on trusted input! 
frame = ["std", "dep:twox-hash"] std = [] # use nightly compiler features nightly = [] [dependencies] twox-hash = { version = "1.6.3", default-features = false, optional = true } [profile.bench] codegen-units = 1 lto = true opt-level = 3 [profile.release] codegen-units = 1 #debug = true opt-level = 3 panic = "unwind" # [[bench]] # harness = false # name = "quickbench" # path = "benches/quickbench.rs" # [[bench]] # harness = false # name = "bench" # path = "benches/bench.rs" # [[bin]] # name = "decompress_with_stats" # path = "src/test_bins/decompress_with_stats.rs" # [[bin]] # name = "profile_decomp" # path = "src/test_bins/profile_decomp.rs" # [[bin]] # name = "profile_comp" # path = "src/test_bins/profile_comp.rs" lz4_flex-0.11.3/LICENSE000064400000000000000000000020671046102023000124470ustar 00000000000000The MIT License (MIT) Copyright (c) 2020 Pascal Seitz Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. lz4_flex-0.11.3/README.md000064400000000000000000000121111046102023000127100ustar 00000000000000![Rust](https://github.com/PSeitz/lz4_flex/workflows/Rust/badge.svg) [![Docs](https://docs.rs/lz4_flex/badge.svg)](https://docs.rs/crate/lz4_flex/) [![Crates.io](https://img.shields.io/crates/v/lz4_flex.svg)](https://crates.io/crates/lz4_flex) # lz4_flex ![lz4_flex_logo](https://raw.githubusercontent.com/PSeitz/lz4_flex/master/logo.jpg) Fastest LZ4 implementation in Rust. Originally based on [redox-os' lz4 compression](https://crates.io/crates/lz4-compress), but now a complete rewrite. The results in the table are from a benchmark in this project (66Kb JSON, 10MB dickens) with the block format. AMD Ryzen 7 5900HX, rustc 1.69.0 (84c898d65 2023-04-16), Manjaro, CPU Boost Disabled, CPU Governor: Performance 66Kb JSON | Compressor | Compression | Decompression | Ratio | |----------------------|-------------|---------------|---------------| | lz4_flex unsafe w. unchecked_decode | 1615 MiB/s | 5973 MiB/s | 0.2284 | | lz4_flex unsafe | 1615 MiB/s | 5512 MiB/s | 0.2284 | | lz4_flex safe | 1272 MiB/s | 4540 MiB/s | 0.2284 | | lzzz (lz4 1.9.3) | 1469 MiB/s | 5313 MiB/s | 0.2283 | | lz4_fear | 662 MiB/s | 939 MiB/s | 0.2283 | | snap | 1452 MiB/s | 1649 MiB/s | 0.2242 | 10 Mb dickens | Compressor | Compression | Decompression | Ratio | |----------------------|-------------|---------------|---------------| | lz4_flex unsafe w. 
## no_std support

no_std support is currently only for the block format, since the frame format uses `std::io::Write`, which is not available in core.

## Benchmarks

The benchmark is run with criterion, the test files are in the benches folder. Currently 4 implementations are compared: this one, [lz-fear](https://github.com/main--/rust-lz-fear), the [c version via rust bindings](https://crates.io/crates/lzzzz) and [snappy](https://github.com/burntsushi/rust-snappy). The lz4-flex version is tested with the feature flags safe-decode and safe-encode switched on and off.

- lz4_cpp: https://crates.io/crates/lzzzz
- lz-fear: https://github.com/main--/rust-lz-fear
- snap: https://github.com/burntsushi/rust-snappy

Tested on AMD Ryzen 7 5900HX, rustc 1.69.0 (84c898d65 2023-04-16), Manjaro, CPU Boost Disabled, CPU 3GHZ

### Results v0.11.0 02-06-2023 (safe-decode and safe-encode off)

`cargo bench --no-default-features`

![Compress](./compress_bench.svg) ![Decompress](./decompress_bench.svg)

### Results v0.11.0 02-06-2023 (safe-decode and safe-encode on)

`cargo bench`

![Compress](./compress_bench_safe.svg) ![Decompress](./decompress_bench_safe.svg)

## Miri

[Miri](https://github.com/rust-lang/miri) can be used to find issues related to incorrect unsafe usage: `MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-disable-stacked-borrows" cargo +nightly miri test --no-default-features --features frame`

## Fuzzer

This fuzz target generates corrupted data for the decompressor: `cargo +nightly fuzz run fuzz_decomp_corrupt_block` and `cargo +nightly fuzz run fuzz_decomp_corrupt_frame`

This fuzz target asserts that a compression and decompression roundtrip returns the original input: `cargo +nightly fuzz run fuzz_roundtrip` and `cargo +nightly fuzz run fuzz_roundtrip_frame`

This fuzz target asserts that compression with cpp and decompression with lz4_flex returns the original input: `cargo +nightly fuzz run fuzz_roundtrip_cpp_compress`
## Bindings in other languages

- Node.js: [lz4-napi](https://github.com/antoniomuso/lz4-napi)
- Wasm: [lz4-wasm](https://github.com/PSeitz/lz4-wasm)

## TODO

- High compression

## Migrate from v0.10 to v0.11.1

To migrate, just remove the `checked-decode` feature flag if you used it.

lz4_flex-0.11.3/src/block/compress.rs

//! The compression algorithm.
//!
//! We make use of hash tables to find duplicates. This gives a reasonable compression ratio with a
//! high performance. It has fixed memory usage, which, contrary to other approaches, makes it less
//! memory hungry.

use crate::block::hashtable::HashTable;
use crate::block::END_OFFSET;
use crate::block::LZ4_MIN_LENGTH;
use crate::block::MAX_DISTANCE;
use crate::block::MFLIMIT;
use crate::block::MINMATCH;
#[cfg(not(feature = "safe-encode"))]
use crate::sink::PtrSink;
use crate::sink::Sink;
use crate::sink::SliceSink;
#[allow(unused_imports)]
use alloc::vec;
#[allow(unused_imports)]
use alloc::vec::Vec;

use super::hashtable::HashTable4K;
use super::hashtable::HashTable4KU16;
use super::{CompressError, WINDOW_SIZE};

/// Increase step size after 1<<INCREASE_STEPSIZE_BITSHIFT non matches
const INCREASE_STEPSIZE_BITSHIFT: usize = 5;

/// Read a 4-byte "batch" from some position.
///
/// This will read a native-endian 4-byte integer from some position.
#[inline]
#[cfg(not(feature = "safe-encode"))]
pub(super) fn get_batch(input: &[u8], n: usize) -> u32 {
    unsafe { read_u32_ptr(input.as_ptr().add(n)) }
}

#[inline]
#[cfg(feature = "safe-encode")]
pub(super) fn get_batch(input: &[u8], n: usize) -> u32 {
    u32::from_ne_bytes(input[n..n + 4].try_into().unwrap())
}

/// Read a usize-sized "batch" from some position.
///
/// This will read a native-endian usize from some position.
#[inline]
#[allow(dead_code)]
#[cfg(not(feature = "safe-encode"))]
pub(super) fn get_batch_arch(input: &[u8], n: usize) -> usize {
    unsafe { read_usize_ptr(input.as_ptr().add(n)) }
}

#[inline]
#[allow(dead_code)]
#[cfg(feature = "safe-encode")]
pub(super) fn get_batch_arch(input: &[u8], n: usize) -> usize {
    const USIZE_SIZE: usize = core::mem::size_of::<usize>();
    let arr: &[u8; USIZE_SIZE] = input[n..n + USIZE_SIZE].try_into().unwrap();
    usize::from_ne_bytes(*arr)
}

#[inline]
fn token_from_literal(lit_len: usize) -> u8 {
    if lit_len < 0xF {
        // Since we can fit the literals length into it, there is no need for saturation.
        (lit_len as u8) << 4
    } else {
        // We were unable to fit the literals into it, so we saturate to 0xF. We will later
        // write the extensional value.
        0xF0
    }
}

#[inline]
fn token_from_literal_and_match_length(lit_len: usize, duplicate_length: usize) -> u8 {
    let mut token = if lit_len < 0xF {
        // Since we can fit the literals length into it, there is no need for saturation.
        (lit_len as u8) << 4
    } else {
        // We were unable to fit the literals into it, so we saturate to 0xF. We will later
        // write the extensional value.
        0xF0
    };

    token |= if duplicate_length < 0xF {
        // We could fit it in.
        duplicate_length as u8
    } else {
        // We were unable to fit it in, so we default to 0xF, which will later be extended.
        0xF
    };

    token
}

/// Counts the number of same bytes in two byte streams.
/// `input` is the complete input
/// `cur` is the current position in the input. it will be incremented by the number of matched
/// bytes `source` either the same as input or an external slice
/// `candidate` is the candidate position in `source`
///
/// The function ignores the last END_OFFSET bytes in input as those should be literals.
#[inline]
#[cfg(feature = "safe-encode")]
fn count_same_bytes(input: &[u8], cur: &mut usize, source: &[u8], candidate: usize) -> usize {
    const USIZE_SIZE: usize = core::mem::size_of::<usize>();
    let cur_slice = &input[*cur..input.len() - END_OFFSET];
    let cand_slice = &source[candidate..];

    let mut num = 0;
    for (block1, block2) in cur_slice
        .chunks_exact(USIZE_SIZE)
        .zip(cand_slice.chunks_exact(USIZE_SIZE))
    {
        let input_block = usize::from_ne_bytes(block1.try_into().unwrap());
        let match_block = usize::from_ne_bytes(block2.try_into().unwrap());

        if input_block == match_block {
            num += USIZE_SIZE;
        } else {
            let diff = input_block ^ match_block;
            num += (diff.to_le().trailing_zeros() / 8) as usize;
            *cur += num;
            return num;
        }
    }

    // If we're here we may have 1 to 7 bytes left to check close to the end of the input
    // or source slices. Since this is a rare occurrence we mark it cold to get
    // ~5% better performance.
    #[cold]
    fn count_same_bytes_tail(a: &[u8], b: &[u8], offset: usize) -> usize {
        a.iter()
            .zip(b)
            .skip(offset)
            .take_while(|(a, b)| a == b)
            .count()
    }
    num += count_same_bytes_tail(cur_slice, cand_slice, num);

    *cur += num;
    num
}

/// Counts the number of same bytes in two byte streams.
/// `input` is the complete input
/// `cur` is the current position in the input. it will be incremented by the number of matched
/// bytes `source` either the same as input OR an external slice
/// `candidate` is the candidate position in `source`
///
/// The function ignores the last END_OFFSET bytes in input as those should be literals.
#[inline]
#[cfg(not(feature = "safe-encode"))]
fn count_same_bytes(input: &[u8], cur: &mut usize, source: &[u8], candidate: usize) -> usize {
    let max_input_match = input.len().saturating_sub(*cur + END_OFFSET);
    let max_candidate_match = source.len() - candidate;
    // Considering both limits, calc how far we may match in input.
    let input_end = *cur + max_input_match.min(max_candidate_match);

    let start = *cur;
    let mut source_ptr = unsafe { source.as_ptr().add(candidate) };

    // compare 4/8 bytes blocks depending on the arch
    const STEP_SIZE: usize = core::mem::size_of::<usize>();
    while *cur + STEP_SIZE <= input_end {
        let diff =
            read_usize_ptr(unsafe { input.as_ptr().add(*cur) }) ^ read_usize_ptr(source_ptr);

        if diff == 0 {
            *cur += STEP_SIZE;
            unsafe {
                source_ptr = source_ptr.add(STEP_SIZE);
            }
        } else {
            *cur += (diff.to_le().trailing_zeros() / 8) as usize;
            return *cur - start;
        }
    }

    // compare 4 bytes block
    #[cfg(target_pointer_width = "64")]
    {
        if input_end - *cur >= 4 {
            let diff = read_u32_ptr(unsafe { input.as_ptr().add(*cur) }) ^ read_u32_ptr(source_ptr);

            if diff == 0 {
                *cur += 4;
                unsafe {
                    source_ptr = source_ptr.add(4);
                }
            } else {
                *cur += (diff.to_le().trailing_zeros() / 8) as usize;
                return *cur - start;
            }
        }
    }

    // compare 2 bytes block
    if input_end - *cur >= 2
        && unsafe { read_u16_ptr(input.as_ptr().add(*cur)) == read_u16_ptr(source_ptr) }
    {
        *cur += 2;
        unsafe {
            source_ptr = source_ptr.add(2);
        }
    }

    if *cur < input_end
        && unsafe { input.as_ptr().add(*cur).read() } == unsafe { source_ptr.read() }
    {
        *cur += 1;
    }

    *cur - start
}
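// The `write_integer` functions below implement LZ4's LSIC length encoding:
// lengths >= 15 spill out of the token as a run of 0xFF bytes plus a final
// byte < 0xFF. A minimal illustration of that scheme (hypothetical demo code,
// not part of the crate):
#[cfg(test)]
mod lsic_encoding_demo {
    use alloc::vec::Vec;

    /// Sketch of the byte-at-a-time scheme used by `write_integer` below.
    fn encode_lsic(mut n: usize, out: &mut Vec<u8>) {
        while n >= 0xFF {
            out.push(0xFF);
            n -= 0xFF;
        }
        out.push(n as u8);
    }

    #[test]
    fn lsic_roundtrip() {
        let mut buf = Vec::new();
        encode_lsic(769, &mut buf);
        // 255 + 255 + 255 + 4 = 769, matching the decoder's summing loop.
        assert_eq!(buf, [0xFF, 0xFF, 0xFF, 4]);
        assert_eq!(buf.iter().map(|&b| b as usize).sum::<usize>(), 769);
    }
}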
/// Write an integer to the output.
///
/// Each additional byte then represents a value from 0 to 255, which is added to the previous
/// value to produce a total length. When the byte value is 255, another byte must be read and
/// added, and so on. There can be any number of bytes of value "255" following the token.
#[inline]
#[cfg(feature = "safe-encode")]
fn write_integer(output: &mut impl Sink, mut n: usize) {
    // Note: Since `n` is usually < 0xFF and writing multiple bytes to the output
    // requires 2 branches of bound check (due to the possibility of add overflows)
    // the simple byte at a time implementation below is faster in most cases.
    while n >= 0xFF {
        n -= 0xFF;
        push_byte(output, 0xFF);
    }
    push_byte(output, n as u8);
}

/// Write an integer to the output.
///
/// Each additional byte then represents a value from 0 to 255, which is added to the previous
/// value to produce a total length. When the byte value is 255, another byte must be read and
/// added, and so on. There can be any number of bytes of value "255" following the token.
#[inline]
#[cfg(not(feature = "safe-encode"))]
fn write_integer(output: &mut impl Sink, mut n: usize) {
    // Write the 0xFF bytes as long as the integer is higher than said value.
    if n >= 4 * 0xFF {
        // In this unlikely branch we use a fill instead of a loop,
        // otherwise rustc may output a large unrolled/vectorized loop.
        let bulk = n / (4 * 0xFF);
        n %= 4 * 0xFF;
        unsafe {
            core::ptr::write_bytes(output.pos_mut_ptr(), 0xFF, 4 * bulk);
            output.set_pos(output.pos() + 4 * bulk);
        }
    }

    // Handle last 1 to 4 bytes
    push_u32(output, 0xFFFFFFFF);
    // Updating output len for the remainder
    unsafe {
        output.set_pos(output.pos() - 4 + 1 + n / 255);
        // Write the remaining byte.
        *output.pos_mut_ptr().sub(1) = (n % 255) as u8;
    }
}

/// Handle the last bytes from the input as literals
#[cold]
fn handle_last_literals(output: &mut impl Sink, input: &[u8], start: usize) {
    let lit_len = input.len() - start;

    let token = token_from_literal(lit_len);
    push_byte(output, token);
    if lit_len >= 0xF {
        write_integer(output, lit_len - 0xF);
    }
    // Now, write the actual literals.
    output.extend_from_slice(&input[start..]);
}

/// Moves the cursors back as long as the bytes match, to find additional bytes in a duplicate
#[inline]
#[cfg(feature = "safe-encode")]
fn backtrack_match(
    input: &[u8],
    cur: &mut usize,
    literal_start: usize,
    source: &[u8],
    candidate: &mut usize,
) {
    // Note: Even if the iterator version of this loop has less branches inside the loop, it has
    // more branches before the loop. That in practice seems to make it slower than the while
    // version below. TODO: It should be possible to remove all bounds checks, since we are
    // walking backwards
    while *candidate > 0 && *cur > literal_start && input[*cur - 1] == source[*candidate - 1] {
        *cur -= 1;
        *candidate -= 1;
    }
}

/// Moves the cursors back as long as the bytes match, to find additional bytes in a duplicate
#[inline]
#[cfg(not(feature = "safe-encode"))]
fn backtrack_match(
    input: &[u8],
    cur: &mut usize,
    literal_start: usize,
    source: &[u8],
    candidate: &mut usize,
) {
    while unsafe {
        *candidate > 0
            && *cur > literal_start
            && input.get_unchecked(*cur - 1) == source.get_unchecked(*candidate - 1)
    } {
        *cur -= 1;
        *candidate -= 1;
    }
}

/// Compress all bytes of `input[input_pos..]` into `output`.
///
/// Bytes in `input[..input_pos]` are treated as a preamble and can be used for lookback.
/// This part is known as the compressor "prefix".
/// Bytes in `ext_dict` logically precede the bytes in `input` and can also be used for lookback.
///
/// `input_stream_offset` is the logical position of the first byte of `input`. This allows the
/// same `dict` to be used for many calls to `compress_internal` as we can "readdress" the first
/// byte of `input` to be something other than 0.
///
/// `dict` is the dictionary of previously encoded sequences.
///
/// This is used to find duplicates in the stream so they are not written multiple times.
///
/// Every four bytes are hashed, and in the resulting slot their position in the input buffer
/// is placed in the dict. This way we can easily look up a candidate to back references.
///
/// Returns the number of bytes written (compressed) into `output`.
///
/// # Const parameters
/// `USE_DICT`: Disables usage of ext_dict (it'll panic if a non-empty slice is used).
/// In other words, this generates more optimized code when an external dictionary isn't used.
///
/// A similar const argument could be used to disable the Prefix mode (eg. USE_PREFIX),
/// which would impose `input_pos == 0 && input_stream_offset == 0`. Experiments didn't
/// show significant improvement though.
// Intentionally avoid inlining.
// Empirical tests revealed it to be rarely better but often significantly detrimental.
#[inline(never)]
pub(crate) fn compress_internal<T: HashTable, const USE_DICT: bool, S: Sink>(
    input: &[u8],
    input_pos: usize,
    output: &mut S,
    dict: &mut T,
    ext_dict: &[u8],
    input_stream_offset: usize,
) -> Result<usize, CompressError> {
    assert!(input_pos <= input.len());
    if USE_DICT {
        assert!(ext_dict.len() <= super::WINDOW_SIZE);
        assert!(ext_dict.len() <= input_stream_offset);
        // Check for overflow hazard when using ext_dict
        assert!(input_stream_offset
            .checked_add(input.len())
            .and_then(|i| i.checked_add(ext_dict.len()))
            .map_or(false, |i| i <= isize::MAX as usize));
    } else {
        assert!(ext_dict.is_empty());
    }
    if output.capacity() - output.pos() < get_maximum_output_size(input.len() - input_pos) {
        return Err(CompressError::OutputTooSmall);
    }

    let output_start_pos = output.pos();
    if input.len() - input_pos < LZ4_MIN_LENGTH {
        handle_last_literals(output, input, input_pos);
        return Ok(output.pos() - output_start_pos);
    }

    let ext_dict_stream_offset = input_stream_offset - ext_dict.len();
    let end_pos_check = input.len() - MFLIMIT;
    let mut literal_start = input_pos;
    let mut cur = input_pos;

    if cur == 0 && input_stream_offset == 0 {
        // According to the spec we can't start with a match,
        // except when referencing another block.
        let hash = T::get_hash_at(input, 0);
        dict.put_at(hash, 0);
        cur = 1;
    }

    loop {
        // Read the next block into two sections, the literals and the duplicates.
        let mut step_size;
        let mut candidate;
        let mut candidate_source;
        let mut offset;
        let mut non_match_count = 1 << INCREASE_STEPSIZE_BITSHIFT;
        // The number of bytes before our cursor, where the duplicate starts.
        let mut next_cur = cur;

        // In this loop we search for duplicates via the hashtable. 4bytes or 8bytes are hashed
        // and compared.
        loop {
            step_size = non_match_count >> INCREASE_STEPSIZE_BITSHIFT;
            non_match_count += 1;

            cur = next_cur;
            next_cur += step_size;

            // Same as cur + MFLIMIT > input.len()
            if cur > end_pos_check {
                handle_last_literals(output, input, literal_start);
                return Ok(output.pos() - output_start_pos);
            }
            // Find a candidate in the dictionary with the hash of the current four bytes.
            // Unchecked is safe as long as the values from the hash function don't exceed the
            // size of the table. This is ensured by right shifting the hash values
            // (`dict_bitshift`) to fit them in the table
            // [Bounds Check]: Can be elided due to `end_pos_check` above
            let hash = T::get_hash_at(input, cur);
            candidate = dict.get_at(hash);
            dict.put_at(hash, cur + input_stream_offset);

            // Sanity check: Matches can't be ahead of `cur`.
            debug_assert!(candidate <= input_stream_offset + cur);

            // Two requirements must hold for the candidate:
            // - We should not return a position which is merely a hash collision, so the
            //   candidate must actually match what we search for.
            // - We can address up to a 16-bit offset, hence we are only able to address the
            //   candidate if its offset is less than or equal to 0xFFFF.
            if input_stream_offset + cur - candidate > MAX_DISTANCE {
                continue;
            }

            if candidate >= input_stream_offset {
                // match within input
                offset = (input_stream_offset + cur - candidate) as u16;
                candidate -= input_stream_offset;
                candidate_source = input;
            } else if USE_DICT {
                // Sanity check, which may fail if we lost history beyond MAX_DISTANCE
                debug_assert!(
                    candidate >= ext_dict_stream_offset,
                    "Lost history in ext dict mode"
                );
                // match within ext dict
                offset = (input_stream_offset + cur - candidate) as u16;
                candidate -= ext_dict_stream_offset;
                candidate_source = ext_dict;
            } else {
                // Match is not reachable anymore
                // eg. compressing an independent block frame w/o clearing
                // the matches tables, only increasing input_stream_offset.
                // Sanity check
                debug_assert!(input_pos == 0, "Lost history in prefix mode");
                continue;
            }
            // [Bounds Check]: Candidate is coming from the Hashmap. It can't be out of bounds,
            // but that's impossible to prove for the compiler, so it can't remove the bounds
            // checks.
            let cand_bytes: u32 = get_batch(candidate_source, candidate);
            // [Bounds Check]: Should be able to be elided due to `end_pos_check`.
            let curr_bytes: u32 = get_batch(input, cur);

            if cand_bytes == curr_bytes {
                break;
            }
        }

        // Extend the match backwards if we can
        backtrack_match(
            input,
            &mut cur,
            literal_start,
            candidate_source,
            &mut candidate,
        );

        // The length (in bytes) of the literals section.
        let lit_len = cur - literal_start;

        // Generate the higher half of the token.
        cur += MINMATCH;
        candidate += MINMATCH;
        let duplicate_length = count_same_bytes(input, &mut cur, candidate_source, candidate);

        // Note: The `- 2` offset was copied from the reference implementation, it could be
        // arbitrary.
        let hash = T::get_hash_at(input, cur - 2);
        dict.put_at(hash, cur - 2 + input_stream_offset);

        let token = token_from_literal_and_match_length(lit_len, duplicate_length);

        // Push the token to the output stream.
        push_byte(output, token);
        // If we were unable to fit the literals length into the token, write the extensional
        // part.
        if lit_len >= 0xF {
            write_integer(output, lit_len - 0xF);
        }

        // Now, write the actual literals.
        //
        // The unsafe version copies blocks of 8 bytes, and therefore may copy up to 7 bytes
        // more than needed. This is safe, because the last 12 bytes (MF_LIMIT) are handled in
        // handle_last_literals.
        copy_literals_wild(output, input, literal_start, lit_len);
        // write the offset in little endian.
        push_u16(output, offset);

        // If we were unable to fit the duplicates length into the token, write the
        // extensional part.
        if duplicate_length >= 0xF {
            write_integer(output, duplicate_length - 0xF);
        }

        literal_start = cur;
    }
}

#[inline]
#[cfg(feature = "safe-encode")]
fn push_byte(output: &mut impl Sink, el: u8) {
    output.push(el);
}

#[inline]
#[cfg(not(feature = "safe-encode"))]
fn push_byte(output: &mut impl Sink, el: u8) {
    unsafe {
        core::ptr::write(output.pos_mut_ptr(), el);
        output.set_pos(output.pos() + 1);
    }
}

#[inline]
#[cfg(feature = "safe-encode")]
fn push_u16(output: &mut impl Sink, el: u16) {
    output.extend_from_slice(&el.to_le_bytes());
}

#[inline]
#[cfg(not(feature = "safe-encode"))]
fn push_u16(output: &mut impl Sink, el: u16) {
    unsafe {
        core::ptr::copy_nonoverlapping(el.to_le_bytes().as_ptr(), output.pos_mut_ptr(), 2);
        output.set_pos(output.pos() + 2);
    }
}

#[inline]
#[cfg(not(feature = "safe-encode"))]
fn push_u32(output: &mut impl Sink, el: u32) {
    unsafe {
        core::ptr::copy_nonoverlapping(el.to_le_bytes().as_ptr(), output.pos_mut_ptr(), 4);
        output.set_pos(output.pos() + 4);
    }
}

#[inline(always)] // (always) necessary otherwise compiler fails to inline it
#[cfg(feature = "safe-encode")]
fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize, len: usize) {
    output.extend_from_slice_wild(&input[input_start..input_start + len], len)
}

#[inline]
#[cfg(not(feature = "safe-encode"))]
fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize, len: usize) {
    debug_assert!(input_start + len / 8 * 8 + ((len % 8) != 0) as usize * 8 <= input.len());
    debug_assert!(output.pos() + len / 8 * 8 + ((len % 8) != 0) as usize * 8 <= output.capacity());
    unsafe {
        // Note: This used to be a wild copy loop of 8 bytes, but the compiler consistently
        // transformed it into a call to memcopy, which hurts performance significantly for
        // small copies, which are common.
        let start_ptr = input.as_ptr().add(input_start);
        match len {
            0..=8 => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), 8),
            9..=16 => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), 16),
            17..=24 => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), 24),
            _ => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), len),
        }
        output.set_pos(output.pos() + len);
    }
}

/// Compress all bytes of `input` into `output`.
/// The method chooses an appropriate hashtable to lookup duplicates.
/// output should be preallocated with a size of
/// `get_maximum_output_size`.
///
/// Returns the number of bytes written (compressed) into `output`.
#[inline]
pub(crate) fn compress_into_sink_with_dict<const USE_DICT: bool>(
    input: &[u8],
    output: &mut impl Sink,
    mut dict_data: &[u8],
) -> Result<usize, CompressError> {
    if dict_data.len() + input.len() < u16::MAX as usize {
        let mut dict = HashTable4KU16::new();
        init_dict(&mut dict, &mut dict_data);
        compress_internal::<_, USE_DICT, _>(input, 0, output, &mut dict, dict_data, dict_data.len())
    } else {
        let mut dict = HashTable4K::new();
        init_dict(&mut dict, &mut dict_data);
        compress_internal::<_, USE_DICT, _>(input, 0, output, &mut dict, dict_data, dict_data.len())
    }
}

#[inline]
fn init_dict<T: HashTable>(dict: &mut T, dict_data: &mut &[u8]) {
    if dict_data.len() > WINDOW_SIZE {
        *dict_data = &dict_data[dict_data.len() - WINDOW_SIZE..];
    }
    let mut i = 0usize;
    while i + core::mem::size_of::<u32>() <= dict_data.len() {
        let hash = T::get_hash_at(dict_data, i);
        dict.put_at(hash, i);
        // Note: The 3 byte step was copied from the reference implementation, it could be
        // arbitrary.
        i += 3;
    }
}

/// Returns the maximum output size of the compressed data.
/// Can be used to preallocate capacity on the output vector
#[inline]
pub const fn get_maximum_output_size(input_len: usize) -> usize {
    16 + 4 + (input_len * 110 / 100) as usize
}

/// Compress all bytes of `input` into `output`.
/// The method chooses an appropriate hashtable to lookup duplicates.
/// output should be preallocated with a size of
/// `get_maximum_output_size`.
///
/// Returns the number of bytes written (compressed) into `output`.
#[inline]
pub fn compress_into(input: &[u8], output: &mut [u8]) -> Result<usize, CompressError> {
    compress_into_sink_with_dict::<false>(input, &mut SliceSink::new(output, 0), b"")
}

/// Compress all bytes of `input` into `output`.
/// The method chooses an appropriate hashtable to lookup duplicates.
/// output should be preallocated with a size of
/// `get_maximum_output_size`.
///
/// Returns the number of bytes written (compressed) into `output`.
#[inline]
pub fn compress_into_with_dict(
    input: &[u8],
    output: &mut [u8],
    dict_data: &[u8],
) -> Result<usize, CompressError> {
    compress_into_sink_with_dict::<true>(input, &mut SliceSink::new(output, 0), dict_data)
}

#[inline]
fn compress_into_vec_with_dict<const USE_DICT: bool>(
    input: &[u8],
    prepend_size: bool,
    mut dict_data: &[u8],
) -> Vec<u8> {
    let prepend_size_num_bytes = if prepend_size { 4 } else { 0 };
    let max_compressed_size = get_maximum_output_size(input.len()) + prepend_size_num_bytes;
    if dict_data.len() <= 3 {
        dict_data = b"";
    }
    #[cfg(feature = "safe-encode")]
    let mut compressed = {
        let mut compressed: Vec<u8> = vec![0u8; max_compressed_size];
        let out = if prepend_size {
            compressed[..4].copy_from_slice(&(input.len() as u32).to_le_bytes());
            &mut compressed[4..]
        } else {
            &mut compressed
        };
        let compressed_len =
            compress_into_sink_with_dict::<USE_DICT>(input, &mut SliceSink::new(out, 0), dict_data)
                .unwrap();

        compressed.truncate(prepend_size_num_bytes + compressed_len);
        compressed
    };
    #[cfg(not(feature = "safe-encode"))]
    let mut compressed = {
        let mut vec = Vec::with_capacity(max_compressed_size);
        let start_pos = if prepend_size {
            vec.extend_from_slice(&(input.len() as u32).to_le_bytes());
            4
        } else {
            0
        };
        let compressed_len = compress_into_sink_with_dict::<USE_DICT>(
            input,
            &mut PtrSink::from_vec(&mut vec, start_pos),
            dict_data,
        )
        .unwrap();
        unsafe {
            vec.set_len(prepend_size_num_bytes + compressed_len);
        }
        vec
    };

    compressed.shrink_to_fit();
    compressed
}

/// Compress all bytes of `input` into `output`. The uncompressed size will be prepended as a
/// little endian u32. Can be used in conjunction with `decompress_size_prepended`
#[inline]
pub fn compress_prepend_size(input: &[u8]) -> Vec<u8> {
    compress_into_vec_with_dict::<false>(input, true, b"")
}

/// Compress all bytes of `input`.
#[inline]
pub fn compress(input: &[u8]) -> Vec<u8> {
    compress_into_vec_with_dict::<false>(input, false, b"")
}

/// Compress all bytes of `input` with an external dictionary.
#[inline]
pub fn compress_with_dict(input: &[u8], ext_dict: &[u8]) -> Vec<u8> {
    compress_into_vec_with_dict::<true>(input, false, ext_dict)
}
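// A minimal usage sketch for the preallocation pattern above (hypothetical
// demo, not part of the crate): size the output with `get_maximum_output_size`
// and let `compress_into` report how many bytes it actually wrote.
#[cfg(test)]
mod compress_into_demo {
    use super::*;

    #[test]
    fn preallocated_compress_into() {
        let input = b"lz4_flex demo input, lz4_flex demo input, lz4_flex demo input";
        // Worst-case sized buffer, so compression can never run out of space.
        let mut out = vec![0u8; get_maximum_output_size(input.len())];
        let written = compress_into(input, &mut out).unwrap();
        // Only the first `written` bytes of `out` hold compressed data.
        assert!(written > 0 && written <= out.len());
    }
}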
/// Compress all bytes of `input` into `output`. The uncompressed size will be prepended as a
/// little endian u32. Can be used in conjunction with `decompress_size_prepended_with_dict`
#[inline]
pub fn compress_prepend_size_with_dict(input: &[u8], ext_dict: &[u8]) -> Vec<u8> {
    compress_into_vec_with_dict::<true>(input, true, ext_dict)
}

#[inline]
#[cfg(not(feature = "safe-encode"))]
fn read_u16_ptr(input: *const u8) -> u16 {
    let mut num: u16 = 0;
    unsafe {
        core::ptr::copy_nonoverlapping(input, &mut num as *mut u16 as *mut u8, 2);
    }
    num
}

#[inline]
#[cfg(not(feature = "safe-encode"))]
fn read_u32_ptr(input: *const u8) -> u32 {
    let mut num: u32 = 0;
    unsafe {
        core::ptr::copy_nonoverlapping(input, &mut num as *mut u32 as *mut u8, 4);
    }
    num
}

#[inline]
#[cfg(not(feature = "safe-encode"))]
fn read_usize_ptr(input: *const u8) -> usize {
    let mut num: usize = 0;
    unsafe {
        core::ptr::copy_nonoverlapping(
            input,
            &mut num as *mut usize as *mut u8,
            core::mem::size_of::<usize>(),
        );
    }
    num
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_count_same_bytes() {
        // 8byte aligned block, zeros and ones are added because of the end/offset
        let first: &[u8] = &[
            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ];
        let second: &[u8] = &[
            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        ];
        assert_eq!(count_same_bytes(first, &mut 0, second, 0), 16);

        // 4byte aligned block
        let first: &[u8] = &[
            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0,
        ];
        let second: &[u8] = &[
            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1,
        ];
        assert_eq!(count_same_bytes(first, &mut 0, second, 0), 20);

        // 2byte aligned block
        let first: &[u8] = &[
            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
        ];
        let second: &[u8] = &[
            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1,
        ];
        assert_eq!(count_same_bytes(first, &mut 0, second, 0), 22);

        // 1byte aligned block
        let first: &[u8] = &[
            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0,
        ];
        let second: &[u8] = &[
            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1,
        ];
        assert_eq!(count_same_bytes(first, &mut 0, second, 0), 23);

        // 1byte aligned block - last byte different
        let first: &[u8] = &[
            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0,
        ];
        let second: &[u8] = &[
            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 6, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1,
        ];
        assert_eq!(count_same_bytes(first, &mut 0, second, 0), 22);

        // 1byte aligned block
        let first: &[u8] = &[
            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 9, 5, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0,
        ];
        let second: &[u8] = &[
            1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 6, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1, 1,
        ];
        assert_eq!(count_same_bytes(first, &mut 0, second, 0), 21);

        for diff_idx in 8..100 {
            let first: Vec<u8> = (0u8..255).cycle().take(100 + 12).collect();
            let mut second = first.clone();
            second[diff_idx] = 255;
            for start in 0..=diff_idx {
                let same_bytes = count_same_bytes(&first, &mut start.clone(), &second, start);
                assert_eq!(same_bytes, diff_idx - start);
            }
        }
    }

    #[test]
    fn test_bug() {
        let input: &[u8] = &[
            10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18,
        ];
        let _out = compress(input);
    }

    #[test]
    fn test_dict() {
        let input: &[u8] = &[
            10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12,
            14, 16, 18, 10, 12, 14, 16, 18,
        ];
        let dict = input;
        let compressed = compress_with_dict(input, dict);
        assert_lt!(compressed.len(), compress(input).len());
        assert!(compressed.len() < compress(input).len());

        let mut uncompressed = vec![0u8; input.len()];
        let uncomp_size = crate::block::decompress::decompress_into_with_dict(
            &compressed,
            &mut uncompressed,
            dict,
        )
        .unwrap();
        uncompressed.truncate(uncomp_size);
        assert_eq!(input, uncompressed);
    }

    #[test]
    fn test_dict_no_panic() {
        let input: &[u8] = &[
            10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18,
        ];
        let dict = &[10, 12, 14];
        let _compressed = compress_with_dict(input, dict);
    }

    #[test]
    fn test_dict_match_crossing() {
        let input: &[u8] = &[
            10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18,
        ];
        let dict = input;
        let compressed = compress_with_dict(input, dict);
        assert_lt!(compressed.len(), compress(input).len());

        let mut uncompressed = vec![0u8; input.len() * 2];
        // copy first half of the input into output
        let dict_cutoff = dict.len() / 2;
        let output_start = dict.len() - dict_cutoff;
        uncompressed[..output_start].copy_from_slice(&dict[dict_cutoff..]);
        let uncomp_len = {
            let mut sink = SliceSink::new(&mut uncompressed[..], output_start);
            crate::block::decompress::decompress_internal::<true, _>(
                &compressed,
                &mut sink,
                &dict[..dict_cutoff],
            )
            .unwrap()
        };
        assert_eq!(input.len(), uncomp_len);
        assert_eq!(
            input,
            &uncompressed[output_start..output_start + uncomp_len]
        );
    }

    #[test]
    fn test_conformant_last_block() {
        // From the spec:
        // The last match must start at least 12 bytes before the end of block.
        // The last match is part of the penultimate sequence. It is followed by the last
        // sequence, which contains only literals. Note that, as a consequence, an independent
        // block < 13 bytes cannot be compressed, because the match must copy "something",
        // so it needs at least one prior byte.
        // When a block can reference data from another block, it can start immediately with a
        // match and no literal, so a block of 12 bytes can be compressed.
        let aaas: &[u8] = b"aaaaaaaaaaaaaaa";

        // uncompressible
        let out = compress(&aaas[..12]);
        assert_gt!(out.len(), 12);
        // compressible
        let out = compress(&aaas[..13]);
        assert_le!(out.len(), 13);
        let out = compress(&aaas[..14]);
        assert_le!(out.len(), 14);
        let out = compress(&aaas[..15]);
        assert_le!(out.len(), 15);

        // dict uncompressible
        let out = compress_with_dict(&aaas[..11], aaas);
        assert_gt!(out.len(), 11);
        // compressible
        let out = compress_with_dict(&aaas[..12], aaas);
        // According to the spec this _could_ compress, but it doesn't in this lib
        // as it aborts compression for any input len < LZ4_MIN_LENGTH
        assert_gt!(out.len(), 12);
        let out = compress_with_dict(&aaas[..13], aaas);
        assert_le!(out.len(), 13);
        let out = compress_with_dict(&aaas[..14], aaas);
        assert_le!(out.len(), 14);
        let out = compress_with_dict(&aaas[..15], aaas);
        assert_le!(out.len(), 15);
    }

    #[test]
    fn test_dict_size() {
        let dict = vec![b'a'; 1024 * 1024];
        let input = &b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"[..];
        let compressed = compress_prepend_size_with_dict(input, &dict);
        let decompressed =
            crate::block::decompress_size_prepended_with_dict(&compressed, &dict).unwrap();
        assert_eq!(decompressed, input);
    }
}

lz4_flex-0.11.3/src/block/decompress.rs

//! The block decompression algorithm.
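// Hedged usage sketch for this module's public entry points (hypothetical demo,
// not part of the crate): a round trip through the size-prepended block API
// that this decompressor backs.
#[cfg(test)]
mod block_roundtrip_demo {
    #[test]
    fn roundtrip() {
        let input = b"Hello people, what's up?";
        let compressed = crate::block::compress_prepend_size(input);
        let decompressed = crate::block::decompress_size_prepended(&compressed).unwrap();
        assert_eq!(&decompressed[..], &input[..]);
    }
}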
use crate::block::{DecompressError, MINMATCH};
use crate::fastcpy_unsafe;
use crate::sink::SliceSink;
use crate::sink::{PtrSink, Sink};
#[allow(unused_imports)]
use alloc::vec::Vec;

/// Copies data to output_ptr by self-referential copy from start and match_length
#[inline]
unsafe fn duplicate(
    output_ptr: &mut *mut u8,
    output_end: *mut u8,
    start: *const u8,
    match_length: usize,
) {
    // We cannot simply use memcpy or `extend_from_slice`, because these do not allow
    // self-referential copies: http://ticki.github.io/img/lz4_runs_encoding_diagram.svg

    // Considering that `wild_copy_match_16` can copy up to `16 - 1` extra bytes.
    // Defer to `duplicate_overlapping` in case of an overlapping match
    // or if the wild copy would copy beyond the end of the output.
    if (output_ptr.offset_from(start) as usize) < match_length + 16 - 1
        || (output_end.offset_from(*output_ptr) as usize) < match_length + 16 - 1
    {
        duplicate_overlapping(output_ptr, start, match_length);
    } else {
        debug_assert!(
            output_ptr.add(match_length / 16 * 16 + ((match_length % 16) != 0) as usize * 16)
                <= output_end
        );
        wild_copy_from_src_16(start, *output_ptr, match_length);
        *output_ptr = output_ptr.add(match_length);
    }
}

#[inline]
fn wild_copy_from_src_16(mut source: *const u8, mut dst_ptr: *mut u8, num_items: usize) {
    // Note: if the compiler auto-vectorizes this it'll hurt performance!
    // It's not the case for a 16 bytes stepsize, but it is for 8 bytes.
    unsafe {
        let dst_ptr_end = dst_ptr.add(num_items);
        loop {
            core::ptr::copy_nonoverlapping(source, dst_ptr, 16);
            source = source.add(16);
            dst_ptr = dst_ptr.add(16);
            if dst_ptr >= dst_ptr_end {
                break;
            }
        }
    }
}

/// Copy function, if the data start + match_length overlaps into output_ptr
#[inline]
#[cfg_attr(nightly, optimize(size))] // to avoid loop unrolling
unsafe fn duplicate_overlapping(
    output_ptr: &mut *mut u8,
    mut start: *const u8,
    match_length: usize,
) {
    // There is an edge case when output_ptr == start, which causes the decoder to potentially
    // expose up to match_length bytes of uninitialized data in the decompression buffer.
    // To prevent that we write a dummy zero to output, which will zero out output in such cases.
    // This is the same strategy used by the reference C implementation https://github.com/lz4/lz4/pull/772
    output_ptr.write(0u8);
    let dst_ptr_end = output_ptr.add(match_length);

    while output_ptr.add(1) < dst_ptr_end {
        // Note that this loop unrolling is done, so that the compiler doesn't do it in an awful
        // way.
        // Without that the compiler will unroll/auto-vectorize the copy with a lot of branches.
        // This is not what we want, as large overlapping copies are not that common.
        core::ptr::copy(start, *output_ptr, 1);
        start = start.add(1);
        *output_ptr = output_ptr.add(1);

        core::ptr::copy(start, *output_ptr, 1);
        start = start.add(1);
        *output_ptr = output_ptr.add(1);
    }

    if *output_ptr < dst_ptr_end {
        core::ptr::copy(start, *output_ptr, 1);
        *output_ptr = output_ptr.add(1);
    }
}
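// Illustrative sketch of why overlapping copies need byte-wise semantics
// (hypothetical demo, not part of the crate): with offset 1, each copied byte
// becomes the source of the next, so one byte of history expands into a run.
// A memcpy-style block copy would read stale bytes instead.
#[cfg(test)]
mod overlapping_copy_demo {
    #[test]
    fn offset_one_expands_a_run() {
        let mut buf = [b'a', 0, 0, 0];
        // Byte-at-a-time self-referential copy: buf[i] = buf[i - 1].
        for i in 1..buf.len() {
            buf[i] = buf[i - 1];
        }
        assert_eq!(&buf, b"aaaa");
    }
}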
#[inline]
unsafe fn copy_from_dict(
    output_base: *mut u8,
    output_ptr: &mut *mut u8,
    ext_dict: &[u8],
    offset: usize,
    match_length: usize,
) -> usize {
    // If we're here we know offset > output pos, so we have at least 1 byte to copy from dict
    debug_assert!(output_ptr.offset_from(output_base) >= 0);
    debug_assert!(offset > output_ptr.offset_from(output_base) as usize);
    // If unchecked-decode is not disabled we also know that the offset falls within ext_dict
    debug_assert!(ext_dict.len() + output_ptr.offset_from(output_base) as usize >= offset);

    let dict_offset = ext_dict.len() + output_ptr.offset_from(output_base) as usize - offset;
    // Can't copy past ext_dict len, the match may cross dict and output
    let dict_match_length = match_length.min(ext_dict.len() - dict_offset);
    // TODO test fastcpy_unsafe
    core::ptr::copy_nonoverlapping(
        ext_dict.as_ptr().add(dict_offset),
        *output_ptr,
        dict_match_length,
    );
    *output_ptr = output_ptr.add(dict_match_length);
    dict_match_length
}

/// Read an integer.
///
/// In LZ4, we encode small integers in a way that we can have an arbitrary number of bytes. In
/// particular, we add the bytes repeatedly until we hit a non-0xFF byte. When we do, we add
/// this byte to our sum and terminate the loop.
///
/// # Example
///
/// ```notest
/// 255, 255, 255, 4, 2, 3, 4, 6, 7
/// ```
///
/// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 are ignored, because
/// 4 is the first non-0xFF byte.
#[inline]
fn read_integer_ptr(
    input_ptr: &mut *const u8,
    _input_ptr_end: *const u8,
) -> Result<u32, DecompressError> {
    // We start at zero and count upwards.
    let mut n: u32 = 0;
    // If this byte takes value 255 (the maximum value it can take), another byte is read
    // and added to the sum. This repeats until a byte lower than 255 is read.
    loop {
        // We add the next byte until we get a byte which we add to the counting variable.
        #[cfg(not(feature = "unchecked-decode"))]
        {
            if *input_ptr >= _input_ptr_end {
                return Err(DecompressError::ExpectedAnotherByte);
            }
        }
        let extra = unsafe { input_ptr.read() };
        *input_ptr = unsafe { input_ptr.add(1) };
        n += extra as u32;

        // We continue if we got 255, break otherwise.
        if extra != 0xFF {
            break;
        }
    }

    // 255, 255, 255, 8
    // 111, 111, 111, 101

    Ok(n)
}

/// Read a little-endian 16-bit integer from the input stream.
#[inline]
fn read_u16_ptr(input_ptr: &mut *const u8) -> u16 {
    let mut num: u16 = 0;
    unsafe {
        core::ptr::copy_nonoverlapping(*input_ptr, &mut num as *mut u16 as *mut u8, 2);
        *input_ptr = input_ptr.add(2);
    }

    u16::from_le(num)
}

const FIT_TOKEN_MASK_LITERAL: u8 = 0b00001111;
const FIT_TOKEN_MASK_MATCH: u8 = 0b11110000;

#[test]
fn check_token() {
    assert!(!does_token_fit(15));
    assert!(does_token_fit(14));
    assert!(does_token_fit(114));
    assert!(!does_token_fit(0b11110000));
    assert!(does_token_fit(0b10110000));
}

/// The token consists of two parts, the literal length (upper 4 bits) and match_length (lower 4
/// bits). If the literal length and match_length are both below 15, we don't need to read
/// additional data, so the token does fit the metadata in a single u8.
#[inline]
fn does_token_fit(token: u8) -> bool {
    !((token & FIT_TOKEN_MASK_LITERAL) == FIT_TOKEN_MASK_LITERAL
        || (token & FIT_TOKEN_MASK_MATCH) == FIT_TOKEN_MASK_MATCH)
}
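// A small sketch of the token layout described above (hypothetical demo, not
// part of the crate): the high nibble carries the literal length and the low
// nibble carries the match length field, each saturating at 0xF.
#[cfg(test)]
mod token_layout_demo {
    #[test]
    fn token_nibbles() {
        // 3 literal bytes, match length field 7 (actual match is 7 + MINMATCH).
        let token: u8 = (3 << 4) | 7;
        assert_eq!(token >> 4, 3); // literal length
        assert_eq!(token & 0xF, 7); // match length field
    }
}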
#[inline] fn does_token_fit(token: u8) -> bool { !((token & FIT_TOKEN_MASK_LITERAL) == FIT_TOKEN_MASK_LITERAL || (token & FIT_TOKEN_MASK_MATCH) == FIT_TOKEN_MASK_MATCH) } /// Decompress all bytes of `input` into `output`. /// /// Returns the number of bytes written (decompressed) into `output`. #[inline] pub(crate) fn decompress_internal<const USE_DICT: bool, S: Sink>( input: &[u8], output: &mut S, ext_dict: &[u8], ) -> Result<usize, DecompressError> { // Prevent segfault for empty input if input.is_empty() { return Err(DecompressError::ExpectedAnotherByte); } let ext_dict = if USE_DICT { ext_dict } else { // ensure optimizer knows ext_dict length is 0 if !USE_DICT debug_assert!(ext_dict.is_empty()); &[] }; let output_base = unsafe { output.base_mut_ptr() }; let output_end = unsafe { output_base.add(output.capacity()) }; let output_start_pos_ptr = unsafe { output.base_mut_ptr().add(output.pos()) as *mut u8 }; let mut output_ptr = output_start_pos_ptr; let mut input_ptr = input.as_ptr(); let input_ptr_end = unsafe { input.as_ptr().add(input.len()) }; let safe_distance_from_end = (16 /* literal copy */ + 2 /* u16 match offset */ + 1 /* The next token to read (we can skip the check) */).min(input.len()); let input_ptr_safe = unsafe { input_ptr_end.sub(safe_distance_from_end) }; let safe_output_ptr = unsafe { let mut output_num_safe_bytes = output .capacity() .saturating_sub(16 /* literal copy */ + 18 /* match copy */); if USE_DICT { // In the dictionary case the output pointer is moved by the match length in the dictionary. // This may be up to 17 bytes without exiting the loop. So we need to ensure that we have // at least additional 17 bytes of space left in the output buffer in the fast loop. output_num_safe_bytes = output_num_safe_bytes.saturating_sub(17); }; output_base.add(output_num_safe_bytes) }; // Exhaust the decoder by reading and decompressing all blocks until the remaining buffer is // empty. loop { // Read the token. The token is the first byte in a block. It is divided into two 4-bit // subtokens, the higher and the lower. // This token contains two 4-bit "fields", a higher and a lower, representing the literals' // length and the back reference's length, respectively. let token = unsafe { input_ptr.read() }; input_ptr = unsafe { input_ptr.add(1) }; // Checking for hot-loop. // In most cases the metadata does fit in a single 1-byte token (statistically) and we are in // a safe-distance to the end. This enables some optimized handling. // // Ideally we want to check for safe output pos like: output.pos() <= safe_output_pos; But // that doesn't work when the safe_output_ptr is == output_ptr due to insufficient // capacity. So we use `<` instead of `<=`, which covers that case.
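// In short, the fast path below requires all three of the following (a restatement of
// the checks, not extra logic):
//   1. does_token_fit(token): no extra length bytes follow the token;
//   2. input_ptr <= input_ptr_safe: at least 16 + 2 + 1 input bytes remain, enough for
//      a wild 16-byte literal copy, the u16 offset and the next token;
//   3. output_ptr < safe_output_ptr: room for the 16-byte literal copy plus an
//      18-byte match copy (and up to 17 more bytes in the dictionary case).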
if does_token_fit(token) && (input_ptr as usize) <= input_ptr_safe as usize && output_ptr < safe_output_ptr { let literal_length = (token >> 4) as usize; let mut match_length = MINMATCH + (token & 0xF) as usize; // output_ptr <= safe_output_ptr should guarantee we have enough space in output debug_assert!( unsafe { output_ptr.add(literal_length + match_length) } <= output_end, "{literal_length} + {match_length} {} won't fit ", literal_length + match_length ); // Copy the literal // The literal is at max 16 bytes, and the is_safe_distance check assures // that we are far away enough from the end so we can safely copy 16 bytes unsafe { core::ptr::copy_nonoverlapping(input_ptr, output_ptr, 16); input_ptr = input_ptr.add(literal_length); output_ptr = output_ptr.add(literal_length); } // input_ptr <= input_ptr_safe should guarantee we have enough space in input debug_assert!(input_ptr_end as usize - input_ptr as usize >= 2); let offset = read_u16_ptr(&mut input_ptr) as usize; let output_len = unsafe { output_ptr.offset_from(output_base) as usize }; let offset = offset.min(output_len + ext_dict.len()); // Check if part of the match is in the external dict if USE_DICT && offset > output_len { let copied = unsafe { copy_from_dict(output_base, &mut output_ptr, ext_dict, offset, match_length) }; if copied == match_length { continue; } // match crosses ext_dict and output match_length -= copied; } // Calculate the start of this duplicate segment. At this point offset was already // checked to be in bounds and the external dictionary copy, if any, was // already copied and subtracted from match_length. let start_ptr = unsafe { output_ptr.sub(offset) }; debug_assert!(start_ptr >= output_base); debug_assert!(start_ptr < output_end); debug_assert!(unsafe { output_end.offset_from(start_ptr) as usize } >= match_length); // In this branch we know that match_length is at most 18 (14 + MINMATCH). // But the blocks can overlap, so make sure they are at least 18 bytes apart // to enable an optimized copy of 18 bytes. if offset >= match_length { unsafe { // _copy_, not copy_nonoverlapping, as it may overlap. // Compiles to the same assembly on x86_64. core::ptr::copy(start_ptr, output_ptr, 18); output_ptr = output_ptr.add(match_length); } } else { unsafe { duplicate_overlapping(&mut output_ptr, start_ptr, match_length); } } continue; } // Now, we read the literals section. // Literal Section // If the initial value is 15, it is indicated that another byte will be read and added to // it let mut literal_length = (token >> 4) as usize; if literal_length != 0 { if literal_length == 15 { // The literal_length took the maximal value, indicating that there are more // than 15 literal_length bytes. We read the extra integer. literal_length += read_integer_ptr(&mut input_ptr, input_ptr_end)?
as usize; } #[cfg(not(feature = "unchecked-decode"))] { // Check if literal is out of bounds for the input, and if there is enough space on // the output if literal_length > input_ptr_end as usize - input_ptr as usize { return Err(DecompressError::LiteralOutOfBounds); } if literal_length > unsafe { output_end.offset_from(output_ptr) as usize } { return Err(DecompressError::OutputTooSmall { expected: unsafe { output_ptr.offset_from(output_base) as usize } + literal_length, actual: output.capacity(), }); } } unsafe { fastcpy_unsafe::slice_copy(input_ptr, output_ptr, literal_length); output_ptr = output_ptr.add(literal_length); input_ptr = input_ptr.add(literal_length); } } // If the input stream is emptied, we break out of the loop. This is only the case // in the end of the stream, since the block is intact otherwise. if input_ptr >= input_ptr_end { break; } // Read duplicate section #[cfg(not(feature = "unchecked-decode"))] { if (input_ptr_end as usize) - (input_ptr as usize) < 2 { return Err(DecompressError::ExpectedAnotherByte); } } let offset = read_u16_ptr(&mut input_ptr) as usize; // Obtain the initial match length. The match length is the length of the duplicate segment // which will later be copied from data previously decompressed into the output buffer. The // initial length is derived from the second part of the token (the lower nibble), which we read // earlier. Since having a match length of less than 4 would mean negative compression // ratio, we start at 4 (MINMATCH). // The initial match length can maximally be 19 (MINMATCH + 15). As with the literal length, // this indicates that there are more bytes to read. let mut match_length = MINMATCH + (token & 0xF) as usize; if match_length == MINMATCH + 15 { // The match length took the maximal value, indicating that there are more bytes. We // read the extra integer. match_length += read_integer_ptr(&mut input_ptr, input_ptr_end)? as usize; } // We now copy from the already decompressed buffer. This allows us to store duplicates // by simply referencing the other location. let output_len = unsafe { output_ptr.offset_from(output_base) as usize }; // We'll do a bounds check unless unchecked-decode is enabled. #[cfg(not(feature = "unchecked-decode"))] { if offset > output_len + ext_dict.len() { return Err(DecompressError::OffsetOutOfBounds); } if match_length > unsafe { output_end.offset_from(output_ptr) as usize } { return Err(DecompressError::OutputTooSmall { expected: output_len + match_length, actual: output.capacity(), }); } } if USE_DICT && offset > output_len { let copied = unsafe { copy_from_dict(output_base, &mut output_ptr, ext_dict, offset, match_length) }; if copied == match_length { #[cfg(not(feature = "unchecked-decode"))] { if input_ptr >= input_ptr_end { return Err(DecompressError::ExpectedAnotherByte); } } continue; } // match crosses ext_dict and output match_length -= copied; } // Calculate the start of this duplicate segment. At this point offset was already checked // to be in bounds and the external dictionary copy, if any, was already copied and // subtracted from match_length.
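// Illustrative numbers: if 100 bytes were written so far and offset == 4, the match
// starts at output_base + 96, i.e. output_ptr.sub(4); with match_length == 8 the copy
// overlaps itself and `duplicate` falls back to `duplicate_overlapping`.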
let start_ptr = unsafe { output_ptr.sub(offset) }; debug_assert!(start_ptr >= output_base); debug_assert!(start_ptr < output_end); debug_assert!(unsafe { output_end.offset_from(start_ptr) as usize } >= match_length); unsafe { duplicate(&mut output_ptr, output_end, start_ptr, match_length); } #[cfg(not(feature = "unchecked-decode"))] { if input_ptr >= input_ptr_end { return Err(DecompressError::ExpectedAnotherByte); } } } unsafe { output.set_pos(output_ptr.offset_from(output_base) as usize); Ok(output_ptr.offset_from(output_start_pos_ptr) as usize) } } /// Decompress all bytes of `input` into `output`. /// `output` should be preallocated with a size of the uncompressed data. #[inline] pub fn decompress_into(input: &[u8], output: &mut [u8]) -> Result<usize, DecompressError> { decompress_internal::<false, _>(input, &mut SliceSink::new(output, 0), b"") } /// Decompress all bytes of `input` into `output`. /// /// Returns the number of bytes written (decompressed) into `output`. #[inline] pub fn decompress_into_with_dict( input: &[u8], output: &mut [u8], ext_dict: &[u8], ) -> Result<usize, DecompressError> { decompress_internal::<true, _>(input, &mut SliceSink::new(output, 0), ext_dict) } /// Decompress all bytes of `input` into a new vec. /// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the uncompressed size. /// /// # Panics /// May panic if the parameter `min_uncompressed_size` is smaller than the /// uncompressed data. #[inline] pub fn decompress_with_dict( input: &[u8], min_uncompressed_size: usize, ext_dict: &[u8], ) -> Result<Vec<u8>, DecompressError> { // Allocate a vector to contain the decompressed stream. let mut vec = Vec::with_capacity(min_uncompressed_size); let decomp_len = decompress_internal::<true, _>(input, &mut PtrSink::from_vec(&mut vec, 0), ext_dict)?; unsafe { vec.set_len(decomp_len); } Ok(vec) } /// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in /// little endian. Can be used in conjunction with `compress_prepend_size` #[inline] pub fn decompress_size_prepended(input: &[u8]) -> Result<Vec<u8>, DecompressError> { let (uncompressed_size, input) = super::uncompressed_size(input)?; decompress(input, uncompressed_size) } /// Decompress all bytes of `input` into a new vec. /// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the uncompressed size. /// /// # Panics /// May panic if the parameter `min_uncompressed_size` is smaller than the /// uncompressed data. #[inline] pub fn decompress(input: &[u8], min_uncompressed_size: usize) -> Result<Vec<u8>, DecompressError> { // Allocate a vector to contain the decompressed stream. let mut vec = Vec::with_capacity(min_uncompressed_size); let decomp_len = decompress_internal::<false, _>(input, &mut PtrSink::from_vec(&mut vec, 0), b"")?; unsafe { vec.set_len(decomp_len); } Ok(vec) } /// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in /// little endian. Can be used in conjunction with `compress_prepend_size_with_dict` #[inline] pub fn decompress_size_prepended_with_dict( input: &[u8], ext_dict: &[u8], ) -> Result<Vec<u8>, DecompressError> { let (uncompressed_size, input) = super::uncompressed_size(input)?; decompress_with_dict(input, uncompressed_size, ext_dict) } #[cfg(test)] mod test { use super::*; #[test] fn all_literal() { assert_eq!(decompress(&[0x30, b'a', b'4', b'9'], 3).unwrap(), b"a49"); }
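// Roundtrip through the size-prepended helpers; an illustrative addition pairing
// `compress_prepend_size` (re-exported from `block::compress`) with
// `decompress_size_prepended`.
#[test]
fn size_prepended_roundtrip() {
    let input = b"lz4 lz4 lz4 lz4 lz4 lz4 lz4 lz4";
    let compressed = crate::block::compress_prepend_size(input);
    let decompressed = decompress_size_prepended(&compressed).unwrap();
    assert_eq!(&input[..], &decompressed[..]);
}

// this error test is only valid with checked-decode.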
#[cfg(not(feature = "unchecked-decode"))] #[test] fn offset_oob() { decompress(&[0x10, b'a', 2, 0], 4).unwrap_err(); decompress(&[0x40, b'a', 1, 0], 4).unwrap_err(); } } lz4_flex-0.11.3/src/block/decompress_safe.rs000064400000000000000000000363131046102023000170340ustar 00000000000000//! The block decompression algorithm. use crate::block::DecompressError; use crate::block::MINMATCH; use crate::sink::Sink; use crate::sink::SliceSink; #[allow(unused_imports)] use alloc::vec; #[allow(unused_imports)] use alloc::vec::Vec; /// Read an integer. /// /// In LZ4, we encode small integers in a way that we can have an arbitrary number of bytes. In /// particular, we add the bytes repeatedly until we hit a non-0xFF byte. When we do, we add /// this byte to our sum and terminate the loop. /// /// # Example /// /// ```notest /// 255, 255, 255, 4, 2, 3, 4, 6, 7 /// ``` /// /// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 are ignored, because /// 4 is the first non-0xFF byte. #[inline] fn read_integer(input: &[u8], input_pos: &mut usize) -> Result<u32, DecompressError> { // We start at zero and count upwards. let mut n: u32 = 0; // If this byte takes value 255 (the maximum value it can take), another byte is read // and added to the sum. This repeats until a byte lower than 255 is read. loop { // We add the next byte until we get a byte which we add to the counting variable. let extra: u8 = *input .get(*input_pos) .ok_or(DecompressError::ExpectedAnotherByte)?; *input_pos += 1; n += extra as u32; // We continue if we got 255, break otherwise. if extra != 0xFF { break; } } // 255, 255, 255, 8 // 111, 111, 111, 101 Ok(n) } /// Read a little-endian 16-bit integer from the input stream. #[inline] fn read_u16(input: &[u8], input_pos: &mut usize) -> Result<u16, DecompressError> { let dst = input .get(*input_pos..*input_pos + 2) .ok_or(DecompressError::ExpectedAnotherByte)?; *input_pos += 2; Ok(u16::from_le_bytes(dst.try_into().unwrap())) } const FIT_TOKEN_MASK_LITERAL: u8 = 0b00001111; const FIT_TOKEN_MASK_MATCH: u8 = 0b11110000; #[test] fn check_token() { assert!(!does_token_fit(15)); assert!(does_token_fit(14)); assert!(does_token_fit(114)); assert!(!does_token_fit(0b11110000)); assert!(does_token_fit(0b10110000)); } /// The token consists of two parts, the literal length (upper 4 bits) and match_length (lower 4 /// bits). If the literal length and match_length are both below 15, we don't need to read additional /// data, so the token does fit the metadata. #[inline] fn does_token_fit(token: u8) -> bool { !((token & FIT_TOKEN_MASK_LITERAL) == FIT_TOKEN_MASK_LITERAL || (token & FIT_TOKEN_MASK_MATCH) == FIT_TOKEN_MASK_MATCH) } /// Decompress all bytes of `input` into `output`. /// /// Returns the number of bytes written (decompressed) into `output`. #[inline(always)] // (always) necessary to get the best performance in non LTO builds pub(crate) fn decompress_internal<const USE_DICT: bool, S: Sink>( input: &[u8], output: &mut S, ext_dict: &[u8], ) -> Result<usize, DecompressError> { let mut input_pos = 0; let initial_output_pos = output.pos(); let safe_input_pos = input .len() .saturating_sub(16 /* literal copy */ + 2 /* u16 match offset */); let mut safe_output_pos = output .capacity() .saturating_sub(16 /* literal copy */ + 18 /* match copy */); if USE_DICT { // In the dictionary case the output pointer is moved by the match length in the dictionary. // This may be up to 17 bytes without exiting the loop. So we need to ensure that we have // at least additional 17 bytes of space left in the output buffer in the fast loop.
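// Illustrative arithmetic: with a 65536-byte output capacity the fast loop may run
// while output.pos() < 65536 - (16 + 18) - 17 = 65485 in the dictionary case.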
safe_output_pos = safe_output_pos.saturating_sub(17); }; // Exhaust the decoder by reading and decompressing all blocks until the remaining buffer is // empty. loop { // Read the token. The token is the first byte in a block. It is divided into two 4-bit // subtokens, the higher and the lower. // This token contains two 4-bit "fields", a higher and a lower, representing the literals' // length and the back reference's length, respectively. let token = *input .get(input_pos) .ok_or(DecompressError::ExpectedAnotherByte)?; input_pos += 1; // Checking for hot-loop. // In most cases the metadata does fit in a single 1-byte token (statistically) and we are in // a safe-distance to the end. This enables some optimized handling. // // Ideally we want to check for safe output pos like: output.pos() <= safe_output_pos; But // that doesn't work when the safe_output_pos is 0 due to saturating_sub. So we use // `<` instead of `<=`, which covers that case. if does_token_fit(token) && input_pos <= safe_input_pos && output.pos() < safe_output_pos { let literal_length = (token >> 4) as usize; // casting to [u8; 16] doesn't seem to make a difference vs &[u8] (same assembly) let input: &[u8; 16] = input[input_pos..input_pos + 16].try_into().unwrap(); // Copy the literal // The literal is at max 14 bytes, and the is_safe_distance check assures // that we are far away enough from the end so we can safely copy 16 bytes output.extend_from_slice_wild(input, literal_length); input_pos += literal_length; // clone as we don't want to mutate let offset = read_u16(input, &mut literal_length.clone())? as usize; input_pos += 2; let mut match_length = MINMATCH + (token & 0xF) as usize; if USE_DICT && offset > output.pos() { let copied = copy_from_dict(output, ext_dict, offset, match_length)?; if copied == match_length { continue; } // match crosses ext_dict and output, offset is still correct as output pos // increased match_length -= copied; } // In this branch we know that match_length is at most 18 (14 + MINMATCH). // But the blocks can overlap, so make sure they are at least 18 bytes apart // to enable an optimized copy of 18 bytes. let start = output.pos().saturating_sub(offset); if offset >= match_length { output.extend_from_within(start, 18, match_length); } else { output.extend_from_within_overlapping(start, match_length) } continue; } // Now, we read the literals section. // Literal Section // If the initial value is 15, it is indicated that another byte will be read and added to // it let mut literal_length = (token >> 4) as usize; if literal_length != 0 { if literal_length == 15 { // The literal_length took the maximal value, indicating that there are more // than 15 literal_length bytes. We read the extra integer. literal_length += read_integer(input, &mut input_pos)? as usize; } if literal_length > input.len() - input_pos { return Err(DecompressError::LiteralOutOfBounds); } #[cfg(not(feature = "unchecked-decode"))] if literal_length > output.capacity() - output.pos() { return Err(DecompressError::OutputTooSmall { expected: output.pos() + literal_length, actual: output.capacity(), }); } output.extend_from_slice(&input[input_pos..input_pos + literal_length]); input_pos += literal_length; } // If the input stream is emptied, we break out of the loop. This is only the case // in the end of the stream, since the block is intact otherwise. if input_pos >= input.len() { break; } let offset = read_u16(input, &mut input_pos)? as usize; // Obtain the initial match length.
The match length is the length of the duplicate segment // which will later be copied from data previously decompressed into the output buffer. The // initial length is derived from the second part of the token (the lower nibble), which we read // earlier. Since having a match length of less than 4 would mean negative compression // ratio, we start at 4 (MINMATCH). // The initial match length can maximally be 19. As with the literal length, this indicates // that there are more bytes to read. let mut match_length = MINMATCH + (token & 0xF) as usize; if match_length == MINMATCH + 15 { // The match length took the maximal value, indicating that there are more bytes. We // read the extra integer. match_length += read_integer(input, &mut input_pos)? as usize; } #[cfg(not(feature = "unchecked-decode"))] if output.pos() + match_length > output.capacity() { return Err(DecompressError::OutputTooSmall { expected: output.pos() + match_length, actual: output.capacity(), }); } if USE_DICT && offset > output.pos() { let copied = copy_from_dict(output, ext_dict, offset, match_length)?; if copied == match_length { continue; } // match crosses ext_dict and output, offset is still correct as output_len was // increased match_length -= copied; } // We now copy from the already decompressed buffer. This allows us to store duplicates // by simply referencing the other location. duplicate_slice(output, offset, match_length)?; } Ok(output.pos() - initial_output_pos) } #[inline] fn copy_from_dict( output: &mut impl Sink, ext_dict: &[u8], offset: usize, match_length: usize, ) -> Result<usize, DecompressError> { // If we're here we know offset > output.pos debug_assert!(offset > output.pos()); let (dict_offset, did_overflow) = ext_dict.len().overflowing_sub(offset - output.pos()); if did_overflow { return Err(DecompressError::OffsetOutOfBounds); } // Can't copy past ext_dict len, the match may cross dict and output let dict_match_length = match_length.min(ext_dict.len() - dict_offset); let ext_match = &ext_dict[dict_offset..dict_offset + dict_match_length]; output.extend_from_slice(ext_match); Ok(dict_match_length) } /// Extends output by self-referential copies #[inline(always)] // (always) necessary otherwise compiler fails to inline it fn duplicate_slice( output: &mut impl Sink, offset: usize, match_length: usize, ) -> Result<(), DecompressError> { // This function assumes output will fit match_length, it might panic otherwise. if match_length > offset { duplicate_overlapping_slice(output, offset, match_length)?; } else { let (start, did_overflow) = output.pos().overflowing_sub(offset); if did_overflow { return Err(DecompressError::OffsetOutOfBounds); } match match_length { 0..=32 if output.pos() + 32 <= output.capacity() => { output.extend_from_within(start, 32, match_length) } 33..=64 if output.pos() + 64 <= output.capacity() => { output.extend_from_within(start, 64, match_length) } _ => output.extend_from_within(start, match_length, match_length), } } Ok(()) } /// self-referential copy for the case data start (end of output - offset) + match_length overlaps /// into output #[inline] fn duplicate_overlapping_slice( sink: &mut impl Sink, offset: usize, match_length: usize, ) -> Result<(), DecompressError> { // This function assumes output will fit match_length, it might panic otherwise.
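// The offset == 1 case below is plain run-length expansion: every copied byte equals
// the last byte written (e.g. "a" with match_length 4 becomes "aaaaa"), so it can be
// served by extend_with_fill instead of a byte-by-byte self-referential copy.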
let (start, did_overflow) = sink.pos().overflowing_sub(offset); if did_overflow { return Err(DecompressError::OffsetOutOfBounds); } if offset == 1 { let val = sink.byte_at(start); sink.extend_with_fill(val, match_length); } else { sink.extend_from_within_overlapping(start, match_length); } Ok(()) } /// Decompress all bytes of `input` into `output`. /// `output` should be preallocated with a size of the uncompressed data. #[inline] pub fn decompress_into(input: &[u8], output: &mut [u8]) -> Result<usize, DecompressError> { decompress_internal::<false, _>(input, &mut SliceSink::new(output, 0), b"") } /// Decompress all bytes of `input` into `output`. /// /// Returns the number of bytes written (decompressed) into `output`. #[inline] pub fn decompress_into_with_dict( input: &[u8], output: &mut [u8], ext_dict: &[u8], ) -> Result<usize, DecompressError> { decompress_internal::<true, _>(input, &mut SliceSink::new(output, 0), ext_dict) } /// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in /// little endian. Can be used in conjunction with `compress_prepend_size` #[inline] pub fn decompress_size_prepended(input: &[u8]) -> Result<Vec<u8>, DecompressError> { let (uncompressed_size, input) = super::uncompressed_size(input)?; decompress(input, uncompressed_size) } /// Decompress all bytes of `input` into a new vec. /// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the uncompressed size. /// /// # Panics /// May panic if the parameter `min_uncompressed_size` is smaller than the /// uncompressed data. #[inline] pub fn decompress(input: &[u8], min_uncompressed_size: usize) -> Result<Vec<u8>, DecompressError> { let mut decompressed: Vec<u8> = vec![0; min_uncompressed_size]; let decomp_len = decompress_internal::<false, _>(input, &mut SliceSink::new(&mut decompressed, 0), b"")?; decompressed.truncate(decomp_len); Ok(decompressed) } /// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in /// little endian. Can be used in conjunction with `compress_prepend_size_with_dict` #[inline] pub fn decompress_size_prepended_with_dict( input: &[u8], ext_dict: &[u8], ) -> Result<Vec<u8>, DecompressError> { let (uncompressed_size, input) = super::uncompressed_size(input)?; decompress_with_dict(input, uncompressed_size, ext_dict) } /// Decompress all bytes of `input` into a new vec. /// The passed parameter `min_uncompressed_size` needs to be equal to or larger than the uncompressed size. /// /// # Panics /// May panic if the parameter `min_uncompressed_size` is smaller than the /// uncompressed data. #[inline] pub fn decompress_with_dict( input: &[u8], min_uncompressed_size: usize, ext_dict: &[u8], ) -> Result<Vec<u8>, DecompressError> { let mut decompressed: Vec<u8> = vec![0; min_uncompressed_size]; let decomp_len = decompress_internal::<true, _>(input, &mut SliceSink::new(&mut decompressed, 0), ext_dict)?; decompressed.truncate(decomp_len); Ok(decompressed) } #[cfg(test)] mod test { use super::*; #[test] fn all_literal() { assert_eq!(decompress(&[0x30, b'a', b'4', b'9'], 3).unwrap(), b"a49"); } // this error test is only valid in safe-decode. #[cfg(feature = "safe-decode")] #[test] fn offset_oob() { decompress(&[0x10, b'a', 2, 0], 4).unwrap_err(); decompress(&[0x40, b'a', 1, 0], 4).unwrap_err(); } } lz4_flex-0.11.3/src/block/hashtable.rs000064400000000000000000000106271046102023000156250ustar 00000000000000#[allow(unused_imports)] use alloc::boxed::Box; /// The Hashtable trait used by the compression to store hashed bytes to their position. /// `val` can be maximum the size of the input in bytes.
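///
/// A minimal usage sketch (`HashTable4K`, defined below, is one implementor; `input`
/// and `pos` stand in for the compressor's current state):
///
/// ```notest
/// let mut table = HashTable4K::new();
/// let hash = HashTable4K::get_hash_at(input, pos);
/// table.put_at(hash, pos);
/// let candidate_pos = table.get_at(hash);
/// ```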
/// /// `pos` can have a maximum value of u16::MAX or 65535 /// If the hashtable is smaller it needs to reduce the pos to its space, e.g. by right /// shifting. /// /// Duplication dictionary size. /// /// Every four bytes is assigned an entry. When this number is lower, fewer entries exist, and /// thus collisions are more likely, hurting the compression ratio. /// hashes and right shifts to a maximum value of 16bit, 65535 /// The right shift is done in order to not exceed the hashtable's capacity #[inline] fn hash(sequence: u32) -> u32 { (sequence.wrapping_mul(2654435761_u32)) >> 16 } /// hashes and right shifts to a maximum value of 16bit, 65535 /// The right shift is done in order to not exceed the hashtable's capacity #[cfg(target_pointer_width = "64")] #[inline] fn hash5(sequence: usize) -> u32 { let primebytes = if cfg!(target_endian = "little") { 889523592379_usize } else { 11400714785074694791_usize }; (((sequence << 24).wrapping_mul(primebytes)) >> 48) as u32 } pub trait HashTable { fn get_at(&self, pos: usize) -> usize; fn put_at(&mut self, pos: usize, val: usize); #[allow(dead_code)] fn clear(&mut self); #[inline] #[cfg(target_pointer_width = "64")] fn get_hash_at(input: &[u8], pos: usize) -> usize { hash5(super::compress::get_batch_arch(input, pos)) as usize } #[inline] #[cfg(target_pointer_width = "32")] fn get_hash_at(input: &[u8], pos: usize) -> usize { hash(super::compress::get_batch(input, pos)) as usize } } const HASHTABLE_SIZE_4K: usize = 4 * 1024; const HASHTABLE_BIT_SHIFT_4K: usize = 4; #[derive(Debug)] #[repr(align(64))] pub struct HashTable4KU16 { dict: Box<[u16; HASHTABLE_SIZE_4K]>, } impl HashTable4KU16 { #[inline] pub fn new() -> Self { // This generates more efficient assembly in contrast to Box::new(slice), because of an // optimized call alloc_zeroed, vs.
alloc + memset // try_into is optimized away let dict = alloc::vec![0; HASHTABLE_SIZE_4K] .into_boxed_slice() .try_into() .unwrap(); Self { dict } } } impl HashTable for HashTable4KU16 { #[inline] fn get_at(&self, hash: usize) -> usize { self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize } #[inline] fn put_at(&mut self, hash: usize, val: usize) { self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u16; } #[inline] fn clear(&mut self) { self.dict.fill(0); } #[inline] fn get_hash_at(input: &[u8], pos: usize) -> usize { hash(super::get_batch(input, pos)) as usize } } #[derive(Debug)] pub struct HashTable4K { dict: Box<[u32; HASHTABLE_SIZE_4K]>, } impl HashTable4K { #[inline] pub fn new() -> Self { let dict = alloc::vec![0; HASHTABLE_SIZE_4K] .into_boxed_slice() .try_into() .unwrap(); Self { dict } } #[cold] #[allow(dead_code)] pub fn reposition(&mut self, offset: u32) { for i in self.dict.iter_mut() { *i = i.saturating_sub(offset); } } } impl HashTable for HashTable4K { #[inline] fn get_at(&self, hash: usize) -> usize { self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize } #[inline] fn put_at(&mut self, hash: usize, val: usize) { self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u32; } #[inline] fn clear(&mut self) { self.dict.fill(0); } } const HASHTABLE_SIZE_8K: usize = 8 * 1024; const HASH_TABLE_BIT_SHIFT_8K: usize = 3; #[derive(Debug)] pub struct HashTable8K { dict: Box<[u32; HASHTABLE_SIZE_8K]>, } #[allow(dead_code)] impl HashTable8K { #[inline] pub fn new() -> Self { let dict = alloc::vec![0; HASHTABLE_SIZE_8K] .into_boxed_slice() .try_into() .unwrap(); Self { dict } } } impl HashTable for HashTable8K { #[inline] fn get_at(&self, hash: usize) -> usize { self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] as usize } #[inline] fn put_at(&mut self, hash: usize, val: usize) { self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] = val as u32; } #[inline] fn clear(&mut self) { self.dict.fill(0); } } lz4_flex-0.11.3/src/block/mod.rs000064400000000000000000000122301046102023000144410ustar 00000000000000//! LZ4 Block Format //! //! As defined in <https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md> //! //! Currently for no_std support only the block format is supported. //! //! # Example: block format roundtrip //! ``` //! use lz4_flex::block::{compress_prepend_size, decompress_size_prepended}; //! let input: &[u8] = b"Hello people, what's up?"; //! let compressed = compress_prepend_size(input); //! let uncompressed = decompress_size_prepended(&compressed).unwrap(); //! assert_eq!(input, uncompressed); //! ``` //! #[cfg_attr(feature = "safe-encode", forbid(unsafe_code))] pub(crate) mod compress; pub(crate) mod hashtable; #[cfg(feature = "safe-decode")] #[cfg_attr(feature = "safe-decode", forbid(unsafe_code))] pub(crate) mod decompress_safe; #[cfg(feature = "safe-decode")] pub(crate) use decompress_safe as decompress; #[cfg(not(feature = "safe-decode"))] pub(crate) mod decompress; pub use compress::*; pub use decompress::*; use core::fmt; pub(crate) const WINDOW_SIZE: usize = 64 * 1024; /// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md#end-of-block-restrictions /// The last match must start at least 12 bytes before the end of block. The last match is part of /// the penultimate sequence. It is followed by the last sequence, which contains only literals. /// /// Note that, as a consequence, an independent block < 13 bytes cannot be compressed, because the /// match must copy "something", so it needs at least one prior byte.
/// /// When a block can reference data from another block, it can start immediately with a match and no /// literal, so a block of 12 bytes can be compressed. const MFLIMIT: usize = 12; /// The last 5 bytes of input are always literals. Therefore, the last sequence contains at least 5 /// bytes. const LAST_LITERALS: usize = 5; /// Due to the way the compression loop is arranged we may read up to (register_size - 2) bytes from the /// current position. So we must end the matches 6 bytes before the end, 1 more than required by the /// spec. const END_OFFSET: usize = LAST_LITERALS + 1; /// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md#end-of-block-restrictions /// Minimum length of a block /// /// MFLIMIT + 1 for the token. const LZ4_MIN_LENGTH: usize = MFLIMIT + 1; const MAXD_LOG: usize = 16; const MAX_DISTANCE: usize = (1 << MAXD_LOG) - 1; #[allow(dead_code)] const MATCH_LENGTH_MASK: u32 = (1_u32 << 4) - 1; // 0b1111 / 15 /// The minimum length of a duplicate const MINMATCH: usize = 4; #[allow(dead_code)] const FASTLOOP_SAFE_DISTANCE: usize = 64; /// Switch for the hashtable size byU16 #[allow(dead_code)] static LZ4_64KLIMIT: usize = (64 * 1024) + (MFLIMIT - 1); /// An error representing invalid compressed data. #[derive(Debug)] #[non_exhaustive] pub enum DecompressError { /// The provided output is too small OutputTooSmall { /// Minimum expected output size expected: usize, /// Actual size of output actual: usize, }, /// Literal is out of bounds of the input LiteralOutOfBounds, /// Expected another byte, but none found. ExpectedAnotherByte, /// Deduplication offset out of bounds (not in buffer). OffsetOutOfBounds, } #[derive(Debug)] #[non_exhaustive] /// Errors that can happen during compression. pub enum CompressError { /// The provided output is too small. OutputTooSmall, } impl fmt::Display for DecompressError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { DecompressError::OutputTooSmall { expected, actual } => { write!( f, "provided output is too small for the decompressed data, actual {actual}, expected \ {expected}" ) } DecompressError::LiteralOutOfBounds => { f.write_str("literal is out of bounds of the input") } DecompressError::ExpectedAnotherByte => { f.write_str("expected another byte, found none") } DecompressError::OffsetOutOfBounds => { f.write_str("the offset to copy is not contained in the decompressed buffer") } } } } impl fmt::Display for CompressError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { CompressError::OutputTooSmall => f.write_str( "output is too small for the compressed data, use get_maximum_output_size to \ reserve enough space", ), } } } #[cfg(feature = "std")] impl std::error::Error for DecompressError {} #[cfg(feature = "std")] impl std::error::Error for CompressError {} /// This can be used in conjunction with `decompress_size_prepended`. /// It will read the first 4 bytes as little-endian encoded length, and return /// the rest of the bytes after the length encoding. #[inline] pub fn uncompressed_size(input: &[u8]) -> Result<(usize, &[u8]), DecompressError> { let size = input.get(..4).ok_or(DecompressError::ExpectedAnotherByte)?; let size: &[u8; 4] = size.try_into().unwrap(); let uncompressed_size = u32::from_le_bytes(*size) as usize; let rest = &input[4..]; Ok((uncompressed_size, rest)) } lz4_flex-0.11.3/src/fastcpy.rs000064400000000000000000000077061046102023000142530ustar 00000000000000//! # FastCpy //! //! The Rust Compiler calls `memcpy` for slices of unknown length. //!
This crate provides a faster implementation of `memcpy` for slices up to 32bytes (64bytes with `avx`). //! If you know most of your copy operations are not too big you can use `fastcpy` to speed up your program. //! //! `fastcpy` is designed to contain not too much assembly, so the overhead is low. //! //! As a fallback the standard `memcpy` is called //! //! ## Double Copy Trick //! `fastcpy` employs a double copy trick to copy slices of length 4-32bytes (64bytes with `avx`). //! E.g. a slice of length 6 can be copied with two unconditional copy operations. //! //! /// [1, 2, 3, 4, 5, 6] //! /// [1, 2, 3, 4] //! /// [3, 4, 5, 6] //! #[inline] pub fn slice_copy(src: &[u8], dst: &mut [u8]) { #[inline(never)] #[cold] #[track_caller] fn len_mismatch_fail(dst_len: usize, src_len: usize) -> ! { panic!( "source slice length ({}) does not match destination slice length ({})", src_len, dst_len, ); } if src.len() != dst.len() { len_mismatch_fail(src.len(), dst.len()); } let len = src.len(); if src.is_empty() { return; } if len < 4 { short_copy(src, dst); return; } if len < 8 { double_copy_trick::<4>(src, dst); return; } if len <= 16 { double_copy_trick::<8>(src, dst); return; } if len <= 32 { double_copy_trick::<16>(src, dst); return; } /// The code will use the vmovdqu instruction to copy 32 bytes at a time. #[cfg(target_feature = "avx")] { if len <= 64 { double_copy_trick::<32>(src, dst); return; } } // For larger sizes we use the default, which calls memcpy // memcpy does some virtual memory tricks to copy large chunks of memory. // // The theory should be that the checks above don't cost much relative to the copy call for // larger copies. // The bounds checks in `copy_from_slice` are elided. dst.copy_from_slice(src); } #[inline(always)] fn short_copy(src: &[u8], dst: &mut [u8]) { let len = src.len(); // length 1-3 dst[0] = src[0]; if len >= 2 { double_copy_trick::<2>(src, dst); } } #[inline(always)] /// [1, 2, 3, 4, 5, 6] /// [1, 2, 3, 4] /// [3, 4, 5, 6] fn double_copy_trick<const SIZE: usize>(src: &[u8], dst: &mut [u8]) { dst[0..SIZE].copy_from_slice(&src[0..SIZE]); dst[src.len() - SIZE..].copy_from_slice(&src[src.len() - SIZE..]); } #[cfg(test)] mod tests { use super::slice_copy; use proptest::prelude::*; proptest! { #[test] fn test_fast_short_slice_copy(left: Vec<u8>) { let mut right = vec![0u8; left.len()]; slice_copy(&left, &mut right); prop_assert_eq!(&left, &right); } } #[test] fn test_fast_short_slice_copy_edge_cases() { for len in 0..(512 * 2) { let left = (0..len).map(|i| i as u8).collect::<Vec<u8>>(); let mut right = vec![0u8; len]; slice_copy(&left, &mut right); assert_eq!(left, right); } } #[test] fn test_fail2() { let left = vec![ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ]; let mut right = vec![0u8; left.len()]; slice_copy(&left, &mut right); assert_eq!(left, right); } #[test] fn test_fail() { let left = vec![ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; let mut right = vec![0u8; left.len()]; slice_copy(&left, &mut right); assert_eq!(left, right); } } lz4_flex-0.11.3/src/fastcpy_unsafe.rs000064400000000000000000000115561046102023000156140ustar 00000000000000//! # FastCpy //! //! The Rust Compiler calls `memcpy` for slices of unknown length. //!
This crate provides a faster implementation of `memcpy` for slices up to 32bytes (64bytes with `avx`). //! If you know most of your copy operations are not too big you can use `fastcpy` to speed up your program. //! //! `fastcpy` is designed to contain not too much assembly, so the overhead is low. //! //! As a fallback the standard `memcpy` is called //! //! ## Double Copy Trick //! `fastcpy` employs a double copy trick to copy slices of length 4-32bytes (64bytes with `avx`). //! E.g. a slice of length 6 can be copied with two unconditional copy operations. //! //! /// [1, 2, 3, 4, 5, 6] //! /// [1, 2, 3, 4] //! /// [3, 4, 5, 6] //! #[inline] pub fn slice_copy(src: *const u8, dst: *mut u8, num_bytes: usize) { if num_bytes < 4 { short_copy(src, dst, num_bytes); return; } if num_bytes < 8 { double_copy_trick::<4>(src, dst, num_bytes); return; } if num_bytes <= 16 { double_copy_trick::<8>(src, dst, num_bytes); return; } //if num_bytes <= 32 { //double_copy_trick::<16>(src, dst, num_bytes); //return; //} // /// The code will use the vmovdqu instruction to copy 32 bytes at a time. //#[cfg(target_feature = "avx")] //{ //if num_bytes <= 64 { //double_copy_trick::<32>(src, dst, num_bytes); //return; //} //} // For larger sizes we use the default, which calls memcpy // memcpy does some virtual memory tricks to copy large chunks of memory. // // The theory should be that the checks above don't cost much relative to the copy call for // larger copies. // The bounds checks in `copy_from_slice` are elided. //unsafe { core::ptr::copy_nonoverlapping(src, dst, num_bytes) } wild_copy_from_src::<16>(src, dst, num_bytes) } // Inline never because otherwise we get a call to memcpy -.- #[inline] fn wild_copy_from_src<const SIZE: usize>( mut source: *const u8, mut dst: *mut u8, num_bytes: usize, ) { // Note: if the compiler auto-vectorizes this it'll hurt performance! // It's not the case for 16 bytes stepsize, but for 8 bytes. let l_last = unsafe { source.add(num_bytes - SIZE) }; let r_last = unsafe { dst.add(num_bytes - SIZE) }; let num_bytes = (num_bytes / SIZE) * SIZE; unsafe { let dst_ptr_end = dst.add(num_bytes); loop { core::ptr::copy_nonoverlapping(source, dst, SIZE); source = source.add(SIZE); dst = dst.add(SIZE); if dst >= dst_ptr_end { break; } } } unsafe { core::ptr::copy_nonoverlapping(l_last, r_last, SIZE); } } #[inline] fn short_copy(src: *const u8, dst: *mut u8, len: usize) { unsafe { *dst = *src; } if len >= 2 { double_copy_trick::<2>(src, dst, len); } } #[inline(always)] /// [1, 2, 3, 4, 5, 6] /// [1, 2, 3, 4] /// [3, 4, 5, 6] fn double_copy_trick<const SIZE: usize>(src: *const u8, dst: *mut u8, len: usize) { let l_end = unsafe { src.add(len - SIZE) }; let r_end = unsafe { dst.add(len - SIZE) }; unsafe { core::ptr::copy_nonoverlapping(src, dst, SIZE); core::ptr::copy_nonoverlapping(l_end, r_end, SIZE); } } #[cfg(test)] mod tests { use super::slice_copy; use proptest::prelude::*; proptest!
{ #[test] fn test_fast_short_slice_copy(left: Vec<u8>) { if left.is_empty() { return Ok(()); } let mut right = vec![0u8; left.len()]; slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len()); prop_assert_eq!(&left, &right); } } #[test] fn test_fast_short_slice_copy_edge_cases() { for len in 1..(512 * 2) { let left = (0..len).map(|i| i as u8).collect::<Vec<u8>>(); let mut right = vec![0u8; len]; slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len()); assert_eq!(left, right); } } #[test] fn test_fail2() { let left = vec![ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ]; let mut right = vec![0u8; left.len()]; slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len()); assert_eq!(left, right); } #[test] fn test_fail() { let left = vec![ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; let mut right = vec![0u8; left.len()]; slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len()); assert_eq!(left, right); } } lz4_flex-0.11.3/src/frame/compress.rs000064400000000000000000000442121046102023000155220ustar 00000000000000use std::{ fmt, hash::Hasher, io::{self, Write}, }; use twox_hash::XxHash32; use crate::{ block::{ compress::compress_internal, hashtable::{HashTable, HashTable4K}, }, sink::vec_sink_for_compression, }; use super::Error; use super::{ header::{BlockInfo, BlockMode, FrameInfo, BLOCK_INFO_SIZE, MAX_FRAME_INFO_SIZE}, BlockSize, }; use crate::block::WINDOW_SIZE; /// A writer for compressing an LZ4 stream. /// /// This `FrameEncoder` wraps any other writer that implements `io::Write`. /// Bytes written to this writer are compressed using the [LZ4 frame /// format](https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md). /// /// Writes are buffered automatically, so there's no need to wrap the given /// writer in a `std::io::BufWriter`. /// /// To ensure a well-formed stream the encoder must be finalized by calling /// either the [`finish()`], [`try_finish()`], or [`auto_finish()`] methods. /// /// [`finish()`]: Self::finish /// [`try_finish()`]: Self::try_finish /// [`auto_finish()`]: Self::auto_finish /// /// # Example 1 /// Serializing json values into a compressed file. /// /// ```no_run /// let compressed_file = std::fs::File::create("datafile").unwrap(); /// let mut compressor = lz4_flex::frame::FrameEncoder::new(compressed_file); /// serde_json::to_writer(&mut compressor, &serde_json::json!({ "an": "object" })).unwrap(); /// compressor.finish().unwrap(); /// ``` /// /// # Example 2 /// Serializing multiple json values into a compressed file using linked blocks. /// /// ```no_run /// let compressed_file = std::fs::File::create("datafile").unwrap(); /// let mut frame_info = lz4_flex::frame::FrameInfo::new(); /// frame_info.block_mode = lz4_flex::frame::BlockMode::Linked; /// let mut compressor = lz4_flex::frame::FrameEncoder::with_frame_info(frame_info, compressed_file); /// for i in 0..10u64 { /// serde_json::to_writer(&mut compressor, &serde_json::json!({ "i": i })).unwrap(); /// } /// compressor.finish().unwrap(); /// ``` pub struct FrameEncoder<W: io::Write> { /// Our buffer of uncompressed bytes.
src: Vec<u8>, /// Index into src: starting point of bytes not yet compressed src_start: usize, /// Index into src: end point of bytes not yet compressed src_end: usize, /// Index into src: starting point of external dictionary (applicable in Linked block mode) ext_dict_offset: usize, /// Length of external dictionary ext_dict_len: usize, /// Counter of bytes already compressed to the compression_table /// _Not_ the same as `content_len` as this is reset every 2GB. src_stream_offset: usize, /// Encoder table compression_table: HashTable4K, /// The underlying writer. w: W, /// Xxhash32 used when content checksum is enabled. content_hasher: XxHash32, /// Number of bytes compressed content_len: u64, /// The compressed bytes buffer. Bytes are compressed from src (usually) /// to dst before being written to w. dst: Vec<u8>, /// Whether we have an open frame in the output. is_frame_open: bool, /// Whether we have a frame closed in the output. data_to_frame_written: bool, /// The frame information to be used in this encoder. frame_info: FrameInfo, } impl<W: io::Write> FrameEncoder<W> { fn init(&mut self) { let max_block_size = self.frame_info.block_size.get_size(); let src_size = if self.frame_info.block_mode == BlockMode::Linked { // In linked mode we consume the input (bumping src_start) but leave the // beginning of src to be used as a prefix in subsequent blocks. // That is at least until we have at least `max_block_size + WINDOW_SIZE` // bytes in src, then we set up an ext_dict with the last WINDOW_SIZE bytes // and the input goes to the beginning of src again. // Since we always want to be able to write a full block (up to max_block_size) // we need a buffer with at least `max_block_size * 2 + WINDOW_SIZE` bytes. max_block_size * 2 + WINDOW_SIZE } else { max_block_size }; // Since this method is called potentially multiple times, don't reserve _additional_ // capacity if not required. self.src .reserve(src_size.saturating_sub(self.src.capacity())); self.dst.reserve( crate::block::compress::get_maximum_output_size(max_block_size) .saturating_sub(self.dst.capacity()), ); } /// Returns a wrapper around `self` that will finish the stream on drop. /// /// # Note /// Errors on drop get silently ignored. If you want to handle errors then use [`finish()`] or /// [`try_finish()`] instead. /// /// [`finish()`]: Self::finish /// [`try_finish()`]: Self::try_finish pub fn auto_finish(self) -> AutoFinishEncoder<W> { AutoFinishEncoder { encoder: Some(self), } } /// Creates a new Encoder with the specified FrameInfo. pub fn with_frame_info(frame_info: FrameInfo, wtr: W) -> Self { FrameEncoder { src: Vec::new(), w: wtr, // 16 KB hash table for matches, same as the reference implementation. compression_table: HashTable4K::new(), content_hasher: XxHash32::with_seed(0), content_len: 0, dst: Vec::new(), is_frame_open: false, data_to_frame_written: false, frame_info, src_start: 0, src_end: 0, ext_dict_offset: 0, ext_dict_len: 0, src_stream_offset: 0, } } /// Creates a new Encoder with the default settings. pub fn new(wtr: W) -> Self { Self::with_frame_info(Default::default(), wtr) } /// The frame information used by this Encoder. pub fn frame_info(&mut self) -> &FrameInfo { &self.frame_info } /// Consumes this encoder, flushing internal buffer and writing stream terminator. pub fn finish(mut self) -> Result<W, Error> { self.try_finish()?; Ok(self.w) } /// Attempt to finish this output stream, flushing internal buffer and writing stream /// terminator.
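///
/// A small self-contained sketch, writing into a `Vec<u8>` instead of a file:
///
/// ```no_run
/// use std::io::Write;
/// let mut compressor = lz4_flex::frame::FrameEncoder::new(Vec::new());
/// compressor.write_all(b"hello").unwrap();
/// compressor.try_finish().unwrap();
/// let compressed = compressor.into_inner();
/// ```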
pub fn try_finish(&mut self) -> Result<(), Error> { match self.flush() { Ok(()) => { // Empty input special case // https://github.com/ouch-org/ouch/pull/163#discussion_r1108965151 if !self.is_frame_open && !self.data_to_frame_written { self.begin_frame(0)?; } self.end_frame()?; self.data_to_frame_written = true; Ok(()) } Err(err) => Err(err.into()), } } /// Returns the underlying writer _without_ flushing the stream. /// This may leave the output in an unfinished state. pub fn into_inner(self) -> W { self.w } /// Gets a reference to the underlying writer in this encoder. pub fn get_ref(&self) -> &W { &self.w } /// Gets a mutable reference to the underlying writer in this encoder. /// /// Note that mutating the output/input state of the stream may corrupt /// this encoder, so care must be taken when using this method. pub fn get_mut(&mut self) -> &mut W { &mut self.w } /// Closes the frame by writing the end marker. fn end_frame(&mut self) -> Result<(), Error> { debug_assert!(self.is_frame_open); self.is_frame_open = false; if let Some(expected) = self.frame_info.content_size { if expected != self.content_len { return Err(Error::ContentLengthError { expected, actual: self.content_len, }); } } let mut block_info_buffer = [0u8; BLOCK_INFO_SIZE]; BlockInfo::EndMark.write(&mut block_info_buffer[..])?; self.w.write_all(&block_info_buffer[..])?; if self.frame_info.content_checksum { let content_checksum = self.content_hasher.finish() as u32; self.w.write_all(&content_checksum.to_le_bytes())?; } Ok(()) } /// Begin the frame by writing the frame header. /// It'll also set up the encoder for compressing blocks for the new frame. fn begin_frame(&mut self, buf_len: usize) -> io::Result<()> { self.is_frame_open = true; if self.frame_info.block_size == BlockSize::Auto { self.frame_info.block_size = BlockSize::from_buf_length(buf_len); } self.init(); let mut frame_info_buffer = [0u8; MAX_FRAME_INFO_SIZE]; let size = self.frame_info.write(&mut frame_info_buffer)?; self.w.write_all(&frame_info_buffer[..size])?; if self.content_len != 0 { // This is the second or later frame for this Encoder, // reset compressor state for the new frame. self.content_len = 0; self.src_stream_offset = 0; self.src.clear(); self.src_start = 0; self.src_end = 0; self.ext_dict_len = 0; self.content_hasher = XxHash32::with_seed(0); self.compression_table.clear(); } Ok(()) } /// Consumes the src contents between src_start and src_end, /// which shouldn't exceed the max block size.
fn write_block(&mut self) -> io::Result<()> { debug_assert!(self.is_frame_open); let max_block_size = self.frame_info.block_size.get_size(); debug_assert!(self.src_end - self.src_start <= max_block_size); // Reposition the compression table if we're anywhere near an overflowing hazard if self.src_stream_offset + max_block_size + WINDOW_SIZE >= u32::MAX as usize / 2 { self.compression_table .reposition((self.src_stream_offset - self.ext_dict_len) as _); self.src_stream_offset = self.ext_dict_len; } // input to the compressor, which may include a prefix when blocks are linked let input = &self.src[..self.src_end]; // the contents of the block are between src_start and src_end let src = &input[self.src_start..]; let dst_required_size = crate::block::compress::get_maximum_output_size(src.len()); let compress_result = if self.ext_dict_len != 0 { debug_assert_eq!(self.frame_info.block_mode, BlockMode::Linked); compress_internal::<_, true, _>( input, self.src_start, &mut vec_sink_for_compression(&mut self.dst, 0, 0, dst_required_size), &mut self.compression_table, &self.src[self.ext_dict_offset..self.ext_dict_offset + self.ext_dict_len], self.src_stream_offset, ) } else { compress_internal::<_, false, _>( input, self.src_start, &mut vec_sink_for_compression(&mut self.dst, 0, 0, dst_required_size), &mut self.compression_table, b"", self.src_stream_offset, ) }; let (block_info, block_data) = match compress_result.map_err(Error::CompressionError)? { comp_len if comp_len < src.len() => { (BlockInfo::Compressed(comp_len as _), &self.dst[..comp_len]) } _ => (BlockInfo::Uncompressed(src.len() as _), src), }; // Write the (un)compressed block to the writer and the block checksum (if applicable). let mut block_info_buffer = [0u8; BLOCK_INFO_SIZE]; block_info.write(&mut block_info_buffer[..])?; self.w.write_all(&block_info_buffer[..])?; self.w.write_all(block_data)?; if self.frame_info.block_checksums { let mut block_hasher = XxHash32::with_seed(0); block_hasher.write(block_data); let block_checksum = block_hasher.finish() as u32; self.w.write_all(&block_checksum.to_le_bytes())?; } // Content checksum, if applicable if self.frame_info.content_checksum { self.content_hasher.write(src); } // Buffer and offsets maintenance self.content_len += src.len() as u64; self.src_start += src.len(); debug_assert_eq!(self.src_start, self.src_end); if self.frame_info.block_mode == BlockMode::Linked { // In linked mode we consume the input (bumping src_start) but leave the // beginning of src to be used as a prefix in subsequent blocks. // That is at least until we have at least `max_block_size + WINDOW_SIZE` // bytes in src, then we set up an ext_dict with the last WINDOW_SIZE bytes // and the input goes to the beginning of src again. debug_assert_eq!(self.src.capacity(), max_block_size * 2 + WINDOW_SIZE); if self.src_start >= max_block_size + WINDOW_SIZE { // The ext_dict will become the last WINDOW_SIZE bytes self.ext_dict_offset = self.src_end - WINDOW_SIZE; self.ext_dict_len = WINDOW_SIZE; // Input goes in the beginning of the buffer again. self.src_stream_offset += self.src_end; self.src_start = 0; self.src_end = 0; } else if self.src_start + self.ext_dict_len > WINDOW_SIZE { // There's more than WINDOW_SIZE bytes of lookback adding the prefix and ext_dict. // Since we have a limited buffer we must shrink ext_dict in favor of the prefix, // so that we can fit up to max_block_size bytes between src_start and ext_dict // start.
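// Illustrative numbers: with WINDOW_SIZE == 64 KiB, src_start == 60_000 and
// ext_dict_len == 10_000 there are 70_000 bytes of lookback, so delta becomes
// min(10_000, 70_000 - 65_536) = 4_464 and ext_dict shrinks to 5_536 bytes.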
let delta = self .ext_dict_len .min(self.src_start + self.ext_dict_len - WINDOW_SIZE); self.ext_dict_offset += delta; self.ext_dict_len -= delta; debug_assert!(self.src_start + self.ext_dict_len >= WINDOW_SIZE) } debug_assert!( self.ext_dict_len == 0 || self.src_start + max_block_size <= self.ext_dict_offset ); } else { // In independent block mode we consume the entire src buffer // which is sized equal to the frame max_block_size. debug_assert_eq!(self.ext_dict_len, 0); debug_assert_eq!(self.src.capacity(), max_block_size); self.src_start = 0; self.src_end = 0; // Advance stream offset so we don't have to reset the match dict // for the next block. self.src_stream_offset += src.len(); } debug_assert!(self.src_start <= self.src_end); debug_assert!(self.src_start + max_block_size <= self.src.capacity()); Ok(()) } } impl<W: io::Write> io::Write for FrameEncoder<W> { fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> { if !self.is_frame_open && !buf.is_empty() { self.begin_frame(buf.len())?; } let buf_len = buf.len(); while !buf.is_empty() { let src_filled = self.src_end - self.src_start; let max_fill_len = self.frame_info.block_size.get_size() - src_filled; if max_fill_len == 0 { // make space by writing next block self.write_block()?; debug_assert_eq!(self.src_end, self.src_start); continue; } let fill_len = max_fill_len.min(buf.len()); vec_copy_overwriting(&mut self.src, self.src_end, &buf[..fill_len]); buf = &buf[fill_len..]; self.src_end += fill_len; } Ok(buf_len) } fn flush(&mut self) -> io::Result<()> { if self.src_start != self.src_end { self.write_block()?; } Ok(()) } } /// A wrapper around an [`FrameEncoder`] that finishes the stream on drop. /// /// This can be created by the [`auto_finish()`] method on the [`FrameEncoder`]. /// /// # Note /// Errors on drop get silently ignored. If you want to handle errors then use [`finish()`] or /// [`try_finish()`] instead. /// /// [`finish()`]: FrameEncoder::finish /// [`try_finish()`]: FrameEncoder::try_finish /// [`auto_finish()`]: FrameEncoder::auto_finish pub struct AutoFinishEncoder<W: io::Write> { // We wrap this in an option to take it during drop. encoder: Option<FrameEncoder<W>>, } impl<W: io::Write> Drop for AutoFinishEncoder<W> { fn drop(&mut self) { if let Some(mut encoder) = self.encoder.take() { let _ = encoder.try_finish(); } } } impl<W: io::Write> Write for AutoFinishEncoder<W> { fn write(&mut self, buf: &[u8]) -> io::Result<usize> { self.encoder.as_mut().unwrap().write(buf) } fn flush(&mut self) -> io::Result<()> { self.encoder.as_mut().unwrap().flush() } } impl<W: io::Write + fmt::Debug> fmt::Debug for FrameEncoder<W> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("FrameEncoder") .field("w", &self.w) .field("frame_info", &self.frame_info) .field("is_frame_open", &self.is_frame_open) .field("content_hasher", &self.content_hasher) .field("content_len", &self.content_len) .field("dst", &"[...]") .field("src", &"[...]") .field("src_start", &self.src_start) .field("src_end", &self.src_end) .field("ext_dict_offset", &self.ext_dict_offset) .field("ext_dict_len", &self.ext_dict_len) .field("src_stream_offset", &self.src_stream_offset) .finish() } } /// Copy `src` into `target` starting from the `target_start` index, overwriting existing data if any. #[inline] fn vec_copy_overwriting(target: &mut Vec<u8>, target_start: usize, src: &[u8]) { debug_assert!(target_start + src.len() <= target.capacity()); // By combining overwriting (copy_from_slice) and extending (extend_from_slice) // we can fill the ring buffer without initializing it (eg. filling with 0).
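// E.g. target.len() == 10, target_start == 8, src.len() == 5: the first 2 bytes
// overwrite existing data and the remaining 3 are appended, growing len to 13.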
lz4_flex-0.11.3/src/frame/decompress.rs000064400000000000000000000422431046102023000160350ustar 00000000000000use std::{
    fmt,
    hash::Hasher,
    io::{self, BufRead, ErrorKind},
    mem::size_of,
};

use twox_hash::XxHash32;

use super::header::{
    BlockInfo, BlockMode, FrameInfo, LZ4F_LEGACY_MAGIC_NUMBER, MAGIC_NUMBER_SIZE,
    MAX_FRAME_INFO_SIZE, MIN_FRAME_INFO_SIZE,
};
use super::Error;
use crate::{
    block::WINDOW_SIZE,
    sink::{vec_sink_for_decompression, SliceSink},
};

/// A reader for decompressing the LZ4 frame format
///
/// This Decoder wraps any other reader that implements `io::Read`.
/// Bytes read will be decompressed according to the [LZ4 frame format](
/// https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md).
///
/// # Example 1
/// Deserializing JSON values out of a compressed file.
///
/// ```no_run
/// let compressed_input = std::fs::File::open("datafile").unwrap();
/// let mut decompressed_input = lz4_flex::frame::FrameDecoder::new(compressed_input);
/// let json: serde_json::Value = serde_json::from_reader(decompressed_input).unwrap();
/// ```
///
/// # Example 2
/// Deserializing multiple JSON values out of a compressed file
///
/// ```no_run
/// let compressed_input = std::fs::File::open("datafile").unwrap();
/// let mut decompressed_input = lz4_flex::frame::FrameDecoder::new(compressed_input);
/// loop {
///     match serde_json::from_reader::<_, serde_json::Value>(&mut decompressed_input) {
///         Ok(json) => { println!("json {:?}", json); }
///         Err(e) if e.is_eof() => break,
///         Err(e) => panic!("{}", e),
///     }
/// }
/// ```
pub struct FrameDecoder<R: io::Read> {
    /// The underlying reader.
    r: R,
    /// The FrameInfo of the frame currently being decoded.
    /// It starts as `None` and is filled in once the FrameInfo is read from the input.
    /// It's reset to `None` once the frame EndMarker is read from the input.
    current_frame_info: Option<FrameInfo>,
    /// Xxhash32 used when content checksum is enabled.
    content_hasher: XxHash32,
    /// Total length of decompressed output for the current frame.
    content_len: u64,
    /// The compressed bytes buffer, taken from the underlying reader.
    src: Vec<u8>,
    /// The decompressed bytes buffer. Bytes are decompressed from src to dst
    /// before being passed back to the caller.
    dst: Vec<u8>,
    /// Index into dst and length: starting point of bytes previously output
    /// that are still part of the decompressor window.
    ext_dict_offset: usize,
    ext_dict_len: usize,
    /// Index into dst: starting point of bytes not yet read by caller.
    dst_start: usize,
    /// Index into dst: ending point of bytes not yet read by caller.
    dst_end: usize,
}

impl<R: io::Read> FrameDecoder<R> {
    /// Creates a new Decoder for the specified reader.
    pub fn new(rdr: R) -> FrameDecoder<R> {
        FrameDecoder {
            r: rdr,
            src: Default::default(),
            dst: Default::default(),
            ext_dict_offset: 0,
            ext_dict_len: 0,
            dst_start: 0,
            dst_end: 0,
            current_frame_info: None,
            content_hasher: XxHash32::with_seed(0),
            content_len: 0,
        }
    }

    /// Gets a reference to the underlying reader in this decoder.
    pub fn get_ref(&self) -> &R {
        &self.r
    }

    /// Gets a mutable reference to the underlying reader in this decoder.
    ///
    /// Note that mutation of the stream may result in surprising results if
    /// this decoder continues to be used.
    pub fn get_mut(&mut self) -> &mut R {
        &mut self.r
    }

    /// Consumes the FrameDecoder and returns the underlying reader.
    pub fn into_inner(self) -> R {
        self.r
    }
    fn read_frame_info(&mut self) -> Result<usize, io::Error> {
        let mut buffer = [0u8; MAX_FRAME_INFO_SIZE];

        match self.r.read(&mut buffer[..MAGIC_NUMBER_SIZE])? {
            0 => return Ok(0),
            MAGIC_NUMBER_SIZE => (),
            read => self.r.read_exact(&mut buffer[read..MAGIC_NUMBER_SIZE])?,
        }

        if u32::from_le_bytes(buffer[0..MAGIC_NUMBER_SIZE].try_into().unwrap())
            != LZ4F_LEGACY_MAGIC_NUMBER
        {
            match self
                .r
                .read(&mut buffer[MAGIC_NUMBER_SIZE..MIN_FRAME_INFO_SIZE])?
            {
                0 => return Ok(0),
                MIN_FRAME_INFO_SIZE => (),
                read => self
                    .r
                    .read_exact(&mut buffer[MAGIC_NUMBER_SIZE + read..MIN_FRAME_INFO_SIZE])?,
            }
        }
        let required = FrameInfo::read_size(&buffer[..MIN_FRAME_INFO_SIZE])?;
        if required != MIN_FRAME_INFO_SIZE && required != MAGIC_NUMBER_SIZE {
            self.r
                .read_exact(&mut buffer[MIN_FRAME_INFO_SIZE..required])?;
        }
        let frame_info = FrameInfo::read(&buffer[..required])?;
        if frame_info.dict_id.is_some() {
            // Unsupported right now so it must be None
            return Err(Error::DictionaryNotSupported.into());
        }

        let max_block_size = frame_info.block_size.get_size();
        let dst_size = if frame_info.block_mode == BlockMode::Linked {
            // In linked mode we consume the output (bumping dst_start) but leave the
            // beginning of dst to be used as a prefix in subsequent blocks.
            // That is, at least until we have `max_block_size + WINDOW_SIZE` bytes
            // in dst; then we set up an ext_dict with the last WINDOW_SIZE bytes
            // and the output goes to the beginning of dst again.
            // Since we always want to be able to write a full block (up to max_block_size)
            // we need a buffer with at least `max_block_size * 2 + WINDOW_SIZE` bytes.
            max_block_size * 2 + WINDOW_SIZE
        } else {
            max_block_size
        };
        self.src.clear();
        self.dst.clear();
        self.src.reserve_exact(max_block_size);
        self.dst.reserve_exact(dst_size);

        self.current_frame_info = Some(frame_info);
        self.content_hasher = XxHash32::with_seed(0);
        self.content_len = 0;
        self.ext_dict_len = 0;
        self.dst_start = 0;
        self.dst_end = 0;
        Ok(required)
    }

    #[inline]
    fn read_checksum(r: &mut R) -> Result<u32, io::Error> {
        let mut checksum_buffer = [0u8; size_of::<u32>()];
        r.read_exact(&mut checksum_buffer[..])?;
        let checksum = u32::from_le_bytes(checksum_buffer);
        Ok(checksum)
    }

    #[inline]
    fn check_block_checksum(data: &[u8], expected_checksum: u32) -> Result<(), io::Error> {
        let mut block_hasher = XxHash32::with_seed(0);
        block_hasher.write(data);
        let calc_checksum = block_hasher.finish() as u32;
        if calc_checksum != expected_checksum {
            return Err(Error::BlockChecksumError.into());
        }
        Ok(())
    }
    fn read_block(&mut self) -> io::Result<usize> {
        debug_assert_eq!(self.dst_start, self.dst_end);
        let frame_info = self.current_frame_info.as_ref().unwrap();

        // Adjust dst buffer offsets to decompress the next block
        let max_block_size = frame_info.block_size.get_size();
        if frame_info.block_mode == BlockMode::Linked {
            // In linked mode we consume the output (bumping dst_start) but leave the
            // beginning of dst to be used as a prefix in subsequent blocks.
            // That is, at least until we have `max_block_size + WINDOW_SIZE` bytes
            // in dst; then we set up an ext_dict with the last WINDOW_SIZE bytes
            // and the output goes to the beginning of dst again.
            debug_assert_eq!(self.dst.capacity(), max_block_size * 2 + WINDOW_SIZE);
            if self.dst_start + max_block_size > self.dst.capacity() {
                // Output might not fit in the buffer.
                // The ext_dict will become the last WINDOW_SIZE bytes
                debug_assert!(self.dst_start >= max_block_size + WINDOW_SIZE);
                self.ext_dict_offset = self.dst_start - WINDOW_SIZE;
                self.ext_dict_len = WINDOW_SIZE;
                // Output goes in the beginning of the buffer again.
                self.dst_start = 0;
                self.dst_end = 0;
            } else if self.dst_start + self.ext_dict_len > WINDOW_SIZE {
                // There's more than WINDOW_SIZE bytes of lookback adding the prefix and ext_dict.
                // Since we have a limited buffer we must shrink ext_dict in favor of the prefix,
                // so that we can fit up to max_block_size bytes between dst_start and ext_dict
                // start.
                let delta = self
                    .ext_dict_len
                    .min(self.dst_start + self.ext_dict_len - WINDOW_SIZE);
                self.ext_dict_offset += delta;
                self.ext_dict_len -= delta;
                debug_assert!(self.dst_start + self.ext_dict_len >= WINDOW_SIZE)
            }
        } else {
            debug_assert_eq!(self.ext_dict_len, 0);
            debug_assert_eq!(self.dst.capacity(), max_block_size);
            self.dst_start = 0;
            self.dst_end = 0;
        }

        // Read and decompress block
        let block_info = {
            let mut buffer = [0u8; 4];
            if let Err(err) = self.r.read_exact(&mut buffer) {
                if err.kind() == ErrorKind::UnexpectedEof {
                    return Ok(0);
                } else {
                    return Err(err);
                }
            }
            BlockInfo::read(&buffer)?
        };

        match block_info {
            BlockInfo::Uncompressed(len) => {
                let len = len as usize;
                if len > max_block_size {
                    return Err(Error::BlockTooBig.into());
                }
                // TODO: Attempt to avoid initialization of read buffer when
                // https://github.com/rust-lang/rust/issues/42788 stabilizes
                self.r.read_exact(vec_resize_and_get_mut(
                    &mut self.dst,
                    self.dst_start,
                    self.dst_start + len,
                ))?;
                if frame_info.block_checksums {
                    let expected_checksum = Self::read_checksum(&mut self.r)?;
                    Self::check_block_checksum(
                        &self.dst[self.dst_start..self.dst_start + len],
                        expected_checksum,
                    )?;
                }

                self.dst_end += len;
                self.content_len += len as u64;
            }
            BlockInfo::Compressed(len) => {
                let len = len as usize;
                if len > max_block_size {
                    return Err(Error::BlockTooBig.into());
                }
                // TODO: Attempt to avoid initialization of read buffer when
                // https://github.com/rust-lang/rust/issues/42788 stabilizes
                self.r
                    .read_exact(vec_resize_and_get_mut(&mut self.src, 0, len))?;
                if frame_info.block_checksums {
                    let expected_checksum = Self::read_checksum(&mut self.r)?;
                    Self::check_block_checksum(&self.src[..len], expected_checksum)?;
                }

                let with_dict_mode =
                    frame_info.block_mode == BlockMode::Linked && self.ext_dict_len != 0;
                let decomp_size = if with_dict_mode {
                    debug_assert!(self.dst_start + max_block_size <= self.ext_dict_offset);
                    let (head, tail) = self.dst.split_at_mut(self.ext_dict_offset);
                    let ext_dict = &tail[..self.ext_dict_len];
                    debug_assert!(head.len() - self.dst_start >= max_block_size);
                    crate::block::decompress::decompress_internal::<true, _>(
                        &self.src[..len],
                        &mut SliceSink::new(head, self.dst_start),
                        ext_dict,
                    )
                } else {
                    // Independent blocks OR linked blocks with only prefix data
                    debug_assert!(self.dst.capacity() - self.dst_start >= max_block_size);
                    crate::block::decompress::decompress_internal::<false, _>(
                        &self.src[..len],
                        &mut vec_sink_for_decompression(
                            &mut self.dst,
                            0,
                            self.dst_start,
                            self.dst_start + max_block_size,
                        ),
                        b"",
                    )
                }
                .map_err(Error::DecompressionError)?;

                self.dst_end += decomp_size;
                self.content_len += decomp_size as u64;
            }
            BlockInfo::EndMark => {
                if let Some(expected) = frame_info.content_size {
                    if self.content_len != expected {
                        return Err(Error::ContentLengthError {
                            expected,
                            actual: self.content_len,
                        }
                        .into());
                    }
                }
                if frame_info.content_checksum {
                    let expected_checksum = Self::read_checksum(&mut self.r)?;
                    let calc_checksum = self.content_hasher.finish() as u32;
                    if calc_checksum != expected_checksum {
                        return Err(Error::ContentChecksumError.into());
                    }
                }
                self.current_frame_info = None;
                return Ok(0);
            }
        }

        // Content checksum, if applicable
        if frame_info.content_checksum {
            self.content_hasher
                .write(&self.dst[self.dst_start..self.dst_end]);
        }

        Ok(self.dst_end - self.dst_start)
    }
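    // Contract sketch for `read_block`: it returns Ok(0) both when an EndMark
    // cleanly terminates the current frame and when the underlying reader hits
    // EOF at a block boundary; any non-zero value is the number of freshly
    // decompressed bytes now buffered in `dst[dst_start..dst_end]`.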
    fn read_more(&mut self) -> io::Result<usize> {
        if self.current_frame_info.is_none() && self.read_frame_info()? == 0 {
            return Ok(0);
        }
        self.read_block()
    }
}

impl<R: io::Read> io::Read for FrameDecoder<R> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        loop {
            // Fill read buffer if there's uncompressed data left
            if self.dst_start < self.dst_end {
                let read_len = std::cmp::min(self.dst_end - self.dst_start, buf.len());
                let dst_read_end = self.dst_start + read_len;
                buf[..read_len].copy_from_slice(&self.dst[self.dst_start..dst_read_end]);
                self.dst_start = dst_read_end;
                return Ok(read_len);
            }
            if self.read_more()? == 0 {
                return Ok(0);
            }
        }
    }

    fn read_to_string(&mut self, buf: &mut String) -> io::Result<usize> {
        let mut written = 0;
        loop {
            match self.fill_buf() {
                Ok([]) => return Ok(written),
                Ok(b) => {
                    let s = std::str::from_utf8(b).map_err(|_| {
                        io::Error::new(
                            io::ErrorKind::InvalidData,
                            "stream did not contain valid UTF-8",
                        )
                    })?;
                    buf.push_str(s);
                    let len = s.len();
                    self.consume(len);
                    written += len;
                }
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
    }

    fn read_to_end(&mut self, buf: &mut Vec<u8>) -> io::Result<usize> {
        let mut written = 0;
        loop {
            match self.fill_buf() {
                Ok([]) => return Ok(written),
                Ok(b) => {
                    buf.extend_from_slice(b);
                    let len = b.len();
                    self.consume(len);
                    written += len;
                }
                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
    }
}

impl<R: io::Read> io::BufRead for FrameDecoder<R> {
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        if self.dst_start == self.dst_end {
            self.read_more()?;
        }
        Ok(&self.dst[self.dst_start..self.dst_end])
    }

    fn consume(&mut self, amt: usize) {
        assert!(amt <= self.dst_end - self.dst_start);
        self.dst_start += amt;
    }
}

impl<R: io::Read + fmt::Debug> fmt::Debug for FrameDecoder<R> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("FrameDecoder")
            .field("r", &self.r)
            .field("content_hasher", &self.content_hasher)
            .field("content_len", &self.content_len)
            .field("src", &"[...]")
            .field("dst", &"[...]")
            .field("dst_start", &self.dst_start)
            .field("dst_end", &self.dst_end)
            .field("ext_dict_offset", &self.ext_dict_offset)
            .field("ext_dict_len", &self.ext_dict_len)
            .field("current_frame_info", &self.current_frame_info)
            .finish()
    }
}
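// Sketch of decoding concatenated frames. `read_frame_info` is re-entered
// after each EndMark, but `read_to_string` stops at the frame boundary (the
// first empty `fill_buf`), so the caller pulls until a call yields 0. The
// helper names and inputs are illustrative; only the public API above is used.
#[cfg(test)]
mod concatenated_frames_sketch {
    use super::FrameDecoder;
    use crate::frame::FrameEncoder;
    use std::io::{Read, Write};

    #[test]
    fn two_frames_read_back_to_back() {
        let mut bytes = Vec::new();
        for part in [&b"hello "[..], &b"world"[..]] {
            let mut enc = FrameEncoder::new(&mut bytes);
            enc.write_all(part).unwrap();
            enc.finish().unwrap();
        }

        let mut dec = FrameDecoder::new(&bytes[..]);
        let mut out = String::new();
        // Each frame ends the logical stream; loop until a call returns 0.
        while dec.read_to_string(&mut out).unwrap() > 0 {}
        assert_eq!(out, "hello world");
    }
}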
/// Similar to `v.get_mut(start..end)` but will adjust the len if needed.
#[inline]
fn vec_resize_and_get_mut(v: &mut Vec<u8>, start: usize, end: usize) -> &mut [u8] {
    if end > v.len() {
        v.resize(end, 0)
    }
    &mut v[start..end]
}
lz4_flex-0.11.3/src/frame/header.rs000064400000000000000000000323211046102023000151150ustar 00000000000000use twox_hash::XxHash32;

use super::Error;
use std::{
    fmt::Debug,
    hash::Hasher,
    io,
    io::{Read, Write},
};

const FLG_RESERVED_MASK: u8 = 0b00000010;
const FLG_VERSION_MASK: u8 = 0b11000000;
const FLG_SUPPORTED_VERSION_BITS: u8 = 0b01000000;

const FLG_INDEPENDENT_BLOCKS: u8 = 0b00100000;
const FLG_BLOCK_CHECKSUMS: u8 = 0b00010000;
const FLG_CONTENT_SIZE: u8 = 0b00001000;
const FLG_CONTENT_CHECKSUM: u8 = 0b00000100;
const FLG_DICTIONARY_ID: u8 = 0b00000001;

const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK;
const BD_BLOCK_SIZE_MASK: u8 = 0b01110000;
const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4;

const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 0x80000000;

const LZ4F_MAGIC_NUMBER: u32 = 0x184D2204;
pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C2102;
const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive<u32> = 0x184D2A50..=0x184D2A5F;

pub(crate) const MAGIC_NUMBER_SIZE: usize = 4;
pub(crate) const MIN_FRAME_INFO_SIZE: usize = 7;
pub(crate) const MAX_FRAME_INFO_SIZE: usize = 19;
pub(crate) const BLOCK_INFO_SIZE: usize = 4;

#[derive(Clone, Copy, PartialEq, Debug)]
/// Different predefined block sizes to choose from when compressing data.
#[derive(Default)]
pub enum BlockSize {
    /// Will detect the optimal block size based on the size of the first write call
    #[default]
    Auto = 0,
    /// The default block size.
    Max64KB = 4,
    /// 256KB block size.
    Max256KB = 5,
    /// 1MB block size.
    Max1MB = 6,
    /// 4MB block size.
    Max4MB = 7,
    /// 8MB block size.
    Max8MB = 8,
}

impl BlockSize {
    /// Try to find the optimal size based on the passed buffer length.
    pub(crate) fn from_buf_length(buf_len: usize) -> Self {
        let mut blocksize = BlockSize::Max4MB;
        for candidate in [BlockSize::Max256KB, BlockSize::Max64KB] {
            if buf_len > candidate.get_size() {
                return blocksize;
            }
            blocksize = candidate;
        }
        BlockSize::Max64KB
    }

    pub(crate) fn get_size(&self) -> usize {
        match self {
            BlockSize::Auto => unreachable!(),
            BlockSize::Max64KB => 64 * 1024,
            BlockSize::Max256KB => 256 * 1024,
            BlockSize::Max1MB => 1024 * 1024,
            BlockSize::Max4MB => 4 * 1024 * 1024,
            BlockSize::Max8MB => 8 * 1024 * 1024,
        }
    }
}
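// A small sketch of the `Auto` heuristic above: `from_buf_length` only ever
// selects Max64KB, Max256KB or Max4MB, and the boundaries are exclusive, so a
// buffer of exactly 64 KiB still selects Max64KB. The test values are
// illustrative.
#[cfg(test)]
mod blocksize_heuristic_sketch {
    use super::BlockSize;

    #[test]
    fn from_buf_length_boundaries() {
        assert_eq!(BlockSize::from_buf_length(1), BlockSize::Max64KB);
        assert_eq!(BlockSize::from_buf_length(64 * 1024), BlockSize::Max64KB);
        assert_eq!(BlockSize::from_buf_length(64 * 1024 + 1), BlockSize::Max256KB);
        assert_eq!(BlockSize::from_buf_length(256 * 1024 + 1), BlockSize::Max4MB);
    }
}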
#[derive(Clone, Copy, PartialEq, Debug)]
/// The two `BlockMode` operations that can be set on [`FrameInfo`].
#[derive(Default)]
pub enum BlockMode {
    /// Every block is compressed independently. The default.
    #[default]
    Independent,
    /// Blocks can reference data from previous blocks.
    ///
    /// Effective when the stream contains small blocks.
    Linked,
}

// From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
//
// General Structure of LZ4 Frame format
// -------------------------------------
//
// | MagicNb | F. Descriptor | Block | (...) | EndMark | C. Checksum |
// |:-------:|:-------------:| ----- | ----- | ------- | ----------- |
// | 4 bytes |  3-15 bytes   |       |       | 4 bytes |  0-4 bytes  |
//
// Frame Descriptor
// ----------------
//
// | FLG     | BD      | (Content Size) | (Dictionary ID) | HC      |
// | ------- | ------- |:--------------:|:---------------:| ------- |
// | 1 byte  | 1 byte  |  0 - 8 bytes   |   0 - 4 bytes   | 1 byte  |
//
// __FLG byte__
//
// | BitNb   |  7-6  |   5   |    4     |  3   |    2     |    1     |  0   |
// | ------- |-------|-------|----------|------|----------|----------|------|
// |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID|
//
// __BD byte__
//
// | BitNb   |    7     |     6-5-4     | 3-2-1-0  |
// | ------- | -------- | ------------- | -------- |
// |FieldName|*Reserved*| Block MaxSize |*Reserved*|
//
// Data Blocks
// -----------
//
// | Block Size | data   | (Block Checksum) |
// |:----------:| ------ |:----------------:|
// |  4 bytes   |        |   0 - 4 bytes    |
//
#[derive(Debug, Default, Clone)]
/// The metadata for de/compressing with the lz4 frame format.
pub struct FrameInfo {
    /// If set, includes the total uncompressed size of data in the frame.
    pub content_size: Option<u64>,
    /// The identifier for the dictionary that must be used to correctly decode data.
    /// The compressor and the decompressor must use exactly the same dictionary.
    ///
    /// Note that this is currently unsupported and for this reason it's not pub.
    pub(crate) dict_id: Option<u32>,
    /// The maximum uncompressed size of each data block.
    pub block_size: BlockSize,
    /// The block mode.
    pub block_mode: BlockMode,
    /// If set, includes a checksum for each data block in the frame.
    pub block_checksums: bool,
    /// If set, includes a content checksum to verify that the full frame contents have been
    /// decoded correctly.
    pub content_checksum: bool,
    /// If set, use the legacy frame format.
    pub legacy_frame: bool,
}

impl FrameInfo {
    /// Create a new `FrameInfo`.
    pub fn new() -> Self {
        Self::default()
    }

    /// Whether to include the total uncompressed size of data in the frame.
    pub fn content_size(mut self, content_size: Option<u64>) -> Self {
        self.content_size = content_size;
        self
    }

    /// The maximum uncompressed size of each data block.
    pub fn block_size(mut self, block_size: BlockSize) -> Self {
        self.block_size = block_size;
        self
    }

    /// The block mode.
    pub fn block_mode(mut self, block_mode: BlockMode) -> Self {
        self.block_mode = block_mode;
        self
    }

    /// If set, includes a checksum for each data block in the frame.
    pub fn block_checksums(mut self, block_checksums: bool) -> Self {
        self.block_checksums = block_checksums;
        self
    }

    /// If set, includes a content checksum to verify that the full frame contents have been
    /// decoded correctly.
    pub fn content_checksum(mut self, content_checksum: bool) -> Self {
        self.content_checksum = content_checksum;
        self
    }

    /// If set, use the legacy frame format.
    pub fn legacy_frame(mut self, legacy_frame: bool) -> Self {
        self.legacy_frame = legacy_frame;
        self
    }

    pub(crate) fn read_size(input: &[u8]) -> Result<usize, Error> {
        let mut required = MIN_FRAME_INFO_SIZE;
        let magic_num = u32::from_le_bytes(input[0..4].try_into().unwrap());
        if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
            return Ok(MAGIC_NUMBER_SIZE);
        }
        if input.len() < required {
            return Ok(required);
        }
        if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
            return Ok(8);
        }
        if magic_num != LZ4F_MAGIC_NUMBER {
            return Err(Error::WrongMagicNumber);
        }
        if input[4] & FLG_CONTENT_SIZE != 0 {
            required += 8;
        }
        if input[4] & FLG_DICTIONARY_ID != 0 {
            required += 4
        }
        Ok(required)
    }

    pub(crate) fn write_size(&self) -> usize {
        let mut required = MIN_FRAME_INFO_SIZE;
        if self.content_size.is_some() {
            required += 8;
        }
        if self.dict_id.is_some() {
            required += 4;
        }
        required
    }

    pub(crate) fn write(&self, output: &mut [u8]) -> Result<usize, Error> {
        let write_size = self.write_size();
        if output.len() < write_size {
            return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into()));
        }
        let mut buffer = [0u8; MAX_FRAME_INFO_SIZE];
        assert!(write_size <= buffer.len());
        buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes());
        buffer[4] = FLG_SUPPORTED_VERSION_BITS;
        if self.block_checksums {
            buffer[4] |= FLG_BLOCK_CHECKSUMS;
        }
        if self.content_checksum {
            buffer[4] |= FLG_CONTENT_CHECKSUM;
        }
        if self.block_mode == BlockMode::Independent {
            buffer[4] |= FLG_INDEPENDENT_BLOCKS;
        }
        buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT;

        // Optional section
        let mut offset = 6;
        if let Some(size) = self.content_size {
            buffer[4] |= FLG_CONTENT_SIZE;
            buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes());
            offset += 8;
        }
        if let Some(dict_id) = self.dict_id {
            buffer[4] |= FLG_DICTIONARY_ID;
            buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes());
            offset += 4;
        }

        // Header checksum
        let mut hasher = XxHash32::with_seed(0);
        hasher.write(&buffer[4..offset]);
        let header_checksum = (hasher.finish() >> 8) as u8;
        buffer[offset] = header_checksum;
        offset += 1;

        debug_assert_eq!(offset, write_size);
        output[..write_size].copy_from_slice(&buffer[..write_size]);
        Ok(write_size)
    }

    pub(crate) fn read(mut input: &[u8]) -> Result<FrameInfo, Error> {
        let original_input = input;
        // 4 byte Magic
        let magic_num = {
            let mut buffer = [0u8; 4];
            input.read_exact(&mut buffer)?;
            u32::from_le_bytes(buffer)
        };
        if magic_num == LZ4F_LEGACY_MAGIC_NUMBER {
            return Ok(FrameInfo {
                block_size: BlockSize::Max8MB,
                legacy_frame: true,
                ..FrameInfo::default()
            });
        }
        if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) {
            let mut buffer = [0u8; 4];
            input.read_exact(&mut buffer)?;
            let user_data_len = u32::from_le_bytes(buffer);
            return Err(Error::SkippableFrame(user_data_len));
        }
        if magic_num != LZ4F_MAGIC_NUMBER {
            return Err(Error::WrongMagicNumber);
        }

        // fixed size section
        let [flg_byte, bd_byte] = {
            let mut buffer = [0u8, 0];
            input.read_exact(&mut buffer)?;
            buffer
        };
        if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS {
            // version is always 01
            return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK));
        }
        if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 {
            return Err(Error::ReservedBitsSet);
        }
        let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 {
            BlockMode::Independent
        } else {
            BlockMode::Linked
        };
        let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0;
        let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0;

        let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT {
            i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)),
            4 => BlockSize::Max64KB,
            5 => BlockSize::Max256KB,
            6 => BlockSize::Max1MB,
            7 => BlockSize::Max4MB,
            _ => unreachable!(),
        };

        // var len section
        let mut content_size = None;
        if flg_byte & FLG_CONTENT_SIZE != 0 {
            let mut buffer = [0u8; 8];
            input.read_exact(&mut buffer)?;
            content_size = Some(u64::from_le_bytes(buffer));
        }
        let mut dict_id = None;
        if flg_byte & FLG_DICTIONARY_ID != 0 {
            let mut buffer = [0u8; 4];
            input.read_exact(&mut buffer)?;
            dict_id = Some(u32::from_le_bytes(buffer));
        }

        // 1 byte header checksum
        let expected_checksum = {
            let mut buffer = [0u8; 1];
            input.read_exact(&mut buffer)?;
            buffer[0]
        };
        let mut hasher = XxHash32::with_seed(0);
        hasher.write(&original_input[4..original_input.len() - input.len() - 1]);
        let header_hash = (hasher.finish() >> 8) as u8;
        if header_hash != expected_checksum {
            return Err(Error::HeaderChecksumError);
        }

        Ok(FrameInfo {
            content_size,
            dict_id,
            block_size,
            block_mode,
            block_checksums,
            content_checksum,
            legacy_frame: false,
        })
    }
}
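// Round-trip sketch for the header encode/decode pair above. Note that
// `BlockSize::Auto` writes a 0 size field that `read` rejects with
// `UnsupportedBlocksize`, so a concrete size is used. The chosen field values
// are illustrative.
#[cfg(test)]
mod frame_info_roundtrip_sketch {
    use super::{BlockMode, BlockSize, FrameInfo, MAX_FRAME_INFO_SIZE};

    #[test]
    fn header_roundtrip() {
        let info = FrameInfo::new()
            .block_size(BlockSize::Max256KB)
            .block_mode(BlockMode::Linked)
            .content_size(Some(42));
        let mut buf = [0u8; MAX_FRAME_INFO_SIZE];
        let written = info.write(&mut buf).unwrap();
        assert_eq!(written, info.write_size());

        let read_back = FrameInfo::read(&buf[..written]).unwrap();
        assert_eq!(read_back.block_size, BlockSize::Max256KB);
        assert_eq!(read_back.block_mode, BlockMode::Linked);
        assert_eq!(read_back.content_size, Some(42));
    }
}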
#[derive(Debug)]
pub(crate) enum BlockInfo {
    Compressed(u32),
    Uncompressed(u32),
    EndMark,
}

impl BlockInfo {
    pub(crate) fn read(mut input: &[u8]) -> Result<Self, Error> {
        let mut size_buffer = [0u8; 4];
        input.read_exact(&mut size_buffer)?;
        let size = u32::from_le_bytes(size_buffer);
        if size == 0 {
            Ok(BlockInfo::EndMark)
        } else if size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 {
            Ok(BlockInfo::Uncompressed(size & !BLOCK_UNCOMPRESSED_SIZE_BIT))
        } else {
            Ok(BlockInfo::Compressed(size))
        }
    }

    pub(crate) fn write(&self, mut output: &mut [u8]) -> Result<usize, Error> {
        let value = match self {
            BlockInfo::Compressed(len) if *len == 0 => return Err(Error::InvalidBlockInfo),
            BlockInfo::Compressed(len) | BlockInfo::Uncompressed(len)
                if *len & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 =>
            {
                return Err(Error::InvalidBlockInfo)
            }
            BlockInfo::Compressed(len) => *len,
            BlockInfo::Uncompressed(len) => *len | BLOCK_UNCOMPRESSED_SIZE_BIT,
            BlockInfo::EndMark => 0,
        };
        output.write_all(&value.to_le_bytes())?;
        Ok(4)
    }
}
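// Sketch of the 4-byte block-info encoding above: 0 is the EndMark and the
// high bit flags an uncompressed block. The lengths used are illustrative.
#[cfg(test)]
mod block_info_sketch {
    use super::BlockInfo;

    #[test]
    fn uncompressed_bit_roundtrip() {
        let mut buf = [0u8; 4];
        BlockInfo::Uncompressed(10).write(&mut buf[..]).unwrap();
        assert!(matches!(
            BlockInfo::read(&buf[..]).unwrap(),
            BlockInfo::Uncompressed(10)
        ));

        BlockInfo::EndMark.write(&mut buf[..]).unwrap();
        assert!(matches!(BlockInfo::read(&buf[..]).unwrap(), BlockInfo::EndMark));
    }
}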
lz4_flex-0.11.3/src/frame/mod.rs000064400000000000000000000067671046102023000144630ustar 00000000000000//! LZ4 Frame Format
//!
//! As defined in <https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md>
//!
//! # Example: compress data on `stdin` with frame format
//! This program reads data from `stdin`, compresses it and emits it to `stdout`.
//! This example can be found in `examples/compress.rs`:
//! ```no_run
//! use std::io;
//! let stdin = io::stdin();
//! let stdout = io::stdout();
//! let mut rdr = stdin.lock();
//! // Wrap the stdout writer in a LZ4 Frame writer.
//! let mut wtr = lz4_flex::frame::FrameEncoder::new(stdout.lock());
//! io::copy(&mut rdr, &mut wtr).expect("I/O operation failed");
//! wtr.finish().unwrap();
//! ```
//!

use std::{fmt, io};

#[cfg_attr(feature = "safe-encode", forbid(unsafe_code))]
pub(crate) mod compress;
#[cfg_attr(feature = "safe-decode", forbid(unsafe_code))]
pub(crate) mod decompress;
pub(crate) mod header;

pub use compress::{AutoFinishEncoder, FrameEncoder};
pub use decompress::FrameDecoder;
pub use header::{BlockMode, BlockSize, FrameInfo};

#[derive(Debug)]
#[non_exhaustive]
/// Errors that can occur when de/compressing lz4.
pub enum Error {
    /// Compression error.
    CompressionError(crate::block::CompressError),
    /// Decompression error.
    DecompressionError(crate::block::DecompressError),
    /// An io::Error was encountered.
    IoError(io::Error),
    /// Unsupported block size.
    UnsupportedBlocksize(u8),
    /// Unsupported frame version.
    UnsupportedVersion(u8),
    /// Wrong magic number for the LZ4 frame format.
    WrongMagicNumber,
    /// Reserved bits set.
    ReservedBitsSet,
    /// Block header is malformed.
    InvalidBlockInfo,
    /// Read a block larger than specified in the frame header.
    BlockTooBig,
    /// The frame header checksum doesn't match.
    HeaderChecksumError,
    /// The block checksum doesn't match.
    BlockChecksumError,
    /// The content checksum doesn't match.
    ContentChecksumError,
    /// Read a skippable frame.
    /// The caller may read the specified amount of bytes from the underlying io::Read.
    SkippableFrame(u32),
    /// External dictionaries are not supported.
    DictionaryNotSupported,
    /// Content length differs.
    ContentLengthError {
        /// Expected content length.
        expected: u64,
        /// Actual content length.
        actual: u64,
    },
}

impl From<Error> for io::Error {
    fn from(e: Error) -> Self {
        match e {
            Error::IoError(e) => e,
            Error::CompressionError(_)
            | Error::DecompressionError(_)
            | Error::SkippableFrame(_)
            | Error::DictionaryNotSupported => io::Error::new(io::ErrorKind::Other, e),
            Error::WrongMagicNumber
            | Error::UnsupportedBlocksize(..)
            | Error::UnsupportedVersion(..)
            | Error::ReservedBitsSet
            | Error::InvalidBlockInfo
            | Error::BlockTooBig
            | Error::HeaderChecksumError
            | Error::ContentChecksumError
            | Error::BlockChecksumError
            | Error::ContentLengthError { .. } => io::Error::new(io::ErrorKind::InvalidData, e),
        }
    }
}

impl From<io::Error> for Error {
    fn from(e: io::Error) -> Self {
        // Only unwrap the boxed error when it actually is a frame `Error`;
        // foreign payloads stay wrapped in `Error::IoError`.
        match e.get_ref().map(|e| e.downcast_ref::<Error>()) {
            Some(Some(_)) => *e.into_inner().unwrap().downcast::<Error>().unwrap(),
            _ => Error::IoError(e),
        }
    }
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result {
        write!(f, "{self:?}")
    }
}

impl std::error::Error for Error {}
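// Sketch of the error round-trip through std::io above: frame errors are
// wrapped as InvalidData/Other on the way out and recovered by downcasting on
// the way back. The variant used is arbitrary.
#[cfg(test)]
mod error_conversion_sketch {
    use super::Error;
    use std::io;

    #[test]
    fn io_error_roundtrip() {
        let io_err: io::Error = Error::WrongMagicNumber.into();
        assert_eq!(io_err.kind(), io::ErrorKind::InvalidData);
        assert!(matches!(Error::from(io_err), Error::WrongMagicNumber));
    }
}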
lz4_flex-0.11.3/src/lib.rs000064400000000000000000000100511046102023000133350ustar 00000000000000//! Pure Rust, high performance implementation of LZ4 compression.
//!
//! A detailed explanation of the algorithm can be found [here](http://ticki.github.io/blog/how-lz4-works/).
//!
//! # Overview
//!
//! This crate provides two ways to use lz4. The first way is through the
//! [`frame::FrameDecoder`](frame/struct.FrameDecoder.html)
//! and
//! [`frame::FrameEncoder`](frame/struct.FrameEncoder.html)
//! types, which implement the `std::io::Read` and `std::io::Write` traits with the
//! lz4 frame format. Unless you have a specific reason to the contrary, you
//! should only use the lz4 frame format. Specifically, the lz4 frame format
//! permits streaming compression or decompression.
//!
//! The second way is through the
//! [`decompress_size_prepended`](fn.decompress_size_prepended.html)
//! and
//! [`compress_prepend_size`](fn.compress_prepend_size.html)
//! functions. These functions provide access to the lz4 block format, and
//! don't support a streaming interface directly. You should only use these types
//! if you know you specifically need the lz4 block format.
//!
//! # Example: compress data on `stdin` with frame format
//! This program reads data from `stdin`, compresses it and emits it to `stdout`.
//! This example can be found in `examples/compress.rs`:
//! ```no_run
//! use std::io;
//! let stdin = io::stdin();
//! let stdout = io::stdout();
//! let mut rdr = stdin.lock();
//! // Wrap the stdout writer in a LZ4 Frame writer.
//! let mut wtr = lz4_flex::frame::FrameEncoder::new(stdout.lock());
//! io::copy(&mut rdr, &mut wtr).expect("I/O operation failed");
//! wtr.finish().unwrap();
//! ```
//! # Example: decompress data on `stdin` with frame format
//! This program reads data from `stdin`, decompresses it and emits it to `stdout`.
//! This example can be found in `examples/decompress.rs`:
//! ```no_run
//! use std::io;
//! let stdin = io::stdin();
//! let stdout = io::stdout();
//! // Wrap the stdin reader in a LZ4 FrameDecoder.
//! let mut rdr = lz4_flex::frame::FrameDecoder::new(stdin.lock());
//! let mut wtr = stdout.lock();
//! io::copy(&mut rdr, &mut wtr).expect("I/O operation failed");
//! ```
//!
//! # Example: block format roundtrip
//! ```
//! use lz4_flex::block::{compress_prepend_size, decompress_size_prepended};
//! let input: &[u8] = b"Hello people, what's up?";
//! let compressed = compress_prepend_size(input);
//! let uncompressed = decompress_size_prepended(&compressed).unwrap();
//! assert_eq!(input, uncompressed);
//! ```
//!
//! ## Feature Flags
//!
//! - `safe-encode` uses only safe Rust for encode. _enabled by default_
//! - `safe-decode` uses only safe Rust for decode. _enabled by default_
//! - `frame` support for the LZ4 frame format. _implies `std`, enabled by default_
//! - `std` enables dependency on the standard library. _enabled by default_
//!
//! For maximum performance use `no-default-features`.
//!
//! For no_std support only the [`block format`](block/index.html) is supported.
//!
#![deny(warnings)]
#![deny(missing_docs)]
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![cfg_attr(nightly, feature(optimize_attribute))]

#[cfg_attr(test, macro_use)]
extern crate alloc;

#[cfg(test)]
#[macro_use]
extern crate more_asserts;

pub mod block;
#[cfg(feature = "frame")]
#[cfg_attr(docsrs, doc(cfg(feature = "frame")))]
pub mod frame;

#[allow(dead_code)]
mod fastcpy;

#[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
#[allow(dead_code)]
mod fastcpy_unsafe;

#[deprecated(
    since = "0.11.0",
    note = "This re-export is deprecated as it can be confused with the frame API and is not suitable for very large data, use block:: instead"
)]
pub use block::{compress, compress_into, compress_prepend_size};

#[deprecated(
    since = "0.11.0",
    note = "This re-export is deprecated as it can be confused with the frame API and is not suitable for very large data, use block:: instead"
)]
pub use block::{decompress, decompress_into, decompress_size_prepended};

#[cfg_attr(
    all(feature = "safe-encode", feature = "safe-decode"),
    forbid(unsafe_code)
)]
pub(crate) mod sink;
lz4_flex-0.11.3/src/sink.rs000064400000000000000000000235451046102023000135470ustar 00000000000000#[allow(unused_imports)]
use alloc::vec::Vec;

use crate::fastcpy::slice_copy;

/// Returns a Sink implementation appropriate for outputting up to `required_capacity`
/// bytes at `vec[offset..offset+required_capacity]`.
/// Currently this always pre-fills the vec with zeroes as necessary and
/// returns a `SliceSink`.
/// The argument `pos` defines the initial output position in the Sink.
#[inline]
#[cfg(feature = "frame")]
pub fn vec_sink_for_compression(
    vec: &mut Vec<u8>,
    offset: usize,
    pos: usize,
    required_capacity: usize,
) -> SliceSink {
    {
        vec.resize(offset + required_capacity, 0);
        SliceSink::new(&mut vec[offset..], pos)
    }
}

/// Returns a Sink implementation appropriate for outputting up to `required_capacity`
/// bytes at `vec[offset..offset+required_capacity]`.
/// Currently this always pre-fills the vec with zeroes as necessary and
/// returns a `SliceSink`.
/// The argument `pos` defines the initial output position in the Sink.
#[cfg(feature = "frame")]
#[inline]
pub fn vec_sink_for_decompression(
    vec: &mut Vec<u8>,
    offset: usize,
    pos: usize,
    required_capacity: usize,
) -> SliceSink {
    {
        vec.resize(offset + required_capacity, 0);
        SliceSink::new(&mut vec[offset..], pos)
    }
}

pub trait Sink {
    /// Returns a raw ptr to the first unfilled byte of the Sink. Analogous to `[pos..].as_ptr()`.
    #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
    unsafe fn pos_mut_ptr(&mut self) -> *mut u8;

    /// Read the byte at position `pos`.
    #[allow(dead_code)]
    fn byte_at(&mut self, pos: usize) -> u8;

    /// Pushes a byte to the end of the Sink.
    #[cfg(feature = "safe-encode")]
    fn push(&mut self, byte: u8);

    #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
    unsafe fn base_mut_ptr(&mut self) -> *mut u8;

    fn pos(&self) -> usize;

    fn capacity(&self) -> usize;

    #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
    unsafe fn set_pos(&mut self, new_pos: usize);

    #[cfg(feature = "safe-decode")]
    fn extend_with_fill(&mut self, byte: u8, len: usize);

    /// Extends the Sink with `data`.
    fn extend_from_slice(&mut self, data: &[u8]);

    fn extend_from_slice_wild(&mut self, data: &[u8], copy_len: usize);

    /// Copies `len` bytes starting from `start` to the end of the Sink.
    /// # Panics
    /// Panics if `start` >= `pos`.
    #[cfg(feature = "safe-decode")]
    fn extend_from_within(&mut self, start: usize, wild_len: usize, copy_len: usize);

    #[cfg(feature = "safe-decode")]
    fn extend_from_within_overlapping(&mut self, start: usize, num_bytes: usize);
}

/// SliceSink is used as a target to de/compress data into a preallocated and possibly
/// uninitialized `&[u8]` space.
///
/// # Handling of Capacity
/// Extend methods will panic if there's insufficient capacity left in the Sink.
///
/// # Invariants
/// - Bytes `[..pos()]` are always initialized.
pub struct SliceSink<'a> {
    /// The working slice, which may contain uninitialized bytes
    output: &'a mut [u8],
    /// Number of bytes in start of `output` guaranteed to be initialized
    pos: usize,
}

impl<'a> SliceSink<'a> {
    /// Creates a `Sink` backed by the given byte slice.
    /// `pos` defines the initial output position in the Sink.
    /// # Panics
    /// Panics if `pos` is out of bounds.
    #[inline]
    pub fn new(output: &'a mut [u8], pos: usize) -> Self {
        // SAFETY: Caller guarantees that all elements of `output[..pos]` are initialized.
        let _ = &mut output[..pos]; // bounds check pos
        SliceSink { output, pos }
    }
}
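// Minimal SliceSink usage sketch: extend the sink and observe `pos` advancing.
// The buffer size and contents are illustrative; only methods of the `Sink`
// trait above are used.
#[cfg(test)]
mod slice_sink_usage_sketch {
    use super::{Sink, SliceSink};

    #[test]
    fn extend_advances_pos() {
        let mut buf = [0u8; 8];
        let mut sink = SliceSink::new(&mut buf, 0);
        sink.extend_from_slice(b"abc");
        assert_eq!(sink.pos(), 3);
        assert_eq!(sink.byte_at(1), b'b');
    }
}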
impl<'a> Sink for SliceSink<'a> {
    /// Returns a raw ptr to the first unfilled byte of the Sink. Analogous to `[pos..].as_ptr()`.
    #[inline]
    #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
    unsafe fn pos_mut_ptr(&mut self) -> *mut u8 {
        self.base_mut_ptr().add(self.pos()) as *mut u8
    }

    /// Reads the byte at position `pos`.
    #[inline]
    fn byte_at(&mut self, pos: usize) -> u8 {
        self.output[pos]
    }

    /// Pushes a byte to the end of the Sink.
    #[inline]
    #[cfg(feature = "safe-encode")]
    fn push(&mut self, byte: u8) {
        self.output[self.pos] = byte;
        self.pos += 1;
    }

    #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
    unsafe fn base_mut_ptr(&mut self) -> *mut u8 {
        self.output.as_mut_ptr()
    }

    #[inline]
    fn pos(&self) -> usize {
        self.pos
    }

    #[inline]
    fn capacity(&self) -> usize {
        self.output.len()
    }

    #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
    #[inline]
    unsafe fn set_pos(&mut self, new_pos: usize) {
        debug_assert!(new_pos <= self.capacity());
        self.pos = new_pos;
    }

    #[inline]
    #[cfg(feature = "safe-decode")]
    fn extend_with_fill(&mut self, byte: u8, len: usize) {
        self.output[self.pos..self.pos + len].fill(byte);
        self.pos += len;
    }

    /// Extends the Sink with `data`.
    #[inline]
    fn extend_from_slice(&mut self, data: &[u8]) {
        self.extend_from_slice_wild(data, data.len())
    }

    #[inline]
    fn extend_from_slice_wild(&mut self, data: &[u8], copy_len: usize) {
        assert!(copy_len <= data.len());
        slice_copy(data, &mut self.output[self.pos..self.pos + data.len()]);
        self.pos += copy_len;
    }

    /// Copies `len` bytes starting from `start` to the end of the Sink.
    /// # Panics
    /// Panics if `start` >= `pos`.
    #[inline]
    #[cfg(feature = "safe-decode")]
    fn extend_from_within(&mut self, start: usize, wild_len: usize, copy_len: usize) {
        self.output.copy_within(start..start + wild_len, self.pos);
        self.pos += copy_len;
    }

    #[inline]
    #[cfg(feature = "safe-decode")]
    #[cfg_attr(nightly, optimize(size))] // to avoid loop unrolling
    fn extend_from_within_overlapping(&mut self, start: usize, num_bytes: usize) {
        let offset = self.pos - start;
        for i in start + offset..start + offset + num_bytes {
            self.output[i] = self.output[i - offset];
        }
        self.pos += num_bytes;
    }
}
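// Sketch of the overlapping within-buffer copy above, as used for LZ4 matches
// whose offset is smaller than their length (an RLE-like pattern): with pos at
// 2 and start at 0, copying 4 bytes repeats the 2-byte prefix. Values are
// illustrative.
#[cfg(all(test, feature = "safe-decode"))]
mod overlapping_copy_sketch {
    use super::{Sink, SliceSink};

    #[test]
    fn short_offset_repeats_prefix() {
        let mut buf = *b"ab\0\0\0\0\0\0";
        let mut sink = SliceSink::new(&mut buf, 2);
        sink.extend_from_within_overlapping(0, 4);
        assert_eq!(sink.pos(), 6);
        assert_eq!(&buf[..6], &b"ababab"[..]);
    }
}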
/// PtrSink is used as a target to de/compress data into a preallocated and possibly
/// uninitialized memory region behind a raw pointer.
#[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
pub struct PtrSink {
    /// The working pointer, which may point at uninitialized bytes
    output: *mut u8,
    /// Number of bytes at the start of `output` guaranteed to be initialized
    pos: usize,
    /// Number of bytes available in output
    cap: usize,
}

#[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
impl PtrSink {
    /// Creates a `Sink` backed by the given `Vec`'s buffer, including its spare capacity.
    /// `pos` defines the initial output position in the Sink.
    #[inline]
    pub fn from_vec(output: &mut Vec<u8>, pos: usize) -> Self {
        // SAFETY: Bytes behind the pointer may be uninitialized.
        Self {
            output: output.as_mut_ptr(),
            pos,
            cap: output.capacity(),
        }
    }
}

#[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
impl Sink for PtrSink {
    /// Returns a raw ptr to the first unfilled byte of the Sink. Analogous to `[pos..].as_ptr()`.
    #[inline]
    #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
    unsafe fn pos_mut_ptr(&mut self) -> *mut u8 {
        self.base_mut_ptr().add(self.pos()) as *mut u8
    }

    /// Reads the byte at position `pos`.
    #[inline]
    fn byte_at(&mut self, pos: usize) -> u8 {
        unsafe { self.output.add(pos).read() }
    }

    /// Pushes a byte to the end of the Sink.
    #[inline]
    #[cfg(feature = "safe-encode")]
    fn push(&mut self, byte: u8) {
        unsafe {
            self.pos_mut_ptr().write(byte);
        }
        self.pos += 1;
    }

    #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
    unsafe fn base_mut_ptr(&mut self) -> *mut u8 {
        self.output
    }

    #[inline]
    fn pos(&self) -> usize {
        self.pos
    }

    #[inline]
    fn capacity(&self) -> usize {
        self.cap
    }

    #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))]
    #[inline]
    unsafe fn set_pos(&mut self, new_pos: usize) {
        debug_assert!(new_pos <= self.capacity());
        self.pos = new_pos;
    }

    #[inline]
    #[cfg(feature = "safe-decode")]
    fn extend_with_fill(&mut self, _byte: u8, _len: usize) {
        unreachable!();
    }

    /// Extends the Sink with `data`.
    #[inline]
    fn extend_from_slice(&mut self, data: &[u8]) {
        self.extend_from_slice_wild(data, data.len())
    }

    #[inline]
    fn extend_from_slice_wild(&mut self, data: &[u8], copy_len: usize) {
        assert!(copy_len <= data.len());
        unsafe {
            core::ptr::copy_nonoverlapping(data.as_ptr(), self.pos_mut_ptr(), copy_len);
        }
        self.pos += copy_len;
    }

    /// Copies `len` bytes starting from `start` to the end of the Sink.
    /// # Panics
    /// Panics if `start` >= `pos`.
    #[inline]
    #[cfg(feature = "safe-decode")]
    fn extend_from_within(&mut self, _start: usize, _wild_len: usize, _copy_len: usize) {
        unreachable!();
    }

    #[inline]
    #[cfg(feature = "safe-decode")]
    fn extend_from_within_overlapping(&mut self, _start: usize, _num_bytes: usize) {
        unreachable!();
    }
}

#[cfg(test)]
mod tests {
    #[test]
    #[cfg(any(feature = "safe-encode", feature = "safe-decode"))]
    fn test_sink_slice() {
        use crate::sink::Sink;
        use crate::sink::SliceSink;
        use alloc::vec::Vec;
        let mut data = Vec::new();
        data.resize(5, 0);
        let sink = SliceSink::new(&mut data, 1);
        assert_eq!(sink.pos(), 1);
        assert_eq!(sink.capacity(), 5);
    }
}