lz4_flex-0.11.1/.cargo_vcs_info.json0000644000000001360000000000100126430ustar { "git": { "sha1": "244275aaa33fdaaac6a9e33615348b5b6b7fd77d" }, "path_in_vcs": "" }lz4_flex-0.11.1/Cargo.lock0000644000000306130000000000100106210ustar # This file is automatically @generated by Cargo. # It is not intended for manual editing. version = 3 [[package]] name = "autocfg" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" [[package]] name = "bit-set" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" dependencies = [ "bit-vec", ] [[package]] name = "bit-vec" version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" [[package]] name = "bitflags" version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "byteorder" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fc10e8cc6b2580fda3f36eb6dc5316657f812a3df879a44a66fc9f0fdbc4855" [[package]] name = "byteorder" version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] name = "cc" version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" dependencies = [ "jobserver", ] [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "errno" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4bcfec3a70f97c962c307b2d2c56e358cf1d00b558d74262b5f929ee8cc7e73a" dependencies = [ "errno-dragonfly", "libc", "windows-sys", ] [[package]] name = "errno-dragonfly" version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" dependencies = [ "cc", "libc", ] [[package]] name = "fastrand" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" dependencies = [ "instant", ] [[package]] name = "fnv" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" [[package]] name = "getrandom" version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" dependencies = [ "cfg-if", "libc", "wasi", ] [[package]] name = "hermit-abi" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" [[package]] name = "instant" version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" dependencies = [ "cfg-if", ] [[package]] name = "io-lifetimes" version = "1.0.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" dependencies = [ "hermit-abi", "libc", "windows-sys", ] [[package]] name = "itoa" version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "jobserver" version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" dependencies = [ "libc", ] [[package]] name = "lazy_static" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" version = "0.2.146" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f92be4933c13fd498862a9e02a3055f8a8d9c039ce33db97306fd5a6caa7f29b" [[package]] name = "libm" version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" [[package]] name = "linux-raw-sys" version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" [[package]] name = "lz4-compress" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0f966533a922a9bba9e95e594c1fdb3b9bf5fdcdb11e37e51ad84cd76e468b91" dependencies = [ "byteorder 0.5.3", "quick-error", ] [[package]] name = "lz4_flex" version = "0.11.1" dependencies = [ "lz4-compress", "lzzzz", "more-asserts", "proptest", "serde_json", "snap", "twox-hash", ] [[package]] name = "lzzzz" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8014d1362004776e6a91e4c15a3aa7830d1b6650a075b51a9969ebb6d6af13bc" dependencies = [ "cc", ] [[package]] name = "more-asserts" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fafa6961cabd9c63bcd77a45d7e3b7f3b552b70417831fb0f56db717e72407e" [[package]] name = "num-traits" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" dependencies = [ "autocfg", "libm", ] [[package]] name = "ppv-lite86" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proptest" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4e35c06b98bf36aba164cc17cb25f7e232f5c4aeea73baa14b8a9f0d92dbfa65" dependencies = [ "bit-set", "bitflags", "byteorder 1.4.3", "lazy_static", "num-traits", "rand", "rand_chacha", "rand_xorshift", "regex-syntax", "rusty-fork", "tempfile", "unarray", ] [[package]] name = "quick-error" version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "rand" version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", "rand_chacha", "rand_core", ] [[package]] name = "rand_chacha" version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", "rand_core", ] [[package]] name = "rand_core" version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", ] [[package]] name = "rand_xorshift" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" dependencies = [ "rand_core", ] [[package]] name = "redox_syscall" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" dependencies = [ "bitflags", ] [[package]] name = "regex-syntax" version = "0.6.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "rustix" version = "0.37.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b96e891d04aa506a6d1f318d2771bcb1c7dfda84e126660ace067c9b474bb2c0" dependencies = [ "bitflags", "errno", "io-lifetimes", "libc", "linux-raw-sys", "windows-sys", ] [[package]] name = "rusty-fork" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" dependencies = [ "fnv", "quick-error", "tempfile", "wait-timeout", ] [[package]] name = "ryu" version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f91339c0467de62360649f8d3e185ca8de4224ff281f66000de5eb2a77a79041" [[package]] name = "serde" version = "1.0.164" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e8c8cf938e98f769bc164923b06dce91cea1751522f46f8466461af04c9027d" [[package]] name = "serde_json" version = "1.0.97" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bdf3bf93142acad5821c99197022e170842cdbc1c30482b98750c688c640842a" dependencies = [ "itoa", "ryu", "serde", ] [[package]] name = "snap" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e9f0ab6ef7eb7353d9119c170a436d1bf248eea575ac42d19d12f4e34130831" [[package]] name = "static_assertions" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "tempfile" version = "3.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" dependencies = [ "autocfg", "cfg-if", "fastrand", "redox_syscall", "rustix", "windows-sys", ] [[package]] name = "twox-hash" version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ "cfg-if", "static_assertions", ] [[package]] name = "unarray" version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" [[package]] name = "wait-timeout" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" dependencies = [ "libc", ] [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "windows-sys" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" dependencies = [ "windows-targets", ] [[package]] name = "windows-targets" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b1eb6f0cd7c80c79759c929114ef071b87354ce476d9d94271031c0497adfd5" dependencies = [ "windows_aarch64_gnullvm", "windows_aarch64_msvc", "windows_i686_gnu", "windows_i686_msvc", "windows_x86_64_gnu", "windows_x86_64_gnullvm", "windows_x86_64_msvc", ] [[package]] name = "windows_aarch64_gnullvm" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91ae572e1b79dba883e0d315474df7305d12f569b400fcf90581b06062f7e1bc" [[package]] name = "windows_aarch64_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2ef27e0d7bdfcfc7b868b317c1d32c641a6fe4629c171b8928c7b08d98d7cf3" [[package]] name = "windows_i686_gnu" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "622a1962a7db830d6fd0a69683c80a18fda201879f0f447f065a3b7467daa241" [[package]] name = "windows_i686_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4542c6e364ce21bf45d69fdd2a8e455fa38d316158cfd43b3ac1c5b1b19f8e00" [[package]] name = "windows_x86_64_gnu" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca2b8a661f7628cbd23440e50b05d705db3686f894fc9580820623656af974b1" [[package]] name = "windows_x86_64_gnullvm" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7896dbc1f41e08872e9d5e8f8baa8fdd2677f29468c4e156210174edc7f7b953" [[package]] name = "windows_x86_64_msvc" version = "0.48.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a" lz4_flex-0.11.1/Cargo.toml0000644000000036210000000000100106430ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "lz4_flex" version = "0.11.1" authors = [ "Pascal Seitz ", "Arthur Silva ", "ticki ", ] include = [ "src/*.rs", "src/frame/**/*", "src/block/**/*", "README.md", ] description = "Fastest LZ4 implementation in Rust, no unsafe by default." 
homepage = "https://github.com/pseitz/lz4_flex" readme = "README.md" keywords = [ "compression", "lz4", "compress", "decompression", "decompress", ] license = "MIT" repository = "https://github.com/pseitz/lz4_flex" [package.metadata.docs.rs] all-features = true rustdoc-args = [ "--cfg", "docsrs", ] [profile.bench] opt-level = 3 lto = true codegen-units = 1 [profile.release] opt-level = 3 codegen-units = 1 panic = "unwind" [[bench]] name = "crit_bench" path = "benches/crit_bench.rs" harness = false [dependencies.twox-hash] version = "1.6.3" optional = true default-features = false [dev-dependencies.lz4-compress] version = "0.1.1" [dev-dependencies.lzzzz] version = "1.0.4" [dev-dependencies.more-asserts] version = "0.3.1" [dev-dependencies.proptest] version = "1.0.0" [dev-dependencies.serde_json] version = "1.0.91" [dev-dependencies.snap] version = "1.1.0" [features] default = [ "std", "safe-encode", "safe-decode", "frame", ] frame = [ "std", "dep:twox-hash", ] nightly = [] safe-decode = [] safe-encode = [] std = [] lz4_flex-0.11.1/Cargo.toml.orig000064400000000000000000000042631046102023000143270ustar 00000000000000[package] authors = ["Pascal Seitz ", "Arthur Silva ", "ticki "] description = "Fastest LZ4 implementation in Rust, no unsafe by default." edition = "2021" keywords = ["compression", "lz4", "compress", "decompression", "decompress"] name = "lz4_flex" homepage = "https://github.com/pseitz/lz4_flex" repository = "https://github.com/pseitz/lz4_flex" readme = "README.md" license = "MIT" version = "0.11.1" include = ["src/*.rs", "src/frame/**/*", "src/block/**/*", "README.md"] [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] [[bench]] harness = false name = "crit_bench" path = "benches/crit_bench.rs" [dev-dependencies] criterion = { git = "https://github.com/PSeitz/criterion.rs/", rev = "cf60ffc"} lzzzz = "1.0.4" lz4-compress = "0.1.1" more-asserts = "0.3.1" snap = "1.1.0" serde_json = "1.0.91" proptest = "1.0.0" [dev-dependencies.lz-fear] git = "https://github.com/main--/rust-lz-fear" #Uncomment to make lz4_flex master available as lz4_flex_master #[dev-dependencies.lz4_flex_master] #rev= "a122673" # v10 #git = "https://github.com/PSeitz/lz4_flex" #package = "lz4_flex" #default-features=false #features = ["std", "safe-encode", "safe-decode", "frame"] [features] default = ["std", "safe-encode", "safe-decode", "frame"] safe-decode = [] safe-encode = [] #unchecked-decode = [] # Removes some checks for additional performance. Only enable on trusted input! 
frame = ["std", "dep:twox-hash"] std = [] # use nightly compiler features nightly = [] [dependencies] twox-hash = { version = "1.6.3", default-features = false, optional = true } [profile.bench] codegen-units = 1 lto = true opt-level = 3 [profile.release] codegen-units = 1 #debug = true opt-level = 3 panic = "unwind" # [[bench]] # harness = false # name = "quickbench" # path = "benches/quickbench.rs" # [[bench]] # harness = false # name = "bench" # path = "benches/bench.rs" # [[bin]] # name = "decompress_with_stats" # path = "src/test_bins/decompress_with_stats.rs" # [[bin]] # name = "profile_decomp" # path = "src/test_bins/profile_decomp.rs" # [[bin]] # name = "profile_comp" # path = "src/test_bins/profile_comp.rs" lz4_flex-0.11.1/README.md000064400000000000000000000121111046102023000127060ustar 00000000000000![Rust](https://github.com/PSeitz/lz4_flex/workflows/Rust/badge.svg) [![Docs](https://docs.rs/lz4_flex/badge.svg)](https://docs.rs/crate/lz4_flex/) [![Crates.io](https://img.shields.io/crates/v/lz4_flex.svg)](https://crates.io/crates/lz4_flex) # lz4_flex ![lz4_flex_logo](https://raw.githubusercontent.com/PSeitz/lz4_flex/master/logo.jpg) Fastest LZ4 implementation in Rust. Originally based on [redox-os' lz4 compression](https://crates.io/crates/lz4-compress), but now a complete rewrite. The results in the table are from a benchmark in this project (66Kb JSON, 10MB dickens) with the block format. AMD Ryzen 7 5900HX, rustc 1.69.0 (84c898d65 2023-04-16), Manjaro, CPU Boost Disabled, CPU Governor: Performance 66Kb JSON | Compressor | Compression | Decompression | Ratio | |----------------------|-------------|---------------|---------------| | lz4_flex unsafe w. unchecked_decode | 1615 MiB/s | 5973 MiB/s | 0.2284 | | lz4_flex unsafe | 1615 MiB/s | 5512 MiB/s | 0.2284 | | lz4_flex safe | 1272 MiB/s | 4540 MiB/s | 0.2284 | | lzzz (lz4 1.9.3) | 1469 MiB/s | 5313 MiB/s | 0.2283 | | lz4_fear | 662 MiB/s | 939 MiB/s | 0.2283 | | snap | 1452 MiB/s | 1649 MiB/s | 0.2242 | 10 Mb dickens | Compressor | Compression | Decompression | Ratio | |----------------------|-------------|---------------|---------------| | lz4_flex unsafe w. unchecked_decode | 347 MiB/s | 3168 MiB/s | 0.6372 | | lz4_flex unsafe | 347 MiB/s | 2734 MiB/s | 0.6372 | | lz4_flex safe | 259 MiB/s | 2338 MiB/s | 0.6372 | | lzzz (lz4 1.9.3) | 324 MiB/s | 2759 MiB/s | 0.6372 | | lz4_fear | 201 MiB/s | 370 MiB/s | 0.6372 | | snap | 286 MiB/s | 679 MiB/s | 0.6276 | ## Features - Very good logo - LZ4 Block format - LZ4 Frame format (thanks @arthurprs) - High performance - 1,5s clean release build time - Feature flags to configure safe/unsafe code usage - no-std support with block format (thanks @coolreader18) - 32-bit support ## Usage: Compression and decompression uses no usafe via the default feature flags "safe-encode" and "safe-decode". If you need more performance you can disable them (e.g. with no-default-features). Safe: ``` lz4_flex = { version = "0.11" } ``` Performance: ``` lz4_flex = { version = "0.11", default-features = false } ``` ### Block Format The block format is only valid for smaller data chunks as as block is de/compressed in memory. For larger data use the frame format, which consists of multiple blocks. 
```rust use lz4_flex::block::{compress_prepend_size, decompress_size_prepended}; fn main(){ let input: &[u8] = b"Hello people, what's up?"; let compressed = compress_prepend_size(input); let uncompressed = decompress_size_prepended(&compressed).unwrap(); assert_eq!(input, uncompressed); } ``` ## no_std support no_std support is currently only for the block format, since the frame format uses `std::io::Write`, which is not available in core. ## Benchmarks The benchmark is run with criterion, the test files are in the benches folder. Currently 4 implementations are compared, this one, [lz-fear](https://github.com/main--/rust-lz-fear), the [c version via rust bindings](https://crates.io/crates/lzzzz) and [snappy](https://github.com/burntsushi/rust-snappy). The lz4-flex version is tested with the feature flags safe-decode and safe-encode switched on and off. - lz4_cpp: https://crates.io/crates/lzzzz - lz-fear: https://github.com/main--/rust-lz-fear - snap: https://github.com/burntsushi/rust-snappy Tested on AMD Ryzen 7 5900HX, rustc 1.69.0 (84c898d65 2023-04-16), Manjaro, CPU Boost Disabled, CPU 3GHZ ### Results v0.11.0 02-06-2023 (safe-decode and safe-encode off) `cargo bench --no-default-features` ![Compress](./compress_bench.svg) ![Decompress](./decompress_bench.svg) ### Results v0.11.0 02-06-2023 (safe-decode and safe-encode on) `cargo bench` ![Compress](./compress_bench_safe.svg) ![Decompress](./decompress_bench_safe.svg) ## Miri [Miri](https://github.com/rust-lang/miri) can be used to find issues related to incorrect unsafe usage: `MIRIFLAGS="-Zmiri-disable-isolation -Zmiri-disable-stacked-borrows" cargo +nightly miri test --no-default-features --features frame` ## Fuzzer This fuzz target generates corrupted data for the decompressor. `cargo +nightly fuzz run fuzz_decomp_corrupt_block` and `cargo +nightly fuzz run fuzz_decomp_corrupt_frame` This fuzz target asserts that a compression and decompression rountrip returns the original input. `cargo +nightly fuzz run fuzz_roundtrip` and `cargo +nightly fuzz run fuzz_roundtrip_frame` This fuzz target asserts compression with cpp and decompression with lz4_flex returns the original input. `cargo +nightly fuzz run fuzz_roundtrip_cpp_compress` ## Bindings in other languages - Node.js: [lz4-napi](https://github.com/antoniomuso/lz4-napi) - Wasm: [lz4-wasm](https://github.com/PSeitz/lz4-wasm) ## TODO - High compression ## Migrate from v0.10 to v0.11.1 To migrate, just remove the `checked-decode` feature flag if you used it. lz4_flex-0.11.1/src/block/compress.rs000064400000000000000000001044501046102023000155210ustar 00000000000000//! The compression algorithm. //! //! We make use of hash tables to find duplicates. This gives a reasonable compression ratio with a //! high performance. It has fixed memory usage, which contrary to other approachs, makes it less //! memory hungry. 
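//!
//! # Example
//!
//! A minimal round-trip sketch through the block API (this mirrors the README example; the
//! `lz4_flex::block` paths assume the crate's default feature set):
//!
//! ```
//! use lz4_flex::block::{compress_prepend_size, decompress_size_prepended};
//!
//! let input: &[u8] = b"Hello people, what's up?";
//! // The uncompressed size is prepended as a little-endian u32.
//! let compressed = compress_prepend_size(input);
//! let decompressed = decompress_size_prepended(&compressed).unwrap();
//! assert_eq!(input, &decompressed[..]);
//! ```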
use crate::block::hashtable::HashTable; use crate::block::END_OFFSET; use crate::block::LZ4_MIN_LENGTH; use crate::block::MAX_DISTANCE; use crate::block::MFLIMIT; use crate::block::MINMATCH; #[cfg(not(feature = "safe-encode"))] use crate::sink::PtrSink; use crate::sink::Sink; use crate::sink::SliceSink; #[allow(unused_imports)] use alloc::vec; use alloc::vec::Vec; #[cfg(feature = "safe-encode")] use core::convert::TryInto; use super::hashtable::HashTable4K; use super::hashtable::HashTable4KU16; use super::{CompressError, WINDOW_SIZE}; /// Increase step size after 1< u32 { unsafe { read_u32_ptr(input.as_ptr().add(n)) } } #[inline] #[cfg(feature = "safe-encode")] pub(super) fn get_batch(input: &[u8], n: usize) -> u32 { u32::from_ne_bytes(input[n..n + 4].try_into().unwrap()) } /// Read an usize sized "batch" from some position. /// /// This will read a native-endian usize from some position. #[inline] #[allow(dead_code)] #[cfg(not(feature = "safe-encode"))] pub(super) fn get_batch_arch(input: &[u8], n: usize) -> usize { unsafe { read_usize_ptr(input.as_ptr().add(n)) } } #[inline] #[allow(dead_code)] #[cfg(feature = "safe-encode")] pub(super) fn get_batch_arch(input: &[u8], n: usize) -> usize { const USIZE_SIZE: usize = core::mem::size_of::(); let arr: &[u8; USIZE_SIZE] = input[n..n + USIZE_SIZE].try_into().unwrap(); usize::from_ne_bytes(*arr) } #[inline] fn token_from_literal(lit_len: usize) -> u8 { if lit_len < 0xF { // Since we can fit the literals length into it, there is no need for saturation. (lit_len as u8) << 4 } else { // We were unable to fit the literals into it, so we saturate to 0xF. We will later // write the extensional value. 0xF0 } } #[inline] fn token_from_literal_and_match_length(lit_len: usize, duplicate_length: usize) -> u8 { let mut token = if lit_len < 0xF { // Since we can fit the literals length into it, there is no need for saturation. (lit_len as u8) << 4 } else { // We were unable to fit the literals into it, so we saturate to 0xF. We will later // write the extensional value. 0xF0 }; token |= if duplicate_length < 0xF { // We could fit it in. duplicate_length as u8 } else { // We were unable to fit it in, so we default to 0xF, which will later be extended. 0xF }; token } /// Counts the number of same bytes in two byte streams. /// `input` is the complete input /// `cur` is the current position in the input. it will be incremented by the number of matched /// bytes `source` either the same as input or an external slice /// `candidate` is the candidate position in `source` /// /// The function ignores the last END_OFFSET bytes in input as those should be literals. #[inline] #[cfg(feature = "safe-encode")] fn count_same_bytes(input: &[u8], cur: &mut usize, source: &[u8], candidate: usize) -> usize { const USIZE_SIZE: usize = core::mem::size_of::(); let cur_slice = &input[*cur..input.len() - END_OFFSET]; let cand_slice = &source[candidate..]; let mut num = 0; for (block1, block2) in cur_slice .chunks_exact(USIZE_SIZE) .zip(cand_slice.chunks_exact(USIZE_SIZE)) { let input_block = usize::from_ne_bytes(block1.try_into().unwrap()); let match_block = usize::from_ne_bytes(block2.try_into().unwrap()); if input_block == match_block { num += USIZE_SIZE; } else { let diff = input_block ^ match_block; num += (diff.to_le().trailing_zeros() / 8) as usize; *cur += num; return num; } } // If we're here we may have 1 to 7 bytes left to check close to the end of input // or source slices. Since this is rare occurrence we mark it cold to get better // ~5% better performance. 
#[cold] fn count_same_bytes_tail(a: &[u8], b: &[u8], offset: usize) -> usize { a.iter() .zip(b) .skip(offset) .take_while(|(a, b)| a == b) .count() } num += count_same_bytes_tail(cur_slice, cand_slice, num); *cur += num; num } /// Counts the number of same bytes in two byte streams. /// `input` is the complete input /// `cur` is the current position in the input. it will be incremented by the number of matched /// bytes `source` either the same as input OR an external slice /// `candidate` is the candidate position in `source` /// /// The function ignores the last END_OFFSET bytes in input as those should be literals. #[inline] #[cfg(not(feature = "safe-encode"))] fn count_same_bytes(input: &[u8], cur: &mut usize, source: &[u8], candidate: usize) -> usize { let max_input_match = input.len().saturating_sub(*cur + END_OFFSET); let max_candidate_match = source.len() - candidate; // Considering both limits calc how far we may match in input. let input_end = *cur + max_input_match.min(max_candidate_match); let start = *cur; let mut source_ptr = unsafe { source.as_ptr().add(candidate) }; // compare 4/8 bytes blocks depending on the arch const STEP_SIZE: usize = core::mem::size_of::(); while *cur + STEP_SIZE <= input_end { let diff = read_usize_ptr(unsafe { input.as_ptr().add(*cur) }) ^ read_usize_ptr(source_ptr); if diff == 0 { *cur += STEP_SIZE; unsafe { source_ptr = source_ptr.add(STEP_SIZE); } } else { *cur += (diff.to_le().trailing_zeros() / 8) as usize; return *cur - start; } } // compare 4 bytes block #[cfg(target_pointer_width = "64")] { if input_end - *cur >= 4 { let diff = read_u32_ptr(unsafe { input.as_ptr().add(*cur) }) ^ read_u32_ptr(source_ptr); if diff == 0 { *cur += 4; unsafe { source_ptr = source_ptr.add(4); } } else { *cur += (diff.to_le().trailing_zeros() / 8) as usize; return *cur - start; } } } // compare 2 bytes block if input_end - *cur >= 2 && unsafe { read_u16_ptr(input.as_ptr().add(*cur)) == read_u16_ptr(source_ptr) } { *cur += 2; unsafe { source_ptr = source_ptr.add(2); } } if *cur < input_end && unsafe { input.as_ptr().add(*cur).read() } == unsafe { source_ptr.read() } { *cur += 1; } *cur - start } /// Write an integer to the output. /// /// Each additional byte then represent a value from 0 to 255, which is added to the previous value /// to produce a total length. When the byte value is 255, another byte must read and added, and so /// on. There can be any number of bytes of value "255" following token #[inline] #[cfg(feature = "safe-encode")] fn write_integer(output: &mut impl Sink, mut n: usize) { // Note: Since `n` is usually < 0xFF and writing multiple bytes to the output // requires 2 branches of bound check (due to the possibility of add overflows) // the simple byte at a time implementation below is faster in most cases. while n >= 0xFF { n -= 0xFF; push_byte(output, 0xFF); } push_byte(output, n as u8); } /// Write an integer to the output. /// /// Each additional byte then represent a value from 0 to 255, which is added to the previous value /// to produce a total length. When the byte value is 255, another byte must read and added, and so /// on. There can be any number of bytes of value "255" following token #[inline] #[cfg(not(feature = "safe-encode"))] fn write_integer(output: &mut impl Sink, mut n: usize) { // Write the 0xFF bytes as long as the integer is higher than said value. if n >= 4 * 0xFF { // In this unlikelly branch we use a fill instead of a loop, // otherwise rustc may output a large unrolled/vectorized loop. 
let bulk = n / (4 * 0xFF); n %= 4 * 0xFF; unsafe { core::ptr::write_bytes(output.pos_mut_ptr(), 0xFF, 4 * bulk); output.set_pos(output.pos() + 4 * bulk); } } // Handle last 1 to 4 bytes push_u32(output, 0xFFFFFFFF); // Updating output len for the remainder unsafe { output.set_pos(output.pos() - 4 + 1 + n / 255); // Write the remaining byte. *output.pos_mut_ptr().sub(1) = (n % 255) as u8; } } /// Handle the last bytes from the input as literals #[cold] fn handle_last_literals(output: &mut impl Sink, input: &[u8], start: usize) { let lit_len = input.len() - start; let token = token_from_literal(lit_len); push_byte(output, token); if lit_len >= 0xF { write_integer(output, lit_len - 0xF); } // Now, write the actual literals. output.extend_from_slice(&input[start..]); } /// Moves the cursors back as long as the bytes match, to find additional bytes in a duplicate #[inline] #[cfg(feature = "safe-encode")] fn backtrack_match( input: &[u8], cur: &mut usize, literal_start: usize, source: &[u8], candidate: &mut usize, ) { // Note: Even if iterator version of this loop has less branches inside the loop it has more // branches before the loop. That in practice seems to make it slower than the while version // bellow. TODO: It should be possible remove all bounds checks, since we are walking // backwards while *candidate > 0 && *cur > literal_start && input[*cur - 1] == source[*candidate - 1] { *cur -= 1; *candidate -= 1; } } /// Moves the cursors back as long as the bytes match, to find additional bytes in a duplicate #[inline] #[cfg(not(feature = "safe-encode"))] fn backtrack_match( input: &[u8], cur: &mut usize, literal_start: usize, source: &[u8], candidate: &mut usize, ) { while unsafe { *candidate > 0 && *cur > literal_start && input.get_unchecked(*cur - 1) == source.get_unchecked(*candidate - 1) } { *cur -= 1; *candidate -= 1; } } /// Compress all bytes of `input[input_pos..]` into `output`. /// /// Bytes in `input[..input_pos]` are treated as a preamble and can be used for lookback. /// This part is known as the compressor "prefix". /// Bytes in `ext_dict` logically precede the bytes in `input` and can also be used for lookback. /// /// `input_stream_offset` is the logical position of the first byte of `input`. This allows same /// `dict` to be used for many calls to `compress_internal` as we can "readdress" the first byte of /// `input` to be something other than 0. /// /// `dict` is the dictionary of previously encoded sequences. /// /// This is used to find duplicates in the stream so they are not written multiple times. /// /// Every four bytes are hashed, and in the resulting slot their position in the input buffer /// is placed in the dict. This way we can easily look up a candidate to back references. /// /// Returns the number of bytes written (compressed) into `output`. /// /// # Const parameters /// `USE_DICT`: Disables usage of ext_dict (it'll panic if a non-empty slice is used). /// In other words, this generates more optimized code when an external dictionary isn't used. /// /// A similar const argument could be used to disable the Prefix mode (eg. USE_PREFIX), /// which would impose `input_pos == 0 && input_stream_offset == 0`. Experiments didn't /// show significant improvement though. // Intentionally avoid inlining. // Empirical tests revealed it to be rarely better but often significantly detrimental. 
#[inline(never)] pub(crate) fn compress_internal( input: &[u8], input_pos: usize, output: &mut S, dict: &mut T, ext_dict: &[u8], input_stream_offset: usize, ) -> Result { assert!(input_pos <= input.len()); if USE_DICT { assert!(ext_dict.len() <= super::WINDOW_SIZE); assert!(ext_dict.len() <= input_stream_offset); // Check for overflow hazard when using ext_dict assert!(input_stream_offset .checked_add(input.len()) .and_then(|i| i.checked_add(ext_dict.len())) .map_or(false, |i| i <= isize::MAX as usize)); } else { assert!(ext_dict.is_empty()); } if output.capacity() - output.pos() < get_maximum_output_size(input.len() - input_pos) { return Err(CompressError::OutputTooSmall); } let output_start_pos = output.pos(); if input.len() - input_pos < LZ4_MIN_LENGTH { handle_last_literals(output, input, input_pos); return Ok(output.pos() - output_start_pos); } let ext_dict_stream_offset = input_stream_offset - ext_dict.len(); let end_pos_check = input.len() - MFLIMIT; let mut literal_start = input_pos; let mut cur = input_pos; if cur == 0 && input_stream_offset == 0 { // According to the spec we can't start with a match, // except when referencing another block. let hash = T::get_hash_at(input, 0); dict.put_at(hash, 0); cur = 1; } loop { // Read the next block into two sections, the literals and the duplicates. let mut step_size; let mut candidate; let mut candidate_source; let mut offset; let mut non_match_count = 1 << INCREASE_STEPSIZE_BITSHIFT; // The number of bytes before our cursor, where the duplicate starts. let mut next_cur = cur; // In this loop we search for duplicates via the hashtable. 4bytes or 8bytes are hashed and // compared. loop { step_size = non_match_count >> INCREASE_STEPSIZE_BITSHIFT; non_match_count += 1; cur = next_cur; next_cur += step_size; // Same as cur + MFLIMIT > input.len() if cur > end_pos_check { handle_last_literals(output, input, literal_start); return Ok(output.pos() - output_start_pos); } // Find a candidate in the dictionary with the hash of the current four bytes. // Unchecked is safe as long as the values from the hash function don't exceed the size // of the table. This is ensured by right shifting the hash values // (`dict_bitshift`) to fit them in the table // [Bounds Check]: Can be elided due to `end_pos_check` above let hash = T::get_hash_at(input, cur); candidate = dict.get_at(hash); dict.put_at(hash, cur + input_stream_offset); // Sanity check: Matches can't be ahead of `cur`. debug_assert!(candidate <= input_stream_offset + cur); // Two requirements to the candidate exists: // - We should not return a position which is merely a hash collision, so that the // candidate actually matches what we search for. // - We can address up to 16-bit offset, hence we are only able to address the candidate // if its offset is less than or equals to 0xFFFF. if input_stream_offset + cur - candidate > MAX_DISTANCE { continue; } if candidate >= input_stream_offset { // match within input offset = (input_stream_offset + cur - candidate) as u16; candidate -= input_stream_offset; candidate_source = input; } else if USE_DICT { // Sanity check, which may fail if we lost history beyond MAX_DISTANCE debug_assert!( candidate >= ext_dict_stream_offset, "Lost history in ext dict mode" ); // match within ext dict offset = (input_stream_offset + cur - candidate) as u16; candidate -= ext_dict_stream_offset; candidate_source = ext_dict; } else { // Match is not reachable anymore // eg. 
compressing an independent block frame w/o clearing // the matches tables, only increasing input_stream_offset. // Sanity check debug_assert!(input_pos == 0, "Lost history in prefix mode"); continue; } // [Bounds Check]: Candidate is coming from the Hashmap. It can't be out of bounds, but // impossible to prove for the compiler and remove the bounds checks. let cand_bytes: u32 = get_batch(candidate_source, candidate); // [Bounds Check]: Should be able to be elided due to `end_pos_check`. let curr_bytes: u32 = get_batch(input, cur); if cand_bytes == curr_bytes { break; } } // Extend the match backwards if we can backtrack_match( input, &mut cur, literal_start, candidate_source, &mut candidate, ); // The length (in bytes) of the literals section. let lit_len = cur - literal_start; // Generate the higher half of the token. cur += MINMATCH; candidate += MINMATCH; let duplicate_length = count_same_bytes(input, &mut cur, candidate_source, candidate); // Note: The `- 2` offset was copied from the reference implementation, it could be // arbitrary. let hash = T::get_hash_at(input, cur - 2); dict.put_at(hash, cur - 2 + input_stream_offset); let token = token_from_literal_and_match_length(lit_len, duplicate_length); // Push the token to the output stream. push_byte(output, token); // If we were unable to fit the literals length into the token, write the extensional // part. if lit_len >= 0xF { write_integer(output, lit_len - 0xF); } // Now, write the actual literals. // // The unsafe version copies blocks of 8bytes, and therefore may copy up to 7bytes more than // needed. This is safe, because the last 12 bytes (MF_LIMIT) are handled in // handle_last_literals. copy_literals_wild(output, input, literal_start, lit_len); // write the offset in little endian. push_u16(output, offset); // If we were unable to fit the duplicates length into the token, write the // extensional part. 
if duplicate_length >= 0xF { write_integer(output, duplicate_length - 0xF); } literal_start = cur; } } #[inline] #[cfg(feature = "safe-encode")] fn push_byte(output: &mut impl Sink, el: u8) { output.push(el); } #[inline] #[cfg(not(feature = "safe-encode"))] fn push_byte(output: &mut impl Sink, el: u8) { unsafe { core::ptr::write(output.pos_mut_ptr(), el); output.set_pos(output.pos() + 1); } } #[inline] #[cfg(feature = "safe-encode")] fn push_u16(output: &mut impl Sink, el: u16) { output.extend_from_slice(&el.to_le_bytes()); } #[inline] #[cfg(not(feature = "safe-encode"))] fn push_u16(output: &mut impl Sink, el: u16) { unsafe { core::ptr::copy_nonoverlapping(el.to_le_bytes().as_ptr(), output.pos_mut_ptr(), 2); output.set_pos(output.pos() + 2); } } #[inline] #[cfg(not(feature = "safe-encode"))] fn push_u32(output: &mut impl Sink, el: u32) { unsafe { core::ptr::copy_nonoverlapping(el.to_le_bytes().as_ptr(), output.pos_mut_ptr(), 4); output.set_pos(output.pos() + 4); } } #[inline(always)] // (always) necessary otherwise compiler fails to inline it #[cfg(feature = "safe-encode")] fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize, len: usize) { output.extend_from_slice_wild(&input[input_start..input_start + len], len) } #[inline] #[cfg(not(feature = "safe-encode"))] fn copy_literals_wild(output: &mut impl Sink, input: &[u8], input_start: usize, len: usize) { debug_assert!(input_start + len / 8 * 8 + ((len % 8) != 0) as usize * 8 <= input.len()); debug_assert!(output.pos() + len / 8 * 8 + ((len % 8) != 0) as usize * 8 <= output.capacity()); unsafe { // Note: This used to be a wild copy loop of 8 bytes, but the compiler consistently // transformed it into a call to memcopy, which hurts performance significantly for // small copies, which are common. let start_ptr = input.as_ptr().add(input_start); match len { 0..=8 => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), 8), 9..=16 => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), 16), 17..=24 => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), 24), _ => core::ptr::copy_nonoverlapping(start_ptr, output.pos_mut_ptr(), len), } output.set_pos(output.pos() + len); } } /// Compress all bytes of `input` into `output`. /// The method chooses an appropriate hashtable to lookup duplicates. /// output should be preallocated with a size of /// `get_maximum_output_size`. /// /// Returns the number of bytes written (compressed) into `output`. #[inline] pub(crate) fn compress_into_sink_with_dict( input: &[u8], output: &mut impl Sink, mut dict_data: &[u8], ) -> Result { if dict_data.len() + input.len() < u16::MAX as usize { let mut dict = HashTable4KU16::new(); init_dict(&mut dict, &mut dict_data); compress_internal::<_, USE_DICT, _>(input, 0, output, &mut dict, dict_data, dict_data.len()) } else { let mut dict = HashTable4K::new(); init_dict(&mut dict, &mut dict_data); compress_internal::<_, USE_DICT, _>(input, 0, output, &mut dict, dict_data, dict_data.len()) } } #[inline] fn init_dict(dict: &mut T, dict_data: &mut &[u8]) { if dict_data.len() > WINDOW_SIZE { *dict_data = &dict_data[dict_data.len() - WINDOW_SIZE..]; } let mut i = 0usize; while i + core::mem::size_of::() <= dict_data.len() { let hash = T::get_hash_at(dict_data, i); dict.put_at(hash, i); // Note: The 3 byte step was copied from the reference implementation, it could be // arbitrary. i += 3; } } /// Returns the maximum output size of the compressed data. 
/// Can be used to preallocate capacity on the output vector #[inline] pub fn get_maximum_output_size(input_len: usize) -> usize { 16 + 4 + (input_len as f64 * 1.1) as usize } /// Compress all bytes of `input` into `output`. /// The method chooses an appropriate hashtable to lookup duplicates. /// output should be preallocated with a size of /// `get_maximum_output_size`. /// /// Returns the number of bytes written (compressed) into `output`. #[inline] pub fn compress_into(input: &[u8], output: &mut [u8]) -> Result { compress_into_sink_with_dict::(input, &mut SliceSink::new(output, 0), b"") } /// Compress all bytes of `input` into `output`. /// The method chooses an appropriate hashtable to lookup duplicates. /// output should be preallocated with a size of /// `get_maximum_output_size`. /// /// Returns the number of bytes written (compressed) into `output`. #[inline] pub fn compress_into_with_dict( input: &[u8], output: &mut [u8], dict_data: &[u8], ) -> Result { compress_into_sink_with_dict::(input, &mut SliceSink::new(output, 0), dict_data) } #[inline] fn compress_into_vec_with_dict( input: &[u8], prepend_size: bool, mut dict_data: &[u8], ) -> Vec { let prepend_size_num_bytes = if prepend_size { 4 } else { 0 }; let max_compressed_size = get_maximum_output_size(input.len()) + prepend_size_num_bytes; if dict_data.len() <= 3 { dict_data = b""; } #[cfg(feature = "safe-encode")] let mut compressed = { let mut compressed: Vec = vec![0u8; max_compressed_size]; let out = if prepend_size { compressed[..4].copy_from_slice(&(input.len() as u32).to_le_bytes()); &mut compressed[4..] } else { &mut compressed }; let compressed_len = compress_into_sink_with_dict::(input, &mut SliceSink::new(out, 0), dict_data) .unwrap(); compressed.truncate(prepend_size_num_bytes + compressed_len); compressed }; #[cfg(not(feature = "safe-encode"))] let mut compressed = { let mut vec = Vec::with_capacity(max_compressed_size); let start_pos = if prepend_size { vec.extend_from_slice(&(input.len() as u32).to_le_bytes()); 4 } else { 0 }; let compressed_len = compress_into_sink_with_dict::( input, &mut PtrSink::from_vec(&mut vec, start_pos), dict_data, ) .unwrap(); unsafe { vec.set_len(prepend_size_num_bytes + compressed_len); } vec }; compressed.shrink_to_fit(); compressed } /// Compress all bytes of `input` into `output`. The uncompressed size will be prepended as a little /// endian u32. Can be used in conjunction with `decompress_size_prepended` #[inline] pub fn compress_prepend_size(input: &[u8]) -> Vec { compress_into_vec_with_dict::(input, true, b"") } /// Compress all bytes of `input`. #[inline] pub fn compress(input: &[u8]) -> Vec { compress_into_vec_with_dict::(input, false, b"") } /// Compress all bytes of `input` with an external dictionary. #[inline] pub fn compress_with_dict(input: &[u8], ext_dict: &[u8]) -> Vec { compress_into_vec_with_dict::(input, false, ext_dict) } /// Compress all bytes of `input` into `output`. The uncompressed size will be prepended as a little /// endian u32. 
Can be used in conjunction with `decompress_size_prepended_with_dict` #[inline] pub fn compress_prepend_size_with_dict(input: &[u8], ext_dict: &[u8]) -> Vec { compress_into_vec_with_dict::(input, true, ext_dict) } #[inline] #[cfg(not(feature = "safe-encode"))] fn read_u16_ptr(input: *const u8) -> u16 { let mut num: u16 = 0; unsafe { core::ptr::copy_nonoverlapping(input, &mut num as *mut u16 as *mut u8, 2); } num } #[inline] #[cfg(not(feature = "safe-encode"))] fn read_u32_ptr(input: *const u8) -> u32 { let mut num: u32 = 0; unsafe { core::ptr::copy_nonoverlapping(input, &mut num as *mut u32 as *mut u8, 4); } num } #[inline] #[cfg(not(feature = "safe-encode"))] fn read_usize_ptr(input: *const u8) -> usize { let mut num: usize = 0; unsafe { core::ptr::copy_nonoverlapping( input, &mut num as *mut usize as *mut u8, core::mem::size_of::(), ); } num } #[cfg(test)] mod tests { use super::*; #[test] fn test_count_same_bytes() { // 8byte aligned block, zeros and ones are added because the end/offset let first: &[u8] = &[ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; let second: &[u8] = &[ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ]; assert_eq!(count_same_bytes(first, &mut 0, second, 0), 16); // 4byte aligned block let first: &[u8] = &[ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; let second: &[u8] = &[ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ]; assert_eq!(count_same_bytes(first, &mut 0, second, 0), 20); // 2byte aligned block let first: &[u8] = &[ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; let second: &[u8] = &[ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ]; assert_eq!(count_same_bytes(first, &mut 0, second, 0), 22); // 1byte aligned block let first: &[u8] = &[ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; let second: &[u8] = &[ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ]; assert_eq!(count_same_bytes(first, &mut 0, second, 0), 23); // 1byte aligned block - last byte different let first: &[u8] = &[ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; let second: &[u8] = &[ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ]; assert_eq!(count_same_bytes(first, &mut 0, second, 0), 22); // 1byte aligned block let first: &[u8] = &[ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 9, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; let second: &[u8] = &[ 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 3, 4, 6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ]; assert_eq!(count_same_bytes(first, &mut 0, second, 0), 21); for diff_idx in 8..100 { let first: Vec = (0u8..255).cycle().take(100 + 12).collect(); let mut second = first.clone(); second[diff_idx] = 255; for start in 0..=diff_idx { let same_bytes = count_same_bytes(&first, &mut start.clone(), &second, start); assert_eq!(same_bytes, diff_idx - start); } } } #[test] fn test_bug() { let input: &[u8] = &[ 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, ]; let _out = compress(input); } #[test] fn test_dict() { let input: &[u8] = &[ 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 
14, 16, 18, 10, 12, 14, 16, 18, ]; let dict = input; let compressed = compress_with_dict(input, dict); assert_lt!(compressed.len(), compress(input).len()); assert!(compressed.len() < compress(input).len()); let mut uncompressed = vec![0u8; input.len()]; let uncomp_size = crate::block::decompress::decompress_into_with_dict( &compressed, &mut uncompressed, dict, ) .unwrap(); uncompressed.truncate(uncomp_size); assert_eq!(input, uncompressed); } #[test] fn test_dict_no_panic() { let input: &[u8] = &[ 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, ]; let dict = &[10, 12, 14]; let _compressed = compress_with_dict(input, dict); } #[test] fn test_dict_match_crossing() { let input: &[u8] = &[ 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, 10, 12, 14, 16, 18, ]; let dict = input; let compressed = compress_with_dict(input, dict); assert_lt!(compressed.len(), compress(input).len()); let mut uncompressed = vec![0u8; input.len() * 2]; // copy first half of the input into output let dict_cutoff = dict.len() / 2; let output_start = dict.len() - dict_cutoff; uncompressed[..output_start].copy_from_slice(&dict[dict_cutoff..]); let uncomp_len = { let mut sink = SliceSink::new(&mut uncompressed[..], output_start); crate::block::decompress::decompress_internal::( &compressed, &mut sink, &dict[..dict_cutoff], ) .unwrap() }; assert_eq!(input.len(), uncomp_len); assert_eq!( input, &uncompressed[output_start..output_start + uncomp_len] ); } #[test] fn test_conformant_last_block() { // From the spec: // The last match must start at least 12 bytes before the end of block. // The last match is part of the penultimate sequence. It is followed by the last sequence, // which contains only literals. Note that, as a consequence, an independent block < // 13 bytes cannot be compressed, because the match must copy "something", // so it needs at least one prior byte. // When a block can reference data from another block, it can start immediately with a match // and no literal, so a block of 12 bytes can be compressed. let aaas: &[u8] = b"aaaaaaaaaaaaaaa"; // uncompressible let out = compress(&aaas[..12]); assert_gt!(out.len(), 12); // compressible let out = compress(&aaas[..13]); assert_le!(out.len(), 13); let out = compress(&aaas[..14]); assert_le!(out.len(), 14); let out = compress(&aaas[..15]); assert_le!(out.len(), 15); // dict uncompressible let out = compress_with_dict(&aaas[..11], aaas); assert_gt!(out.len(), 11); // compressible let out = compress_with_dict(&aaas[..12], aaas); // According to the spec this _could_ compres, but it doesn't in this lib // as it aborts compression for any input len < LZ4_MIN_LENGTH assert_gt!(out.len(), 12); let out = compress_with_dict(&aaas[..13], aaas); assert_le!(out.len(), 13); let out = compress_with_dict(&aaas[..14], aaas); assert_le!(out.len(), 14); let out = compress_with_dict(&aaas[..15], aaas); assert_le!(out.len(), 15); } #[test] fn test_dict_size() { let dict = vec![b'a'; 1024 * 1024]; let input = &b"aaaaaaaaaaaaaaaaaaaaaaaaaaaaa"[..]; let compressed = compress_prepend_size_with_dict(input, &dict); let decompressed = crate::block::decompress_size_prepended_with_dict(&compressed, &dict).unwrap(); assert_eq!(decompressed, input); } } lz4_flex-0.11.1/src/block/decompress.rs000064400000000000000000000527611046102023000160410ustar 00000000000000//! The block decompression algorithm. 
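//!
//! Each encoded sequence is parsed as: a token byte (upper 4 bits literal length, lower 4 bits
//! match length), optional 0xFF-run length extension bytes (see `read_integer_ptr`), the literal
//! bytes themselves, a 2-byte little-endian match offset, and an optional match-length extension.
//! The decoder adds `MINMATCH` to the stored match length; the final sequence of a block
//! consists of literals only.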
use crate::block::{DecompressError, MINMATCH}; use crate::fastcpy_unsafe; use crate::sink::SliceSink; use crate::sink::{PtrSink, Sink}; use alloc::vec::Vec; /// Copies data to output_ptr by self-referential copy from start and match_length #[inline] unsafe fn duplicate( output_ptr: &mut *mut u8, output_end: *mut u8, start: *const u8, match_length: usize, ) { // We cannot simply use memcpy or `extend_from_slice`, because these do not allow // self-referential copies: http://ticki.github.io/img/lz4_runs_encoding_diagram.svg // Considering that `wild_copy_match_16` can copy up to `16 - 1` extra bytes. // Defer to `duplicate_overlapping` in case of an overlapping match // OR the if the wild copy would copy beyond the end of the output. if (output_ptr.offset_from(start) as usize) < match_length + 16 - 1 || (output_end.offset_from(*output_ptr) as usize) < match_length + 16 - 1 { duplicate_overlapping(output_ptr, start, match_length); } else { debug_assert!( output_ptr.add(match_length / 16 * 16 + ((match_length % 16) != 0) as usize * 16) <= output_end ); wild_copy_from_src_16(start, *output_ptr, match_length); *output_ptr = output_ptr.add(match_length); } } #[inline] fn wild_copy_from_src_16(mut source: *const u8, mut dst_ptr: *mut u8, num_items: usize) { // Note: if the compiler auto-vectorizes this it'll hurt performance! // It's not the case for 16 bytes stepsize, but for 8 bytes. unsafe { let dst_ptr_end = dst_ptr.add(num_items); loop { core::ptr::copy_nonoverlapping(source, dst_ptr, 16); source = source.add(16); dst_ptr = dst_ptr.add(16); if dst_ptr >= dst_ptr_end { break; } } } } /// Copy function, if the data start + match_length overlaps into output_ptr #[inline] #[cfg_attr(nightly, optimize(size))] // to avoid loop unrolling unsafe fn duplicate_overlapping( output_ptr: &mut *mut u8, mut start: *const u8, match_length: usize, ) { // There is an edge case when output_ptr == start, which causes the decoder to potentially // expose up to match_length bytes of uninitialized data in the decompression buffer. // To prevent that we write a dummy zero to output, which will zero out output in such cases. // This is the same strategy used by the reference C implementation https://github.com/lz4/lz4/pull/772 output_ptr.write(0u8); let dst_ptr_end = output_ptr.add(match_length); while output_ptr.add(1) < dst_ptr_end { // Note that this loop unrolling is done, so that the compiler doesn't do it in a awful // way. // Without that the compiler will unroll/auto-vectorize the copy with a lot of branches. // This is not what we want, as large overlapping copies are not that common. 
core::ptr::copy(start, *output_ptr, 1); start = start.add(1); *output_ptr = output_ptr.add(1); core::ptr::copy(start, *output_ptr, 1); start = start.add(1); *output_ptr = output_ptr.add(1); } if *output_ptr < dst_ptr_end { core::ptr::copy(start, *output_ptr, 1); *output_ptr = output_ptr.add(1); } } #[inline] unsafe fn copy_from_dict( output_base: *mut u8, output_ptr: &mut *mut u8, ext_dict: &[u8], offset: usize, match_length: usize, ) -> usize { // If we're here we know offset > output pos, so we have at least 1 byte to copy from dict debug_assert!(output_ptr.offset_from(output_base) >= 0); debug_assert!(offset > output_ptr.offset_from(output_base) as usize); // If unchecked-decode is not disabled we also know that the offset falls within ext_dict debug_assert!(ext_dict.len() + output_ptr.offset_from(output_base) as usize >= offset); let dict_offset = ext_dict.len() + output_ptr.offset_from(output_base) as usize - offset; // Can't copy past ext_dict len, the match may cross dict and output let dict_match_length = match_length.min(ext_dict.len() - dict_offset); // TODO test fastcpy_unsafe core::ptr::copy_nonoverlapping( ext_dict.as_ptr().add(dict_offset), *output_ptr, dict_match_length, ); *output_ptr = output_ptr.add(dict_match_length); dict_match_length } /// Read an integer. /// /// In LZ4, we encode small integers in a way that we can have an arbitrary number of bytes. In /// particular, we add the bytes repeatedly until we hit a non-0xFF byte. When we do, we add /// this byte to our sum and terminate the loop. /// /// # Example /// /// ```notest /// 255, 255, 255, 4, 2, 3, 4, 6, 7 /// ``` /// /// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 is ignored, because /// 4 is the first non-0xFF byte. #[inline] fn read_integer_ptr( input_ptr: &mut *const u8, _input_ptr_end: *const u8, ) -> Result { // We start at zero and count upwards. let mut n: u32 = 0; // If this byte takes value 255 (the maximum value it can take), another byte is read // and added to the sum. This repeats until a byte lower than 255 is read. loop { // We add the next byte until we get a byte which we add to the counting variable. #[cfg(not(feature = "unchecked-decode"))] { if *input_ptr >= _input_ptr_end { return Err(DecompressError::ExpectedAnotherByte); } } let extra = unsafe { input_ptr.read() }; *input_ptr = unsafe { input_ptr.add(1) }; n += extra as u32; // We continue if we got 255, break otherwise. if extra != 0xFF { break; } } // 255, 255, 255, 8 // 111, 111, 111, 101 Ok(n) } /// Read a little-endian 16-bit integer from the input stream. #[inline] fn read_u16_ptr(input_ptr: &mut *const u8) -> u16 { let mut num: u16 = 0; unsafe { core::ptr::copy_nonoverlapping(*input_ptr, &mut num as *mut u16 as *mut u8, 2); *input_ptr = input_ptr.add(2); } u16::from_le(num) } const FIT_TOKEN_MASK_LITERAL: u8 = 0b00001111; const FIT_TOKEN_MASK_MATCH: u8 = 0b11110000; #[test] fn check_token() { assert!(!does_token_fit(15)); assert!(does_token_fit(14)); assert!(does_token_fit(114)); assert!(!does_token_fit(0b11110000)); assert!(does_token_fit(0b10110000)); } /// The token consists of two parts, the literal length (upper 4 bits) and match_length (lower 4 /// bits) if the literal length and match_length are both below 15, we don't need to read additional /// data, so the token does fit the metadata in a single u8. 
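///
/// For example, token `0x23` (literal length 2, match length nibble 3) fits, while `0xF3` or
/// `0x2F` signal that at least one length extension byte follows and therefore do not fit.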
#[inline]
fn does_token_fit(token: u8) -> bool {
    !((token & FIT_TOKEN_MASK_LITERAL) == FIT_TOKEN_MASK_LITERAL
        || (token & FIT_TOKEN_MASK_MATCH) == FIT_TOKEN_MASK_MATCH)
}

/// Decompress all bytes of `input` into `output`.
///
/// Returns the number of bytes written (decompressed) into `output`.
#[inline]
pub(crate) fn decompress_internal<const USE_DICT: bool, S: Sink>(
    input: &[u8],
    output: &mut S,
    ext_dict: &[u8],
) -> Result<usize, DecompressError> {
    // Prevent segfault for empty input
    if input.is_empty() {
        return Err(DecompressError::ExpectedAnotherByte);
    }

    let ext_dict = if USE_DICT {
        ext_dict
    } else {
        // ensure optimizer knows ext_dict length is 0 if !USE_DICT
        debug_assert!(ext_dict.is_empty());
        &[]
    };
    let output_base = unsafe { output.base_mut_ptr() };
    let output_end = unsafe { output_base.add(output.capacity()) };
    let output_start_pos_ptr = unsafe { output.base_mut_ptr().add(output.pos()) as *mut u8 };
    let mut output_ptr = output_start_pos_ptr;

    let mut input_ptr = input.as_ptr();
    let input_ptr_end = unsafe { input.as_ptr().add(input.len()) };
    let safe_distance_from_end = (16 /* literal copy */ + 2 /* u16 match offset */ + 1 /* The next token to read (we can skip the check) */)
        .min(input.len());
    let input_ptr_safe = unsafe { input_ptr_end.sub(safe_distance_from_end) };

    let safe_output_ptr = unsafe {
        let mut output_num_safe_bytes = output
            .capacity()
            .saturating_sub(16 /* literal copy */ + 18 /* match copy */);
        if USE_DICT {
            // In the dictionary case the output pointer is moved by the match length in the dictionary.
            // This may be up to 17 bytes without exiting the loop. So we need to ensure that we have
            // at least additional 17 bytes of space left in the output buffer in the fast loop.
            output_num_safe_bytes = output_num_safe_bytes.saturating_sub(17);
        };

        output_base.add(output_num_safe_bytes)
    };

    // Exhaust the decoder by reading and decompressing all blocks until the remaining buffer is
    // empty.
    loop {
        // Read the token. The token is the first byte in a block. It is divided into two 4-bit
        // subtokens, the higher and the lower.
        // This token contains two 4-bit "fields", a higher and a lower, representing the literals'
        // length and the back reference's length, respectively.
        let token = unsafe { input_ptr.read() };
        input_ptr = unsafe { input_ptr.add(1) };

        // Checking for hot-loop.
        // In most cases the metadata does fit in a single 1-byte token (statistically) and we are in
        // a safe-distance to the end. This enables some optimized handling.
        //
        // Ideally we want to check for safe output pos like: output.pos() <= safe_output_pos; But
        // that doesn't work when the safe_output_ptr is == output_ptr due to insufficient
        // capacity. So we use `<` instead of `<=`, which covers that case.
if does_token_fit(token) && (input_ptr as usize) <= input_ptr_safe as usize && output_ptr < safe_output_ptr { let literal_length = (token >> 4) as usize; let mut match_length = MINMATCH + (token & 0xF) as usize; // output_ptr <= safe_output_ptr should guarantee we have enough space in output debug_assert!( unsafe { output_ptr.add(literal_length + match_length) } <= output_end, "{literal_length} + {match_length} {} wont fit ", literal_length + match_length ); // Copy the literal // The literal is at max 16 bytes, and the is_safe_distance check assures // that we are far away enough from the end so we can safely copy 16 bytes unsafe { core::ptr::copy_nonoverlapping(input_ptr, output_ptr, 16); input_ptr = input_ptr.add(literal_length); output_ptr = output_ptr.add(literal_length); } // input_ptr <= input_ptr_safe should guarantee we have enough space in input debug_assert!(input_ptr_end as usize - input_ptr as usize >= 2); let offset = read_u16_ptr(&mut input_ptr) as usize; let output_len = unsafe { output_ptr.offset_from(output_base) as usize }; #[cfg(not(feature = "unchecked-decode"))] { if offset > output_len + ext_dict.len() { return Err(DecompressError::OffsetOutOfBounds); } } // Check if part of the match is in the external dict if USE_DICT && offset > output_len { let copied = unsafe { copy_from_dict(output_base, &mut output_ptr, ext_dict, offset, match_length) }; if copied == match_length { continue; } // match crosses ext_dict and output match_length -= copied; } // Calculate the start of this duplicate segment. At this point offset was already // checked to be in bounds and the external dictionary copy, if any, was // already copied and subtracted from match_length. let start_ptr = unsafe { output_ptr.sub(offset) }; debug_assert!(start_ptr >= output_base); debug_assert!(start_ptr < output_end); debug_assert!(unsafe { output_end.offset_from(start_ptr) as usize } >= match_length); // In this branch we know that match_length is at most 18 (14 + MINMATCH). // But the blocks can overlap, so make sure they are at least 18 bytes apart // to enable an optimized copy of 18 bytes. if offset >= match_length { unsafe { // _copy_, not copy_non_overlaping, as it may overlap. // Compiles to the same assembly on x68_64. core::ptr::copy(start_ptr, output_ptr, 18); output_ptr = output_ptr.add(match_length); } } else { unsafe { duplicate_overlapping(&mut output_ptr, start_ptr, match_length); } } continue; } // Now, we read the literals section. // Literal Section // If the initial value is 15, it is indicated that another byte will be read and added to // it let mut literal_length = (token >> 4) as usize; if literal_length != 0 { if literal_length == 15 { // The literal_length length took the maximal value, indicating that there is more // than 15 literal_length bytes. We read the extra integer. literal_length += read_integer_ptr(&mut input_ptr, input_ptr_end)? 
as usize; } #[cfg(not(feature = "unchecked-decode"))] { // Check if literal is out of bounds for the input, and if there is enough space on // the output if literal_length > input_ptr_end as usize - input_ptr as usize { return Err(DecompressError::LiteralOutOfBounds); } if literal_length > unsafe { output_end.offset_from(output_ptr) as usize } { return Err(DecompressError::OutputTooSmall { expected: unsafe { output_ptr.offset_from(output_base) as usize } + literal_length, actual: output.capacity(), }); } } unsafe { fastcpy_unsafe::slice_copy(input_ptr, output_ptr, literal_length); output_ptr = output_ptr.add(literal_length); input_ptr = input_ptr.add(literal_length); } } // If the input stream is emptied, we break out of the loop. This is only the case // in the end of the stream, since the block is intact otherwise. if input_ptr >= input_ptr_end { break; } // Read duplicate section #[cfg(not(feature = "unchecked-decode"))] { if (input_ptr_end as usize) - (input_ptr as usize) < 2 { return Err(DecompressError::ExpectedAnotherByte); } } let offset = read_u16_ptr(&mut input_ptr) as usize; // Obtain the initial match length. The match length is the length of the duplicate segment // which will later be copied from data previously decompressed into the output buffer. The // initial length is derived from the second part of the token (the lower nibble), we read // earlier. Since having a match length of less than 4 would mean negative compression // ratio, we start at 4 (MINMATCH). // The initial match length can maximally be 19 (MINMATCH + 15). As with the literal length, // this indicates that there are more bytes to read. let mut match_length = MINMATCH + (token & 0xF) as usize; if match_length == MINMATCH + 15 { // The match length took the maximal value, indicating that there is more bytes. We // read the extra integer. match_length += read_integer_ptr(&mut input_ptr, input_ptr_end)? as usize; } // We now copy from the already decompressed buffer. This allows us for storing duplicates // by simply referencing the other location. let output_len = unsafe { output_ptr.offset_from(output_base) as usize }; // We'll do a bounds check except unchecked-decode is enabled. #[cfg(not(feature = "unchecked-decode"))] { if offset > output_len + ext_dict.len() { return Err(DecompressError::OffsetOutOfBounds); } if match_length > unsafe { output_end.offset_from(output_ptr) as usize } { return Err(DecompressError::OutputTooSmall { expected: output_len + match_length, actual: output.capacity(), }); } } if USE_DICT && offset > output_len { let copied = unsafe { copy_from_dict(output_base, &mut output_ptr, ext_dict, offset, match_length) }; if copied == match_length { #[cfg(not(feature = "unchecked-decode"))] { if input_ptr >= input_ptr_end { return Err(DecompressError::ExpectedAnotherByte); } } continue; } // match crosses ext_dict and output match_length -= copied; } // Calculate the start of this duplicate segment. At this point offset was already checked // to be in bounds and the external dictionary copy, if any, was already copied and // subtracted from match_length. 
        let start_ptr = unsafe { output_ptr.sub(offset) };
        debug_assert!(start_ptr >= output_base);
        debug_assert!(start_ptr < output_end);
        debug_assert!(unsafe { output_end.offset_from(start_ptr) as usize } >= match_length);

        unsafe {
            duplicate(&mut output_ptr, output_end, start_ptr, match_length);
        }
        #[cfg(not(feature = "unchecked-decode"))]
        {
            if input_ptr >= input_ptr_end {
                return Err(DecompressError::ExpectedAnotherByte);
            }
        }
    }
    unsafe {
        output.set_pos(output_ptr.offset_from(output_base) as usize);
        Ok(output_ptr.offset_from(output_start_pos_ptr) as usize)
    }
}

/// Decompress all bytes of `input` into `output`.
/// `output` should be preallocated with a size of the uncompressed data.
#[inline]
pub fn decompress_into(input: &[u8], output: &mut [u8]) -> Result<usize, DecompressError> {
    decompress_internal::<false, _>(input, &mut SliceSink::new(output, 0), b"")
}

/// Decompress all bytes of `input` into `output`.
///
/// Returns the number of bytes written (decompressed) into `output`.
#[inline]
pub fn decompress_into_with_dict(
    input: &[u8],
    output: &mut [u8],
    ext_dict: &[u8],
) -> Result<usize, DecompressError> {
    decompress_internal::<true, _>(input, &mut SliceSink::new(output, 0), ext_dict)
}

/// Decompress all bytes of `input` into a new vec.
/// The passed parameter `min_uncompressed_size` needs to be equal or larger than the uncompressed size.
///
/// # Panics
/// May panic if the parameter `min_uncompressed_size` is smaller than the
/// uncompressed data.
#[inline]
pub fn decompress_with_dict(
    input: &[u8],
    min_uncompressed_size: usize,
    ext_dict: &[u8],
) -> Result<Vec<u8>, DecompressError> {
    // Allocate a vector to contain the decompressed stream.
    let mut vec = Vec::with_capacity(min_uncompressed_size);
    let decomp_len =
        decompress_internal::<true, _>(input, &mut PtrSink::from_vec(&mut vec, 0), ext_dict)?;
    unsafe {
        vec.set_len(decomp_len);
    }
    Ok(vec)
}

/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in
/// little endian. Can be used in conjunction with `compress_prepend_size`
#[inline]
pub fn decompress_size_prepended(input: &[u8]) -> Result<Vec<u8>, DecompressError> {
    let (uncompressed_size, input) = super::uncompressed_size(input)?;
    decompress(input, uncompressed_size)
}

/// Decompress all bytes of `input` into a new vec.
/// The passed parameter `min_uncompressed_size` needs to be equal or larger than the uncompressed size.
///
/// # Panics
/// May panic if the parameter `min_uncompressed_size` is smaller than the
/// uncompressed data.
#[inline]
pub fn decompress(input: &[u8], min_uncompressed_size: usize) -> Result<Vec<u8>, DecompressError> {
    // Allocate a vector to contain the decompressed stream.
    let mut vec = Vec::with_capacity(min_uncompressed_size);
    let decomp_len =
        decompress_internal::<false, _>(input, &mut PtrSink::from_vec(&mut vec, 0), b"")?;
    unsafe {
        vec.set_len(decomp_len);
    }
    Ok(vec)
}

/// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in
/// little endian. Can be used in conjunction with `compress_prepend_size_with_dict`
#[inline]
pub fn decompress_size_prepended_with_dict(
    input: &[u8],
    ext_dict: &[u8],
) -> Result<Vec<u8>, DecompressError> {
    let (uncompressed_size, input) = super::uncompressed_size(input)?;
    decompress_with_dict(input, uncompressed_size, ext_dict)
}

#[cfg(test)]
mod test {
    use super::*;

    #[test]
    fn all_literal() {
        assert_eq!(decompress(&[0x30, b'a', b'4', b'9'], 3).unwrap(), b"a49");
    }

    // this error test is only valid with checked-decode.
#[cfg(not(feature = "unchecked-decode"))] #[test] fn offset_oob() { decompress(&[0x10, b'a', 2, 0], 4).unwrap_err(); decompress(&[0x40, b'a', 1, 0], 4).unwrap_err(); } } lz4_flex-0.11.1/src/block/decompress_safe.rs000064400000000000000000000364621046102023000170370ustar 00000000000000//! The block decompression algorithm. use core::convert::TryInto; use crate::block::DecompressError; use crate::block::MINMATCH; use crate::sink::Sink; use crate::sink::SliceSink; use alloc::vec; use alloc::vec::Vec; /// Read an integer. /// /// In LZ4, we encode small integers in a way that we can have an arbitrary number of bytes. In /// particular, we add the bytes repeatedly until we hit a non-0xFF byte. When we do, we add /// this byte to our sum and terminate the loop. /// /// # Example /// /// ```notest /// 255, 255, 255, 4, 2, 3, 4, 6, 7 /// ``` /// /// is encoded to _255 + 255 + 255 + 4 = 769_. The bytes after the first 4 is ignored, because /// 4 is the first non-0xFF byte. #[inline] fn read_integer(input: &[u8], input_pos: &mut usize) -> Result { // We start at zero and count upwards. let mut n: u32 = 0; // If this byte takes value 255 (the maximum value it can take), another byte is read // and added to the sum. This repeats until a byte lower than 255 is read. loop { // We add the next byte until we get a byte which we add to the counting variable. let extra: u8 = *input .get(*input_pos) .ok_or(DecompressError::ExpectedAnotherByte)?; *input_pos += 1; n += extra as u32; // We continue if we got 255, break otherwise. if extra != 0xFF { break; } } // 255, 255, 255, 8 // 111, 111, 111, 101 Ok(n) } /// Read a little-endian 16-bit integer from the input stream. #[inline] fn read_u16(input: &[u8], input_pos: &mut usize) -> Result { let dst = input .get(*input_pos..*input_pos + 2) .ok_or(DecompressError::ExpectedAnotherByte)?; *input_pos += 2; Ok(u16::from_le_bytes(dst.try_into().unwrap())) } const FIT_TOKEN_MASK_LITERAL: u8 = 0b00001111; const FIT_TOKEN_MASK_MATCH: u8 = 0b11110000; #[test] fn check_token() { assert!(!does_token_fit(15)); assert!(does_token_fit(14)); assert!(does_token_fit(114)); assert!(!does_token_fit(0b11110000)); assert!(does_token_fit(0b10110000)); } /// The token consists of two parts, the literal length (upper 4 bits) and match_length (lower 4 /// bits) if the literal length and match_length are both below 15, we don't need to read additional /// data, so the token does fit the metadata. #[inline] fn does_token_fit(token: u8) -> bool { !((token & FIT_TOKEN_MASK_LITERAL) == FIT_TOKEN_MASK_LITERAL || (token & FIT_TOKEN_MASK_MATCH) == FIT_TOKEN_MASK_MATCH) } /// Decompress all bytes of `input` into `output`. /// /// Returns the number of bytes written (decompressed) into `output`. #[inline(always)] // (always) necessary to get the best performance in non LTO builds pub(crate) fn decompress_internal( input: &[u8], output: &mut S, ext_dict: &[u8], ) -> Result { let mut input_pos = 0; let initial_output_pos = output.pos(); let safe_input_pos = input .len() .saturating_sub(16 /* literal copy */ + 2 /* u16 match offset */); let mut safe_output_pos = output .capacity() .saturating_sub(16 /* literal copy */ + 18 /* match copy */); if USE_DICT { // In the dictionary case the output pointer is moved by the match length in the dictionary. // This may be up to 17 bytes without exiting the loop. So we need to ensure that we have // at least additional 17 bytes of space left in the output buffer in the fast loop. 
safe_output_pos = safe_output_pos.saturating_sub(17); }; // Exhaust the decoder by reading and decompressing all blocks until the remaining buffer is // empty. loop { // Read the token. The token is the first byte in a block. It is divided into two 4-bit // subtokens, the higher and the lower. // This token contains to 4-bit "fields", a higher and a lower, representing the literals' // length and the back reference's length, respectively. let token = *input .get(input_pos) .ok_or(DecompressError::ExpectedAnotherByte)?; input_pos += 1; // Checking for hot-loop. // In most cases the metadata does fit in a single 1byte token (statistically) and we are in // a safe-distance to the end. This enables some optimized handling. // // Ideally we want to check for safe output pos like: output.pos() <= safe_output_pos; But // that doesn't work when the safe_output_pos is 0 due to saturated_sub. So we use // `<` instead of `<=`, which covers that case. if does_token_fit(token) && input_pos <= safe_input_pos && output.pos() < safe_output_pos { let literal_length = (token >> 4) as usize; // casting to [u8;u16] doesn't seem to make a difference vs &[u8] (same assembly) let input: &[u8; 16] = input[input_pos..input_pos + 16].try_into().unwrap(); // Copy the literal // The literal is at max 14 bytes, and the is_safe_distance check assures // that we are far away enough from the end so we can safely copy 16 bytes output.extend_from_slice_wild(input, literal_length); input_pos += literal_length; // clone as we don't want to mutate let offset = read_u16(input, &mut literal_length.clone())? as usize; input_pos += 2; let mut match_length = MINMATCH + (token & 0xF) as usize; if USE_DICT && offset > output.pos() { let copied = copy_from_dict(output, ext_dict, offset, match_length)?; if copied == match_length { continue; } // match crosses ext_dict and output, offset is still correct as output pos // increased match_length -= copied; } // In this branch we know that match_length is at most 18 (14 + MINMATCH). // But the blocks can overlap, so make sure they are at least 18 bytes apart // to enable an optimized copy of 18 bytes. let (start, did_overflow) = output.pos().overflowing_sub(offset); if did_overflow { return Err(DecompressError::OffsetOutOfBounds); } if offset >= match_length { output.extend_from_within(start, 18, match_length); } else { output.extend_from_within_overlapping(start, match_length) } continue; } // Now, we read the literals section. // Literal Section // If the initial value is 15, it is indicated that another byte will be read and added to // it let mut literal_length = (token >> 4) as usize; if literal_length != 0 { if literal_length == 15 { // The literal_length length took the maximal value, indicating that there is more // than 15 literal_length bytes. We read the extra integer. literal_length += read_integer(input, &mut input_pos)? as usize; } if literal_length > input.len() - input_pos { return Err(DecompressError::LiteralOutOfBounds); } #[cfg(not(feature = "unchecked-decode"))] if literal_length > output.capacity() - output.pos() { return Err(DecompressError::OutputTooSmall { expected: output.pos() + literal_length, actual: output.capacity(), }); } output.extend_from_slice(&input[input_pos..input_pos + literal_length]); input_pos += literal_length; } // If the input stream is emptied, we break out of the loop. This is only the case // in the end of the stream, since the block is intact otherwise. if input_pos >= input.len() { break; } let offset = read_u16(input, &mut input_pos)? 
as usize; // Obtain the initial match length. The match length is the length of the duplicate segment // which will later be copied from data previously decompressed into the output buffer. The // initial length is derived from the second part of the token (the lower nibble), we read // earlier. Since having a match length of less than 4 would mean negative compression // ratio, we start at 4 (MINMATCH). // The initial match length can maximally be 19. As with the literal length, this indicates // that there are more bytes to read. let mut match_length = MINMATCH + (token & 0xF) as usize; if match_length == MINMATCH + 15 { // The match length took the maximal value, indicating that there is more bytes. We // read the extra integer. match_length += read_integer(input, &mut input_pos)? as usize; } #[cfg(not(feature = "unchecked-decode"))] if output.pos() + match_length > output.capacity() { return Err(DecompressError::OutputTooSmall { expected: output.pos() + match_length, actual: output.capacity(), }); } if USE_DICT && offset > output.pos() { let copied = copy_from_dict(output, ext_dict, offset, match_length)?; if copied == match_length { continue; } // match crosses ext_dict and output, offset is still correct as output_len was // increased match_length -= copied; } // We now copy from the already decompressed buffer. This allows us for storing duplicates // by simply referencing the other location. duplicate_slice(output, offset, match_length)?; } Ok(output.pos() - initial_output_pos) } #[inline] fn copy_from_dict( output: &mut impl Sink, ext_dict: &[u8], offset: usize, match_length: usize, ) -> Result { // If we're here we know offset > output.pos debug_assert!(offset > output.pos()); let (dict_offset, did_overflow) = ext_dict.len().overflowing_sub(offset - output.pos()); if did_overflow { return Err(DecompressError::OffsetOutOfBounds); } // Can't copy past ext_dict len, the match may cross dict and output let dict_match_length = match_length.min(ext_dict.len() - dict_offset); let ext_match = &ext_dict[dict_offset..dict_offset + dict_match_length]; output.extend_from_slice(ext_match); Ok(dict_match_length) } /// Extends output by self-referential copies #[inline(always)] // (always) necessary otherwise compiler fails to inline it fn duplicate_slice( output: &mut impl Sink, offset: usize, match_length: usize, ) -> Result<(), DecompressError> { // This function assumes output will fit match_length, it might panic otherwise. if match_length > offset { duplicate_overlapping_slice(output, offset, match_length)?; } else { let (start, did_overflow) = output.pos().overflowing_sub(offset); if did_overflow { return Err(DecompressError::OffsetOutOfBounds); } match match_length { 0..=32 if output.pos() + 32 <= output.capacity() => { output.extend_from_within(start, 32, match_length) } 33..=64 if output.pos() + 64 <= output.capacity() => { output.extend_from_within(start, 64, match_length) } _ => output.extend_from_within(start, match_length, match_length), } } Ok(()) } /// self-referential copy for the case data start (end of output - offset) + match_length overlaps /// into output #[inline] fn duplicate_overlapping_slice( sink: &mut impl Sink, offset: usize, match_length: usize, ) -> Result<(), DecompressError> { // This function assumes output will fit match_length, it might panic otherwise. 
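    // Note (explanatory): an `offset` of 1 repeats a single byte; the `offset == 1` branch below
    // handles that with `extend_with_fill` instead of a byte-by-byte overlapping copy.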
let (start, did_overflow) = sink.pos().overflowing_sub(offset); if did_overflow { return Err(DecompressError::OffsetOutOfBounds); } if offset == 1 { let val = sink.byte_at(start); sink.extend_with_fill(val, match_length); } else { sink.extend_from_within_overlapping(start, match_length); } Ok(()) } /// Decompress all bytes of `input` into `output`. /// `output` should be preallocated with a size of of the uncompressed data. #[inline] pub fn decompress_into(input: &[u8], output: &mut [u8]) -> Result { decompress_internal::(input, &mut SliceSink::new(output, 0), b"") } /// Decompress all bytes of `input` into `output`. /// /// Returns the number of bytes written (decompressed) into `output`. #[inline] pub fn decompress_into_with_dict( input: &[u8], output: &mut [u8], ext_dict: &[u8], ) -> Result { decompress_internal::(input, &mut SliceSink::new(output, 0), ext_dict) } /// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in /// litte endian. Can be used in conjunction with `compress_prepend_size` #[inline] pub fn decompress_size_prepended(input: &[u8]) -> Result, DecompressError> { let (uncompressed_size, input) = super::uncompressed_size(input)?; decompress(input, uncompressed_size) } /// Decompress all bytes of `input` into a new vec. /// The passed parameter `min_uncompressed_size` needs to be equal or larger than the uncompressed size. /// /// # Panics /// May panic if the parameter `min_uncompressed_size` is smaller than the /// uncompressed data. #[inline] pub fn decompress(input: &[u8], min_uncompressed_size: usize) -> Result, DecompressError> { let mut decompressed: Vec = vec![0; min_uncompressed_size]; let decomp_len = decompress_internal::(input, &mut SliceSink::new(&mut decompressed, 0), b"")?; decompressed.truncate(decomp_len); Ok(decompressed) } /// Decompress all bytes of `input` into a new vec. The first 4 bytes are the uncompressed size in /// little endian. Can be used in conjunction with `compress_prepend_size_with_dict` #[inline] pub fn decompress_size_prepended_with_dict( input: &[u8], ext_dict: &[u8], ) -> Result, DecompressError> { let (uncompressed_size, input) = super::uncompressed_size(input)?; decompress_with_dict(input, uncompressed_size, ext_dict) } /// Decompress all bytes of `input` into a new vec. /// The passed parameter `min_uncompressed_size` needs to be equal or larger than the uncompressed size. /// /// # Panics /// May panic if the parameter `min_uncompressed_size` is smaller than the /// uncompressed data. #[inline] pub fn decompress_with_dict( input: &[u8], min_uncompressed_size: usize, ext_dict: &[u8], ) -> Result, DecompressError> { let mut decompressed: Vec = vec![0; min_uncompressed_size]; let decomp_len = decompress_internal::(input, &mut SliceSink::new(&mut decompressed, 0), ext_dict)?; decompressed.truncate(decomp_len); Ok(decompressed) } #[cfg(test)] mod test { use super::*; #[test] fn all_literal() { assert_eq!(decompress(&[0x30, b'a', b'4', b'9'], 3).unwrap(), b"a49"); } // this error test is only valid in safe-decode. #[cfg(feature = "safe-decode")] #[test] fn offset_oob() { decompress(&[0x10, b'a', 2, 0], 4).unwrap_err(); decompress(&[0x40, b'a', 1, 0], 4).unwrap_err(); } } lz4_flex-0.11.1/src/block/hashtable.rs000064400000000000000000000106021046102023000156140ustar 00000000000000use alloc::boxed::Box; use core::convert::TryInto; /// The Hashtable trait used by the compression to store hashed bytes to their position. /// `val` can be maximum the size of the input in bytes. 
/// /// `pos` can have a maximum value of u16::MAX or 65535 /// If the hashtable is smaller it needs to reduce the pos to its space, e.g. by right /// shifting. /// /// Duplication dictionary size. /// /// Every four bytes is assigned an entry. When this number is lower, fewer entries exists, and /// thus collisions are more likely, hurting the compression ratio. /// hashes and right shifts to a maximum value of 16bit, 65535 /// The right shift is done in order to not exceed, the hashtables capacity #[inline] fn hash(sequence: u32) -> u32 { (sequence.wrapping_mul(2654435761_u32)) >> 16 } /// hashes and right shifts to a maximum value of 16bit, 65535 /// The right shift is done in order to not exceed, the hashtables capacity #[cfg(target_pointer_width = "64")] #[inline] fn hash5(sequence: usize) -> u32 { let primebytes = if cfg!(target_endian = "little") { 889523592379_usize } else { 11400714785074694791_usize }; (((sequence << 24).wrapping_mul(primebytes)) >> 48) as u32 } pub trait HashTable { fn get_at(&self, pos: usize) -> usize; fn put_at(&mut self, pos: usize, val: usize); fn clear(&mut self); #[inline] #[cfg(target_pointer_width = "64")] fn get_hash_at(input: &[u8], pos: usize) -> usize { hash5(super::compress::get_batch_arch(input, pos)) as usize } #[inline] #[cfg(target_pointer_width = "32")] fn get_hash_at(input: &[u8], pos: usize) -> usize { hash(super::compress::get_batch(input, pos)) as usize } } const HASHTABLE_SIZE_4K: usize = 4 * 1024; const HASHTABLE_BIT_SHIFT_4K: usize = 4; #[derive(Debug)] #[repr(align(64))] pub struct HashTable4KU16 { dict: Box<[u16; HASHTABLE_SIZE_4K]>, } impl HashTable4KU16 { #[inline] pub fn new() -> Self { // This generates more efficient assembly in contrast to Box::new(slice), because of an // optmized call alloc_zeroed, vs. 
alloc + memset // try_into is optimized away let dict = alloc::vec![0; HASHTABLE_SIZE_4K] .into_boxed_slice() .try_into() .unwrap(); Self { dict } } } impl HashTable for HashTable4KU16 { #[inline] fn get_at(&self, hash: usize) -> usize { self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize } #[inline] fn put_at(&mut self, hash: usize, val: usize) { self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u16; } #[inline] fn clear(&mut self) { self.dict.fill(0); } #[inline] fn get_hash_at(input: &[u8], pos: usize) -> usize { hash(super::get_batch(input, pos)) as usize } } #[derive(Debug)] pub struct HashTable4K { dict: Box<[u32; HASHTABLE_SIZE_4K]>, } impl HashTable4K { #[inline] pub fn new() -> Self { let dict = alloc::vec![0; HASHTABLE_SIZE_4K] .into_boxed_slice() .try_into() .unwrap(); Self { dict } } #[cold] #[allow(dead_code)] pub fn reposition(&mut self, offset: u32) { for i in self.dict.iter_mut() { *i = i.saturating_sub(offset); } } } impl HashTable for HashTable4K { #[inline] fn get_at(&self, hash: usize) -> usize { self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] as usize } #[inline] fn put_at(&mut self, hash: usize, val: usize) { self.dict[hash >> HASHTABLE_BIT_SHIFT_4K] = val as u32; } #[inline] fn clear(&mut self) { self.dict.fill(0); } } const HASHTABLE_SIZE_8K: usize = 8 * 1024; const HASH_TABLE_BIT_SHIFT_8K: usize = 3; #[derive(Debug)] pub struct HashTable8K { dict: Box<[u32; HASHTABLE_SIZE_8K]>, } #[allow(dead_code)] impl HashTable8K { #[inline] pub fn new() -> Self { let dict = alloc::vec![0; HASHTABLE_SIZE_8K] .into_boxed_slice() .try_into() .unwrap(); Self { dict } } } impl HashTable for HashTable8K { #[inline] fn get_at(&self, hash: usize) -> usize { self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] as usize } #[inline] fn put_at(&mut self, hash: usize, val: usize) { self.dict[hash >> HASH_TABLE_BIT_SHIFT_8K] = val as u32; } #[inline] fn clear(&mut self) { self.dict.fill(0); } } lz4_flex-0.11.1/src/block/mod.rs000064400000000000000000000122641046102023000144460ustar 00000000000000//! LZ4 Block Format //! //! As defined in //! //! Currently for no_std support only the block format is supported. //! //! # Example: block format roundtrip //! ``` //! use lz4_flex::block::{compress_prepend_size, decompress_size_prepended}; //! let input: &[u8] = b"Hello people, what's up?"; //! let compressed = compress_prepend_size(input); //! let uncompressed = decompress_size_prepended(&compressed).unwrap(); //! assert_eq!(input, uncompressed); //! ``` //! #[cfg_attr(feature = "safe-encode", forbid(unsafe_code))] pub(crate) mod compress; pub(crate) mod hashtable; #[cfg(feature = "safe-decode")] #[cfg_attr(feature = "safe-decode", forbid(unsafe_code))] pub(crate) mod decompress_safe; #[cfg(feature = "safe-decode")] pub(crate) use decompress_safe as decompress; #[cfg(not(feature = "safe-decode"))] pub(crate) mod decompress; pub use compress::*; pub use decompress::*; use core::convert::TryInto; use core::fmt; pub(crate) const WINDOW_SIZE: usize = 64 * 1024; /// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md#end-of-block-restrictions /// The last match must start at least 12 bytes before the end of block. The last match is part of /// the penultimate sequence. It is followed by the last sequence, which contains only literals. /// /// Note that, as a consequence, an independent block < 13 bytes cannot be compressed, because the /// match must copy "something", so it needs at least one prior byte. 
/// /// When a block can reference data from another block, it can start immediately with a match and no /// literal, so a block of 12 bytes can be compressed. const MFLIMIT: usize = 12; /// The last 5 bytes of input are always literals. Therefore, the last sequence contains at least 5 /// bytes. const LAST_LITERALS: usize = 5; /// Due the way the compression loop is arrange we may read up to (register_size - 2) bytes from the /// current position. So we must end the matches 6 bytes before the end, 1 more than required by the /// spec. const END_OFFSET: usize = LAST_LITERALS + 1; /// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md#end-of-block-restrictions /// Minimum length of a block /// /// MFLIMIT + 1 for the token. const LZ4_MIN_LENGTH: usize = MFLIMIT + 1; const MAXD_LOG: usize = 16; const MAX_DISTANCE: usize = (1 << MAXD_LOG) - 1; #[allow(dead_code)] const MATCH_LENGTH_MASK: u32 = (1_u32 << 4) - 1; // 0b1111 / 15 /// The minimum length of a duplicate const MINMATCH: usize = 4; #[allow(dead_code)] const FASTLOOP_SAFE_DISTANCE: usize = 64; /// Switch for the hashtable size byU16 #[allow(dead_code)] static LZ4_64KLIMIT: usize = (64 * 1024) + (MFLIMIT - 1); /// An error representing invalid compressed data. #[derive(Debug)] #[non_exhaustive] pub enum DecompressError { /// The provided output is too small OutputTooSmall { /// Minimum expected output size expected: usize, /// Actual size of output actual: usize, }, /// Literal is out of bounds of the input LiteralOutOfBounds, /// Expected another byte, but none found. ExpectedAnotherByte, /// Deduplication offset out of bounds (not in buffer). OffsetOutOfBounds, } #[derive(Debug)] #[non_exhaustive] /// Errors that can happen during compression. pub enum CompressError { /// The provided output is too small. OutputTooSmall, } impl fmt::Display for DecompressError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { DecompressError::OutputTooSmall { expected, actual } => { write!( f, "provided output is too small for the decompressed data, actual {actual}, expected \ {expected}" ) } DecompressError::LiteralOutOfBounds => { f.write_str("literal is out of bounds of the input") } DecompressError::ExpectedAnotherByte => { f.write_str("expected another byte, found none") } DecompressError::OffsetOutOfBounds => { f.write_str("the offset to copy is not contained in the decompressed buffer") } } } } impl fmt::Display for CompressError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { CompressError::OutputTooSmall => f.write_str( "output is too small for the compressed data, use get_maximum_output_size to \ reserve enough space", ), } } } #[cfg(feature = "std")] impl std::error::Error for DecompressError {} #[cfg(feature = "std")] impl std::error::Error for CompressError {} /// This can be used in conjunction with `decompress_size_prepended`. /// It will read the first 4 bytes as little-endian encoded length, and return /// the rest of the bytes after the length encoding. #[inline] pub fn uncompressed_size(input: &[u8]) -> Result<(usize, &[u8]), DecompressError> { let size = input.get(..4).ok_or(DecompressError::ExpectedAnotherByte)?; let size: &[u8; 4] = size.try_into().unwrap(); let uncompressed_size = u32::from_le_bytes(*size) as usize; let rest = &input[4..]; Ok((uncompressed_size, rest)) } lz4_flex-0.11.1/src/fastcpy.rs000064400000000000000000000077371046102023000142570ustar 00000000000000//! # FastCpy //! //! The Rust Compiler calls `memcpy` for slices of unknown length. //! 
This crate provides a faster implementation of `memcpy` for slices up to 32bytes (64bytes with `avx`). //! If you know most of you copy operations are not too big you can use `fastcpy` to speed up your program. //! //! `fastcpy` is designed to contain not too much assembly, so the overhead is low. //! //! As fall back the standard `memcpy` is called //! //! ## Double Copy Trick //! `fastcpy` employs a double copy trick to copy slices of length 4-32bytes (64bytes with `avx`). //! E.g. Slice of length 6 can be copied with two uncoditional copy operations. //! //! /// [1, 2, 3, 4, 5, 6] //! /// [1, 2, 3, 4] //! /// [3, 4, 5, 6] //! #[inline] pub fn slice_copy(src: &[u8], dst: &mut [u8]) { #[inline(never)] #[cold] #[track_caller] fn len_mismatch_fail(dst_len: usize, src_len: usize) -> ! { panic!( "source slice length ({}) does not match destination slice length ({})", src_len, dst_len, ); } if src.len() != dst.len() { len_mismatch_fail(src.len(), dst.len()); } let len = src.len(); if src.is_empty() { return; } if len < 4 { short_copy(src, dst); return; } if len < 8 { double_copy_trick::<4>(src, dst); return; } if len <= 16 { double_copy_trick::<8>(src, dst); return; } if len <= 32 { double_copy_trick::<16>(src, dst); return; } /// The code will use the vmovdqu instruction to copy 32 bytes at a time. #[cfg(target_feature = "avx")] { if len <= 64 { double_copy_trick::<32>(src, dst); return; } } // For larger sizes we use the default, which calls memcpy // memcpy does some virtual memory tricks to copy large chunks of memory. // // The theory should be that the checks above don't cost much relative to the copy call for // larger copies. // The bounds checks in `copy_from_slice` are elided. dst.copy_from_slice(src); } #[inline(always)] fn short_copy(src: &[u8], dst: &mut [u8]) { let len = src.len(); // length 1-3 dst[0] = src[0]; if len >= 2 { double_copy_trick::<2>(src, dst); } } #[inline(always)] /// [1, 2, 3, 4, 5, 6] /// [1, 2, 3, 4] /// [3, 4, 5, 6] fn double_copy_trick(src: &[u8], dst: &mut [u8]) { dst[0..SIZE].copy_from_slice(&src[0..SIZE]); dst[src.len() - SIZE..].copy_from_slice(&src[src.len() - SIZE..]); } #[cfg(test)] mod tests { use super::slice_copy; use alloc::vec::Vec; use proptest::prelude::*; proptest! { #[test] fn test_fast_short_slice_copy(left: Vec) { let mut right = vec![0u8; left.len()]; slice_copy(&left, &mut right); prop_assert_eq!(&left, &right); } } #[test] fn test_fast_short_slice_copy_edge_cases() { for len in 0..(512 * 2) { let left = (0..len).map(|i| i as u8).collect::>(); let mut right = vec![0u8; len]; slice_copy(&left, &mut right); assert_eq!(left, right); } } #[test] fn test_fail2() { let left = vec![ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ]; let mut right = vec![0u8; left.len()]; slice_copy(&left, &mut right); assert_eq!(left, right); } #[test] fn test_fail() { let left = vec![ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; let mut right = vec![0u8; left.len()]; slice_copy(&left, &mut right); assert_eq!(left, right); } } lz4_flex-0.11.1/src/fastcpy_unsafe.rs000064400000000000000000000116071046102023000156070ustar 00000000000000//! # FastCpy //! //! The Rust Compiler calls `memcpy` for slices of unknown length. //! 
This crate provides a faster implementation of `memcpy` for slices up to 32bytes (64bytes with `avx`). //! If you know most of you copy operations are not too big you can use `fastcpy` to speed up your program. //! //! `fastcpy` is designed to contain not too much assembly, so the overhead is low. //! //! As fall back the standard `memcpy` is called //! //! ## Double Copy Trick //! `fastcpy` employs a double copy trick to copy slices of length 4-32bytes (64bytes with `avx`). //! E.g. Slice of length 6 can be copied with two uncoditional copy operations. //! //! /// [1, 2, 3, 4, 5, 6] //! /// [1, 2, 3, 4] //! /// [3, 4, 5, 6] //! #[inline] pub fn slice_copy(src: *const u8, dst: *mut u8, num_bytes: usize) { if num_bytes < 4 { short_copy(src, dst, num_bytes); return; } if num_bytes < 8 { double_copy_trick::<4>(src, dst, num_bytes); return; } if num_bytes <= 16 { double_copy_trick::<8>(src, dst, num_bytes); return; } //if num_bytes <= 32 { //double_copy_trick::<16>(src, dst, num_bytes); //return; //} // /// The code will use the vmovdqu instruction to copy 32 bytes at a time. //#[cfg(target_feature = "avx")] //{ //if num_bytes <= 64 { //double_copy_trick::<32>(src, dst, num_bytes); //return; //} //} // For larger sizes we use the default, which calls memcpy // memcpy does some virtual memory tricks to copy large chunks of memory. // // The theory should be that the checks above don't cost much relative to the copy call for // larger copies. // The bounds checks in `copy_from_slice` are elided. //unsafe { core::ptr::copy_nonoverlapping(src, dst, num_bytes) } wild_copy_from_src::<16>(src, dst, num_bytes) } // Inline never because otherwise we get a call to memcpy -.- #[inline] fn wild_copy_from_src( mut source: *const u8, mut dst: *mut u8, num_bytes: usize, ) { // Note: if the compiler auto-vectorizes this it'll hurt performance! // It's not the case for 16 bytes stepsize, but for 8 bytes. let l_last = unsafe { source.add(num_bytes - SIZE) }; let r_last = unsafe { dst.add(num_bytes - SIZE) }; let num_bytes = (num_bytes / SIZE) * SIZE; unsafe { let dst_ptr_end = dst.add(num_bytes); loop { core::ptr::copy_nonoverlapping(source, dst, SIZE); source = source.add(SIZE); dst = dst.add(SIZE); if dst >= dst_ptr_end { break; } } } unsafe { core::ptr::copy_nonoverlapping(l_last, r_last, SIZE); } } #[inline] fn short_copy(src: *const u8, dst: *mut u8, len: usize) { unsafe { *dst = *src; } if len >= 2 { double_copy_trick::<2>(src, dst, len); } } #[inline(always)] /// [1, 2, 3, 4, 5, 6] /// [1, 2, 3, 4] /// [3, 4, 5, 6] fn double_copy_trick(src: *const u8, dst: *mut u8, len: usize) { let l_end = unsafe { src.add(len - SIZE) }; let r_end = unsafe { dst.add(len - SIZE) }; unsafe { core::ptr::copy_nonoverlapping(src, dst, SIZE); core::ptr::copy_nonoverlapping(l_end, r_end, SIZE); } } #[cfg(test)] mod tests { use super::slice_copy; use alloc::vec::Vec; use proptest::prelude::*; proptest! 
{ #[test] fn test_fast_short_slice_copy(left: Vec) { if left.is_empty() { return Ok(()); } let mut right = vec![0u8; left.len()]; slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len()); prop_assert_eq!(&left, &right); } } #[test] fn test_fast_short_slice_copy_edge_cases() { for len in 1..(512 * 2) { let left = (0..len).map(|i| i as u8).collect::>(); let mut right = vec![0u8; len]; slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len()); assert_eq!(left, right); } } #[test] fn test_fail2() { let left = vec![ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ]; let mut right = vec![0u8; left.len()]; slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len()); assert_eq!(left, right); } #[test] fn test_fail() { let left = vec![ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; let mut right = vec![0u8; left.len()]; slice_copy(left.as_ptr(), right.as_mut_ptr(), left.len()); assert_eq!(left, right); } } lz4_flex-0.11.1/src/frame/compress.rs000064400000000000000000000442121046102023000155200ustar 00000000000000use std::{ fmt, hash::Hasher, io::{self, Write}, }; use twox_hash::XxHash32; use crate::{ block::{ compress::compress_internal, hashtable::{HashTable, HashTable4K}, }, sink::vec_sink_for_compression, }; use super::Error; use super::{ header::{BlockInfo, BlockMode, FrameInfo, BLOCK_INFO_SIZE, MAX_FRAME_INFO_SIZE}, BlockSize, }; use crate::block::WINDOW_SIZE; /// A writer for compressing a LZ4 stream. /// /// This `FrameEncoder` wraps any other writer that implements `io::Write`. /// Bytes written to this writer are compressed using the [LZ4 frame /// format](https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md). /// /// Writes are buffered automatically, so there's no need to wrap the given /// writer in a `std::io::BufWriter`. /// /// To ensure a well formed stream the encoder must be finalized by calling /// either the [`finish()`], [`try_finish()`], or [`auto_finish()`] methods. /// /// [`finish()`]: Self::finish /// [`try_finish()`]: Self::try_finish /// [`auto_finish()`]: Self::auto_finish /// /// # Example 1 /// Serializing json values into a compressed file. /// /// ```no_run /// let compressed_file = std::fs::File::create("datafile").unwrap(); /// let mut compressor = lz4_flex::frame::FrameEncoder::new(compressed_file); /// serde_json::to_writer(&mut compressor, &serde_json::json!({ "an": "object" })).unwrap(); /// compressor.finish().unwrap(); /// ``` /// /// # Example 2 /// Serializing multiple json values into a compressed file using linked blocks. /// /// ```no_run /// let compressed_file = std::fs::File::create("datafile").unwrap(); /// let mut frame_info = lz4_flex::frame::FrameInfo::new(); /// frame_info.block_mode = lz4_flex::frame::BlockMode::Linked; /// let mut compressor = lz4_flex::frame::FrameEncoder::with_frame_info(frame_info, compressed_file); /// for i in 0..10u64 { /// serde_json::to_writer(&mut compressor, &serde_json::json!({ "i": i })).unwrap(); /// } /// compressor.finish().unwrap(); /// ``` pub struct FrameEncoder { /// Our buffer of uncompressed bytes. 
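    /// In `Linked` block mode this buffer also holds the prefix and ext_dict window used to find
    /// matches across blocks (see `init` and `write_block`).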
    src: Vec<u8>,
    /// Index into src: starting point of bytes not yet compressed
    src_start: usize,
    /// Index into src: end point of bytes not yet compressed
    src_end: usize,
    /// Index into src: starting point of external dictionary (applicable in Linked block mode)
    ext_dict_offset: usize,
    /// Length of external dictionary
    ext_dict_len: usize,
    /// Counter of bytes already compressed to the compression_table
    /// _Not_ the same as `content_len` as this is reset every 2GB.
    src_stream_offset: usize,
    /// Encoder table
    compression_table: HashTable4K,
    /// The underlying writer.
    w: W,
    /// Xxhash32 used when content checksum is enabled.
    content_hasher: XxHash32,
    /// Number of bytes compressed
    content_len: u64,
    /// The compressed bytes buffer. Bytes are compressed from src (usually)
    /// to dst before being written to w.
    dst: Vec<u8>,
    /// Whether we have an open frame in the output.
    is_frame_open: bool,
    /// Whether we have a frame closed in the output.
    data_to_frame_written: bool,
    /// The frame information to be used in this encoder.
    frame_info: FrameInfo,
}

impl<W: io::Write> FrameEncoder<W> {
    fn init(&mut self) {
        let max_block_size = self.frame_info.block_size.get_size();
        let src_size = if self.frame_info.block_mode == BlockMode::Linked {
            // In linked mode we consume the input (bumping src_start) but leave the
            // beginning of src to be used as a prefix in subsequent blocks.
            // That is at least until we have at least `max_block_size + WINDOW_SIZE`
            // bytes in src, then we setup an ext_dict with the last WINDOW_SIZE bytes
            // and the input goes to the beginning of src again.
            // Since we always want to be able to write a full block (up to max_block_size)
            // we need a buffer with at least `max_block_size * 2 + WINDOW_SIZE` bytes.
            max_block_size * 2 + WINDOW_SIZE
        } else {
            max_block_size
        };
        // Since this method is called potentially multiple times, don't reserve _additional_
        // capacity if not required.
        self.src
            .reserve(src_size.saturating_sub(self.src.capacity()));
        self.dst.reserve(
            crate::block::compress::get_maximum_output_size(max_block_size)
                .saturating_sub(self.dst.capacity()),
        );
    }

    /// Returns a wrapper around `self` that will finish the stream on drop.
    ///
    /// # Note
    /// Errors on drop get silently ignored. If you want to handle errors then use [`finish()`] or
    /// [`try_finish()`] instead.
    ///
    /// [`finish()`]: Self::finish
    /// [`try_finish()`]: Self::try_finish
    pub fn auto_finish(self) -> AutoFinishEncoder<W> {
        AutoFinishEncoder {
            encoder: Some(self),
        }
    }

    /// Creates a new Encoder with the specified FrameInfo.
    pub fn with_frame_info(frame_info: FrameInfo, wtr: W) -> Self {
        FrameEncoder {
            src: Vec::new(),
            w: wtr,
            // 16 KB hash table for matches, same as the reference implementation.
            compression_table: HashTable4K::new(),
            content_hasher: XxHash32::with_seed(0),
            content_len: 0,
            dst: Vec::new(),
            is_frame_open: false,
            data_to_frame_written: false,
            frame_info,
            src_start: 0,
            src_end: 0,
            ext_dict_offset: 0,
            ext_dict_len: 0,
            src_stream_offset: 0,
        }
    }

    /// Creates a new Encoder with the default settings.
    pub fn new(wtr: W) -> Self {
        Self::with_frame_info(Default::default(), wtr)
    }

    /// The frame information used by this Encoder.
    pub fn frame_info(&mut self) -> &FrameInfo {
        &self.frame_info
    }

    /// Consumes this encoder, flushing internal buffer and writing stream terminator.
    pub fn finish(mut self) -> Result<W, Error> {
        self.try_finish()?;
        Ok(self.w)
    }

    /// Attempt to finish this output stream, flushing internal buffer and writing stream
    /// terminator.
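    ///
    /// # Example
    /// A minimal sketch (the file name is just a placeholder):
    /// ```no_run
    /// use std::io::Write;
    /// let file = std::fs::File::create("datafile").unwrap();
    /// let mut compressor = lz4_flex::frame::FrameEncoder::new(file);
    /// compressor.write_all(b"hello lz4").unwrap();
    /// // Unlike `finish()`, `try_finish()` does not consume the encoder.
    /// compressor.try_finish().unwrap();
    /// ```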
pub fn try_finish(&mut self) -> Result<(), Error> { match self.flush() { Ok(()) => { // Empty input special case // https://github.com/ouch-org/ouch/pull/163#discussion_r1108965151 if !self.is_frame_open && !self.data_to_frame_written { self.begin_frame(0)?; } self.end_frame()?; self.data_to_frame_written = true; Ok(()) } Err(err) => Err(err.into()), } } /// Returns the underlying writer _without_ flushing the stream. /// This may leave the output in an unfinished state. pub fn into_inner(self) -> W { self.w } /// Gets a reference to the underlying writer in this encoder. pub fn get_ref(&self) -> &W { &self.w } /// Gets a reference to the underlying writer in this encoder. /// /// Note that mutating the output/input state of the stream may corrupt /// this encoder, so care must be taken when using this method. pub fn get_mut(&mut self) -> &mut W { &mut self.w } /// Closes the frame by writing the end marker. fn end_frame(&mut self) -> Result<(), Error> { debug_assert!(self.is_frame_open); self.is_frame_open = false; if let Some(expected) = self.frame_info.content_size { if expected != self.content_len { return Err(Error::ContentLengthError { expected, actual: self.content_len, }); } } let mut block_info_buffer = [0u8; BLOCK_INFO_SIZE]; BlockInfo::EndMark.write(&mut block_info_buffer[..])?; self.w.write_all(&block_info_buffer[..])?; if self.frame_info.content_checksum { let content_checksum = self.content_hasher.finish() as u32; self.w.write_all(&content_checksum.to_le_bytes())?; } Ok(()) } /// Begin the frame by writing the frame header. /// It'll also setup the encoder for compressing blocks for the the new frame. fn begin_frame(&mut self, buf_len: usize) -> io::Result<()> { self.is_frame_open = true; if self.frame_info.block_size == BlockSize::Auto { self.frame_info.block_size = BlockSize::from_buf_length(buf_len); } self.init(); let mut frame_info_buffer = [0u8; MAX_FRAME_INFO_SIZE]; let size = self.frame_info.write(&mut frame_info_buffer)?; self.w.write_all(&frame_info_buffer[..size])?; if self.content_len != 0 { // This is the second or later frame for this Encoder, // reset compressor state for the new frame. self.content_len = 0; self.src_stream_offset = 0; self.src.clear(); self.src_start = 0; self.src_end = 0; self.ext_dict_len = 0; self.content_hasher = XxHash32::with_seed(0); self.compression_table.clear(); } Ok(()) } /// Consumes the src contents between src_start and src_end, /// which shouldn't exceed the max block size. 
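    /// Writes one block to the underlying writer: the `BlockInfo` header, the (un)compressed
    /// payload and, if enabled in the frame info, the block checksum.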
fn write_block(&mut self) -> io::Result<()> { debug_assert!(self.is_frame_open); let max_block_size = self.frame_info.block_size.get_size(); debug_assert!(self.src_end - self.src_start <= max_block_size); // Reposition the compression table if we're anywhere near an overflowing hazard if self.src_stream_offset + max_block_size + WINDOW_SIZE >= u32::MAX as usize / 2 { self.compression_table .reposition((self.src_stream_offset - self.ext_dict_len) as _); self.src_stream_offset = self.ext_dict_len; } // input to the compressor, which may include a prefix when blocks are linked let input = &self.src[..self.src_end]; // the contents of the block are between src_start and src_end let src = &input[self.src_start..]; let dst_required_size = crate::block::compress::get_maximum_output_size(src.len()); let compress_result = if self.ext_dict_len != 0 { debug_assert_eq!(self.frame_info.block_mode, BlockMode::Linked); compress_internal::<_, true, _>( input, self.src_start, &mut vec_sink_for_compression(&mut self.dst, 0, 0, dst_required_size), &mut self.compression_table, &self.src[self.ext_dict_offset..self.ext_dict_offset + self.ext_dict_len], self.src_stream_offset, ) } else { compress_internal::<_, false, _>( input, self.src_start, &mut vec_sink_for_compression(&mut self.dst, 0, 0, dst_required_size), &mut self.compression_table, b"", self.src_stream_offset, ) }; let (block_info, block_data) = match compress_result.map_err(Error::CompressionError)? { comp_len if comp_len < src.len() => { (BlockInfo::Compressed(comp_len as _), &self.dst[..comp_len]) } _ => (BlockInfo::Uncompressed(src.len() as _), src), }; // Write the (un)compressed block to the writer and the block checksum (if applicable). let mut block_info_buffer = [0u8; BLOCK_INFO_SIZE]; block_info.write(&mut block_info_buffer[..])?; self.w.write_all(&block_info_buffer[..])?; self.w.write_all(block_data)?; if self.frame_info.block_checksums { let mut block_hasher = XxHash32::with_seed(0); block_hasher.write(block_data); let block_checksum = block_hasher.finish() as u32; self.w.write_all(&block_checksum.to_le_bytes())?; } // Content checksum, if applicable if self.frame_info.content_checksum { self.content_hasher.write(src); } // Buffer and offsets maintenance self.content_len += src.len() as u64; self.src_start += src.len(); debug_assert_eq!(self.src_start, self.src_end); if self.frame_info.block_mode == BlockMode::Linked { // In linked mode we consume the input (bumping src_start) but leave the // beginning of src to be used as a prefix in subsequent blocks. // That is at least until we have at least `max_block_size + WINDOW_SIZE` // bytes in src, then we setup an ext_dict with the last WINDOW_SIZE bytes // and the input goes to the beginning of src again. debug_assert_eq!(self.src.capacity(), max_block_size * 2 + WINDOW_SIZE); if self.src_start >= max_block_size + WINDOW_SIZE { // The ext_dict will become the last WINDOW_SIZE bytes self.ext_dict_offset = self.src_end - WINDOW_SIZE; self.ext_dict_len = WINDOW_SIZE; // Input goes in the beginning of the buffer again. self.src_stream_offset += self.src_end; self.src_start = 0; self.src_end = 0; } else if self.src_start + self.ext_dict_len > WINDOW_SIZE { // There's more than WINDOW_SIZE bytes of lookback adding the prefix and ext_dict. // Since we have a limited buffer we must shrink ext_dict in favor of the prefix, // so that we can fit up to max_block_size bytes between dst_start and ext_dict // start. 
let delta = self .ext_dict_len .min(self.src_start + self.ext_dict_len - WINDOW_SIZE); self.ext_dict_offset += delta; self.ext_dict_len -= delta; debug_assert!(self.src_start + self.ext_dict_len >= WINDOW_SIZE) } debug_assert!( self.ext_dict_len == 0 || self.src_start + max_block_size <= self.ext_dict_offset ); } else { // In independent block mode we consume the entire src buffer // which is sized equal to the frame max_block_size. debug_assert_eq!(self.ext_dict_len, 0); debug_assert_eq!(self.src.capacity(), max_block_size); self.src_start = 0; self.src_end = 0; // Advance stream offset so we don't have to reset the match dict // for the next block. self.src_stream_offset += src.len(); } debug_assert!(self.src_start <= self.src_end); debug_assert!(self.src_start + max_block_size <= self.src.capacity()); Ok(()) } } impl io::Write for FrameEncoder { fn write(&mut self, mut buf: &[u8]) -> io::Result { if !self.is_frame_open && !buf.is_empty() { self.begin_frame(buf.len())?; } let buf_len = buf.len(); while !buf.is_empty() { let src_filled = self.src_end - self.src_start; let max_fill_len = self.frame_info.block_size.get_size() - src_filled; if max_fill_len == 0 { // make space by writing next block self.write_block()?; debug_assert_eq!(self.src_end, self.src_start); continue; } let fill_len = max_fill_len.min(buf.len()); vec_copy_overwriting(&mut self.src, self.src_end, &buf[..fill_len]); buf = &buf[fill_len..]; self.src_end += fill_len; } Ok(buf_len) } fn flush(&mut self) -> io::Result<()> { if self.src_start != self.src_end { self.write_block()?; } Ok(()) } } /// A wrapper around an [`FrameEncoder`] that finishes the stream on drop. /// /// This can be created by the [`auto_finish()`] method on the [`FrameEncoder`]. /// /// # Note /// Errors on drop get silently ignored. If you want to handle errors then use [`finish()`] or /// [`try_finish()`] instead. /// /// [`finish()`]: FrameEncoder::finish /// [`try_finish()`]: FrameEncoder::try_finish /// [`auto_finish()`]: FrameEncoder::auto_finish pub struct AutoFinishEncoder { // We wrap this in an option to take it during drop. encoder: Option>, } impl Drop for AutoFinishEncoder { fn drop(&mut self) { if let Some(mut encoder) = self.encoder.take() { let _ = encoder.try_finish(); } } } impl Write for AutoFinishEncoder { fn write(&mut self, buf: &[u8]) -> io::Result { self.encoder.as_mut().unwrap().write(buf) } fn flush(&mut self) -> io::Result<()> { self.encoder.as_mut().unwrap().flush() } } impl fmt::Debug for FrameEncoder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("FrameEncoder") .field("w", &self.w) .field("frame_info", &self.frame_info) .field("is_frame_open", &self.is_frame_open) .field("content_hasher", &self.content_hasher) .field("content_len", &self.content_len) .field("dst", &"[...]") .field("src", &"[...]") .field("src_start", &self.src_start) .field("src_end", &self.src_end) .field("ext_dict_offset", &self.ext_dict_offset) .field("ext_dict_len", &self.ext_dict_len) .field("src_stream_offset", &self.src_stream_offset) .finish() } } /// Copy `src` into `target` starting from the `start` index, overwriting existing data if any. #[inline] fn vec_copy_overwriting(target: &mut Vec, target_start: usize, src: &[u8]) { debug_assert!(target_start + src.len() <= target.capacity()); // By combining overwriting (copy_from_slice) and extending (extend_from_slice) // we can fill the ring buffer without initializing it (eg. filling with 0). 
let overwrite_len = (target.len() - target_start).min(src.len()); target[target_start..target_start + overwrite_len].copy_from_slice(&src[..overwrite_len]); target.extend_from_slice(&src[overwrite_len..]); } lz4_flex-0.11.1/src/frame/decompress.rs000064400000000000000000000423271046102023000160360ustar 00000000000000use std::{ convert::TryInto, fmt, hash::Hasher, io::{self, BufRead, ErrorKind}, mem::size_of, }; use twox_hash::XxHash32; use super::header::{ BlockInfo, BlockMode, FrameInfo, LZ4F_LEGACY_MAGIC_NUMBER, MAGIC_NUMBER_SIZE, MAX_FRAME_INFO_SIZE, MIN_FRAME_INFO_SIZE, }; use super::Error; use crate::{ block::WINDOW_SIZE, sink::{vec_sink_for_decompression, SliceSink}, }; /// A reader for decompressing the LZ4 frame format /// /// This Decoder wraps any other reader that implements `io::Read`. /// Bytes read will be decompressed according to the [LZ4 frame format]( /// https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md). /// /// # Example 1 /// Deserializing json values out of a compressed file. /// /// ```no_run /// let compressed_input = std::fs::File::open("datafile").unwrap(); /// let mut decompressed_input = lz4_flex::frame::FrameDecoder::new(compressed_input); /// let json: serde_json::Value = serde_json::from_reader(decompressed_input).unwrap(); /// ``` /// /// # Example /// Deserializing multiple json values out of a compressed file /// /// ```no_run /// let compressed_input = std::fs::File::open("datafile").unwrap(); /// let mut decompressed_input = lz4_flex::frame::FrameDecoder::new(compressed_input); /// loop { /// match serde_json::from_reader::<_, serde_json::Value>(&mut decompressed_input) { /// Ok(json) => { println!("json {:?}", json); } /// Err(e) if e.is_eof() => break, /// Err(e) => panic!("{}", e), /// } /// } /// ``` pub struct FrameDecoder { /// The underlying reader. r: R, /// The FrameInfo of the frame currently being decoded. /// It starts as `None` and is filled with the FrameInfo is read from the input. /// It's reset to `None` once the frame EndMarker is read from the input. current_frame_info: Option, /// Xxhash32 used when content checksum is enabled. content_hasher: XxHash32, /// Total length of decompressed output for the current frame. content_len: u64, /// The compressed bytes buffer, taken from the underlying reader. src: Vec, /// The decompressed bytes buffer. Bytes are decompressed from src to dst /// before being passed back to the caller. dst: Vec, /// Index into dst and length: starting point of bytes previously output /// that are still part of the decompressor window. ext_dict_offset: usize, ext_dict_len: usize, /// Index into dst: starting point of bytes not yet read by caller. dst_start: usize, /// Index into dst: ending point of bytes not yet read by caller. dst_end: usize, } impl FrameDecoder { /// Creates a new Decoder for the specified reader. pub fn new(rdr: R) -> FrameDecoder { FrameDecoder { r: rdr, src: Default::default(), dst: Default::default(), ext_dict_offset: 0, ext_dict_len: 0, dst_start: 0, dst_end: 0, current_frame_info: None, content_hasher: XxHash32::with_seed(0), content_len: 0, } } /// Gets a reference to the underlying reader in this decoder. pub fn get_ref(&self) -> &R { &self.r } /// Gets a mutable reference to the underlying reader in this decoder. /// /// Note that mutation of the stream may result in surprising results if /// this decoder is continued to be used. pub fn get_mut(&mut self) -> &mut R { &mut self.r } /// Consumes the FrameDecoder and returns the underlying reader. 
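/// A minimal usage sketch (the file name is hypothetical); the frame should normally be
/// read to its end before recovering the reader:
///
/// ```no_run
/// use std::io::Read;
/// let file = std::fs::File::open("datafile").unwrap();
/// let mut decoder = lz4_flex::frame::FrameDecoder::new(file);
/// let mut contents = Vec::new();
/// decoder.read_to_end(&mut contents).unwrap();
/// let _file = decoder.into_inner();
/// ```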
pub fn into_inner(self) -> R { self.r } fn read_frame_info(&mut self) -> Result { let mut buffer = [0u8; MAX_FRAME_INFO_SIZE]; match self.r.read(&mut buffer[..MAGIC_NUMBER_SIZE])? { 0 => return Ok(0), MAGIC_NUMBER_SIZE => (), read => self.r.read_exact(&mut buffer[read..MAGIC_NUMBER_SIZE])?, } if u32::from_le_bytes(buffer[0..MAGIC_NUMBER_SIZE].try_into().unwrap()) != LZ4F_LEGACY_MAGIC_NUMBER { match self .r .read(&mut buffer[MAGIC_NUMBER_SIZE..MIN_FRAME_INFO_SIZE])? { 0 => return Ok(0), MIN_FRAME_INFO_SIZE => (), read => self .r .read_exact(&mut buffer[MAGIC_NUMBER_SIZE + read..MIN_FRAME_INFO_SIZE])?, } } let required = FrameInfo::read_size(&buffer[..MIN_FRAME_INFO_SIZE])?; if required != MIN_FRAME_INFO_SIZE && required != MAGIC_NUMBER_SIZE { self.r .read_exact(&mut buffer[MIN_FRAME_INFO_SIZE..required])?; } let frame_info = FrameInfo::read(&buffer[..required])?; if frame_info.dict_id.is_some() { // Unsupported right now so it must be None return Err(Error::DictionaryNotSupported.into()); } let max_block_size = frame_info.block_size.get_size(); let dst_size = if frame_info.block_mode == BlockMode::Linked { // In linked mode we consume the output (bumping dst_start) but leave the // beginning of dst to be used as a prefix in subsequent blocks. // That is at least until we have at least `max_block_size + WINDOW_SIZE` // bytes in dst, then we setup an ext_dict with the last WINDOW_SIZE bytes // and the output goes to the beginning of dst again. // Since we always want to be able to write a full block (up to max_block_size) // we need a buffer with at least `max_block_size * 2 + WINDOW_SIZE` bytes. max_block_size * 2 + WINDOW_SIZE } else { max_block_size }; self.src.clear(); self.dst.clear(); self.src.reserve_exact(max_block_size); self.dst.reserve_exact(dst_size); self.current_frame_info = Some(frame_info); self.content_hasher = XxHash32::with_seed(0); self.content_len = 0; self.ext_dict_len = 0; self.dst_start = 0; self.dst_end = 0; Ok(required) } #[inline] fn read_checksum(r: &mut R) -> Result { let mut checksum_buffer = [0u8; size_of::()]; r.read_exact(&mut checksum_buffer[..])?; let checksum = u32::from_le_bytes(checksum_buffer); Ok(checksum) } #[inline] fn check_block_checksum(data: &[u8], expected_checksum: u32) -> Result<(), io::Error> { let mut block_hasher = XxHash32::with_seed(0); block_hasher.write(data); let calc_checksum = block_hasher.finish() as u32; if calc_checksum != expected_checksum { return Err(Error::BlockChecksumError.into()); } Ok(()) } fn read_block(&mut self) -> io::Result { debug_assert_eq!(self.dst_start, self.dst_end); let frame_info = self.current_frame_info.as_ref().unwrap(); // Adjust dst buffer offsets to decompress the next block let max_block_size = frame_info.block_size.get_size(); if frame_info.block_mode == BlockMode::Linked { // In linked mode we consume the output (bumping dst_start) but leave the // beginning of dst to be used as a prefix in subsequent blocks. // That is at least until we have at least `max_block_size + WINDOW_SIZE` // bytes in dst, then we setup an ext_dict with the last WINDOW_SIZE bytes // and the output goes to the beginning of dst again. debug_assert_eq!(self.dst.capacity(), max_block_size * 2 + WINDOW_SIZE); if self.dst_start + max_block_size > self.dst.capacity() { // Output might not fit in the buffer. 
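// Illustrative numbers, assuming 64KiB blocks and a 64KiB WINDOW_SIZE: dst has a
// capacity of 192KiB, so once dst_start passes 128KiB the next block may no longer
// fit, decoding wraps back to offset 0, and the previous 64KiB are kept as ext_dict.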
// The ext_dict will become the last WINDOW_SIZE bytes debug_assert!(self.dst_start >= max_block_size + WINDOW_SIZE); self.ext_dict_offset = self.dst_start - WINDOW_SIZE; self.ext_dict_len = WINDOW_SIZE; // Output goes in the beginning of the buffer again. self.dst_start = 0; self.dst_end = 0; } else if self.dst_start + self.ext_dict_len > WINDOW_SIZE { // There's more than WINDOW_SIZE bytes of lookback adding the prefix and ext_dict. // Since we have a limited buffer we must shrink ext_dict in favor of the prefix, // so that we can fit up to max_block_size bytes between dst_start and ext_dict // start. let delta = self .ext_dict_len .min(self.dst_start + self.ext_dict_len - WINDOW_SIZE); self.ext_dict_offset += delta; self.ext_dict_len -= delta; debug_assert!(self.dst_start + self.ext_dict_len >= WINDOW_SIZE) } } else { debug_assert_eq!(self.ext_dict_len, 0); debug_assert_eq!(self.dst.capacity(), max_block_size); self.dst_start = 0; self.dst_end = 0; } // Read and decompress block let block_info = { let mut buffer = [0u8; 4]; if let Err(err) = self.r.read_exact(&mut buffer) { if err.kind() == ErrorKind::UnexpectedEof { return Ok(0); } else { return Err(err); } } BlockInfo::read(&buffer)? }; match block_info { BlockInfo::Uncompressed(len) => { let len = len as usize; if len > max_block_size { return Err(Error::BlockTooBig.into()); } // TODO: Attempt to avoid initialization of read buffer when // https://github.com/rust-lang/rust/issues/42788 stabilizes self.r.read_exact(vec_resize_and_get_mut( &mut self.dst, self.dst_start, self.dst_start + len, ))?; if frame_info.block_checksums { let expected_checksum = Self::read_checksum(&mut self.r)?; Self::check_block_checksum( &self.dst[self.dst_start..self.dst_start + len], expected_checksum, )?; } self.dst_end += len; self.content_len += len as u64; } BlockInfo::Compressed(len) => { let len = len as usize; if len > max_block_size { return Err(Error::BlockTooBig.into()); } // TODO: Attempt to avoid initialization of read buffer when // https://github.com/rust-lang/rust/issues/42788 stabilizes self.r .read_exact(vec_resize_and_get_mut(&mut self.src, 0, len))?; if frame_info.block_checksums { let expected_checksum = Self::read_checksum(&mut self.r)?; Self::check_block_checksum(&self.src[..len], expected_checksum)?; } let with_dict_mode = frame_info.block_mode == BlockMode::Linked && self.ext_dict_len != 0; let decomp_size = if with_dict_mode { debug_assert!(self.dst_start + max_block_size <= self.ext_dict_offset); let (head, tail) = self.dst.split_at_mut(self.ext_dict_offset); let ext_dict = &tail[..self.ext_dict_len]; debug_assert!(head.len() - self.dst_start >= max_block_size); crate::block::decompress::decompress_internal::( &self.src[..len], &mut SliceSink::new(head, self.dst_start), ext_dict, ) } else { // Independent blocks OR linked blocks with only prefix data debug_assert!(self.dst.capacity() - self.dst_start >= max_block_size); crate::block::decompress::decompress_internal::( &self.src[..len], &mut vec_sink_for_decompression( &mut self.dst, 0, self.dst_start, self.dst_start + max_block_size, ), b"", ) } .map_err(Error::DecompressionError)?; self.dst_end += decomp_size; self.content_len += decomp_size as u64; } BlockInfo::EndMark => { if let Some(expected) = frame_info.content_size { if self.content_len != expected { return Err(Error::ContentLengthError { expected, actual: self.content_len, } .into()); } } if frame_info.content_checksum { let expected_checksum = Self::read_checksum(&mut self.r)?; let calc_checksum = 
self.content_hasher.finish() as u32; if calc_checksum != expected_checksum { return Err(Error::ContentChecksumError.into()); } } self.current_frame_info = None; return Ok(0); } } // Content checksum, if applicable if frame_info.content_checksum { self.content_hasher .write(&self.dst[self.dst_start..self.dst_end]); } Ok(self.dst_end - self.dst_start) } fn read_more(&mut self) -> io::Result { if self.current_frame_info.is_none() && self.read_frame_info()? == 0 { return Ok(0); } self.read_block() } } impl io::Read for FrameDecoder { fn read(&mut self, buf: &mut [u8]) -> io::Result { loop { // Fill read buffer if there's uncompressed data left if self.dst_start < self.dst_end { let read_len = std::cmp::min(self.dst_end - self.dst_start, buf.len()); let dst_read_end = self.dst_start + read_len; buf[..read_len].copy_from_slice(&self.dst[self.dst_start..dst_read_end]); self.dst_start = dst_read_end; return Ok(read_len); } if self.read_more()? == 0 { return Ok(0); } } } fn read_to_string(&mut self, buf: &mut String) -> io::Result { let mut written = 0; loop { match self.fill_buf() { Ok(b) if b.is_empty() => return Ok(written), Ok(b) => { let s = std::str::from_utf8(b).map_err(|_| { io::Error::new( io::ErrorKind::InvalidData, "stream did not contain valid UTF-8", ) })?; buf.push_str(s); let len = s.len(); self.consume(len); written += len; } Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => return Err(e), } } } fn read_to_end(&mut self, buf: &mut Vec) -> io::Result { let mut written = 0; loop { match self.fill_buf() { Ok(b) if b.is_empty() => return Ok(written), Ok(b) => { buf.extend_from_slice(b); let len = b.len(); self.consume(len); written += len; } Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue, Err(e) => return Err(e), } } } } impl io::BufRead for FrameDecoder { fn fill_buf(&mut self) -> io::Result<&[u8]> { if self.dst_start == self.dst_end { self.read_more()?; } Ok(&self.dst[self.dst_start..self.dst_end]) } fn consume(&mut self, amt: usize) { assert!(amt <= self.dst_end - self.dst_start); self.dst_start += amt; } } impl fmt::Debug for FrameDecoder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_struct("FrameDecoder") .field("r", &self.r) .field("content_hasher", &self.content_hasher) .field("content_len", &self.content_len) .field("src", &"[...]") .field("dst", &"[...]") .field("dst_start", &self.dst_start) .field("dst_end", &self.dst_end) .field("ext_dict_offset", &self.ext_dict_offset) .field("ext_dict_len", &self.ext_dict_len) .field("current_frame_info", &self.current_frame_info) .finish() } } /// Similar to `v.get_mut(start..end) but will adjust the len if needed. 
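/// For instance (illustrative values): with `v.len() == 2`, calling this with `start = 1`
/// and `end = 5` zero-extends the vec to length 5 and returns the mutable slice `v[1..5]`.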
#[inline]
fn vec_resize_and_get_mut(v: &mut Vec<u8>, start: usize, end: usize) -> &mut [u8] {
    if end > v.len() {
        v.resize(end, 0)
    }
    &mut v[start..end]
}
lz4_flex-0.11.1/src/frame/header.rs000064400000000000000000000323471046102023000151230ustar 00000000000000
use twox_hash::XxHash32;

use super::Error;
use std::{
    convert::TryInto,
    fmt::Debug,
    hash::Hasher,
    io,
    io::{Read, Write},
};

const FLG_RESERVED_MASK: u8 = 0b00000010;
const FLG_VERSION_MASK: u8 = 0b11000000;
const FLG_SUPPORTED_VERSION_BITS: u8 = 0b01000000;

const FLG_INDEPENDENT_BLOCKS: u8 = 0b00100000;
const FLG_BLOCK_CHECKSUMS: u8 = 0b00010000;
const FLG_CONTENT_SIZE: u8 = 0b00001000;
const FLG_CONTENT_CHECKSUM: u8 = 0b00000100;
const FLG_DICTIONARY_ID: u8 = 0b00000001;

const BD_RESERVED_MASK: u8 = !BD_BLOCK_SIZE_MASK;
const BD_BLOCK_SIZE_MASK: u8 = 0b01110000;
const BD_BLOCK_SIZE_MASK_RSHIFT: u8 = 4;

const BLOCK_UNCOMPRESSED_SIZE_BIT: u32 = 0x80000000;

const LZ4F_MAGIC_NUMBER: u32 = 0x184D2204;
pub(crate) const LZ4F_LEGACY_MAGIC_NUMBER: u32 = 0x184C2102;
const LZ4F_SKIPPABLE_MAGIC_RANGE: std::ops::RangeInclusive<u32> = 0x184D2A50..=0x184D2A5F;

pub(crate) const MAGIC_NUMBER_SIZE: usize = 4;
pub(crate) const MIN_FRAME_INFO_SIZE: usize = 7;
pub(crate) const MAX_FRAME_INFO_SIZE: usize = 19;
pub(crate) const BLOCK_INFO_SIZE: usize = 4;

#[derive(Clone, Copy, PartialEq, Debug)]
/// Different predefined block sizes to choose from when compressing data.
#[derive(Default)]
pub enum BlockSize {
    /// Will detect optimal frame size based on the size of the first write call
    #[default]
    Auto = 0,
    /// The default block size.
    Max64KB = 4,
    /// 256KB block size.
    Max256KB = 5,
    /// 1MB block size.
    Max1MB = 6,
    /// 4MB block size.
    Max4MB = 7,
    /// 8MB block size.
    Max8MB = 8,
}

impl BlockSize {
    /// Try to find optimal size based on passed buffer length.
    pub(crate) fn from_buf_length(buf_len: usize) -> Self {
        let mut blocksize = BlockSize::Max4MB;
        for candidate in [BlockSize::Max256KB, BlockSize::Max64KB] {
            if buf_len > candidate.get_size() {
                return blocksize;
            }
            blocksize = candidate;
        }
        BlockSize::Max64KB
    }

    pub(crate) fn get_size(&self) -> usize {
        match self {
            BlockSize::Auto => unreachable!(),
            BlockSize::Max64KB => 64 * 1024,
            BlockSize::Max256KB => 256 * 1024,
            BlockSize::Max1MB => 1024 * 1024,
            BlockSize::Max4MB => 4 * 1024 * 1024,
            BlockSize::Max8MB => 8 * 1024 * 1024,
        }
    }
}

#[derive(Clone, Copy, PartialEq, Debug)]
/// The two `BlockMode` operations that can be set on [`FrameInfo`](FrameInfo)
#[derive(Default)]
pub enum BlockMode {
    /// Every block is compressed independently. The default.
    #[default]
    Independent,
    /// Blocks can reference data from previous blocks.
    ///
    /// Effective when the stream contains small blocks.
    Linked,
}

// From: https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md
//
// General Structure of LZ4 Frame format
// -------------------------------------
//
// | MagicNb | F. Descriptor | Block | (...) | EndMark | C.
Checksum | // |:-------:|:-------------:| ----- | ----- | ------- | ----------- | // | 4 bytes | 3-15 bytes | | | 4 bytes | 0-4 bytes | // // Frame Descriptor // ---------------- // // | FLG | BD | (Content Size) | (Dictionary ID) | HC | // | ------- | ------- |:--------------:|:---------------:| ------- | // | 1 byte | 1 byte | 0 - 8 bytes | 0 - 4 bytes | 1 byte | // // __FLG byte__ // // | BitNb | 7-6 | 5 | 4 | 3 | 2 | 1 | 0 | // | ------- |-------|-------|----------|------|----------|----------|------| // |FieldName|Version|B.Indep|B.Checksum|C.Size|C.Checksum|*Reserved*|DictID| // // __BD byte__ // // | BitNb | 7 | 6-5-4 | 3-2-1-0 | // | ------- | -------- | ------------- | -------- | // |FieldName|*Reserved*| Block MaxSize |*Reserved*| // // Data Blocks // ----------- // // | Block Size | data | (Block Checksum) | // |:----------:| ------ |:----------------:| // | 4 bytes | | 0 - 4 bytes | // #[derive(Debug, Default, Clone)] /// The metadata for de/compressing with lz4 frame format. pub struct FrameInfo { /// If set, includes the total uncompressed size of data in the frame. pub content_size: Option, /// The identifier for the dictionary that must be used to correctly decode data. /// The compressor and the decompressor must use exactly the same dictionary. /// /// Note that this is currently unsupported and for this reason it's not pub. pub(crate) dict_id: Option, /// The maximum uncompressed size of each data block. pub block_size: BlockSize, /// The block mode. pub block_mode: BlockMode, /// If set, includes a checksum for each data block in the frame. pub block_checksums: bool, /// If set, includes a content checksum to verify that the full frame contents have been /// decoded correctly. pub content_checksum: bool, /// If set, use the legacy frame format pub legacy_frame: bool, } impl FrameInfo { /// Create a new `FrameInfo`. pub fn new() -> Self { Self::default() } /// Whether to include the total uncompressed size of data in the frame. pub fn content_size(mut self, content_size: Option) -> Self { self.content_size = content_size; self } /// The maximum uncompressed size of each data block. pub fn block_size(mut self, block_size: BlockSize) -> Self { self.block_size = block_size; self } /// The block mode. pub fn block_mode(mut self, block_mode: BlockMode) -> Self { self.block_mode = block_mode; self } /// If set, includes a checksum for each data block in the frame. pub fn block_checksums(mut self, block_checksums: bool) -> Self { self.block_checksums = block_checksums; self } /// If set, includes a content checksum to verify that the full frame contents have been /// decoded correctly. pub fn content_checksum(mut self, content_checksum: bool) -> Self { self.content_checksum = content_checksum; self } /// If set, use the legacy frame format. 
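/// A short sketch of the builder-style configuration (the chosen values are illustrative):
///
/// ```
/// use lz4_flex::frame::{BlockMode, BlockSize, FrameInfo};
/// let _info = FrameInfo::new()
///     .block_size(BlockSize::Max256KB)
///     .block_mode(BlockMode::Linked)
///     .content_checksum(true);
/// ```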
pub fn legacy_frame(mut self, legacy_frame: bool) -> Self { self.legacy_frame = legacy_frame; self } pub(crate) fn read_size(input: &[u8]) -> Result { let mut required = MIN_FRAME_INFO_SIZE; let magic_num = u32::from_le_bytes(input[0..4].try_into().unwrap()); if magic_num == LZ4F_LEGACY_MAGIC_NUMBER { return Ok(MAGIC_NUMBER_SIZE); } if input.len() < required { return Ok(required); } if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) { return Ok(8); } if magic_num != LZ4F_MAGIC_NUMBER { return Err(Error::WrongMagicNumber); } if input[4] & FLG_CONTENT_SIZE != 0 { required += 8; } if input[4] & FLG_DICTIONARY_ID != 0 { required += 4 } Ok(required) } pub(crate) fn write_size(&self) -> usize { let mut required = MIN_FRAME_INFO_SIZE; if self.content_size.is_some() { required += 8; } if self.dict_id.is_some() { required += 4; } required } pub(crate) fn write(&self, output: &mut [u8]) -> Result { let write_size = self.write_size(); if output.len() < write_size { return Err(Error::IoError(io::ErrorKind::UnexpectedEof.into())); } let mut buffer = [0u8; MAX_FRAME_INFO_SIZE]; assert!(write_size <= buffer.len()); buffer[0..4].copy_from_slice(&LZ4F_MAGIC_NUMBER.to_le_bytes()); buffer[4] = FLG_SUPPORTED_VERSION_BITS; if self.block_checksums { buffer[4] |= FLG_BLOCK_CHECKSUMS; } if self.content_checksum { buffer[4] |= FLG_CONTENT_CHECKSUM; } if self.block_mode == BlockMode::Independent { buffer[4] |= FLG_INDEPENDENT_BLOCKS; } buffer[5] = (self.block_size as u8) << BD_BLOCK_SIZE_MASK_RSHIFT; // Optional section let mut offset = 6; if let Some(size) = self.content_size { buffer[4] |= FLG_CONTENT_SIZE; buffer[offset..offset + 8].copy_from_slice(&size.to_le_bytes()); offset += 8; } if let Some(dict_id) = self.dict_id { buffer[4] |= FLG_DICTIONARY_ID; buffer[offset..offset + 4].copy_from_slice(&dict_id.to_le_bytes()); offset += 4; } // Header checksum let mut hasher = XxHash32::with_seed(0); hasher.write(&buffer[4..offset]); let header_checksum = (hasher.finish() >> 8) as u8; buffer[offset] = header_checksum; offset += 1; debug_assert_eq!(offset, write_size); output[..write_size].copy_from_slice(&buffer[..write_size]); Ok(write_size) } pub(crate) fn read(mut input: &[u8]) -> Result { let original_input = input; // 4 byte Magic let magic_num = { let mut buffer = [0u8; 4]; input.read_exact(&mut buffer)?; u32::from_le_bytes(buffer) }; if magic_num == LZ4F_LEGACY_MAGIC_NUMBER { return Ok(FrameInfo { block_size: BlockSize::Max8MB, legacy_frame: true, ..FrameInfo::default() }); } if LZ4F_SKIPPABLE_MAGIC_RANGE.contains(&magic_num) { let mut buffer = [0u8; 4]; input.read_exact(&mut buffer)?; let user_data_len = u32::from_le_bytes(buffer); return Err(Error::SkippableFrame(user_data_len)); } if magic_num != LZ4F_MAGIC_NUMBER { return Err(Error::WrongMagicNumber); } // fixed size section let [flg_byte, bd_byte] = { let mut buffer = [0u8, 0]; input.read_exact(&mut buffer)?; buffer }; if flg_byte & FLG_VERSION_MASK != FLG_SUPPORTED_VERSION_BITS { // version is always 01 return Err(Error::UnsupportedVersion(flg_byte & FLG_VERSION_MASK)); } if flg_byte & FLG_RESERVED_MASK != 0 || bd_byte & BD_RESERVED_MASK != 0 { return Err(Error::ReservedBitsSet); } let block_mode = if flg_byte & FLG_INDEPENDENT_BLOCKS != 0 { BlockMode::Independent } else { BlockMode::Linked }; let content_checksum = flg_byte & FLG_CONTENT_CHECKSUM != 0; let block_checksums = flg_byte & FLG_BLOCK_CHECKSUMS != 0; let block_size = match (bd_byte & BD_BLOCK_SIZE_MASK) >> BD_BLOCK_SIZE_MASK_RSHIFT { i @ 0..=3 => return Err(Error::UnsupportedBlocksize(i)), 4 => 
BlockSize::Max64KB, 5 => BlockSize::Max256KB, 6 => BlockSize::Max1MB, 7 => BlockSize::Max4MB, _ => unreachable!(), }; // var len section let mut content_size = None; if flg_byte & FLG_CONTENT_SIZE != 0 { let mut buffer = [0u8; 8]; input.read_exact(&mut buffer).unwrap(); content_size = Some(u64::from_le_bytes(buffer)); } let mut dict_id = None; if flg_byte & FLG_DICTIONARY_ID != 0 { let mut buffer = [0u8; 4]; input.read_exact(&mut buffer)?; dict_id = Some(u32::from_le_bytes(buffer)); } // 1 byte header checksum let expected_checksum = { let mut buffer = [0u8; 1]; input.read_exact(&mut buffer)?; buffer[0] }; let mut hasher = XxHash32::with_seed(0); hasher.write(&original_input[4..original_input.len() - input.len() - 1]); let header_hash = (hasher.finish() >> 8) as u8; if header_hash != expected_checksum { return Err(Error::HeaderChecksumError); } Ok(FrameInfo { content_size, dict_id, block_size, block_mode, block_checksums, content_checksum, legacy_frame: false, }) } } #[derive(Debug)] pub(crate) enum BlockInfo { Compressed(u32), Uncompressed(u32), EndMark, } impl BlockInfo { pub(crate) fn read(mut input: &[u8]) -> Result { let mut size_buffer = [0u8; 4]; input.read_exact(&mut size_buffer)?; let size = u32::from_le_bytes(size_buffer); if size == 0 { Ok(BlockInfo::EndMark) } else if size & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 { Ok(BlockInfo::Uncompressed(size & !BLOCK_UNCOMPRESSED_SIZE_BIT)) } else { Ok(BlockInfo::Compressed(size)) } } pub(crate) fn write(&self, mut output: &mut [u8]) -> Result { let value = match self { BlockInfo::Compressed(len) if *len == 0 => return Err(Error::InvalidBlockInfo), BlockInfo::Compressed(len) | BlockInfo::Uncompressed(len) if *len & BLOCK_UNCOMPRESSED_SIZE_BIT != 0 => { return Err(Error::InvalidBlockInfo) } BlockInfo::Compressed(len) => *len, BlockInfo::Uncompressed(len) => *len | BLOCK_UNCOMPRESSED_SIZE_BIT, BlockInfo::EndMark => 0, }; output.write_all(&value.to_le_bytes())?; Ok(4) } } lz4_flex-0.11.1/src/frame/mod.rs000064400000000000000000000067671046102023000144610ustar 00000000000000//! LZ4 Frame Format //! //! As defined in //! //! # Example: compress data on `stdin` with frame format //! This program reads data from `stdin`, compresses it and emits it to `stdout`. //! This example can be found in `examples/compress.rs`: //! ```no_run //! use std::io; //! let stdin = io::stdin(); //! let stdout = io::stdout(); //! let mut rdr = stdin.lock(); //! // Wrap the stdout writer in a LZ4 Frame writer. //! let mut wtr = lz4_flex::frame::FrameEncoder::new(stdout.lock()); //! io::copy(&mut rdr, &mut wtr).expect("I/O operation failed"); //! wtr.finish().unwrap(); //! ``` //! use std::{fmt, io}; #[cfg_attr(feature = "safe-encode", forbid(unsafe_code))] pub(crate) mod compress; #[cfg_attr(feature = "safe-decode", forbid(unsafe_code))] pub(crate) mod decompress; pub(crate) mod header; pub use compress::{AutoFinishEncoder, FrameEncoder}; pub use decompress::FrameDecoder; pub use header::{BlockMode, BlockSize, FrameInfo}; #[derive(Debug)] #[non_exhaustive] /// Errors that can occur when de/compressing lz4. pub enum Error { /// Compression error. CompressionError(crate::block::CompressError), /// Decompression error. DecompressionError(crate::block::DecompressError), /// An io::Error was encountered. IoError(io::Error), /// Unsupported block size. UnsupportedBlocksize(u8), /// Unsupported frame version. UnsupportedVersion(u8), /// Wrong magic number for the LZ4 frame format. WrongMagicNumber, /// Reserved bits set. ReservedBitsSet, /// Block header is malformed. 
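/// For example, `BlockInfo::write` rejects a compressed length of zero and any length
/// that already has the uncompressed-size bit set.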
InvalidBlockInfo, /// Read a block larger than specified in the Frame header. BlockTooBig, /// The Frame header checksum doesn't match. HeaderChecksumError, /// The block checksum doesn't match. BlockChecksumError, /// The content checksum doesn't match. ContentChecksumError, /// Read an skippable frame. /// The caller may read the specified amount of bytes from the underlying io::Read. SkippableFrame(u32), /// External dictionaries are not supported. DictionaryNotSupported, /// Content length differs. ContentLengthError { /// Expected content length. expected: u64, /// Actual content lenght. actual: u64, }, } impl From for io::Error { fn from(e: Error) -> Self { match e { Error::IoError(e) => e, Error::CompressionError(_) | Error::DecompressionError(_) | Error::SkippableFrame(_) | Error::DictionaryNotSupported => io::Error::new(io::ErrorKind::Other, e), Error::WrongMagicNumber | Error::UnsupportedBlocksize(..) | Error::UnsupportedVersion(..) | Error::ReservedBitsSet | Error::InvalidBlockInfo | Error::BlockTooBig | Error::HeaderChecksumError | Error::ContentChecksumError | Error::BlockChecksumError | Error::ContentLengthError { .. } => io::Error::new(io::ErrorKind::InvalidData, e), } } } impl From for Error { fn from(e: io::Error) -> Self { match e.get_ref().map(|e| e.downcast_ref::()) { Some(_) => *e.into_inner().unwrap().downcast::().unwrap(), None => Error::IoError(e), } } } impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> std::fmt::Result { write!(f, "{self:?}") } } impl std::error::Error for Error {} lz4_flex-0.11.1/src/lib.rs000064400000000000000000000077451046102023000133530ustar 00000000000000//! Pure Rust, high performance implementation of LZ4 compression. //! //! A detailed explanation of the algorithm can be found [here](http://ticki.github.io/blog/how-lz4-works/). //! //! # Overview //! //! This crate provides two ways to use lz4. The first way is through the //! [`frame::FrameDecoder`](frame/struct.FrameDecoder.html) //! and //! [`frame::FrameEncoder`](frame/struct.FrameEncoder.html) //! types, which implement the `std::io::Read` and `std::io::Write` traits with the //! lz4 frame format. Unless you have a specific reason to the contrary, you //! should only use the lz4 frame format. Specifically, the lz4 frame format //! permits streaming compression or decompression. //! //! The second way is through the //! [`decompress_size_prepended`](fn.decompress_size_prepended.html) //! and //! [`compress_prepend_size`](fn.compress_prepend_size.html) //! functions. These functions provide access to the lz4 block format, and //! don't support a streaming interface directly. You should only use these types //! if you know you specifically need the lz4 block format. //! //! # Example: compress data on `stdin` with frame format //! This program reads data from `stdin`, compresses it and emits it to `stdout`. //! This example can be found in `examples/compress.rs`: //! ```no_run //! use std::io; //! let stdin = io::stdin(); //! let stdout = io::stdout(); //! let mut rdr = stdin.lock(); //! // Wrap the stdout writer in a LZ4 Frame writer. //! let mut wtr = lz4_flex::frame::FrameEncoder::new(stdout.lock()); //! io::copy(&mut rdr, &mut wtr).expect("I/O operation failed"); //! wtr.finish().unwrap(); //! ``` //! # Example: decompress data on `stdin` with frame format //! This program reads data from `stdin`, decompresses it and emits it to `stdout`. //! This example can be found in `examples/decompress.rs`: //! ```no_run //! use std::io; //! let stdin = io::stdin(); //! 
let stdout = io::stdout(); //! // Wrap the stdin reader in a LZ4 FrameDecoder. //! let mut rdr = lz4_flex::frame::FrameDecoder::new(stdin.lock()); //! let mut wtr = stdout.lock(); //! io::copy(&mut rdr, &mut wtr).expect("I/O operation failed"); //! ``` //! //! # Example: block format roundtrip //! ``` //! use lz4_flex::block::{compress_prepend_size, decompress_size_prepended}; //! let input: &[u8] = b"Hello people, what's up?"; //! let compressed = compress_prepend_size(input); //! let uncompressed = decompress_size_prepended(&compressed).unwrap(); //! assert_eq!(input, uncompressed); //! ``` //! //! ## Feature Flags //! //! - `safe-encode` uses only safe rust for encode. _enabled by default_ //! - `safe-decode` uses only safe rust for encode. _enabled by default_ //! - `frame` support for LZ4 frame format. _implies `std`, enabled by default_ //! - `std` enables dependency on the standard library. _enabled by default_ //! //! For maximum performance use `no-default-features`. //! //! For no_std support only the [`block format`](block/index.html) is supported. //! //! #![deny(warnings)] #![deny(missing_docs)] #![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(docsrs, feature(doc_cfg))] #![cfg_attr(nightly, feature(optimize_attribute))] #[cfg_attr(test, macro_use)] extern crate alloc; #[cfg(test)] #[macro_use] extern crate more_asserts; pub mod block; #[cfg(feature = "frame")] #[cfg_attr(docsrs, doc(cfg(feature = "frame")))] pub mod frame; #[allow(dead_code)] mod fastcpy; #[allow(dead_code)] mod fastcpy_unsafe; #[deprecated( since = "0.11.0", note = "This re-export is deprecated as it can be confused with the frame API and is not suitable for very large data, use block:: instead" )] pub use block::{compress, compress_into, compress_prepend_size}; #[deprecated( since = "0.11.0", note = "This re-export is deprecated as it can be confused with the frame API and is not suitable for very large data, use block:: instead" )] pub use block::{decompress, decompress_into, decompress_size_prepended}; #[cfg_attr( all(feature = "safe-encode", feature = "safe-decode"), forbid(unsafe_code) )] pub(crate) mod sink; lz4_flex-0.11.1/src/sink.rs000064400000000000000000000235351046102023000135440ustar 00000000000000#[allow(unused_imports)] use alloc::vec::Vec; use crate::fastcpy::slice_copy; /// Returns a Sink implementation appropriate for outputing up to `required_capacity` /// bytes at `vec[offset..offset+required_capacity]`. /// It can be either a `SliceSink` (pre-filling the vec with zeroes if necessary) /// when the `safe-decode` feature is enabled, or `VecSink` otherwise. /// The argument `pos` defines the initial output position in the Sink. #[inline] #[cfg(feature = "frame")] pub fn vec_sink_for_compression( vec: &mut Vec, offset: usize, pos: usize, required_capacity: usize, ) -> SliceSink { return { vec.resize(offset + required_capacity, 0); SliceSink::new(&mut vec[offset..], pos) }; } /// Returns a Sink implementation appropriate for outputing up to `required_capacity` /// bytes at `vec[offset..offset+required_capacity]`. /// It can be either a `SliceSink` (pre-filling the vec with zeroes if necessary) /// when the `safe-decode` feature is enabled, or `VecSink` otherwise. /// The argument `pos` defines the initial output position in the Sink. 
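/// For instance (illustrative values): `vec_sink_for_decompression(&mut v, 0, pos, cap)`
/// resizes `v` to `cap` bytes (zero-filling any newly added tail) and returns a
/// `SliceSink` over the whole vec with its write cursor placed at `pos`.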
#[cfg(feature = "frame")] #[inline] pub fn vec_sink_for_decompression( vec: &mut Vec, offset: usize, pos: usize, required_capacity: usize, ) -> SliceSink { return { vec.resize(offset + required_capacity, 0); SliceSink::new(&mut vec[offset..], pos) }; } pub trait Sink { /// Returns a raw ptr to the first unfilled byte of the Sink. Analogous to `[pos..].as_ptr()`. #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))] unsafe fn pos_mut_ptr(&mut self) -> *mut u8; /// read byte at position fn byte_at(&mut self, pos: usize) -> u8; /// Pushes a byte to the end of the Sink. #[cfg(feature = "safe-encode")] fn push(&mut self, byte: u8); #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))] unsafe fn base_mut_ptr(&mut self) -> *mut u8; fn pos(&self) -> usize; fn capacity(&self) -> usize; #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))] unsafe fn set_pos(&mut self, new_pos: usize); #[cfg(feature = "safe-decode")] fn extend_with_fill(&mut self, byte: u8, len: usize); /// Extends the Sink with `data`. fn extend_from_slice(&mut self, data: &[u8]); fn extend_from_slice_wild(&mut self, data: &[u8], copy_len: usize); /// Copies `len` bytes starting from `start` to the end of the Sink. /// # Panics /// Panics if `start` >= `pos`. #[cfg(feature = "safe-decode")] fn extend_from_within(&mut self, start: usize, wild_len: usize, copy_len: usize); #[cfg(feature = "safe-decode")] fn extend_from_within_overlapping(&mut self, start: usize, num_bytes: usize); } /// SliceSink is used as target to de/compress data into a preallocated and possibly uninitialized /// `&[u8]` /// space. /// /// # Handling of Capacity /// Extend methods will panic if there's insufficient capacity left in the Sink. /// /// # Invariants /// - Bytes `[..pos()]` are always initialized. pub struct SliceSink<'a> { /// The working slice, which may contain uninitialized bytes output: &'a mut [u8], /// Number of bytes in start of `output` guaranteed to be initialized pos: usize, } impl<'a> SliceSink<'a> { /// Creates a `Sink` backed by the given byte slice. /// `pos` defines the initial output position in the Sink. /// # Panics /// Panics if `pos` is out of bounds. #[inline] pub fn new(output: &'a mut [u8], pos: usize) -> Self { // SAFETY: Caller guarantees that all elements of `output[..pos]` are initialized. let _ = &mut output[..pos]; // bounds check pos SliceSink { output, pos } } } impl<'a> Sink for SliceSink<'a> { /// Returns a raw ptr to the first unfilled byte of the Sink. Analogous to `[pos..].as_ptr()`. #[inline] #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))] unsafe fn pos_mut_ptr(&mut self) -> *mut u8 { self.base_mut_ptr().add(self.pos()) as *mut u8 } /// Pushes a byte to the end of the Sink. #[inline] fn byte_at(&mut self, pos: usize) -> u8 { self.output[pos] } /// Pushes a byte to the end of the Sink. 
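/// Writes past the end of the backing slice panic on the index check rather than
/// growing the buffer, which is what upholds the capacity behaviour documented on
/// `SliceSink`.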
#[inline] #[cfg(feature = "safe-encode")] fn push(&mut self, byte: u8) { self.output[self.pos] = byte; self.pos += 1; } #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))] unsafe fn base_mut_ptr(&mut self) -> *mut u8 { self.output.as_mut_ptr() } #[inline] fn pos(&self) -> usize { self.pos } #[inline] fn capacity(&self) -> usize { self.output.len() } #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))] #[inline] unsafe fn set_pos(&mut self, new_pos: usize) { debug_assert!(new_pos <= self.capacity()); self.pos = new_pos; } #[inline] #[cfg(feature = "safe-decode")] fn extend_with_fill(&mut self, byte: u8, len: usize) { self.output[self.pos..self.pos + len].fill(byte); self.pos += len; } /// Extends the Sink with `data`. #[inline] fn extend_from_slice(&mut self, data: &[u8]) { self.extend_from_slice_wild(data, data.len()) } #[inline] fn extend_from_slice_wild(&mut self, data: &[u8], copy_len: usize) { assert!(copy_len <= data.len()); slice_copy(data, &mut self.output[self.pos..(self.pos) + data.len()]); self.pos += copy_len; } /// Copies `len` bytes starting from `start` to the end of the Sink. /// # Panics /// Panics if `start` >= `pos`. #[inline] #[cfg(feature = "safe-decode")] fn extend_from_within(&mut self, start: usize, wild_len: usize, copy_len: usize) { self.output.copy_within(start..start + wild_len, self.pos); self.pos += copy_len; } #[inline] #[cfg(feature = "safe-decode")] #[cfg_attr(nightly, optimize(size))] // to avoid loop unrolling fn extend_from_within_overlapping(&mut self, start: usize, num_bytes: usize) { let offset = self.pos - start; for i in start + offset..start + offset + num_bytes { self.output[i] = self.output[i - offset]; } self.pos += num_bytes; } } /// PtrSink is used as target to de/compress data into a preallocated and possibly uninitialized /// `&[u8]` /// space. /// /// #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))] pub struct PtrSink { /// The working slice, which may contain uninitialized bytes output: *mut u8, /// Number of bytes in start of `output` guaranteed to be initialized pos: usize, /// Number of bytes in output available cap: usize, } #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))] impl PtrSink { /// Creates a `Sink` backed by the given byte slice. /// `pos` defines the initial output position in the Sink. /// # Panics /// Panics if `pos` is out of bounds. #[inline] pub fn from_vec(output: &mut Vec, pos: usize) -> Self { // SAFETY: Bytes behind pointer may be uninitialized. Self { output: output.as_mut_ptr(), pos, cap: output.capacity(), } } } #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))] impl Sink for PtrSink { /// Returns a raw ptr to the first unfilled byte of the Sink. Analogous to `[pos..].as_ptr()`. #[inline] #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))] unsafe fn pos_mut_ptr(&mut self) -> *mut u8 { self.base_mut_ptr().add(self.pos()) as *mut u8 } /// Pushes a byte to the end of the Sink. #[inline] fn byte_at(&mut self, pos: usize) -> u8 { unsafe { self.output.add(pos).read() } } /// Pushes a byte to the end of the Sink. 
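/// Unlike `SliceSink::push`, this writes through the raw pointer without a bounds
/// check, so the caller must have reserved enough capacity in the backing vec.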
#[inline] #[cfg(feature = "safe-encode")] fn push(&mut self, byte: u8) { unsafe { self.pos_mut_ptr().write(byte); } self.pos += 1; } #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))] unsafe fn base_mut_ptr(&mut self) -> *mut u8 { self.output } #[inline] fn pos(&self) -> usize { self.pos } #[inline] fn capacity(&self) -> usize { self.cap } #[cfg(not(all(feature = "safe-encode", feature = "safe-decode")))] #[inline] unsafe fn set_pos(&mut self, new_pos: usize) { debug_assert!(new_pos <= self.capacity()); self.pos = new_pos; } #[inline] #[cfg(feature = "safe-decode")] fn extend_with_fill(&mut self, _byte: u8, _len: usize) { unreachable!(); } /// Extends the Sink with `data`. #[inline] fn extend_from_slice(&mut self, data: &[u8]) { self.extend_from_slice_wild(data, data.len()) } #[inline] fn extend_from_slice_wild(&mut self, data: &[u8], copy_len: usize) { assert!(copy_len <= data.len()); unsafe { core::ptr::copy_nonoverlapping(data.as_ptr(), self.pos_mut_ptr(), copy_len); } self.pos += copy_len; } /// Copies `len` bytes starting from `start` to the end of the Sink. /// # Panics /// Panics if `start` >= `pos`. #[inline] #[cfg(feature = "safe-decode")] fn extend_from_within(&mut self, _start: usize, _wild_len: usize, _copy_len: usize) { unreachable!(); } #[inline] #[cfg(feature = "safe-decode")] fn extend_from_within_overlapping(&mut self, _start: usize, _num_bytes: usize) { unreachable!(); } } #[cfg(test)] mod tests { #[test] #[cfg(any(feature = "safe-encode", feature = "safe-decode"))] fn test_sink_slice() { use crate::sink::Sink; use crate::sink::SliceSink; use alloc::vec::Vec; let mut data = Vec::new(); data.resize(5, 0); let sink = SliceSink::new(&mut data, 1); assert_eq!(sink.pos(), 1); assert_eq!(sink.capacity(), 5); } }
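A small end-to-end sketch of the frame API shown above (it assumes `FrameEncoder::finish`
hands back the wrapped writer, here a `Vec<u8>`):

```rust
use std::io::{Read, Write};

// Compress a buffer into an in-memory LZ4 frame, then decompress it again.
let input = b"hello lz4 frame format";
let mut encoder = lz4_flex::frame::FrameEncoder::new(Vec::new());
encoder.write_all(input).unwrap();
let compressed = encoder.finish().unwrap();

let mut decoder = lz4_flex::frame::FrameDecoder::new(compressed.as_slice());
let mut decompressed = Vec::new();
decoder.read_to_end(&mut decompressed).unwrap();
assert_eq!(&decompressed[..], &input[..]);
```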