lzma-rs-0.3.0/.cargo_vcs_info.json0000644000000001360000000000100124210ustar { "git": { "sha1": "da82bd1182993a07db035e285a8f8a87d32a30c2" }, "path_in_vcs": "" }lzma-rs-0.3.0/.gitignore000064400000000000000000000000511046102023000131750ustar 00000000000000/target/ **/*.rs.bk Cargo.lock .DS_Store lzma-rs-0.3.0/CHANGELOG.md000064400000000000000000000056111046102023000130250ustar 00000000000000## 0.3.0 - 2023-01-04 - Update minimum supported Rust version: 1.40.0 -> 1.50.0. - Update dependencies (https://github.com/gendx/lzma-rs/pull/78): - `byteorder`: ^1.0.0 -> 1.4.3 - `crc`: ^1.0.0 -> 3.0.0 - `log`: ^0.4.14 -> 0.4.17 - `env_logger`: ^0.8.3 -> 0.9.0 - Expose a new `raw_decoder` API (https://github.com/gendx/lzma-rs/pull/74). - Reduce the number of allocations (https://github.com/gendx/lzma-rs/pull/77). - Display features on rustdoc (https://github.com/gendx/lzma-rs/pull/70). - Configure formatting style to `imports_granularity = "Module"` (https://github.com/gendx/lzma-rs/pull/82). - Add code coverage reporting (https://github.com/gendx/lzma-rs/pull/86). ## 0.2.0 - 2021-05-02 - Update minimum supported Rust version: 1.32.0 -> 1.40.0. - Update dependencies: - `log`: ^0.4.8 -> ^0.4.14 - `env_logger`: 0.7.1 -> ^0.8.3 - [Breaking change] Rename acronyms to be lowercase, following clippy::upper-case-acronyms. - [Breaking change] Add a memory limit option (https://github.com/gendx/lzma-rs/pull/50). - Fix bug in LZMA2 decompression (https://github.com/gendx/lzma-rs/pull/61). - Fix bug in CRC32 validation (https://github.com/gendx/lzma-rs/pull/56). - Add a streaming mode for LZMA decompression, gated by the `stream` feature. - Add more fuzzing targets, including comparison with the `xz2` crate. - Various improvements: benchmarks, fix lint warnings. - Migrate from Travis-CI to GitHub Actions. ## 0.1.4 - 2021-05-02 - Backports from 0.2.0: - Fix bug in LZMA2 decompression (https://github.com/gendx/lzma-rs/pull/61). 
- Fix bug in CRC32 validation (https://github.com/gendx/lzma-rs/pull/56). ## 0.1.3 - 2020-05-05 - Minimum supported Rust version: 1.32.0. - Update dependencies: - `log`: ^0.4.0 -> ^0.4.8 - `env_logger`: 0.6.0 -> ^0.7.1 - Gate logging behind an opt-in feature. This improves decoding performance by ~25% (https://github.com/gendx/lzma-rs/pull/31). - Lazily allocate the circular buffer (https://github.com/gendx/lzma-rs/pull/22). This improves memory usage (especially for WebAssembly targets) at the expense of a ~5% performance regression (https://github.com/gendx/lzma-rs/issues/27). - Return an error instead of panicking on unsupported SHA-256 checksum for XZ decoding (https://github.com/gendx/lzma-rs/pull/40). - Add Clippy to CI. - Document public APIs. - Deny missing docs, missing Debug implementations and build warnings. - Forbid unsafe code. - Remove extern statements that are unnecessary on the 2018 edition. ## 0.1.2 - 2019-12-17 - Fix bug in the range coder (https://github.com/gendx/lzma-rs/issues/15). - Add support for specifying the unpacked size outside of the header (https://github.com/gendx/lzma-rs/pull/17). - Migrate to Rust 2018 edition. - Add benchmarks. - Fix some Clippy warnings. ## 0.1.1 - 2019-02-24 - Upgrade `env_logger` dependency. - Refactoring to use `std::io::Take`, operator `?`. ## 0.1.0 - 2018-01-07 - Initial release. lzma-rs-0.3.0/Cargo.toml0000644000000025470000000000100104270ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2018" name = "lzma-rs" version = "0.3.0" authors = ["Guillaume Endignoux "] exclude = [ "tests/*", "benches/*", "fuzz/*", ".github/*", "Cargo.lock", ] description = "A codec for LZMA, LZMA2 and XZ written in pure Rust" readme = "README.md" keywords = [ "lzma", "compression", "decompression", ] categories = ["compression"] license = "MIT" repository = "https://github.com/gendx/lzma-rs" [package.metadata.docs.rs] features = [ "stream", "raw_decoder", ] rustdoc-args = [ "--cfg", "docsrs", ] [dependencies.byteorder] version = "1.4.3" [dependencies.crc] version = "3.0.0" [dependencies.env_logger] version = "0.9.0" optional = true [dependencies.log] version = "0.4.17" optional = true [dev-dependencies.rust-lzma] version = "0.5" [features] enable_logging = [ "env_logger", "log", ] raw_decoder = [] stream = [] lzma-rs-0.3.0/Cargo.toml.orig000064400000000000000000000014231046102023000141000ustar 00000000000000[package] name = "lzma-rs" description = "A codec for LZMA, LZMA2 and XZ written in pure Rust" version = "0.3.0" license = "MIT" authors = ["Guillaume Endignoux "] repository = "https://github.com/gendx/lzma-rs" readme = "README.md" categories = ["compression"] keywords = ["lzma", "compression", "decompression"] exclude = ["tests/*", "benches/*", "fuzz/*", ".github/*", "Cargo.lock"] edition = "2018" [dependencies] byteorder = "1.4.3" crc = "3.0.0" log = { version = "0.4.17", optional = true } env_logger = { version = "0.9.0", optional = true } [dev-dependencies] rust-lzma = "0.5" [features] enable_logging = ["env_logger", "log"] stream = [] raw_decoder = [] [package.metadata.docs.rs] features = ["stream", "raw_decoder"] rustdoc-args = ["--cfg", "docsrs"] lzma-rs-0.3.0/LICENSE000064400000000000000000000020741046102023000122210ustar 00000000000000MIT License Copyright (c) 2017 - 2018 Guillaume Endignoux Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to 
deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. lzma-rs-0.3.0/README.md000064400000000000000000000030351046102023000124710ustar 00000000000000# lzma-rs [![Crate](https://img.shields.io/crates/v/lzma-rs.svg)](https://crates.io/crates/lzma-rs) [![Documentation](https://docs.rs/lzma-rs/badge.svg)](https://docs.rs/lzma-rs) [![Safety Dance](https://img.shields.io/badge/unsafe-forbidden-success.svg)](https://github.com/rust-secure-code/safety-dance/) ![Build Status](https://github.com/gendx/lzma-rs/workflows/Build%20and%20run%20tests/badge.svg) [![Minimum rust 1.50](https://img.shields.io/badge/rust-1.50%2B-orange.svg)](https://github.com/rust-lang/rust/blob/master/RELEASES.md#version-1500-2021-02-11) [![Codecov](https://codecov.io/gh/gendx/lzma-rs/branch/master/graph/badge.svg?token=HVo74E0wzh)](https://codecov.io/gh/gendx/lzma-rs) This project is a decoder for LZMA and its variants written in pure Rust, with focus on clarity. It already supports LZMA, LZMA2 and a subset of the `.xz` file format. ## Usage Decompress a `.xz` file. 
```rust let filename = "foo.xz"; let mut f = std::io::BufReader::new(std::fs::File::open(filename).unwrap()); // "decomp" can be anything that implements "std::io::Write" let mut decomp: Vec = Vec::new(); lzma_rs::xz_decompress(&mut f, &mut decomp).unwrap(); // Decompressed content is now in "decomp" ``` ## Encoder For now, there is also a dumb encoder that only uses byte literals, with many hard-coded constants for code simplicity. Better encoders are welcome! ## Contributing Pull-requests are welcome, to improve the decoder, add better encoders, or more tests. Ultimately, this project should also implement .xz and .7z files. ## License MIT lzma-rs-0.3.0/rustfmt.toml000064400000000000000000000000371046102023000136120ustar 00000000000000imports_granularity = "Module" lzma-rs-0.3.0/src/decode/lzbuffer.rs000064400000000000000000000202711046102023000154120ustar 00000000000000use crate::error; use std::io; pub trait LzBuffer where W: io::Write, { fn len(&self) -> usize; // Retrieve the last byte or return a default fn last_or(&self, lit: u8) -> u8; // Retrieve the n-th last byte fn last_n(&self, dist: usize) -> error::Result; // Append a literal fn append_literal(&mut self, lit: u8) -> error::Result<()>; // Fetch an LZ sequence (length, distance) from inside the buffer fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()>; // Get a reference to the output sink fn get_output(&self) -> &W; // Get a mutable reference to the output sink fn get_output_mut(&mut self) -> &mut W; // Consumes this buffer and flushes any data fn finish(self) -> io::Result; // Consumes this buffer without flushing any data fn into_output(self) -> W; } // An accumulating buffer for LZ sequences pub struct LzAccumBuffer where W: io::Write, { stream: W, // Output sink buf: Vec, // Buffer memlimit: usize, // Buffer memory limit len: usize, // Total number of bytes sent through the buffer } impl LzAccumBuffer where W: io::Write, { pub fn from_stream(stream: W, memlimit: usize) -> Self 
{ Self { stream, buf: Vec::new(), memlimit, len: 0, } } // Append bytes pub fn append_bytes(&mut self, buf: &[u8]) { self.buf.extend_from_slice(buf); self.len += buf.len(); } // Reset the internal dictionary pub fn reset(&mut self) -> io::Result<()> { self.stream.write_all(self.buf.as_slice())?; self.buf.clear(); self.len = 0; Ok(()) } } impl LzBuffer for LzAccumBuffer where W: io::Write, { fn len(&self) -> usize { self.len } // Retrieve the last byte or return a default fn last_or(&self, lit: u8) -> u8 { let buf_len = self.buf.len(); if buf_len == 0 { lit } else { self.buf[buf_len - 1] } } // Retrieve the n-th last byte fn last_n(&self, dist: usize) -> error::Result { let buf_len = self.buf.len(); if dist > buf_len { return Err(error::Error::LzmaError(format!( "Match distance {} is beyond output size {}", dist, buf_len ))); } Ok(self.buf[buf_len - dist]) } // Append a literal fn append_literal(&mut self, lit: u8) -> error::Result<()> { let new_len = self.len + 1; if new_len > self.memlimit { Err(error::Error::LzmaError(format!( "exceeded memory limit of {}", self.memlimit ))) } else { self.buf.push(lit); self.len = new_len; Ok(()) } } // Fetch an LZ sequence (length, distance) from inside the buffer fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()> { lzma_debug!("LZ {{ len: {}, dist: {} }}", len, dist); let buf_len = self.buf.len(); if dist > buf_len { return Err(error::Error::LzmaError(format!( "LZ distance {} is beyond output size {}", dist, buf_len ))); } let mut offset = buf_len - dist; for _ in 0..len { let x = self.buf[offset]; self.buf.push(x); offset += 1; } self.len += len; Ok(()) } // Get a reference to the output sink fn get_output(&self) -> &W { &self.stream } // Get a mutable reference to the output sink fn get_output_mut(&mut self) -> &mut W { &mut self.stream } // Consumes this buffer and flushes any data fn finish(mut self) -> io::Result { self.stream.write_all(self.buf.as_slice())?; self.stream.flush()?; Ok(self.stream) } // 
Consumes this buffer without flushing any data fn into_output(self) -> W { self.stream } } // A circular buffer for LZ sequences pub struct LzCircularBuffer where W: io::Write, { stream: W, // Output sink buf: Vec, // Circular buffer dict_size: usize, // Length of the buffer memlimit: usize, // Buffer memory limit cursor: usize, // Current position len: usize, // Total number of bytes sent through the buffer } impl LzCircularBuffer where W: io::Write, { pub fn from_stream(stream: W, dict_size: usize, memlimit: usize) -> Self { lzma_info!("Dict size in LZ buffer: {}", dict_size); Self { stream, buf: Vec::new(), dict_size, memlimit, cursor: 0, len: 0, } } fn get(&self, index: usize) -> u8 { *self.buf.get(index).unwrap_or(&0) } fn set(&mut self, index: usize, value: u8) -> error::Result<()> { let new_len = index + 1; if self.buf.len() < new_len { if new_len <= self.memlimit { self.buf.resize(new_len, 0); } else { return Err(error::Error::LzmaError(format!( "exceeded memory limit of {}", self.memlimit ))); } } self.buf[index] = value; Ok(()) } } impl LzBuffer for LzCircularBuffer where W: io::Write, { fn len(&self) -> usize { self.len } // Retrieve the last byte or return a default fn last_or(&self, lit: u8) -> u8 { if self.len == 0 { lit } else { self.get((self.dict_size + self.cursor - 1) % self.dict_size) } } // Retrieve the n-th last byte fn last_n(&self, dist: usize) -> error::Result { if dist > self.dict_size { return Err(error::Error::LzmaError(format!( "Match distance {} is beyond dictionary size {}", dist, self.dict_size ))); } if dist > self.len { return Err(error::Error::LzmaError(format!( "Match distance {} is beyond output size {}", dist, self.len ))); } let offset = (self.dict_size + self.cursor - dist) % self.dict_size; Ok(self.get(offset)) } // Append a literal fn append_literal(&mut self, lit: u8) -> error::Result<()> { self.set(self.cursor, lit)?; self.cursor += 1; self.len += 1; // Flush the circular buffer to the output if self.cursor == 
self.dict_size { self.stream.write_all(self.buf.as_slice())?; self.cursor = 0; } Ok(()) } // Fetch an LZ sequence (length, distance) from inside the buffer fn append_lz(&mut self, len: usize, dist: usize) -> error::Result<()> { lzma_debug!("LZ {{ len: {}, dist: {} }}", len, dist); if dist > self.dict_size { return Err(error::Error::LzmaError(format!( "LZ distance {} is beyond dictionary size {}", dist, self.dict_size ))); } if dist > self.len { return Err(error::Error::LzmaError(format!( "LZ distance {} is beyond output size {}", dist, self.len ))); } let mut offset = (self.dict_size + self.cursor - dist) % self.dict_size; for _ in 0..len { let x = self.get(offset); self.append_literal(x)?; offset += 1; if offset == self.dict_size { offset = 0 } } Ok(()) } // Get a reference to the output sink fn get_output(&self) -> &W { &self.stream } // Get a mutable reference to the output sink fn get_output_mut(&mut self) -> &mut W { &mut self.stream } // Consumes this buffer and flushes any data fn finish(mut self) -> io::Result { if self.cursor > 0 { self.stream.write_all(&self.buf[0..self.cursor])?; self.stream.flush()?; } Ok(self.stream) } // Consumes this buffer without flushing any data fn into_output(self) -> W { self.stream } } lzma-rs-0.3.0/src/decode/lzma.rs000064400000000000000000000535061046102023000145450ustar 00000000000000use crate::decode::lzbuffer::{LzBuffer, LzCircularBuffer}; use crate::decode::rangecoder::{BitTree, LenDecoder, RangeDecoder}; use crate::decompress::{Options, UnpackedSize}; use crate::error; use crate::util::vec2d::Vec2D; use byteorder::{LittleEndian, ReadBytesExt}; use std::io; /// Maximum input data that can be processed in one iteration. /// Libhtp uses the following equation to define the maximum number of bits /// for the worst case scenario: /// log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160 const MAX_REQUIRED_INPUT: usize = 20; /// Processing mode for decompression. 
/// /// Tells the decompressor if we should expect more data after parsing the /// current input. #[derive(Debug, PartialEq)] enum ProcessingMode { /// Streaming mode. Process the input bytes but assume there will be more /// chunks of input data to receive in future calls to `process_mode()`. Partial, /// Synchronous mode. Process the input bytes and confirm end of stream has been reached. /// Use this mode if you are processing a fixed buffer of compressed data, or after /// using `Mode::Partial` to check for the end of stream. Finish, } /// Result of the next iteration of processing. /// /// Indicates whether processing should continue or is finished. #[derive(Debug, PartialEq)] enum ProcessingStatus { Continue, Finished, } #[derive(Debug, Copy, Clone)] /// LZMA 'lclppb' decompression properties. pub struct LzmaProperties { /// The number of literal context bits. /// /// The most `lc` significant bits of the previous byte are part of the literal context. /// `lc` must not be greater than 8. pub lc: u32, // 0..=8 /// The number of literal position bits. /// /// `lp` must not be greater than 4. pub lp: u32, // 0..=4 /// The number of position bits. /// /// The context for literal/match is plaintext offset modulo `2^pb`. /// `pb` must not be greater than 4. pub pb: u32, // 0..=4 } impl LzmaProperties { /// Assert the validity of the LZMA properties. pub(crate) fn validate(&self) { assert!(self.lc <= 8); assert!(self.lp <= 4); assert!(self.pb <= 4); } } #[derive(Debug, Copy, Clone)] /// LZMA decompression parameters. pub struct LzmaParams { /// The LZMA 'lclppb' decompression properties. pub(crate) properties: LzmaProperties, /// The dictionary size to use when decompressing. pub(crate) dict_size: u32, /// The size of the unpacked data. pub(crate) unpacked_size: Option, } impl LzmaParams { /// Create an new instance of LZMA parameters. 
#[cfg(feature = "raw_decoder")] pub fn new( properties: LzmaProperties, dict_size: u32, unpacked_size: Option, ) -> LzmaParams { Self { properties, dict_size, unpacked_size, } } /// Read LZMA parameters from the LZMA stream header. pub fn read_header(input: &mut R, options: &Options) -> error::Result where R: io::BufRead, { // Properties let props = input.read_u8().map_err(error::Error::HeaderTooShort)?; let mut pb = props as u32; if pb >= 225 { return Err(error::Error::LzmaError(format!( "LZMA header invalid properties: {} must be < 225", pb ))); } let lc: u32 = pb % 9; pb /= 9; let lp: u32 = pb % 5; pb /= 5; lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", lc, lp, pb); // Dictionary let dict_size_provided = input .read_u32::() .map_err(error::Error::HeaderTooShort)?; let dict_size = if dict_size_provided < 0x1000 { 0x1000 } else { dict_size_provided }; lzma_info!("Dict size: {}", dict_size); // Unpacked size let unpacked_size: Option = match options.unpacked_size { UnpackedSize::ReadFromHeader => { let unpacked_size_provided = input .read_u64::() .map_err(error::Error::HeaderTooShort)?; let marker_mandatory: bool = unpacked_size_provided == 0xFFFF_FFFF_FFFF_FFFF; if marker_mandatory { None } else { Some(unpacked_size_provided) } } UnpackedSize::ReadHeaderButUseProvided(x) => { input .read_u64::() .map_err(error::Error::HeaderTooShort)?; x } UnpackedSize::UseProvided(x) => x, }; lzma_info!("Unpacked size: {:?}", unpacked_size); let params = LzmaParams { properties: LzmaProperties { lc, lp, pb }, dict_size, unpacked_size, }; Ok(params) } } #[derive(Debug)] pub(crate) struct DecoderState { // Buffer input data here if we need more for decompression. Up to // MAX_REQUIRED_INPUT bytes can be consumed during one iteration. 
partial_input_buf: std::io::Cursor<[u8; MAX_REQUIRED_INPUT]>, pub(crate) lzma_props: LzmaProperties, unpacked_size: Option, literal_probs: Vec2D, pos_slot_decoder: [BitTree; 4], align_decoder: BitTree, pos_decoders: [u16; 115], is_match: [u16; 192], // true = LZ, false = literal is_rep: [u16; 12], is_rep_g0: [u16; 12], is_rep_g1: [u16; 12], is_rep_g2: [u16; 12], is_rep_0long: [u16; 192], state: usize, rep: [usize; 4], len_decoder: LenDecoder, rep_len_decoder: LenDecoder, } impl DecoderState { pub fn new(lzma_props: LzmaProperties, unpacked_size: Option) -> Self { lzma_props.validate(); DecoderState { partial_input_buf: std::io::Cursor::new([0; MAX_REQUIRED_INPUT]), lzma_props, unpacked_size, literal_probs: Vec2D::init(0x400, (1 << (lzma_props.lc + lzma_props.lp), 0x300)), pos_slot_decoder: [ BitTree::new(6), BitTree::new(6), BitTree::new(6), BitTree::new(6), ], align_decoder: BitTree::new(4), pos_decoders: [0x400; 115], is_match: [0x400; 192], is_rep: [0x400; 12], is_rep_g0: [0x400; 12], is_rep_g1: [0x400; 12], is_rep_g2: [0x400; 12], is_rep_0long: [0x400; 192], state: 0, rep: [0; 4], len_decoder: LenDecoder::new(), rep_len_decoder: LenDecoder::new(), } } pub fn reset_state(&mut self, new_props: LzmaProperties) { new_props.validate(); if self.lzma_props.lc + self.lzma_props.lp == new_props.lc + new_props.lp { // We can reset here by filling the existing buffer with 0x400. self.literal_probs.fill(0x400); } else { // We need to reallocate because of the new size of `lc+lp`. self.literal_probs = Vec2D::init(0x400, (1 << (new_props.lc + new_props.lp), 0x300)); } self.lzma_props = new_props; self.pos_slot_decoder.iter_mut().for_each(|t| t.reset()); self.align_decoder.reset(); // For stack-allocated arrays, it was found to be faster to re-create new arrays // dropping the existing one, rather than using `fill` to reset the contents to zero. // Heap-based arrays use fill to keep their allocation rather than reallocate. 
self.pos_decoders = [0x400; 115]; self.is_match = [0x400; 192]; self.is_rep = [0x400; 12]; self.is_rep_g0 = [0x400; 12]; self.is_rep_g1 = [0x400; 12]; self.is_rep_g2 = [0x400; 12]; self.is_rep_0long = [0x400; 192]; self.state = 0; self.rep = [0; 4]; self.len_decoder.reset(); self.rep_len_decoder.reset(); } pub fn set_unpacked_size(&mut self, unpacked_size: Option) { self.unpacked_size = unpacked_size; } pub fn process<'a, W: io::Write, LZB: LzBuffer, R: io::BufRead>( &mut self, output: &mut LZB, rangecoder: &mut RangeDecoder<'a, R>, ) -> error::Result<()> { self.process_mode(output, rangecoder, ProcessingMode::Finish) } #[cfg(feature = "stream")] pub fn process_stream<'a, W: io::Write, LZB: LzBuffer, R: io::BufRead>( &mut self, output: &mut LZB, rangecoder: &mut RangeDecoder<'a, R>, ) -> error::Result<()> { self.process_mode(output, rangecoder, ProcessingMode::Partial) } /// Process the next iteration of the loop. /// /// If the update flag is true, the decoder's state will be updated. /// /// Returns `ProcessingStatus` to determine whether one should continue /// processing the loop. fn process_next_inner<'a, W: io::Write, LZB: LzBuffer, R: io::BufRead>( &mut self, output: &mut LZB, rangecoder: &mut RangeDecoder<'a, R>, update: bool, ) -> error::Result { let pos_state = output.len() & ((1 << self.lzma_props.pb) - 1); // Literal if !rangecoder.decode_bit( // TODO: assumes pb = 2 ?? &mut self.is_match[(self.state << 4) + pos_state], update, )? { let byte: u8 = self.decode_literal(output, rangecoder, update)?; if update { lzma_debug!("Literal: {}", byte); output.append_literal(byte)?; self.state = if self.state < 4 { 0 } else if self.state < 10 { self.state - 3 } else { self.state - 6 }; } return Ok(ProcessingStatus::Continue); } // LZ let mut len: usize; // Distance is repeated from LRU if rangecoder.decode_bit(&mut self.is_rep[self.state], update)? { // dist = rep[0] if !rangecoder.decode_bit(&mut self.is_rep_g0[self.state], update)? 
{ // len = 1 if !rangecoder.decode_bit( &mut self.is_rep_0long[(self.state << 4) + pos_state], update, )? { // update state (short rep) if update { self.state = if self.state < 7 { 9 } else { 11 }; let dist = self.rep[0] + 1; output.append_lz(1, dist)?; } return Ok(ProcessingStatus::Continue); } // dist = rep[i] } else { let idx: usize; if !rangecoder.decode_bit(&mut self.is_rep_g1[self.state], update)? { idx = 1; } else if !rangecoder.decode_bit(&mut self.is_rep_g2[self.state], update)? { idx = 2; } else { idx = 3; } if update { // Update LRU let dist = self.rep[idx]; for i in (0..idx).rev() { self.rep[i + 1] = self.rep[i]; } self.rep[0] = dist } } len = self.rep_len_decoder.decode(rangecoder, pos_state, update)?; if update { // update state (rep) self.state = if self.state < 7 { 8 } else { 11 }; } // New distance } else { if update { // Update LRU self.rep[3] = self.rep[2]; self.rep[2] = self.rep[1]; self.rep[1] = self.rep[0]; } len = self.len_decoder.decode(rangecoder, pos_state, update)?; if update { // update state (match) self.state = if self.state < 7 { 7 } else { 10 }; } let rep_0 = self.decode_distance(rangecoder, len, update)?; if update { self.rep[0] = rep_0; if self.rep[0] == 0xFFFF_FFFF { if rangecoder.is_finished_ok()? { return Ok(ProcessingStatus::Finished); } return Err(error::Error::LzmaError(String::from( "Found end-of-stream marker but more bytes are available", ))); } } } if update { len += 2; let dist = self.rep[0] + 1; output.append_lz(len, dist)?; } Ok(ProcessingStatus::Continue) } fn process_next<'a, W: io::Write, LZB: LzBuffer, R: io::BufRead>( &mut self, output: &mut LZB, rangecoder: &mut RangeDecoder<'a, R>, ) -> error::Result { self.process_next_inner(output, rangecoder, true) } /// Try to process the next iteration of the loop. /// /// This will check to see if there is enough data to consume and advance the /// decompressor. Needed in streaming mode to avoid corrupting the state while /// processing incomplete chunks of data. 
fn try_process_next>( &mut self, output: &mut LZB, buf: &[u8], range: u32, code: u32, ) -> error::Result<()> { let mut temp = std::io::Cursor::new(buf); let mut rangecoder = RangeDecoder::from_parts(&mut temp, range, code); let _ = self.process_next_inner(output, &mut rangecoder, false)?; Ok(()) } /// Utility function to read data into the partial input buffer. fn read_partial_input_buf<'a, R: io::BufRead>( &mut self, rangecoder: &mut RangeDecoder<'a, R>, ) -> error::Result<()> { // Fill as much of the tmp buffer as possible let start = self.partial_input_buf.position() as usize; let bytes_read = rangecoder.read_into(&mut self.partial_input_buf.get_mut()[start..])? as u64; self.partial_input_buf .set_position(self.partial_input_buf.position() + bytes_read); Ok(()) } fn process_mode<'a, W: io::Write, LZB: LzBuffer, R: io::BufRead>( &mut self, output: &mut LZB, rangecoder: &mut RangeDecoder<'a, R>, mode: ProcessingMode, ) -> error::Result<()> { loop { if let Some(unpacked_size) = self.unpacked_size { if output.len() as u64 >= unpacked_size { break; } } else if match mode { ProcessingMode::Partial => { rangecoder.is_eof()? && self.partial_input_buf.position() as usize == 0 } ProcessingMode::Finish => { rangecoder.is_finished_ok()? 
&& self.partial_input_buf.position() as usize == 0 } } { break; } if self.partial_input_buf.position() as usize > 0 { self.read_partial_input_buf(rangecoder)?; let tmp = *self.partial_input_buf.get_ref(); // Check if we need more data to advance the decompressor if mode == ProcessingMode::Partial && (self.partial_input_buf.position() as usize) < MAX_REQUIRED_INPUT && self .try_process_next( output, &tmp[..self.partial_input_buf.position() as usize], rangecoder.range, rangecoder.code, ) .is_err() { return Ok(()); } // Run the decompressor on the tmp buffer let mut tmp_reader = io::Cursor::new(&tmp[..self.partial_input_buf.position() as usize]); let mut tmp_rangecoder = RangeDecoder::from_parts(&mut tmp_reader, rangecoder.range, rangecoder.code); let res = self.process_next(output, &mut tmp_rangecoder)?; // Update the actual rangecoder rangecoder.set(tmp_rangecoder.range, tmp_rangecoder.code); // Update tmp buffer let end = self.partial_input_buf.position(); let new_len = end - tmp_reader.position(); self.partial_input_buf.get_mut()[..new_len as usize] .copy_from_slice(&tmp[tmp_reader.position() as usize..end as usize]); self.partial_input_buf.set_position(new_len); if res == ProcessingStatus::Finished { break; }; } else { let buf: &[u8] = rangecoder.stream.fill_buf()?; if mode == ProcessingMode::Partial && buf.len() < MAX_REQUIRED_INPUT && self .try_process_next(output, buf, rangecoder.range, rangecoder.code) .is_err() { return self.read_partial_input_buf(rangecoder); } if self.process_next(output, rangecoder)? 
== ProcessingStatus::Finished { break; }; } } if let Some(len) = self.unpacked_size { if mode == ProcessingMode::Finish && len != output.len() as u64 { return Err(error::Error::LzmaError(format!( "Expected unpacked size of {} but decompressed to {}", len, output.len() ))); } } Ok(()) } fn decode_literal<'a, W: io::Write, LZB: LzBuffer, R: io::BufRead>( &mut self, output: &mut LZB, rangecoder: &mut RangeDecoder<'a, R>, update: bool, ) -> error::Result { let def_prev_byte = 0u8; let prev_byte = output.last_or(def_prev_byte) as usize; let mut result: usize = 1; let lit_state = ((output.len() & ((1 << self.lzma_props.lp) - 1)) << self.lzma_props.lc) + (prev_byte >> (8 - self.lzma_props.lc)); let probs = &mut self.literal_probs[lit_state]; if self.state >= 7 { let mut match_byte = output.last_n(self.rep[0] + 1)? as usize; while result < 0x100 { let match_bit = (match_byte >> 7) & 1; match_byte <<= 1; let bit = rangecoder .decode_bit(&mut probs[((1 + match_bit) << 8) + result], update)? as usize; result = (result << 1) ^ bit; if match_bit != bit { break; } } } while result < 0x100 { result = (result << 1) ^ (rangecoder.decode_bit(&mut probs[result], update)? as usize); } Ok((result - 0x100) as u8) } fn decode_distance<'a, R: io::BufRead>( &mut self, rangecoder: &mut RangeDecoder<'a, R>, length: usize, update: bool, ) -> error::Result { let len_state = if length > 3 { 3 } else { length }; let pos_slot = self.pos_slot_decoder[len_state].parse(rangecoder, update)? as usize; if pos_slot < 4 { return Ok(pos_slot); } let num_direct_bits = (pos_slot >> 1) - 1; let mut result = (2 ^ (pos_slot & 1)) << num_direct_bits; if pos_slot < 14 { result += rangecoder.parse_reverse_bit_tree( num_direct_bits, &mut self.pos_decoders, result - pos_slot, update, )? as usize; } else { result += (rangecoder.get(num_direct_bits - 4)? as usize) << 4; result += self.align_decoder.parse_reverse(rangecoder, update)? as usize; } Ok(result) } } #[derive(Debug)] /// Raw decoder for LZMA. 
pub struct LzmaDecoder {
    params: LzmaParams,
    memlimit: usize,
    state: DecoderState,
}

impl LzmaDecoder {
    /// Creates a new object ready for decompressing data that it's given for the input
    /// dict size, expected unpacked data size, and memory limit for the internal buffer.
    ///
    /// A `memlimit` of `None` is stored as `usize::MAX`, i.e. no limit.
    pub fn new(params: LzmaParams, memlimit: Option<usize>) -> error::Result<LzmaDecoder> {
        // Build the state first so that `params` is only moved once, below.
        let state = DecoderState::new(params.properties, params.unpacked_size);
        Ok(Self {
            params,
            memlimit: memlimit.unwrap_or(usize::MAX),
            state,
        })
    }

    /// Performs the equivalent of replacing this decompression state with a freshly allocated copy.
    ///
    /// Because the decoder state is reset, the unpacked size may optionally be re-specified. If `None`
    /// is given, the previous unpacked size that the decoder was initialized with remains unchanged.
    ///
    /// This function may not allocate memory and will attempt to reuse any previously allocated resources.
    #[cfg(feature = "raw_decoder")]
    pub fn reset(&mut self, unpacked_size: Option<Option<u64>>) {
        self.state.reset_state(self.params.properties);

        // Outer `None`: leave the unpacked size alone. `Some(inner)`: overwrite it with `inner`.
        if let Some(unpacked_size) = unpacked_size {
            self.state.set_unpacked_size(unpacked_size);
        }
    }

    /// Decompresses the input data into the output, consuming only as much input as needed and writing as much output as possible.
    ///
    /// # Errors
    ///
    /// Returns `Error::LzmaError` if the range decoder cannot be initialized from `input`,
    /// and forwards any error from decoding or from finishing the output buffer.
    pub fn decompress<W: io::Write, R: io::BufRead>(
        &mut self,
        input: &mut R,
        output: &mut W,
    ) -> error::Result<()> {
        let mut output =
            LzCircularBuffer::from_stream(output, self.params.dict_size as usize, self.memlimit);

        let mut rangecoder = RangeDecoder::new(input)
            .map_err(|e| error::Error::LzmaError(format!("LZMA stream too short: {}", e)))?;
        self.state.process(&mut output, &mut rangecoder)?;
        output.finish()?;
        Ok(())
    }
}
lzma-rs-0.3.0/src/decode/lzma2.rs000064400000000000000000000152531046102023000146240ustar 00000000000000
use crate::decode::lzbuffer::LzBuffer;
use crate::decode::lzma::{DecoderState, LzmaProperties};
use crate::decode::{lzbuffer, rangecoder};
use crate::error;
use byteorder::{BigEndian, ReadBytesExt};
use std::io;
use std::io::Read;

#[derive(Debug)]
/// Raw decoder for LZMA2.
pub struct Lzma2Decoder {
    lzma_state: DecoderState,
}

impl Lzma2Decoder {
    /// Creates a new object ready for decompressing data that it's given.
    pub fn new() -> Lzma2Decoder {
        Lzma2Decoder {
            lzma_state: DecoderState::new(
                LzmaProperties {
                    lc: 0,
                    lp: 0,
                    pb: 0,
                },
                None,
            ),
        }
    }

    /// Performs the equivalent of replacing this decompression state with a freshly allocated copy.
    ///
    /// This function may not allocate memory and will attempt to reuse any previously allocated resources.
    #[cfg(feature = "raw_decoder")]
    pub fn reset(&mut self) {
        self.lzma_state.reset_state(LzmaProperties {
            lc: 0,
            lp: 0,
            pb: 0,
        });
    }

    /// Decompresses the input data into the output, consuming only as much input as needed and writing as much output as possible.
    ///
    /// Reads one control ("status") byte per chunk and dispatches on it until the
    /// end marker (`0`) is seen.
    pub fn decompress<W: io::Write, R: io::BufRead>(
        &mut self,
        input: &mut R,
        output: &mut W,
    ) -> error::Result<()> {
        let mut accum = lzbuffer::LzAccumBuffer::from_stream(output, usize::MAX);

        loop {
            let status = input.read_u8().map_err(|e| {
                error::Error::LzmaError(format!("LZMA2 expected new status: {}", e))
            })?;

            lzma_info!("LZMA2 status: {}", status);

            match status {
                0 => {
                    lzma_info!("LZMA2 end of input");
                    break;
                }
                // uncompressed reset dict
                1 => Self::parse_uncompressed(&mut accum, input, true)?,
                // uncompressed no reset
                2 => Self::parse_uncompressed(&mut accum, input, false)?,
                _ => self.parse_lzma(&mut accum, input, status)?,
            }
        }

        accum.finish()?;
        Ok(())
    }

    /// Decodes one LZMA-compressed chunk whose control byte is `status`.
    ///
    /// Bits 5-6 of `status` select what is reset before decoding; bits 0-4 are the
    /// high bits of the chunk's unpacked size.
    fn parse_lzma<R, W>(
        &mut self,
        accum: &mut lzbuffer::LzAccumBuffer<W>,
        input: &mut R,
        status: u8,
    ) -> error::Result<()>
    where
        R: io::BufRead,
        W: io::Write,
    {
        if status & 0x80 == 0 {
            return Err(error::Error::LzmaError(format!(
                "LZMA2 invalid status {}, must be 0, 1, 2 or >= 128",
                status
            )));
        }

        let (reset_dict, reset_state, reset_props) = match (status >> 5) & 0x3 {
            0 => (false, false, false),
            1 => (false, true, false),
            2 => (false, true, true),
            3 => (true, true, true),
            _ => unreachable!(),
        };

        let unpacked_size = input
            .read_u16::<BigEndian>()
            .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected unpacked size: {}", e)))?;
        // Sizes are stored minus one.
        let unpacked_size = ((((status & 0x1F) as u64) << 16) | (unpacked_size as u64)) + 1;

        let packed_size = input
            .read_u16::<BigEndian>()
            .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected packed size: {}", e)))?;
        let packed_size = (packed_size as u64) + 1;

        lzma_info!(
            "LZMA2 compressed block {{ unpacked_size: {}, packed_size: {}, reset_dict: {}, reset_state: {}, reset_props: {} }}",
            unpacked_size,
            packed_size,
            reset_dict,
            reset_state,
            reset_props
        );

        if reset_dict {
            accum.reset()?;
        }

        if reset_state {
            let new_props = if reset_props {
                let props = input.read_u8().map_err(|e| {
                    error::Error::LzmaError(format!("LZMA2 expected new properties: {}", e))
                })?;

                // The properties byte encodes (pb * 5 + lp) * 9 + lc.
                let mut pb = props as u32;
                if pb >= 225 {
                    return Err(error::Error::LzmaError(format!(
                        "LZMA2 invalid properties: {} must be < 225",
                        pb
                    )));
                }

                let lc = pb % 9;
                pb /= 9;
                let lp = pb % 5;
                pb /= 5;

                if lc + lp > 4 {
                    return Err(error::Error::LzmaError(format!(
                        "LZMA2 invalid properties: lc + lp ({} + {}) must be <= 4",
                        lc, lp
                    )));
                }

                lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", lc, lp, pb);
                LzmaProperties { lc, lp, pb }
            } else {
                self.lzma_state.lzma_props
            };

            self.lzma_state.reset_state(new_props);
        }

        // The target size is expressed relative to everything accumulated so far.
        self.lzma_state
            .set_unpacked_size(Some(unpacked_size + accum.len() as u64));

        let mut taken = input.take(packed_size);
        let mut rangecoder = rangecoder::RangeDecoder::new(&mut taken)
            .map_err(|e| error::Error::LzmaError(format!("LZMA input too short: {}", e)))?;
        self.lzma_state.process(accum, &mut rangecoder)
    }

    /// Copies one stored (uncompressed) chunk from `input` into `accum`,
    /// resetting the dictionary first when `reset_dict` is set.
    fn parse_uncompressed<R, W>(
        accum: &mut lzbuffer::LzAccumBuffer<W>,
        input: &mut R,
        reset_dict: bool,
    ) -> error::Result<()>
    where
        R: io::BufRead,
        W: io::Write,
    {
        let unpacked_size = input
            .read_u16::<BigEndian>()
            .map_err(|e| error::Error::LzmaError(format!("LZMA2 expected unpacked size: {}", e)))?;
        let unpacked_size = (unpacked_size as usize) + 1;

        lzma_info!(
            "LZMA2 uncompressed block {{ unpacked_size: {}, reset_dict: {} }}",
            unpacked_size,
            reset_dict
        );

        if reset_dict {
            accum.reset()?;
        }

        let mut buf = vec![0; unpacked_size];
        input.read_exact(buf.as_mut_slice()).map_err(|e| {
            error::Error::LzmaError(format!(
                "LZMA2 expected {} uncompressed bytes: {}",
                unpacked_size, e
            ))
        })?;
        accum.append_bytes(buf.as_slice());

        Ok(())
    }
}
lzma-rs-0.3.0/src/decode/mod.rs000064400000000000000000000002571046102023000143540ustar 00000000000000
//! Decoding logic.
pub mod lzbuffer; pub mod lzma; pub mod lzma2; pub mod options; pub mod rangecoder; pub mod util; pub mod xz; #[cfg(feature = "stream")] pub mod stream; lzma-rs-0.3.0/src/decode/options.rs000064400000000000000000000045301046102023000152660ustar 00000000000000/// Options to tweak decompression behavior. #[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] pub struct Options { /// Defines whether the unpacked size should be read from the header or provided. /// /// The default is /// [`UnpackedSize::ReadFromHeader`](enum.UnpackedSize.html#variant.ReadFromHeader). pub unpacked_size: UnpackedSize, /// Defines whether the dictionary's dynamic size should be limited during decompression. /// /// The default is unlimited. pub memlimit: Option, /// Determines whether to bypass end of stream validation. /// /// This option only applies to the [`Stream`](struct.Stream.html) API. /// /// The default is false (always do completion check). pub allow_incomplete: bool, } /// Alternatives for defining the unpacked size of the decoded data. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum UnpackedSize { /// Assume that the 8 bytes used to specify the unpacked size are present in the header. /// If the bytes are `0xFFFF_FFFF_FFFF_FFFF`, assume that there is an end-of-payload marker in /// the file. /// If not, read the 8 bytes as a little-endian encoded u64. ReadFromHeader, /// Assume that there are 8 bytes representing the unpacked size present in the header. /// Read it, but ignore it and use the provided value instead. /// If the provided value is `None`, assume that there is an end-of-payload marker in the file. /// Note that this is a non-standard way of reading LZMA data, /// but is used by certain libraries such as /// [OpenCTM](http://openctm.sourceforge.net/). ReadHeaderButUseProvided(Option), /// Assume that the 8 bytes typically used to represent the unpacked size are *not* present in /// the header. Use the provided value. 
/// If the provided value is `None`, assume that there is an end-of-payload marker in the file. UseProvided(Option), } impl Default for UnpackedSize { fn default() -> UnpackedSize { UnpackedSize::ReadFromHeader } } #[cfg(test)] mod test { use super::*; #[test] fn test_options() { assert_eq!( Options { unpacked_size: UnpackedSize::ReadFromHeader, memlimit: None, allow_incomplete: false, }, Options::default() ); } } lzma-rs-0.3.0/src/decode/rangecoder.rs000064400000000000000000000154611046102023000157110ustar 00000000000000use crate::decode::util; use crate::error; use byteorder::{BigEndian, ReadBytesExt}; use std::io; pub struct RangeDecoder<'a, R> where R: 'a + io::BufRead, { pub stream: &'a mut R, pub range: u32, pub code: u32, } impl<'a, R> RangeDecoder<'a, R> where R: io::BufRead, { pub fn new(stream: &'a mut R) -> io::Result { let mut dec = Self { stream, range: 0xFFFF_FFFF, code: 0, }; let _ = dec.stream.read_u8()?; dec.code = dec.stream.read_u32::()?; lzma_debug!("0 {{ range: {:08x}, code: {:08x} }}", dec.range, dec.code); Ok(dec) } pub fn from_parts(stream: &'a mut R, range: u32, code: u32) -> Self { Self { stream, range, code, } } pub fn set(&mut self, range: u32, code: u32) { self.range = range; self.code = code; } pub fn read_into(&mut self, dst: &mut [u8]) -> io::Result { self.stream.read(dst) } #[inline] pub fn is_finished_ok(&mut self) -> io::Result { Ok(self.code == 0 && self.is_eof()?) } #[inline] pub fn is_eof(&mut self) -> io::Result { util::is_eof(self.stream) } #[inline] fn normalize(&mut self) -> io::Result<()> { lzma_trace!(" {{ range: {:08x}, code: {:08x} }}", self.range, self.code); if self.range < 0x0100_0000 { self.range <<= 8; self.code = (self.code << 8) ^ (self.stream.read_u8()? 
as u32); lzma_debug!("+ {{ range: {:08x}, code: {:08x} }}", self.range, self.code); } Ok(()) } #[inline] fn get_bit(&mut self) -> error::Result { self.range >>= 1; let bit = self.code >= self.range; if bit { self.code -= self.range } self.normalize()?; Ok(bit) } pub fn get(&mut self, count: usize) -> error::Result { let mut result = 0u32; for _ in 0..count { result = (result << 1) ^ (self.get_bit()? as u32) } Ok(result) } #[inline] pub fn decode_bit(&mut self, prob: &mut u16, update: bool) -> io::Result { let bound: u32 = (self.range >> 11) * (*prob as u32); lzma_trace!( " bound: {:08x}, prob: {:04x}, bit: {}", bound, prob, (self.code > bound) as u8 ); if self.code < bound { if update { *prob += (0x800_u16 - *prob) >> 5; } self.range = bound; self.normalize()?; Ok(false) } else { if update { *prob -= *prob >> 5; } self.code -= bound; self.range -= bound; self.normalize()?; Ok(true) } } fn parse_bit_tree( &mut self, num_bits: usize, probs: &mut [u16], update: bool, ) -> io::Result { let mut tmp: u32 = 1; for _ in 0..num_bits { let bit = self.decode_bit(&mut probs[tmp as usize], update)?; tmp = (tmp << 1) ^ (bit as u32); } Ok(tmp - (1 << num_bits)) } pub fn parse_reverse_bit_tree( &mut self, num_bits: usize, probs: &mut [u16], offset: usize, update: bool, ) -> io::Result { let mut result = 0u32; let mut tmp: usize = 1; for i in 0..num_bits { let bit = self.decode_bit(&mut probs[offset + tmp], update)?; tmp = (tmp << 1) ^ (bit as usize); result ^= (bit as u32) << i; } Ok(result) } } // TODO: parametrize by constant and use [u16; 1 << num_bits] as soon as Rust supports this #[derive(Debug, Clone)] pub struct BitTree { num_bits: usize, probs: Vec, } impl BitTree { pub fn new(num_bits: usize) -> Self { BitTree { num_bits, probs: vec![0x400; 1 << num_bits], } } pub fn parse( &mut self, rangecoder: &mut RangeDecoder, update: bool, ) -> io::Result { rangecoder.parse_bit_tree(self.num_bits, self.probs.as_mut_slice(), update) } pub fn parse_reverse( &mut self, rangecoder: 
&mut RangeDecoder, update: bool, ) -> io::Result { rangecoder.parse_reverse_bit_tree(self.num_bits, self.probs.as_mut_slice(), 0, update) } pub fn reset(&mut self) { self.probs.fill(0x400); } } #[derive(Debug)] pub struct LenDecoder { choice: u16, choice2: u16, low_coder: [BitTree; 16], mid_coder: [BitTree; 16], high_coder: BitTree, } impl LenDecoder { pub fn new() -> Self { LenDecoder { choice: 0x400, choice2: 0x400, low_coder: [ BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), ], mid_coder: [ BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), BitTree::new(3), ], high_coder: BitTree::new(8), } } pub fn decode( &mut self, rangecoder: &mut RangeDecoder, pos_state: usize, update: bool, ) -> io::Result { if !rangecoder.decode_bit(&mut self.choice, update)? { Ok(self.low_coder[pos_state].parse(rangecoder, update)? as usize) } else if !rangecoder.decode_bit(&mut self.choice2, update)? { Ok(self.mid_coder[pos_state].parse(rangecoder, update)? as usize + 8) } else { Ok(self.high_coder.parse(rangecoder, update)? 
as usize + 16) } } pub fn reset(&mut self) { self.choice = 0x400; self.choice2 = 0x400; self.low_coder.iter_mut().for_each(|t| t.reset()); self.mid_coder.iter_mut().for_each(|t| t.reset()); self.high_coder.reset(); } } lzma-rs-0.3.0/src/decode/stream.rs000064400000000000000000000440601046102023000150700ustar 00000000000000use crate::decode::lzbuffer::{LzBuffer, LzCircularBuffer}; use crate::decode::lzma::{DecoderState, LzmaParams}; use crate::decode::rangecoder::RangeDecoder; use crate::decompress::Options; use crate::error::Error; use std::fmt::Debug; use std::io::{self, BufRead, Cursor, Read, Write}; /// Minimum header length to be read. /// - props: u8 (1 byte) /// - dict_size: u32 (4 bytes) const MIN_HEADER_LEN: usize = 5; /// Max header length to be read. /// - unpacked_size: u64 (8 bytes) const MAX_HEADER_LEN: usize = MIN_HEADER_LEN + 8; /// Required bytes after the header. /// - ignore: u8 (1 byte) /// - code: u32 (4 bytes) const START_BYTES: usize = 5; /// Maximum number of bytes to buffer while reading the header. const MAX_TMP_LEN: usize = MAX_HEADER_LEN + START_BYTES; /// Internal state of this streaming decoder. This is needed because we have to /// initialize the stream before processing any data. #[derive(Debug)] enum State where W: Write, { /// Stream is initialized but header values have not yet been read. Header(W), /// Header values have been read and the stream is ready to process more data. Data(RunState), } /// Structures needed while decoding data. struct RunState where W: Write, { decoder: DecoderState, range: u32, code: u32, output: LzCircularBuffer, } impl Debug for RunState where W: Write, { fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { fmt.debug_struct("RunState") .field("range", &self.range) .field("code", &self.code) .finish() } } /// Lzma decompressor that can process multiple chunks of data using the /// `io::Write` interface. 
#[cfg_attr(docsrs, doc(cfg(stream)))] pub struct Stream where W: Write, { /// Temporary buffer to hold data while the header is being read. tmp: Cursor<[u8; MAX_TMP_LEN]>, /// Whether the stream is initialized and ready to process data. /// An `Option` is used to avoid interior mutability when updating the state. state: Option>, /// Options given when a stream is created. options: Options, } impl Stream where W: Write, { /// Initialize the stream. This will consume the `output` which is the sink /// implementing `io::Write` that will receive decompressed bytes. pub fn new(output: W) -> Self { Self::new_with_options(&Options::default(), output) } /// Initialize the stream with the given `options`. This will consume the /// `output` which is the sink implementing `io::Write` that will /// receive decompressed bytes. pub fn new_with_options(options: &Options, output: W) -> Self { Self { tmp: Cursor::new([0; MAX_TMP_LEN]), state: Some(State::Header(output)), options: *options, } } /// Get a reference to the output sink pub fn get_output(&self) -> Option<&W> { self.state.as_ref().map(|state| match state { State::Header(output) => &output, State::Data(state) => state.output.get_output(), }) } /// Get a mutable reference to the output sink pub fn get_output_mut(&mut self) -> Option<&mut W> { self.state.as_mut().map(|state| match state { State::Header(output) => output, State::Data(state) => state.output.get_output_mut(), }) } /// Consumes the stream and returns the output sink. This also makes sure /// we have properly reached the end of the stream. 
pub fn finish(mut self) -> crate::error::Result { if let Some(state) = self.state.take() { match state { State::Header(output) => { if self.tmp.position() > 0 { Err(Error::LzmaError("failed to read header".to_string())) } else { Ok(output) } } State::Data(mut state) => { if !self.options.allow_incomplete { // Process one last time with empty input to force end of // stream checks let mut stream = Cursor::new(&self.tmp.get_ref()[0..self.tmp.position() as usize]); let mut range_decoder = RangeDecoder::from_parts(&mut stream, state.range, state.code); state .decoder .process(&mut state.output, &mut range_decoder)?; } let output = state.output.finish()?; Ok(output) } } } else { // this will occur if a call to `write()` fails Err(Error::LzmaError( "can't finish stream because of previous write error".to_string(), )) } } /// Attempts to read the header and transition into a running state. /// /// This function will consume the state, returning the next state on both /// error and success. fn read_header( output: W, mut input: &mut R, options: &Options, ) -> crate::error::Result> { match LzmaParams::read_header(&mut input, options) { Ok(params) => { let decoder = DecoderState::new(params.properties, params.unpacked_size); let output = LzCircularBuffer::from_stream( output, params.dict_size as usize, options.memlimit.unwrap_or(usize::MAX), ); // The RangeDecoder is only kept temporarily as we are processing // chunks of data. if let Ok(rangecoder) = RangeDecoder::new(&mut input) { Ok(State::Data(RunState { decoder, output, range: rangecoder.range, code: rangecoder.code, })) } else { // Failed to create a RangeDecoder because we need more data, // try again later. Ok(State::Header(output.into_output())) } } // Failed to read_header() because we need more data, try again later. Err(Error::HeaderTooShort(_)) => Ok(State::Header(output)), // Fatal error. Don't retry. 
Err(e) => Err(e), } } /// Process compressed data fn read_data(mut state: RunState, mut input: &mut R) -> io::Result> { // Construct our RangeDecoder from the previous range and code // values. let mut rangecoder = RangeDecoder::from_parts(&mut input, state.range, state.code); // Try to process all bytes of data. state .decoder .process_stream(&mut state.output, &mut rangecoder) .map_err(|e| -> io::Error { e.into() })?; Ok(RunState { decoder: state.decoder, output: state.output, range: rangecoder.range, code: rangecoder.code, }) } } impl Debug for Stream where W: Write + Debug, { fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result { fmt.debug_struct("Stream") .field("tmp", &self.tmp.position()) .field("state", &self.state) .field("options", &self.options) .finish() } } impl Write for Stream where W: Write, { fn write(&mut self, data: &[u8]) -> io::Result { let mut input = Cursor::new(data); if let Some(state) = self.state.take() { let state = match state { // Read the header values and transition into a running state. 
State::Header(state) => { let res = if self.tmp.position() > 0 { // attempt to fill the tmp buffer let position = self.tmp.position(); let bytes_read = input.read(&mut self.tmp.get_mut()[position as usize..])?; let bytes_read = if bytes_read < std::u64::MAX as usize { bytes_read as u64 } else { return Err(io::Error::new( io::ErrorKind::Other, "Failed to convert integer to u64.", )); }; self.tmp.set_position(position + bytes_read); // attempt to read the header from our tmp buffer let (position, res) = { let mut tmp_input = Cursor::new(&self.tmp.get_ref()[0..self.tmp.position() as usize]); let res = Stream::read_header(state, &mut tmp_input, &self.options); (tmp_input.position(), res) }; // discard all bytes up to position if reading the header // was successful if let Ok(State::Data(_)) = &res { let tmp = *self.tmp.get_ref(); let end = self.tmp.position(); let new_len = end - position; (&mut self.tmp.get_mut()[0..new_len as usize]) .copy_from_slice(&tmp[position as usize..end as usize]); self.tmp.set_position(new_len); } res } else { Stream::read_header(state, &mut input, &self.options) }; match res { // occurs when not enough input bytes were provided to // read the entire header Ok(State::Header(val)) => { if self.tmp.position() == 0 { // reset the cursor because we may have partial reads input.set_position(0); let bytes_read = input.read(&mut self.tmp.get_mut()[..])?; let bytes_read = if bytes_read < std::u64::MAX as usize { bytes_read as u64 } else { return Err(io::Error::new( io::ErrorKind::Other, "Failed to convert integer to u64.", )); }; self.tmp.set_position(bytes_read); } State::Header(val) } // occurs when the header was successfully read and we // move on to the next state Ok(State::Data(val)) => State::Data(val), // occurs when the output was consumed due to a // non-recoverable error Err(e) => { return Err(match e { Error::IoError(e) | Error::HeaderTooShort(e) => e, Error::LzmaError(e) | Error::XzError(e) => { io::Error::new(io::ErrorKind::Other, e) } 
}); } } } // Process another chunk of data. State::Data(state) => { let state = if self.tmp.position() > 0 { let mut tmp_input = Cursor::new(&self.tmp.get_ref()[0..self.tmp.position() as usize]); let res = Stream::read_data(state, &mut tmp_input)?; self.tmp.set_position(0); res } else { state }; State::Data(Stream::read_data(state, &mut input)?) } }; self.state.replace(state); } Ok(input.position() as usize) } /// Flushes the output sink. The internal buffer isn't flushed to avoid /// corrupting the internal state. Instead, call `finish()` to finalize the /// stream and flush all remaining internal data. fn flush(&mut self) -> io::Result<()> { if let Some(ref mut state) = self.state { match state { State::Header(_) => Ok(()), State::Data(state) => state.output.get_output_mut().flush(), } } else { Ok(()) } } } impl From for io::Error { fn from(error: Error) -> io::Error { io::Error::new(io::ErrorKind::Other, format!("{:?}", error)) } } #[cfg(test)] mod test { use super::*; /// Test an empty stream #[test] fn test_stream_noop() { let stream = Stream::new(Vec::new()); assert!(stream.get_output().unwrap().is_empty()); let output = stream.finish().unwrap(); assert!(output.is_empty()); } /// Test writing an empty slice #[test] fn test_stream_zero() { let mut stream = Stream::new(Vec::new()); stream.write_all(&[]).unwrap(); stream.write_all(&[]).unwrap(); let output = stream.finish().unwrap(); assert!(output.is_empty()); } /// Test a bad header value #[test] #[should_panic(expected = "LZMA header invalid properties: 255 must be < 225")] fn test_bad_header() { let input = [255u8; 32]; let mut stream = Stream::new(Vec::new()); stream.write_all(&input[..]).unwrap(); let output = stream.finish().unwrap(); assert!(output.is_empty()); } /// Test processing only partial data #[test] fn test_stream_incomplete() { let input = b"\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x83\xff\ \xfb\xff\xff\xc0\x00\x00\x00"; // Process until this index is reached. 
let mut end = 1u64; // Test when we fail to provide the minimum number of bytes required to // read the header. Header size is 13 bytes but we also read the first 5 // bytes of data. while end < (MAX_HEADER_LEN + START_BYTES) as u64 { let mut stream = Stream::new(Vec::new()); stream.write_all(&input[..end as usize]).unwrap(); assert_eq!(stream.tmp.position(), end); let err = stream.finish().unwrap_err(); assert!( err.to_string().contains("failed to read header"), "error was: {}", err ); end += 1; } // Test when we fail to provide enough bytes to terminate the stream. A // properly terminated stream will have a code value of 0. while end < input.len() as u64 { let mut stream = Stream::new(Vec::new()); stream.write_all(&input[..end as usize]).unwrap(); // Header bytes will be buffered until there are enough to read if end < (MAX_HEADER_LEN + START_BYTES) as u64 { assert_eq!(stream.tmp.position(), end); } let err = stream.finish().unwrap_err(); assert!(err.to_string().contains("failed to fill whole buffer")); end += 1; } } /// Test processing all chunk sizes #[test] fn test_stream_chunked() { let small_input = include_bytes!("../../tests/files/small.txt"); let mut reader = io::Cursor::new(&small_input[..]); let mut small_input_compressed = Vec::new(); crate::lzma_compress(&mut reader, &mut small_input_compressed).unwrap(); let input : Vec<(&[u8], &[u8])> = vec![ (b"\x5d\x00\x00\x80\x00\xff\xff\xff\xff\xff\xff\xff\xff\x00\x83\xff\xfb\xff\xff\xc0\x00\x00\x00", b""), (&small_input_compressed[..], small_input)]; for (input, expected) in input { for chunk in 1..input.len() { let mut consumed = 0; let mut stream = Stream::new(Vec::new()); while consumed < input.len() { let end = std::cmp::min(consumed + chunk, input.len()); stream.write_all(&input[consumed..end]).unwrap(); consumed = end; } let output = stream.finish().unwrap(); assert_eq!(expected, &output[..]); } } } #[test] fn test_stream_corrupted() { let mut stream = Stream::new(Vec::new()); let err = stream 
.write_all(b"corrupted bytes here corrupted bytes here") .unwrap_err(); assert!(err.to_string().contains("beyond output size")); let err = stream.finish().unwrap_err(); assert!(err .to_string() .contains("can\'t finish stream because of previous write error")); } #[test] fn test_allow_incomplete() { let input = include_bytes!("../../tests/files/small.txt"); let mut reader = io::Cursor::new(&input[..]); let mut compressed = Vec::new(); crate::lzma_compress(&mut reader, &mut compressed).unwrap(); let compressed = &compressed[..compressed.len() / 2]; // Should fail to finish() without the allow_incomplete option. let mut stream = Stream::new(Vec::new()); stream.write_all(&compressed[..]).unwrap(); stream.finish().unwrap_err(); // Should succeed with the allow_incomplete option. let mut stream = Stream::new_with_options( &Options { allow_incomplete: true, ..Default::default() }, Vec::new(), ); stream.write_all(&compressed[..]).unwrap(); let output = stream.finish().unwrap(); assert_eq!(output, &input[..26]); } } lzma-rs-0.3.0/src/decode/util.rs000064400000000000000000000045471046102023000145600ustar 00000000000000use std::io; pub fn read_tag(input: &mut R, tag: &[u8]) -> io::Result { let mut buf = vec![0; tag.len()]; input.read_exact(buf.as_mut_slice())?; Ok(buf.as_slice() == tag) } pub fn is_eof(input: &mut R) -> io::Result { let buf = input.fill_buf()?; Ok(buf.is_empty()) } pub fn flush_zero_padding(input: &mut R) -> io::Result { loop { let len = { let buf = input.fill_buf()?; let len = buf.len(); if len == 0 { return Ok(true); } for x in buf { if *x != 0u8 { return Ok(false); } } len }; input.consume(len); } } // A Read computing a digest on the bytes read. 
pub struct CrcDigestRead<'a, 'b, R, S> where R: 'a + io::Read, S: crc::Width, { read: &'a mut R, // underlying reader digest: &'a mut crc::Digest<'b, S>, // hasher } impl<'a, 'b, R, S> CrcDigestRead<'a, 'b, R, S> where R: io::Read, S: crc::Width, { pub fn new(read: &'a mut R, digest: &'a mut crc::Digest<'b, S>) -> Self { Self { read, digest } } } impl<'a, 'b, R> io::Read for CrcDigestRead<'a, 'b, R, u32> where R: io::Read, { fn read(&mut self, buf: &mut [u8]) -> io::Result { let result = self.read.read(buf)?; self.digest.update(&buf[..result]); Ok(result) } } // A BufRead counting the bytes read. pub struct CountBufRead<'a, R> where R: 'a + io::BufRead, { read: &'a mut R, // underlying reader count: usize, // number of bytes read } impl<'a, R> CountBufRead<'a, R> where R: io::BufRead, { pub fn new(read: &'a mut R) -> Self { Self { read, count: 0 } } pub fn count(&self) -> usize { self.count } } impl<'a, R> io::Read for CountBufRead<'a, R> where R: io::BufRead, { fn read(&mut self, buf: &mut [u8]) -> io::Result { let result = self.read.read(buf)?; self.count += result; Ok(result) } } impl<'a, R> io::BufRead for CountBufRead<'a, R> where R: io::BufRead, { fn fill_buf(&mut self) -> io::Result<&[u8]> { self.read.fill_buf() } fn consume(&mut self, amt: usize) { self.read.consume(amt); self.count += amt; } } lzma-rs-0.3.0/src/decode/xz.rs000064400000000000000000000327411046102023000142410ustar 00000000000000//! Decoder for the `.xz` file format. 
use crate::decode::lzma2::Lzma2Decoder; use crate::decode::util; use crate::error; use crate::xz::crc::{CRC32, CRC64}; use crate::xz::{footer, header, CheckMethod, StreamFlags}; use byteorder::{BigEndian, LittleEndian, ReadBytesExt}; use std::io; use std::io::Read; #[derive(Debug)] struct Record { unpadded_size: u64, unpacked_size: u64, } pub fn decode_stream(input: &mut R, output: &mut W) -> error::Result<()> where R: io::BufRead, W: io::Write, { let header = header::StreamHeader::parse(input)?; let mut records: Vec = vec![]; let index_size = loop { let mut count_input = util::CountBufRead::new(input); let header_size = count_input.read_u8()?; lzma_info!("XZ block header_size byte: 0x{:02x}", header_size); if header_size == 0 { lzma_info!("XZ records: {:?}", records); check_index(&mut count_input, &records)?; let index_size = count_input.count(); break index_size; } read_block( &mut count_input, output, header.stream_flags.check_method, &mut records, header_size, )?; }; let crc32 = input.read_u32::()?; let mut digest = CRC32.digest(); { let mut digested = util::CrcDigestRead::new(input, &mut digest); let backward_size = digested.read_u32::()?; if index_size as u32 != (backward_size + 1) << 2 { return Err(error::Error::XzError(format!( "Invalid index size: expected {} but got {}", (backward_size + 1) << 2, index_size ))); } let stream_flags = { let field = digested.read_u16::()?; StreamFlags::parse(field)? }; if header.stream_flags != stream_flags { return Err(error::Error::XzError(format!( "Flags in header ({:?}) does not match footer ({:?})", header.stream_flags, stream_flags ))); } } let digest_crc32 = digest.finalize(); if crc32 != digest_crc32 { return Err(error::Error::XzError(format!( "Invalid footer CRC32: expected 0x{:08x} but got 0x{:08x}", crc32, digest_crc32 ))); } if !util::read_tag(input, footer::XZ_MAGIC_FOOTER)? { return Err(error::Error::XzError(format!( "Invalid footer magic, expected {:?}", footer::XZ_MAGIC_FOOTER ))); } if !util::is_eof(input)? 
{ return Err(error::Error::XzError( "Unexpected data after last XZ block".to_string(), )); } Ok(()) } fn check_index<'a, R>( count_input: &mut util::CountBufRead<'a, R>, records: &[Record], ) -> error::Result<()> where R: io::BufRead, { let mut digest = CRC32.digest(); let index_tag = 0u8; digest.update(&[index_tag]); { let mut digested = util::CrcDigestRead::new(count_input, &mut digest); let num_records = get_multibyte(&mut digested)?; if num_records != records.len() as u64 { return Err(error::Error::XzError(format!( "Expected {} records but got {} records", num_records, records.len() ))); } for (i, record) in records.iter().enumerate() { lzma_info!("XZ index checking record {}: {:?}", i, record); let unpadded_size = get_multibyte(&mut digested)?; if unpadded_size != record.unpadded_size { return Err(error::Error::XzError(format!( "Invalid index for record {}: unpadded size ({}) does not match index ({})", i, record.unpadded_size, unpadded_size ))); } let unpacked_size = get_multibyte(&mut digested)?; if unpacked_size != record.unpacked_size { return Err(error::Error::XzError(format!( "Invalid index for record {}: unpacked size ({}) does not match index ({})", i, record.unpacked_size, unpacked_size ))); } } }; // TODO: create padding parser function let count = count_input.count(); let padding_size = ((count ^ 0x03) + 1) & 0x03; lzma_info!( "XZ index: {} byte(s) read, {} byte(s) of padding", count, padding_size ); { let mut digested = util::CrcDigestRead::new(count_input, &mut digest); for _ in 0..padding_size { let byte = digested.read_u8()?; if byte != 0 { return Err(error::Error::XzError( "Invalid index padding, must be null bytes".to_string(), )); } } }; let digest_crc32 = digest.finalize(); lzma_info!("XZ index checking digest 0x{:08x}", digest_crc32); let crc32 = count_input.read_u32::()?; if crc32 != digest_crc32 { return Err(error::Error::XzError(format!( "Invalid index CRC32: expected 0x{:08x} but got 0x{:08x}", crc32, digest_crc32 ))); } Ok(()) } 
#[derive(Debug)] enum FilterId { Lzma2, } fn get_filter_id(id: u64) -> error::Result { match id { 0x21 => Ok(FilterId::Lzma2), _ => Err(error::Error::XzError(format!("Unknown filter id {}", id))), } } struct Filter { filter_id: FilterId, props: Vec, } struct BlockHeader { filters: Vec, packed_size: Option, unpacked_size: Option, } fn read_block<'a, R, W>( count_input: &mut util::CountBufRead<'a, R>, output: &mut W, check_method: CheckMethod, records: &mut Vec, header_size: u8, ) -> error::Result where R: io::BufRead, W: io::Write, { let mut digest = CRC32.digest(); digest.update(&[header_size]); let header_size = ((header_size as u64) << 2) - 1; let block_header = { let mut taken = count_input.take(header_size); let mut digested = io::BufReader::new(util::CrcDigestRead::new(&mut taken, &mut digest)); read_block_header(&mut digested, header_size)? }; let crc32 = count_input.read_u32::()?; let digest_crc32 = digest.finalize(); if crc32 != digest_crc32 { return Err(error::Error::XzError(format!( "Invalid header CRC32: expected 0x{:08x} but got 0x{:08x}", crc32, digest_crc32 ))); } let mut tmpbuf: Vec = Vec::new(); let filters = block_header.filters; for (i, filter) in filters.iter().enumerate() { if i == 0 { // TODO: use SubBufRead on input if packed_size is known? let packed_size = decode_filter(count_input, &mut tmpbuf, filter)?; if let Some(expected_packed_size) = block_header.packed_size { if (packed_size as u64) != expected_packed_size { return Err(error::Error::XzError(format!( "Invalid compressed size: expected {} but got {}", expected_packed_size, packed_size ))); } } } else { let mut newbuf: Vec = Vec::new(); decode_filter( &mut io::BufReader::new(tmpbuf.as_slice()), &mut newbuf, filter, )?; // TODO: does this move or copy? 
tmpbuf = newbuf; } } let unpacked_size = tmpbuf.len(); lzma_info!("XZ block decompressed to {} byte(s)", tmpbuf.len()); if let Some(expected_unpacked_size) = block_header.unpacked_size { if (unpacked_size as u64) != expected_unpacked_size { return Err(error::Error::XzError(format!( "Invalid decompressed size: expected {} but got {}", expected_unpacked_size, unpacked_size ))); } } let count = count_input.count(); let padding_size = ((count ^ 0x03) + 1) & 0x03; lzma_info!( "XZ block: {} byte(s) read, {} byte(s) of padding, check method {:?}", count, padding_size, check_method ); for _ in 0..padding_size { let byte = count_input.read_u8()?; if byte != 0 { return Err(error::Error::XzError( "Invalid block padding, must be null bytes".to_string(), )); } } validate_block_check(count_input, tmpbuf.as_slice(), check_method)?; output.write_all(tmpbuf.as_slice())?; records.push(Record { unpadded_size: (count_input.count() - padding_size) as u64, unpacked_size: unpacked_size as u64, }); let finished = false; Ok(finished) } /// Verify block checksum against the "Block Check" field. /// /// See spec section 3.4 for details. 
fn validate_block_check( input: &mut R, buf: &[u8], check_method: CheckMethod, ) -> error::Result<()> where R: io::BufRead, { match check_method { CheckMethod::None => (), CheckMethod::Crc32 => { let crc32 = input.read_u32::()?; let digest_crc32 = CRC32.checksum(buf); if crc32 != digest_crc32 { return Err(error::Error::XzError(format!( "Invalid block CRC32, expected 0x{:08x} but got 0x{:08x}", crc32, digest_crc32 ))); } } CheckMethod::Crc64 => { let crc64 = input.read_u64::()?; let digest_crc64 = CRC64.checksum(buf); if crc64 != digest_crc64 { return Err(error::Error::XzError(format!( "Invalid block CRC64, expected 0x{:016x} but got 0x{:016x}", crc64, digest_crc64 ))); } } // TODO CheckMethod::Sha256 => { return Err(error::Error::XzError( "Unsupported SHA-256 checksum (not yet implemented)".to_string(), )); } } Ok(()) } fn decode_filter(input: &mut R, output: &mut W, filter: &Filter) -> error::Result where R: io::BufRead, W: io::Write, { let mut count_input = util::CountBufRead::new(input); match filter.filter_id { FilterId::Lzma2 => { if filter.props.len() != 1 { return Err(error::Error::XzError(format!( "Invalid properties for filter {:?}", filter.filter_id ))); } // TODO: properties?? Lzma2Decoder::new().decompress(&mut count_input, output)?; Ok(count_input.count()) } } } fn read_block_header(input: &mut R, header_size: u64) -> error::Result where R: io::BufRead, { let flags = input.read_u8()?; let num_filters = (flags & 0x03) + 1; let reserved = flags & 0x3C; let has_packed_size = flags & 0x40 != 0; let has_unpacked_size = flags & 0x80 != 0; lzma_info!( "XZ block header: {{ header_size: {}, flags: {}, num_filters: {}, has_packed_size: {}, has_unpacked_size: {} }}", header_size, flags, num_filters, has_packed_size, has_unpacked_size ); if reserved != 0 { return Err(error::Error::XzError(format!( "Invalid block flags {}, reserved bits (mask 0x3C) must be zero", flags ))); } let packed_size = if has_packed_size { Some(get_multibyte(input)?) 
} else { None }; let unpacked_size = if has_unpacked_size { Some(get_multibyte(input)?) } else { None }; lzma_info!( "XZ block header: {{ packed_size: {:?}, unpacked_size: {:?} }}", packed_size, unpacked_size ); let mut filters: Vec = vec![]; for _ in 0..num_filters { let filter_id = get_filter_id(get_multibyte(input)?)?; let size_of_properties = get_multibyte(input)?; lzma_info!( "XZ filter: {{ filter_id: {:?}, size_of_properties: {} }}", filter_id, size_of_properties ); // Early abort to avoid allocating a large vector if size_of_properties > header_size { return Err(error::Error::XzError(format!( "Size of filter properties exceeds block header size ({} > {})", size_of_properties, header_size ))); } let mut buf = vec![0; size_of_properties as usize]; input.read_exact(buf.as_mut_slice()).map_err(|e| { error::Error::XzError(format!( "Could not read filter properties of size {}: {}", size_of_properties, e )) })?; lzma_info!("XZ filter properties: {:?}", buf); filters.push(Filter { filter_id, props: buf, }) } if !util::flush_zero_padding(input)? 
{ return Err(error::Error::XzError( "Invalid block header padding, must be null bytes".to_string(), )); } Ok(BlockHeader { filters, packed_size, unpacked_size, }) } pub fn get_multibyte(input: &mut R) -> error::Result where R: io::Read, { let mut result = 0; for i in 0..9 { let byte = input.read_u8()?; result ^= ((byte & 0x7F) as u64) << (i * 7); if (byte & 0x80) == 0 { return Ok(result); } } Err(error::Error::XzError( "Invalid multi-byte encoding".to_string(), )) } lzma-rs-0.3.0/src/encode/dumbencoder.rs000064400000000000000000000102501046102023000160700ustar 00000000000000use crate::compress::{Options, UnpackedSize}; use crate::encode::rangecoder; use byteorder::{LittleEndian, WriteBytesExt}; use std::io; pub struct Encoder<'a, W> where W: 'a + io::Write, { rangecoder: rangecoder::RangeEncoder<'a, W>, literal_probs: [[u16; 0x300]; 8], is_match: [u16; 4], // true = LZ, false = literal unpacked_size: UnpackedSize, } const LC: u32 = 3; const LP: u32 = 0; const PB: u32 = 2; impl<'a, W> Encoder<'a, W> where W: io::Write, { pub fn from_stream(stream: &'a mut W, options: &Options) -> io::Result { let dict_size = 0x0080_0000; // Properties let props = (LC + 9 * (LP + 5 * PB)) as u8; lzma_info!("Properties {{ lc: {}, lp: {}, pb: {} }}", LC, LP, PB); stream.write_u8(props)?; // Dictionary lzma_info!("Dict size: {}", dict_size); stream.write_u32::(dict_size)?; // Unpacked size match &options.unpacked_size { UnpackedSize::WriteToHeader(unpacked_size) => { let value: u64 = match unpacked_size { None => { lzma_info!("Unpacked size: unknown"); 0xFFFF_FFFF_FFFF_FFFF } Some(x) => { lzma_info!("Unpacked size: {}", x); *x } }; stream.write_u64::(value)?; } UnpackedSize::SkipWritingToHeader => {} }; let encoder = Encoder { rangecoder: rangecoder::RangeEncoder::new(stream), literal_probs: [[0x400; 0x300]; 8], is_match: [0x400; 4], unpacked_size: options.unpacked_size, }; Ok(encoder) } pub fn process(mut self, input: R) -> io::Result<()> where R: io::Read, { let mut prev_byte = 0u8; 
let mut input_len = 0; for (out_len, byte_result) in input.bytes().enumerate() { let byte = byte_result?; let pos_state = out_len & 3; input_len = out_len; // Literal self.rangecoder .encode_bit(&mut self.is_match[pos_state], false)?; self.encode_literal(byte, prev_byte)?; prev_byte = byte; } self.finish(input_len + 1) } fn finish(&mut self, input_len: usize) -> io::Result<()> { match self.unpacked_size { UnpackedSize::SkipWritingToHeader | UnpackedSize::WriteToHeader(Some(_)) => {} UnpackedSize::WriteToHeader(None) => { // Write end-of-stream marker let pos_state = input_len & 3; // Match self.rangecoder .encode_bit(&mut self.is_match[pos_state], true)?; // New distance self.rangecoder.encode_bit(&mut 0x400, false)?; // Dummy len, as small as possible (len = 0) for _ in 0..4 { self.rangecoder.encode_bit(&mut 0x400, false)?; } // Distance marker = 0xFFFFFFFF // pos_slot = 63 for _ in 0..6 { self.rangecoder.encode_bit(&mut 0x400, true)?; } // num_direct_bits = 30 // result = 3 << 30 = C000_0000 // + 3FFF_FFF0 (26 bits) // + F ( 4 bits) for _ in 0..30 { self.rangecoder.encode_bit(&mut 0x400, true)?; } // = FFFF_FFFF } } // Flush range coder self.rangecoder.finish() } fn encode_literal(&mut self, byte: u8, prev_byte: u8) -> io::Result<()> { let prev_byte = prev_byte as usize; let mut result: usize = 1; let lit_state = prev_byte >> 5; let probs = &mut self.literal_probs[lit_state]; for i in 0..8 { let bit = ((byte >> (7 - i)) & 1) != 0; self.rangecoder.encode_bit(&mut probs[result], bit)?; result = (result << 1) ^ (bit as usize); } Ok(()) } } lzma-rs-0.3.0/src/encode/lzma2.rs000064400000000000000000000011411046102023000146250ustar 00000000000000use byteorder::{BigEndian, WriteBytesExt}; use std::io; pub fn encode_stream(input: &mut R, output: &mut W) -> io::Result<()> where R: io::BufRead, W: io::Write, { let mut buf = vec![0u8; 0x10000]; loop { let n = input.read(&mut buf)?; if n == 0 { // status = EOF output.write_u8(0)?; break; } // status = uncompressed reset dict 
output.write_u8(1)?; // unpacked size output.write_u16::((n - 1) as u16)?; // contents output.write_all(&buf[..n])?; } Ok(()) } lzma-rs-0.3.0/src/encode/mod.rs000064400000000000000000000001601046102023000143570ustar 00000000000000//! Encoding logic. pub mod dumbencoder; pub mod lzma2; pub mod options; mod rangecoder; mod util; pub mod xz; lzma-rs-0.3.0/src/encode/options.rs000064400000000000000000000024041046102023000152760ustar 00000000000000/// Options for the `lzma_compress` function #[derive(Clone, Copy, Debug, Default)] pub struct Options { /// Defines whether the unpacked size should be written to the header. /// The default is /// [`UnpackedSize::WriteToHeader(None)`](enum.encode.UnpackedSize.html#variant.WriteValueToHeader) pub unpacked_size: UnpackedSize, } /// Alternatives for handling unpacked size #[derive(Clone, Copy, Debug)] pub enum UnpackedSize { /// If the value is `Some(u64)`, write the provided u64 value to the header. /// There is currently no check in place that verifies that this is the actual number of bytes /// provided by the input stream. /// If the value is `None`, write the special `0xFFFF_FFFF_FFFF_FFFF` code to the header, /// indicating that the unpacked size is unknown. WriteToHeader(Option), /// Do not write anything to the header. The unpacked size needs to be stored elsewhere and /// provided when reading the file. Note that this is a non-standard way of writing LZMA data, /// but is used by certain libraries such as /// [OpenCTM](http://openctm.sourceforge.net/). 
SkipWritingToHeader, } impl Default for UnpackedSize { fn default() -> UnpackedSize { UnpackedSize::WriteToHeader(None) } } lzma-rs-0.3.0/src/encode/rangecoder.rs000064400000000000000000000245051046102023000157220ustar 00000000000000use byteorder::WriteBytesExt; use std::io; pub struct RangeEncoder<'a, W> where W: 'a + io::Write, { stream: &'a mut W, range: u32, low: u64, cache: u8, cachesz: u32, } impl<'a, W> RangeEncoder<'a, W> where W: io::Write, { #[allow(clippy::let_and_return)] pub fn new(stream: &'a mut W) -> Self { let enc = Self { stream, range: 0xFFFF_FFFF, low: 0, cache: 0, cachesz: 1, }; lzma_debug!("0 {{ range: {:08x}, low: {:010x} }}", enc.range, enc.low); enc } fn write_low(&mut self) -> io::Result<()> { if self.low < 0xFF00_0000 || self.low > 0xFFFF_FFFF { let mut tmp = self.cache; loop { let byte = tmp.wrapping_add((self.low >> 32) as u8); self.stream.write_u8(byte)?; lzma_debug!("> byte: {:02x}", byte); tmp = 0xFF; self.cachesz -= 1; if self.cachesz == 0 { break; } } self.cache = (self.low >> 24) as u8; } self.cachesz += 1; self.low = (self.low << 8) & 0xFFFF_FFFF; Ok(()) } pub fn finish(&mut self) -> io::Result<()> { for _ in 0..5 { self.write_low()?; lzma_debug!("$ {{ range: {:08x}, low: {:010x} }}", self.range, self.low); } Ok(()) } fn normalize(&mut self) -> io::Result<()> { while self.range < 0x0100_0000 { lzma_debug!( "+ {{ range: {:08x}, low: {:010x}, cache: {:02x}, {} }}", self.range, self.low, self.cache, self.cachesz ); self.range <<= 8; self.write_low()?; lzma_debug!( "* {{ range: {:08x}, low: {:010x}, cache: {:02x}, {} }}", self.range, self.low, self.cache, self.cachesz ); } lzma_trace!(" {{ range: {:08x}, low: {:010x} }}", self.range, self.low); Ok(()) } pub fn encode_bit(&mut self, prob: &mut u16, bit: bool) -> io::Result<()> { let bound: u32 = (self.range >> 11) * (*prob as u32); lzma_trace!( " bound: {:08x}, prob: {:04x}, bit: {}", bound, prob, bit as u8 ); if bit { *prob -= *prob >> 5; self.low += bound as u64; self.range -= 
bound; } else { *prob += (0x800_u16 - *prob) >> 5; self.range = bound; } self.normalize() } #[cfg(test)] fn encode_bit_tree( &mut self, num_bits: usize, probs: &mut [u16], value: u32, ) -> io::Result<()> { debug_assert!(value.leading_zeros() as usize + num_bits >= 32); let mut tmp: usize = 1; for i in 0..num_bits { let bit = ((value >> (num_bits - i - 1)) & 1) != 0; self.encode_bit(&mut probs[tmp], bit)?; tmp = (tmp << 1) ^ (bit as usize); } Ok(()) } #[cfg(test)] pub fn encode_reverse_bit_tree( &mut self, num_bits: usize, probs: &mut [u16], offset: usize, mut value: u32, ) -> io::Result<()> { debug_assert!(value.leading_zeros() as usize + num_bits >= 32); let mut tmp: usize = 1; for _ in 0..num_bits { let bit = (value & 1) != 0; value >>= 1; self.encode_bit(&mut probs[offset + tmp], bit)?; tmp = (tmp << 1) ^ (bit as usize); } Ok(()) } } // TODO: parametrize by constant and use [u16; 1 << num_bits] as soon as Rust supports this #[cfg(test)] #[derive(Clone)] pub struct BitTree { num_bits: usize, probs: Vec, } #[cfg(test)] impl BitTree { pub fn new(num_bits: usize) -> Self { BitTree { num_bits, probs: vec![0x400; 1 << num_bits], } } pub fn encode( &mut self, rangecoder: &mut RangeEncoder, value: u32, ) -> io::Result<()> { rangecoder.encode_bit_tree(self.num_bits, self.probs.as_mut_slice(), value) } pub fn encode_reverse( &mut self, rangecoder: &mut RangeEncoder, value: u32, ) -> io::Result<()> { rangecoder.encode_reverse_bit_tree(self.num_bits, self.probs.as_mut_slice(), 0, value) } } #[cfg(test)] pub struct LenEncoder { choice: u16, choice2: u16, low_coder: Vec, mid_coder: Vec, high_coder: BitTree, } #[cfg(test)] impl LenEncoder { pub fn new() -> Self { LenEncoder { choice: 0x400, choice2: 0x400, low_coder: vec![BitTree::new(3); 16], mid_coder: vec![BitTree::new(3); 16], high_coder: BitTree::new(8), } } pub fn encode( &mut self, rangecoder: &mut RangeEncoder, pos_state: usize, value: u32, ) -> io::Result<()> { let is_low: bool = value < 8; rangecoder.encode_bit(&mut 
self.choice, !is_low)?; if is_low { return self.low_coder[pos_state].encode(rangecoder, value); } let is_middle: bool = value < 16; rangecoder.encode_bit(&mut self.choice2, !is_middle)?; if is_middle { return self.mid_coder[pos_state].encode(rangecoder, value - 8); } self.high_coder.encode(rangecoder, value - 16) } } #[cfg(test)] mod test { use super::*; use crate::decode::rangecoder::{LenDecoder, RangeDecoder}; use crate::{decode, encode}; use std::io::BufReader; fn encode_decode(prob_init: u16, bits: &[bool]) { let mut buf: Vec = Vec::new(); let mut encoder = RangeEncoder::new(&mut buf); let mut prob = prob_init; for &b in bits { encoder.encode_bit(&mut prob, b).unwrap(); } encoder.finish().unwrap(); let mut bufread = BufReader::new(buf.as_slice()); let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); let mut prob = prob_init; for &b in bits { assert_eq!(decoder.decode_bit(&mut prob, true).unwrap(), b); } assert!(decoder.is_finished_ok().unwrap()); } #[test] fn test_encode_decode_zeros() { encode_decode(0x400, &[false; 10000]); } #[test] fn test_encode_decode_ones() { encode_decode(0x400, &[true; 10000]); } fn encode_decode_bittree(num_bits: usize, values: &[u32]) { let mut buf: Vec = Vec::new(); let mut encoder = RangeEncoder::new(&mut buf); let mut tree = encode::rangecoder::BitTree::new(num_bits); for &v in values { tree.encode(&mut encoder, v).unwrap(); } encoder.finish().unwrap(); let mut bufread = BufReader::new(buf.as_slice()); let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); let mut tree = decode::rangecoder::BitTree::new(num_bits); for &v in values { assert_eq!(tree.parse(&mut decoder, true).unwrap(), v); } assert!(decoder.is_finished_ok().unwrap()); } #[test] fn test_encode_decode_bittree_zeros() { for num_bits in 0..16 { encode_decode_bittree(num_bits, &[0; 10000]); } } #[test] fn test_encode_decode_bittree_ones() { for num_bits in 0..16 { encode_decode_bittree(num_bits, &[(1 << num_bits) - 1; 10000]); } } #[test] fn 
test_encode_decode_bittree_all() { for num_bits in 0..16 { let max = 1 << num_bits; let values: Vec = (0..max).collect(); encode_decode_bittree(num_bits, &values); } } fn encode_decode_reverse_bittree(num_bits: usize, values: &[u32]) { let mut buf: Vec = Vec::new(); let mut encoder = RangeEncoder::new(&mut buf); let mut tree = encode::rangecoder::BitTree::new(num_bits); for &v in values { tree.encode_reverse(&mut encoder, v).unwrap(); } encoder.finish().unwrap(); let mut bufread = BufReader::new(buf.as_slice()); let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); let mut tree = decode::rangecoder::BitTree::new(num_bits); for &v in values { assert_eq!(tree.parse_reverse(&mut decoder, true).unwrap(), v); } assert!(decoder.is_finished_ok().unwrap()); } #[test] fn test_encode_decode_reverse_bittree_zeros() { for num_bits in 0..16 { encode_decode_reverse_bittree(num_bits, &[0; 10000]); } } #[test] fn test_encode_decode_reverse_bittree_ones() { for num_bits in 0..16 { encode_decode_reverse_bittree(num_bits, &[(1 << num_bits) - 1; 10000]); } } #[test] fn test_encode_decode_reverse_bittree_all() { for num_bits in 0..16 { let max = 1 << num_bits; let values: Vec = (0..max).collect(); encode_decode_reverse_bittree(num_bits, &values); } } fn encode_decode_length(pos_state: usize, values: &[u32]) { let mut buf: Vec = Vec::new(); let mut encoder = RangeEncoder::new(&mut buf); let mut len_encoder = LenEncoder::new(); for &v in values { len_encoder.encode(&mut encoder, pos_state, v).unwrap(); } encoder.finish().unwrap(); let mut bufread = BufReader::new(buf.as_slice()); let mut decoder = RangeDecoder::new(&mut bufread).unwrap(); let mut len_decoder = LenDecoder::new(); for &v in values { assert_eq!( len_decoder.decode(&mut decoder, pos_state, true).unwrap(), v as usize ); } assert!(decoder.is_finished_ok().unwrap()); } #[test] fn test_encode_decode_length_zeros() { for pos_state in 0..16 { encode_decode_length(pos_state, &[0; 10000]); } } #[test] fn 
test_encode_decode_length_all() { for pos_state in 0..16 { let max = (1 << 8) + 16; let values: Vec = (0..max).collect(); encode_decode_length(pos_state, &values); } } } lzma-rs-0.3.0/src/encode/util.rs000064400000000000000000000027671046102023000145740ustar 00000000000000use std::io; // A Write computing a digest on the bytes written. pub struct CrcDigestWrite<'a, 'b, W, S> where W: 'a + io::Write, S: crc::Width, { write: &'a mut W, // underlying writer digest: &'a mut crc::Digest<'b, S>, // hasher } impl<'a, 'b, W, S> CrcDigestWrite<'a, 'b, W, S> where W: io::Write, S: crc::Width, { pub fn new(write: &'a mut W, digest: &'a mut crc::Digest<'b, S>) -> Self { Self { write, digest } } } impl<'a, 'b, W> io::Write for CrcDigestWrite<'a, 'b, W, u32> where W: io::Write, { fn write(&mut self, buf: &[u8]) -> io::Result { let result = self.write.write(buf)?; self.digest.update(&buf[..result]); Ok(result) } fn flush(&mut self) -> io::Result<()> { self.write.flush() } } // A Write counting the bytes written. 
pub struct CountWrite<'a, W> where W: 'a + io::Write, { write: &'a mut W, // underlying writer count: usize, // number of bytes written } impl<'a, W> CountWrite<'a, W> where W: io::Write, { pub fn new(write: &'a mut W) -> Self { Self { write, count: 0 } } pub fn count(&self) -> usize { self.count } } impl<'a, W> io::Write for CountWrite<'a, W> where W: io::Write, { fn write(&mut self, buf: &[u8]) -> io::Result { let result = self.write.write(buf)?; self.count += result; Ok(result) } fn flush(&mut self) -> io::Result<()> { self.write.flush() } } lzma-rs-0.3.0/src/encode/xz.rs000064400000000000000000000112011046102023000142370ustar 00000000000000use crate::decode; use crate::encode::{lzma2, util}; use crate::xz::crc::CRC32; use crate::xz::{footer, header, CheckMethod, StreamFlags}; use byteorder::{LittleEndian, WriteBytesExt}; use std::io; use std::io::Write; pub fn encode_stream(input: &mut R, output: &mut W) -> io::Result<()> where R: io::BufRead, W: io::Write, { let stream_flags = StreamFlags { check_method: CheckMethod::None, }; // Header write_header(output, stream_flags)?; // Block let (unpadded_size, unpacked_size) = write_block(input, output)?; // Index let index_size = write_index(output, unpadded_size, unpacked_size)?; // Footer write_footer(output, stream_flags, index_size) } fn write_header(output: &mut W, stream_flags: StreamFlags) -> io::Result<()> where W: io::Write, { output.write_all(header::XZ_MAGIC)?; let mut digest = CRC32.digest(); { let mut digested = util::CrcDigestWrite::new(output, &mut digest); stream_flags.serialize(&mut digested)?; } let crc32 = digest.finalize(); output.write_u32::(crc32)?; Ok(()) } fn write_footer(output: &mut W, stream_flags: StreamFlags, index_size: usize) -> io::Result<()> where W: io::Write, { let mut digest = CRC32.digest(); let mut footer_buf: Vec = Vec::new(); { let mut digested = util::CrcDigestWrite::new(&mut footer_buf, &mut digest); let backward_size = (index_size >> 2) - 1; digested.write_u32::(backward_size 
as u32)?; stream_flags.serialize(&mut digested)?; } let crc32 = digest.finalize(); output.write_u32::(crc32)?; output.write_all(footer_buf.as_slice())?; output.write_all(footer::XZ_MAGIC_FOOTER)?; Ok(()) } fn write_block(input: &mut R, output: &mut W) -> io::Result<(usize, usize)> where R: io::BufRead, W: io::Write, { let (unpadded_size, unpacked_size) = { let mut count_output = util::CountWrite::new(output); // Block header let mut digest = CRC32.digest(); { let mut digested = util::CrcDigestWrite::new(&mut count_output, &mut digest); let header_size = 8; digested.write_u8((header_size >> 2) as u8)?; let flags = 0x00; // 1 filter, no (un)packed size provided digested.write_u8(flags)?; let filter_id = 0x21; // LZMA2 digested.write_u8(filter_id)?; let size_of_properties = 1; digested.write_u8(size_of_properties)?; let properties = 22; // TODO digested.write_u8(properties)?; let padding = [0, 0, 0]; digested.write_all(&padding)?; } let crc32 = digest.finalize(); count_output.write_u32::(crc32)?; // Block let mut count_input = decode::util::CountBufRead::new(input); lzma2::encode_stream(&mut count_input, &mut count_output)?; (count_output.count(), count_input.count()) }; lzma_info!( "Unpadded size = {}, unpacked_size = {}", unpadded_size, unpacked_size ); let padding_size = ((unpadded_size ^ 0x03) + 1) & 0x03; let padding = vec![0; padding_size]; output.write_all(padding.as_slice())?; // Checksum = None (cf. 
above) Ok((unpadded_size, unpacked_size)) } fn write_index(output: &mut W, unpadded_size: usize, unpacked_size: usize) -> io::Result where W: io::Write, { let mut count_output = util::CountWrite::new(output); let mut digest = CRC32.digest(); { let mut digested = util::CrcDigestWrite::new(&mut count_output, &mut digest); digested.write_u8(0)?; // No more block let num_records = 1; write_multibyte(&mut digested, num_records)?; write_multibyte(&mut digested, unpadded_size as u64)?; write_multibyte(&mut digested, unpacked_size as u64)?; } // Padding let count = count_output.count(); let padding_size = ((count ^ 0x03) + 1) & 0x03; { let mut digested = util::CrcDigestWrite::new(&mut count_output, &mut digest); let padding = vec![0; padding_size]; digested.write_all(padding.as_slice())?; } let crc32 = digest.finalize(); count_output.write_u32::(crc32)?; Ok(count_output.count()) } fn write_multibyte(output: &mut W, mut value: u64) -> io::Result<()> where W: io::Write, { loop { let byte = (value & 0x7F) as u8; value >>= 7; if value == 0 { output.write_u8(byte)?; break; } else { output.write_u8(0x80 | byte)?; } } Ok(()) } lzma-rs-0.3.0/src/error.rs000064400000000000000000000034331046102023000135020ustar 00000000000000//! Error handling. use std::fmt::Display; use std::{io, result}; /// Library errors. #[derive(Debug)] pub enum Error { /// I/O error. IoError(io::Error), /// Not enough bytes to complete header HeaderTooShort(io::Error), /// LZMA error. LzmaError(String), /// XZ error. XzError(String), } /// Library result alias. 
pub type Result = result::Result; impl From for Error { fn from(e: io::Error) -> Error { Error::IoError(e) } } impl Display for Error { fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Error::IoError(e) => write!(fmt, "io error: {}", e), Error::HeaderTooShort(e) => write!(fmt, "header too short: {}", e), Error::LzmaError(e) => write!(fmt, "lzma error: {}", e), Error::XzError(e) => write!(fmt, "xz error: {}", e), } } } impl std::error::Error for Error { fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { match self { Error::IoError(e) | Error::HeaderTooShort(e) => Some(e), Error::LzmaError(_) | Error::XzError(_) => None, } } } #[cfg(test)] mod test { use super::Error; #[test] fn test_display() { assert_eq!( Error::IoError(std::io::Error::new( std::io::ErrorKind::Other, "this is an error" )) .to_string(), "io error: this is an error" ); assert_eq!( Error::LzmaError("this is an error".to_string()).to_string(), "lzma error: this is an error" ); assert_eq!( Error::XzError("this is an error".to_string()).to_string(), "xz error: this is an error" ); } } lzma-rs-0.3.0/src/lib.rs000064400000000000000000000061371046102023000131230ustar 00000000000000//! Pure-Rust codecs for LZMA, LZMA2, and XZ. #![cfg_attr(docsrs, feature(doc_cfg, doc_cfg_hide))] #![deny(missing_docs)] #![deny(missing_debug_implementations)] #![forbid(unsafe_code)] #[macro_use] mod macros; mod decode; mod encode; pub mod error; mod util; mod xz; use std::io; /// Compression helpers. pub mod compress { pub use crate::encode::options::*; } /// Decompression helpers. pub mod decompress { pub use crate::decode::options::*; #[cfg(feature = "raw_decoder")] #[cfg_attr(docsrs, doc(cfg(raw_decoder)))] pub mod raw { //! Raw decoding primitives for LZMA/LZMA2 streams. 
pub use crate::decode::lzma::{LzmaDecoder, LzmaParams, LzmaProperties}; pub use crate::decode::lzma2::Lzma2Decoder; } #[cfg(feature = "stream")] #[cfg_attr(docsrs, doc(cfg(stream)))] pub use crate::decode::stream::Stream; } /// Decompress LZMA data with default [`Options`](decompress/struct.Options.html). pub fn lzma_decompress( input: &mut R, output: &mut W, ) -> error::Result<()> { lzma_decompress_with_options(input, output, &decompress::Options::default()) } /// Decompress LZMA data with the provided options. pub fn lzma_decompress_with_options( input: &mut R, output: &mut W, options: &decompress::Options, ) -> error::Result<()> { let params = decode::lzma::LzmaParams::read_header(input, options)?; let mut decoder = decode::lzma::LzmaDecoder::new(params, options.memlimit)?; decoder.decompress(input, output) } /// Compresses data with LZMA and default [`Options`](compress/struct.Options.html). pub fn lzma_compress( input: &mut R, output: &mut W, ) -> io::Result<()> { lzma_compress_with_options(input, output, &compress::Options::default()) } /// Compress LZMA data with the provided options. pub fn lzma_compress_with_options( input: &mut R, output: &mut W, options: &compress::Options, ) -> io::Result<()> { let encoder = encode::dumbencoder::Encoder::from_stream(output, options)?; encoder.process(input) } /// Decompress LZMA2 data with default [`Options`](decompress/struct.Options.html). pub fn lzma2_decompress( input: &mut R, output: &mut W, ) -> error::Result<()> { decode::lzma2::Lzma2Decoder::new().decompress(input, output) } /// Compress data with LZMA2 and default [`Options`](compress/struct.Options.html). pub fn lzma2_compress( input: &mut R, output: &mut W, ) -> io::Result<()> { encode::lzma2::encode_stream(input, output) } /// Decompress XZ data with default [`Options`](decompress/struct.Options.html). 
pub fn xz_decompress( input: &mut R, output: &mut W, ) -> error::Result<()> { decode::xz::decode_stream(input, output) } /// Compress data with XZ and default [`Options`](compress/struct.Options.html). pub fn xz_compress(input: &mut R, output: &mut W) -> io::Result<()> { encode::xz::encode_stream(input, output) } lzma-rs-0.3.0/src/macros.rs000064400000000000000000000016071046102023000136360ustar 00000000000000/// Log trace message (feature: enabled). #[cfg(feature = "enable_logging")] macro_rules! lzma_trace { ($($arg:tt)+) => { log::trace!($($arg)+); } } /// Log debug message (feature: enabled). #[cfg(feature = "enable_logging")] macro_rules! lzma_debug { ($($arg:tt)+) => { log::debug!($($arg)+); } } /// Log info message (feature: enabled). #[cfg(feature = "enable_logging")] macro_rules! lzma_info { ($($arg:tt)+) => { log::info!($($arg)+); } } /// Log trace message (feature: disabled). #[cfg(not(feature = "enable_logging"))] macro_rules! lzma_trace { ($($arg:tt)+) => {}; } /// Log debug message (feature: disabled). #[cfg(not(feature = "enable_logging"))] macro_rules! lzma_debug { ($($arg:tt)+) => {}; } /// Log info message (feature: disabled). #[cfg(not(feature = "enable_logging"))] macro_rules! lzma_info { ($($arg:tt)+) => {}; } lzma-rs-0.3.0/src/util/mod.rs000064400000000000000000000000171046102023000141000ustar 00000000000000pub mod vec2d; lzma-rs-0.3.0/src/util/vec2d.rs000064400000000000000000000126071046102023000143340ustar 00000000000000use std::ops::{Index, IndexMut}; /// A 2 dimensional matrix in row-major order backed by a contiguous slice. #[derive(Debug)] pub struct Vec2D { data: Box<[T]>, cols: usize, } impl Vec2D { /// Initialize a grid of size (`rows`, `cols`) with the given data element. 
pub fn init(data: T, size: (usize, usize)) -> Vec2D where T: Clone, { let (rows, cols) = size; let len = rows .checked_mul(cols) .unwrap_or_else(|| panic!("{} rows by {} cols exceeds usize::MAX", rows, cols)); Vec2D { data: vec![data; len].into_boxed_slice(), cols, } } /// Fills the grid with elements by cloning `value`. pub fn fill(&mut self, value: T) where T: Clone, { self.data.fill(value) } } impl Index for Vec2D { type Output = [T]; #[inline] fn index(&self, row: usize) -> &Self::Output { let start_row = row .checked_mul(self.cols) .unwrap_or_else(|| panic!("{} row by {} cols exceeds usize::MAX", row, self.cols)); &self.data[start_row..start_row + self.cols] } } impl IndexMut for Vec2D { #[inline] fn index_mut(&mut self, row: usize) -> &mut Self::Output { let start_row = row .checked_mul(self.cols) .unwrap_or_else(|| panic!("{} row by {} cols exceeds usize::MAX", row, self.cols)); &mut self.data[start_row..start_row + self.cols] } } #[cfg(test)] mod test { use super::*; #[test] fn init() { let vec2d = Vec2D::init(1, (2, 3)); assert_eq!(vec2d[0], [1, 1, 1]); assert_eq!(vec2d[1], [1, 1, 1]); } #[test] #[should_panic] fn init_overflow() { Vec2D::init(1, (usize::MAX, usize::MAX)); } #[test] fn fill() { let mut vec2d = Vec2D::init(0, (2, 3)); vec2d.fill(7); assert_eq!(vec2d[0], [7, 7, 7]); assert_eq!(vec2d[1], [7, 7, 7]); } #[test] fn index() { let vec2d = Vec2D { data: vec![0, 1, 2, 3, 4, 5, 6, 7].into_boxed_slice(), cols: 2, }; assert_eq!(vec2d[0], [0, 1]); assert_eq!(vec2d[1], [2, 3]); assert_eq!(vec2d[2], [4, 5]); assert_eq!(vec2d[3], [6, 7]); } #[test] fn indexmut() { let mut vec2d = Vec2D { data: vec![0, 1, 2, 3, 4, 5, 6, 7].into_boxed_slice(), cols: 2, }; vec2d[1][1] = 9; assert_eq!(vec2d[0], [0, 1]); // (1, 1) should be 9. 
assert_eq!(vec2d[1], [2, 9]); assert_eq!(vec2d[2], [4, 5]); assert_eq!(vec2d[3], [6, 7]); }

    #[test]
    #[should_panic]
    fn index_out_of_bounds() {
        let vec2d = Vec2D::init(1, (2, 3));
        let _x = vec2d[2][3];
    }

    #[test]
    #[should_panic]
    fn index_out_of_bounds_vec_edge() {
        let vec2d = Vec2D::init(1, (2, 3));
        let _x = vec2d[1][3];
    }

    #[test]
    #[should_panic]
    fn index_column_out_of_bounds() {
        let vec2d = Vec2D::init(1, (2, 3));
        let _x = vec2d[0][3];
    }

    #[test]
    #[should_panic]
    fn index_row_out_of_bounds() {
        let vec2d = Vec2D::init(1, (2, 3));
        let _x = vec2d[2][0];
    }

    #[test]
    #[should_panic]
    fn index_mul_overflow() {
        // Matrix with 4 columns.
        let matrix = Vec2D::init(0, (3, 4));
        // 2^{usize.numbits() - 2}.
        let row = (usize::MAX / 4) + 1;
        // Returns the same as matrix[0] if overflow is not caught.
        let _ = matrix[row];
    }

    #[test]
    #[should_panic]
    fn index_add_overflow() {
        // Matrix with 5 columns.
        let matrix = Vec2D::init(0, (3, 5));
        // As long as numbits(usize) is a multiple of 4, 5 divides usize::MAX.
        // This is clear in hexadecimal: usize::MAX is 0xFFF...F and usize::MAX / 5 is 0x333...3.
        let row = usize::MAX / 5;
        // This will therefore try to index data[usize::MAX..4].
        let _ = matrix[row];
    }

    #[test]
    #[should_panic]
    fn indexmut_out_of_bounds() {
        let mut vec2d = Vec2D::init(1, (2, 3));
        vec2d[2][3] = 0;
    }

    #[test]
    #[should_panic]
    fn indexmut_out_of_bounds_vec_edge() {
        let mut vec2d = Vec2D::init(1, (2, 3));
        vec2d[1][3] = 0;
    }

    #[test]
    #[should_panic]
    fn indexmut_column_out_of_bounds() {
        let mut vec2d = Vec2D::init(1, (2, 3));
        vec2d[0][3] = 0;
    }

    #[test]
    #[should_panic]
    fn indexmut_row_out_of_bounds() {
        let mut vec2d = Vec2D::init(1, (2, 3));
        vec2d[2][0] = 0;
    }

    #[test]
    #[should_panic]
    fn indexmut_mul_overflow() {
        // Matrix with 4 columns.
        let mut matrix = Vec2D::init(0, (3, 4));
        // 2^{usize.numbits() - 2}.
        let row = (usize::MAX / 4) + 1;
        // Returns the same as matrix[0] if overflow is not caught.
        matrix[row][0] = 9;
    }

    #[test]
    #[should_panic]
    fn indexmut_add_overflow() {
        // Matrix with 5 columns.
        let mut matrix = Vec2D::init(0, (3, 5));
        // As long as numbits(usize) is a multiple of 4, 5 divides usize::MAX.
        // This is clear in hexadecimal: usize::MAX is 0xFFF...F and usize::MAX / 5 is 0x333...3.
        let row = usize::MAX / 5;
        // This will therefore try to index data[usize::MAX..4].
        matrix[row][0] = 9;
    }
}
lzma-rs-0.3.0/src/xz/crc.rs000064400000000000000000000002451046102023000135570ustar 00000000000000
use crc::{Crc, CRC_32_ISO_HDLC, CRC_64_XZ};

/// CRC32 calculator configured with the `CRC_32_ISO_HDLC` algorithm.
pub const CRC32: Crc<u32> = Crc::<u32>::new(&CRC_32_ISO_HDLC);
/// CRC64 calculator configured with the `CRC_64_XZ` algorithm.
pub const CRC64: Crc<u64> = Crc::<u64>::new(&CRC_64_XZ);
lzma-rs-0.3.0/src/xz/footer.rs000064400000000000000000000002011046102023000142760ustar 00000000000000
//! XZ footer.

/// File format trailing terminator, see sect. 2.1.2.4.
pub(crate) const XZ_MAGIC_FOOTER: &[u8] = &[0x59, 0x5A];
lzma-rs-0.3.0/src/xz/header.rs000064400000000000000000000030671046102023000142450ustar 00000000000000
//! XZ header.

use crate::decode::util;
use crate::error;
use crate::xz::crc::CRC32;
use crate::xz::StreamFlags;
use byteorder::{BigEndian, LittleEndian, ReadBytesExt};

/// File format magic header signature, see sect. 2.1.1.1.
pub(crate) const XZ_MAGIC: &[u8] = &[0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00];

/// Stream Header, see sect. 2.1.1.
#[derive(Clone, Copy, Debug)]
pub(crate) struct StreamHeader {
    pub(crate) stream_flags: StreamFlags,
}

impl StreamHeader {
    /// Parse a Stream Header from a buffered reader.
    ///
    /// Consumes, in order: the magic signature, the 16-bit Stream Flags and
    /// the 32-bit CRC32 that protects the flags.
    ///
    /// # Errors
    ///
    /// Returns an `XzError` if the magic signature does not match
    /// [`XZ_MAGIC`], if the stored CRC32 differs from the one computed over
    /// the flags, or if the flags themselves are invalid. Errors raised while
    /// reading from `input` are propagated.
    pub(crate) fn parse<BR>(input: &mut BR) -> error::Result<Self>
    where
        BR: std::io::BufRead,
    {
        if !util::read_tag(input, XZ_MAGIC)? {
            return Err(error::Error::XzError(format!(
                "Invalid XZ magic, expected {:?}",
                XZ_MAGIC
            )));
        }

        // Read the flags through a digesting reader so that the CRC32 is
        // computed over exactly the bytes that were consumed.
        let (flags, digested) = {
            let mut digest = CRC32.digest();
            let mut digest_rd = util::CrcDigestRead::new(input, &mut digest);
            let flags = digest_rd.read_u16::<BigEndian>()?;
            (flags, digest.finalize())
        };

        // The stored checksum follows the flags and is not part of the digest.
        let crc32 = input.read_u32::<LittleEndian>()?;
        if crc32 != digested {
            return Err(error::Error::XzError(format!(
                "Invalid header CRC32: expected 0x{:08x} but got 0x{:08x}",
                crc32, digested
            )));
        }

        let stream_flags = StreamFlags::parse(flags)?;
        let header = Self { stream_flags };

        lzma_info!("XZ check method: {:?}", header.stream_flags.check_method);
        Ok(header)
    }
}
lzma-rs-0.3.0/src/xz/mod.rs000064400000000000000000000061711046102023000135730ustar 00000000000000
//! Logic for handling `.xz` file format.
//!
//! Format specifications are at [https://tukaani.org/xz/xz-file-format.txt][spec].
//!
//! [spec]: https://tukaani.org/xz/xz-file-format.txt

use crate::error;
use std::io;

pub(crate) mod crc;
pub(crate) mod footer;
pub(crate) mod header;

/// Stream flags, see sect. 2.1.1.2.
///
/// This does not store the leading null byte, which is currently unused.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) struct StreamFlags {
    pub(crate) check_method: CheckMethod,
}

impl StreamFlags {
    /// Parse Stream Flags from a 16-bit value.
    ///
    /// The most significant byte must be null; the least significant byte is
    /// the Check ID.
    ///
    /// # Errors
    ///
    /// Returns an `XzError` if the first byte is not null or if the Check ID
    /// is not a known [`CheckMethod`].
    pub(crate) fn parse(input: u16) -> error::Result<Self> {
        let flags_bytes = input.to_be_bytes();

        if flags_bytes[0] != 0x00 {
            return Err(error::Error::XzError(format!(
                "Invalid null byte in Stream Flags: {:x}",
                flags_bytes[0]
            )));
        }

        let flags = Self {
            check_method: CheckMethod::try_from(flags_bytes[1])?,
        };
        Ok(flags)
    }

    /// Serialize Stream Flags into a writer.
    ///
    /// Returns the number of bytes written, which is always 2 on success.
    ///
    /// # Errors
    ///
    /// Propagates any I/O error reported by `writer`.
    pub(crate) fn serialize<W>(self, writer: &mut W) -> io::Result<usize>
    where
        W: io::Write,
    {
        // First byte is currently unused and hard-coded to null.
        let bytes = [0x00, self.check_method as u8];
        // `write` is allowed to stop after a partial write, which would
        // silently drop the Check ID; `write_all` emits both bytes or fails.
        writer.write_all(&bytes)?;
        Ok(bytes.len())
    }
}

/// Stream check type, see sect. 2.1.1.2.
///
/// The discriminant of each variant is its Check ID.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[repr(u8)]
pub enum CheckMethod {
    /// Check ID `0x00`.
    None = 0x00,
    /// Check ID `0x01`.
    Crc32 = 0x01,
    /// Check ID `0x04`.
    Crc64 = 0x04,
    /// Check ID `0x0A`.
    Sha256 = 0x0A,
}

impl CheckMethod {
    /// Parse Check ID (second byte in Stream Flags).
    ///
    /// # Errors
    ///
    /// Returns an `XzError` for any value other than `0x00`, `0x01`, `0x04`
    /// or `0x0A`.
    pub fn try_from(id: u8) -> error::Result<CheckMethod> {
        match id {
            0x00 => Ok(CheckMethod::None),
            0x01 => Ok(CheckMethod::Crc32),
            0x04 => Ok(CheckMethod::Crc64),
            0x0A => Ok(CheckMethod::Sha256),
            _ => Err(error::Error::XzError(format!(
                "Invalid check method {:x}, expected one of [0x00, 0x01, 0x04, 0x0A]",
                id
            ))),
        }
    }
}

impl From<CheckMethod> for u8 {
    /// Convert a check method back into its Check ID.
    fn from(method: CheckMethod) -> u8 {
        method as u8
    }
}

#[cfg(test)]
mod test {
    use super::*;
    use byteorder::{BigEndian, ReadBytesExt};
    use std::io::{Seek, SeekFrom};

    #[test]
    fn test_checkmethod_roundtrip() {
        let mut count_valid = 0;
        // Inclusive range so that 0xFF is exercised as well.
        for input in 0..=u8::MAX {
            if let Ok(check) = CheckMethod::try_from(input) {
                let output: u8 = check.into();
                assert_eq!(input, output);
                count_valid += 1;
            }
        }
        assert_eq!(count_valid, 4);
    }

    #[test]
    fn test_streamflags_roundtrip() {
        let input = StreamFlags {
            check_method: CheckMethod::Crc32,
        };

        let mut cursor = std::io::Cursor::new(vec![0u8; 2]);
        let len = input.serialize(&mut cursor).unwrap();
        assert_eq!(len, 2);

        cursor.seek(SeekFrom::Start(0)).unwrap();
        let field = cursor.read_u16::<BigEndian>().unwrap();
        let output = StreamFlags::parse(field).unwrap();
        assert_eq!(input, output);
    }
}