libflate-0.1.25/.gitignore010066600017500001750000000000221340050224200135600ustar0000000000000000target Cargo.lock libflate-0.1.25/.travis.yml010066600017500001750000000015721340050224200137140ustar0000000000000000language: rust sudo: required rust: - stable - beta - nightly matrix: allow_failures: - rust: nightly env: global: - RUSTFLAGS="-C link-dead-code" addons: apt: packages: - libcurl4-openssl-dev - libelf-dev - libdw-dev - cmake - gcc - binutils-dev - libiberty-dev after_success: | wget https://github.com/SimonKagstrom/kcov/archive/master.tar.gz && tar xzf master.tar.gz && cd kcov-master && mkdir build && cd build && cmake .. && make && make install DESTDIR=../../kcov-build && cd ../.. && rm -rf kcov-master && for file in target/debug/libflate-*[^\.d]; do mkdir -p "target/cov/$(basename $file)"; ./kcov-build/usr/local/bin/kcov --exclude-pattern=/.cargo,/usr/lib --verify "target/cov/$(basename $file)" "$file"; done && bash <(curl -s https://codecov.io/bash) && echo "Uploaded code coverage" libflate-0.1.25/Cargo.toml.orig010066600017500001750000000011511350664246300145040ustar0000000000000000[package] name = "libflate" version = "0.1.25" authors = ["Takeru Ohta "] description = "A Rust implementation of DEFLATE algorithm and related formats (ZLIB, GZIP)" homepage = "https://github.com/sile/libflate" repository = "https://github.com/sile/libflate" readme = "README.md" keywords = ["deflate", "gzip", "zlib"] categories = ["compression"] license = "MIT" [badges] travis-ci = {repository = "sile/libflate"} codecov = {repository = "sile/libflate"} [dependencies] adler32 = "1" byteorder = "1" crc32fast = "1" rle-decode-fast = "1.0.0" take_mut = "0.2.2" [dev-dependencies] clap = "2" libflate-0.1.25/Cargo.toml0000644000000023440000000000000107430ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite 
`path` dependencies # to registry (e.g. crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] name = "libflate" version = "0.1.25" authors = ["Takeru Ohta "] description = "A Rust implementation of DEFLATE algorithm and related formats (ZLIB, GZIP)" homepage = "https://github.com/sile/libflate" readme = "README.md" keywords = ["deflate", "gzip", "zlib"] categories = ["compression"] license = "MIT" repository = "https://github.com/sile/libflate" [dependencies.adler32] version = "1" [dependencies.byteorder] version = "1" [dependencies.crc32fast] version = "1" [dependencies.rle-decode-fast] version = "1.0.0" [dependencies.take_mut] version = "0.2.2" [dev-dependencies.clap] version = "2" [badges.codecov] repository = "sile/libflate" [badges.travis-ci] repository = "sile/libflate" libflate-0.1.25/Cargo.toml.orig0000644000000023450000000000000117030ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. 
If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] name = "libflate" version = "0.1.25" authors = ["Takeru Ohta "] description = "A Rust implementation of DEFLATE algorithm and related formats (ZLIB, GZIP)" homepage = "https://github.com/sile/libflate" readme = "README.md" keywords = ["deflate", "gzip", "zlib"] categories = ["compression"] license = "MIT" repository = "https://github.com/sile/libflate" [dependencies.adler32] version = "1" [dependencies.byteorder] version = "1" [dependencies.crc32fast] version = "1" [dependencies.rle-decode-fast] version = "1.0.0" [dependencies.take_mut] version = "0.2.2" [dev-dependencies.clap] version = "2" [badges.codecov] repository = "sile/libflate" [badges.travis-ci] repository = "sile/libflate" libflate-0.1.25/LICENSE010066600017500001750000000021051340050224200126010ustar0000000000000000The MIT License Copyright (c) 2016 Takeru Ohta Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
libflate-0.1.25/README.md010066600017500001750000000042231340050224200130560ustar0000000000000000libflate ======== [![libflate](https://img.shields.io/crates/v/libflate.svg)](https://crates.io/crates/libflate) [![Documentation](https://docs.rs/libflate/badge.svg)](https://docs.rs/libflate) [![Build Status](https://travis-ci.org/sile/libflate.svg?branch=master)](https://travis-ci.org/sile/libflate) [![Code Coverage](https://codecov.io/gh/sile/libflate/branch/master/graph/badge.svg)](https://codecov.io/gh/sile/libflate/branch/master) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) A Rust implementation of DEFLATE algorithm and related formats (ZLIB, GZIP). Documentation ------------- See [RustDoc Documentation](https://docs.rs/libflate). The documentation includes some examples. Installation ------------ Add following lines to your `Cargo.toml`: ```toml [dependencies] libflate = "0.1" ``` An Example ---------- Below is a command to decode GZIP stream that is read from the standard input: ```rust extern crate libflate; use std::io; use libflate::gzip::Decoder; fn main() { let mut input = io::stdin(); let mut decoder = Decoder::new(&mut input).unwrap(); io::copy(&mut decoder, &mut io::stdout()).unwrap(); } ``` An Informal Benchmark --------------------- A brief comparison with [flate2](https://github.com/alexcrichton/flate2-rs) and [inflate](https://github.com/PistonDevelopers/inflate): ```bash $ cd libflate/flate_bench/ $ curl -O https://dumps.wikimedia.org/enwiki/latest/enwiki-latest-all-titles-in-ns0.gz $ gzip -d enwiki-latest-all-titles-in-ns0.gz > ls -lh enwiki-latest-all-titles-in-ns0 -rw-rw-r-- 1 foo foo 265M May 18 05:19 enwiki-latest-all-titles-in-ns0 $ cargo run --release -- enwiki-latest-all-titles-in-ns0 # ENCODE (input_size=277303937) - libflate: elapsed=8.137013s, size=83259010 - flate2: elapsed=9.814607s, size=74692153 # DECODE (input_size=74217004) - libflate: elapsed=1.354556s, size=277303937 - flate2: elapsed=0.960907s, 
size=277303937 - inflate: elapsed=1.926142s, size=277303937 ``` References ---------- - DEFLATE: [RFC-1951](https://tools.ietf.org/html/rfc1951) - ZLIB: [RFC-1950](https://tools.ietf.org/html/rfc1950) - GZIP: [RFC-1952](https://tools.ietf.org/html/rfc1952) libflate-0.1.25/data/issues_16/crash-1bb6d408475a5bd57247ee40f290830adfe2086e010066600017500001750000000024421340050224200227310ustar0000000000000000x^)*~~~~~~~~~~~Kt<<<<<:<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<3<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<3<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<3<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<3<<<<<<<<<<<<<<<<<<<<<<<<<5 u!z@͕Guݢm)&?{i=Qaj8tU^C*HCl$M;|;m7zFps_+j[ E)Pl@7qQs&KDgyErnuc¡V~pEͷ/A,i-kKfEwE55 u!z@͕Guݢm)&?{i=Qaj8tU^C*HCl$M;|;m7zFps_+j[ E)Pl@7qQs&KDgyErnuc¡V~pEͷ/A,i-kKfEwE5 = if input_filename == "-" { Box::new(io::stdin()) } else { Box::new( fs::File::open(input_filename).expect(&format!("Can't open file: {}", input_filename)), ) }; let mut input = io::BufReader::new(input); let output_filename = matches.value_of("OUTPUT").unwrap(); let output: Box = if output_filename == "-" { Box::new(io::stdout()) } else if output_filename == "/dev/null" { Box::new(io::sink()) } else { Box::new( fs::File::create(output_filename) .expect(&format!("Can't create file: {}", output_filename)), ) }; let mut output = io::BufWriter::new(output); let verbose = matches.is_present("VERBOSE"); if let Some(_matches) = matches.subcommand_matches("copy") { io::copy(&mut input, &mut output).expect("Coyping failed"); } else if let Some(matches) = matches.subcommand_matches("byte-read") { let unit = matches .value_of("UNIT") .and_then(|x| x.parse::().ok()) .unwrap(); let mut buf = vec![0; unit]; let mut reader = input; let mut count = 0; while let Ok(size) = reader.read(&mut buf) 
{ if size == 0 { break; } count += size; } println!("COUNT: {}", count); } else if let Some(_matches) = matches.subcommand_matches("gzip-decode") { let mut decoder = gzip::Decoder::new(input).expect("Read GZIP header failed"); if verbose { let _ = writeln!(&mut io::stderr(), "HEADER: {:?}", decoder.header()); } io::copy(&mut decoder, &mut output).expect("Decoding GZIP stream failed"); } else if let Some(_matches) = matches.subcommand_matches("gzip-decode-multi") { let mut decoder = gzip::MultiDecoder::new(input).expect("Read GZIP header failed"); io::copy(&mut decoder, &mut output).expect("Decoding GZIP stream failed"); } else if let Some(_matches) = matches.subcommand_matches("gzip-encode") { let mut encoder = gzip::Encoder::new(output).unwrap(); io::copy(&mut input, &mut encoder).expect("Encoding GZIP stream failed"); encoder.finish().into_result().unwrap(); } else if let Some(_matches) = matches.subcommand_matches("zlib-decode") { let mut decoder = zlib::Decoder::new(input).expect("Read ZLIB header failed"); if verbose { let _ = writeln!(&mut io::stderr(), "HEADER: {:?}", decoder.header()); } io::copy(&mut decoder, &mut output).expect("Decoding ZLIB stream failed"); } else if let Some(_matches) = matches.subcommand_matches("zlib-encode") { let mut encoder = zlib::Encoder::new(output).unwrap(); io::copy(&mut input, &mut encoder).expect("Encoding ZLIB stream failed"); encoder.finish().into_result().unwrap(); } else { println!("{}", matches.usage()); process::exit(1); } } libflate-0.1.25/src/bit.rs010066600017500001750000000130031340050224200135060ustar0000000000000000use byteorder::LittleEndian; use byteorder::ReadBytesExt; use byteorder::WriteBytesExt; use std::io; #[derive(Debug)] pub struct BitWriter { inner: W, buf: u32, end: u8, } impl BitWriter where W: io::Write, { pub fn new(inner: W) -> Self { BitWriter { inner, buf: 0, end: 0, } } #[inline(always)] pub fn write_bit(&mut self, bit: bool) -> io::Result<()> { self.write_bits(1, bit as u16) } 
#[inline(always)] pub fn write_bits(&mut self, bitwidth: u8, bits: u16) -> io::Result<()> { debug_assert!(bitwidth < 16); debug_assert!(self.end + bitwidth <= 32); self.buf |= u32::from(bits) << self.end; self.end += bitwidth; self.flush_if_needed() } pub fn flush(&mut self) -> io::Result<()> { while self.end > 0 { self.inner.write_u8(self.buf as u8)?; self.buf >>= 8; self.end = self.end.saturating_sub(8); } self.inner.flush()?; Ok(()) } #[inline(always)] fn flush_if_needed(&mut self) -> io::Result<()> { if self.end >= 16 { self.inner.write_u16::(self.buf as u16)?; self.end -= 16; self.buf >>= 16; } Ok(()) } } impl BitWriter { pub fn as_inner_ref(&self) -> &W { &self.inner } pub fn as_inner_mut(&mut self) -> &mut W { &mut self.inner } pub fn into_inner(self) -> W { self.inner } } #[derive(Debug)] pub struct BitReader { inner: R, last_read: u32, offset: u8, last_error: Option, } impl BitReader where R: io::Read, { pub fn new(inner: R) -> Self { BitReader { inner, last_read: 0, offset: 32, last_error: None, } } #[inline(always)] pub fn set_last_error(&mut self, e: io::Error) { self.last_error = Some(e); } #[inline(always)] pub fn check_last_error(&mut self) -> io::Result<()> { if let Some(e) = self.last_error.take() { Err(e) } else { Ok(()) } } #[inline(always)] pub fn read_bit(&mut self) -> io::Result { self.read_bits(1).map(|b| b != 0) } #[inline(always)] pub fn read_bits(&mut self, bitwidth: u8) -> io::Result { let v = self.read_bits_unchecked(bitwidth); self.check_last_error().map(|_| v) } #[inline(always)] pub fn read_bits_unchecked(&mut self, bitwidth: u8) -> u16 { let bits = self.peek_bits_unchecked(bitwidth); self.skip_bits(bitwidth); bits } #[inline(always)] pub fn peek_bits_unchecked(&mut self, bitwidth: u8) -> u16 { debug_assert!(bitwidth <= 16); while 32 < self.offset + bitwidth { if self.last_error.is_some() { return 0; } if let Err(e) = self.fill_next_u8() { self.last_error = Some(e); return 0; } } debug_assert!(self.offset < 32 || bitwidth == 0); let 
bits = self.last_read.wrapping_shr(u32::from(self.offset)) as u16; bits & ((1 << bitwidth) - 1) } #[inline(always)] pub fn skip_bits(&mut self, bitwidth: u8) { debug_assert!(self.last_error.is_some() || 32 - self.offset >= bitwidth); self.offset += bitwidth; } #[inline(always)] fn fill_next_u8(&mut self) -> io::Result<()> { self.offset -= 8; self.last_read >>= 8; let next = u32::from(self.inner.read_u8()?); self.last_read |= next << (32 - 8); Ok(()) } #[inline] pub(crate) fn state(&self) -> BitReaderState { BitReaderState { last_read: self.last_read, offset: self.offset, } } #[inline] pub(crate) fn restore_state(&mut self, state: BitReaderState) { self.last_read = state.last_read; self.offset = state.offset; } } impl BitReader { pub fn reset(&mut self) { self.offset = 32; } pub fn as_inner_ref(&self) -> &R { &self.inner } pub fn as_inner_mut(&mut self) -> &mut R { &mut self.inner } pub fn into_inner(self) -> R { self.inner } } #[derive(Debug, Clone, Copy)] pub(crate) struct BitReaderState { last_read: u32, offset: u8, } #[cfg(test)] mod test { use super::*; use std::io; #[test] fn writer_works() { let mut writer = BitWriter::new(Vec::new()); writer.write_bit(true).unwrap(); writer.write_bits(3, 0b010).unwrap(); writer.write_bits(11, 0b10101011010).unwrap(); writer.flush().unwrap(); writer.write_bit(true).unwrap(); writer.flush().unwrap(); let buf = writer.into_inner(); assert_eq!(buf, [0b10100101, 0b01010101, 0b00000001]); } #[test] fn reader_works() { let buf = [0b10100101, 0b11010101]; let mut reader = BitReader::new(&buf[..]); assert_eq!(reader.read_bit().unwrap(), true); assert_eq!(reader.read_bit().unwrap(), false); assert_eq!(reader.read_bits(8).unwrap(), 0b01101001); assert_eq!(reader.peek_bits_unchecked(3), 0b101); assert_eq!(reader.peek_bits_unchecked(3), 0b101); reader.skip_bits(1); assert_eq!(reader.peek_bits_unchecked(3), 0b010); assert_eq!( reader.read_bits(8).map_err(|e| e.kind()), Err(io::ErrorKind::UnexpectedEof) ); } } 
libflate-0.1.25/src/checksum.rs010066600017500001750000000023201340050301000145240ustar0000000000000000use adler32::RollingAdler32; use crc32fast; use std::fmt; pub struct Adler32(RollingAdler32); impl Adler32 { pub fn new() -> Self { Adler32(RollingAdler32::new()) } pub fn value(&self) -> u32 { self.0.hash() } pub fn update(&mut self, buf: &[u8]) { self.0.update_buffer(buf); } } impl fmt::Debug for Adler32 { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "Adler32(_)") } } pub struct Crc32(crc32fast::Hasher); impl Crc32 { pub fn new() -> Self { Crc32(crc32fast::Hasher::new()) } pub fn value(&self) -> u32 { self.0.clone().finalize() } pub fn update(&mut self, buf: &[u8]) { self.0.update(buf); } } impl fmt::Debug for Crc32 { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "Crc32(_)") } } #[cfg(test)] mod test { use super::*; #[test] fn crc32_works() { let mut crc32 = Crc32::new(); crc32.update(b"abcde"); assert_eq!(crc32.value(), 0x8587D865); } #[test] fn adler32_works() { let mut adler32 = Adler32::new(); adler32.update(b"abcde"); assert_eq!(adler32.value(), 0x05C801F0); } } libflate-0.1.25/src/deflate/decode.rs010066600017500001750000000176741350664244700156240ustar0000000000000000use byteorder::LittleEndian; use byteorder::ReadBytesExt; use rle_decode_fast::rle_decode; use std::cmp; use std::io; use std::io::Read; use super::symbol; use bit; use lz77; /// DEFLATE decoder. #[derive(Debug)] pub struct Decoder { bit_reader: bit::BitReader, buffer: Vec, offset: usize, eos: bool, } impl Decoder where R: Read, { /// Makes a new decoder instance. /// /// `inner` is to be decoded DEFLATE stream. 
/// /// # Examples /// ``` /// use std::io::{Cursor, Read}; /// use libflate::deflate::Decoder; /// /// let encoded_data = [243, 72, 205, 201, 201, 87, 8, 207, 47, 202, 73, 81, 4, 0]; /// let mut decoder = Decoder::new(&encoded_data[..]); /// let mut buf = Vec::new(); /// decoder.read_to_end(&mut buf).unwrap(); /// /// assert_eq!(buf, b"Hello World!"); /// ``` pub fn new(inner: R) -> Self { Decoder { bit_reader: bit::BitReader::new(inner), buffer: Vec::new(), offset: 0, eos: false, } } /// Returns the immutable reference to the inner stream. pub fn as_inner_ref(&self) -> &R { self.bit_reader.as_inner_ref() } /// Returns the mutable reference to the inner stream. pub fn as_inner_mut(&mut self) -> &mut R { self.bit_reader.as_inner_mut() } /// Unwraps this `Decoder`, returning the underlying reader. /// /// # Examples /// ``` /// use std::io::Cursor; /// use libflate::deflate::Decoder; /// /// let encoded_data = [243, 72, 205, 201, 201, 87, 8, 207, 47, 202, 73, 81, 4, 0]; /// let decoder = Decoder::new(Cursor::new(&encoded_data)); /// assert_eq!(decoder.into_inner().into_inner(), &encoded_data); /// ``` pub fn into_inner(self) -> R { self.bit_reader.into_inner() } fn read_non_compressed_block(&mut self) -> io::Result<()> { self.bit_reader.reset(); let len = self.bit_reader.as_inner_mut().read_u16::()?; let nlen = self.bit_reader.as_inner_mut().read_u16::()?; if !len != nlen { Err(invalid_data_error!( "LEN={} is not the one's complement of NLEN={}", len, nlen )) } else { let old_len = self.buffer.len(); // We cannot use `self.buffer.set_len()` here because that would // pass uninitialized memory to .read_exact(), which does // NOT guarantee that it will never read from the buffer. 
// See https://github.com/rust-lang/rust/pull/62102/ // Surprisingly, zero-initializing the buffer here // makes decoding 5% **faster** than using reserve() + set_len() self.buffer.resize(old_len + len as usize, 0); self.bit_reader .as_inner_mut() .read_exact(&mut self.buffer[old_len..])?; Ok(()) } } fn read_compressed_block(&mut self, huffman: &H) -> io::Result<()> where H: symbol::HuffmanCodec, { let symbol_decoder = huffman.load(&mut self.bit_reader)?; loop { let s = symbol_decoder.decode_unchecked(&mut self.bit_reader); self.bit_reader.check_last_error()?; match s { symbol::Symbol::Literal(b) => { self.buffer.push(b); } symbol::Symbol::Share { length, distance } => { if self.buffer.len() < distance as usize { return Err(invalid_data_error!( "Too long backword reference: buffer.len={}, distance={}", self.buffer.len(), distance )); } rle_decode(&mut self.buffer, usize::from(distance), usize::from(length)); } symbol::Symbol::EndOfBlock => { break; } } } Ok(()) } fn truncate_old_buffer(&mut self) { if self.buffer.len() > lz77::MAX_DISTANCE as usize * 4 { let old_len = self.buffer.len(); let new_len = lz77::MAX_DISTANCE as usize; { // isolation to please borrow checker let (dst, src) = self.buffer.split_at_mut(old_len - new_len); dst[..new_len].copy_from_slice(src); } self.buffer.truncate(new_len); self.offset = new_len; } } } impl Read for Decoder where R: Read, { fn read(&mut self, buf: &mut [u8]) -> io::Result { if self.offset < self.buffer.len() { let copy_size = cmp::min(buf.len(), self.buffer.len() - self.offset); buf[..copy_size].copy_from_slice(&self.buffer[self.offset..][..copy_size]); self.offset += copy_size; Ok(copy_size) } else if self.eos { Ok(0) } else { let bfinal = self.bit_reader.read_bit()?; let btype = self.bit_reader.read_bits(2)?; self.eos = bfinal; self.truncate_old_buffer(); match btype { 0b00 => { self.read_non_compressed_block()?; self.read(buf) } 0b01 => { self.read_compressed_block(&symbol::FixedHuffmanCodec)?; self.read(buf) } 0b10 => { 
self.read_compressed_block(&symbol::DynamicHuffmanCodec)?; self.read(buf) } 0b11 => Err(invalid_data_error!( "btype 0x11 of DEFLATE is reserved(error) value" )), _ => unreachable!(), } } } } #[cfg(test)] mod test { use super::*; use deflate::symbol::{DynamicHuffmanCodec, HuffmanCodec}; use std::io; #[test] fn test_issues_3() { // see: https://github.com/sile/libflate/issues/3 let input = [ 180, 253, 73, 143, 28, 201, 150, 46, 8, 254, 150, 184, 139, 75, 18, 69, 247, 32, 157, 51, 27, 141, 132, 207, 78, 210, 167, 116, 243, 160, 223, 136, 141, 66, 205, 76, 221, 76, 195, 213, 84, 236, 234, 224, 78, 227, 34, 145, 221, 139, 126, 232, 69, 173, 170, 208, 192, 219, 245, 67, 3, 15, 149, 120, 171, 70, 53, 106, 213, 175, 23, 21, 153, 139, 254, 27, 249, 75, 234, 124, 71, 116, 56, 71, 68, 212, 204, 121, 115, 64, 222, 160, 203, 119, 142, 170, 169, 138, 202, 112, 228, 140, 38, ]; let mut bit_reader = ::bit::BitReader::new(&input[..]); assert_eq!(bit_reader.read_bit().unwrap(), false); // not final block assert_eq!(bit_reader.read_bits(2).unwrap(), 0b10); // DynamicHuffmanCodec DynamicHuffmanCodec.load(&mut bit_reader).unwrap(); } #[test] fn it_works() { let input = [ 180, 253, 73, 143, 28, 201, 150, 46, 8, 254, 150, 184, 139, 75, 18, 69, 247, 32, 157, 51, 27, 141, 132, 207, 78, 210, 167, 116, 243, 160, 223, 136, 141, 66, 205, 76, 221, 76, 195, 213, 84, 236, 234, 224, 78, 227, 34, 145, 221, 139, 126, 232, 69, 173, 170, 208, 192, 219, 245, 67, 3, 15, 149, 120, 171, 70, 53, 106, 213, 175, 23, 21, 153, 139, 254, 27, 249, 75, 234, 124, 71, 116, 56, 71, 68, 212, 204, 121, 115, 64, 222, 160, 203, 119, 142, 170, 169, 138, 202, 112, 228, 140, 38, 171, 162, 88, 212, 235, 56, 136, 231, 233, 239, 113, 249, 163, 252, 16, 42, 138, 49, 226, 108, 73, 28, 153, ]; let mut decoder = Decoder::new(&input[..]); let result = io::copy(&mut decoder, &mut io::sink()); assert!(result.is_err()); let error = result.err().unwrap(); assert_eq!(error.kind(), io::ErrorKind::InvalidData); 
assert!(error.to_string().starts_with("Too long backword reference")); } } libflate-0.1.25/src/deflate/encode.rs010066600017500001750000000270741347270214500156230ustar0000000000000000use byteorder::LittleEndian; use byteorder::WriteBytesExt; use std::cmp; use std::io; use super::symbol; use super::BlockType; use bit; use finish::{Complete, Finish}; use lz77; /// The default size of a DEFLATE block. pub const DEFAULT_BLOCK_SIZE: usize = 1024 * 1024; const MAX_NON_COMPRESSED_BLOCK_SIZE: usize = 0xFFFF; /// Options for a DEFLATE encoder. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct EncodeOptions { block_size: usize, dynamic_huffman: bool, lz77: Option, } impl Default for EncodeOptions { fn default() -> Self { Self::new() } } impl EncodeOptions { /// Makes a default instance. /// /// # Examples /// ``` /// use libflate::deflate::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new(); /// let encoder = Encoder::with_options(Vec::new(), options); /// ``` pub fn new() -> Self { EncodeOptions { block_size: DEFAULT_BLOCK_SIZE, dynamic_huffman: true, lz77: Some(lz77::DefaultLz77Encoder::new()), } } } impl EncodeOptions where E: lz77::Lz77Encode, { /// Specifies the LZ77 encoder used to compress input data. /// /// # Example /// ``` /// use libflate::lz77::DefaultLz77Encoder; /// use libflate::deflate::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::with_lz77(DefaultLz77Encoder::new()); /// let encoder = Encoder::with_options(Vec::new(), options); /// ``` pub fn with_lz77(lz77: E) -> Self { EncodeOptions { block_size: DEFAULT_BLOCK_SIZE, dynamic_huffman: true, lz77: Some(lz77), } } /// Disables LZ77 compression. 
/// /// # Example /// ``` /// use libflate::lz77::DefaultLz77Encoder; /// use libflate::deflate::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new().no_compression(); /// let encoder = Encoder::with_options(Vec::new(), options); /// ``` pub fn no_compression(mut self) -> Self { self.lz77 = None; self } /// Specifies the hint of the size of a DEFLATE block. /// /// The default value is `DEFAULT_BLOCK_SIZE`. /// /// # Example /// ``` /// use libflate::deflate::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new().block_size(512 * 1024); /// let encoder = Encoder::with_options(Vec::new(), options); /// ``` pub fn block_size(mut self, size: usize) -> Self { self.block_size = size; self } /// Specifies to compress with fixed huffman codes. /// /// # Example /// ``` /// use libflate::deflate::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new().fixed_huffman_codes(); /// let encoder = Encoder::with_options(Vec::new(), options); /// ``` pub fn fixed_huffman_codes(mut self) -> Self { self.dynamic_huffman = false; self } fn get_block_type(&self) -> BlockType { if self.lz77.is_none() { BlockType::Raw } else if self.dynamic_huffman { BlockType::Dynamic } else { BlockType::Fixed } } fn get_block_size(&self) -> usize { if self.lz77.is_none() { cmp::min(self.block_size, MAX_NON_COMPRESSED_BLOCK_SIZE) } else { self.block_size } } } /// DEFLATE encoder. #[derive(Debug)] pub struct Encoder { writer: bit::BitWriter, block: Block, } impl Encoder where W: io::Write, { /// Makes a new encoder instance. /// /// Encoded DEFLATE stream is written to `inner`. 
/// /// # Examples /// ``` /// use std::io::Write; /// use libflate::deflate::Encoder; /// /// let mut encoder = Encoder::new(Vec::new()); /// encoder.write_all(b"Hello World!").unwrap(); /// /// assert_eq!(encoder.finish().into_result().unwrap(), /// [5, 192, 49, 13, 0, 0, 8, 3, 65, 43, 224, 6, 7, 24, 128, 237, /// 147, 38, 245, 63, 244, 230, 65, 181, 50, 215, 1]); /// ``` pub fn new(inner: W) -> Self { Self::with_options(inner, EncodeOptions::default()) } } impl Encoder where W: io::Write, E: lz77::Lz77Encode, { /// Makes a new encoder instance with specified options. /// /// Encoded DEFLATE stream is written to `inner`. /// /// # Examples /// ``` /// use std::io::Write; /// use libflate::deflate::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new().no_compression(); /// let mut encoder = Encoder::with_options(Vec::new(), options); /// encoder.write_all(b"Hello World!").unwrap(); /// /// assert_eq!(encoder.finish().into_result().unwrap(), /// [1, 12, 0, 243, 255, 72, 101, 108, 108, 111, 32, 87, 111, /// 114, 108, 100, 33]); /// ``` pub fn with_options(inner: W, options: EncodeOptions) -> Self { Encoder { writer: bit::BitWriter::new(inner), block: Block::new(options), } } /// Flushes internal buffer and returns the inner stream. /// /// # Examples /// ``` /// use std::io::Write; /// use libflate::deflate::Encoder; /// /// let mut encoder = Encoder::new(Vec::new()); /// encoder.write_all(b"Hello World!").unwrap(); /// /// assert_eq!(encoder.finish().into_result().unwrap(), /// [5, 192, 49, 13, 0, 0, 8, 3, 65, 43, 224, 6, 7, 24, 128, 237, /// 147, 38, 245, 63, 244, 230, 65, 181, 50, 215, 1]); /// ``` pub fn finish(mut self) -> Finish { match self.block.finish(&mut self.writer) { Ok(_) => Finish::new(self.writer.into_inner(), None), Err(e) => Finish::new(self.writer.into_inner(), Some(e)), } } /// Returns the immutable reference to the inner stream. 
pub fn as_inner_ref(&self) -> &W { self.writer.as_inner_ref() } /// Returns the mutable reference to the inner stream. pub fn as_inner_mut(&mut self) -> &mut W { self.writer.as_inner_mut() } /// Unwraps the `Encoder`, returning the inner stream. pub fn into_inner(self) -> W { self.writer.into_inner() } } impl io::Write for Encoder where W: io::Write, E: lz77::Lz77Encode, { fn write(&mut self, buf: &[u8]) -> io::Result { self.block.write(&mut self.writer, buf)?; Ok(buf.len()) } fn flush(&mut self) -> io::Result<()> { self.writer.as_inner_mut().flush() } } impl Complete for Encoder where W: io::Write, E: lz77::Lz77Encode, { fn complete(self) -> io::Result<()> { self.finish().into_result().map(|_| ()) } } #[derive(Debug)] struct Block { block_type: BlockType, block_size: usize, block_buf: BlockBuf, } impl Block where E: lz77::Lz77Encode, { fn new(options: EncodeOptions) -> Self { Block { block_type: options.get_block_type(), block_size: options.get_block_size(), block_buf: BlockBuf::new(options.lz77, options.dynamic_huffman), } } fn write(&mut self, writer: &mut bit::BitWriter, buf: &[u8]) -> io::Result<()> where W: io::Write, { self.block_buf.append(buf); while self.block_buf.len() >= self.block_size { writer.write_bit(false)?; writer.write_bits(2, self.block_type as u16)?; self.block_buf.flush(writer)?; } Ok(()) } fn finish(mut self, writer: &mut bit::BitWriter) -> io::Result<()> where W: io::Write, { writer.write_bit(true)?; writer.write_bits(2, self.block_type as u16)?; self.block_buf.flush(writer)?; writer.flush()?; Ok(()) } } #[derive(Debug)] enum BlockBuf { Raw(RawBuf), Fixed(CompressBuf), Dynamic(CompressBuf), } impl BlockBuf where E: lz77::Lz77Encode, { fn new(lz77: Option, dynamic: bool) -> Self { if let Some(lz77) = lz77 { if dynamic { BlockBuf::Dynamic(CompressBuf::new(symbol::DynamicHuffmanCodec, lz77)) } else { BlockBuf::Fixed(CompressBuf::new(symbol::FixedHuffmanCodec, lz77)) } } else { BlockBuf::Raw(RawBuf::new()) } } fn append(&mut self, buf: &[u8]) { 
match *self { BlockBuf::Raw(ref mut b) => b.append(buf), BlockBuf::Fixed(ref mut b) => b.append(buf), BlockBuf::Dynamic(ref mut b) => b.append(buf), } } fn len(&self) -> usize { match *self { BlockBuf::Raw(ref b) => b.len(), BlockBuf::Fixed(ref b) => b.len(), BlockBuf::Dynamic(ref b) => b.len(), } } fn flush(&mut self, writer: &mut bit::BitWriter) -> io::Result<()> where W: io::Write, { match *self { BlockBuf::Raw(ref mut b) => b.flush(writer), BlockBuf::Fixed(ref mut b) => b.flush(writer), BlockBuf::Dynamic(ref mut b) => b.flush(writer), } } } #[derive(Debug)] struct RawBuf { buf: Vec, } impl RawBuf { fn new() -> Self { RawBuf { buf: Vec::new() } } fn append(&mut self, buf: &[u8]) { self.buf.extend_from_slice(buf); } fn len(&self) -> usize { self.buf.len() } fn flush(&mut self, writer: &mut bit::BitWriter) -> io::Result<()> where W: io::Write, { let size = cmp::min(self.buf.len(), MAX_NON_COMPRESSED_BLOCK_SIZE); writer.flush()?; writer .as_inner_mut() .write_u16::(size as u16)?; writer .as_inner_mut() .write_u16::(!size as u16)?; writer.as_inner_mut().write_all(&self.buf[..size])?; self.buf.drain(0..size); Ok(()) } } #[derive(Debug)] struct CompressBuf { huffman: H, lz77: E, buf: Vec, original_size: usize, } impl CompressBuf where H: symbol::HuffmanCodec, E: lz77::Lz77Encode, { fn new(huffman: H, lz77: E) -> Self { CompressBuf { huffman, lz77, buf: Vec::new(), original_size: 0, } } fn append(&mut self, buf: &[u8]) { self.original_size += buf.len(); self.lz77.encode(buf, &mut self.buf); } fn len(&self) -> usize { self.original_size } fn flush(&mut self, writer: &mut bit::BitWriter) -> io::Result<()> where W: io::Write, { self.lz77.flush(&mut self.buf); self.buf.push(symbol::Symbol::EndOfBlock); let symbol_encoder = self.huffman.build(&self.buf)?; self.huffman.save(writer, &symbol_encoder)?; for s in self.buf.drain(..) 
{
            symbol_encoder.encode(writer, &s)?;
        }
        self.original_size = 0;
        Ok(())
    }
}

/// Lets a symbol vector act as the output sink of an LZ77 encoder.
impl lz77::Sink for Vec<symbol::Symbol> {
    /// Converts an LZ77 code into the matching DEFLATE symbol and appends it.
    fn consume(&mut self, code: lz77::Code) {
        let symbol = match code {
            lz77::Code::Literal(b) => symbol::Symbol::Literal(b),
            lz77::Code::Pointer {
                length,
                backward_distance,
            } => symbol::Symbol::Share {
                length,
                distance: backward_distance,
            },
        };
        self.push(symbol);
    }
}
libflate-0.1.25/src/deflate/mod.rs010066600017500001750000000031261340050224200151200ustar0000000000000000
//! The encoder and decoder of the DEFLATE format and algorithm.
//!
//! DEFLATE is defined in [RFC-1951](https://tools.ietf.org/html/rfc1951).
//!
//! # Examples
//! ```
//! use std::io::{self, Read};
//! use libflate::deflate::{Encoder, Decoder};
//!
//! // Encoding
//! let mut encoder = Encoder::new(Vec::new());
//! io::copy(&mut &b"Hello World!"[..], &mut encoder).unwrap();
//! let encoded_data = encoder.finish().into_result().unwrap();
//!
//! // Decoding
//! let mut decoder = Decoder::new(&encoded_data[..]);
//! let mut decoded_data = Vec::new();
//! decoder.read_to_end(&mut decoded_data).unwrap();
//!
//! assert_eq!(decoded_data, b"Hello World!");
//! ```
pub use self::decode::Decoder;
pub use self::encode::EncodeOptions;
pub use self::encode::Encoder;
pub use self::encode::DEFAULT_BLOCK_SIZE;

mod decode;
mod encode;
pub(crate) mod symbol;

/// Block type identifiers (the discriminants are the two-bit type values).
#[derive(Debug, Clone, Copy)]
enum BlockType {
    Raw = 0b00,
    Fixed = 0b01,
    Dynamic = 0b10,
}

#[cfg(test)]
mod test {
    use std::io::{Read, Write};

    use super::*;
    use lz77;

    // Round-trips a buffer much longer than the LZ77 window through the encoder and decoder.
    #[test]
    fn encode_and_decode_works() {
        let plain = (0..lz77::MAX_DISTANCE as u32 * 32)
            .map(|i| i as u8)
            .collect::<Vec<_>>();

        let buffer = Vec::new();
        let mut encoder = Encoder::new(buffer);
        encoder.write_all(&plain[..]).expect("encode");
        let encoded = encoder.finish().into_result().unwrap();

        let mut buffer = Vec::new();
        let mut decoder = Decoder::new(&encoded[..]);
        decoder.read_to_end(&mut buffer).expect("decode");

        assert_eq!(buffer, plain);
    }
}
libflate-0.1.25/src/deflate/symbol.rs010066600017500001750000000372161350614225000156630ustar0000000000000000
use std::cmp;
use std::io;
use std::iter;
use std::ops::Range;

use bit;
use huffman;
use huffman::Builder;

// Fixed Huffman code for literal/length symbols.
// Each entry is `(bit width, symbol range, code assigned to the first symbol of the range)`.
const FIXED_LITERAL_OR_LENGTH_CODE_TABLE: [(u8, Range<u16>, u16); 4] = [
    (8, 000..144, 0b0_0011_0000),
    (9, 144..256, 0b1_1001_0000),
    (7, 256..280, 0b0_0000_0000),
    (8, 280..288, 0b0_1100_0000),
];

// Order in which the bit widths of the bit-width codes are stored in a dynamic block header.
const BITWIDTH_CODE_ORDER: [usize; 19] = [
    16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15,
];

const END_OF_BLOCK: u16 = 256;

// `(base length, number of extra bits)`, indexed by `length code - 257`.
const LENGTH_TABLE: [(u16, u8); 29] = [
    (3, 0),
    (4, 0),
    (5, 0),
    (6, 0),
    (7, 0),
    (8, 0),
    (9, 0),
    (10, 0),
    (11, 1),
    (13, 1),
    (15, 1),
    (17, 1),
    (19, 2),
    (23, 2),
    (27, 2),
    (31, 2),
    (35, 3),
    (43, 3),
    (51, 3),
    (59, 3),
    (67, 4),
    (83, 4),
    (99, 4),
    (115, 4),
    (131, 5),
    (163, 5),
    (195, 5),
    (227, 5),
    (258, 0),
];

const MAX_DISTANCE_CODE_COUNT: usize = 30;

// `(base distance, number of extra bits)`, indexed by distance code.
const DISTANCE_TABLE: [(u16, u8); 30] = [
    (1, 0),
    (2, 0),
    (3, 0),
    (4, 0),
    (5, 1),
    (7, 1),
    (9, 2),
    (13, 2),
    (17, 3),
    (25, 3),
    (33, 4),
    (49, 4),
    (65, 5),
    (97, 5),
    (129, 6),
    (193, 6),
    (257, 7),
    (385, 7),
    (513, 8),
    (769, 8),
    (1025, 9),
    (1537, 9),
    (2049, 10),
    (3073, 10),
    (4097, 11),
    (6145, 11),
    (8193,
12),
    (12_289, 12),
    (16_385, 13),
    (24_577, 13),
];

/// A DEFLATE symbol: a literal byte, a back-reference, or the end-of-block marker.
#[derive(Debug, PartialEq, Eq)]
pub enum Symbol {
    EndOfBlock,
    Literal(u8),
    Share { length: u16, distance: u16 },
}
impl Symbol {
    /// Returns the literal/length code of this symbol.
    ///
    /// Literals map to their byte value, `EndOfBlock` to 256, and `Share` to 257..=285
    /// depending on `length`.
    ///
    /// # Panics
    ///
    /// Panics if this is a `Share` whose `length` is outside `3..=258`.
    pub fn code(&self) -> u16 {
        match *self {
            Symbol::Literal(b) => u16::from(b),
            Symbol::EndOfBlock => END_OF_BLOCK,
            Symbol::Share { length, .. } => match length {
                3..=10 => 257 + length - 3,
                11..=18 => 265 + (length - 11) / 2,
                19..=34 => 269 + (length - 19) / 4,
                35..=66 => 273 + (length - 35) / 8,
                67..=130 => 277 + (length - 67) / 16,
                131..=257 => 281 + (length - 131) / 32,
                258 => 285,
                _ => unreachable!(),
            },
        }
    }

    /// Returns `(number of extra bits, extra bits value)` for the length of a `Share`.
    ///
    /// Returns `None` for other symbols and for lengths that need no extra bits.
    ///
    /// # Panics
    ///
    /// Panics if this is a `Share` whose `length` is outside `3..=258`.
    pub fn extra_lengh(&self) -> Option<(u8, u16)> {
        if let Symbol::Share { length, .. } = *self {
            match length {
                3..=10 | 258 => None,
                11..=18 => Some((1, (length - 11) % 2)),
                19..=34 => Some((2, (length - 19) % 4)),
                35..=66 => Some((3, (length - 35) % 8)),
                67..=130 => Some((4, (length - 67) % 16)),
                131..=257 => Some((5, (length - 131) % 32)),
                _ => unreachable!(),
            }
        } else {
            None
        }
    }

    /// Returns `(distance code, number of extra bits, extra bits value)` for a `Share`,
    /// or `None` for other symbols.
    pub fn distance(&self) -> Option<(u8, u8, u16)> {
        if let Symbol::Share { distance, ..
} = *self {
            if distance <= 4 {
                // Distances 1..=4 map to codes 0..=3 and need no extra bits.
                Some((distance as u8 - 1, 0, 0))
            } else {
                // Each doubling of `base` covers two codes and adds one extra bit.
                let mut extra_bits = 1;
                let mut code = 4;
                let mut base = 4;
                while base * 2 < distance {
                    extra_bits += 1;
                    code += 2;
                    base *= 2;
                }
                let half = base / 2;
                let delta = distance - base - 1;
                // The lower half of the range uses the even code, the upper half the odd one.
                if distance <= base + half {
                    Some((code, extra_bits, delta % half))
                } else {
                    Some((code + 1, extra_bits, delta % half))
                }
            }
        } else {
            None
        }
    }
}

/// Symbol encoder: one Huffman encoder for literal/length codes and one for distance codes.
#[derive(Debug)]
pub struct Encoder {
    literal: huffman::Encoder,
    distance: huffman::Encoder,
}
impl Encoder {
    /// Writes `symbol` to `writer`.
    ///
    /// The literal/length code comes first, followed by any length extra bits.
    /// For a `Share`, the distance code and its extra bits follow.
    pub fn encode<W>(&self, writer: &mut bit::BitWriter<W>, symbol: &Symbol) -> io::Result<()>
    where
        W: io::Write,
    {
        self.literal.encode(writer, symbol.code())?;
        if let Some((bits, extra)) = symbol.extra_lengh() {
            writer.write_bits(bits, extra)?;
        }
        if let Some((code, bits, extra)) = symbol.distance() {
            self.distance.encode(writer, u16::from(code))?;
            if bits > 0 {
                writer.write_bits(bits, extra)?;
            }
        }
        Ok(())
    }
}

/// Symbol decoder: one Huffman decoder for literal/length codes and one for distance codes.
#[derive(Debug)]
pub struct Decoder {
    literal: huffman::Decoder,
    distance: huffman::Decoder,
}
impl Decoder {
    /// Reads one symbol from `reader`.
    ///
    /// For a `Share`, the distance is decoded after the length and filled in.
    #[inline(always)]
    pub fn decode_unchecked<R>(&self, reader: &mut bit::BitReader<R>) -> Symbol
    where
        R: io::Read,
    {
        let mut symbol = self.decode_literal_or_length(reader);
        if let Symbol::Share {
            ref mut distance, ..
} = symbol {
            *distance = self.decode_distance(reader);
        }
        symbol
    }

    /// Decodes a literal, an end-of-block marker, or the length part of a `Share`.
    ///
    /// A `Share` is returned with `distance: 0`; the caller fills in the real distance.
    #[inline(always)]
    fn decode_literal_or_length<R>(&self, reader: &mut bit::BitReader<R>) -> Symbol
    where
        R: io::Read,
    {
        let decoded = self.literal.decode_unchecked(reader);
        match decoded {
            0..=255 => Symbol::Literal(decoded as u8),
            256 => Symbol::EndOfBlock,
            286 | 287 => {
                // These codes are invalid: record the error on the reader instead of returning it.
                let message = format!("The value {} must not occur in compressed data", decoded);
                reader.set_last_error(io::Error::new(io::ErrorKind::InvalidData, message));
                Symbol::EndOfBlock // dummy value
            }
            length_code => {
                let (base, extra_bits) = LENGTH_TABLE[length_code as usize - 257];
                let extra = reader.read_bits_unchecked(extra_bits);
                Symbol::Share {
                    length: base + extra,
                    distance: 0,
                }
            }
        }
    }

    /// Decodes a distance: the base value for the decoded code plus its extra bits.
    #[inline(always)]
    fn decode_distance<R>(&self, reader: &mut bit::BitReader<R>) -> u16
    where
        R: io::Read,
    {
        let decoded = self.distance.decode_unchecked(reader) as usize;
        let (base, extra_bits) = DISTANCE_TABLE[decoded];
        let extra = reader.read_bits_unchecked(extra_bits);
        base + extra
    }
}

/// A strategy for building, saving and loading the Huffman codes of a block.
pub trait HuffmanCodec {
    /// Builds a symbol encoder suitable for `symbols`.
    fn build(&self, symbols: &[Symbol]) -> io::Result<Encoder>;

    /// Writes whatever description of `codec` the decoder needs to `writer`.
    fn save<W>(&self, writer: &mut bit::BitWriter<W>, codec: &Encoder) -> io::Result<()>
    where
        W: io::Write;

    /// Reads a symbol decoder from `reader`.
    fn load<R>(&self, reader: &mut bit::BitReader<R>) -> io::Result<Decoder>
    where
        R: io::Read;
}

/// Codec that always uses the predefined (fixed) Huffman codes.
#[derive(Debug)]
pub struct FixedHuffmanCodec;
impl HuffmanCodec for FixedHuffmanCodec {
    // `symbols` is unused: the fixed codes do not depend on the data.
    #[allow(unused_variables)]
    fn build(&self, symbols: &[Symbol]) -> io::Result<Encoder> {
        let mut literal_builder = huffman::EncoderBuilder::new(288);
        for &(bitwidth, ref symbols, code_base) in &FIXED_LITERAL_OR_LENGTH_CODE_TABLE {
            // Symbols within one range receive consecutive codes starting at `code_base`.
            for (code, symbol) in symbols
                .clone()
                .enumerate()
                .map(|(i, s)| (code_base + i as u16, s))
            {
                literal_builder.set_mapping(symbol, huffman::Code::new(bitwidth, code))?;
            }
        }

        // Every distance code is five bits wide and equal to its own index.
        let mut distance_builder = huffman::EncoderBuilder::new(30);
        for i in 0..30 {
            distance_builder.set_mapping(i, huffman::Code::new(5, i))?;
        }

        Ok(Encoder {
            literal: literal_builder.finish(),
            distance: distance_builder.finish(),
        })
    }

    #[allow(unused_variables)]
fn save<W>(&self, writer: &mut bit::BitWriter<W>, codec: &Encoder) -> io::Result<()>
    where
        W: io::Write,
    {
        // Nothing to write: the fixed codes are known to the decoder in advance.
        Ok(())
    }

    // `reader` is unused: the fixed codes are not read from the stream.
    #[allow(unused_variables)]
    fn load<R>(&self, reader: &mut bit::BitReader<R>) -> io::Result<Decoder>
    where
        R: io::Read,
    {
        let mut literal_builder = huffman::DecoderBuilder::new(9, Some(END_OF_BLOCK));
        for &(bitwidth, ref symbols, code_base) in &FIXED_LITERAL_OR_LENGTH_CODE_TABLE {
            // Symbols within one range receive consecutive codes starting at `code_base`.
            for (code, symbol) in symbols
                .clone()
                .enumerate()
                .map(|(i, s)| (code_base + i as u16, s))
            {
                literal_builder.set_mapping(symbol, huffman::Code::new(bitwidth, code))?;
            }
        }

        // Every distance code is five bits wide and equal to its own index.
        let mut distance_builder = huffman::DecoderBuilder::new(5, None);
        for i in 0..30 {
            distance_builder.set_mapping(i, huffman::Code::new(5, i))?;
        }

        Ok(Decoder {
            literal: literal_builder.finish(),
            distance: distance_builder.finish(),
        })
    }
}

/// Codec that derives Huffman codes from the symbol frequencies of each block.
#[derive(Debug)]
pub struct DynamicHuffmanCodec;
impl HuffmanCodec for DynamicHuffmanCodec {
    /// Counts literal/length and distance code frequencies in `symbols` and builds
    /// both encoders from them.
    fn build(&self, symbols: &[Symbol]) -> io::Result<Encoder> {
        let mut literal_counts = [0; 286];
        let mut distance_counts = [0; 30];
        let mut empty_distance_table = true;
        for s in symbols {
            literal_counts[s.code() as usize] += 1;
            if let Some((d, _, _)) = s.distance() {
                empty_distance_table = false;
                distance_counts[d as usize] += 1;
            }
        }
        if empty_distance_table {
            // Sets a dummy value because an empty distance table causes decoding error on Windows.
            //
            // See https://github.com/sile/libflate/issues/23 for more details.
distance_counts[0] = 1;
        }
        Ok(Encoder {
            literal: huffman::EncoderBuilder::from_frequencies(&literal_counts, 15)?,
            distance: huffman::EncoderBuilder::from_frequencies(&distance_counts, 15)?,
        })
    }

    /// Writes the header that describes `codec`.
    ///
    /// The three code counts come first, then the bit widths of the bit-width codes in
    /// `BITWIDTH_CODE_ORDER`, and finally the run-length-coded bit widths of the
    /// literal/length and distance codes.
    fn save<W>(&self, writer: &mut bit::BitWriter<W>, codec: &Encoder) -> io::Result<()>
    where
        W: io::Write,
    {
        // At least 257 literal/length codes and 1 distance code are always described.
        let literal_code_count = cmp::max(257, codec.literal.used_max_symbol().unwrap_or(0) + 1);
        let distance_code_count = cmp::max(1, codec.distance.used_max_symbol().unwrap_or(0) + 1);
        let codes = build_bitwidth_codes(codec, literal_code_count, distance_code_count);

        let mut code_counts = [0; 19];
        for x in &codes {
            code_counts[x.0 as usize] += 1;
        }
        let bitwidth_encoder = huffman::EncoderBuilder::from_frequencies(&code_counts, 7)?;

        // Trailing entries (in `BITWIDTH_CODE_ORDER`) whose width is zero are not written,
        // but at least four entries always are.
        let bitwidth_code_count = cmp::max(
            4,
            BITWIDTH_CODE_ORDER
                .iter()
                .rev()
                .position(|&i| bitwidth_encoder.lookup(i as u16).width > 0)
                .map_or(0, |trailing_zeros| 19 - trailing_zeros),
        ) as u16;
        writer.write_bits(5, literal_code_count - 257)?;
        writer.write_bits(5, distance_code_count - 1)?;
        writer.write_bits(4, bitwidth_code_count - 4)?;
        for &i in BITWIDTH_CODE_ORDER
            .iter()
            .take(bitwidth_code_count as usize)
        {
            // Codes that never occur are written with width zero.
            let width = if code_counts[i] == 0 {
                0
            } else {
                u16::from(bitwidth_encoder.lookup(i as u16).width)
            };
            writer.write_bits(3, width)?;
        }
        for &(code, bits, extra) in &codes {
            bitwidth_encoder.encode(writer, u16::from(code))?;
            if bits > 0 {
                writer.write_bits(bits, u16::from(extra))?;
            }
        }
        Ok(())
    }

    /// Reads the header written by `save` and builds the matching symbol decoder.
    fn load<R>(&self, reader: &mut bit::BitReader<R>) -> io::Result<Decoder>
    where
        R: io::Read,
    {
        let literal_code_count = reader.read_bits(5)? + 257;
        let distance_code_count = reader.read_bits(5)? + 1;
        let bitwidth_code_count = reader.read_bits(4)?
+ 4;

        if distance_code_count as usize > MAX_DISTANCE_CODE_COUNT {
            let message = format!(
                "The value of HDIST is too big: max={}, actual={}",
                MAX_DISTANCE_CODE_COUNT, distance_code_count
            );
            return Err(io::Error::new(io::ErrorKind::InvalidData, message));
        }

        let mut bitwidth_code_bitwidthes = [0; 19];
        for &i in BITWIDTH_CODE_ORDER
            .iter()
            .take(bitwidth_code_count as usize)
        {
            bitwidth_code_bitwidthes[i] = reader.read_bits(3)? as u8;
        }
        let bitwidth_decoder =
            huffman::DecoderBuilder::from_bitwidthes(&bitwidth_code_bitwidthes, None)?;

        let mut literal_code_bitwidthes = Vec::with_capacity(literal_code_count as usize);
        while literal_code_bitwidthes.len() < literal_code_count as usize {
            let c = bitwidth_decoder.decode(reader)?;
            let last = literal_code_bitwidthes.last().cloned();
            literal_code_bitwidthes.extend(load_bitwidthes(reader, c, last)?);
        }

        // A repeat run may extend past the literal/length table; the surplus entries are
        // the first entries of the distance table.
        let mut distance_code_bitwidthes = literal_code_bitwidthes
            .drain(literal_code_count as usize..)
            .collect::<Vec<_>>();
        while distance_code_bitwidthes.len() < distance_code_count as usize {
            let c = bitwidth_decoder.decode(reader)?;
            // The "repeat previous" code may refer back to the last literal/length width.
            let last = distance_code_bitwidthes
                .last()
                .cloned()
                .or_else(|| literal_code_bitwidthes.last().cloned());
            distance_code_bitwidthes.extend(load_bitwidthes(reader, c, last)?);
        }
        if distance_code_bitwidthes.len() > distance_code_count as usize {
            let message = format!(
                "The length of `distance_code_bitwidthes` is too large: actual={}, expected={}",
                distance_code_bitwidthes.len(),
                distance_code_count
            );
            return Err(io::Error::new(io::ErrorKind::InvalidData, message));
        }

        Ok(Decoder {
            literal: huffman::DecoderBuilder::from_bitwidthes(
                &literal_code_bitwidthes,
                Some(END_OF_BLOCK),
            )?,
            distance: huffman::DecoderBuilder::from_bitwidthes(&distance_code_bitwidthes, None)?,
        })
    }
}

/// Expands one bit-width code into the bit widths it stands for.
///
/// Codes 0..=15 are a single literal width, 16 repeats `last`, and 17 and 18 are runs of
/// zeros. Returns an error for code 16 when there is no preceding width.
fn load_bitwidthes<R>(
    reader: &mut bit::BitReader<R>,
    code: u16,
    last: Option<u8>,
) -> io::Result<Box<dyn Iterator<Item = u8>>>
where
    R: io::Read,
{
    Ok(match code {
        0..=15 => Box::new(iter::once(code as u8)),
        16 => {
            let count = reader.read_bits(2)?
+ 3;
            let last = last.ok_or_else(|| invalid_data_error!("No preceding value"))?;
            Box::new(iter::repeat(last).take(count as usize))
        }
        17 => {
            let zeros = reader.read_bits(3)? + 3;
            Box::new(iter::repeat(0).take(zeros as usize))
        }
        18 => {
            let zeros = reader.read_bits(7)? + 11;
            Box::new(iter::repeat(0).take(zeros as usize))
        }
        _ => unreachable!(),
    })
}

/// Run-length encodes the bit widths of the literal/length and distance codes.
///
/// Each returned tuple is `(bit-width code, number of extra bits, extra bits value)`.
fn build_bitwidth_codes(
    codec: &Encoder,
    literal_code_count: u16,
    distance_code_count: u16,
) -> Vec<(u8, u8, u8)> {
    struct RunLength {
        value: u8,
        count: usize,
    }

    // Step 1: collapse consecutive equal widths into runs.
    // `i > 0` keeps a run from continuing across the start of a table.
    let mut run_lens: Vec<RunLength> = Vec::new();
    for &(e, size) in &[
        (&codec.literal, literal_code_count),
        (&codec.distance, distance_code_count),
    ] {
        for (i, c) in (0..size).map(|x| e.lookup(x as u16).width).enumerate() {
            if i > 0 && run_lens.last().map_or(false, |s| s.value == c) {
                run_lens.last_mut().unwrap().count += 1;
            } else {
                run_lens.push(RunLength { value: c, count: 1 })
            }
        }
    }

    // Step 2: turn each run into bit-width codes.
    let mut codes: Vec<(u8, u8, u8)> = Vec::new();
    for r in run_lens {
        if r.value == 0 {
            // Zero runs: code 18 covers 11..=138 zeros, code 17 covers 3..=10,
            // and anything shorter is written as plain zeros.
            let mut c = r.count;
            while c >= 11 {
                let n = cmp::min(138, c) as u8;
                codes.push((18, 7, n - 11));
                c -= n as usize;
            }
            if c >= 3 {
                codes.push((17, 3, c as u8 - 3));
                c = 0;
            }
            for _ in 0..c {
                codes.push((0, 0, 0));
            }
        } else {
            // Non-zero runs: write the width once, then code 16 repeats it 3..=6 times.
            codes.push((r.value, 0, 0));
            let mut c = r.count - 1;
            while c >= 3 {
                let n = cmp::min(6, c) as u8;
                codes.push((16, 2, n - 3));
                c -= n as usize;
            }
            for _ in 0..c {
                codes.push((r.value, 0, 0));
            }
        }
    }
    codes
}
libflate-0.1.25/src/finish.rs010066600017500001750000000145101340050224200142140ustar0000000000000000
//! `Finish` and related types.
use std::io::{self, Write};
use std::ops::{Deref, DerefMut};

/// `Finish` is a type that represents a value for which
/// an error may have occurred during the computation.
///
/// Logically, `Finish<T, E>` is equivalent to `Result<T, E>`.
#[derive(Debug, Default, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct Finish<T, E> {
    value: T,
    error: Option<E>,
}
impl<T, E> Finish<T, E> {
    /// Makes a new instance.
/// /// # Examples /// ``` /// use libflate::Finish; /// /// // The result value of a succeeded computation /// let succeeded = Finish::new("value", None as Option<()>); /// assert_eq!(succeeded.into_result(), Ok("value")); /// /// // The result value of a failed computation /// let failed = Finish::new("value", Some("error")); /// assert_eq!(failed.into_result(), Err("error")); /// ``` pub fn new(value: T, error: Option) -> Self { Finish { value, error } } /// Unwraps the instance. /// /// # Examples /// ``` /// use libflate::Finish; /// /// let succeeded = Finish::new("value", None as Option<()>); /// assert_eq!(succeeded.unwrap(), ("value", None)); /// /// let failed = Finish::new("value", Some("error")); /// assert_eq!(failed.unwrap(), ("value", Some("error"))); /// ``` pub fn unwrap(self) -> (T, Option) { (self.value, self.error) } /// Converts from `Finish` to `Result`. /// /// # Examples /// ``` /// use libflate::Finish; /// /// let succeeded = Finish::new("value", None as Option<()>); /// assert_eq!(succeeded.into_result(), Ok("value")); /// /// let failed = Finish::new("value", Some("error")); /// assert_eq!(failed.into_result(), Err("error")); /// ``` pub fn into_result(self) -> Result { if let Some(e) = self.error { Err(e) } else { Ok(self.value) } } /// Converts from `Finish` to `Result<&T, &E>`. /// /// # Examples /// ``` /// use libflate::Finish; /// /// let succeeded = Finish::new("value", None as Option<()>); /// assert_eq!(succeeded.as_result(), Ok(&"value")); /// /// let failed = Finish::new("value", Some("error")); /// assert_eq!(failed.as_result(), Err(&"error")); /// ``` pub fn as_result(&self) -> Result<&T, &E> { if let Some(ref e) = self.error { Err(e) } else { Ok(&self.value) } } } /// A wrapper struct that completes the processing of the underlying instance when drops. /// /// This calls `Complete:::complete` method of `T` when drops. 
/// /// # Panics /// /// If the invocation of `Complete::complete(T)` returns an error, `AutoFinish::drop()` will panic. #[derive(Debug)] pub struct AutoFinish { inner: Option, } impl AutoFinish { /// Makes a new `AutoFinish` instance. /// /// # Examples /// /// ``` /// use std::io; /// use libflate::finish::AutoFinish; /// use libflate::gzip::Encoder; /// /// let plain = b"Hello World!"; /// let mut buf = Vec::new(); /// let mut encoder = AutoFinish::new(Encoder::new(&mut buf).unwrap()); /// io::copy(&mut &plain[..], &mut encoder).unwrap(); /// ``` pub fn new(inner: T) -> Self { AutoFinish { inner: Some(inner) } } /// Unwraps this `AutoFinish` instance, returning the underlying instance. pub fn into_inner(mut self) -> T { self.inner.take().expect("Never fails") } } impl Drop for AutoFinish { fn drop(&mut self) { if let Some(inner) = self.inner.take() { if let Err(e) = inner.complete() { panic!("{}", e); } } } } impl Deref for AutoFinish { type Target = T; fn deref(&self) -> &Self::Target { self.inner.as_ref().expect("Never fails") } } impl DerefMut for AutoFinish { fn deref_mut(&mut self) -> &mut Self::Target { self.inner.as_mut().expect("Never fails") } } impl Write for AutoFinish { fn write(&mut self, buf: &[u8]) -> io::Result { self.deref_mut().write(buf) } fn flush(&mut self) -> io::Result<()> { self.deref_mut().flush() } } /// A wrapper struct that completes the processing of the underlying instance when drops. /// /// This calls `Complete:::complete` method of `T` when drops. /// /// Note that this ignores the result of the invocation of `Complete::complete(T)`. #[derive(Debug)] pub struct AutoFinishUnchecked { inner: Option, } impl AutoFinishUnchecked { /// Makes a new `AutoFinishUnchecked` instance. 
/// /// # Examples /// /// ``` /// use std::io; /// use libflate::finish::AutoFinishUnchecked; /// use libflate::gzip::Encoder; /// /// let plain = b"Hello World!"; /// let mut buf = Vec::new(); /// let mut encoder = AutoFinishUnchecked::new(Encoder::new(&mut buf).unwrap()); /// io::copy(&mut &plain[..], &mut encoder).unwrap(); /// ``` pub fn new(inner: T) -> Self { AutoFinishUnchecked { inner: Some(inner) } } /// Unwraps this `AutoFinishUnchecked` instance, returning the underlying instance. pub fn into_inner(mut self) -> T { self.inner.take().expect("Never fails") } } impl Drop for AutoFinishUnchecked { fn drop(&mut self) { if let Some(inner) = self.inner.take() { let _ = inner.complete(); } } } impl Deref for AutoFinishUnchecked { type Target = T; fn deref(&self) -> &Self::Target { self.inner.as_ref().expect("Never fails") } } impl DerefMut for AutoFinishUnchecked { fn deref_mut(&mut self) -> &mut Self::Target { self.inner.as_mut().expect("Never fails") } } impl Write for AutoFinishUnchecked { fn write(&mut self, buf: &[u8]) -> io::Result { self.deref_mut().write(buf) } fn flush(&mut self) -> io::Result<()> { self.deref_mut().flush() } } /// This trait allows to complete an I/O related processing. pub trait Complete { /// Completes the current processing and returns the result. fn complete(self) -> io::Result<()>; } libflate-0.1.25/src/gzip.rs010066600017500001750000001132611350614225000137160ustar0000000000000000//! The encoder and decoder of the GZIP format. //! //! The GZIP format is defined in [RFC-1952](https://tools.ietf.org/html/rfc1952). //! //! # Examples //! ``` //! use std::io::{self, Read}; //! use libflate::gzip::{Encoder, Decoder}; //! //! // Encoding //! let mut encoder = Encoder::new(Vec::new()).unwrap(); //! io::copy(&mut &b"Hello World!"[..], &mut encoder).unwrap(); //! let encoded_data = encoder.finish().into_result().unwrap(); //! //! // Decoding //! let mut decoder = Decoder::new(&encoded_data[..]).unwrap(); //! 
let mut decoded_data = Vec::new(); //! decoder.read_to_end(&mut decoded_data).unwrap(); //! //! assert_eq!(decoded_data, b"Hello World!"); //! ``` use byteorder::LittleEndian; use byteorder::ReadBytesExt; use byteorder::WriteBytesExt; use std::ffi::CString; use std::io; use std::time; use checksum; use deflate; use finish::{Complete, Finish}; use lz77; const GZIP_ID: [u8; 2] = [31, 139]; const COMPRESSION_METHOD_DEFLATE: u8 = 8; const OS_FAT: u8 = 0; const OS_AMIGA: u8 = 1; const OS_VMS: u8 = 2; const OS_UNIX: u8 = 3; const OS_VM_CMS: u8 = 4; const OS_ATARI_TOS: u8 = 5; const OS_HPFS: u8 = 6; const OS_MACINTOSH: u8 = 7; const OS_Z_SYSTEM: u8 = 8; const OS_CPM: u8 = 9; const OS_TOPS20: u8 = 10; const OS_NTFS: u8 = 11; const OS_QDOS: u8 = 12; const OS_ACORN_RISCOS: u8 = 13; const OS_UNKNOWN: u8 = 255; const F_TEXT: u8 = 0b00_0001; const F_HCRC: u8 = 0b00_0010; const F_EXTRA: u8 = 0b00_0100; const F_NAME: u8 = 0b00_1000; const F_COMMENT: u8 = 0b01_0000; /// Compression levels defined by the GZIP format. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum CompressionLevel { /// Compressor used fastest algorithm. Fastest, /// Compressor used maximum compression, slowest algorithm. Slowest, /// No information about compression method. 
Unknown,
}
impl CompressionLevel {
    /// Returns the header byte that represents this level.
    fn to_u8(&self) -> u8 {
        match *self {
            CompressionLevel::Fastest => 4,
            CompressionLevel::Slowest => 2,
            CompressionLevel::Unknown => 0,
        }
    }

    /// Parses a header byte; any value other than 4 or 2 becomes `Unknown`.
    fn from_u8(x: u8) -> Self {
        match x {
            4 => CompressionLevel::Fastest,
            2 => CompressionLevel::Slowest,
            _ => CompressionLevel::Unknown,
        }
    }
}
impl From<lz77::CompressionLevel> for CompressionLevel {
    /// Maps `Fast` to `Fastest` and `Best` to `Slowest`; every other level becomes `Unknown`.
    fn from(f: lz77::CompressionLevel) -> Self {
        match f {
            lz77::CompressionLevel::Fast => CompressionLevel::Fastest,
            lz77::CompressionLevel::Best => CompressionLevel::Slowest,
            _ => CompressionLevel::Unknown,
        }
    }
}

/// GZIP trailer: two 32-bit little-endian values, a CRC-32 and an input size.
#[derive(Debug, Clone)]
pub(crate) struct Trailer {
    crc32: u32,
    input_size: u32,
}
impl Trailer {
    /// Returns the CRC-32 value stored in the trailer.
    pub fn crc32(&self) -> u32 {
        self.crc32
    }

    /// Reads a trailer (CRC-32 first, then input size) from `reader`.
    pub fn read_from<R>(mut reader: R) -> io::Result<Self>
    where
        R: io::Read,
    {
        Ok(Trailer {
            crc32: reader.read_u32::<LittleEndian>()?,
            input_size: reader.read_u32::<LittleEndian>()?,
        })
    }

    /// Writes the trailer (CRC-32 first, then input size) to `writer`.
    fn write_to<W>(&self, mut writer: W) -> io::Result<()>
    where
        W: io::Write,
    {
        writer.write_u32::<LittleEndian>(self.crc32)?;
        writer.write_u32::<LittleEndian>(self.input_size)?;
        Ok(())
    }
}

/// GZIP header builder.
#[derive(Debug, Clone)]
pub struct HeaderBuilder {
    header: Header,
}
impl HeaderBuilder {
    /// Makes a new builder instance.
///
    /// The modification time defaults to the current time, or to 0 when the current time
    /// is unavailable or the target is `wasm32`.
    ///
    /// # Examples
    /// ```
    /// use libflate::gzip::{HeaderBuilder, CompressionLevel, Os};
    ///
    /// let header = HeaderBuilder::new().finish();
    /// assert_eq!(header.compression_level(), CompressionLevel::Unknown);
    /// assert_eq!(header.os(), Os::Unix);
    /// assert_eq!(header.is_text(), false);
    /// assert_eq!(header.is_verified(), false);
    /// assert_eq!(header.extra_field(), None);
    /// assert_eq!(header.filename(), None);
    /// assert_eq!(header.comment(), None);
    /// ```
    pub fn new() -> Self {
        // wasm-unknown-unknown does not implement the time module
        // `as u32` keeps only the low 32 bits of the seconds count.
        #[cfg(not(target_arch = "wasm32"))]
        let modification_time = time::UNIX_EPOCH
            .elapsed()
            .map(|d| d.as_secs() as u32)
            .unwrap_or(0);
        #[cfg(target_arch = "wasm32")]
        let modification_time = 0;

        let header = Header {
            modification_time,
            compression_level: CompressionLevel::Unknown,
            os: Os::Unix,
            is_text: false,
            is_verified: false,
            extra_field: None,
            filename: None,
            comment: None,
        };
        HeaderBuilder { header }
    }

    /// Sets the modification time (UNIX timestamp).
    ///
    /// # Examples
    /// ```
    /// use libflate::gzip::HeaderBuilder;
    ///
    /// let header = HeaderBuilder::new().modification_time(10).finish();
    /// assert_eq!(header.modification_time(), 10);
    /// ```
    pub fn modification_time(&mut self, modification_time: u32) -> &mut Self {
        self.header.modification_time = modification_time;
        self
    }

    /// Sets the OS type.
    ///
    /// # Examples
    /// ```
    /// use libflate::gzip::{HeaderBuilder, Os};
    ///
    /// let header = HeaderBuilder::new().os(Os::Ntfs).finish();
    /// assert_eq!(header.os(), Os::Ntfs);
    /// ```
    pub fn os(&mut self, os: Os) -> &mut Self {
        self.header.os = os;
        self
    }

    /// Indicates that the data being encoded is ASCII text.
    ///
    /// # Examples
    /// ```
    /// use libflate::gzip::HeaderBuilder;
    ///
    /// let header = HeaderBuilder::new().text().finish();
    /// assert_eq!(header.is_text(), true);
    /// ```
    pub fn text(&mut self) -> &mut Self {
        self.header.is_text = true;
        self
    }

    /// Specifies to verify header bytes using CRC-16.
/// /// # Examples /// ``` /// use libflate::gzip::HeaderBuilder; /// /// let header = HeaderBuilder::new().verify().finish(); /// assert_eq!(header.is_verified(), true); /// ``` pub fn verify(&mut self) -> &mut Self { self.header.is_verified = true; self } /// Sets the extra field. /// /// # Examples /// ``` /// use libflate::gzip::{HeaderBuilder, ExtraField, ExtraSubField}; /// /// let subfield = ExtraSubField{id: [0, 1], data: vec![2, 3, 4]}; /// let extra = ExtraField{subfields: vec![subfield]}; /// let header = HeaderBuilder::new().extra_field(extra.clone()).finish(); /// assert_eq!(header.extra_field(), Some(&extra)); /// ``` pub fn extra_field(&mut self, extra: ExtraField) -> &mut Self { self.header.extra_field = Some(extra); self } /// Sets the file name. /// /// # Examples /// ``` /// use std::ffi::CString; /// use libflate::gzip::HeaderBuilder; /// /// let header = HeaderBuilder::new().filename(CString::new("foo").unwrap()).finish(); /// assert_eq!(header.filename(), Some(&CString::new("foo").unwrap())); /// ``` pub fn filename(&mut self, filename: CString) -> &mut Self { self.header.filename = Some(filename); self } /// Sets the comment. /// /// # Examples /// ``` /// use std::ffi::CString; /// use libflate::gzip::HeaderBuilder; /// /// let header = HeaderBuilder::new().comment(CString::new("foo").unwrap()).finish(); /// assert_eq!(header.comment(), Some(&CString::new("foo").unwrap())); /// ``` pub fn comment(&mut self, comment: CString) -> &mut Self { self.header.comment = Some(comment); self } /// Returns the result header. pub fn finish(&self) -> Header { self.header.clone() } } impl Default for HeaderBuilder { fn default() -> Self { Self::new() } } /// GZIP Header. #[derive(Debug, Clone)] pub struct Header { modification_time: u32, compression_level: CompressionLevel, os: Os, is_text: bool, is_verified: bool, extra_field: Option, filename: Option, comment: Option, } impl Header { /// Returns the modification time (UNIX timestamp). 
pub fn modification_time(&self) -> u32 { self.modification_time } /// Returns the compression level. pub fn compression_level(&self) -> CompressionLevel { self.compression_level.clone() } /// Returns the OS type. pub fn os(&self) -> Os { self.os.clone() } /// Returns `true` if the stream is probably ASCII text, `false` otherwise. pub fn is_text(&self) -> bool { self.is_text } /// Returns `true` if the header bytes is verified by CRC-16, `false` otherwise. pub fn is_verified(&self) -> bool { self.is_verified } /// Returns the extra field. pub fn extra_field(&self) -> Option<&ExtraField> { self.extra_field.as_ref() } /// Returns the file name. pub fn filename(&self) -> Option<&CString> { self.filename.as_ref() } /// Returns the comment. pub fn comment(&self) -> Option<&CString> { self.comment.as_ref() } fn flags(&self) -> u8 { [ (F_TEXT, self.is_text), (F_HCRC, self.is_verified), (F_EXTRA, self.extra_field.is_some()), (F_NAME, self.filename.is_some()), (F_COMMENT, self.comment.is_some()), ] .iter() .filter(|e| e.1) .map(|e| e.0) .sum() } fn crc16(&self) -> u16 { let mut crc = checksum::Crc32::new(); let mut buf = Vec::new(); Header { is_verified: false, ..self.clone() } .write_to(&mut buf) .unwrap(); crc.update(&buf); crc.value() as u16 } fn write_to(&self, mut writer: W) -> io::Result<()> where W: io::Write, { writer.write_all(&GZIP_ID)?; writer.write_u8(COMPRESSION_METHOD_DEFLATE)?; writer.write_u8(self.flags())?; writer.write_u32::(self.modification_time)?; writer.write_u8(self.compression_level.to_u8())?; writer.write_u8(self.os.to_u8())?; if let Some(ref x) = self.extra_field { x.write_to(&mut writer)?; } if let Some(ref x) = self.filename { writer.write_all(x.as_bytes_with_nul())?; } if let Some(ref x) = self.comment { writer.write_all(x.as_bytes_with_nul())?; } if self.is_verified { writer.write_u16::(self.crc16())?; } Ok(()) } pub(crate) fn read_from(mut reader: R) -> io::Result where R: io::Read, { let mut this = HeaderBuilder::new().finish(); let mut id = 
[0; 2]; reader.read_exact(&mut id)?; if id != GZIP_ID { return Err(invalid_data_error!( "Unexpected GZIP ID: value={:?}, \ expected={:?}", id, GZIP_ID )); } let compression_method = reader.read_u8()?; if compression_method != COMPRESSION_METHOD_DEFLATE { return Err(invalid_data_error!( "Compression methods other than DEFLATE(8) are \ unsupported: method={}", compression_method )); } let flags = reader.read_u8()?; this.modification_time = reader.read_u32::()?; this.compression_level = CompressionLevel::from_u8(reader.read_u8()?); this.os = Os::from_u8(reader.read_u8()?); if flags & F_EXTRA != 0 { this.extra_field = Some(ExtraField::read_from(&mut reader)?); } if flags & F_NAME != 0 { this.filename = Some(read_cstring(&mut reader)?); } if flags & F_COMMENT != 0 { this.comment = Some(read_cstring(&mut reader)?); } // Checksum verification is skipped during fuzzing // so that random data from fuzzer can reach actually interesting code. // Compilation flag 'fuzzing' is automatically set by all 3 Rust fuzzers. if flags & F_HCRC != 0 && cfg!(not(fuzzing)) { let crc = reader.read_u16::()?; let expected = this.crc16(); if crc != expected { return Err(invalid_data_error!( "CRC16 of GZIP header mismatched: value={}, \ expected={}", crc, expected )); } this.is_verified = true; } Ok(this) } } fn read_cstring(mut reader: R) -> io::Result where R: io::Read, { let mut buf = Vec::new(); loop { let b = reader.read_u8()?; if b == 0 { return Ok(unsafe { CString::from_vec_unchecked(buf) }); } buf.push(b); } } /// Extra field of a GZIP header. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ExtraField { /// Data of the extra field. pub subfields: Vec, } impl ExtraField { fn read_from(mut reader: R) -> io::Result where R: io::Read, { let mut subfields = Vec::new(); let data_size = reader.read_u16::()? 
as usize;
        // Limit the reader so that sub field parsing cannot run past the extra field.
        let mut reader = reader.take(data_size as u64);
        while reader.limit() > 0 {
            subfields.push(ExtraSubField::read_from(&mut reader)?);
        }
        Ok(ExtraField { subfields })
    }

    /// Writes the 16-bit total size followed by every sub field.
    ///
    /// Returns an `InvalidData` error if the total size exceeds `0xFFFF`.
    fn write_to<W>(&self, mut writer: W) -> io::Result<()>
    where
        W: io::Write,
    {
        let len = self.subfields.iter().map(|f| f.write_len()).sum::<usize>();
        if len > 0xFFFF {
            return Err(invalid_data_error!("extra field too long: {}", len));
        }
        writer.write_u16::<LittleEndian>(len as u16)?;
        for f in &self.subfields {
            f.write_to(&mut writer)?;
        }
        Ok(())
    }
}

/// A sub field in the extra field of a GZIP header.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ExtraSubField {
    /// ID of the field.
    pub id: [u8; 2],

    /// Data of the field.
    pub data: Vec<u8>,
}
impl ExtraSubField {
    /// Reads the 2-byte ID, the 16-bit data size, and then the data.
    fn read_from<R>(mut reader: R) -> io::Result<Self>
    where
        R: io::Read,
    {
        let mut field = ExtraSubField {
            id: [0; 2],
            data: Vec::new(),
        };

        reader.read_exact(&mut field.id)?;
        let data_size = reader.read_u16::<LittleEndian>()? as usize;
        field.data.resize(data_size, 0);
        reader.read_exact(&mut field.data)?;

        Ok(field)
    }

    /// Writes the ID, the 16-bit data size, and the data.
    fn write_to<W>(&self, mut writer: W) -> io::Result<()>
    where
        W: io::Write,
    {
        writer.write_all(&self.id)?;
        // `ExtraField::write_to` rejects totals above 0xFFFF before calling this,
        // so the cast does not truncate on that path.
        writer.write_u16::<LittleEndian>(self.data.len() as u16)?;
        writer.write_all(&self.data)?;
        Ok(())
    }

    /// Returns the serialized size: 4 header bytes plus the data.
    fn write_len(&self) -> usize {
        4 + self.data.len()
    }
}

/// OS type.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Os {
    /// FAT filesystem (MS-DOS, OS/2, NT/Win32)
    Fat,
    /// Amiga
    Amiga,
    /// VMS (or OpenVMS)
    Vms,
    /// Unix
    Unix,
    /// VM/CMS
    VmCms,
    /// Atari TOS
    AtariTos,
    /// HPFS filesystem (OS/2, NT)
    Hpfs,
    /// Macintosh
    Macintosh,
    /// Z-System
    ZSystem,
    /// CP/M
    CpM,
    /// TOPS-20
    Tops20,
    /// NTFS filesystem (NT)
    Ntfs,
    /// QDOS
    Qdos,
    /// Acorn RISCOS
    AcornRiscos,
    /// Unknown
    Unknown,
    /// Undefined value in RFC-1952
    Undefined(u8),
}
impl Os {
    /// Returns the header byte for this OS; `Undefined` yields the byte it carries.
    fn to_u8(&self) -> u8 {
        match *self {
            Os::Fat => OS_FAT,
            Os::Amiga => OS_AMIGA,
            Os::Vms => OS_VMS,
            Os::Unix => OS_UNIX,
            Os::VmCms => OS_VM_CMS,
            Os::AtariTos => OS_ATARI_TOS,
            Os::Hpfs => OS_HPFS,
            Os::Macintosh => OS_MACINTOSH,
            Os::ZSystem => OS_Z_SYSTEM,
            Os::CpM => OS_CPM,
            Os::Tops20 => OS_TOPS20,
            Os::Ntfs => OS_NTFS,
            Os::Qdos => OS_QDOS,
            Os::AcornRiscos => OS_ACORN_RISCOS,
            Os::Unknown => OS_UNKNOWN,
            Os::Undefined(os) => os,
        }
    }

    /// Parses a header byte; values without a named variant become `Undefined`.
    fn from_u8(x: u8) -> Self {
        match x {
            OS_FAT => Os::Fat,
            OS_AMIGA => Os::Amiga,
            OS_VMS => Os::Vms,
            OS_UNIX => Os::Unix,
            OS_VM_CMS => Os::VmCms,
            OS_ATARI_TOS => Os::AtariTos,
            OS_HPFS => Os::Hpfs,
            OS_MACINTOSH => Os::Macintosh,
            OS_Z_SYSTEM => Os::ZSystem,
            OS_CPM => Os::CpM,
            OS_TOPS20 => Os::Tops20,
            OS_NTFS => Os::Ntfs,
            OS_QDOS => Os::Qdos,
            OS_ACORN_RISCOS => Os::AcornRiscos,
            OS_UNKNOWN => Os::Unknown,
            os => Os::Undefined(os),
        }
    }
}

/// Options for a GZIP encoder.
#[derive(Debug)]
pub struct EncodeOptions<E>
where
    E: lz77::Lz77Encode,
{
    header: Header,
    options: deflate::EncodeOptions<E>,
}
// NOTE(review): the type argument was missing in this copy of the source;
// `lz77::DefaultLz77Encoder` is assumed from the doc examples — confirm.
impl Default for EncodeOptions<lz77::DefaultLz77Encoder> {
    fn default() -> Self {
        EncodeOptions {
            header: HeaderBuilder::new().finish(),
            options: Default::default(),
        }
    }
}
impl EncodeOptions<lz77::DefaultLz77Encoder> {
    /// Makes a default instance.
/// /// # Examples /// ``` /// use libflate::gzip::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new(); /// let encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// ``` pub fn new() -> Self { Self::default() } } impl EncodeOptions where E: lz77::Lz77Encode, { /// Specifies the LZ77 encoder used to compress input data. /// /// # Example /// ``` /// use libflate::lz77::DefaultLz77Encoder; /// use libflate::gzip::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::with_lz77(DefaultLz77Encoder::new()); /// let encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// ``` pub fn with_lz77(lz77: E) -> Self { let mut header = HeaderBuilder::new().finish(); header.compression_level = From::from(lz77.compression_level()); EncodeOptions { header, options: deflate::EncodeOptions::with_lz77(lz77), } } /// Disables LZ77 compression. /// /// # Example /// ``` /// use libflate::lz77::DefaultLz77Encoder; /// use libflate::gzip::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new().no_compression(); /// let encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// ``` pub fn no_compression(mut self) -> Self { self.options = self.options.no_compression(); self.header.compression_level = CompressionLevel::Unknown; self } /// Sets the GZIP header which will be written to the output stream. /// /// # Example /// ``` /// use libflate::gzip::{Encoder, EncodeOptions, HeaderBuilder}; /// /// let header = HeaderBuilder::new().text().modification_time(100).finish(); /// let options = EncodeOptions::new().header(header); /// let encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// ``` pub fn header(mut self, header: Header) -> Self { self.header = header; self } /// Specifies the hint of the size of a DEFLATE block. /// /// The default value is `deflate::DEFAULT_BLOCK_SIZE`. 
/// /// # Example /// ``` /// use libflate::gzip::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new().block_size(512 * 1024); /// let encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// ``` pub fn block_size(mut self, size: usize) -> Self { self.options = self.options.block_size(size); self } /// Specifies to compress with fixed huffman codes. /// /// # Example /// ``` /// use libflate::gzip::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new().fixed_huffman_codes(); /// let encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// ``` pub fn fixed_huffman_codes(mut self) -> Self { self.options = self.options.fixed_huffman_codes(); self } } /// GZIP encoder. pub struct Encoder { header: Header, crc32: checksum::Crc32, input_size: u32, writer: deflate::Encoder, } impl Encoder where W: io::Write, { /// Makes a new encoder instance. /// /// Encoded GZIP stream is written to `inner`. /// /// # Examples /// ``` /// use std::io::Write; /// use libflate::gzip::Encoder; /// /// let mut encoder = Encoder::new(Vec::new()).unwrap(); /// encoder.write_all(b"Hello World!").unwrap(); /// encoder.finish().into_result().unwrap(); /// ``` pub fn new(inner: W) -> io::Result { Self::with_options(inner, EncodeOptions::new()) } } impl Encoder where W: io::Write, E: lz77::Lz77Encode, { /// Makes a new encoder instance with specified options. /// /// Encoded GZIP stream is written to `inner`. 
/// /// # Examples /// ``` /// use std::io::Write; /// use libflate::gzip::{Encoder, EncodeOptions, HeaderBuilder}; /// /// let header = HeaderBuilder::new().modification_time(123).finish(); /// let options = EncodeOptions::new().no_compression().header(header); /// let mut encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// encoder.write_all(b"Hello World!").unwrap(); /// /// assert_eq!(encoder.finish().into_result().unwrap(), /// &[31, 139, 8, 0, 123, 0, 0, 0, 0, 3, 1, 12, 0, 243, 255, 72, 101, 108, 108, /// 111, 32, 87, 111, 114, 108, 100, 33, 163, 28, 41, 28, 12, 0, 0, 0][..]); /// ``` pub fn with_options(mut inner: W, options: EncodeOptions) -> io::Result { options.header.write_to(&mut inner)?; Ok(Encoder { header: options.header.clone(), crc32: checksum::Crc32::new(), input_size: 0, writer: deflate::Encoder::with_options(inner, options.options), }) } /// Returns the header of the GZIP stream. /// /// # Examples /// ``` /// use libflate::gzip::{Encoder, Os}; /// /// let encoder = Encoder::new(Vec::new()).unwrap(); /// assert_eq!(encoder.header().os(), Os::Unix); /// ``` pub fn header(&self) -> &Header { &self.header } /// Writes the GZIP trailer and returns the inner stream. /// /// # Examples /// ``` /// use std::io::Write; /// use libflate::gzip::Encoder; /// /// let mut encoder = Encoder::new(Vec::new()).unwrap(); /// encoder.write_all(b"Hello World!").unwrap(); /// /// assert!(encoder.finish().as_result().is_ok()) /// ``` /// /// # Note /// /// If you are not concerned the result of this encoding, /// it may be convenient to use `AutoFinishUnchecked` instead of the explicit invocation of this method. 
/// /// ``` /// use std::io; /// use libflate::finish::AutoFinishUnchecked; /// use libflate::gzip::Encoder; /// /// let plain = b"Hello World!"; /// let mut buf = Vec::new(); /// let mut encoder = AutoFinishUnchecked::new(Encoder::new(&mut buf).unwrap()); /// io::copy(&mut &plain[..], &mut encoder).unwrap(); /// ``` pub fn finish(self) -> Finish { let trailer = Trailer { crc32: self.crc32.value(), input_size: self.input_size, }; let mut inner = finish_try!(self.writer.finish()); match trailer.write_to(&mut inner).and_then(|_| inner.flush()) { Ok(_) => Finish::new(inner, None), Err(e) => Finish::new(inner, Some(e)), } } /// Returns the immutable reference to the inner stream. pub fn as_inner_ref(&self) -> &W { self.writer.as_inner_ref() } /// Returns the mutable reference to the inner stream. pub fn as_inner_mut(&mut self) -> &mut W { self.writer.as_inner_mut() } /// Unwraps the `Encoder`, returning the inner stream. pub fn into_inner(self) -> W { self.writer.into_inner() } } impl io::Write for Encoder where W: io::Write, E: lz77::Lz77Encode, { fn write(&mut self, buf: &[u8]) -> io::Result { let written_size = self.writer.write(buf)?; self.crc32.update(&buf[..written_size]); self.input_size = self.input_size.wrapping_add(written_size as u32); Ok(written_size) } fn flush(&mut self) -> io::Result<()> { self.writer.flush() } } impl Complete for Encoder where W: io::Write, E: lz77::Lz77Encode, { fn complete(self) -> io::Result<()> { self.finish().into_result().map(|_| ()) } } /// GZIP decoder. #[derive(Debug)] pub struct Decoder { header: Header, reader: deflate::Decoder, crc32: checksum::Crc32, eos: bool, } impl Decoder where R: io::Read, { /// Makes a new decoder instance. /// /// `inner` is to be decoded GZIP stream. 
/// /// # Examples /// ``` /// use std::io::Read; /// use libflate::gzip::Decoder; /// /// let encoded_data = [31, 139, 8, 0, 123, 0, 0, 0, 0, 3, 1, 12, 0, 243, 255, /// 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, /// 163, 28, 41, 28, 12, 0, 0, 0]; /// /// let mut decoder = Decoder::new(&encoded_data[..]).unwrap(); /// let mut buf = Vec::new(); /// decoder.read_to_end(&mut buf).unwrap(); /// /// assert_eq!(buf, b"Hello World!"); /// ``` pub fn new(mut inner: R) -> io::Result { let header = Header::read_from(&mut inner)?; Ok(Self::with_header(inner, header)) } /// Returns the header of the GZIP stream. /// /// # Examples /// ``` /// use libflate::gzip::{Decoder, Os}; /// /// let encoded_data = [31, 139, 8, 0, 123, 0, 0, 0, 0, 3, 1, 12, 0, 243, 255, /// 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, /// 163, 28, 41, 28, 12, 0, 0, 0]; /// /// let decoder = Decoder::new(&encoded_data[..]).unwrap(); /// assert_eq!(decoder.header().os(), Os::Unix); /// ``` pub fn header(&self) -> &Header { &self.header } /// Returns the immutable reference to the inner stream. pub fn as_inner_ref(&self) -> &R { self.reader.as_inner_ref() } /// Returns the mutable reference to the inner stream. pub fn as_inner_mut(&mut self) -> &mut R { self.reader.as_inner_mut() } /// Unwraps this `Decoder`, returning the underlying reader. 
/// /// # Examples /// ``` /// use std::io::Cursor; /// use libflate::gzip::Decoder; /// /// let encoded_data = [31, 139, 8, 0, 123, 0, 0, 0, 0, 3, 1, 12, 0, 243, 255, /// 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, /// 163, 28, 41, 28, 12, 0, 0, 0]; /// /// let decoder = Decoder::new(Cursor::new(&encoded_data[..])).unwrap(); /// assert_eq!(decoder.into_inner().into_inner(), &encoded_data[..]); /// ``` pub fn into_inner(self) -> R { self.reader.into_inner() } fn with_header(inner: R, header: Header) -> Self { Decoder { header, reader: deflate::Decoder::new(inner), crc32: checksum::Crc32::new(), eos: false, } } } impl io::Read for Decoder where R: io::Read, { fn read(&mut self, buf: &mut [u8]) -> io::Result { if self.eos { Ok(0) } else { let read_size = self.reader.read(buf)?; self.crc32.update(&buf[..read_size]); if read_size == 0 { self.eos = true; let trailer = Trailer::read_from(self.reader.as_inner_mut())?; // checksum verification is skipped during fuzzing // so that random data from fuzzer can reach actually interesting code // Compilation flag 'fuzzing' is automatically set by all 3 Rust fuzzers. if cfg!(not(fuzzing)) && trailer.crc32 != self.crc32.value() { Err(invalid_data_error!( "CRC32 mismatched: value={}, expected={}", self.crc32.value(), trailer.crc32 )) } else { Ok(0) } } else { Ok(read_size) } } } } /// A decoder that decodes all members in a GZIP stream. #[derive(Debug)] pub struct MultiDecoder { header: Header, decoder: Result, R>, } impl MultiDecoder where R: io::Read, { /// Makes a new decoder instance. /// /// `inner` is to be decoded GZIP stream. 
/// /// # Examples /// ``` /// use std::io::Read; /// use libflate::gzip::MultiDecoder; /// /// let mut encoded_data = Vec::new(); /// /// // Add a member (a GZIP binary that represents "Hello ") /// encoded_data.extend(&[31, 139, 8, 0, 51, 206, 75, 90, 0, 3, 5, 128, 49, 9, 0, 0, 0, 194, 170, 24, /// 199, 34, 126, 3, 251, 127, 163, 131, 71, 192, 252, 45, 234, 6, 0, 0, 0][..]); /// /// // Add another member (a GZIP binary that represents "World!") /// encoded_data.extend(&[31, 139, 8, 0, 227, 207, 75, 90, 0, 3, 5, 128, 49, 9, 0, 0, 0, 194, 178, 152, /// 202, 2, 158, 130, 96, 255, 99, 120, 111, 4, 222, 157, 40, 118, 6, 0, 0, 0][..]); /// /// let mut decoder = MultiDecoder::new(&encoded_data[..]).unwrap(); /// let mut buf = Vec::new(); /// decoder.read_to_end(&mut buf).unwrap(); /// /// assert_eq!(buf, b"Hello World!"); /// ``` pub fn new(inner: R) -> io::Result { let decoder = Decoder::new(inner)?; Ok(MultiDecoder { header: decoder.header().clone(), decoder: Ok(decoder), }) } /// Returns the header of the current member in the GZIP stream. /// /// # Examples /// ``` /// use libflate::gzip::{MultiDecoder, Os}; /// /// let encoded_data = [31, 139, 8, 0, 123, 0, 0, 0, 0, 3, 1, 12, 0, 243, 255, /// 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, /// 163, 28, 41, 28, 12, 0, 0, 0]; /// /// let decoder = MultiDecoder::new(&encoded_data[..]).unwrap(); /// assert_eq!(decoder.header().os(), Os::Unix); /// ``` pub fn header(&self) -> &Header { &self.header } /// Returns the immutable reference to the inner stream. pub fn as_inner_ref(&self) -> &R { match self.decoder { Err(ref reader) => reader, Ok(ref decoder) => decoder.as_inner_ref(), } } /// Returns the mutable reference to the inner stream. pub fn as_inner_mut(&mut self) -> &mut R { match self.decoder { Err(ref mut reader) => reader, Ok(ref mut decoder) => decoder.as_inner_mut(), } } /// Unwraps this `MultiDecoder`, returning the underlying reader. 
/// /// # Examples /// ``` /// use std::io::Cursor; /// use libflate::gzip::MultiDecoder; /// /// let encoded_data = [31, 139, 8, 0, 123, 0, 0, 0, 0, 3, 1, 12, 0, 243, 255, /// 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, /// 163, 28, 41, 28, 12, 0, 0, 0]; /// /// let decoder = MultiDecoder::new(Cursor::new(&encoded_data[..])).unwrap(); /// assert_eq!(decoder.into_inner().into_inner(), &encoded_data[..]); /// ``` pub fn into_inner(self) -> R { match self.decoder { Err(reader) => reader, Ok(decoder) => decoder.into_inner(), } } } impl io::Read for MultiDecoder where R: io::Read, { fn read(&mut self, buf: &mut [u8]) -> io::Result { let read_size = match self.decoder { Err(_) => return Ok(0), Ok(ref mut decoder) => decoder.read(buf)?, }; // take_mut closure must have the type it borrows as return type, // so we put the function return result to this variable instead. // If function logic is correct, these initial values will never be returned. let mut result: io::Result = Err(io::Error::new( io::ErrorKind::Other, "If you see this error, please report a bug in libflate", )); if read_size == 0 { take_mut::take(self, |mut owned_self| { let mut reader = owned_self .decoder .ok() .take() .expect("Never fails") .into_inner(); match Header::read_from(&mut reader) { Err(e) => { if e.kind() == io::ErrorKind::UnexpectedEof { result = Ok(0); } else { result = Err(e); } owned_self.decoder = Err(reader); owned_self } Ok(header) => { owned_self.header = header.clone(); owned_self.decoder = Ok(Decoder::with_header(reader, header)); result = owned_self.read(buf); owned_self } } }) } else { result = Ok(read_size); } result } } #[cfg(test)] mod test { use super::*; use finish::AutoFinish; use std::io::{self, Write}; fn decode(buf: &[u8]) -> io::Result> { let mut decoder = Decoder::new(buf).unwrap(); let mut buf = Vec::with_capacity(buf.len()); io::copy(&mut decoder, &mut buf)?; Ok(buf) } fn decode_multi(buf: &[u8]) -> io::Result> { let mut decoder = 
MultiDecoder::new(buf).unwrap(); let mut buf = Vec::with_capacity(buf.len()); io::copy(&mut decoder, &mut buf)?; Ok(buf) } fn encode(text: &[u8]) -> io::Result> { let mut encoder = Encoder::new(Vec::new()).unwrap(); io::copy(&mut &text[..], &mut encoder).unwrap(); encoder.finish().into_result() } #[test] fn encode_works() { let plain = b"Hello World! Hello GZIP!!"; let mut encoder = Encoder::new(Vec::new()).unwrap(); io::copy(&mut &plain[..], &mut encoder).unwrap(); let encoded = encoder.finish().into_result().unwrap(); assert_eq!(decode(&encoded).unwrap(), plain); } #[test] fn encoder_auto_finish_works() { let plain = b"Hello World! Hello GZIP!!"; let mut buf = Vec::new(); { let mut encoder = AutoFinish::new(Encoder::new(&mut buf).unwrap()); io::copy(&mut &plain[..], &mut encoder).unwrap(); } assert_eq!(decode(&buf).unwrap(), plain); } #[test] fn multi_decode_works() { use std::iter; let text = b"Hello World!"; let encoded: Vec = iter::repeat(encode(text).unwrap()) .take(2) .flat_map(|b| b) .collect(); assert_eq!(decode(&encoded).unwrap(), b"Hello World!"); assert_eq!(decode_multi(&encoded).unwrap(), b"Hello World!Hello World!"); } #[test] /// See: https://github.com/sile/libflate/issues/15 and https://github.com/RazrFalcon/usvg/issues/20 fn issue_15_1() { let data = b"\x1F\x8B\x08\xC1\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x7B\x80\x80\x80\x80\x7B\x7B\x7B\x7B\x7B\x7B\x97\x7B\x7B\x7B\x86\x27\xEB\x60\xA7\xA8\x46\x6E\x1F\x33\x51\x5C\x34\xE0\xD2\x2E\xE8\x0C\x19\x1D\x3D\x3C\xFD\x3B\x6A\xFA\x63\xDF\x28\x87\x86\xF2\xA6\xAC\x87\x86\xF2\xA6\xAC\xD5"; assert!(decode(&data[..]).is_err()); } #[test] /// See: https://github.com/sile/libflate/issues/15 and https://github.com/RazrFalcon/usvg/issues/21 fn issue_15_2() { let data = b"\x1F\x8B\x08\xC1\x7B\x7B\x7B\x7B\x7B\xFC\x5D\x2D\xDC\x08\xC1\x7B\x7B\x7B\x7B\x7B\xFC\x5D\x2D\xDC\x08\xC1\x7B\x7F\x7B\x7B\x7B\xFC\x5D\x2D\xDC\x69\x32\x48\x22\x5A\x81\x81\x42\x42\x81\x7E\x81\x81\x81\x81\xF2\x17"; 
/// Upper bound on the bit width accepted by `Code::new` (checked in debug builds).
const MAX_BITWIDTH: u8 = 15;

/// A single Huffman code: the `width` low-order bits of `bits`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Code {
    pub width: u8,
    pub bits: u16,
}
impl Code {
    /// Makes a code of `width` bits; debug builds assert `width <= MAX_BITWIDTH`.
    pub fn new(width: u8, bits: u16) -> Self {
        debug_assert!(width <= MAX_BITWIDTH);
        Code { width, bits }
    }

    /// Returns the code with its `width` low-order bits in reversed order.
    ///
    /// Bits of `self.bits` above `width` are ignored.
    fn inverse_endian(&self) -> Self {
        let mut remaining = self.bits;
        let mut reversed = 0;
        for _ in 0..self.width {
            reversed = (reversed << 1) | (remaining & 1);
            remaining >>= 1;
        }
        Code::new(self.width, reversed)
    }
}

/// Something that can be populated with symbol-to-code mappings and then
/// turned into a finished coder.
pub trait Builder: Sized {
    /// The value produced by `finish`.
    type Instance;

    /// Registers `code` as the encoding of `symbol`.
    fn set_mapping(&mut self, symbol: u16, code: Code) -> io::Result<()>;

    /// Consumes the builder and returns the built instance.
    fn finish(self) -> Self::Instance;

    /// Assigns canonical Huffman codes from per-symbol bit widths.
    ///
    /// `bitwidthes[i]` is the code length of symbol `i`; a width of `0` means the
    /// symbol is unused and receives no mapping. Codes are handed out in order of
    /// increasing width and, within one width, increasing symbol value.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by `set_mapping`.
    fn restore_canonical_huffman_codes(mut self, bitwidthes: &[u8]) -> io::Result<Self::Instance> {
        debug_assert!(!bitwidthes.is_empty());

        let mut used = bitwidthes
            .iter()
            .enumerate()
            .filter(|&(_, &width)| width > 0)
            .map(|(symbol, &width)| (symbol as u16, width))
            .collect::<Vec<_>>();
        // The sort is stable, so symbols sharing a width stay in ascending order.
        used.sort_by_key(|&(_, width)| width);

        let mut next_code = 0;
        let mut last_width = 0;
        for (symbol, width) in used {
            // Moving to a longer width appends zero bits to the running code.
            next_code <<= width - last_width;
            self.set_mapping(symbol, Code::new(width, next_code))?;
            next_code += 1;
            last_width = width;
        }
        Ok(self.finish())
    }
}
Self::Instance { Decoder { table: self.table, eob_bitwidth: self.eob_bitwidth, max_bitwidth: self.max_bitwidth, } } } #[derive(Debug)] pub struct Decoder { table: Vec, eob_bitwidth: u8, max_bitwidth: u8, } impl Decoder { #[inline(always)] pub fn decode(&self, reader: &mut bit::BitReader) -> io::Result where R: io::Read, { let v = self.decode_unchecked(reader); reader.check_last_error()?; Ok(v) } #[inline(always)] pub fn decode_unchecked(&self, reader: &mut bit::BitReader) -> u16 where R: io::Read, { let code = reader.peek_bits_unchecked(self.eob_bitwidth); let mut value = self.table[code as usize]; let mut bitwidth = (value & 0b1_1111) as u8; if bitwidth > self.eob_bitwidth { let code = reader.peek_bits_unchecked(self.max_bitwidth); value = self.table[code as usize]; bitwidth = (value & 0b1_1111) as u8; if bitwidth > self.max_bitwidth { reader.set_last_error(invalid_data_error!("Invalid huffman coded stream")); } } reader.skip_bits(bitwidth as u8); value >> 5 } } #[derive(Debug)] pub struct EncoderBuilder { table: Vec, } impl EncoderBuilder { pub fn new(symbol_count: usize) -> Self { EncoderBuilder { table: vec![Code::new(0, 0); symbol_count], } } pub fn from_bitwidthes(bitwidthes: &[u8]) -> io::Result { let symbol_count = bitwidthes .iter() .enumerate() .filter(|e| *e.1 > 0) .last() .map_or(0, |e| e.0) + 1; let builder = Self::new(symbol_count); builder.restore_canonical_huffman_codes(bitwidthes) } pub fn from_frequencies(symbol_frequencies: &[usize], max_bitwidth: u8) -> io::Result { let max_bitwidth = cmp::min( max_bitwidth, ordinary_huffman_codes::calc_optimal_max_bitwidth(symbol_frequencies), ); let code_bitwidthes = length_limited_huffman_codes::calc(max_bitwidth, symbol_frequencies); Self::from_bitwidthes(&code_bitwidthes) } } impl Builder for EncoderBuilder { type Instance = Encoder; fn set_mapping(&mut self, symbol: u16, code: Code) -> io::Result<()> { debug_assert_eq!(self.table[symbol as usize], Code::new(0, 0)); self.table[symbol as usize] = 
/// Plain (width-unlimited) Huffman tree construction, used only to find out how
/// deep the tree would get.
#[allow(dead_code)]
mod ordinary_huffman_codes {
    use std::cmp;
    use std::collections::BinaryHeap;

    /// Returns the depth of the deepest leaf of a Huffman tree built from the
    /// non-zero entries of `frequencies`, but never less than 1.
    pub fn calc_optimal_max_bitwidth(frequencies: &[usize]) -> u8 {
        // `BinaryHeap` is a max-heap; negating the weights makes the lightest
        // subtree come out first (ties: the deeper subtree first).
        let mut queue: BinaryHeap<(isize, u8)> = frequencies
            .iter()
            .filter(|&&freq| freq > 0)
            .map(|&freq| (-(freq as isize), 0u8))
            .collect();
        while queue.len() > 1 {
            let (weight1, depth1) = queue.pop().unwrap();
            let (weight2, depth2) = queue.pop().unwrap();
            queue.push((weight1 + weight2, 1 + cmp::max(depth1, depth2)));
        }
        let max_bitwidth = queue.pop().map_or(0, |(_, depth)| depth);
        cmp::max(1, max_bitwidth)
    }
}

/// Length-limited Huffman code widths via the package-merge scheme.
mod length_limited_huffman_codes {
    use std::mem;

    /// A package of symbols together with their accumulated weight.
    #[derive(Debug, Clone)]
    struct Node {
        symbols: Vec<u16>,
        weight: usize,
    }
    impl Node {
        /// A weightless package without symbols (used as a move-out placeholder).
        pub fn empty() -> Self {
            Node {
                symbols: vec![],
                weight: 0,
            }
        }

        /// A package holding exactly one symbol.
        pub fn single(symbol: u16, weight: usize) -> Self {
            Node {
                symbols: vec![symbol],
                weight,
            }
        }

        /// Absorbs `other`: weights add up, symbol lists are concatenated.
        pub fn merge(&mut self, other: Self) {
            self.weight += other.weight;
            self.symbols.extend(other.symbols);
        }
    }

    /// Computes a code width for every symbol such that no width exceeds
    /// `max_bitwidth`.
    ///
    /// `frequencies[i]` is the weight of symbol `i`; symbols with frequency `0`
    /// get width `0`. The result has the same length as `frequencies`.
    /// `max_bitwidth` must be at least 1 (`max_bitwidth - 1` is evaluated on a `u8`).
    ///
    /// Reference: [A Fast Algorithm for Optimal Length-Limited Huffman Codes][LenLimHuff.pdf]
    ///
    /// [LenLimHuff.pdf]: https://www.ics.uci.edu/~dan/pubs/LenLimHuff.pdf
    pub fn calc(max_bitwidth: u8, frequencies: &[usize]) -> Vec<u8> {
        // NOTE: unoptimized implementation
        let mut source = frequencies
            .iter()
            .enumerate()
            .filter(|&(_, &f)| f > 0)
            .map(|(symbol, &weight)| Node::single(symbol as u16, weight))
            .collect::<Vec<_>>();
        source.sort_by_key(|o| o.weight);

        // One package/merge round per additional bit of allowed width.
        let weighted =
            (0..max_bitwidth - 1).fold(source.clone(), |w, _| merge(package(w), source.clone()));

        // A symbol's width is the number of surviving packages that contain it.
        let mut code_bitwidthes = vec![0; frequencies.len()];
        for symbol in package(weighted)
            .into_iter()
            .flat_map(|n| n.symbols.into_iter())
        {
            code_bitwidthes[symbol as usize] += 1;
        }
        code_bitwidthes
    }

    /// Merges two weight-sorted lists into one; on equal weights the node from
    /// `y` is emitted first.
    fn merge(x: Vec<Node>, y: Vec<Node>) -> Vec<Node> {
        let mut z = Vec::with_capacity(x.len() + y.len());
        let mut x = x.into_iter().peekable();
        let mut y = y.into_iter().peekable();
        loop {
            let x_weight = x.peek().map(|s| s.weight);
            let y_weight = y.peek().map(|s| s.weight);
            if x_weight.is_none() {
                z.extend(y);
                break;
            } else if y_weight.is_none() {
                z.extend(x);
                break;
            } else if x_weight < y_weight {
                z.push(x.next().unwrap());
            } else {
                z.push(y.next().unwrap());
            }
        }
        z
    }

    /// Combines adjacent nodes pairwise (0+1, 2+3, ...). An unpaired trailing
    /// node is dropped; lists with fewer than two nodes are returned unchanged.
    fn package(mut nodes: Vec<Node>) -> Vec<Node> {
        if nodes.len() >= 2 {
            let new_len = nodes.len() / 2;

            for i in 0..new_len {
                // Slot `i` has already been consumed by an earlier pair (or is the
                // left element itself), so it can be overwritten in place.
                nodes[i] = mem::replace(&mut nodes[i * 2], Node::empty());
                let other = mem::replace(&mut nodes[i * 2 + 1], Node::empty());
                nodes[i].merge(other);
            }
            nodes.truncate(new_len);
        }
        nodes
    }
}
finish_try { ($e:expr) => { match $e.unwrap() { (inner, None) => inner, (inner, error) => return ::finish::Finish::new(inner, error), } }; } pub mod deflate; pub mod finish; pub mod gzip; pub mod lz77; pub mod non_blocking; pub mod zlib; mod bit; mod checksum; mod huffman; mod util; libflate-0.1.25/src/lz77/default.rs010066600017500001750000000131471350614220100151720ustar0000000000000000use std::cmp; use std::collections::HashMap; use super::Code; use super::Lz77Encode; use super::Sink; /// A `Lz77Encode` implementation used by default. #[derive(Debug)] pub struct DefaultLz77Encoder { window_size: u16, buf: Vec, } impl DefaultLz77Encoder { /// Makes a new encoder instance. /// /// # Examples /// ``` /// use libflate::deflate; /// use libflate::lz77::{self, Lz77Encode, DefaultLz77Encoder}; /// /// let lz77 = DefaultLz77Encoder::new(); /// assert_eq!(lz77.window_size(), lz77::MAX_WINDOW_SIZE); /// /// let options = deflate::EncodeOptions::with_lz77(lz77); /// let _deflate = deflate::Encoder::with_options(Vec::new(), options); /// ``` pub fn new() -> Self { Self::with_window_size(super::MAX_WINDOW_SIZE) } /// Makes a new encoder instance with specified window size. /// /// Larger window size is prefered to raise compression ratio, /// but it may require more working memory to encode and decode data. 
/// Returns the first three bytes of `input_buf` as an array.
///
/// # Panics
///
/// Panics if `input_buf` holds fewer than three bytes.
#[inline]
fn prefix(input_buf: &[u8]) -> [u8; 3] {
    // perform bounds check once
    let buf: &[u8] = &input_buf[..3];
    [buf[0], buf[1], buf[2]]
}
/// Position table keyed by 3-byte prefixes.
///
/// The first two prefix bytes select one of 65,536 buckets; each bucket is a
/// list of `(third byte, position)` pairs.
#[derive(Debug)]
struct LargePrefixTable {
    table: Vec<Vec<(u8, u32)>>,
}
impl LargePrefixTable {
    /// Makes a table with every bucket empty.
    fn new() -> Self {
        LargePrefixTable {
            table: (0..=0xFFFF).map(|_| Vec::new()).collect(),
        }
    }

    /// Records `position` for `prefix` and returns the position previously
    /// stored for the same prefix, if any.
    #[inline]
    fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option<u32> {
        let bucket_index = ((prefix[0] as usize) << 8) + prefix[1] as usize;
        let third = prefix[2];

        let bucket = &mut self.table[bucket_index];
        if let Some(entry) = bucket.iter_mut().find(|entry| entry.0 == third) {
            let old = entry.1;
            entry.1 = position;
            return Some(old);
        }
        bucket.push((third, position));
        None
    }
}
pub const MAX_DISTANCE: u16 = 32_768;

/// Maximum size of a sliding window.
pub const MAX_WINDOW_SIZE: u16 = MAX_DISTANCE;

/// A LZ77 encoded data.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Code {
    /// Literal byte.
    Literal(u8),

    /// Backward pointer to shared data.
    Pointer {
        /// Length of the shared data.
        /// The values must be limited to `MAX_LENGTH`.
        length: u16,

        /// Distance between current position and start position of the shared data.
        /// The values must be limited to `MAX_DISTANCE`.
        backward_distance: u16,
    },
}

/// Compression level.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum CompressionLevel {
    /// No compression.
    None,

    /// Best speed.
    Fast,

    /// Balanced between speed and size.
    Balance,

    /// Best compression.
    Best,
}

/// The `Sink` trait represents a consumer of LZ77 encoded data.
pub trait Sink {
    /// Consumes a LZ77 encoded `Code`.
    fn consume(&mut self, code: Code);
}
/// Lets a mutable reference to a sink be passed where a sink is expected.
impl<'a, T> Sink for &'a mut T
where
    T: Sink,
{
    fn consume(&mut self, code: Code) {
        // Forward explicitly to `T`'s implementation (not to this one again).
        T::consume(&mut **self, code);
    }
}

/// The `LZ77Encode` trait defines the interface of LZ77 encoding algorithm.
pub trait Lz77Encode {
    /// Encodes a buffer and writes result LZ77 codes to `sink`.
    fn encode<S>(&mut self, buf: &[u8], sink: S)
    where
        S: Sink;

    /// Flushes the encoder, ensuring that all intermediately buffered codes are consumed by `sink`.
    fn flush<S>(&mut self, sink: S)
    where
        S: Sink;

    /// Returns the compression level of the encoder.
    ///
    /// If the implementation is omitted, `CompressionLevel::Balance` will be returned.
    fn compression_level(&self) -> CompressionLevel {
        CompressionLevel::Balance
    }

    /// Returns the window size of the encoder.
    ///
    /// If the implementation is omitted, `MAX_WINDOW_SIZE` will be returned.
    fn window_size(&self) -> u16 {
        MAX_WINDOW_SIZE
    }
}

/// A no compression implementation of `LZ77Encode` trait.
#[derive(Debug, Default)]
pub struct NoCompressionLz77Encoder;
impl NoCompressionLz77Encoder {
    /// Makes a new encoder instance.
    ///
    /// # Examples
    /// ```
    /// use libflate::deflate;
    /// use libflate::lz77::{Lz77Encode, NoCompressionLz77Encoder, CompressionLevel};
    ///
    /// let lz77 = NoCompressionLz77Encoder::new();
    /// assert_eq!(lz77.compression_level(), CompressionLevel::None);
    ///
    /// let options = deflate::EncodeOptions::with_lz77(lz77);
    /// let _deflate = deflate::Encoder::with_options(Vec::new(), options);
    /// ```
    pub fn new() -> Self {
        NoCompressionLz77Encoder
    }
}
impl Lz77Encode for NoCompressionLz77Encoder {
    /// Emits every input byte as a `Code::Literal`; nothing is buffered.
    fn encode<S>(&mut self, buf: &[u8], mut sink: S)
    where
        S: Sink,
    {
        for c in buf.iter().cloned().map(Code::Literal) {
            sink.consume(c);
        }
    }

    /// Does nothing: `encode` never holds codes back.
    fn flush<S>(&mut self, _sink: S)
    where
        S: Sink,
    {
    }

    fn compression_level(&self) -> CompressionLevel {
        CompressionLevel::None
    }
}
/// /// # Examples /// ``` /// use std::io::{Cursor, Read}; /// use libflate::non_blocking::deflate::Decoder; /// /// let encoded_data = [243, 72, 205, 201, 201, 87, 8, 207, 47, 202, 73, 81, 4, 0]; /// let mut decoder = Decoder::new(&encoded_data[..]); /// let mut buf = Vec::new(); /// decoder.read_to_end(&mut buf).unwrap(); /// /// assert_eq!(buf, b"Hello World!"); /// ``` pub fn new(inner: R) -> Self { Decoder { state: DecoderState::ReadBlockHeader, eos: false, bit_reader: TransactionalBitReader::new(inner), block_decoder: BlockDecoder::new(), } } /// Returns the immutable reference to the inner stream. pub fn as_inner_ref(&self) -> &R { self.bit_reader.as_inner_ref() } /// Returns the mutable reference to the inner stream. pub fn as_inner_mut(&mut self) -> &mut R { self.bit_reader.as_inner_mut() } /// Unwraps this `Decoder`, returning the underlying reader. /// /// # Examples /// ``` /// use std::io::Cursor; /// use libflate::non_blocking::deflate::Decoder; /// /// let encoded_data = [243, 72, 205, 201, 201, 87, 8, 207, 47, 202, 73, 81, 4, 0]; /// let decoder = Decoder::new(Cursor::new(&encoded_data)); /// assert_eq!(decoder.into_inner().into_inner(), &encoded_data); /// ``` pub fn into_inner(self) -> R { self.bit_reader.into_inner() } pub(crate) fn bit_reader_mut(&mut self) -> &mut TransactionalBitReader { &mut self.bit_reader } } impl Read for Decoder { fn read(&mut self, buf: &mut [u8]) -> io::Result { let mut read_size; loop { let next = match self.state { DecoderState::ReadBlockHeader => { let (bfinal, btype) = self.bit_reader.transaction(|r| { let bfinal = r.read_bit()?; let btype = r.read_bits(2)?; Ok((bfinal, btype)) })?; self.eos = bfinal; self.block_decoder.enter_new_block(); match btype { 0b00 => DecoderState::ReadNonCompressedBlockLen, 0b01 => DecoderState::LoadFixedHuffmanCode, 0b10 => DecoderState::LoadDynamicHuffmanCode, 0b11 => { return Err(invalid_data_error!( "btype 0x11 of DEFLATE is reserved(error) value" )); } _ => unreachable!(), } } 
DecoderState::ReadNonCompressedBlockLen => { let len = self.bit_reader.transaction(|r| { r.reset(); let len = r.as_inner_mut().read_u16::()?; let nlen = r.as_inner_mut().read_u16::()?; if !len != nlen { Err(invalid_data_error!( "LEN={} is not the one's complement of NLEN={}", len, nlen )) } else { Ok(len) } })?; self.block_decoder.buffer.reserve(len as usize); DecoderState::ReadNonCompressedBlock { len } } DecoderState::ReadNonCompressedBlock { len: 0 } => { if self.eos { read_size = 0; break; } else { DecoderState::ReadBlockHeader } } DecoderState::ReadNonCompressedBlock { ref mut len } => { let buf_len = buf.len(); let buf = &mut buf[..cmp::min(buf_len, *len as usize)]; read_size = self.bit_reader.as_inner_mut().read(buf)?; self.block_decoder.extend(&buf[..read_size]); *len -= read_size as u16; break; } DecoderState::LoadFixedHuffmanCode => { let symbol_decoder = self .bit_reader .transaction(|r| symbol::FixedHuffmanCodec.load(r))?; DecoderState::DecodeBlock(symbol_decoder) } DecoderState::LoadDynamicHuffmanCode => { let symbol_decoder = self .bit_reader .transaction(|r| symbol::DynamicHuffmanCodec.load(r))?; DecoderState::DecodeBlock(symbol_decoder) } DecoderState::DecodeBlock(ref mut symbol_decoder) => { self.block_decoder .decode(&mut self.bit_reader, symbol_decoder)?; read_size = self.block_decoder.read(buf)?; if read_size == 0 && !buf.is_empty() && !self.eos { DecoderState::ReadBlockHeader } else { break; } } }; self.state = next; } Ok(read_size) } } #[derive(Debug)] enum DecoderState { ReadBlockHeader, ReadNonCompressedBlockLen, ReadNonCompressedBlock { len: u16 }, LoadFixedHuffmanCode, LoadDynamicHuffmanCode, DecodeBlock(symbol::Decoder), } #[derive(Debug)] struct BlockDecoder { buffer: Vec, offset: usize, eob: bool, } impl BlockDecoder { pub fn new() -> Self { BlockDecoder { buffer: Vec::new(), offset: 0, eob: false, } } pub fn enter_new_block(&mut self) { self.eob = false; self.truncate_old_buffer(); } pub fn decode( &mut self, bit_reader: &mut 
TransactionalBitReader, symbol_decoder: &mut symbol::Decoder, ) -> io::Result<()> { if self.eob { return Ok(()); } while let Some(s) = self.decode_symbol(bit_reader, symbol_decoder)? { match s { symbol::Symbol::Literal(b) => { self.buffer.push(b); } symbol::Symbol::Share { length, distance } => { if self.buffer.len() < distance as usize { return Err(invalid_data_error!( "Too long backword reference: buffer.len={}, distance={}", self.buffer.len(), distance )); } rle_decode(&mut self.buffer, usize::from(distance), usize::from(length)); } symbol::Symbol::EndOfBlock => { self.eob = true; break; } } } Ok(()) } fn truncate_old_buffer(&mut self) { if self.buffer.len() > lz77::MAX_DISTANCE as usize * 4 { let old_len = self.buffer.len(); let new_len = lz77::MAX_DISTANCE as usize; { // isolation to please borrow checker let (dst, src) = self.buffer.split_at_mut(old_len - new_len); dst[..new_len].copy_from_slice(src); } self.buffer.truncate(new_len); self.offset = new_len; } } fn extend(&mut self, buf: &[u8]) { self.buffer.extend_from_slice(buf); self.offset += buf.len(); } fn decode_symbol( &mut self, bit_reader: &mut TransactionalBitReader, symbol_decoder: &mut symbol::Decoder, ) -> io::Result> { let result = bit_reader.transaction(|bit_reader| { let s = symbol_decoder.decode_unchecked(bit_reader); bit_reader.check_last_error().map(|()| s) }); match result { Err(ref e) if e.kind() == io::ErrorKind::WouldBlock => Ok(None), Err(e) => Err(e), Ok(s) => Ok(Some(s)), } } } impl Read for BlockDecoder { fn read(&mut self, buf: &mut [u8]) -> io::Result { if self.offset < self.buffer.len() { let copy_size = cmp::min(buf.len(), self.buffer.len() - self.offset); buf[..copy_size].copy_from_slice(&self.buffer[self.offset..][..copy_size]); self.offset += copy_size; Ok(copy_size) } else if self.eob { Ok(0) } else { Err(io::Error::new(io::ErrorKind::WouldBlock, "Would block")) } } } #[cfg(test)] mod test { use super::*; use deflate::{EncodeOptions, Encoder}; use std::io::{self, Read}; use 
util::{nb_read_to_end, WouldBlockReader}; #[test] fn it_works() { let mut encoder = Encoder::new(Vec::new()); io::copy(&mut &b"Hello World!"[..], &mut encoder).unwrap(); let encoded_data = encoder.finish().into_result().unwrap(); let mut decoder = Decoder::new(&encoded_data[..]); let mut decoded_data = Vec::new(); decoder.read_to_end(&mut decoded_data).unwrap(); assert_eq!(decoded_data, b"Hello World!"); } #[test] fn non_blocking_io_works() { let mut encoder = Encoder::new(Vec::new()); io::copy(&mut &b"Hello World!"[..], &mut encoder).unwrap(); let encoded_data = encoder.finish().into_result().unwrap(); let decoder = Decoder::new(WouldBlockReader::new(&encoded_data[..])); let decoded_data = nb_read_to_end(decoder).unwrap(); assert_eq!(decoded_data, b"Hello World!"); } #[test] fn non_blocking_io_for_large_text_works() { let text: String = (0..10000) .into_iter() .map(|i| format!("test {}", i)) .collect(); let mut encoder = ::deflate::Encoder::new(Vec::new()); io::copy(&mut text.as_bytes(), &mut encoder).unwrap(); let encoded_data = encoder.finish().into_result().unwrap(); let decoder = Decoder::new(WouldBlockReader::new(&encoded_data[..])); let decoded_data = nb_read_to_end(decoder).unwrap(); assert_eq!(decoded_data, text.as_bytes()); } #[test] fn non_compressed_non_blocking_io_works() { let mut encoder = Encoder::with_options(Vec::new(), EncodeOptions::new().no_compression()); io::copy(&mut &b"Hello World!"[..], &mut encoder).unwrap(); let encoded_data = encoder.finish().into_result().unwrap(); let decoder = Decoder::new(WouldBlockReader::new(&encoded_data[..])); let decoded_data = nb_read_to_end(decoder).unwrap(); assert_eq!(decoded_data, b"Hello World!"); } } libflate-0.1.25/src/non_blocking/deflate/mod.rs010066600017500001750000000013521340050224200175610ustar0000000000000000//! The decoder of the DEFLATE format and algorithm. //! //! The DEFLATE is defined in [RFC-1951](https://tools.ietf.org/html/rfc1951). //! //! # Examples //! ``` //! 
use std::io::{self, Read}; //! use libflate::deflate::Encoder; //! use libflate::non_blocking::deflate::Decoder; //! //! // Encoding //! let mut encoder = Encoder::new(Vec::new()); //! io::copy(&mut &b"Hello World!"[..], &mut encoder).unwrap(); //! let encoded_data = encoder.finish().into_result().unwrap(); //! //! // Decoding //! let mut decoder = Decoder::new(&encoded_data[..]); //! let mut decoded_data = Vec::new(); //! decoder.read_to_end(&mut decoded_data).unwrap(); //! //! assert_eq!(decoded_data, b"Hello World!"); //! ``` pub use self::decode::Decoder; mod decode; libflate-0.1.25/src/non_blocking/gzip.rs010066600017500001750000000136261340050224200163560ustar0000000000000000//! The encoder and decoder of the GZIP format. //! //! The GZIP format is defined in [RFC-1952](https://tools.ietf.org/html/rfc1952). //! //! # Examples //! ``` //! use std::io::{self, Read}; //! use libflate::gzip::Encoder; //! use libflate::non_blocking::gzip::Decoder; //! //! // Encoding //! let mut encoder = Encoder::new(Vec::new()).unwrap(); //! io::copy(&mut &b"Hello World!"[..], &mut encoder).unwrap(); //! let encoded_data = encoder.finish().into_result().unwrap(); //! //! // Decoding //! let mut decoder = Decoder::new(&encoded_data[..]); //! let mut decoded_data = Vec::new(); //! decoder.read_to_end(&mut decoded_data).unwrap(); //! //! assert_eq!(decoded_data, b"Hello World!"); //! ``` use std::io::{self, Read}; use checksum; use gzip::{Header, Trailer}; use non_blocking::deflate; /// GZIP decoder which supports non-blocking I/O. #[derive(Debug)] pub struct Decoder { header: Option
, reader: deflate::Decoder, crc32: checksum::Crc32, eos: bool, } impl Decoder { /// Makes a new decoder instance. /// /// `inner` is to be decoded GZIP stream. /// /// # Examples /// ``` /// use std::io::Read; /// use libflate::non_blocking::gzip::Decoder; /// /// let encoded_data = [31, 139, 8, 0, 123, 0, 0, 0, 0, 3, 1, 12, 0, 243, 255, /// 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, /// 163, 28, 41, 28, 12, 0, 0, 0]; /// /// let mut decoder = Decoder::new(&encoded_data[..]); /// let mut buf = Vec::new(); /// decoder.read_to_end(&mut buf).unwrap(); /// /// assert_eq!(buf, b"Hello World!"); /// ``` pub fn new(inner: R) -> Self { Decoder { header: None, reader: deflate::Decoder::new(inner), crc32: checksum::Crc32::new(), eos: false, } } /// Returns the header of the GZIP stream. /// /// # Examples /// ``` /// use libflate::gzip::Os; /// use libflate::non_blocking::gzip::Decoder; /// /// let encoded_data = [31, 139, 8, 0, 123, 0, 0, 0, 0, 3, 1, 12, 0, 243, 255, /// 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, /// 163, 28, 41, 28, 12, 0, 0, 0]; /// /// let mut decoder = Decoder::new(&encoded_data[..]); /// assert_eq!(decoder.header().unwrap().os(), Os::Unix); /// ``` pub fn header(&mut self) -> io::Result<&Header> { if let Some(ref header) = self.header { Ok(header) } else { let header = self .reader .bit_reader_mut() .transaction(|r| Header::read_from(r.as_inner_mut()))?; self.header = Some(header); self.header() } } /// Returns the immutable reference to the inner stream. pub fn as_inner_ref(&self) -> &R { self.reader.as_inner_ref() } /// Returns the mutable reference to the inner stream. pub fn as_inner_mut(&mut self) -> &mut R { self.reader.as_inner_mut() } /// Unwraps this `Decoder`, returning the underlying reader. 
/// /// # Examples /// ``` /// use std::io::Cursor; /// use libflate::non_blocking::gzip::Decoder; /// /// let encoded_data = [31, 139, 8, 0, 123, 0, 0, 0, 0, 3, 1, 12, 0, 243, 255, /// 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, /// 163, 28, 41, 28, 12, 0, 0, 0]; /// /// let decoder = Decoder::new(Cursor::new(&encoded_data[..])); /// assert_eq!(decoder.into_inner().into_inner(), &encoded_data[..]); /// ``` pub fn into_inner(self) -> R { self.reader.into_inner() } } impl Read for Decoder { fn read(&mut self, buf: &mut [u8]) -> io::Result { if self.header.is_none() { self.header()?; } if self.eos { Ok(0) } else { let read_size = self.reader.read(buf)?; if read_size == 0 { let trailer = self .reader .bit_reader_mut() .transaction(|r| Trailer::read_from(r.as_inner_mut()))?; self.eos = true; // checksum verification is skipped during fuzzing // so that random data from fuzzer can reach actually interesting code // Compilation flag 'fuzzing' is automatically set by all 3 Rust fuzzers. if cfg!(not(fuzzing)) && trailer.crc32() != self.crc32.value() { Err(invalid_data_error!( "CRC32 mismatched: value={}, expected={}", self.crc32.value(), trailer.crc32() )) } else { Ok(0) } } else { self.crc32.update(&buf[..read_size]); Ok(read_size) } } } } #[cfg(test)] mod test { use super::*; use gzip::Encoder; use std::io; use util::{nb_read_to_end, WouldBlockReader}; fn decode_all(buf: &[u8]) -> io::Result> { let decoder = Decoder::new(WouldBlockReader::new(buf)); nb_read_to_end(decoder) } #[test] fn encode_works() { let plain = b"Hello World! 
Hello GZIP!!"; let mut encoder = Encoder::new(Vec::new()).unwrap(); io::copy(&mut &plain[..], &mut encoder).unwrap(); let encoded = encoder.finish().into_result().unwrap(); assert_eq!(decode_all(&encoded).unwrap(), plain); } #[test] fn decode_works_noncompressed_block_offset_sync() { let encoded = include_bytes!("../../data/noncompressed_block_offset_sync/offset.gz"); let decoded = include_bytes!("../../data/noncompressed_block_offset_sync/offset"); // decode_all(encoded).unwrap(); assert_eq!(decode_all(encoded).unwrap(), decoded.to_vec()); } } libflate-0.1.25/src/non_blocking/mod.rs010066600017500001750000000014471340050224200161620ustar0000000000000000//! Implementations that can handle non-blocking I/O. //! //! The implementations in this module can handle non-blocking //! `Reader`s and `Writer`s which will return `ErrorKind::WouldBlock` error //! when I/O operations would block. //! //! If inner `Reader`s and `Writer`s return `ErrorKind::WouldBlock` error, //! `Decoder`s and `Encoder`s in this module will also return `ErrorKind::WouldBlock`. //! //! If retrying the operation after the inner I/O become available, it will proceed successfully. //! //! # NOTICE //! //! There is some performance penalty for non-blocking implementations //! against those that do not consider nonblocking I / O. //! So, it is recommended to use the latter if you are not need to handle non-blocking I/O. 
pub mod deflate; pub mod gzip; pub mod zlib; mod transaction; libflate-0.1.25/src/non_blocking/transaction.rs010066600017500001750000000057231340050224200177310ustar0000000000000000use std::cmp; use std::io::{self, Read}; use bit; #[derive(Debug)] pub struct TransactionalBitReader { inner: bit::BitReader>, savepoint: bit::BitReaderState, } impl TransactionalBitReader { pub fn new(inner: R) -> Self { let inner = bit::BitReader::new(TransactionalReader::new(inner)); let savepoint = inner.state(); TransactionalBitReader { inner, savepoint } } #[inline] pub fn transaction(&mut self, f: F) -> io::Result where F: FnOnce(&mut bit::BitReader>) -> io::Result, { self.start_transaction(); let result = f(&mut self.inner); if result.is_ok() { self.commit_transaction(); } else { self.abort_transaction(); } result } #[inline] pub fn start_transaction(&mut self) { self.inner.as_inner_mut().start_transaction(); self.savepoint = self.inner.state(); } #[inline] pub fn abort_transaction(&mut self) { self.inner.as_inner_mut().abort_transaction(); self.inner.restore_state(self.savepoint); } #[inline] pub fn commit_transaction(&mut self) { self.inner.as_inner_mut().commit_transaction(); } } impl TransactionalBitReader { pub fn as_inner_ref(&self) -> &R { &self.inner.as_inner_ref().inner } pub fn as_inner_mut(&mut self) -> &mut R { &mut self.inner.as_inner_mut().inner } pub fn into_inner(self) -> R { self.inner.into_inner().inner } } #[derive(Debug)] pub struct TransactionalReader { inner: R, in_transaction: bool, buffer: Vec, offset: usize, } impl TransactionalReader { pub fn new(inner: R) -> Self { TransactionalReader { inner, buffer: Vec::new(), in_transaction: false, offset: 0, } } #[inline] pub fn start_transaction(&mut self) { assert!(!self.in_transaction); self.in_transaction = true; } #[inline] pub fn commit_transaction(&mut self) { self.in_transaction = false; self.offset = 0; self.buffer.clear(); } #[inline] pub fn abort_transaction(&mut self) { self.in_transaction = false; 
self.offset = 0; } } impl Read for TransactionalReader { fn read(&mut self, buf: &mut [u8]) -> io::Result { if self.offset < self.buffer.len() { let unread_buf_size = self.buffer.len() - self.offset; let size = cmp::min(buf.len(), unread_buf_size); (&mut buf[0..size]).copy_from_slice(&self.buffer[self.offset..self.offset + size]); self.offset += size; return Ok(size); } let size = self.inner.read(buf)?; if self.in_transaction { self.buffer.extend_from_slice(&buf[0..size]); self.offset += size; } Ok(size) } } libflate-0.1.25/src/non_blocking/zlib.rs010066600017500001750000000203251340050224200163370ustar0000000000000000//! The encoder and decoder of the ZLIB format. //! //! The ZLIB format is defined in [RFC-1950](https://tools.ietf.org/html/rfc1950). //! //! # Examples //! ``` //! use std::io::{self, Read}; //! use libflate::zlib::Encoder; //! use libflate::non_blocking::zlib::Decoder; //! //! // Encoding //! let mut encoder = Encoder::new(Vec::new()).unwrap(); //! io::copy(&mut &b"Hello World!"[..], &mut encoder).unwrap(); //! let encoded_data = encoder.finish().into_result().unwrap(); //! //! // Decoding //! let mut decoder = Decoder::new(&encoded_data[..]); //! let mut decoded_data = Vec::new(); //! decoder.read_to_end(&mut decoded_data).unwrap(); //! //! assert_eq!(decoded_data, b"Hello World!"); //! ``` use byteorder::BigEndian; use byteorder::ReadBytesExt; use std::io::{self, Read}; use checksum; use non_blocking::deflate; use zlib::Header; /// ZLIB decoder which supports non-blocking I/O. #[derive(Debug)] pub struct Decoder { header: Option
, reader: deflate::Decoder, adler32: checksum::Adler32, eos: bool, } impl Decoder { /// Makes a new decoder instance. /// /// `inner` is to be decoded ZLIB stream. /// /// # Examples /// ``` /// use std::io::Read; /// use libflate::non_blocking::zlib::Decoder; /// /// let encoded_data = [120, 156, 243, 72, 205, 201, 201, 87, 8, 207, 47, /// 202, 73, 81, 4, 0, 28, 73, 4, 62]; /// /// let mut decoder = Decoder::new(&encoded_data[..]); /// let mut buf = Vec::new(); /// decoder.read_to_end(&mut buf).unwrap(); /// /// assert_eq!(buf, b"Hello World!"); /// ``` pub fn new(inner: R) -> Self { Decoder { header: None, reader: deflate::Decoder::new(inner), adler32: checksum::Adler32::new(), eos: false, } } /// Returns the header of the ZLIB stream. /// /// # Examples /// ``` /// use libflate::zlib::CompressionLevel; /// use libflate::non_blocking::zlib::Decoder; /// /// let encoded_data = [120, 156, 243, 72, 205, 201, 201, 87, 8, 207, 47, /// 202, 73, 81, 4, 0, 28, 73, 4, 62]; /// /// let mut decoder = Decoder::new(&encoded_data[..]); /// assert_eq!(decoder.header().unwrap().compression_level(), /// CompressionLevel::Default); /// ``` pub fn header(&mut self) -> io::Result<&Header> { if let Some(ref header) = self.header { Ok(header) } else { let header = self .reader .bit_reader_mut() .transaction(|r| Header::read_from(r.as_inner_mut()))?; self.header = Some(header); self.header() } } /// Returns the immutable reference to the inner stream. pub fn as_inner_ref(&self) -> &R { self.reader.as_inner_ref() } /// Returns the mutable reference to the inner stream. pub fn as_inner_mut(&mut self) -> &mut R { self.reader.as_inner_mut() } /// Unwraps this `Decoder`, returning the underlying reader. 
/// /// # Examples /// ``` /// use std::io::Cursor; /// use libflate::non_blocking::zlib::Decoder; /// /// let encoded_data = [120, 156, 243, 72, 205, 201, 201, 87, 8, 207, 47, /// 202, 73, 81, 4, 0, 28, 73, 4, 62]; /// /// let decoder = Decoder::new(Cursor::new(&encoded_data)); /// assert_eq!(decoder.into_inner().into_inner(), &encoded_data); /// ``` pub fn into_inner(self) -> R { self.reader.into_inner() } } impl Read for Decoder { fn read(&mut self, buf: &mut [u8]) -> io::Result { if self.header.is_none() { self.header()?; } if self.eos { Ok(0) } else { let read_size = self.reader.read(buf)?; if read_size == 0 { let adler32 = self .reader .bit_reader_mut() .transaction(|r| r.as_inner_mut().read_u32::())?; self.eos = true; // checksum verification is skipped during fuzzing // so that random data from fuzzer can reach actually interesting code // Compilation flag 'fuzzing' is automatically set by all 3 Rust fuzzers. if cfg!(not(fuzzing)) && adler32 != self.adler32.value() { Err(invalid_data_error!( "Adler32 checksum mismatched: value={}, expected={}", self.adler32.value(), adler32 )) } else { Ok(0) } } else { self.adler32.update(&buf[..read_size]); Ok(read_size) } } } } #[cfg(test)] mod test { use super::*; use std::io; use util::{nb_read_to_end, WouldBlockReader}; use zlib::{EncodeOptions, Encoder}; fn decode_all(buf: &[u8]) -> io::Result> { let decoder = Decoder::new(WouldBlockReader::new(buf)); nb_read_to_end(decoder) } fn default_encode(buf: &[u8]) -> io::Result> { let mut encoder = Encoder::new(Vec::new()).unwrap(); io::copy(&mut &buf[..], &mut encoder).unwrap(); encoder.finish().into_result() } macro_rules! 
assert_encode_decode { ($input:expr) => {{ let encoded = default_encode(&$input[..]).unwrap(); assert_eq!(decode_all(&encoded).unwrap(), &$input[..]); }}; } const DECODE_WORKS_TESTDATA: [u8; 20] = [ 120, 156, 243, 72, 205, 201, 201, 87, 8, 207, 47, 202, 73, 81, 4, 0, 28, 73, 4, 62, ]; #[test] fn decode_works() { let encoded = DECODE_WORKS_TESTDATA; let buf = decode_all(&encoded[..]).unwrap(); let expected = b"Hello World!"; assert_eq!(buf, expected); } #[test] fn default_encode_works() { let plain = b"Hello World! Hello ZLIB!!"; let mut encoder = Encoder::new(Vec::new()).unwrap(); io::copy(&mut &plain[..], &mut encoder).unwrap(); let encoded = encoder.finish().into_result().unwrap(); assert_eq!(decode_all(&encoded).unwrap(), plain); } #[test] fn best_speed_encode_works() { let plain = b"Hello World! Hello ZLIB!!"; let mut encoder = Encoder::with_options(Vec::new(), EncodeOptions::default().fixed_huffman_codes()) .unwrap(); io::copy(&mut &plain[..], &mut encoder).unwrap(); let encoded = encoder.finish().into_result().unwrap(); assert_eq!(decode_all(&encoded).unwrap(), plain); } const RAW_ENCODE_WORKS_EXPECTED: [u8; 23] = [ 120, 1, 1, 12, 0, 243, 255, 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 28, 73, 4, 62, ]; #[test] fn raw_encode_works() { let plain = b"Hello World!"; let mut encoder = Encoder::with_options(Vec::new(), EncodeOptions::new().no_compression()).unwrap(); io::copy(&mut &plain[..], &mut encoder).unwrap(); let encoded = encoder.finish().into_result().unwrap(); let expected = RAW_ENCODE_WORKS_EXPECTED; assert_eq!(encoded, expected); assert_eq!(decode_all(&encoded).unwrap(), plain); } #[test] fn test_issue_2() { // See: https://github.com/sile/libflate/issues/2 assert_encode_decode!([ 163, 181, 167, 40, 62, 239, 41, 125, 189, 217, 61, 122, 20, 136, 160, 178, 119, 217, 217, 41, 125, 189, 97, 195, 101, 47, 170, ]); assert_encode_decode!([ 162, 58, 99, 211, 7, 64, 96, 36, 57, 155, 53, 166, 76, 14, 238, 66, 66, 148, 154, 124, 162, 58, 99, 188, 
138, 131, 171, 189, 54, 229, 192, 38, 29, 240, 122, 28, ]); assert_encode_decode!([ 239, 238, 212, 42, 5, 46, 186, 67, 122, 247, 30, 61, 219, 62, 228, 202, 164, 205, 139, 109, 99, 181, 99, 181, 99, 122, 30, 12, 62, 46, 27, 145, 241, 183, 137, ]); assert_encode_decode!([ 88, 202, 64, 12, 125, 108, 153, 49, 164, 250, 71, 19, 4, 108, 111, 108, 237, 205, 208, 77, 217, 100, 118, 49, 10, 64, 12, 125, 51, 202, 69, 67, 181, 146, 86, ]); } } libflate-0.1.25/src/util.rs010066600017500001750000000027441350664244000137330ustar0000000000000000#[cfg(test)] use std::io::{self, Read}; #[cfg(test)] pub struct WouldBlockReader { inner: R, do_block: bool, } #[cfg(test)] impl WouldBlockReader { pub fn new(inner: R) -> Self { WouldBlockReader { inner, do_block: false, } } } #[cfg(test)] impl Read for WouldBlockReader { fn read(&mut self, buf: &mut [u8]) -> io::Result { self.do_block = !self.do_block; if self.do_block { Err(io::Error::new(io::ErrorKind::WouldBlock, "Would block")) } else if buf.is_empty() { Ok(0) } else { let mut byte = [0; 1]; if self.inner.read(&mut byte[..])? == 1 { buf[0] = byte[0]; Ok(1) } else { Ok(0) } } } } #[cfg(test)] pub fn nb_read_to_end(mut reader: R) -> io::Result> { let mut buf = vec![0; 1024]; let mut offset = 0; loop { match reader.read(&mut buf[offset..]) { Err(e) => { if e.kind() != io::ErrorKind::WouldBlock { return Err(e); } } Ok(0) => { buf.truncate(offset); break; } Ok(size) => { offset += size; if offset == buf.len() { buf.resize(offset * 2, 0); } } } } Ok(buf) } libflate-0.1.25/src/zlib.rs010066600017500001750000000564761347270214500137320ustar0000000000000000//! The encoder and decoder of the ZLIB format. //! //! The ZLIB format is defined in [RFC-1950](https://tools.ietf.org/html/rfc1950). //! //! # Examples //! ``` //! use std::io::{self, Read}; //! use libflate::zlib::{Encoder, Decoder}; //! //! // Encoding //! let mut encoder = Encoder::new(Vec::new()).unwrap(); //! io::copy(&mut &b"Hello World!"[..], &mut encoder).unwrap(); //! 
let encoded_data = encoder.finish().into_result().unwrap(); //! //! // Decoding //! let mut decoder = Decoder::new(&encoded_data[..]).unwrap(); //! let mut decoded_data = Vec::new(); //! decoder.read_to_end(&mut decoded_data).unwrap(); //! //! assert_eq!(decoded_data, b"Hello World!"); //! ``` use byteorder::BigEndian; use byteorder::ReadBytesExt; use byteorder::WriteBytesExt; use std::io; use checksum; use deflate; use finish::{Complete, Finish}; use lz77; const COMPRESSION_METHOD_DEFLATE: u8 = 8; /// Compression levels defined by the ZLIB format. #[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)] pub enum CompressionLevel { /// Compressor used fastest algorithm. Fastest = 0, /// Compressor used fast algorithm. Fast = 1, /// Compressor used default algorithm. Default = 2, /// Compressor used maximum compression, slowest algorithm. Slowest = 3, } impl CompressionLevel { fn from_u2(level: u8) -> Self { match level { 0 => CompressionLevel::Fastest, 1 => CompressionLevel::Fast, 2 => CompressionLevel::Default, 3 => CompressionLevel::Slowest, _ => unreachable!(), } } fn as_u2(&self) -> u8 { self.clone() as u8 } } impl From for CompressionLevel { fn from(f: lz77::CompressionLevel) -> Self { match f { lz77::CompressionLevel::None => CompressionLevel::Fastest, lz77::CompressionLevel::Fast => CompressionLevel::Fast, lz77::CompressionLevel::Balance => CompressionLevel::Default, lz77::CompressionLevel::Best => CompressionLevel::Slowest, } } } /// LZ77 Window sizes defined by the ZLIB format. 
#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)] pub enum Lz77WindowSize { /// 256 bytes B256 = 0, /// 512 btyes B512 = 1, /// 1 kilobyte KB1 = 2, /// 2 kilobytes KB2 = 3, /// 4 kitobytes KB4 = 4, /// 8 kitobytes KB8 = 5, /// 16 kitobytes KB16 = 6, /// 32 kitobytes KB32 = 7, } impl Lz77WindowSize { fn from_u4(compression_info: u8) -> Option { match compression_info { 0 => Some(Lz77WindowSize::B256), 1 => Some(Lz77WindowSize::B512), 2 => Some(Lz77WindowSize::KB1), 3 => Some(Lz77WindowSize::KB2), 4 => Some(Lz77WindowSize::KB4), 5 => Some(Lz77WindowSize::KB8), 6 => Some(Lz77WindowSize::KB16), 7 => Some(Lz77WindowSize::KB32), _ => None, } } fn as_u4(&self) -> u8 { self.clone() as u8 } /// Converts from `u16` to Lz77WindowSize`. /// /// Fractions are rounded to next upper window size. /// If `size` exceeds maximum window size, /// `lz77::MAX_WINDOW_SIZE` will be used instead. /// /// # Examples /// ``` /// use libflate::zlib::Lz77WindowSize; /// /// assert_eq!(Lz77WindowSize::from_u16(15000), Lz77WindowSize::KB16); /// assert_eq!(Lz77WindowSize::from_u16(16384), Lz77WindowSize::KB16); /// assert_eq!(Lz77WindowSize::from_u16(16385), Lz77WindowSize::KB32); /// assert_eq!(Lz77WindowSize::from_u16(40000), Lz77WindowSize::KB32); /// ``` pub fn from_u16(size: u16) -> Self { use self::Lz77WindowSize::*; if 16_384 < size { KB32 } else if 8192 < size { KB16 } else if 4096 < size { KB8 } else if 2048 < size { KB4 } else if 1024 < size { KB2 } else if 512 < size { KB1 } else if 256 < size { B512 } else { B256 } } /// Converts from `Lz77WindowSize` to `u16`. /// /// # Examples /// ``` /// use libflate::zlib::Lz77WindowSize; /// /// assert_eq!(Lz77WindowSize::KB16.to_u16(), 16384u16); /// ``` pub fn to_u16(&self) -> u16 { use self::Lz77WindowSize::*; match *self { B256 => 256, B512 => 512, KB1 => 1024, KB2 => 2048, KB4 => 4096, KB8 => 8192, KB16 => 16_384, KB32 => 32_768, } } } /// ZLIB header. 
#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Header { window_size: Lz77WindowSize, compression_level: CompressionLevel, } impl Header { /// Returns the LZ77 window size stored in the header. pub fn window_size(&self) -> Lz77WindowSize { self.window_size.clone() } /// Returns the compression level stored in the header. pub fn compression_level(&self) -> CompressionLevel { self.compression_level.clone() } fn from_lz77(lz77: &E) -> Self where E: lz77::Lz77Encode, { Header { compression_level: From::from(lz77.compression_level()), window_size: Lz77WindowSize::from_u16(lz77.window_size()), } } pub(crate) fn read_from(mut reader: R) -> io::Result where R: io::Read, { let cmf = reader.read_u8()?; let flg = reader.read_u8()?; let check = (u16::from(cmf) << 8) + u16::from(flg); if check % 31 != 0 { return Err(invalid_data_error!( "Inconsistent ZLIB check bits: `CMF({}) * 256 + \ FLG({})` must be a multiple of 31", cmf, flg )); } let compression_method = cmf & 0b1111; let compression_info = cmf >> 4; if compression_method != COMPRESSION_METHOD_DEFLATE { return Err(invalid_data_error!( "Compression methods other than DEFLATE(8) are \ unsupported: method={}", compression_method )); } let window_size = Lz77WindowSize::from_u4(compression_info).ok_or_else(|| { invalid_data_error!("CINFO above 7 are not allowed: value={}", compression_info) })?; let dict_flag = (flg & 0b100_000) != 0; if dict_flag { let dictionary_id = reader.read_u32::()?; return Err(invalid_data_error!( "Preset dictionaries are not supported: \ dictionary_id=0x{:X}", dictionary_id )); } let compression_level = CompressionLevel::from_u2(flg >> 6); Ok(Header { window_size, compression_level, }) } fn write_to(&self, mut writer: W) -> io::Result<()> where W: io::Write, { let cmf = (self.window_size.as_u4() << 4) | COMPRESSION_METHOD_DEFLATE; let mut flg = self.compression_level.as_u2() << 6; let check = (u16::from(cmf) << 8) + u16::from(flg); if check % 31 != 0 { flg += (31 - check % 31) as u8; } 
writer.write_u8(cmf)?; writer.write_u8(flg)?; Ok(()) } } /// ZLIB decoder. #[derive(Debug)] pub struct Decoder { header: Header, reader: deflate::Decoder, adler32: checksum::Adler32, eos: bool, } impl Decoder where R: io::Read, { /// Makes a new decoder instance. /// /// `inner` is to be decoded ZLIB stream. /// /// # Examples /// ``` /// use std::io::Read; /// use libflate::zlib::Decoder; /// /// let encoded_data = [120, 156, 243, 72, 205, 201, 201, 87, 8, 207, 47, /// 202, 73, 81, 4, 0, 28, 73, 4, 62]; /// /// let mut decoder = Decoder::new(&encoded_data[..]).unwrap(); /// let mut buf = Vec::new(); /// decoder.read_to_end(&mut buf).unwrap(); /// /// assert_eq!(buf, b"Hello World!"); /// ``` pub fn new(mut inner: R) -> io::Result { let header = Header::read_from(&mut inner)?; Ok(Decoder { header, reader: deflate::Decoder::new(inner), adler32: checksum::Adler32::new(), eos: false, }) } /// Returns the header of the ZLIB stream. /// /// # Examples /// ``` /// use libflate::zlib::{Decoder, CompressionLevel}; /// /// let encoded_data = [120, 156, 243, 72, 205, 201, 201, 87, 8, 207, 47, /// 202, 73, 81, 4, 0, 28, 73, 4, 62]; /// /// let decoder = Decoder::new(&encoded_data[..]).unwrap(); /// assert_eq!(decoder.header().compression_level(), /// CompressionLevel::Default); /// ``` pub fn header(&self) -> &Header { &self.header } /// Returns the immutable reference to the inner stream. pub fn as_inner_ref(&self) -> &R { self.reader.as_inner_ref() } /// Returns the mutable reference to the inner stream. pub fn as_inner_mut(&mut self) -> &mut R { self.reader.as_inner_mut() } /// Unwraps this `Decoder`, returning the underlying reader. 
/// /// # Examples /// ``` /// use std::io::Cursor; /// use libflate::zlib::Decoder; /// /// let encoded_data = [120, 156, 243, 72, 205, 201, 201, 87, 8, 207, 47, /// 202, 73, 81, 4, 0, 28, 73, 4, 62]; /// /// let decoder = Decoder::new(Cursor::new(&encoded_data)).unwrap(); /// assert_eq!(decoder.into_inner().into_inner(), &encoded_data); /// ``` pub fn into_inner(self) -> R { self.reader.into_inner() } } impl io::Read for Decoder where R: io::Read, { fn read(&mut self, buf: &mut [u8]) -> io::Result { if self.eos { Ok(0) } else { let read_size = self.reader.read(buf)?; if read_size == 0 { self.eos = true; let adler32 = self.reader.as_inner_mut().read_u32::()?; // checksum verification is skipped during fuzzing // so that random data from fuzzer can reach actually interesting code // Compilation flag 'fuzzing' is automatically set by all 3 Rust fuzzers. if cfg!(not(fuzzing)) && adler32 != self.adler32.value() { Err(invalid_data_error!( "Adler32 checksum mismatched: value={}, expected={}", self.adler32.value(), adler32 )) } else { Ok(0) } } else { self.adler32.update(&buf[..read_size]); Ok(read_size) } } } } /// Options for a ZLIB encoder. #[derive(Debug)] pub struct EncodeOptions where E: lz77::Lz77Encode, { header: Header, options: deflate::EncodeOptions, } impl Default for EncodeOptions { fn default() -> Self { EncodeOptions { header: Header::from_lz77(&lz77::DefaultLz77Encoder::new()), options: Default::default(), } } } impl EncodeOptions { /// Makes a default instance. /// /// # Examples /// ``` /// use libflate::zlib::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new(); /// let encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// ``` pub fn new() -> Self { Self::default() } } impl EncodeOptions where E: lz77::Lz77Encode, { /// Specifies the LZ77 encoder used to compress input data. 
/// /// # Example /// ``` /// use libflate::lz77::DefaultLz77Encoder; /// use libflate::zlib::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::with_lz77(DefaultLz77Encoder::new()); /// let encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// ``` pub fn with_lz77(lz77: E) -> Self { EncodeOptions { header: Header::from_lz77(&lz77), options: deflate::EncodeOptions::with_lz77(lz77), } } /// Disables LZ77 compression. /// /// # Example /// ``` /// use libflate::lz77::DefaultLz77Encoder; /// use libflate::zlib::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new().no_compression(); /// let encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// ``` pub fn no_compression(mut self) -> Self { self.options = self.options.no_compression(); self.header.compression_level = CompressionLevel::Fastest; self } /// Specifies the hint of the size of a DEFLATE block. /// /// The default value is `deflate::DEFAULT_BLOCK_SIZE`. /// /// # Example /// ``` /// use libflate::zlib::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new().block_size(512 * 1024); /// let encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// ``` pub fn block_size(mut self, size: usize) -> Self { self.options = self.options.block_size(size); self } /// Specifies to compress with fixed huffman codes. /// /// # Example /// ``` /// use libflate::zlib::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new().fixed_huffman_codes(); /// let encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// ``` pub fn fixed_huffman_codes(mut self) -> Self { self.options = self.options.fixed_huffman_codes(); self } } /// ZLIB encoder. #[derive(Debug)] pub struct Encoder { header: Header, writer: deflate::Encoder, adler32: checksum::Adler32, } impl Encoder where W: io::Write, { /// Makes a new encoder instance. /// /// Encoded ZLIB stream is written to `inner`. 
/// /// # Examples /// ``` /// use std::io::Write; /// use libflate::zlib::Encoder; /// /// let mut encoder = Encoder::new(Vec::new()).unwrap(); /// encoder.write_all(b"Hello World!").unwrap(); /// /// assert_eq!(encoder.finish().into_result().unwrap(), /// vec![120, 156, 5, 192, 49, 13, 0, 0, 8, 3, 65, 43, 224, 6, 7, 24, 128, /// 237, 147, 38, 245, 63, 244, 230, 65, 181, 50, 215, 1, 28, 73, 4, 62]); /// ``` pub fn new(inner: W) -> io::Result { Self::with_options(inner, EncodeOptions::default()) } } impl Encoder where W: io::Write, E: lz77::Lz77Encode, { /// Makes a new encoder instance with specified options. /// /// Encoded ZLIB stream is written to `inner`. /// /// # Examples /// ``` /// use std::io::Write; /// use libflate::zlib::{Encoder, EncodeOptions}; /// /// let options = EncodeOptions::new().no_compression(); /// let mut encoder = Encoder::with_options(Vec::new(), options).unwrap(); /// encoder.write_all(b"Hello World!").unwrap(); /// /// assert_eq!(encoder.finish().into_result().unwrap(), /// [120, 1, 1, 12, 0, 243, 255, 72, 101, 108, 108, 111, 32, 87, 111, /// 114, 108, 100, 33, 28, 73, 4, 62]); /// ``` pub fn with_options(mut inner: W, options: EncodeOptions) -> io::Result { options.header.write_to(&mut inner)?; Ok(Encoder { header: options.header, writer: deflate::Encoder::with_options(inner, options.options), adler32: checksum::Adler32::new(), }) } /// Returns the header of the ZLIB stream. /// /// # Examples /// ``` /// use libflate::zlib::{Encoder, Lz77WindowSize}; /// /// let encoder = Encoder::new(Vec::new()).unwrap(); /// assert_eq!(encoder.header().window_size(), Lz77WindowSize::KB32); /// ``` pub fn header(&self) -> &Header { &self.header } /// Writes the ZLIB trailer and returns the inner stream. 
/// /// # Examples /// ``` /// use std::io::Write; /// use libflate::zlib::Encoder; /// /// let mut encoder = Encoder::new(Vec::new()).unwrap(); /// encoder.write_all(b"Hello World!").unwrap(); /// /// assert_eq!(encoder.finish().into_result().unwrap(), /// vec![120, 156, 5, 192, 49, 13, 0, 0, 8, 3, 65, 43, 224, 6, 7, 24, 128, /// 237, 147, 38, 245, 63, 244, 230, 65, 181, 50, 215, 1, 28, 73, 4, 62]); /// ``` /// /// # Note /// /// If you are not concerned the result of this encoding, /// it may be convenient to use `AutoFinishUnchecked` instead of the explicit invocation of this method. /// /// ``` /// use std::io; /// use libflate::finish::AutoFinishUnchecked; /// use libflate::zlib::Encoder; /// /// let plain = b"Hello World!"; /// let mut buf = Vec::new(); /// let mut encoder = AutoFinishUnchecked::new(Encoder::new(&mut buf).unwrap()); /// io::copy(&mut &plain[..], &mut encoder).unwrap(); /// ``` pub fn finish(self) -> Finish { let mut inner = finish_try!(self.writer.finish()); match inner .write_u32::(self.adler32.value()) .and_then(|_| inner.flush()) { Ok(_) => Finish::new(inner, None), Err(e) => Finish::new(inner, Some(e)), } } /// Returns the immutable reference to the inner stream. pub fn as_inner_ref(&self) -> &W { self.writer.as_inner_ref() } /// Returns the mutable reference to the inner stream. pub fn as_inner_mut(&mut self) -> &mut W { self.writer.as_inner_mut() } /// Unwraps the `Encoder`, returning the inner stream. 
pub fn into_inner(self) -> W { self.writer.into_inner() } } impl io::Write for Encoder where W: io::Write, E: lz77::Lz77Encode, { fn write(&mut self, buf: &[u8]) -> io::Result { let written_size = self.writer.write(buf)?; self.adler32.update(&buf[..written_size]); Ok(written_size) } fn flush(&mut self) -> io::Result<()> { self.writer.flush() } } impl Complete for Encoder where W: io::Write, E: lz77::Lz77Encode, { fn complete(self) -> io::Result<()> { self.finish().into_result().map(|_| ()) } } #[cfg(test)] mod test { use super::*; use finish::AutoFinish; use std::io; fn decode_all(buf: &[u8]) -> io::Result> { let mut decoder = Decoder::new(buf).unwrap(); let mut buf = Vec::with_capacity(buf.len()); io::copy(&mut decoder, &mut buf)?; Ok(buf) } fn default_encode(buf: &[u8]) -> io::Result> { let mut encoder = Encoder::new(Vec::new()).unwrap(); io::copy(&mut &buf[..], &mut encoder).unwrap(); encoder.finish().into_result() } macro_rules! assert_encode_decode { ($input:expr) => {{ let encoded = default_encode(&$input[..]).unwrap(); assert_eq!(decode_all(&encoded).unwrap(), &$input[..]); }}; } const DECODE_WORKS_TESTDATA: [u8; 20] = [ 120, 156, 243, 72, 205, 201, 201, 87, 8, 207, 47, 202, 73, 81, 4, 0, 28, 73, 4, 62, ]; #[test] fn decode_works() { let encoded = DECODE_WORKS_TESTDATA; let mut decoder = Decoder::new(&encoded[..]).unwrap(); assert_eq!( *decoder.header(), Header { window_size: Lz77WindowSize::KB32, compression_level: CompressionLevel::Default, } ); let mut buf = Vec::new(); io::copy(&mut decoder, &mut buf).unwrap(); let expected = b"Hello World!"; assert_eq!(buf, expected); } #[test] fn default_encode_works() { let plain = b"Hello World! Hello ZLIB!!"; let mut encoder = Encoder::new(Vec::new()).unwrap(); io::copy(&mut &plain[..], &mut encoder).unwrap(); let encoded = encoder.finish().into_result().unwrap(); assert_eq!(decode_all(&encoded).unwrap(), plain); } #[test] fn best_speed_encode_works() { let plain = b"Hello World! 
Hello ZLIB!!"; let mut encoder = Encoder::with_options(Vec::new(), EncodeOptions::default().fixed_huffman_codes()) .unwrap(); io::copy(&mut &plain[..], &mut encoder).unwrap(); let encoded = encoder.finish().into_result().unwrap(); assert_eq!(decode_all(&encoded).unwrap(), plain); } const RAW_ENCODE_WORKS_EXPECTED: [u8; 23] = [ 120, 1, 1, 12, 0, 243, 255, 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, 28, 73, 4, 62, ]; #[test] fn raw_encode_works() { let plain = b"Hello World!"; let mut encoder = Encoder::with_options(Vec::new(), EncodeOptions::new().no_compression()).unwrap(); io::copy(&mut &plain[..], &mut encoder).unwrap(); let encoded = encoder.finish().into_result().unwrap(); let expected = RAW_ENCODE_WORKS_EXPECTED; assert_eq!(encoded, expected); assert_eq!(decode_all(&encoded).unwrap(), plain); } #[test] fn encoder_auto_finish_works() { let plain = b"Hello World! Hello ZLIB!!"; let mut buf = Vec::new(); { let mut encoder = AutoFinish::new(Encoder::new(&mut buf).unwrap()); io::copy(&mut &plain[..], &mut encoder).unwrap(); } assert_eq!(decode_all(&buf).unwrap(), plain); } #[test] fn test_issue_2() { // See: https://github.com/sile/libflate/issues/2 assert_encode_decode!([ 163, 181, 167, 40, 62, 239, 41, 125, 189, 217, 61, 122, 20, 136, 160, 178, 119, 217, 217, 41, 125, 189, 97, 195, 101, 47, 170, ]); assert_encode_decode!([ 162, 58, 99, 211, 7, 64, 96, 36, 57, 155, 53, 166, 76, 14, 238, 66, 66, 148, 154, 124, 162, 58, 99, 188, 138, 131, 171, 189, 54, 229, 192, 38, 29, 240, 122, 28, ]); assert_encode_decode!([ 239, 238, 212, 42, 5, 46, 186, 67, 122, 247, 30, 61, 219, 62, 228, 202, 164, 205, 139, 109, 99, 181, 99, 181, 99, 122, 30, 12, 62, 46, 27, 145, 241, 183, 137, ]); assert_encode_decode!([ 88, 202, 64, 12, 125, 108, 153, 49, 164, 250, 71, 19, 4, 108, 111, 108, 237, 205, 208, 77, 217, 100, 118, 49, 10, 64, 12, 125, 51, 202, 69, 67, 181, 146, 86, ]); } #[test] fn test_issues_16() { // See: https://github.com/sile/libflate/issues/16 let encoded = 
include_bytes!("../data/issues_16/crash-1bb6d408475a5bd57247ee40f290830adfe2086e"); assert_eq!( decode_all(&encoded[..]).err().map(|e| e.to_string()), Some("The value of HDIST is too big: max=30, actual=32".to_owned()) ); let encoded = include_bytes!("../data/issues_16/crash-369e8509a0e76356f4549c292ceedee429cfe125"); assert_eq!( decode_all(&encoded[..]).err().map(|e| e.to_string()), Some("The value of HDIST is too big: max=30, actual=32".to_owned()) ); let encoded = include_bytes!("../data/issues_16/crash-e75959d935650306881140df7f6d1d73e33425cb"); assert_eq!( decode_all(&encoded[..]).err().map(|e| e.to_string()), Some("The value of HDIST is too big: max=30, actual=32".to_owned()) ); } } libflate-0.1.25/.cargo_vcs_info.json0000644000000001120000000000000127340ustar00{ "git": { "sha1": "2efa0ab0d59698128b75ba2e6ec19195b403c83e" } }