libflate_lz77-1.1.0/.cargo_vcs_info.json0000644000000001120000000000000134710ustar { "git": { "sha1": "2985d72de49db2529b875a05c6ef9348a23de3df" } } libflate_lz77-1.1.0/Cargo.toml0000644000000017570000000000000115070ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] edition = "2018" name = "libflate_lz77" version = "1.1.0" authors = ["Takeru Ohta "] description = "LZ77 encoder for libflate crate" homepage = "https://github.com/sile/libflate" readme = "README.md" keywords = ["lz77"] categories = ["compression"] license = "MIT" repository = "https://github.com/sile/libflate" [dependencies.rle-decode-fast] version = "1.0.0" [dev-dependencies.libflate] version = "1" [badges.coveralls] repository = "sile/libflate" libflate_lz77-1.1.0/Cargo.toml.orig000064400000000000000000000007650000000000000151440ustar 00000000000000[package] name = "libflate_lz77" version = "1.1.0" authors = ["Takeru Ohta "] edition = "2018" description = "LZ77 encoder for libflate crate" homepage = "https://github.com/sile/libflate" repository = "https://github.com/sile/libflate" readme = "README.md" keywords = ["lz77"] categories = ["compression"] license = "MIT" [badges] coveralls = {repository = "sile/libflate"} [dependencies] rle-decode-fast = "1.0.0" [dev-dependencies] libflate = { path = "../", version = "1" } libflate_lz77-1.1.0/LICENSE000064400000000000000000000021050000000000000132500ustar 00000000000000The MIT License Copyright (c) 2016 Takeru Ohta Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. libflate_lz77-1.1.0/README.md000064400000000000000000000010410000000000000135200ustar 00000000000000libflate_lz77 ============= [![libflate_lz77](https://img.shields.io/crates/v/libflate_lz77.svg)](https://crates.io/crates/libflate_lz77) [![Documentation](https://docs.rs/libflate_lz77/badge.svg)](https://docs.rs/libflate_lz77) [![Build Status](https://travis-ci.org/sile/libflate.svg?branch=master)](https://travis-ci.org/sile/libflate) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) The interface and implementations of LZ77 compression algorithm for [libflate]. [libflate]: https://github.com/sile/libflate libflate_lz77-1.1.0/src/default.rs000064400000000000000000000166150000000000000150370ustar 00000000000000use std::cmp; use std::collections::HashMap; use super::Code; use super::Lz77Encode; use super::Sink; /// A [`Lz77Encode`] implementation used by default. #[derive(Debug)] pub struct DefaultLz77Encoder { window_size: u16, max_length: u16, buf: Vec, } impl DefaultLz77Encoder { /// Makes a new encoder instance. /// /// # Examples /// ``` /// use libflate::deflate; /// use libflate::lz77::{self, Lz77Encode, DefaultLz77Encoder}; /// /// let lz77 = DefaultLz77Encoder::new(); /// assert_eq!(lz77.window_size(), lz77::MAX_WINDOW_SIZE); /// /// let options = deflate::EncodeOptions::with_lz77(lz77); /// let _deflate = deflate::Encoder::with_options(Vec::new(), options); /// ``` pub fn new() -> Self { DefaultLz77EncoderBuilder::new().build() } /// Makes a new encoder instance with specified window size. /// /// Larger window size is prefered to raise compression ratio, /// but it may require more working memory to encode and decode data. /// /// # Examples /// ``` /// use libflate::deflate; /// use libflate::lz77::{self, Lz77Encode, DefaultLz77Encoder}; /// /// let lz77 = DefaultLz77Encoder::with_window_size(1024); /// assert_eq!(lz77.window_size(), 1024); /// /// let options = deflate::EncodeOptions::with_lz77(lz77); /// let _deflate = deflate::Encoder::with_options(Vec::new(), options); /// ``` pub fn with_window_size(size: u16) -> Self { DefaultLz77EncoderBuilder::new() .window_size(cmp::min(size, super::MAX_WINDOW_SIZE)) .build() } } impl Default for DefaultLz77Encoder { fn default() -> Self { Self::new() } } impl Lz77Encode for DefaultLz77Encoder { fn encode(&mut self, buf: &[u8], sink: S) where S: Sink, { self.buf.extend_from_slice(buf); if self.buf.len() >= self.window_size as usize * 8 { self.flush(sink); } } fn flush(&mut self, mut sink: S) where S: Sink, { let mut prefix_table = PrefixTable::new(self.buf.len()); let mut i = 0; let end = cmp::max(3, self.buf.len()) - 3; while i < end { let key = prefix(&self.buf[i..]); let matched = prefix_table.insert(key, i as u32); if let Some(j) = matched.map(|j| j as usize) { let distance = i - j; if distance <= self.window_size as usize { let length = 3 + longest_common_prefix( &self.buf, i + 3, j + 3, self.max_length as usize, ); sink.consume(Code::Pointer { length, backward_distance: distance as u16, }); for k in (i..).take(length as usize).skip(1) { if k >= end { break; } prefix_table.insert(prefix(&self.buf[k..]), k as u32); } i += length as usize; continue; } } sink.consume(Code::Literal(self.buf[i])); i += 1; } for b in &self.buf[i..] { sink.consume(Code::Literal(*b)); } self.buf.clear(); } fn window_size(&self) -> u16 { self.window_size } } #[inline] fn prefix(input_buf: &[u8]) -> [u8; 3] { let buf: &[u8] = &input_buf[..3]; // perform bounds check once [buf[0], buf[1], buf[2]] } #[inline] fn longest_common_prefix(buf: &[u8], i: usize, j: usize, max: usize) -> u16 { buf[i..] .iter() .take(max - 3) .zip(&buf[j..]) .take_while(|&(x, y)| x == y) .count() as u16 } #[derive(Debug)] enum PrefixTable { Small(HashMap<[u8; 3], u32>), Large(LargePrefixTable), } impl PrefixTable { fn new(bytes: usize) -> Self { if bytes < super::MAX_WINDOW_SIZE as usize { PrefixTable::Small(HashMap::new()) } else { PrefixTable::Large(LargePrefixTable::new()) } } #[inline] fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option { match *self { PrefixTable::Small(ref mut x) => x.insert(prefix, position), PrefixTable::Large(ref mut x) => x.insert(prefix, position), } } } #[derive(Debug)] struct LargePrefixTable { table: Vec>, } impl LargePrefixTable { fn new() -> Self { LargePrefixTable { table: (0..=0xFFFF).map(|_| Vec::new()).collect(), } } #[inline] fn insert(&mut self, prefix: [u8; 3], position: u32) -> Option { let p0 = prefix[0] as usize; let p1 = prefix[1] as usize; let p2 = prefix[2]; let i = (p0 << 8) + p1; let positions = &mut self.table[i]; for &mut (key, ref mut value) in positions.iter_mut() { if key == p2 { let old = *value; *value = position; return Some(old); } } positions.push((p2, position)); None } } /// Type for constructing instances of [`DefaultLz77Encoder`]. /// /// # Examples /// ``` /// use libflate_lz77::{ /// DefaultLz77EncoderBuilder, /// MAX_LENGTH, /// MAX_WINDOW_SIZE, /// }; /// /// // Produce an encoder explicitly with the default window size and max copy length /// let _encoder = DefaultLz77EncoderBuilder::new() /// .window_size(MAX_WINDOW_SIZE) /// .max_length(MAX_LENGTH) /// .build(); /// ``` #[derive(Debug)] pub struct DefaultLz77EncoderBuilder { window_size: u16, max_length: u16, } impl DefaultLz77EncoderBuilder { /// Create a builder with the default parameters for the encoder. pub fn new() -> Self { DefaultLz77EncoderBuilder { window_size: super::MAX_WINDOW_SIZE, max_length: super::MAX_LENGTH, } } /// Set the size of the sliding search window used during compression. /// /// Larger values require more memory. The standard window size may be /// unsuitable for a particular Sink; for example, if the encoding used /// cannot express pointer distances past a certain size, you would want the /// window size to be no greater than the Sink's limit. pub fn window_size(self, window_size: u16) -> Self { DefaultLz77EncoderBuilder { window_size: cmp::min(window_size, super::MAX_WINDOW_SIZE), ..self } } /// Set the maximum length of a pointer command this encoder will emit. /// /// Some uses of LZ77 may not be able to encode pointers of the standard /// maximum length of 258 bytes. In this case, you may set your own maximum /// which can be encoded by the Sink. pub fn max_length(self, max_length: u16) -> Self { DefaultLz77EncoderBuilder { max_length: cmp::min(max_length, super::MAX_LENGTH), ..self } } /// Build the encoder with the builder state's parameters. pub fn build(self) -> DefaultLz77Encoder { DefaultLz77Encoder { window_size: self.window_size, max_length: self.max_length, buf: Vec::new(), } } } impl Default for DefaultLz77EncoderBuilder { fn default() -> Self { Self::new() } } libflate_lz77-1.1.0/src/lib.rs000064400000000000000000000163640000000000000141620ustar 00000000000000//! The interface and implementations of LZ77 compression algorithm. //! //! LZ77 is a compression algorithm used in [DEFLATE](https://tools.ietf.org/html/rfc1951). #![warn(missing_docs)] pub use self::default::{DefaultLz77Encoder, DefaultLz77EncoderBuilder}; use rle_decode_fast::rle_decode; mod default; /// Maximum length of sharable bytes in a pointer. pub const MAX_LENGTH: u16 = 258; /// Maximum backward distance of a pointer. pub const MAX_DISTANCE: u16 = 32_768; /// Maximum size of a sliding window. pub const MAX_WINDOW_SIZE: u16 = MAX_DISTANCE; /// A LZ77 encoded data. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Code { /// Literal byte. Literal(u8), /// Backward pointer to shared data. Pointer { /// Length of the shared data. /// The values must be limited to [`MAX_LENGTH`]. length: u16, /// Distance between current position and start position of the shared data. /// The values must be limited to [`MAX_DISTANCE`]. backward_distance: u16, }, } /// Compression level. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum CompressionLevel { /// No compression. None, /// Best speed. Fast, /// Balanced between speed and size. Balance, /// Best compression. Best, } /// The [`Sink`] trait represents a consumer of LZ77 encoded data. pub trait Sink { /// Consumes a LZ77 encoded `Code`. fn consume(&mut self, code: Code); } impl<'a, T> Sink for &'a mut T where T: Sink, { fn consume(&mut self, code: Code) { (*self).consume(code); } } impl Sink for Vec where T: From, { fn consume(&mut self, code: Code) { self.push(T::from(code)); } } /// The [`Lz77Encode`] trait defines the interface of LZ77 encoding algorithm. pub trait Lz77Encode { /// Encodes a buffer and writes result LZ77 codes to `sink`. fn encode(&mut self, buf: &[u8], sink: S) where S: Sink; /// Flushes the encoder, ensuring that all intermediately buffered codes are consumed by `sink`. fn flush(&mut self, sink: S) where S: Sink; /// Returns the compression level of the encoder. /// /// If the implementation is omitted, [`CompressionLevel::Balance`] will be returned. fn compression_level(&self) -> CompressionLevel { CompressionLevel::Balance } /// Returns the window size of the encoder. /// /// If the implementation is omitted, [`MAX_WINDOW_SIZE`] will be returned. fn window_size(&self) -> u16 { MAX_WINDOW_SIZE } } /// A no compression implementation of [`Lz77Encode`] trait. #[derive(Debug, Default)] pub struct NoCompressionLz77Encoder; impl NoCompressionLz77Encoder { /// Makes a new encoder instance. /// /// # Examples /// ``` /// use libflate::deflate; /// use libflate::lz77::{Lz77Encode, NoCompressionLz77Encoder, CompressionLevel}; /// /// let lz77 = NoCompressionLz77Encoder::new(); /// assert_eq!(lz77.compression_level(), CompressionLevel::None); /// /// let options = deflate::EncodeOptions::with_lz77(lz77); /// let _deflate = deflate::Encoder::with_options(Vec::new(), options); /// ``` pub fn new() -> Self { NoCompressionLz77Encoder } } impl Lz77Encode for NoCompressionLz77Encoder { fn encode(&mut self, buf: &[u8], mut sink: S) where S: Sink, { for c in buf.iter().cloned().map(Code::Literal) { sink.consume(c); } } #[allow(unused_variables)] fn flush(&mut self, sink: S) where S: Sink, { } fn compression_level(&self) -> CompressionLevel { CompressionLevel::None } } /// LZ77 decoder. #[derive(Debug, Default)] pub struct Lz77Decoder { buffer: Vec, offset: usize, } impl Lz77Decoder { /// Makes a new [`Lz77Decoder`] instance. pub fn new() -> Self { Self::default() } /// Decodes a [`Code`]. /// /// The decoded bytes are appended to the buffer of [`Lz77Decoder`]. #[inline] pub fn decode(&mut self, code: Code) -> std::io::Result<()> { match code { Code::Literal(b) => { self.buffer.push(b); } Code::Pointer { length, backward_distance, } => { if self.buffer.len() < backward_distance as usize { return Err(std::io::Error::new( std::io::ErrorKind::InvalidData, format!( "Too long backword reference: buffer.len={}, distance={}", self.buffer.len(), backward_distance ), )); } rle_decode( &mut self.buffer, usize::from(backward_distance), usize::from(length), ); } } Ok(()) } /// Appends the bytes read from `reader` to the buffer of [`Lz77Decoder`]. pub fn extend_from_reader( &mut self, mut reader: R, ) -> std::io::Result { reader.read_to_end(&mut self.buffer) } /// Appends the given bytes to the buffer of [`Lz77Decoder`]. pub fn extend_from_slice(&mut self, buf: &[u8]) { self.buffer.extend_from_slice(buf); self.offset += buf.len(); } /// Clears the buffer of [`Lz77Decoder`]. pub fn clear(&mut self) { self.buffer.clear(); self.offset = 0; } /// Returns the buffer of [`Lz77Decoder`]. #[inline] pub fn buffer(&self) -> &[u8] { &self.buffer[self.offset..] } fn truncate_old_buffer(&mut self) { if self.buffer().is_empty() && self.buffer.len() > MAX_DISTANCE as usize * 4 { let old_len = self.buffer.len(); let new_len = MAX_DISTANCE as usize; { // isolation to please borrow checker let (dst, src) = self.buffer.split_at_mut(old_len - new_len); dst[..new_len].copy_from_slice(src); } self.buffer.truncate(new_len); self.offset = new_len; } } } impl std::io::Read for Lz77Decoder { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { let copy_size = std::cmp::min(buf.len(), self.buffer.len() - self.offset); buf[..copy_size].copy_from_slice(&self.buffer[self.offset..][..copy_size]); self.offset += copy_size; self.truncate_old_buffer(); Ok(copy_size) } } #[cfg(test)] mod tests { use super::*; use std::io::Read as _; #[test] fn encoder_and_decoder_works() { let mut codes = Vec::new(); let mut encoder = DefaultLz77Encoder::new(); encoder.encode(b"hello world!", &mut codes); encoder.flush(&mut codes); assert!(!codes.is_empty()); let mut decoder = Lz77Decoder::new(); for code in codes { decoder.decode(code).unwrap(); } assert_eq!(decoder.buffer(), b"hello world!"); let mut decoded = Vec::new(); decoder.read_to_end(&mut decoded).unwrap(); assert_eq!(decoded, b"hello world!"); assert!(decoder.buffer().is_empty()); } }