pax_global_header00006660000000000000000000000064136017270250014515gustar00rootroot0000000000000052 comment=3107ae1f7e6150edc65ae1225709abcc41fd065c libdeflate-1.5/000077500000000000000000000000001360172702500134555ustar00rootroot00000000000000libdeflate-1.5/.cirrus.yml000066400000000000000000000002751360172702500155710ustar00rootroot00000000000000task: freebsd_instance: matrix: - image: freebsd-11-2-release-amd64 - image: freebsd-12-0-release-amd64 install_script: pkg install -y gmake script: - gmake check libdeflate-1.5/.gitignore000066400000000000000000000002651360172702500154500ustar00rootroot00000000000000*.a *.def *.dll *.dylib *.exe *.exp *.lib *.o *.obj *.so *.so.* /.lib-cflags /.prog-cflags /programs/config.h /benchmark /checksum /gzip /gunzip /run_tests.log /test_* tags cscope* libdeflate-1.5/.travis.yml000066400000000000000000000032661360172702500155750ustar00rootroot00000000000000language: c env: global: - CFLAGS=-Werror matrix: include: - name: Native tests (Linux) os: linux dist: bionic before_install: - sudo apt-get install -y libz-dev gcc-multilib libz-dev:i386 libc6-dev-i386 valgrind clang gcc-4.8-multilib gcc-mingw-w64-i686 script: - tools/run_tests.sh native - name: Checksum, static analysis, and edge case tests (Linux) os: linux dist: bionic before_install: - sudo apt-get install -y libz-dev gcc-multilib libz-dev:i386 libc6-dev-i386 clang python3 script: - tools/run_tests.sh checksum_benchmarks static_analysis edge_case - name: gzip and cross-compile-for-Windows tests (Linux) os: linux dist: bionic before_install: - sudo apt-get install -y libz-dev valgrind gcc-mingw-w64-x86-64 libz-mingw-w64-dev script: - tools/run_tests.sh gzip windows - name: Basic tests (old Linux distro, gcc) os: linux dist: precise compiler: gcc script: - make all check - name: Basic tests (old Linux distro, clang) os: linux dist: precise compiler: clang script: - make all check - name: Basic tests (macOS, xcode11) os: osx osx_image: xcode11 script: - make all check - 
name: Basic tests (macOS, xcode9.4) os: osx osx_image: xcode9.4 script: - make all check - name: Basic tests (macOS, xcode7.3) os: osx osx_image: xcode7.3 script: - make all check - name: Basic tests (Windows, MinGW) os: windows script: - mingw32-make all check libdeflate-1.5/COPYING000066400000000000000000000020341360172702500145070ustar00rootroot00000000000000Copyright 2016 Eric Biggers Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. libdeflate-1.5/Makefile000066400000000000000000000250751360172702500151260ustar00rootroot00000000000000# # Use 'make help' to list available targets. # # Define V=1 to enable "verbose" mode, showing all executed commands. # # Define DECOMPRESSION_ONLY to omit all compression code, building a # decompression-only library. If doing this, you must also build a specific # library target such as 'libdeflate.a', as the programs will no longer compile. # # Define DISABLE_GZIP to disable support for the gzip wrapper format. 
#
# Define DISABLE_ZLIB to disable support for the zlib wrapper format.
#
# Define PREFIX to override the installation prefix, like './configure --prefix'
# in autotools-based projects (default: /usr/local)
#
# Define BINDIR to override where to install binaries, like './configure
# --bindir' in autotools-based projects (default: PREFIX/bin)
#
# Define INCDIR to override where to install headers, like './configure
# --includedir' in autotools-based projects (default: PREFIX/include)
#
# Define LIBDIR to override where to install libraries, like './configure
# --libdir' in autotools-based projects (default: PREFIX/lib)
#
# Define DESTDIR to override the installation destination directory
# (default: empty string)
#
# You can also specify custom CFLAGS, CPPFLAGS, and/or LDFLAGS.
#
##############################################################################

#### Common compiler flags.  Extra flags can be supplied by defining CFLAGS in
#### the environment or on the 'make' command line.
####
#### The default optimization flags can be overridden, e.g. via CFLAGS="-O3" or
#### CFLAGS="-O0 -fno-omit-frame-pointer".  But this usually isn't recommended;
#### you're unlikely to get significantly better performance even with -O3.

# cc-option: expands to the flag $(1) when $(CC) manages to compile an empty C
# translation unit with that flag, and to nothing otherwise.
cc-option = $(shell $(CC) $(1) -c -x c /dev/null -o /dev/null \
                    1>&2 2>/dev/null && echo $(1))

# The user's CFLAGS are placed after the default optimization flags so that
# they win.  The warning options in the loop are not understood by every
# compiler, so each one is probed individually and dropped if rejected.
override CFLAGS := \
    -O2 -fomit-frame-pointer $(CFLAGS) -std=c99 -I. -Icommon \
    -Wall -Wundef \
    $(foreach w,-Wpedantic \
                -Wdeclaration-after-statement \
                -Wmissing-prototypes \
                -Wstrict-prototypes \
                -Wvla \
                -Wimplicit-fallthrough,$(call cc-option,$(w)))

# We don't define any CPPFLAGS, but support the user specifying it.
##############################################################################

#### Installation directories.  Each one is only a default and can be
#### overridden from the environment or the 'make' command line.

PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
INCDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib

# Version number that becomes part of the shared library's file name.
SOVERSION := 0

#### Platform-dependent settings.  These four are the defaults; the MinGW
#### branch below replaces all of them.

STATIC_LIB_SUFFIX := .a
PROG_SUFFIX :=
PROG_CFLAGS :=
HARD_LINKS := 1

# Compiling for Windows with MinGW?
ifneq (,$(findstring -mingw,$(shell $(CC) -dumpmachine 2>/dev/null)))

  STATIC_LIB_SUFFIX := static.lib
  PROG_SUFFIX := .exe
  PROG_CFLAGS := -static -municode
  HARD_LINKS :=

  SHARED_LIB := libdeflate.dll
  SHARED_LIB_SYMLINK :=
  SHARED_LIB_CFLAGS :=
  SHARED_LIB_LDFLAGS := -Wl,--out-implib,libdeflate.lib \
                        -Wl,--output-def,libdeflate.def \
                        -Wl,--add-stdcall-alias

  override CFLAGS := $(CFLAGS) $(call cc-option,-Wno-pedantic-ms-format)

  # If AR was not already overridden, then derive it from $(CC).
  # Note that CC may take different forms, e.g. "cc", "gcc",
  # "x86_64-w64-mingw32-gcc", or "x86_64-w64-mingw32-gcc-6.3.1".
  # On Windows it may also have a .exe extension.
  ifeq ($(AR),ar)
    AR := $(shell echo $(CC) | \
            sed -E 's/g?cc(-?[0-9]+(\.[0-9]+)*)?(\.exe)?$$/ar\3/')
  endif

# macOS?
else ifeq ($(shell uname),Darwin)

  SHARED_LIB := libdeflate.$(SOVERSION).dylib
  SHARED_LIB_SYMLINK := libdeflate.dylib
  SHARED_LIB_CFLAGS := -fPIC
  SHARED_LIB_LDFLAGS := -install_name $(SHARED_LIB)

# Linux, FreeBSD, etc.
else

  SHARED_LIB := libdeflate.so.$(SOVERSION)
  SHARED_LIB_SYMLINK := libdeflate.so
  SHARED_LIB_CFLAGS := -fPIC
  SHARED_LIB_LDFLAGS := -Wl,-soname=$(SHARED_LIB)

endif

##############################################################################

#### Quiet make is enabled by default.  Define V=1 to disable.
# Quiet mode replaces each echoed command with a short tag plus the target
# name.  It is switched off by 'make -s' (nothing should be printed) and by
# V=1 (the full commands should be printed).
#
# Only the single-letter options are searched for 's'.  MAKEFLAGS also carries
# long options and command-line variable definitions, so searching the whole
# string would mistake e.g. 'make PREFIX=/usr' for 'make -s'.  GNU Make 4.0 and
# later keep the single-letter options in the first word of MAKEFLAGS; for 3.x
# the long options are filtered out instead, which is never less accurate than
# searching everything.
ifeq ($(filter 3.%,$(MAKE_VERSION)),)
make_short_opts := $(firstword -$(MAKEFLAGS))
else
make_short_opts := $(filter-out --%,$(MAKEFLAGS))
endif

ifeq ($(findstring s,$(make_short_opts)),)
ifneq ($(V),1)
        QUIET_CC   = @echo ' CC ' $@;
        QUIET_CCLD = @echo ' CCLD ' $@;
        QUIET_AR   = @echo ' AR ' $@;
        QUIET_LN   = @echo ' LN ' $@;
        QUIET_CP   = @echo ' CP ' $@;
        QUIET_GEN  = @echo ' GEN ' $@;
endif
endif

##############################################################################

# Headers that both the library and the programs depend on.
COMMON_HEADERS := $(wildcard common/*.h) libdeflate.h

# Targets built by 'make all'; appended to as the targets are defined.
DEFAULT_TARGETS :=

#### Library

STATIC_LIB := libdeflate$(STATIC_LIB_SUFFIX)

LIB_CFLAGS += $(CFLAGS) -fvisibility=hidden -D_ANSI_SOURCE

LIB_HEADERS := $(wildcard lib/*.h) $(wildcard lib/*/*.h)

# Sources that are always part of the library.
LIB_SRC := lib/aligned_malloc.c lib/deflate_decompress.c \
           $(wildcard lib/*/cpu_features.c)

# Each feature switch below is first reset to empty, so it only takes effect
# when given on the 'make' command line (which overrides the assignment).

DECOMPRESSION_ONLY :=
ifndef DECOMPRESSION_ONLY
    LIB_SRC += lib/deflate_compress.c
endif

DISABLE_ZLIB :=
ifndef DISABLE_ZLIB
    LIB_SRC += lib/adler32.c lib/zlib_decompress.c
    ifndef DECOMPRESSION_ONLY
        LIB_SRC += lib/zlib_compress.c
    endif
endif

DISABLE_GZIP :=
ifndef DISABLE_GZIP
    LIB_SRC += lib/crc32.c lib/gzip_decompress.c
    ifndef DECOMPRESSION_ONLY
        LIB_SRC += lib/gzip_compress.c
    endif
endif

# The static and shared libraries are compiled from the same sources into
# separate object files (.o and .shlib.o).
STATIC_LIB_OBJ := $(LIB_SRC:.c=.o)
SHARED_LIB_OBJ := $(LIB_SRC:.c=.shlib.o)

# Compile static library object files
$(STATIC_LIB_OBJ): %.o: %.c $(LIB_HEADERS) $(COMMON_HEADERS) .lib-cflags
	$(QUIET_CC) $(CC) -o $@ -c $(CPPFLAGS) $(LIB_CFLAGS) $<

# Compile shared library object files
$(SHARED_LIB_OBJ): %.shlib.o: %.c $(LIB_HEADERS) $(COMMON_HEADERS) .lib-cflags
	$(QUIET_CC) $(CC) -o $@ -c $(CPPFLAGS) $(LIB_CFLAGS) \
		$(SHARED_LIB_CFLAGS) -DLIBDEFLATE_DLL $<

# Create static library
$(STATIC_LIB):$(STATIC_LIB_OBJ)
	$(QUIET_AR) $(AR) cr $@ $+

DEFAULT_TARGETS += $(STATIC_LIB)

# Create shared library
$(SHARED_LIB):$(SHARED_LIB_OBJ)
	$(QUIET_CCLD) $(CC) -o $@ $(LDFLAGS) $(LIB_CFLAGS) \
		$(SHARED_LIB_LDFLAGS) -shared $+

DEFAULT_TARGETS += $(SHARED_LIB)

ifdef SHARED_LIB_SYMLINK
# Create the symlink libdeflate.so => libdeflate.so.$SOVERSION
$(SHARED_LIB_SYMLINK):$(SHARED_LIB)
	$(QUIET_LN) ln -sf $+ $@
DEFAULT_TARGETS += $(SHARED_LIB_SYMLINK)
endif

#
# Rebuild if CC, LIB_CFLAGS, or CPPFLAGS changed
#
# The recipe runs on every invocation (FORCE), but the file is only rewritten
# when the recorded flags differ, so its timestamp only changes when the
# targets that list it as a prerequisite really need rebuilding.
.lib-cflags: FORCE
	@flags='$(CC):$(LIB_CFLAGS):$(CPPFLAGS)'; \
	if [ "$$flags" != "`cat $@ 2>/dev/null`" ]; then \
		[ -e $@ ] && echo "Rebuilding library due to new compiler flags"; \
		echo "$$flags" > $@; \
	fi

##############################################################################

#### Programs

PROG_CFLAGS += $(CFLAGS) \
               -D_POSIX_C_SOURCE=200809L \
               -D_FILE_OFFSET_BITS=64 \
               -DHAVE_CONFIG_H

# Every program object depends on all of these headers.
ALL_PROG_COMMON_HEADERS := programs/config.h \
                           programs/prog_util.h \
                           programs/test_util.h

# Sources linked into every program.
PROG_COMMON_SRC := programs/prog_util.c \
                   programs/tgetopt.c

# Programs built by default.
NONTEST_PROG_SRC := programs/gzip.c

# Source linked into the test programs only.
TEST_PROG_COMMON_SRC := programs/test_util.c

# Programs built only on request.
TEST_PROG_SRC := programs/benchmark.c \
                 programs/checksum.c \
                 programs/test_checksums.c \
                 programs/test_incomplete_codes.c \
                 programs/test_slow_decompression.c

# Each program is named after its source file and placed in the top-level
# directory.
NONTEST_PROGRAMS := $(NONTEST_PROG_SRC:programs/%.c=%$(PROG_SUFFIX))
DEFAULT_TARGETS += $(NONTEST_PROGRAMS)
TEST_PROGRAMS := $(TEST_PROG_SRC:programs/%.c=%$(PROG_SUFFIX))

PROG_COMMON_OBJ := $(PROG_COMMON_SRC:%.c=%.o)
NONTEST_PROG_OBJ := $(NONTEST_PROG_SRC:%.c=%.o)
TEST_PROG_COMMON_OBJ := $(TEST_PROG_COMMON_SRC:%.c=%.o)
TEST_PROG_OBJ := $(TEST_PROG_SRC:%.c=%.o)

ALL_PROG_OBJ := $(PROG_COMMON_OBJ) $(NONTEST_PROG_OBJ) \
                $(TEST_PROG_COMMON_OBJ) $(TEST_PROG_OBJ)

# Generate autodetected configuration header
#
# The script's output goes to a temporary file that is renamed into place only
# on success.  Redirecting straight into $@ would leave a truncated header
# behind when the script fails, and that file would then look up to date.
programs/config.h:programs/detect.sh .prog-cflags
	$(QUIET_GEN) CC="$(CC)" CFLAGS="$(PROG_CFLAGS)" $< > $@.tmp \
		&& mv -f $@.tmp $@ \
		|| { rm -f $@.tmp; exit 1; }

# Compile program object files
$(ALL_PROG_OBJ): %.o: %.c $(ALL_PROG_COMMON_HEADERS) $(COMMON_HEADERS) \
			.prog-cflags
	$(QUIET_CC) $(CC) -o $@ -c $(CPPFLAGS) $(PROG_CFLAGS) $<

# Link the programs.
#
# Note: the test programs are not compiled by default.  One reason is that the
# test programs must be linked with zlib for doing comparisons.
$(NONTEST_PROGRAMS): %$(PROG_SUFFIX): programs/%.o $(PROG_COMMON_OBJ) \
			$(STATIC_LIB)
	$(QUIET_CCLD) $(CC) -o $@ $(LDFLAGS) $(PROG_CFLAGS) $+

$(TEST_PROGRAMS): %$(PROG_SUFFIX): programs/%.o $(PROG_COMMON_OBJ) \
			$(TEST_PROG_COMMON_OBJ) $(STATIC_LIB)
	$(QUIET_CCLD) $(CC) -o $@ $(LDFLAGS) $(PROG_CFLAGS) $+ -lz

ifdef HARD_LINKS
# Hard link gunzip to gzip
gunzip$(PROG_SUFFIX):gzip$(PROG_SUFFIX)
	$(QUIET_LN) ln -f $< $@
else
# No hard links; copy gzip to gunzip
gunzip$(PROG_SUFFIX):gzip$(PROG_SUFFIX)
	$(QUIET_CP) cp -f $< $@
endif

DEFAULT_TARGETS += gunzip$(PROG_SUFFIX)

# Rebuild if CC, PROG_CFLAGS, or CPPFLAGS changed
.prog-cflags: FORCE
	@flags='$(CC):$(PROG_CFLAGS):$(CPPFLAGS)'; \
	if [ "$$flags" != "`cat $@ 2>/dev/null`" ]; then \
		[ -e $@ ] && echo "Rebuilding programs due to new compiler flags"; \
		echo "$$flags" > $@; \
	fi

##############################################################################

all:$(DEFAULT_TARGETS)

# Install the files.  Note: not all versions of the 'install' program have the
# '-D' and '-t' options, so don't use them; use portable commands only.
#
# The unversioned library symlink is named by $(SHARED_LIB_SYMLINK), which
# differs per platform (libdeflate.so, libdeflate.dylib) and is empty when
# building with MinGW, in which case no symlink is created.  The programs are
# installed with $(PROG_SUFFIX) so that the file which was actually built is
# the one that gets copied.
install:all
	install -d $(DESTDIR)$(LIBDIR) $(DESTDIR)$(INCDIR) $(DESTDIR)$(BINDIR)
	install -m644 $(STATIC_LIB) $(DESTDIR)$(LIBDIR)
	install -m755 $(SHARED_LIB) $(DESTDIR)$(LIBDIR)
	if [ -n "$(SHARED_LIB_SYMLINK)" ]; then \
		ln -sf $(SHARED_LIB) \
			$(DESTDIR)$(LIBDIR)/$(SHARED_LIB_SYMLINK); \
	fi
	install -m644 libdeflate.h $(DESTDIR)$(INCDIR)
	install -m755 gzip$(PROG_SUFFIX) \
		$(DESTDIR)$(BINDIR)/libdeflate-gzip$(PROG_SUFFIX)
	ln -f $(DESTDIR)$(BINDIR)/libdeflate-gzip$(PROG_SUFFIX) \
		$(DESTDIR)$(BINDIR)/libdeflate-gunzip$(PROG_SUFFIX)

# Remove exactly the files that 'install' creates.  $(addprefix) expands to
# nothing when SHARED_LIB_SYMLINK is empty, so 'rm' is never handed the bare
# library directory.
uninstall:
	rm -f $(DESTDIR)$(LIBDIR)/$(STATIC_LIB) \
		$(DESTDIR)$(LIBDIR)/$(SHARED_LIB) \
		$(addprefix $(DESTDIR)$(LIBDIR)/,$(SHARED_LIB_SYMLINK)) \
		$(DESTDIR)$(INCDIR)/libdeflate.h \
		$(DESTDIR)$(BINDIR)/libdeflate-gzip$(PROG_SUFFIX) \
		$(DESTDIR)$(BINDIR)/libdeflate-gunzip$(PROG_SUFFIX)

test_programs:$(TEST_PROGRAMS)

# A minimal 'make check' target.  This only runs some quick tests;
# use tools/run_tests.sh if you want to run the full tests.
check:test_programs
	./benchmark$(PROG_SUFFIX) < ./benchmark$(PROG_SUFFIX)
	for prog in test_*; do \
		./$$prog || exit 1; \
	done

# List the targets that can be built.  printf is used rather than 'echo -e'
# because some shells' built-in echo prints the "-e" instead of treating it as
# an option.
help:
	@echo "Available targets:"
	@echo "------------------"
	@for target in $(DEFAULT_TARGETS) $(TEST_PROGRAMS); do \
		printf '%s\n' "$$target"; \
	done

# Remove everything the build can produce, including the outputs of the
# MinGW and Microsoft toolchains.
clean:
	rm -f *.a *.dll *.exe *.exp *.so \
		lib/*.o lib/*/*.o \
		lib/*.obj lib/*/*.obj \
		lib/*.dllobj lib/*/*.dllobj \
		programs/*.o programs/*.obj \
		$(DEFAULT_TARGETS) $(TEST_PROGRAMS) programs/config.h \
		libdeflate.lib libdeflate.def libdeflatestatic.lib \
		.lib-cflags .prog-cflags

realclean: clean
	rm -f tags cscope* run_tests.log

# Prerequisite with no recipe and no file: anything depending on it is always
# considered out of date.
FORCE:

# 'check' is listed as well, so that a stray file named "check" cannot stop
# the tests from running.
.PHONY: all install uninstall test_programs check help clean realclean

.DEFAULT_GOAL = all
libdeflate-1.5/Makefile.msc000066400000000000000000000025461360172702500157050ustar00rootroot00000000000000
#
# Makefile for the Microsoft toolchain
#
# Usage:
#	nmake /f Makefile.msc
#

.SUFFIXES: .c .obj .dllobj

CC = cl
LD = link
AR = lib
CFLAGS = /MD /O2 -I. -Icommon
LDFLAGS =

STATIC_LIB = libdeflatestatic.lib
SHARED_LIB = libdeflate.dll
IMPORT_LIB = libdeflate.lib

STATIC_LIB_OBJ = \
	lib/aligned_malloc.obj \
	lib/adler32.obj \
	lib/crc32.obj \
	lib/deflate_compress.obj \
	lib/deflate_decompress.obj \
	lib/gzip_compress.obj \
	lib/gzip_decompress.obj \
	lib/x86/cpu_features.obj \
	lib/zlib_compress.obj \
	lib/zlib_decompress.obj

SHARED_LIB_OBJ = $(STATIC_LIB_OBJ:.obj=.dllobj)

PROG_COMMON_OBJ = programs/prog_util.obj \
		  programs/tgetopt.obj \
		  $(STATIC_LIB)

PROG_CFLAGS = $(CFLAGS) -Iprograms

all: $(STATIC_LIB) $(SHARED_LIB) $(IMPORT_LIB) gzip.exe gunzip.exe

.c.obj:
	$(CC) -c /Fo$@ $(CFLAGS) $**

.c.dllobj:
	$(CC) -c /Fo$@ $(CFLAGS) /DLIBDEFLATE_DLL $**

$(STATIC_LIB): $(STATIC_LIB_OBJ)
	$(AR) $(ARFLAGS) -out:$@ $(STATIC_LIB_OBJ)

$(SHARED_LIB): $(SHARED_LIB_OBJ)
	$(LD) $(LDFLAGS) -out:$@ -dll -implib:$(IMPORT_LIB) $(SHARED_LIB_OBJ)

$(IMPORT_LIB): $(SHARED_LIB)

gzip.exe:programs/gzip.obj $(PROG_COMMON_OBJ)
	$(LD) $(LDFLAGS) -out:$@ $**

gunzip.exe:gzip.exe
	copy $** $@

clean:
	-del *.dll *.exe *.exp
libdeflate.lib libdeflatestatic.lib gzip.lib \ lib\*.obj lib\*\*.obj lib\*.dllobj lib\*\*.dllobj \ programs\*.obj 2>nul libdeflate-1.5/NEWS000066400000000000000000000121571360172702500141620ustar00rootroot00000000000000Version 1.5: Fixed up stdcall support on 32-bit Windows: the functions are now exported using both suffixed and non-suffixed names, and fixed libdeflate.h to be MSVC-compatible again. Version 1.4: The 32-bit Windows build of libdeflate now uses the "stdcall" calling convention instead of "cdecl". If you're calling libdeflate.dll directly from C or C++, you'll need to recompile your code. If you're calling it from another language, or calling it indirectly using LoadLibrary(), you'll need to update your code to use the stdcall calling convention. The Makefile now supports building libdeflate as a shared library (.dylib) on macOS. Fixed a bug where support for certain optimizations and optional features (file access hints and more precise timestamps) was incorrectly omitted when libdeflate was compiled with -Werror. Added 'make check' target to the Makefile. Added CI configuration files. Version 1.3: `make install` now supports customizing the directories into which binaries, headers, and libraries are installed. `make install` now installs into /usr/local by default. To change it, use e.g. `make install PREFIX=/usr`. `make install` now works on more platforms. The Makefile now supports overriding the optimization flags. The compression functions now correctly handle an output data buffer >= 4 GiB in size, and `gzip` and `gunzip` now correctly handle multi-gigabyte files (if enough memory is available). Version 1.2: Slight improvements to decompression speed. Added an AVX-512BW implementation of Adler-32. The Makefile now supports a user-specified installation PREFIX. Fixed build error with some Visual Studio versions. 
Version 1.1: Fixed crash in CRC-32 code when the prebuilt libdeflate for 32-bit Windows was called by a program built with Visual Studio. Improved the worst-case decompression speed of malicious data. Fixed build error when compiling for an ARM processor without hardware floating point support. Improved performance on the PowerPC64 architecture. Added soname to libdeflate.so, to make packaging easier. Added 'make install' target to the Makefile. The Makefile now supports user-specified CPPFLAGS. The Windows binary releases now include the import library for libdeflate.dll. libdeflate.lib is now the import library, and libdeflatestatic.lib is the static library. Version 1.0: Added support for multi-member gzip files. Moved architecture-specific code into subdirectories. If you aren't using the provided Makefile to build libdeflate, you now need to compile lib/*.c and lib/*/*.c instead of just lib/*.c. Added an ARM PMULL implementation of CRC-32, which speeds up gzip compression and decompression on 32-bit and 64-bit ARM processors that have the Cryptography Extensions. Improved detection of CPU features, resulting in accelerated functions being used in more cases. This includes: - Detect CPU features on 32-bit x86, not just 64-bit as was done previously. - Detect CPU features on ARM, both 32 and 64-bit. (Limited to Linux only currently.) Version 0.8: Build fixes for certain platforms and compilers. libdeflate now produces the same output on all CPU architectures. Improved documentation for building libdeflate on Windows. Version 0.7: Fixed a very rare bug that caused data to be compressed incorrectly. The bug affected compression levels 7 and below since libdeflate v0.2. Although there have been no user reports of the bug, and I believe it would have been highly unlikely to encounter on realistic data, it could occur on data specially crafted to reproduce it. Fixed a compilation error when building with clang 3.7. 
Version 0.6: Various improvements to the gzip program's behavior. Faster CRC-32 on AVX-capable processors. Other minor changes. Version 0.5: The CRC-32 checksum algorithm has been optimized with carryless multiplication instructions for x86_64 (PCLMUL). This speeds up gzip compression and decompression. Build fixes for certain platforms and compilers. Added more test programs and scripts. libdeflate is now entirely MIT-licensed. Version 0.4: The Adler-32 checksum algorithm has been optimized with vector instructions for x86_64 (SSE2 and AVX2) and ARM (NEON). This speeds up zlib compression and decompression. To avoid naming collisions, functions and definitions in libdeflate's API have been renamed to be prefixed with "libdeflate_" or "LIBDEFLATE_". Programs using the old API will need to be updated. Various bug fixes and other improvements. Version 0.3: Some bug fixes and other minor changes. Version 0.2: Implemented a new block splitting algorithm which typically improves the compression ratio slightly at all compression levels. The compressor now outputs each block using the cheapest type (dynamic Huffman, static Huffman, or uncompressed). The gzip program has received an overhaul and now behaves more like the standard version. Build system updates, including: some build options were changed and some build options were removed, and the default 'make' target now includes the gzip program as well as the library. Version 0.1: Initial official release. libdeflate-1.5/README.md000066400000000000000000000246661360172702500147520ustar00rootroot00000000000000[![Build Status](https://travis-ci.org/ebiggers/libdeflate.svg?branch=master)](https://travis-ci.org/ebiggers/libdeflate) # Overview libdeflate is a library for fast, whole-buffer DEFLATE-based compression and decompression. The supported formats are: - DEFLATE (raw) - zlib (a.k.a. DEFLATE with a zlib wrapper) - gzip (a.k.a. DEFLATE with a gzip wrapper) libdeflate is heavily optimized. 
It is significantly faster than the zlib library, both for
compression and decompression, and especially on x86 processors.  In addition,
libdeflate provides optional high compression modes that provide a better
compression ratio than zlib's "level 9".

libdeflate itself is a library, but the following command-line programs which
use this library are also provided:

* gzip (or gunzip), a program which mostly behaves like the standard equivalent,
  except that it does not yet have good streaming support and therefore does not
  yet support very large files
* benchmark, a program for benchmarking in-memory compression and decompression

# Building

## For UNIX

Just run `make`, then (if desired) `make install`.  You need GNU Make and either
GCC or Clang.  GCC is recommended because it builds slightly faster binaries.

By default, the following targets are built: the static library `libdeflate.a`,
the shared library `libdeflate.so`, the `gzip` program, and the `gunzip` program
(which is actually just a hard link to `gzip`).  Benchmarking and test programs
such as `benchmark` are not built by default.  You can run `make help` to
display the available build targets.

There are also many options which can be set on the `make` command line, e.g. to
omit library features or to customize the directories into which `make install`
installs files.  See the Makefile for details.

## For Windows

Prebuilt Windows binaries can be downloaded from
https://github.com/ebiggers/libdeflate/releases.  But if you need to build the
binaries yourself, MinGW (gcc) is the recommended compiler to use.  If you're
performing the build *on* Windows (as opposed to cross-compiling for Windows on
Linux, for example), you'll need to follow the directions in **one** of the two
sections below to set up a minimal UNIX-compatible environment using either
Cygwin or MSYS2, then do the build.  (Other MinGW distributions may not work, as
they often omit basic UNIX tools such as `sh`.)

Alternatively, libdeflate may be built using the Visual Studio toolchain by
running `nmake /f Makefile.msc`.  However, while this is supported in the sense
that it will produce working binaries, it is not recommended because the
binaries built with MinGW will be significantly faster.

Also note that 64-bit binaries are faster than 32-bit binaries and should be
preferred whenever possible.

### Using Cygwin

Run the Cygwin installer, available from https://cygwin.com/setup-x86_64.exe.
When you get to the package selection screen, choose the following additional
packages from category "Devel":

- git
- make
- mingw64-i686-binutils
- mingw64-i686-gcc-g++
- mingw64-x86_64-binutils
- mingw64-x86_64-gcc-g++

(You may skip the mingw64-i686 packages if you don't need to build 32-bit
binaries.)

After the installation finishes, open a Cygwin terminal.  Then download
libdeflate's source code (if you haven't already) and `cd` into its directory:

    git clone https://github.com/ebiggers/libdeflate
    cd libdeflate

(Note that it's not required to use `git`; an alternative is to extract a .zip
or .tar.gz archive of the source code downloaded from the releases page.
Also, in case you need to find it in the file browser, note that your home
directory in Cygwin is usually located at `C:\cygwin64\home\<your username>`.)

Then, to build 64-bit binaries:

    make CC=x86_64-w64-mingw32-gcc

or to build 32-bit binaries:

    make CC=i686-w64-mingw32-gcc

### Using MSYS2

Run the MSYS2 installer, available from http://www.msys2.org/.  After
installing, open an MSYS2 shell and run:

    pacman -Syu

Say `y`, then when it's finished, close the shell window and open a new one.
Then run the same command again:

    pacman -Syu

Then, install the packages needed to build libdeflate:

    pacman -S git \
              make \
              mingw-w64-i686-binutils \
              mingw-w64-i686-gcc \
              mingw-w64-x86_64-binutils \
              mingw-w64-x86_64-gcc

(You may skip the mingw-w64-i686 packages if you don't need to build 32-bit
binaries.)

Then download libdeflate's source code (if you haven't already):

    git clone https://github.com/ebiggers/libdeflate

(Note that it's not required to use `git`; an alternative is to extract a .zip
or .tar.gz archive of the source code downloaded from the releases page.
Also, in case you need to find it in the file browser, note that your home
directory in MSYS2 is usually located at `C:\msys64\home\<your username>`.)

Then, to build 64-bit binaries, open "MSYS2 MinGW 64-bit" from the Start menu
and run the following commands:

    cd libdeflate
    make clean
    make

Or to build 32-bit binaries, do the same but use "MSYS2 MinGW 32-bit" instead.

# API

libdeflate has a simple API that is not zlib-compatible.  You can create
compressors and decompressors and use them to compress or decompress buffers.
See libdeflate.h for details.

There is currently no support for streaming.  This has been considered, but it
always significantly increases complexity and slows down fast paths.
Unfortunately, at this point it remains a future TODO.  So: if your application
compresses data in "chunks", say, less than 1 MB in size, then libdeflate is a
great choice for you; that's what it's designed to do.  This is perfect for
certain use cases such as transparent filesystem compression.  But if your
application compresses large files as a single compressed stream, similarly to
the `gzip` program, then libdeflate isn't for you.

Note that with chunk-based compression, you generally should have the
uncompressed size of each chunk stored outside of the compressed data itself.
This enables you to allocate an output buffer of the correct size without
guessing.  However, libdeflate's decompression routines do optionally provide
the actual number of output bytes in case you need it.

Windows developers: note that the calling convention of libdeflate.dll is
"stdcall" -- the same as the Win32 API.  If you call into libdeflate.dll using a
non-C/C++ language, or dynamically using LoadLibrary(), make sure to use the
stdcall convention.
Using the wrong convention may crash your application. (Note: older versions of libdeflate used the "cdecl" convention instead.) # DEFLATE vs. zlib vs. gzip The DEFLATE format ([rfc1951](https://www.ietf.org/rfc/rfc1951.txt)), the zlib format ([rfc1950](https://www.ietf.org/rfc/rfc1950.txt)), and the gzip format ([rfc1952](https://www.ietf.org/rfc/rfc1952.txt)) are commonly confused with each other as well as with the [zlib software library](http://zlib.net), which actually supports all three formats. libdeflate (this library) also supports all three formats. Briefly, DEFLATE is a raw compressed stream, whereas zlib and gzip are different wrappers for this stream. Both zlib and gzip include checksums, but gzip can include extra information such as the original filename. Generally, you should choose a format as follows: - If you are compressing whole files with no subdivisions, similar to the `gzip` program, you probably should use the gzip format. - Otherwise, if you don't need the features of the gzip header and footer but do still want a checksum for corruption detection, you probably should use the zlib format. - Otherwise, you probably should use raw DEFLATE. This is ideal if you don't need checksums, e.g. because they're simply not needed for your use case or because you already compute your own checksums that are stored separately from the compressed stream. Note that gzip and zlib streams can be distinguished from each other based on their starting bytes, but this is not necessarily true of raw DEFLATE streams. # Compression levels An often-underappreciated fact of compression formats such as DEFLATE is that there are an enormous number of different ways that a given input could be compressed. Different algorithms and different amounts of computation time will result in different compression ratios, while remaining equally compatible with the decompressor. For this reason, the commonly used zlib library provides nine compression levels. 
Level 1 is the fastest but provides the worst compression; level 9 provides the best compression but is the slowest. It defaults to level 6. libdeflate uses this same design but is designed to improve on both zlib's performance *and* compression ratio at every compression level. In addition, libdeflate's levels go [up to 12](https://xkcd.com/670/) to make room for a minimum-cost-path based algorithm (sometimes called "optimal parsing") that can significantly improve on zlib's compression ratio. If you are using DEFLATE (or zlib, or gzip) in your application, you should test different levels to see which works best for your application. # Motivation Despite DEFLATE's widespread use mainly through the zlib library, in the compression community this format from the early 1990s is often considered obsolete. And in a few significant ways, it is. So why implement DEFLATE at all, instead of focusing entirely on bzip2/LZMA/xz/LZ4/LZX/ZSTD/Brotli/LZHAM/LZFSE/[insert cool new format here]? To do something better, you need to understand what came before. And it turns out that most ideas from DEFLATE are still relevant. Many of the newer formats share a similar structure as DEFLATE, with different tweaks. The effects of trivial but very useful tweaks, such as increasing the sliding window size, are often confused with the effects of nontrivial but less useful tweaks. And actually, many of these formats are similar enough that common algorithms and optimizations (e.g. those dealing with LZ77 matchfinding) can be reused. In addition, comparing compressors fairly is difficult because the performance of a compressor depends heavily on optimizations which are not intrinsic to the compression format itself. In this respect, the zlib library sometimes compares poorly to certain newer code because zlib is not well optimized for modern processors. libdeflate addresses this by providing an optimized DEFLATE implementation which can be used for benchmarking purposes. 
And, of course, real applications can use it as well. # License libdeflate is [MIT-licensed](COPYING). I am not aware of any patents or patent applications relevant to libdeflate. libdeflate-1.5/common/000077500000000000000000000000001360172702500147455ustar00rootroot00000000000000libdeflate-1.5/common/common_defs.h000066400000000000000000000243121360172702500174110ustar00rootroot00000000000000/* * common_defs.h * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #ifndef COMMON_COMMON_DEFS_H #define COMMON_COMMON_DEFS_H #ifdef __GNUC__ # include "compiler_gcc.h" #elif defined(_MSC_VER) # include "compiler_msc.h" #else # pragma message("Unrecognized compiler. Please add a header file for your compiler. 
Compilation will proceed, but performance may suffer!") #endif /* ========================================================================== */ /* Type definitions */ /* ========================================================================== */ #include /* size_t */ #ifndef __bool_true_false_are_defined # include /* bool */ #endif /* Fixed-width integer types */ #ifndef PRIu32 # include #endif typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; typedef uint64_t u64; typedef int8_t s8; typedef int16_t s16; typedef int32_t s32; typedef int64_t s64; /* * Word type of the target architecture. Use 'size_t' instead of 'unsigned * long' to account for platforms such as Windows that use 32-bit 'unsigned * long' on 64-bit architectures. */ typedef size_t machine_word_t; /* Number of bytes in a word */ #define WORDBYTES ((int)sizeof(machine_word_t)) /* Number of bits in a word */ #define WORDBITS (8 * WORDBYTES) /* ========================================================================== */ /* Optional compiler features */ /* ========================================================================== */ /* LIBEXPORT - export a function from a shared library */ #ifndef LIBEXPORT # define LIBEXPORT #endif /* inline - suggest that a function be inlined */ #ifndef inline # define inline #endif /* forceinline - force a function to be inlined, if possible */ #ifndef forceinline # define forceinline inline #endif /* restrict - annotate a non-aliased pointer */ #ifndef restrict # define restrict #endif /* likely(expr) - hint that an expression is usually true */ #ifndef likely # define likely(expr) (expr) #endif /* unlikely(expr) - hint that an expression is usually false */ #ifndef unlikely # define unlikely(expr) (expr) #endif /* prefetchr(addr) - prefetch into L1 cache for read */ #ifndef prefetchr # define prefetchr(addr) #endif /* prefetchw(addr) - prefetch into L1 cache for write */ #ifndef prefetchw # define prefetchw(addr) #endif /* Does the compiler support 
the 'target' function attribute? */ #ifndef COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE # define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE 0 #endif /* Which targets are supported with the 'target' function attribute? */ #ifndef COMPILER_SUPPORTS_BMI2_TARGET # define COMPILER_SUPPORTS_BMI2_TARGET 0 #endif #ifndef COMPILER_SUPPORTS_AVX_TARGET # define COMPILER_SUPPORTS_AVX_TARGET 0 #endif #ifndef COMPILER_SUPPORTS_AVX512BW_TARGET # define COMPILER_SUPPORTS_AVX512BW_TARGET 0 #endif /* * Which targets are supported with the 'target' function attribute and have * intrinsics that work within 'target'-ed functions? */ #ifndef COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS # define COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS 0 #endif #ifndef COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS # define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS 0 #endif #ifndef COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS # define COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS 0 #endif #ifndef COMPILER_SUPPORTS_AVX512BW_TARGET_INTRINSICS # define COMPILER_SUPPORTS_AVX512BW_TARGET_INTRINSICS 0 #endif #ifndef COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS # define COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS 0 #endif #ifndef COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS # define COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS 0 #endif /* _aligned_attribute(n) - declare that the annotated variable, or variables of * the annotated type, are to be aligned on n-byte boundaries */ #ifndef _aligned_attribute #endif /* ========================================================================== */ /* Miscellaneous macros */ /* ========================================================================== */ #define ARRAY_LEN(A) (sizeof(A) / sizeof((A)[0])) #define MIN(a, b) ((a) <= (b) ? (a) : (b)) #define MAX(a, b) ((a) >= (b) ? 
(a) : (b)) #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) #define STATIC_ASSERT(expr) ((void)sizeof(char[1 - 2 * !(expr)])) #define ALIGN(n, a) (((n) + (a) - 1) & ~((a) - 1)) /* ========================================================================== */ /* Endianness handling */ /* ========================================================================== */ /* * CPU_IS_LITTLE_ENDIAN() - a macro which evaluates to 1 if the CPU is little * endian or 0 if it is big endian. The macro should be defined in a way such * that the compiler can evaluate it at compilation time. If not defined, a * fallback is used. */ #ifndef CPU_IS_LITTLE_ENDIAN static forceinline int CPU_IS_LITTLE_ENDIAN(void) { union { unsigned int v; unsigned char b; } u; u.v = 1; return u.b; } #endif /* bswap16(n) - swap the bytes of a 16-bit integer */ #ifndef bswap16 static forceinline u16 bswap16(u16 n) { return (n << 8) | (n >> 8); } #endif /* bswap32(n) - swap the bytes of a 32-bit integer */ #ifndef bswap32 static forceinline u32 bswap32(u32 n) { return ((n & 0x000000FF) << 24) | ((n & 0x0000FF00) << 8) | ((n & 0x00FF0000) >> 8) | ((n & 0xFF000000) >> 24); } #endif /* bswap64(n) - swap the bytes of a 64-bit integer */ #ifndef bswap64 static forceinline u64 bswap64(u64 n) { return ((n & 0x00000000000000FF) << 56) | ((n & 0x000000000000FF00) << 40) | ((n & 0x0000000000FF0000) << 24) | ((n & 0x00000000FF000000) << 8) | ((n & 0x000000FF00000000) >> 8) | ((n & 0x0000FF0000000000) >> 24) | ((n & 0x00FF000000000000) >> 40) | ((n & 0xFF00000000000000) >> 56); } #endif #define le16_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap16(n)) #define le32_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap32(n)) #define le64_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? (n) : bswap64(n)) #define be16_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap16(n) : (n)) #define be32_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? bswap32(n) : (n)) #define be64_bswap(n) (CPU_IS_LITTLE_ENDIAN() ? 
bswap64(n) : (n)) /* ========================================================================== */ /* Unaligned memory accesses */ /* ========================================================================== */ /* * UNALIGNED_ACCESS_IS_FAST should be defined to 1 if unaligned memory accesses * can be performed efficiently on the target platform. */ #ifndef UNALIGNED_ACCESS_IS_FAST # define UNALIGNED_ACCESS_IS_FAST 0 #endif /* * DEFINE_UNALIGNED_TYPE(type) - a macro that, given an integer type 'type', * defines load_type_unaligned(addr) and store_type_unaligned(v, addr) functions * which load and store variables of type 'type' from/to unaligned memory * addresses. If not defined, a fallback is used. */ #ifndef DEFINE_UNALIGNED_TYPE /* * Although memcpy() may seem inefficient, it *usually* gets optimized * appropriately by modern compilers. It's portable and may be the best we can * do for a fallback... */ #include #define DEFINE_UNALIGNED_TYPE(type) \ \ static forceinline type \ load_##type##_unaligned(const void *p) \ { \ type v; \ memcpy(&v, p, sizeof(v)); \ return v; \ } \ \ static forceinline void \ store_##type##_unaligned(type v, void *p) \ { \ memcpy(p, &v, sizeof(v)); \ } #endif /* !DEFINE_UNALIGNED_TYPE */ /* ========================================================================== */ /* Bit scan functions */ /* ========================================================================== */ /* * Bit Scan Reverse (BSR) - find the 0-based index (relative to the least * significant end) of the *most* significant 1 bit in the input value. The * input value must be nonzero! 
*/ #ifndef bsr32 static forceinline unsigned bsr32(u32 n) { unsigned i = 0; while ((n >>= 1) != 0) i++; return i; } #endif #ifndef bsr64 static forceinline unsigned bsr64(u64 n) { unsigned i = 0; while ((n >>= 1) != 0) i++; return i; } #endif static forceinline unsigned bsrw(machine_word_t n) { STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); if (WORDBITS == 32) return bsr32(n); else return bsr64(n); } /* * Bit Scan Forward (BSF) - find the 0-based index (relative to the least * significant end) of the *least* significant 1 bit in the input value. The * input value must be nonzero! */ #ifndef bsf32 static forceinline unsigned bsf32(u32 n) { unsigned i = 0; while ((n & 1) == 0) { i++; n >>= 1; } return i; } #endif #ifndef bsf64 static forceinline unsigned bsf64(u64 n) { unsigned i = 0; while ((n & 1) == 0) { i++; n >>= 1; } return i; } #endif static forceinline unsigned bsfw(machine_word_t n) { STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); if (WORDBITS == 32) return bsf32(n); else return bsf64(n); } #endif /* COMMON_COMMON_DEFS_H */ libdeflate-1.5/common/compiler_gcc.h000066400000000000000000000122151360172702500175450ustar00rootroot00000000000000/* * compiler_gcc.h - definitions for the GNU C Compiler. This also handles clang * and the Intel C Compiler (icc). * * TODO: icc is not well tested, so some things are currently disabled even * though they maybe can be enabled on some icc versions. */ #if !defined(__clang__) && !defined(__INTEL_COMPILER) # define GCC_PREREQ(major, minor) \ (__GNUC__ > (major) || \ (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor))) #else # define GCC_PREREQ(major, minor) 0 #endif /* Note: only check the clang version when absolutely necessary! * "Vendors" such as Apple can use different version numbers. 
*/ #ifdef __clang__ # ifdef __apple_build_version__ # define CLANG_PREREQ(major, minor, apple_version) \ (__apple_build_version__ >= (apple_version)) # else # define CLANG_PREREQ(major, minor, apple_version) \ (__clang_major__ > (major) || \ (__clang_major__ == (major) && __clang_minor__ >= (minor))) # endif #else # define CLANG_PREREQ(major, minor, apple_version) 0 #endif #ifndef __has_attribute # define __has_attribute(attribute) 0 #endif #ifndef __has_feature # define __has_feature(feature) 0 #endif #ifndef __has_builtin # define __has_builtin(builtin) 0 #endif #ifdef _WIN32 # define LIBEXPORT __declspec(dllexport) #else # define LIBEXPORT __attribute__((visibility("default"))) #endif #define inline inline #define forceinline inline __attribute__((always_inline)) #define restrict __restrict__ #define likely(expr) __builtin_expect(!!(expr), 1) #define unlikely(expr) __builtin_expect(!!(expr), 0) #define prefetchr(addr) __builtin_prefetch((addr), 0) #define prefetchw(addr) __builtin_prefetch((addr), 1) #define _aligned_attribute(n) __attribute__((aligned(n))) #define COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE \ (GCC_PREREQ(4, 4) || __has_attribute(target)) #if COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE # if defined(__i386__) || defined(__x86_64__) # define COMPILER_SUPPORTS_PCLMUL_TARGET \ (GCC_PREREQ(4, 4) || __has_builtin(__builtin_ia32_pclmulqdq128)) # define COMPILER_SUPPORTS_AVX_TARGET \ (GCC_PREREQ(4, 6) || __has_builtin(__builtin_ia32_maxps256)) # define COMPILER_SUPPORTS_BMI2_TARGET \ (GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_pdep_di)) # define COMPILER_SUPPORTS_AVX2_TARGET \ (GCC_PREREQ(4, 7) || __has_builtin(__builtin_ia32_psadbw256)) # define COMPILER_SUPPORTS_AVX512BW_TARGET \ (GCC_PREREQ(5, 1) || __has_builtin(__builtin_ia32_psadbw512)) /* * Prior to gcc 4.9 (r200349) and clang 3.8 (r239883), x86 intrinsics * not available in the main target could not be used in 'target' * attribute functions. 
Unfortunately clang has no feature test macro * for this so we have to check its version. */ # if GCC_PREREQ(4, 9) || CLANG_PREREQ(3, 8, 7030000) # define COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS 1 # define COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS \ COMPILER_SUPPORTS_PCLMUL_TARGET # define COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS \ COMPILER_SUPPORTS_AVX2_TARGET # define COMPILER_SUPPORTS_AVX512BW_TARGET_INTRINSICS \ COMPILER_SUPPORTS_AVX512BW_TARGET # endif # elif (defined(__arm__) && defined(__ARM_FP)) || defined(__aarch64__) /* arm: including arm_neon.h requires hardware fp support */ /* * Prior to gcc 6.1 (r230411 for arm, r226563 for aarch64), NEON * and crypto intrinsics not available in the main target could not be * used in 'target' attribute functions. * * clang as of 5.0.1 still doesn't allow it. But, it does seem to allow * the pmull intrinsics if only __ARM_NEON is enabled. */ # define COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS GCC_PREREQ(6, 1) # ifdef __ARM_NEON # define COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS \ (GCC_PREREQ(6, 1) || __has_builtin(__builtin_neon_vmull_p64)) # else # define COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS \ (GCC_PREREQ(6, 1)) # endif # endif #endif /* COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE */ /* Newer gcc supports __BYTE_ORDER__. Older gcc doesn't. */ #ifdef __BYTE_ORDER__ # define CPU_IS_LITTLE_ENDIAN() (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) #endif #if GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) # define bswap16 __builtin_bswap16 #endif #if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32) # define bswap32 __builtin_bswap32 #endif #if GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64) # define bswap64 __builtin_bswap64 #endif #if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) # define UNALIGNED_ACCESS_IS_FAST 1 #endif /* With gcc, we can access unaligned memory through 'packed' structures. 
*/ #define DEFINE_UNALIGNED_TYPE(type) \ \ struct type##unaligned { \ type v; \ } __attribute__((packed)); \ \ static forceinline type \ load_##type##_unaligned(const void *p) \ { \ return ((const struct type##unaligned *)p)->v; \ } \ \ static forceinline void \ store_##type##_unaligned(type v, void *p) \ { \ ((struct type##unaligned *)p)->v = v; \ } #define bsr32(n) (31 - __builtin_clz(n)) #define bsr64(n) (63 - __builtin_clzll(n)) #define bsf32(n) __builtin_ctz(n) #define bsf64(n) __builtin_ctzll(n) libdeflate-1.5/common/compiler_msc.h000066400000000000000000000037211360172702500175750ustar00rootroot00000000000000/* * compiler_msc.h - definitions for the Microsoft C Compiler */ #define LIBEXPORT __declspec(dllexport) /* * Old versions (e.g. VS2010) of MSC don't have the C99 header stdbool.h. * Beware: the below replacement isn't fully standard, since normally any value * != 0 should be implicitly cast to a bool with value 1... but that doesn't * happen if bool is really just an 'int'. */ typedef int bool; #define true 1 #define false 0 #define __bool_true_false_are_defined 1 /* Define ssize_t */ #ifdef _WIN64 typedef long long ssize_t; #else typedef int ssize_t; #endif /* * Old versions (e.g. VS2010) of MSC have stdint.h but not the C99 header * inttypes.h. Work around this by defining the PRI* macros ourselves. */ #include #define PRIu8 "hhu" #define PRIu16 "hu" #define PRIu32 "u" #define PRIu64 "llu" #define PRIi8 "hhi" #define PRIi16 "hi" #define PRIi32 "i" #define PRIi64 "lli" #define PRIx8 "hhx" #define PRIx16 "hx" #define PRIx32 "x" #define PRIx64 "llx" /* Assume a little endian architecture with fast unaligned access */ #define CPU_IS_LITTLE_ENDIAN() 1 #define UNALIGNED_ACCESS_IS_FAST 1 /* __restrict has nonstandard behavior; don't use it */ #define restrict /* ... 
but we can use __inline and __forceinline */ #define inline __inline #define forceinline __forceinline /* Byte swap functions */ #include #define bswap16 _byteswap_ushort #define bswap32 _byteswap_ulong #define bswap64 _byteswap_uint64 /* Bit scan functions (32-bit) */ static forceinline unsigned bsr32(uint32_t n) { _BitScanReverse(&n, n); return n; } #define bsr32 bsr32 static forceinline unsigned bsf32(uint32_t n) { _BitScanForward(&n, n); return n; } #define bsf32 bsf32 #ifdef _M_X64 /* Bit scan functions (64-bit) */ static forceinline unsigned bsr64(uint64_t n) { _BitScanReverse64(&n, n); return n; } #define bsr64 bsr64 static forceinline unsigned bsf64(uint64_t n) { _BitScanForward64(&n, n); return n; } #define bsf64 bsf64 #endif /* _M_X64 */ libdeflate-1.5/lib/000077500000000000000000000000001360172702500142235ustar00rootroot00000000000000libdeflate-1.5/lib/adler32.c000066400000000000000000000070661360172702500156340ustar00rootroot00000000000000/* * adler32.c - Adler-32 checksum algorithm * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "lib_common.h" #include "libdeflate.h" /* The Adler-32 divisor, or "base", value. */ #define DIVISOR 65521 /* * MAX_CHUNK_SIZE is the most bytes that can be processed without the * possibility of s2 overflowing when it is represented as an unsigned 32-bit * integer. This value was computed using the following Python script: * * divisor = 65521 * count = 0 * s1 = divisor - 1 * s2 = divisor - 1 * while True: * s1 += 0xFF * s2 += s1 * if s2 > 0xFFFFFFFF: * break * count += 1 * print(count) * * Note that to get the correct worst-case value, we must assume that every byte * has value 0xFF and that s1 and s2 started with the highest possible values * modulo the divisor. */ #define MAX_CHUNK_SIZE 5552 typedef u32 (*adler32_func_t)(u32, const u8 *, size_t); /* Include architecture-specific implementations if available */ #undef DEFAULT_IMPL #undef DISPATCH #if defined(__arm__) || defined(__aarch64__) # include "arm/adler32_impl.h" #elif defined(__i386__) || defined(__x86_64__) # include "x86/adler32_impl.h" #endif /* Define a generic implementation if needed */ #ifndef DEFAULT_IMPL #define DEFAULT_IMPL adler32_generic static u32 adler32_generic(u32 adler, const u8 *p, size_t size) { u32 s1 = adler & 0xFFFF; u32 s2 = adler >> 16; const u8 * const end = p + size; while (p != end) { size_t chunk_size = MIN(end - p, MAX_CHUNK_SIZE); const u8 *chunk_end = p + chunk_size; size_t num_unrolled_iterations = chunk_size / 4; while (num_unrolled_iterations--) { s1 += *p++; s2 += s1; s1 += *p++; s2 += s1; s1 += *p++; s2 += s1; s1 += *p++; s2 += s1; } while (p != chunk_end) { s1 += *p++; s2 += s1; } s1 %= DIVISOR; s2 %= DIVISOR; } return (s2 << 16) | s1; } #endif /* !DEFAULT_IMPL */ #ifdef DISPATCH static u32 
dispatch(u32, const u8 *, size_t); static volatile adler32_func_t adler32_impl = dispatch; /* Choose the fastest implementation at runtime */ static u32 dispatch(u32 adler, const u8 *buffer, size_t size) { adler32_func_t f = arch_select_adler32_func(); if (f == NULL) f = DEFAULT_IMPL; adler32_impl = f; return adler32_impl(adler, buffer, size); } #else # define adler32_impl DEFAULT_IMPL /* only one implementation, use it */ #endif LIBDEFLATEEXPORT u32 LIBDEFLATEAPI libdeflate_adler32(u32 adler, const void *buffer, size_t size) { if (buffer == NULL) /* return initial value */ return 1; return adler32_impl(adler, buffer, size); } libdeflate-1.5/lib/adler32_vec_template.h000066400000000000000000000075161360172702500203710ustar00rootroot00000000000000/* * adler32_vec_template.h - template for vectorized Adler-32 implementations * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ /* * This file contains a template for vectorized Adler-32 implementations. * * The inner loop between reductions modulo 65521 of an unvectorized Adler-32 * implementation looks something like this: * * do { * s1 += *p; * s2 += s1; * } while (++p != chunk_end); * * For vectorized calculation of s1, we only need to sum the input bytes. They * can be accumulated into multiple counters which are eventually summed * together. * * For vectorized calculation of s2, the basic idea is that for each iteration * that processes N bytes, we can perform the following vectorizable * calculation: * * s2 += N*byte_1 + (N-1)*byte_2 + (N-2)*byte_3 + ... + 1*byte_N * * Or, equivalently, we can sum the byte_1...byte_N for each iteration into N * separate counters, then do the multiplications by N...1 just once at the end * rather than once per iteration. * * Also, we must account for how previous bytes will affect s2 by doing the * following at beginning of each iteration: * * s2 += s1 * N * * Furthermore, like s1, "s2" can actually be multiple counters which are * eventually summed together. */ static u32 ATTRIBUTES FUNCNAME(u32 adler, const u8 *p, size_t size) { u32 s1 = adler & 0xFFFF; u32 s2 = adler >> 16; const u8 * const end = p + size; const u8 *vend; const size_t max_chunk_size = MIN(MAX_CHUNK_SIZE, IMPL_MAX_CHUNK_SIZE) - (MIN(MAX_CHUNK_SIZE, IMPL_MAX_CHUNK_SIZE) % IMPL_SEGMENT_SIZE); /* Process a byte at a time until the needed alignment is reached */ if (p != end && (uintptr_t)p % IMPL_ALIGNMENT) { do { s1 += *p++; s2 += s1; } while (p != end && (uintptr_t)p % IMPL_ALIGNMENT); s1 %= DIVISOR; s2 %= DIVISOR; } /* * Process "chunks" of bytes using vector instructions. Chunk sizes are * limited to MAX_CHUNK_SIZE, which guarantees that s1 and s2 never * overflow before being reduced modulo DIVISOR. For vector processing, * chunk sizes are also made evenly divisible by IMPL_SEGMENT_SIZE and * may be further limited to IMPL_MAX_CHUNK_SIZE. 
*/ STATIC_ASSERT(IMPL_SEGMENT_SIZE % IMPL_ALIGNMENT == 0); vend = end - ((size_t)(end - p) % IMPL_SEGMENT_SIZE); while (p != vend) { size_t chunk_size = MIN((size_t)(vend - p), max_chunk_size); s2 += s1 * chunk_size; FUNCNAME_CHUNK((const void *)p, (const void *)(p + chunk_size), &s1, &s2); p += chunk_size; s1 %= DIVISOR; s2 %= DIVISOR; } /* Process any remaining bytes */ if (p != end) { do { s1 += *p++; s2 += s1; } while (p != end); s1 %= DIVISOR; s2 %= DIVISOR; } return (s2 << 16) | s1; } #undef FUNCNAME #undef FUNCNAME_CHUNK #undef ATTRIBUTES #undef IMPL_ALIGNMENT #undef IMPL_SEGMENT_SIZE #undef IMPL_MAX_CHUNK_SIZE libdeflate-1.5/lib/aligned_malloc.c000066400000000000000000000034521360172702500173250ustar00rootroot00000000000000/* * aligned_malloc.c - aligned memory allocation * * Originally public domain; changes after 2016-09-07 are copyrighted. * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ /* * This file provides portable aligned memory allocation functions that only * use malloc() and free(). This avoids portability problems with * posix_memalign(), aligned_alloc(), etc. */ #include #include "aligned_malloc.h" void * aligned_malloc(size_t alignment, size_t size) { void *ptr = malloc(sizeof(void *) + alignment - 1 + size); if (ptr) { void *orig_ptr = ptr; ptr = (void *)ALIGN((uintptr_t)ptr + sizeof(void *), alignment); ((void **)ptr)[-1] = orig_ptr; } return ptr; } void aligned_free(void *ptr) { if (ptr) free(((void **)ptr)[-1]); } libdeflate-1.5/lib/aligned_malloc.h000066400000000000000000000004201360172702500173220ustar00rootroot00000000000000/* * aligned_malloc.c - aligned memory allocation */ #ifndef LIB_ALIGNED_MALLOC_H #define LIB_ALIGNED_MALLOC_H #include "lib_common.h" extern void *aligned_malloc(size_t alignment, size_t size); extern void aligned_free(void *ptr); #endif /* LIB_ALIGNED_MALLOC_H */ libdeflate-1.5/lib/arm/000077500000000000000000000000001360172702500150025ustar00rootroot00000000000000libdeflate-1.5/lib/arm/adler32_impl.h000066400000000000000000000105131360172702500174300ustar00rootroot00000000000000/* * arm/adler32_impl.h - ARM implementations of Adler-32 checksum algorithm * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "cpu_features.h" /* NEON implementation */ #undef DISPATCH_NEON #if !defined(DEFAULT_IMPL) && \ (defined(__ARM_NEON) || (ARM_CPU_FEATURES_ENABLED && \ COMPILER_SUPPORTS_NEON_TARGET_INTRINSICS)) # define FUNCNAME adler32_neon # define FUNCNAME_CHUNK adler32_neon_chunk # define IMPL_ALIGNMENT 16 # define IMPL_SEGMENT_SIZE 32 /* Prevent unsigned overflow of the 16-bit precision byte counters */ # define IMPL_MAX_CHUNK_SIZE (32 * (0xFFFF / 0xFF)) # ifdef __ARM_NEON # define ATTRIBUTES # define DEFAULT_IMPL adler32_neon # else # ifdef __arm__ # define ATTRIBUTES __attribute__((target("fpu=neon"))) # else # define ATTRIBUTES __attribute__((target("+simd"))) # endif # define DISPATCH 1 # define DISPATCH_NEON 1 # endif # include static forceinline ATTRIBUTES void adler32_neon_chunk(const uint8x16_t *p, const uint8x16_t * const end, u32 *s1, u32 *s2) { uint32x4_t v_s1 = (uint32x4_t) { 0, 0, 0, 0 }; uint32x4_t v_s2 = (uint32x4_t) { 0, 0, 0, 0 }; uint16x8_t v_byte_sums_a = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 }; uint16x8_t v_byte_sums_b = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 }; uint16x8_t v_byte_sums_c = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 }; uint16x8_t v_byte_sums_d = (uint16x8_t) { 0, 0, 0, 0, 0, 0, 0, 0 }; do { const uint8x16_t bytes1 = *p++; const uint8x16_t bytes2 = *p++; uint16x8_t tmp; v_s2 += v_s1; /* Vector Pairwise Add Long (u8 => u16) */ tmp = vpaddlq_u8(bytes1); /* Vector Pairwise Add and Accumulate Long (u8 => u16) */ tmp = vpadalq_u8(tmp, bytes2); /* Vector Pairwise Add and 
Accumulate Long (u16 => u32) */ v_s1 = vpadalq_u16(v_s1, tmp); /* Vector Add Wide (u8 => u16) */ v_byte_sums_a = vaddw_u8(v_byte_sums_a, vget_low_u8(bytes1)); v_byte_sums_b = vaddw_u8(v_byte_sums_b, vget_high_u8(bytes1)); v_byte_sums_c = vaddw_u8(v_byte_sums_c, vget_low_u8(bytes2)); v_byte_sums_d = vaddw_u8(v_byte_sums_d, vget_high_u8(bytes2)); } while (p != end); /* Vector Shift Left (u32) */ v_s2 = vqshlq_n_u32(v_s2, 5); /* Vector Multiply Accumulate Long (u16 => u32) */ v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_a), (uint16x4_t) { 32, 31, 30, 29 }); v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_a), (uint16x4_t) { 28, 27, 26, 25 }); v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_b), (uint16x4_t) { 24, 23, 22, 21 }); v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_b), (uint16x4_t) { 20, 19, 18, 17 }); v_s2 = vmlal_u16(v_s2, vget_low_u16(v_byte_sums_c), (uint16x4_t) { 16, 15, 14, 13 }); v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_c), (uint16x4_t) { 12, 11, 10, 9 }); v_s2 = vmlal_u16(v_s2, vget_low_u16 (v_byte_sums_d), (uint16x4_t) { 8, 7, 6, 5 }); v_s2 = vmlal_u16(v_s2, vget_high_u16(v_byte_sums_d), (uint16x4_t) { 4, 3, 2, 1 }); *s1 += v_s1[0] + v_s1[1] + v_s1[2] + v_s1[3]; *s2 += v_s2[0] + v_s2[1] + v_s2[2] + v_s2[3]; } # include "../adler32_vec_template.h" #endif /* NEON implementation */ #ifdef DISPATCH static inline adler32_func_t arch_select_adler32_func(void) { u32 features = get_cpu_features(); #ifdef DISPATCH_NEON if (features & ARM_CPU_FEATURE_NEON) return adler32_neon; #endif return NULL; } #endif /* DISPATCH */ libdeflate-1.5/lib/arm/cpu_features.c000066400000000000000000000062431360172702500176400ustar00rootroot00000000000000/* * arm/cpu_features.c - feature detection for ARM processors * * Copyright 2018 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without 
limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ /* * ARM processors don't have a standard way for unprivileged programs to detect * processor features. But, on Linux we can read the AT_HWCAP and AT_HWCAP2 * values from /proc/self/auxv. * * Ideally we'd use the C library function getauxval(), but it's not guaranteed * to be available: it was only added to glibc in 2.16, and in Android it was * added to API level 18 for ARM and level 21 for AArch64. 
*/ #include "cpu_features.h" #if ARM_CPU_FEATURES_ENABLED #include #include #include #include #define AT_HWCAP 16 #define AT_HWCAP2 26 volatile u32 _cpu_features = 0; static void scan_auxv(unsigned long *hwcap, unsigned long *hwcap2) { int fd; unsigned long auxbuf[32]; int filled = 0; int i; fd = open("/proc/self/auxv", O_RDONLY); if (fd < 0) return; for (;;) { do { int ret = read(fd, &((char *)auxbuf)[filled], sizeof(auxbuf) - filled); if (ret <= 0) { if (ret < 0 && errno == EINTR) continue; goto out; } filled += ret; } while (filled < 2 * sizeof(long)); i = 0; do { unsigned long type = auxbuf[i]; unsigned long value = auxbuf[i + 1]; if (type == AT_HWCAP) *hwcap = value; else if (type == AT_HWCAP2) *hwcap2 = value; i += 2; filled -= 2 * sizeof(long); } while (filled >= 2 * sizeof(long)); memmove(auxbuf, &auxbuf[i], filled); } out: close(fd); } void setup_cpu_features(void) { u32 features = 0; unsigned long hwcap = 0; unsigned long hwcap2 = 0; scan_auxv(&hwcap, &hwcap2); #ifdef __arm__ STATIC_ASSERT(sizeof(long) == 4); if (hwcap & (1 << 12)) /* HWCAP_NEON */ features |= ARM_CPU_FEATURE_NEON; if (hwcap2 & (1 << 1)) /* HWCAP2_PMULL */ features |= ARM_CPU_FEATURE_PMULL; #else STATIC_ASSERT(sizeof(long) == 8); if (hwcap & (1 << 1)) /* HWCAP_ASIMD */ features |= ARM_CPU_FEATURE_NEON; if (hwcap & (1 << 4)) /* HWCAP_PMULL */ features |= ARM_CPU_FEATURE_PMULL; #endif _cpu_features = features | ARM_CPU_FEATURES_KNOWN; } #endif /* ARM_CPU_FEATURES_ENABLED */ libdeflate-1.5/lib/arm/cpu_features.h000066400000000000000000000014311360172702500176370ustar00rootroot00000000000000/* * arm/cpu_features.h - feature detection for ARM processors */ #ifndef LIB_ARM_CPU_FEATURES_H #define LIB_ARM_CPU_FEATURES_H #include "../lib_common.h" #if (defined(__arm__) || defined(__aarch64__)) && \ defined(__linux__) && COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE # define ARM_CPU_FEATURES_ENABLED 1 #else # define ARM_CPU_FEATURES_ENABLED 0 #endif #if ARM_CPU_FEATURES_ENABLED #define 
ARM_CPU_FEATURE_NEON 0x00000001 #define ARM_CPU_FEATURE_PMULL 0x00000002 #define ARM_CPU_FEATURES_KNOWN 0x80000000 extern volatile u32 _cpu_features; extern void setup_cpu_features(void); static inline u32 get_cpu_features(void) { if (_cpu_features == 0) setup_cpu_features(); return _cpu_features; } #endif /* ARM_CPU_FEATURES_ENABLED */ #endif /* LIB_ARM_CPU_FEATURES_H */ libdeflate-1.5/lib/arm/crc32_impl.h000066400000000000000000000123421360172702500171120ustar00rootroot00000000000000/* * arm/crc32_impl.h * * Copyright 2017 Jun He * Copyright 2018 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "cpu_features.h" /* * CRC-32 folding with ARM Crypto extension-PMULL * * This works the same way as the x86 PCLMUL version. * See x86/crc32_pclmul_template.h for an explanation. 
*/
#undef DISPATCH_PMULL
#if (defined(__ARM_FEATURE_CRYPTO) ||	\
     (ARM_CPU_FEATURES_ENABLED &&	\
      COMPILER_SUPPORTS_PMULL_TARGET_INTRINSICS)) &&			\
	/* not yet tested on big endian, probably needs changes to work there */ \
	(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && \
	/* clang as of v5.0.1 doesn't allow pmull intrinsics in 32-bit mode, even
	 * when compiling with -mfpu=crypto-neon-fp-armv8 */		\
	!(defined(__clang__) && defined(__arm__))
#  define FUNCNAME		crc32_pmull
#  define FUNCNAME_ALIGNED	crc32_pmull_aligned
#  ifdef __ARM_FEATURE_CRYPTO
     /* PMULL is enabled for the whole build: no runtime dispatch needed */
#    define ATTRIBUTES
#    define DEFAULT_IMPL	crc32_pmull
#  else
     /* Enable PMULL per function and select the implementation at runtime */
#    ifdef __arm__
#      define ATTRIBUTES	__attribute__((target("fpu=crypto-neon-fp-armv8")))
#    else
#      ifdef __clang__
#        define ATTRIBUTES	__attribute__((target("crypto")))
#      else
#        define ATTRIBUTES	__attribute__((target("+crypto")))
#      endif
#    endif
#    define DISPATCH		1
#    define DISPATCH_PMULL	1
#  endif

/* NEON vector types (uint8x16_t, ...) and the vmull_p64()/vextq_u8() intrinsics */
#include <arm_neon.h>

/* Carryless multiply of the low 64 bits of 'a' by the low 64 bits of 'b' */
static forceinline ATTRIBUTES uint8x16_t
clmul_00(uint8x16_t a, uint8x16_t b)
{
	return (uint8x16_t)vmull_p64((poly64_t)vget_low_u8(a),
				     (poly64_t)vget_low_u8(b));
}

/* Carryless multiply of the low 64 bits of 'a' by the high 64 bits of 'b' */
static forceinline ATTRIBUTES uint8x16_t
clmul_10(uint8x16_t a, uint8x16_t b)
{
	return (uint8x16_t)vmull_p64((poly64_t)vget_low_u8(a),
				     (poly64_t)vget_high_u8(b));
}

/* Carryless multiply of the high 64 bits of 'a' by the high 64 bits of 'b' */
static forceinline ATTRIBUTES uint8x16_t
clmul_11(uint8x16_t a, uint8x16_t b)
{
	return (uint8x16_t)vmull_high_p64((poly64x2_t)a, (poly64x2_t)b);
}

/*
 * Fold the 128-bit value 'src' into 'dst': multiply each 64-bit half of 'src'
 * by the matching half of 'multipliers' and XOR both products into 'dst'.
 */
static forceinline ATTRIBUTES uint8x16_t
fold_128b(uint8x16_t dst, uint8x16_t src, uint8x16_t multipliers)
{
	return dst ^ clmul_00(src, multipliers) ^ clmul_11(src, multipliers);
}

/*
 * Update the CRC-32 'remainder' with 'nr_segs' 16-byte segments starting at
 * 'p' and return the new 32-bit remainder.
 *
 * The first segment is loaded unconditionally, so 'nr_segs' must be >= 1.
 * NOTE(review): presumably crc32_vec_template.h only calls this with an
 * aligned 'p' and a nonzero 'nr_segs' -- confirm against that file.
 */
static forceinline ATTRIBUTES u32
crc32_pmull_aligned(u32 remainder, const uint8x16_t *p, size_t nr_segs)
{
	/* Constants precomputed by gen_crc32_multipliers.c.  Do not edit! */
	const uint8x16_t multipliers_4 =
		(uint8x16_t)(uint64x2_t){ 0x8F352D95, 0x1D9513D7 };
	const uint8x16_t multipliers_1 =
		(uint8x16_t)(uint64x2_t){ 0xAE689191, 0xCCAA009E };
	const uint8x16_t final_multiplier =
		(uint8x16_t)(uint64x2_t){ 0xB8BC6765 };
	const uint8x16_t mask32 = (uint8x16_t)(uint32x4_t){ 0xFFFFFFFF };
	const uint8x16_t barrett_reduction_constants =
			(uint8x16_t)(uint64x2_t){ 0x00000001F7011641,
						  0x00000001DB710641 };
	const uint8x16_t zeroes = (uint8x16_t){ 0 };

	const uint8x16_t * const end = p + nr_segs;
	/* End of the last complete group of four segments */
	const uint8x16_t * const end512 = p + (nr_segs & ~3);
	uint8x16_t x0, x1, x2, x3;

	/* XOR the incoming remainder into the low 32 bits of the first segment */
	x0 = *p++ ^ (uint8x16_t)(uint32x4_t){ remainder };
	if (nr_segs >= 4) {
		x1 = *p++;
		x2 = *p++;
		x3 = *p++;

		/* Fold 512 bits at a time */
		while (p != end512) {
			x0 = fold_128b(*p++, x0, multipliers_4);
			x1 = fold_128b(*p++, x1, multipliers_4);
			x2 = fold_128b(*p++, x2, multipliers_4);
			x3 = fold_128b(*p++, x3, multipliers_4);
		}

		/* Fold 512 bits => 128 bits */
		x1 = fold_128b(x1, x0, multipliers_1);
		x2 = fold_128b(x2, x1, multipliers_1);
		x0 = fold_128b(x3, x2, multipliers_1);
	}

	/* Fold 128 bits at a time */
	while (p != end)
		x0 = fold_128b(*p++, x0, multipliers_1);

	/* Fold 128 => 96 bits, implicitly appending 32 zeroes */
	x0 = vextq_u8(x0, zeroes, 8) ^ clmul_10(x0, multipliers_1);

	/* Fold 96 => 64 bits */
	x0 = vextq_u8(x0, zeroes, 4) ^ clmul_00(x0 & mask32, final_multiplier);

	/* Reduce 64 => 32 bits using Barrett reduction */
	x1 = x0;
	x0 = clmul_00(x0 & mask32, barrett_reduction_constants);
	x0 = clmul_10(x0 & mask32, barrett_reduction_constants);
	return vgetq_lane_u32((uint32x4_t)(x0 ^ x1), 1);
}
#define IMPL_ALIGNMENT		16
#define IMPL_SEGMENT_SIZE	16
#include "../crc32_vec_template.h"
#endif /* PMULL implementation */

#ifdef DISPATCH
/*
 * Return the PMULL-based CRC-32 function if the running CPU reports
 * ARM_CPU_FEATURE_PMULL, otherwise NULL.
 */
static inline crc32_func_t
arch_select_crc32_func(void)
{
	u32 features = get_cpu_features();

#ifdef DISPATCH_PMULL
	if (features & ARM_CPU_FEATURE_PMULL)
		return crc32_pmull;
#endif
	return NULL;
}
#endif /* DISPATCH */
libdeflate-1.5/lib/arm/matchfinder_impl.h000066400000000000000000000054051360172702500204640ustar00rootroot00000000000000/* * arm/matchfinder_impl.h - ARM implementations of matchfinder functions * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifdef __ARM_NEON # if MATCHFINDER_ALIGNMENT < 16 # undef MATCHFINDER_ALIGNMENT # define MATCHFINDER_ALIGNMENT 16 # endif # include static forceinline bool matchfinder_init_neon(mf_pos_t *data, size_t size) { int16x8_t v, *p; size_t n; if (size % (sizeof(int16x8_t) * 4) != 0) return false; STATIC_ASSERT(sizeof(mf_pos_t) == 2); v = (int16x8_t) { MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, MATCHFINDER_INITVAL, }; p = (int16x8_t *)data; n = size / (sizeof(int16x8_t) * 4); do { p[0] = v; p[1] = v; p[2] = v; p[3] = v; p += 4; } while (--n); return true; } #undef arch_matchfinder_init #define arch_matchfinder_init matchfinder_init_neon static forceinline bool matchfinder_rebase_neon(mf_pos_t *data, size_t size) { int16x8_t v, *p; size_t n; if (size % (sizeof(int16x8_t) * 4) != 0) return false; STATIC_ASSERT(sizeof(mf_pos_t) == 2); v = (int16x8_t) { (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, (u16)-MATCHFINDER_WINDOW_SIZE, }; p = (int16x8_t *)data; n = size / (sizeof(int16x8_t) * 4); do { p[0] = vqaddq_s16(p[0], v); p[1] = vqaddq_s16(p[1], v); p[2] = vqaddq_s16(p[2], v); p[3] = vqaddq_s16(p[3], v); p += 4; } while (--n); return true; } #undef arch_matchfinder_rebase #define arch_matchfinder_rebase matchfinder_rebase_neon #endif /* __ARM_NEON */ libdeflate-1.5/lib/bt_matchfinder.h000066400000000000000000000273641360172702500173610ustar00rootroot00000000000000/* * bt_matchfinder.h - Lempel-Ziv matchfinding with a hash table of binary trees * * Originally public domain; changes after 2016-09-07 are copyrighted. 
* * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * ---------------------------------------------------------------------------- * * This is a Binary Trees (bt) based matchfinder. * * The main data structure is a hash table where each hash bucket contains a * binary tree of sequences whose first 4 bytes share the same hash code. Each * sequence is identified by its starting position in the input buffer. Each * binary tree is always sorted such that each left child represents a sequence * lexicographically lesser than its parent and each right child represents a * sequence lexicographically greater than its parent. * * The algorithm processes the input buffer sequentially. At each byte * position, the hash code of the first 4 bytes of the sequence beginning at * that position (the sequence being matched against) is computed. This * identifies the hash bucket to use for that position. 
Then, a new binary tree
 * node is created to represent the current sequence.  Then, in a single tree
 * traversal, the hash bucket's binary tree is searched for matches and is
 * re-rooted at the new node.
 *
 * Compared to the simpler algorithm that uses linked lists instead of binary
 * trees (see hc_matchfinder.h), the binary tree version gains more information
 * at each node visitation.  Ideally, the binary tree version will examine only
 * 'log(n)' nodes to find the same matches that the linked list version will
 * find by examining 'n' nodes.  In addition, the binary tree version can
 * examine fewer bytes at each node by taking advantage of the common prefixes
 * that result from the sort order, whereas the linked list version may have to
 * examine up to the full length of the match at each node.
 *
 * However, it is not always best to use the binary tree version.  It requires
 * nearly twice as much memory as the linked list version, and it takes time to
 * keep the binary trees sorted, even at positions where the compressor does not
 * need matches.  Generally, when doing fast compression on small buffers,
 * binary trees are the wrong approach.  They are best suited for thorough
 * compression and/or large buffers.
 *
 * ----------------------------------------------------------------------------
 */


#include "matchfinder_common.h"

/* log2 of the number of buckets in the length 3 hash table */
#define BT_MATCHFINDER_HASH3_ORDER 16
/* Number of positions remembered per length 3 hash bucket */
#define BT_MATCHFINDER_HASH3_WAYS  2
/* log2 of the number of buckets in the length 4+ hash table */
#define BT_MATCHFINDER_HASH4_ORDER 16

/*
 * Total number of mf_pos_t entries in hash3_tab plus hash4_tab, i.e. in the
 * leading part of 'struct bt_matchfinder' that precedes child_tab.
 */
#define BT_MATCHFINDER_TOTAL_HASH_LENGTH		\
	((1UL << BT_MATCHFINDER_HASH3_ORDER) * BT_MATCHFINDER_HASH3_WAYS + \
	 (1UL << BT_MATCHFINDER_HASH4_ORDER))

/* Representation of a match found by the bt_matchfinder  */
struct lz_match {

	/* The number of bytes matched.  */
	u16 length;

	/* The offset back from the current position that was matched.  */
	u16 offset;
};

/*
 * The member order matters: bt_matchfinder_init() and
 * bt_matchfinder_slide_window() cast this struct to a flat array of mf_pos_t,
 * and bt_matchfinder_init() relies on the two hash tables coming first.
 */
struct bt_matchfinder {

	/* The hash table for finding length 3 matches  */
	mf_pos_t hash3_tab[1UL << BT_MATCHFINDER_HASH3_ORDER][BT_MATCHFINDER_HASH3_WAYS];

	/* The hash table which contains the roots of the binary trees for
	 * finding length 4+ matches  */
	mf_pos_t hash4_tab[1UL << BT_MATCHFINDER_HASH4_ORDER];

	/* The child node references for the binary trees.  The left and right
	 * children of the node for the sequence with position 'pos' are
	 * 'child_tab[pos * 2]' and 'child_tab[pos * 2 + 1]', respectively.  */
	mf_pos_t child_tab[2UL * MATCHFINDER_WINDOW_SIZE];

}
#ifdef _aligned_attribute
_aligned_attribute(MATCHFINDER_ALIGNMENT)
#endif
;

/* Prepare the matchfinder for a new input buffer.  */
static forceinline void
bt_matchfinder_init(struct bt_matchfinder *mf)
{
	/*
	 * Only the first BT_MATCHFINDER_TOTAL_HASH_LENGTH entries (the two hash
	 * tables) are passed to matchfinder_init(); child_tab is not touched.
	 */
	matchfinder_init((mf_pos_t *)mf, BT_MATCHFINDER_TOTAL_HASH_LENGTH);
}

/*
 * Pass every mf_pos_t entry of the matchfinder (both hash tables and
 * child_tab) to matchfinder_rebase().
 */
static forceinline void
bt_matchfinder_slide_window(struct bt_matchfinder *mf)
{
	matchfinder_rebase((mf_pos_t *)mf,
			   sizeof(struct bt_matchfinder) / sizeof(mf_pos_t));
}

/*
 * Return a pointer to the left child slot of 'node'.  The position is reduced
 * with '& (MATCHFINDER_WINDOW_SIZE - 1)' before indexing child_tab
 * (assumes MATCHFINDER_WINDOW_SIZE is a power of 2 -- defined elsewhere).
 */
static forceinline mf_pos_t *
bt_left_child(struct bt_matchfinder *mf, s32 node)
{
	return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 0];
}

/* Like bt_left_child(), but return the right child slot of 'node'. */
static forceinline mf_pos_t *
bt_right_child(struct bt_matchfinder *mf, s32 node)
{
	return &mf->child_tab[2 * (node & (MATCHFINDER_WINDOW_SIZE - 1)) + 1];
}

/* The minimum permissible value of 'max_len' for bt_matchfinder_get_matches()
 * and bt_matchfinder_skip_position().  There must be sufficiently many bytes
 * remaining to load a 32-bit integer from the *next* position.  */
#define BT_MATCHFINDER_REQUIRED_NBYTES	5

/* Advance the binary tree matchfinder by one byte, optionally recording
 * matches.  @record_matches should be a compile-time constant.
*/
static forceinline struct lz_match *
bt_matchfinder_advance_one_byte(struct bt_matchfinder * const restrict mf,
				const u8 * const restrict in_base,
				const ptrdiff_t cur_pos,
				const u32 max_len,
				const u32 nice_len,
				const u32 max_search_depth,
				u32 * const restrict next_hashes,
				u32 * const restrict best_len_ret,
				struct lz_match * restrict lz_matchptr,
				const bool record_matches)
{
	const u8 *in_next = in_base + cur_pos;
	u32 depth_remaining = max_search_depth;
	/* Nodes at positions <= cutoff are treated as absent */
	const s32 cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;
	u32 next_hashseq;
	u32 hash3;
	u32 hash4;
	s32 cur_node;
#if BT_MATCHFINDER_HASH3_WAYS >= 2
	s32 cur_node_2;
#endif
	const u8 *matchptr;
	mf_pos_t *pending_lt_ptr, *pending_gt_ptr;
	u32 best_lt_len, best_gt_len;
	u32 len;
	u32 best_len = 3;

	STATIC_ASSERT(BT_MATCHFINDER_HASH3_WAYS >= 1 &&
		      BT_MATCHFINDER_HASH3_WAYS <= 2);

	/* Load the 4 bytes starting at the *next* position */
	next_hashseq = get_unaligned_le32(in_next + 1);

	/* Use the hashes precomputed by the previous call ... */
	hash3 = next_hashes[0];
	hash4 = next_hashes[1];

	/* ... and precompute (and prefetch the buckets for) the next call */
	next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, BT_MATCHFINDER_HASH3_ORDER);
	next_hashes[1] = lz_hash(next_hashseq, BT_MATCHFINDER_HASH4_ORDER);
	prefetchw(&mf->hash3_tab[next_hashes[0]]);
	prefetchw(&mf->hash4_tab[next_hashes[1]]);

	/*
	 * Length 3 matches: insert cur_pos into way 0 of the hash3 bucket,
	 * moving the previous occupant of way 0 into way 1 (if present).
	 */
	cur_node = mf->hash3_tab[hash3][0];
	mf->hash3_tab[hash3][0] = cur_pos;
#if BT_MATCHFINDER_HASH3_WAYS >= 2
	cur_node_2 = mf->hash3_tab[hash3][1];
	mf->hash3_tab[hash3][1] = cur_node;
#endif
	if (record_matches && cur_node > cutoff) {
		u32 seq3 = load_u24_unaligned(in_next);
		if (seq3 == load_u24_unaligned(&in_base[cur_node])) {
			lz_matchptr->length = 3;
			lz_matchptr->offset = in_next - &in_base[cur_node];
			lz_matchptr++;
		}
	#if BT_MATCHFINDER_HASH3_WAYS >= 2
		/* Way 1 is only tried when way 0 did not match */
		else if (cur_node_2 > cutoff &&
			seq3 == load_u24_unaligned(&in_base[cur_node_2]))
		{
			lz_matchptr->length = 3;
			lz_matchptr->offset = in_next - &in_base[cur_node_2];
			lz_matchptr++;
		}
	#endif
	}

	/*
	 * Length 4+ matches: the current position becomes the new root of the
	 * hash4 bucket's tree; the old root is where the traversal starts.
	 */
	cur_node = mf->hash4_tab[hash4];
	mf->hash4_tab[hash4] = cur_pos;

	/* Child slots of the new root, filled in as the old tree is walked */
	pending_lt_ptr = bt_left_child(mf, cur_pos);
	pending_gt_ptr = bt_right_child(mf, cur_pos);

	/* Old tree empty or out of the window: new root has no children */
	if (cur_node <= cutoff) {
		*pending_lt_ptr = MATCHFINDER_INITVAL;
		*pending_gt_ptr = MATCHFINDER_INITVAL;
		*best_len_ret = best_len;
		return lz_matchptr;
	}

	best_lt_len = 0;
	best_gt_len = 0;
	len = 0;

	for (;;) {
		matchptr = &in_base[cur_node];

		/* Bytes [0, len) are already known to match; extend from there */
		if (matchptr[len] == in_next[len]) {
			len = lz_extend(in_next, matchptr, len + 1, max_len);
			if (!record_matches || len > best_len) {
				if (record_matches) {
					best_len = len;
					lz_matchptr->length = len;
					lz_matchptr->offset = in_next - matchptr;
					lz_matchptr++;
				}
				/*
				 * Long enough: stop searching.  The new root
				 * takes over the children of the matched node.
				 */
				if (len >= nice_len) {
					*pending_lt_ptr = *bt_left_child(mf, cur_node);
					*pending_gt_ptr = *bt_right_child(mf, cur_node);
					*best_len_ret = best_len;
					return lz_matchptr;
				}
			}
		}

		if (matchptr[len] < in_next[len]) {
			/*
			 * cur_node sorts before the current sequence: attach it
			 * on the "less than" side and continue with its right
			 * child.
			 */
			*pending_lt_ptr = cur_node;
			pending_lt_ptr = bt_right_child(mf, cur_node);
			cur_node = *pending_lt_ptr;
			best_lt_len = len;
			if (best_gt_len < len)
				len = best_gt_len;
		} else {
			/*
			 * Otherwise attach cur_node on the "greater than" side
			 * and continue with its left child.
			 */
			*pending_gt_ptr = cur_node;
			pending_gt_ptr = bt_left_child(mf, cur_node);
			cur_node = *pending_gt_ptr;
			best_gt_len = len;
			if (best_lt_len < len)
				len = best_lt_len;
		}

		/* Ran off the tree/window or out of search budget: terminate */
		if (cur_node <= cutoff || !--depth_remaining) {
			*pending_lt_ptr = MATCHFINDER_INITVAL;
			*pending_gt_ptr = MATCHFINDER_INITVAL;
			*best_len_ret = best_len;
			return lz_matchptr;
		}
	}
}

/*
 * Retrieve a list of matches with the current position.
 *
 * @mf
 *	The matchfinder structure.
 * @in_base
 *	Pointer to the next byte in the input buffer to process _at the last
 *	time bt_matchfinder_init() or bt_matchfinder_slide_window() was called_.
 * @cur_pos
 *	The current position in the input buffer relative to @in_base (the
 *	position of the sequence being matched against).
 * @max_len
 *	The maximum permissible match length at this position.  Must be >=
 *	BT_MATCHFINDER_REQUIRED_NBYTES.
 * @nice_len
 *	Stop searching if a match of at least this length is found.
 *	Must be <= @max_len.
 * @max_search_depth
 *	Limit on the number of potential matches to consider.  Must be >= 1.
 * @next_hashes
 *	The precomputed hash codes for the sequence beginning at @in_next.
*	These will be used and then updated with the precomputed hashcodes for
 *	the sequence beginning at @in_next + 1.
 *	(Here @in_next means @in_base + @cur_pos.)
 * @best_len_ret
 *	If a match of length >= 4 was found, then the length of the longest such
 *	match is written here; otherwise 3 is written here.  (Note: this is
 *	redundant with the 'struct lz_match' array, but this is easier for the
 *	compiler to optimize when inlined and the caller immediately does a
 *	check against 'best_len'.)
 * @lz_matchptr
 *	An array in which this function will record the matches.  The recorded
 *	matches will be sorted by strictly increasing length and (non-strictly)
 *	increasing offset.  The maximum number of matches that may be found is
 *	'nice_len - 2'.
 *
 * The return value is a pointer to the next available slot in the @lz_matchptr
 * array.  (If no matches were found, this will be the same as @lz_matchptr.)
 */
static forceinline struct lz_match *
bt_matchfinder_get_matches(struct bt_matchfinder *mf,
			   const u8 *in_base,
			   ptrdiff_t cur_pos,
			   u32 max_len,
			   u32 nice_len,
			   u32 max_search_depth,
			   u32 next_hashes[2],
			   u32 *best_len_ret,
			   struct lz_match *lz_matchptr)
{
	/* The final 'true' is record_matches: matches are written out */
	return bt_matchfinder_advance_one_byte(mf,
					       in_base,
					       cur_pos,
					       max_len,
					       nice_len,
					       max_search_depth,
					       next_hashes,
					       best_len_ret,
					       lz_matchptr,
					       true);
}

/*
 * Advance the matchfinder, but don't record any matches.
 *
 * This is very similar to bt_matchfinder_get_matches() because both functions
 * must do hashing and tree re-rooting.
*/
static forceinline void
bt_matchfinder_skip_position(struct bt_matchfinder *mf,
			     const u8 *in_base,
			     ptrdiff_t cur_pos,
			     u32 nice_len,
			     u32 max_search_depth,
			     u32 next_hashes[2])
{
	/* Receives the best length, which this function then discards */
	u32 best_len;

	/*
	 * There is no separate max_len parameter here: @nice_len is passed as
	 * both max_len and nice_len.  The match array is NULL because
	 * record_matches is false.
	 */
	bt_matchfinder_advance_one_byte(mf,
					in_base,
					cur_pos,
					nice_len,
					nice_len,
					max_search_depth,
					next_hashes,
					&best_len,
					NULL,
					false);
}
libdeflate-1.5/lib/crc32.c000066400000000000000000000243771360172702500153200ustar00rootroot00000000000000/*
 * crc32.c - CRC-32 checksum algorithm for the gzip format
 *
 * Copyright 2016 Eric Biggers
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * High-level description of CRC
 * =============================
 *
 * Consider a bit sequence 'bits[1...len]'.  Interpret 'bits' as the "message"
 * polynomial M(x) with coefficients in GF(2) (the field of integers modulo 2),
 * where the coefficient of 'x^i' is 'bits[len - i]'.  Then, compute:
 *
 *			R(x) = M(x)*x^n mod G(x)
 *
 * where G(x) is a selected "generator" polynomial of degree 'n'.
The remainder * R(x) is a polynomial of max degree 'n - 1'. The CRC of 'bits' is R(x) * interpreted as a bitstring of length 'n'. * * CRC used in gzip * ================ * * In the gzip format (RFC 1952): * * - The bitstring to checksum is formed from the bytes of the uncompressed * data by concatenating the bits from the bytes in order, proceeding * from the low-order bit to the high-order bit within each byte. * * - The generator polynomial G(x) is: x^32 + x^26 + x^23 + x^22 + x^16 + * x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1. * Consequently, the CRC length is 32 bits ("CRC-32"). * * - The highest order 32 coefficients of M(x)*x^n are inverted. * * - All 32 coefficients of R(x) are inverted. * * The two inversions cause added leading and trailing zero bits to affect the * resulting CRC, whereas with a regular CRC such bits would have no effect on * the CRC. * * Computation and optimizations * ============================= * * We can compute R(x) through "long division", maintaining only 32 bits of * state at any given time. Multiplication by 'x' can be implemented as * right-shifting by 1 (assuming the polynomial<=>bitstring mapping where the * highest order bit represents the coefficient of x^0), and both addition and * subtraction can be implemented as bitwise exclusive OR (since we are working * in GF(2)). 
Here is an unoptimized implementation: * * static u32 crc32_gzip(const u8 *buffer, size_t size) * { * u32 remainder = 0; * const u32 divisor = 0xEDB88320; * * for (size_t i = 0; i < size * 8 + 32; i++) { * int bit; * u32 multiple; * * if (i < size * 8) * bit = (buffer[i / 8] >> (i % 8)) & 1; * else * bit = 0; // one of the 32 appended 0 bits * * if (i < 32) // the first 32 bits are inverted * bit ^= 1; * * if (remainder & 1) * multiple = divisor; * else * multiple = 0; * * remainder >>= 1; * remainder |= (u32)bit << 31; * remainder ^= multiple; * } * * return ~remainder; * } * * In this implementation, the 32-bit integer 'remainder' maintains the * remainder of the currently processed portion of the message (with 32 zero * bits appended) when divided by the generator polynomial. 'remainder' is the * representation of R(x), and 'divisor' is the representation of G(x) excluding * the x^32 coefficient. For each bit to process, we multiply R(x) by 'x^1', * then add 'x^0' if the new bit is a 1. If this causes R(x) to gain a nonzero * x^32 term, then we subtract G(x) from R(x). * * We can speed this up by taking advantage of the fact that XOR is commutative * and associative, so the order in which we combine the inputs into 'remainder' * is unimportant. 
And since each message bit we add doesn't affect the choice * of 'multiple' until 32 bits later, we need not actually add each message bit * until that point: * * static u32 crc32_gzip(const u8 *buffer, size_t size) * { * u32 remainder = ~0; * const u32 divisor = 0xEDB88320; * * for (size_t i = 0; i < size * 8; i++) { * int bit; * u32 multiple; * * bit = (buffer[i / 8] >> (i % 8)) & 1; * remainder ^= bit; * if (remainder & 1) * multiple = divisor; * else * multiple = 0; * remainder >>= 1; * remainder ^= multiple; * } * * return ~remainder; * } * * With the above implementation we get the effect of 32 appended 0 bits for * free; they never affect the choice of a divisor, nor would they change the * value of 'remainder' if they were to be actually XOR'ed in. And by starting * with a remainder of all 1 bits, we get the effect of complementing the first * 32 message bits. * * The next optimization is to process the input in multi-bit units. Suppose * that we insert the next 'n' message bits into the remainder. Then we get an * intermediate remainder of length '32 + n' bits, and the CRC of the extra 'n' * bits is the amount by which the low 32 bits of the remainder will change as a * result of cancelling out those 'n' bits. Taking n=8 (one byte) and * precomputing a table containing the CRC of each possible byte, we get * crc32_slice1() defined below. * * As a further optimization, we could increase the multi-bit unit size to 16. * However, that is inefficient because the table size explodes from 256 entries * (1024 bytes) to 65536 entries (262144 bytes), which wastes memory and won't * fit in L1 cache on typical processors. * * However, we can actually process 4 bytes at a time using 4 different tables * with 256 entries each. Logically, we form a 64-bit intermediate remainder * and cancel out the high 32 bits in 8-bit chunks. 
Bits 32-39 are cancelled
 * out by the CRC of those bits, whereas bits 40-47 are cancelled out by the
 * CRC of those bits with 8 zero bits appended, and so on.  This method is
 * implemented in crc32_slice4(), defined below.
 *
 * In crc32_slice8(), this method is extended to 8 bytes at a time.  The
 * intermediate remainder (which we never actually store explicitly) is 96 bits.
 *
 * On CPUs that support fast carryless multiplication, CRCs can be computed even
 * more quickly via "folding".  See e.g. the x86 PCLMUL implementation.
 */

#include "lib_common.h"
#include "libdeflate.h"

/*
 * Signature shared by all CRC-32 implementations: takes the current remainder,
 * a byte buffer and its size, and returns the updated remainder.
 */
typedef u32 (*crc32_func_t)(u32, const u8 *, size_t);

/* Include architecture-specific implementations if available */
#undef CRC32_SLICE1
#undef CRC32_SLICE4
#undef CRC32_SLICE8
#undef DEFAULT_IMPL
#undef DISPATCH
#if defined(__arm__) || defined(__aarch64__)
#  include "arm/crc32_impl.h"
#elif defined(__i386__) || defined(__x86_64__)
#  include "x86/crc32_impl.h"
#endif

/*
 * Define a generic implementation (crc32_slice8()) if needed.  crc32_slice1()
 * may also be needed as a fallback for architecture-specific implementations.
*/ #ifndef DEFAULT_IMPL # define CRC32_SLICE8 1 # define DEFAULT_IMPL crc32_slice8 #endif #if defined(CRC32_SLICE1) || defined(CRC32_SLICE4) || defined(CRC32_SLICE8) #include "crc32_table.h" static forceinline u32 crc32_update_byte(u32 remainder, u8 next_byte) { return (remainder >> 8) ^ crc32_table[(u8)remainder ^ next_byte]; } #endif #ifdef CRC32_SLICE1 static u32 crc32_slice1(u32 remainder, const u8 *buffer, size_t size) { size_t i; STATIC_ASSERT(ARRAY_LEN(crc32_table) >= 0x100); for (i = 0; i < size; i++) remainder = crc32_update_byte(remainder, buffer[i]); return remainder; } #endif /* CRC32_SLICE1 */ #ifdef CRC32_SLICE4 static u32 crc32_slice4(u32 remainder, const u8 *buffer, size_t size) { const u8 *p = buffer; const u8 *end = buffer + size; const u8 *end32; STATIC_ASSERT(ARRAY_LEN(crc32_table) >= 0x400); for (; ((uintptr_t)p & 3) && p != end; p++) remainder = crc32_update_byte(remainder, *p); end32 = p + ((end - p) & ~3); for (; p != end32; p += 4) { u32 v = le32_bswap(*(const u32 *)p); remainder = crc32_table[0x300 + (u8)((remainder ^ v) >> 0)] ^ crc32_table[0x200 + (u8)((remainder ^ v) >> 8)] ^ crc32_table[0x100 + (u8)((remainder ^ v) >> 16)] ^ crc32_table[0x000 + (u8)((remainder ^ v) >> 24)]; } for (; p != end; p++) remainder = crc32_update_byte(remainder, *p); return remainder; } #endif /* CRC32_SLICE4 */ #ifdef CRC32_SLICE8 static u32 crc32_slice8(u32 remainder, const u8 *buffer, size_t size) { const u8 *p = buffer; const u8 *end = buffer + size; const u8 *end64; STATIC_ASSERT(ARRAY_LEN(crc32_table) >= 0x800); for (; ((uintptr_t)p & 7) && p != end; p++) remainder = crc32_update_byte(remainder, *p); end64 = p + ((end - p) & ~7); for (; p != end64; p += 8) { u32 v1 = le32_bswap(*(const u32 *)(p + 0)); u32 v2 = le32_bswap(*(const u32 *)(p + 4)); remainder = crc32_table[0x700 + (u8)((remainder ^ v1) >> 0)] ^ crc32_table[0x600 + (u8)((remainder ^ v1) >> 8)] ^ crc32_table[0x500 + (u8)((remainder ^ v1) >> 16)] ^ crc32_table[0x400 + (u8)((remainder ^ v1) >> 
24)] ^ crc32_table[0x300 + (u8)(v2 >> 0)] ^ crc32_table[0x200 + (u8)(v2 >> 8)] ^ crc32_table[0x100 + (u8)(v2 >> 16)] ^ crc32_table[0x000 + (u8)(v2 >> 24)]; } for (; p != end; p++) remainder = crc32_update_byte(remainder, *p); return remainder; } #endif /* CRC32_SLICE8 */ #ifdef DISPATCH static u32 dispatch(u32, const u8 *, size_t); static volatile crc32_func_t crc32_impl = dispatch; /* Choose the fastest implementation at runtime */ static u32 dispatch(u32 remainder, const u8 *buffer, size_t size) { crc32_func_t f = arch_select_crc32_func(); if (f == NULL) f = DEFAULT_IMPL; crc32_impl = f; return crc32_impl(remainder, buffer, size); } #else # define crc32_impl DEFAULT_IMPL /* only one implementation, use it */ #endif LIBDEFLATEEXPORT u32 LIBDEFLATEAPI libdeflate_crc32(u32 remainder, const void *buffer, size_t size) { if (buffer == NULL) /* return initial value */ return 0; return ~crc32_impl(~remainder, buffer, size); } libdeflate-1.5/lib/crc32_table.h000066400000000000000000000615501360172702500164660ustar00rootroot00000000000000/* * crc32_table.h - data table to accelerate CRC-32 computation * * THIS FILE WAS AUTOMATICALLY GENERATED BY gen_crc32_table.c. DO NOT EDIT. 
*/ #include static const uint32_t crc32_table[] = { 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, 
0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, #if defined(CRC32_SLICE4) || defined(CRC32_SLICE8) 0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, 0x646cc504, 0x7d77f445, 0x565aa786, 0x4f4196c7, 0xc8d98a08, 0xd1c2bb49, 0xfaefe88a, 0xe3f4d9cb, 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, 0x87981ccf, 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, 0x821b9859, 0x9b00a918, 0xb02dfadb, 0xa936cb9a, 0xe6775d5d, 0xff6c6c1c, 0xd4413fdf, 0xcd5a0e9e, 0x958424a2, 0x8c9f15e3, 0xa7b24620, 0xbea97761, 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, 0x39316bae, 0x202a5aef, 0x0b07092c, 0x121c386d, 0xdf4636f3, 0xc65d07b2, 0xed705471, 0xf46b6530, 0xbb2af3f7, 0xa231c2b6, 0x891c9175, 0x9007a034, 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, 0xf0794f05, 0xe9627e44, 0xc24f2d87, 0xdb541cc6, 
0x94158a01, 0x8d0ebb40, 0xa623e883, 0xbf38d9c2, 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, 0x138d96ce, 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, 0xded79850, 0xc7cca911, 0xece1fad2, 0xf5facb93, 0x7262d75c, 0x6b79e61d, 0x4054b5de, 0x594f849f, 0x160e1258, 0x0f152319, 0x243870da, 0x3d23419b, 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, 0xad24e1af, 0xb43fd0ee, 0x9f12832d, 0x8609b26c, 0xc94824ab, 0xd05315ea, 0xfb7e4629, 0xe2657768, 0x2f3f79f6, 0x362448b7, 0x1d091b74, 0x04122a35, 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, 0x838a36fa, 0x9a9107bb, 0xb1bc5478, 0xa8a76539, 0x3b83984b, 0x2298a90a, 0x09b5fac9, 0x10aecb88, 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, 0x74c20e8c, 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, 0x71418a1a, 0x685abb5b, 0x4377e898, 0x5a6cd9d9, 0x152d4f1e, 0x0c367e5f, 0x271b2d9c, 0x3e001cdd, 0xb9980012, 0xa0833153, 0x8bae6290, 0x92b553d1, 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, 0xca6b79ed, 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, 0x66de36e1, 0x7fc507a0, 0x54e85463, 0x4df36522, 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, 0x299fa026, 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, 0x2c1c24b0, 0x350715f1, 0x1e2a4632, 0x07317773, 0x4870e1b4, 0x516bd0f5, 0x7a468336, 0x635db277, 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, 0xe0d7848d, 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, 0x674f9842, 0x7e54a903, 0x5579fac0, 0x4c62cb81, 0x8138c51f, 0x9823f45e, 0xb30ea79d, 0xaa1596dc, 0xe554001b, 0xfc4f315a, 0xd7626299, 0xce7953d8, 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, 0x5e7ef3ec, 0x4765c2ad, 0x6c48916e, 0x7553a02f, 0x3a1236e8, 0x230907a9, 0x0824546a, 0x113f652b, 0x96a779e4, 0x8fbc48a5, 
0xa4911b66, 0xbd8a2a27, 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, 0x70d024b9, 0x69cb15f8, 0x42e6463b, 0x5bfd777a, 0xdc656bb5, 0xc57e5af4, 0xee530937, 0xf7483876, 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, 0x9324fd72, 0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, 0x0709a8dc, 0x06cbc2eb, 0x048d7cb2, 0x054f1685, 0x0e1351b8, 0x0fd13b8f, 0x0d9785d6, 0x0c55efe1, 0x091af964, 0x08d89353, 0x0a9e2d0a, 0x0b5c473d, 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, 0x1235f2c8, 0x13f798ff, 0x11b126a6, 0x10734c91, 0x153c5a14, 0x14fe3023, 0x16b88e7a, 0x177ae44d, 0x384d46e0, 0x398f2cd7, 0x3bc9928e, 0x3a0bf8b9, 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, 0x3157bf84, 0x3095d5b3, 0x32d36bea, 0x331101dd, 0x246be590, 0x25a98fa7, 0x27ef31fe, 0x262d5bc9, 0x23624d4c, 0x22a0277b, 0x20e69922, 0x2124f315, 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, 0x709a8dc0, 0x7158e7f7, 0x731e59ae, 0x72dc3399, 0x7793251c, 0x76514f2b, 0x7417f172, 0x75d59b45, 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, 0x7ccf6221, 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, 0x6bb5866c, 0x6a77ec5b, 0x68315202, 0x69f33835, 0x62af7f08, 0x636d153f, 0x612bab66, 0x60e9c151, 0x65a6d7d4, 0x6464bde3, 0x662203ba, 0x67e0698d, 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, 0x46c49a98, 0x4706f0af, 0x45404ef6, 0x448224c1, 0x41cd3244, 0x400f5873, 0x4249e62a, 0x438b8c1d, 0x54f16850, 0x55330267, 0x5775bc3e, 0x56b7d609, 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, 0x5deb9134, 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, 0xe1351b80, 0xe0f771b7, 0xe2b1cfee, 0xe373a5d9, 0xe63cb35c, 0xe7fed96b, 0xe5b86732, 0xe47a0d05, 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, 
0xfd13b8f0, 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, 0xfa1a102c, 0xfbd87a1b, 0xf99ec442, 0xf85cae75, 0xf300e948, 0xf2c2837f, 0xf0843d26, 0xf1465711, 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, 0xde71f5bc, 0xdfb39f8b, 0xddf521d2, 0xdc374be5, 0xd76b0cd8, 0xd6a966ef, 0xd4efd8b6, 0xd52db281, 0xd062a404, 0xd1a0ce33, 0xd3e6706a, 0xd2241a5d, 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, 0xcb4dafa8, 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, 0xcc440774, 0xcd866d43, 0xcfc0d31a, 0xce02b92d, 0x91af9640, 0x906dfc77, 0x922b422e, 0x93e92819, 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, 0x98b56f24, 0x99770513, 0x9b31bb4a, 0x9af3d17d, 0x8d893530, 0x8c4b5f07, 0x8e0de15e, 0x8fcf8b69, 0x8a809dec, 0x8b42f7db, 0x89044982, 0x88c623b5, 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, 0xa9e2d0a0, 0xa820ba97, 0xaa6604ce, 0xaba46ef9, 0xaeeb787c, 0xaf29124b, 0xad6fac12, 0xacadc625, 0xa7f18118, 0xa633eb2f, 0xa4755576, 0xa5b73f41, 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, 0xb2cddb0c, 0xb30fb13b, 0xb1490f62, 0xb08b6555, 0xbbd72268, 0xba15485f, 0xb853f606, 0xb9919c31, 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, 0xbe9834ed, 0x00000000, 0xb8bc6765, 0xaa09c88b, 0x12b5afee, 0x8f629757, 0x37def032, 0x256b5fdc, 0x9dd738b9, 0xc5b428ef, 0x7d084f8a, 0x6fbde064, 0xd7018701, 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, 0x58631056, 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, 0x95ad7f70, 0x2d111815, 0x3fa4b7fb, 0x8718d09e, 0x1acfe827, 0xa2738f42, 0xb0c620ac, 0x087a47c9, 0xa032af3e, 0x188ec85b, 0x0a3b67b5, 0xb28700d0, 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, 0xeae41086, 0x525877e3, 0x40edd80d, 0xf851bf68, 0xf02bf8a1, 0x48979fc4, 0x5a22302a, 0xe29e574f, 0x7f496ff6, 0xc7f50893, 
0xd540a77d, 0x6dfcc018, 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, 0x9b14583d, 0x23a83f58, 0x311d90b6, 0x89a1f7d3, 0x1476cf6a, 0xaccaa80f, 0xbe7f07e1, 0x06c36084, 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, 0x4c15df3c, 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, 0x446f98f5, 0xfcd3ff90, 0xee66507e, 0x56da371b, 0x0eb9274d, 0xb6054028, 0xa4b0efc6, 0x1c0c88a3, 0x81dbb01a, 0x3967d77f, 0x2bd27891, 0x936e1ff4, 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, 0xfe92dfec, 0x462eb889, 0x549b1767, 0xec277002, 0x71f048bb, 0xc94c2fde, 0xdbf98030, 0x6345e755, 0x6b3fa09c, 0xd383c7f9, 0xc1366817, 0x798a0f72, 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, 0x21e91f24, 0x99557841, 0x8be0d7af, 0x335cb0ca, 0xed59b63b, 0x55e5d15e, 0x47507eb0, 0xffec19d5, 0x623b216c, 0xda874609, 0xc832e9e7, 0x708e8e82, 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, 0xbd40e1a4, 0x05fc86c1, 0x1749292f, 0xaff54e4a, 0x322276f3, 0x8a9e1196, 0x982bbe78, 0x2097d91d, 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, 0x6a4166a5, 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, 0xc2098e52, 0x7ab5e937, 0x680046d9, 0xd0bc21bc, 0x88df31ea, 0x3063568f, 0x22d6f961, 0x9a6a9e04, 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, 0x15080953, 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, 0xd8c66675, 0x607a0110, 0x72cfaefe, 0xca73c99b, 0x57a4f122, 0xef189647, 0xfdad39a9, 0x45115ecc, 0x764dee06, 0xcef18963, 0xdc44268d, 0x64f841e8, 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, 0x3c9b51be, 0x842736db, 0x96929935, 0x2e2efe50, 0x2654b999, 0x9ee8defc, 0x8c5d7112, 0x34e11677, 0xa9362ece, 0x118a49ab, 0x033fe645, 0xbb838120, 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, 
0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, 0xd67f4138, 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, 0x591dd66f, 0xe1a1b10a, 0xf3141ee4, 0x4ba87981, 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, 0x017ec639, 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, 0x090481f0, 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, 0x43d23e48, 0xfb6e592d, 0xe9dbf6c3, 0x516791a6, 0xccb0a91f, 0x740cce7a, 0x66b96194, 0xde0506f1, #endif /* CRC32_SLICE4 || CRC32_SLICE8 */ #if defined(CRC32_SLICE8) 0x00000000, 0x3d6029b0, 0x7ac05360, 0x47a07ad0, 0xf580a6c0, 0xc8e08f70, 0x8f40f5a0, 0xb220dc10, 0x30704bc1, 0x0d106271, 0x4ab018a1, 0x77d03111, 0xc5f0ed01, 0xf890c4b1, 0xbf30be61, 0x825097d1, 0x60e09782, 0x5d80be32, 0x1a20c4e2, 0x2740ed52, 0x95603142, 0xa80018f2, 0xefa06222, 0xd2c04b92, 0x5090dc43, 0x6df0f5f3, 0x2a508f23, 0x1730a693, 0xa5107a83, 0x98705333, 0xdfd029e3, 0xe2b00053, 0xc1c12f04, 0xfca106b4, 0xbb017c64, 0x866155d4, 0x344189c4, 0x0921a074, 0x4e81daa4, 0x73e1f314, 0xf1b164c5, 0xccd14d75, 0x8b7137a5, 0xb6111e15, 0x0431c205, 0x3951ebb5, 0x7ef19165, 0x4391b8d5, 0xa121b886, 0x9c419136, 0xdbe1ebe6, 0xe681c256, 0x54a11e46, 0x69c137f6, 0x2e614d26, 0x13016496, 0x9151f347, 0xac31daf7, 0xeb91a027, 0xd6f18997, 0x64d15587, 0x59b17c37, 0x1e1106e7, 0x23712f57, 0x58f35849, 0x659371f9, 0x22330b29, 0x1f532299, 0xad73fe89, 0x9013d739, 0xd7b3ade9, 0xead38459, 0x68831388, 0x55e33a38, 0x124340e8, 0x2f236958, 0x9d03b548, 0xa0639cf8, 0xe7c3e628, 0xdaa3cf98, 0x3813cfcb, 0x0573e67b, 0x42d39cab, 0x7fb3b51b, 0xcd93690b, 0xf0f340bb, 0xb7533a6b, 0x8a3313db, 0x0863840a, 0x3503adba, 0x72a3d76a, 0x4fc3feda, 0xfde322ca, 0xc0830b7a, 0x872371aa, 0xba43581a, 0x9932774d, 0xa4525efd, 0xe3f2242d, 0xde920d9d, 0x6cb2d18d, 0x51d2f83d, 0x167282ed, 0x2b12ab5d, 0xa9423c8c, 0x9422153c, 0xd3826fec, 0xeee2465c, 0x5cc29a4c, 0x61a2b3fc, 0x2602c92c, 0x1b62e09c, 0xf9d2e0cf, 0xc4b2c97f, 0x8312b3af, 0xbe729a1f, 0x0c52460f, 0x31326fbf, 0x7692156f, 0x4bf23cdf, 0xc9a2ab0e, 0xf4c282be, 0xb362f86e, 0x8e02d1de, 0x3c220dce, 
0x0142247e, 0x46e25eae, 0x7b82771e, 0xb1e6b092, 0x8c869922, 0xcb26e3f2, 0xf646ca42, 0x44661652, 0x79063fe2, 0x3ea64532, 0x03c66c82, 0x8196fb53, 0xbcf6d2e3, 0xfb56a833, 0xc6368183, 0x74165d93, 0x49767423, 0x0ed60ef3, 0x33b62743, 0xd1062710, 0xec660ea0, 0xabc67470, 0x96a65dc0, 0x248681d0, 0x19e6a860, 0x5e46d2b0, 0x6326fb00, 0xe1766cd1, 0xdc164561, 0x9bb63fb1, 0xa6d61601, 0x14f6ca11, 0x2996e3a1, 0x6e369971, 0x5356b0c1, 0x70279f96, 0x4d47b626, 0x0ae7ccf6, 0x3787e546, 0x85a73956, 0xb8c710e6, 0xff676a36, 0xc2074386, 0x4057d457, 0x7d37fde7, 0x3a978737, 0x07f7ae87, 0xb5d77297, 0x88b75b27, 0xcf1721f7, 0xf2770847, 0x10c70814, 0x2da721a4, 0x6a075b74, 0x576772c4, 0xe547aed4, 0xd8278764, 0x9f87fdb4, 0xa2e7d404, 0x20b743d5, 0x1dd76a65, 0x5a7710b5, 0x67173905, 0xd537e515, 0xe857cca5, 0xaff7b675, 0x92979fc5, 0xe915e8db, 0xd475c16b, 0x93d5bbbb, 0xaeb5920b, 0x1c954e1b, 0x21f567ab, 0x66551d7b, 0x5b3534cb, 0xd965a31a, 0xe4058aaa, 0xa3a5f07a, 0x9ec5d9ca, 0x2ce505da, 0x11852c6a, 0x562556ba, 0x6b457f0a, 0x89f57f59, 0xb49556e9, 0xf3352c39, 0xce550589, 0x7c75d999, 0x4115f029, 0x06b58af9, 0x3bd5a349, 0xb9853498, 0x84e51d28, 0xc34567f8, 0xfe254e48, 0x4c059258, 0x7165bbe8, 0x36c5c138, 0x0ba5e888, 0x28d4c7df, 0x15b4ee6f, 0x521494bf, 0x6f74bd0f, 0xdd54611f, 0xe03448af, 0xa794327f, 0x9af41bcf, 0x18a48c1e, 0x25c4a5ae, 0x6264df7e, 0x5f04f6ce, 0xed242ade, 0xd044036e, 0x97e479be, 0xaa84500e, 0x4834505d, 0x755479ed, 0x32f4033d, 0x0f942a8d, 0xbdb4f69d, 0x80d4df2d, 0xc774a5fd, 0xfa148c4d, 0x78441b9c, 0x4524322c, 0x028448fc, 0x3fe4614c, 0x8dc4bd5c, 0xb0a494ec, 0xf704ee3c, 0xca64c78c, 0x00000000, 0xcb5cd3a5, 0x4dc8a10b, 0x869472ae, 0x9b914216, 0x50cd91b3, 0xd659e31d, 0x1d0530b8, 0xec53826d, 0x270f51c8, 0xa19b2366, 0x6ac7f0c3, 0x77c2c07b, 0xbc9e13de, 0x3a0a6170, 0xf156b2d5, 0x03d6029b, 0xc88ad13e, 0x4e1ea390, 0x85427035, 0x9847408d, 0x531b9328, 0xd58fe186, 0x1ed33223, 0xef8580f6, 0x24d95353, 0xa24d21fd, 0x6911f258, 0x7414c2e0, 0xbf481145, 0x39dc63eb, 0xf280b04e, 0x07ac0536, 0xccf0d693, 0x4a64a43d, 
0x81387798, 0x9c3d4720, 0x57619485, 0xd1f5e62b, 0x1aa9358e, 0xebff875b, 0x20a354fe, 0xa6372650, 0x6d6bf5f5, 0x706ec54d, 0xbb3216e8, 0x3da66446, 0xf6fab7e3, 0x047a07ad, 0xcf26d408, 0x49b2a6a6, 0x82ee7503, 0x9feb45bb, 0x54b7961e, 0xd223e4b0, 0x197f3715, 0xe82985c0, 0x23755665, 0xa5e124cb, 0x6ebdf76e, 0x73b8c7d6, 0xb8e41473, 0x3e7066dd, 0xf52cb578, 0x0f580a6c, 0xc404d9c9, 0x4290ab67, 0x89cc78c2, 0x94c9487a, 0x5f959bdf, 0xd901e971, 0x125d3ad4, 0xe30b8801, 0x28575ba4, 0xaec3290a, 0x659ffaaf, 0x789aca17, 0xb3c619b2, 0x35526b1c, 0xfe0eb8b9, 0x0c8e08f7, 0xc7d2db52, 0x4146a9fc, 0x8a1a7a59, 0x971f4ae1, 0x5c439944, 0xdad7ebea, 0x118b384f, 0xe0dd8a9a, 0x2b81593f, 0xad152b91, 0x6649f834, 0x7b4cc88c, 0xb0101b29, 0x36846987, 0xfdd8ba22, 0x08f40f5a, 0xc3a8dcff, 0x453cae51, 0x8e607df4, 0x93654d4c, 0x58399ee9, 0xdeadec47, 0x15f13fe2, 0xe4a78d37, 0x2ffb5e92, 0xa96f2c3c, 0x6233ff99, 0x7f36cf21, 0xb46a1c84, 0x32fe6e2a, 0xf9a2bd8f, 0x0b220dc1, 0xc07ede64, 0x46eaacca, 0x8db67f6f, 0x90b34fd7, 0x5bef9c72, 0xdd7beedc, 0x16273d79, 0xe7718fac, 0x2c2d5c09, 0xaab92ea7, 0x61e5fd02, 0x7ce0cdba, 0xb7bc1e1f, 0x31286cb1, 0xfa74bf14, 0x1eb014d8, 0xd5ecc77d, 0x5378b5d3, 0x98246676, 0x852156ce, 0x4e7d856b, 0xc8e9f7c5, 0x03b52460, 0xf2e396b5, 0x39bf4510, 0xbf2b37be, 0x7477e41b, 0x6972d4a3, 0xa22e0706, 0x24ba75a8, 0xefe6a60d, 0x1d661643, 0xd63ac5e6, 0x50aeb748, 0x9bf264ed, 0x86f75455, 0x4dab87f0, 0xcb3ff55e, 0x006326fb, 0xf135942e, 0x3a69478b, 0xbcfd3525, 0x77a1e680, 0x6aa4d638, 0xa1f8059d, 0x276c7733, 0xec30a496, 0x191c11ee, 0xd240c24b, 0x54d4b0e5, 0x9f886340, 0x828d53f8, 0x49d1805d, 0xcf45f2f3, 0x04192156, 0xf54f9383, 0x3e134026, 0xb8873288, 0x73dbe12d, 0x6eded195, 0xa5820230, 0x2316709e, 0xe84aa33b, 0x1aca1375, 0xd196c0d0, 0x5702b27e, 0x9c5e61db, 0x815b5163, 0x4a0782c6, 0xcc93f068, 0x07cf23cd, 0xf6999118, 0x3dc542bd, 0xbb513013, 0x700de3b6, 0x6d08d30e, 0xa65400ab, 0x20c07205, 0xeb9ca1a0, 0x11e81eb4, 0xdab4cd11, 0x5c20bfbf, 0x977c6c1a, 0x8a795ca2, 0x41258f07, 0xc7b1fda9, 0x0ced2e0c, 0xfdbb9cd9, 
0x36e74f7c, 0xb0733dd2, 0x7b2fee77, 0x662adecf, 0xad760d6a, 0x2be27fc4, 0xe0beac61, 0x123e1c2f, 0xd962cf8a, 0x5ff6bd24, 0x94aa6e81, 0x89af5e39, 0x42f38d9c, 0xc467ff32, 0x0f3b2c97, 0xfe6d9e42, 0x35314de7, 0xb3a53f49, 0x78f9ecec, 0x65fcdc54, 0xaea00ff1, 0x28347d5f, 0xe368aefa, 0x16441b82, 0xdd18c827, 0x5b8cba89, 0x90d0692c, 0x8dd55994, 0x46898a31, 0xc01df89f, 0x0b412b3a, 0xfa1799ef, 0x314b4a4a, 0xb7df38e4, 0x7c83eb41, 0x6186dbf9, 0xaada085c, 0x2c4e7af2, 0xe712a957, 0x15921919, 0xdececabc, 0x585ab812, 0x93066bb7, 0x8e035b0f, 0x455f88aa, 0xc3cbfa04, 0x089729a1, 0xf9c19b74, 0x329d48d1, 0xb4093a7f, 0x7f55e9da, 0x6250d962, 0xa90c0ac7, 0x2f987869, 0xe4c4abcc, 0x00000000, 0xa6770bb4, 0x979f1129, 0x31e81a9d, 0xf44f2413, 0x52382fa7, 0x63d0353a, 0xc5a73e8e, 0x33ef4e67, 0x959845d3, 0xa4705f4e, 0x020754fa, 0xc7a06a74, 0x61d761c0, 0x503f7b5d, 0xf64870e9, 0x67de9cce, 0xc1a9977a, 0xf0418de7, 0x56368653, 0x9391b8dd, 0x35e6b369, 0x040ea9f4, 0xa279a240, 0x5431d2a9, 0xf246d91d, 0xc3aec380, 0x65d9c834, 0xa07ef6ba, 0x0609fd0e, 0x37e1e793, 0x9196ec27, 0xcfbd399c, 0x69ca3228, 0x582228b5, 0xfe552301, 0x3bf21d8f, 0x9d85163b, 0xac6d0ca6, 0x0a1a0712, 0xfc5277fb, 0x5a257c4f, 0x6bcd66d2, 0xcdba6d66, 0x081d53e8, 0xae6a585c, 0x9f8242c1, 0x39f54975, 0xa863a552, 0x0e14aee6, 0x3ffcb47b, 0x998bbfcf, 0x5c2c8141, 0xfa5b8af5, 0xcbb39068, 0x6dc49bdc, 0x9b8ceb35, 0x3dfbe081, 0x0c13fa1c, 0xaa64f1a8, 0x6fc3cf26, 0xc9b4c492, 0xf85cde0f, 0x5e2bd5bb, 0x440b7579, 0xe27c7ecd, 0xd3946450, 0x75e36fe4, 0xb044516a, 0x16335ade, 0x27db4043, 0x81ac4bf7, 0x77e43b1e, 0xd19330aa, 0xe07b2a37, 0x460c2183, 0x83ab1f0d, 0x25dc14b9, 0x14340e24, 0xb2430590, 0x23d5e9b7, 0x85a2e203, 0xb44af89e, 0x123df32a, 0xd79acda4, 0x71edc610, 0x4005dc8d, 0xe672d739, 0x103aa7d0, 0xb64dac64, 0x87a5b6f9, 0x21d2bd4d, 0xe47583c3, 0x42028877, 0x73ea92ea, 0xd59d995e, 0x8bb64ce5, 0x2dc14751, 0x1c295dcc, 0xba5e5678, 0x7ff968f6, 0xd98e6342, 0xe86679df, 0x4e11726b, 0xb8590282, 0x1e2e0936, 0x2fc613ab, 0x89b1181f, 0x4c162691, 0xea612d25, 0xdb8937b8, 
0x7dfe3c0c, 0xec68d02b, 0x4a1fdb9f, 0x7bf7c102, 0xdd80cab6, 0x1827f438, 0xbe50ff8c, 0x8fb8e511, 0x29cfeea5, 0xdf879e4c, 0x79f095f8, 0x48188f65, 0xee6f84d1, 0x2bc8ba5f, 0x8dbfb1eb, 0xbc57ab76, 0x1a20a0c2, 0x8816eaf2, 0x2e61e146, 0x1f89fbdb, 0xb9fef06f, 0x7c59cee1, 0xda2ec555, 0xebc6dfc8, 0x4db1d47c, 0xbbf9a495, 0x1d8eaf21, 0x2c66b5bc, 0x8a11be08, 0x4fb68086, 0xe9c18b32, 0xd82991af, 0x7e5e9a1b, 0xefc8763c, 0x49bf7d88, 0x78576715, 0xde206ca1, 0x1b87522f, 0xbdf0599b, 0x8c184306, 0x2a6f48b2, 0xdc27385b, 0x7a5033ef, 0x4bb82972, 0xedcf22c6, 0x28681c48, 0x8e1f17fc, 0xbff70d61, 0x198006d5, 0x47abd36e, 0xe1dcd8da, 0xd034c247, 0x7643c9f3, 0xb3e4f77d, 0x1593fcc9, 0x247be654, 0x820cede0, 0x74449d09, 0xd23396bd, 0xe3db8c20, 0x45ac8794, 0x800bb91a, 0x267cb2ae, 0x1794a833, 0xb1e3a387, 0x20754fa0, 0x86024414, 0xb7ea5e89, 0x119d553d, 0xd43a6bb3, 0x724d6007, 0x43a57a9a, 0xe5d2712e, 0x139a01c7, 0xb5ed0a73, 0x840510ee, 0x22721b5a, 0xe7d525d4, 0x41a22e60, 0x704a34fd, 0xd63d3f49, 0xcc1d9f8b, 0x6a6a943f, 0x5b828ea2, 0xfdf58516, 0x3852bb98, 0x9e25b02c, 0xafcdaab1, 0x09baa105, 0xfff2d1ec, 0x5985da58, 0x686dc0c5, 0xce1acb71, 0x0bbdf5ff, 0xadcafe4b, 0x9c22e4d6, 0x3a55ef62, 0xabc30345, 0x0db408f1, 0x3c5c126c, 0x9a2b19d8, 0x5f8c2756, 0xf9fb2ce2, 0xc813367f, 0x6e643dcb, 0x982c4d22, 0x3e5b4696, 0x0fb35c0b, 0xa9c457bf, 0x6c636931, 0xca146285, 0xfbfc7818, 0x5d8b73ac, 0x03a0a617, 0xa5d7ada3, 0x943fb73e, 0x3248bc8a, 0xf7ef8204, 0x519889b0, 0x6070932d, 0xc6079899, 0x304fe870, 0x9638e3c4, 0xa7d0f959, 0x01a7f2ed, 0xc400cc63, 0x6277c7d7, 0x539fdd4a, 0xf5e8d6fe, 0x647e3ad9, 0xc209316d, 0xf3e12bf0, 0x55962044, 0x90311eca, 0x3646157e, 0x07ae0fe3, 0xa1d90457, 0x579174be, 0xf1e67f0a, 0xc00e6597, 0x66796e23, 0xa3de50ad, 0x05a95b19, 0x34414184, 0x92364a30, 0x00000000, 0xccaa009e, 0x4225077d, 0x8e8f07e3, 0x844a0efa, 0x48e00e64, 0xc66f0987, 0x0ac50919, 0xd3e51bb5, 0x1f4f1b2b, 0x91c01cc8, 0x5d6a1c56, 0x57af154f, 0x9b0515d1, 0x158a1232, 0xd92012ac, 0x7cbb312b, 0xb01131b5, 0x3e9e3656, 0xf23436c8, 0xf8f13fd1, 
0x345b3f4f, 0xbad438ac, 0x767e3832, 0xaf5e2a9e, 0x63f42a00, 0xed7b2de3, 0x21d12d7d, 0x2b142464, 0xe7be24fa, 0x69312319, 0xa59b2387, 0xf9766256, 0x35dc62c8, 0xbb53652b, 0x77f965b5, 0x7d3c6cac, 0xb1966c32, 0x3f196bd1, 0xf3b36b4f, 0x2a9379e3, 0xe639797d, 0x68b67e9e, 0xa41c7e00, 0xaed97719, 0x62737787, 0xecfc7064, 0x205670fa, 0x85cd537d, 0x496753e3, 0xc7e85400, 0x0b42549e, 0x01875d87, 0xcd2d5d19, 0x43a25afa, 0x8f085a64, 0x562848c8, 0x9a824856, 0x140d4fb5, 0xd8a74f2b, 0xd2624632, 0x1ec846ac, 0x9047414f, 0x5ced41d1, 0x299dc2ed, 0xe537c273, 0x6bb8c590, 0xa712c50e, 0xadd7cc17, 0x617dcc89, 0xeff2cb6a, 0x2358cbf4, 0xfa78d958, 0x36d2d9c6, 0xb85dde25, 0x74f7debb, 0x7e32d7a2, 0xb298d73c, 0x3c17d0df, 0xf0bdd041, 0x5526f3c6, 0x998cf358, 0x1703f4bb, 0xdba9f425, 0xd16cfd3c, 0x1dc6fda2, 0x9349fa41, 0x5fe3fadf, 0x86c3e873, 0x4a69e8ed, 0xc4e6ef0e, 0x084cef90, 0x0289e689, 0xce23e617, 0x40ace1f4, 0x8c06e16a, 0xd0eba0bb, 0x1c41a025, 0x92cea7c6, 0x5e64a758, 0x54a1ae41, 0x980baedf, 0x1684a93c, 0xda2ea9a2, 0x030ebb0e, 0xcfa4bb90, 0x412bbc73, 0x8d81bced, 0x8744b5f4, 0x4beeb56a, 0xc561b289, 0x09cbb217, 0xac509190, 0x60fa910e, 0xee7596ed, 0x22df9673, 0x281a9f6a, 0xe4b09ff4, 0x6a3f9817, 0xa6959889, 0x7fb58a25, 0xb31f8abb, 0x3d908d58, 0xf13a8dc6, 0xfbff84df, 0x37558441, 0xb9da83a2, 0x7570833c, 0x533b85da, 0x9f918544, 0x111e82a7, 0xddb48239, 0xd7718b20, 0x1bdb8bbe, 0x95548c5d, 0x59fe8cc3, 0x80de9e6f, 0x4c749ef1, 0xc2fb9912, 0x0e51998c, 0x04949095, 0xc83e900b, 0x46b197e8, 0x8a1b9776, 0x2f80b4f1, 0xe32ab46f, 0x6da5b38c, 0xa10fb312, 0xabcaba0b, 0x6760ba95, 0xe9efbd76, 0x2545bde8, 0xfc65af44, 0x30cfafda, 0xbe40a839, 0x72eaa8a7, 0x782fa1be, 0xb485a120, 0x3a0aa6c3, 0xf6a0a65d, 0xaa4de78c, 0x66e7e712, 0xe868e0f1, 0x24c2e06f, 0x2e07e976, 0xe2ade9e8, 0x6c22ee0b, 0xa088ee95, 0x79a8fc39, 0xb502fca7, 0x3b8dfb44, 0xf727fbda, 0xfde2f2c3, 0x3148f25d, 0xbfc7f5be, 0x736df520, 0xd6f6d6a7, 0x1a5cd639, 0x94d3d1da, 0x5879d144, 0x52bcd85d, 0x9e16d8c3, 0x1099df20, 0xdc33dfbe, 0x0513cd12, 0xc9b9cd8c, 0x4736ca6f, 
0x8b9ccaf1, 0x8159c3e8, 0x4df3c376, 0xc37cc495, 0x0fd6c40b, 0x7aa64737, 0xb60c47a9, 0x3883404a, 0xf42940d4, 0xfeec49cd, 0x32464953, 0xbcc94eb0, 0x70634e2e, 0xa9435c82, 0x65e95c1c, 0xeb665bff, 0x27cc5b61, 0x2d095278, 0xe1a352e6, 0x6f2c5505, 0xa386559b, 0x061d761c, 0xcab77682, 0x44387161, 0x889271ff, 0x825778e6, 0x4efd7878, 0xc0727f9b, 0x0cd87f05, 0xd5f86da9, 0x19526d37, 0x97dd6ad4, 0x5b776a4a, 0x51b26353, 0x9d1863cd, 0x1397642e, 0xdf3d64b0, 0x83d02561, 0x4f7a25ff, 0xc1f5221c, 0x0d5f2282, 0x079a2b9b, 0xcb302b05, 0x45bf2ce6, 0x89152c78, 0x50353ed4, 0x9c9f3e4a, 0x121039a9, 0xdeba3937, 0xd47f302e, 0x18d530b0, 0x965a3753, 0x5af037cd, 0xff6b144a, 0x33c114d4, 0xbd4e1337, 0x71e413a9, 0x7b211ab0, 0xb78b1a2e, 0x39041dcd, 0xf5ae1d53, 0x2c8e0fff, 0xe0240f61, 0x6eab0882, 0xa201081c, 0xa8c40105, 0x646e019b, 0xeae10678, 0x264b06e6, #endif /* CRC32_SLICE8 */ }; libdeflate-1.5/lib/crc32_vec_template.h000066400000000000000000000041021360172702500200350ustar00rootroot00000000000000/* * crc32_vec_template.h - template for vectorized CRC-32 implementations * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #define CRC32_SLICE1 1 static u32 crc32_slice1(u32, const u8 *, size_t); /* * Template for vectorized CRC-32 implementations. * * Note: on unaligned ends of the buffer, we fall back to crc32_slice1() instead * of crc32_slice8() because only a few bytes need to be processed, so a smaller * table is preferable. */ static u32 ATTRIBUTES FUNCNAME(u32 remainder, const u8 *p, size_t size) { if ((uintptr_t)p % IMPL_ALIGNMENT) { size_t n = MIN(size, -(uintptr_t)p % IMPL_ALIGNMENT); remainder = crc32_slice1(remainder, p, n); p += n; size -= n; } if (size >= IMPL_SEGMENT_SIZE) { remainder = FUNCNAME_ALIGNED(remainder, (const void *)p, size / IMPL_SEGMENT_SIZE); p += size - (size % IMPL_SEGMENT_SIZE); size %= IMPL_SEGMENT_SIZE; } return crc32_slice1(remainder, p, size); } #undef FUNCNAME #undef FUNCNAME_ALIGNED #undef ATTRIBUTES #undef IMPL_ALIGNMENT #undef IMPL_SEGMENT_SIZE libdeflate-1.5/lib/decompress_template.h000066400000000000000000000307401360172702500204370ustar00rootroot00000000000000/* * decompress_template.h * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
*
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * This is the actual DEFLATE decompression routine, lifted out of
 * deflate_decompress.c so that it can be compiled multiple times with different
 * target instruction sets.
 *
 * The including file supplies FUNCNAME and ATTRIBUTES (both #undef'ed at the
 * end of this template) as well as the bit-reader and checking macros used
 * below (ENSURE_BITS, POP_BITS, BITS, REMOVE_BITS, ALIGN_INPUT, READ_U16,
 * SAFETY_CHECK, ...), none of which are defined in this template.
 *
 * @d: decompressor state.  This routine writes d->static_codes_loaded,
 *     d->u.precode_lens and d->u.l.lens, and reads the precode, litlen and
 *     offset decode tables.
 * @in, @in_nbytes: compressed input buffer and its size in bytes
 * @out, @out_nbytes_avail: output buffer and its capacity in bytes
 * @actual_in_nbytes_ret: if non-NULL, receives 'in_next - in' once the final
 *     block has been processed
 * @actual_out_nbytes_ret: if non-NULL, receives the number of bytes written.
 *     If NULL, the output must fill @out exactly, otherwise
 *     LIBDEFLATE_SHORT_OUTPUT is returned.
 *
 * Blocks are decoded one after another until a block whose BFINAL bit is set
 * has been processed.
 *
 * Returns LIBDEFLATE_SUCCESS on success, LIBDEFLATE_INSUFFICIENT_SPACE when a
 * literal, match or stored block would not fit in @out, or
 * LIBDEFLATE_SHORT_OUTPUT as described above.
 * NOTE(review): SAFETY_CHECK() is defined outside this template; presumably it
 * makes the function return an error for invalid input -- confirm against its
 * definition.
 */

static enum libdeflate_result ATTRIBUTES
FUNCNAME(struct libdeflate_decompressor * restrict d,
	 const void * restrict in, size_t in_nbytes,
	 void * restrict out, size_t out_nbytes_avail,
	 size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret)
{
	u8 *out_next = out;
	u8 * const out_end = out_next + out_nbytes_avail;
	const u8 *in_next = in;
	const u8 * const in_end = in_next + in_nbytes;
	/*
	 * bitbuf, bitsleft, overrun_count, tmp16 and tmp32 are never named
	 * directly in this function.
	 * NOTE(review): presumably they are the working state of the bit-reader
	 * macros (ENSURE_BITS(), POP_BITS(), READ_U16(), ...) -- confirm against
	 * the macro definitions in the including file.
	 */
	bitbuf_t bitbuf = 0;
	unsigned bitsleft = 0;
	size_t overrun_count = 0;
	unsigned i;
	unsigned is_final_block;
	unsigned block_type;
	u16 len;
	u16 nlen;
	unsigned num_litlen_syms;
	unsigned num_offset_syms;
	u16 tmp16;
	u32 tmp32;

next_block:
	/* Starting to read the next block. */
	;

	/*
	 * One refill covers the block header (BFINAL + BTYPE) plus, for dynamic
	 * blocks, the three count fields (5 + 5 + 4 bits) popped further down.
	 */
	STATIC_ASSERT(CAN_ENSURE(1 + 2 + 5 + 5 + 4));
	ENSURE_BITS(1 + 2 + 5 + 5 + 4);

	/* BFINAL: 1 bit */
	is_final_block = POP_BITS(1);

	/* BTYPE: 2 bits */
	block_type = POP_BITS(2);

	if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) {

		/* Dynamic Huffman block. */

		/* The order in which precode lengths are stored. */
		static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = {
			16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
		};

		unsigned num_explicit_precode_lens;

		/* Read the codeword length counts. */

		STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == ((1 << 5) - 1) + 257);
		num_litlen_syms = POP_BITS(5) + 257;

		STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == ((1 << 5) - 1) + 1);
		num_offset_syms = POP_BITS(5) + 1;

		STATIC_ASSERT(DEFLATE_NUM_PRECODE_SYMS == ((1 << 4) - 1) + 4);
		num_explicit_precode_lens = POP_BITS(4) + 4;

		/*
		 * The lens array and decode tables are about to be overwritten
		 * with this block's codes, so a later static block must rebuild
		 * the static tables instead of taking the shortcut below.
		 */
		d->static_codes_loaded = false;

		/* Read the precode codeword lengths. */
		STATIC_ASSERT(DEFLATE_MAX_PRE_CODEWORD_LEN == (1 << 3) - 1);
		for (i = 0; i < num_explicit_precode_lens; i++) {
			ENSURE_BITS(3);
			d->u.precode_lens[deflate_precode_lens_permutation[i]] = POP_BITS(3);
		}

		/* Precode symbols without an explicit length get length 0. */
		for (; i < DEFLATE_NUM_PRECODE_SYMS; i++)
			d->u.precode_lens[deflate_precode_lens_permutation[i]] = 0;

		/* Build the decode table for the precode. */
		SAFETY_CHECK(build_precode_decode_table(d));

		/* Expand the literal/length and offset codeword lengths. */
		for (i = 0; i < num_litlen_syms + num_offset_syms; ) {
			u32 entry;
			unsigned presym;
			u8 rep_val;
			unsigned rep_count;

			/* Enough for one precode codeword plus up to 7 extra bits. */
			ENSURE_BITS(DEFLATE_MAX_PRE_CODEWORD_LEN + 7);

			/* (The code below assumes that the precode decode table
			 * does not have any subtables.) */
			STATIC_ASSERT(PRECODE_TABLEBITS == DEFLATE_MAX_PRE_CODEWORD_LEN);

			/* Read the next precode symbol. */
			entry = d->u.l.precode_decode_table[BITS(DEFLATE_MAX_PRE_CODEWORD_LEN)];
			REMOVE_BITS(entry & HUFFDEC_LENGTH_MASK);
			presym = entry >> HUFFDEC_RESULT_SHIFT;

			if (presym < 16) {
				/* Explicit codeword length */
				d->u.l.lens[i++] = presym;
				continue;
			}

			/* Run-length encoded codeword lengths */

			/* Note: we don't need to verify that the repeat count
			 * doesn't overflow the number of elements, since we
			 * have enough extra spaces to allow for the worst-case
			 * overflow (138 zeroes when only 1 length was
			 * remaining).
			 *
			 * In the case of the small repeat counts (presyms 16
			 * and 17), it is fastest to always write the maximum
			 * number of entries. That gets rid of branches that
			 * would otherwise be required.
			 *
			 * It is not just because of the numerical order that
			 * our checks go in the order 'presym < 16', 'presym ==
			 * 16', and 'presym == 17'. For typical data this is
			 * ordered from most frequent to least frequent case. */
			STATIC_ASSERT(DEFLATE_MAX_LENS_OVERRUN == 138 - 1);

			if (presym == 16) {
				/* Repeat the previous length 3 - 6 times */
				SAFETY_CHECK(i != 0);
				rep_val = d->u.l.lens[i - 1];
				STATIC_ASSERT(3 + ((1 << 2) - 1) == 6);
				rep_count = 3 + POP_BITS(2);
				d->u.l.lens[i + 0] = rep_val;
				d->u.l.lens[i + 1] = rep_val;
				d->u.l.lens[i + 2] = rep_val;
				d->u.l.lens[i + 3] = rep_val;
				d->u.l.lens[i + 4] = rep_val;
				d->u.l.lens[i + 5] = rep_val;
				i += rep_count;
			} else if (presym == 17) {
				/* Repeat zero 3 - 10 times */
				STATIC_ASSERT(3 + ((1 << 3) - 1) == 10);
				rep_count = 3 + POP_BITS(3);
				d->u.l.lens[i + 0] = 0;
				d->u.l.lens[i + 1] = 0;
				d->u.l.lens[i + 2] = 0;
				d->u.l.lens[i + 3] = 0;
				d->u.l.lens[i + 4] = 0;
				d->u.l.lens[i + 5] = 0;
				d->u.l.lens[i + 6] = 0;
				d->u.l.lens[i + 7] = 0;
				d->u.l.lens[i + 8] = 0;
				d->u.l.lens[i + 9] = 0;
				i += rep_count;
			} else {
				/* Repeat zero 11 - 138 times */
				STATIC_ASSERT(11 + ((1 << 7) - 1) == 138);
				rep_count = 11 + POP_BITS(7);
				memset(&d->u.l.lens[i], 0,
				       rep_count * sizeof(d->u.l.lens[i]));
				i += rep_count;
			}
		}
	} else if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) {

		/* Uncompressed block: copy 'len' bytes literally from the input
		 * buffer to the output buffer. */

		ALIGN_INPUT();

		/* LEN and NLEN are two 16-bit fields: 4 bytes in total. */
		SAFETY_CHECK(in_end - in_next >= 4);

		len = READ_U16();
		nlen = READ_U16();

		/* NLEN must be the bitwise complement of LEN. */
		SAFETY_CHECK(len == (u16)~nlen);
		if (unlikely(len > out_end - out_next))
			return LIBDEFLATE_INSUFFICIENT_SPACE;
		SAFETY_CHECK(len <= in_end - in_next);

		memcpy(out_next, in_next, len);
		in_next += len;
		out_next += len;

		goto block_done;

	} else {
		SAFETY_CHECK(block_type == DEFLATE_BLOCKTYPE_STATIC_HUFFMAN);

		/*
		 * Static Huffman block: build the decode tables for the static
		 * codes. Skip doing so if the tables are already set up from
		 * an earlier static block; this speeds up decompression of
		 * degenerate input of many empty or very short static blocks.
		 *
		 * Afterwards, the remainder is the same as decompressing a
		 * dynamic Huffman block.
		 */

		if (d->static_codes_loaded)
			goto have_decode_tables;

		d->static_codes_loaded = true;

		STATIC_ASSERT(DEFLATE_NUM_LITLEN_SYMS == 288);
		STATIC_ASSERT(DEFLATE_NUM_OFFSET_SYMS == 32);

		/* Litlen symbols 0-143: 8, 144-255: 9, 256-279: 7, 280-287: 8 */
		for (i = 0; i < 144; i++)
			d->u.l.lens[i] = 8;
		for (; i < 256; i++)
			d->u.l.lens[i] = 9;
		for (; i < 280; i++)
			d->u.l.lens[i] = 7;
		for (; i < 288; i++)
			d->u.l.lens[i] = 8;

		/* The 32 offset lengths follow the litlen lengths; all are 5. */
		for (; i < 288 + 32; i++)
			d->u.l.lens[i] = 5;

		num_litlen_syms = 288;
		num_offset_syms = 32;
	}

	/* Decompressing a Huffman block (either dynamic or static) */

	SAFETY_CHECK(build_offset_decode_table(d, num_litlen_syms, num_offset_syms));
	SAFETY_CHECK(build_litlen_decode_table(d, num_litlen_syms, num_offset_syms));
	/*
	 * A static block that found static_codes_loaded already set jumps
	 * straight here, skipping the two table builds above.
	 */
have_decode_tables:

	/* The main DEFLATE decode loop */
	for (;;) {
		u32 entry;
		u32 length;
		u32 offset;
		const u8 *src;
		u8 *dst;

		/* Decode a litlen symbol. */
		ENSURE_BITS(DEFLATE_MAX_LITLEN_CODEWORD_LEN);
		entry = d->u.litlen_decode_table[BITS(LITLEN_TABLEBITS)];
		if (entry & HUFFDEC_SUBTABLE_POINTER) {
			/* Litlen subtable required (uncommon case) */
			REMOVE_BITS(LITLEN_TABLEBITS);
			entry = d->u.litlen_decode_table[
				((entry >> HUFFDEC_RESULT_SHIFT) & 0xFFFF) +
					BITS(entry & HUFFDEC_LENGTH_MASK)];
		}
		REMOVE_BITS(entry & HUFFDEC_LENGTH_MASK);
		if (entry & HUFFDEC_LITERAL) {
			/* Literal */
			if (unlikely(out_next == out_end))
				return LIBDEFLATE_INSUFFICIENT_SPACE;
			*out_next++ = (u8)(entry >> HUFFDEC_RESULT_SHIFT);
			continue;
		}

		/* Match or end-of-block */

		entry >>= HUFFDEC_RESULT_SHIFT;
		ENSURE_BITS(MAX_ENSURE);

		/* Pop the extra length bits and add them to the length base to
		 * produce the full length. */
		length = (entry >> HUFFDEC_LENGTH_BASE_SHIFT) +
			 POP_BITS(entry & HUFFDEC_EXTRA_LENGTH_BITS_MASK);

		/* The match destination must not end after the end of the
		 * output buffer. For efficiency, combine this check with the
		 * end-of-block check. We're using 0 for the special
		 * end-of-block length, so subtract 1 to turn it into
		 * SIZE_MAX. */
		STATIC_ASSERT(HUFFDEC_END_OF_BLOCK_LENGTH == 0);
		if (unlikely((size_t)length - 1 >= out_end - out_next)) {
			if (unlikely(length != HUFFDEC_END_OF_BLOCK_LENGTH))
				return LIBDEFLATE_INSUFFICIENT_SPACE;
			goto block_done;
		}

		/* Decode the match offset. */

		entry = d->offset_decode_table[BITS(OFFSET_TABLEBITS)];
		if (entry & HUFFDEC_SUBTABLE_POINTER) {
			/* Offset subtable required (uncommon case) */
			REMOVE_BITS(OFFSET_TABLEBITS);
			entry = d->offset_decode_table[
				((entry >> HUFFDEC_RESULT_SHIFT) & 0xFFFF) +
					BITS(entry & HUFFDEC_LENGTH_MASK)];
		}
		REMOVE_BITS(entry & HUFFDEC_LENGTH_MASK);
		entry >>= HUFFDEC_RESULT_SHIFT;

		/*
		 * Refill for the extra offset bits only when the earlier
		 * ENSURE_BITS(MAX_ENSURE) cannot be relied on to have covered
		 * the extra length bits, the offset codeword and the extra
		 * offset bits all at once.
		 */
		STATIC_ASSERT(CAN_ENSURE(DEFLATE_MAX_EXTRA_LENGTH_BITS +
					 DEFLATE_MAX_OFFSET_CODEWORD_LEN) &&
			      CAN_ENSURE(DEFLATE_MAX_EXTRA_OFFSET_BITS));
		if (!CAN_ENSURE(DEFLATE_MAX_EXTRA_LENGTH_BITS +
				DEFLATE_MAX_OFFSET_CODEWORD_LEN +
				DEFLATE_MAX_EXTRA_OFFSET_BITS))
			ENSURE_BITS(DEFLATE_MAX_EXTRA_OFFSET_BITS);

		/* Pop the extra offset bits and add them to the offset base to
		 * produce the full offset. */
		offset = (entry & HUFFDEC_OFFSET_BASE_MASK) +
			 POP_BITS(entry >> HUFFDEC_EXTRA_OFFSET_BITS_SHIFT);

		/* The match source must not begin before the beginning of the
		 * output buffer. */
		SAFETY_CHECK(offset <= out_next - (const u8 *)out);

		/*
		 * Copy the match: 'length' bytes at 'out_next - offset' to
		 * 'out_next', possibly overlapping. If the match doesn't end
		 * too close to the end of the buffer and offset >= WORDBYTES ||
		 * offset == 1, take a fast path which copies a word at a time
		 * -- potentially more than the length of the match, but that's
		 * fine as long as we check for enough extra space.
		 *
		 * The remaining cases are not performance-critical so are
		 * handled by a simple byte-by-byte copy.
		 */

		src = out_next - offset;
		dst = out_next;
		/* From here on out_next is the end of the match; the copy
		 * loops below run until dst reaches it. */
		out_next += length;

		if (UNALIGNED_ACCESS_IS_FAST &&
		    /* max overrun is writing 3 words for a min length match */
		    likely(out_end - out_next >=
			   3 * WORDBYTES - DEFLATE_MIN_MATCH_LEN)) {
			if (offset >= WORDBYTES) { /* words don't overlap? */
				copy_word_unaligned(src, dst);
				src += WORDBYTES;
				dst += WORDBYTES;
				copy_word_unaligned(src, dst);
				src += WORDBYTES;
				dst += WORDBYTES;
				do {
					copy_word_unaligned(src, dst);
					src += WORDBYTES;
					dst += WORDBYTES;
				} while (dst < out_next);
			} else if (offset == 1) {
				/* RLE encoding of previous byte, common if the
				 * data contains many repeated bytes */
				machine_word_t v = repeat_byte(*src);

				store_word_unaligned(v, dst);
				dst += WORDBYTES;
				store_word_unaligned(v, dst);
				dst += WORDBYTES;
				do {
					store_word_unaligned(v, dst);
					dst += WORDBYTES;
				} while (dst < out_next);
			} else {
				/* Two unconditional copies plus a do-while that
				 * runs at least once: at least 3 bytes. */
				*dst++ = *src++;
				*dst++ = *src++;
				do {
					*dst++ = *src++;
				} while (dst < out_next);
			}
		} else {
			/* Same 3-byte-minimum byte copy as above; the assertion
			 * ties that minimum to the minimum match length. */
			STATIC_ASSERT(DEFLATE_MIN_MATCH_LEN == 3);
			*dst++ = *src++;
			*dst++ = *src++;
			do {
				*dst++ = *src++;
			} while (dst < out_next);
		}
	}

block_done:
	/* Finished decoding a block. */

	if (!is_final_block)
		goto next_block;

	/* That was the last block. */

	/* Discard any readahead bits and check for excessive overread */
	ALIGN_INPUT();

	/* Optionally return the actual number of bytes read */
	if (actual_in_nbytes_ret)
		*actual_in_nbytes_ret = in_next - (u8 *)in;

	/* Optionally return the actual number of bytes written */
	if (actual_out_nbytes_ret) {
		*actual_out_nbytes_ret = out_next - (u8 *)out;
	} else {
		/* No way to report a short count, so require an exact fill. */
		if (out_next != out_end)
			return LIBDEFLATE_SHORT_OUTPUT;
	}
	return LIBDEFLATE_SUCCESS;
}

#undef FUNCNAME
#undef ATTRIBUTES
libdeflate-1.5/lib/deflate_compress.c000066400000000000002670351360172702500177230ustar00rootroot00000000000000/*
 * deflate_compress.c - a compressor for DEFLATE
 *
 * Originally public domain; changes after 2016-09-07 are copyrighted.
* * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include "aligned_malloc.h" #include "deflate_compress.h" #include "deflate_constants.h" #include "unaligned.h" #include "libdeflate.h" /* * By default, the near-optimal parsing algorithm is enabled at compression * level 8 and above. The near-optimal parsing algorithm produces a compression * ratio significantly better than the greedy and lazy algorithms implemented * here, and also the algorithm used by zlib at level 9. However, it is slow. */ #define SUPPORT_NEAR_OPTIMAL_PARSING 1 /* * Define to 1 to maintain the full map from match offsets to offset slots. * This slightly speeds up translations of match offsets to offset slots, but it * uses 32769 bytes of memory rather than the 512 bytes used by the condensed * map. The speedup provided by the larger map is most helpful when the * near-optimal parsing algorithm is being used. 
*/
#define USE_FULL_OFFSET_SLOT_FAST SUPPORT_NEAR_OPTIMAL_PARSING

/*
 * DEFLATE uses a 32768 byte sliding window; set the matchfinder parameters
 * appropriately.  (An order of 15 corresponds to 1 << 15 == 32768 bytes.)
 *
 * NOTE(review): the matchfinder headers are included after this definition,
 * presumably because they consume MATCHFINDER_WINDOW_ORDER -- confirm before
 * reordering.
 */
#define MATCHFINDER_WINDOW_ORDER 15

#include "hc_matchfinder.h"
#if SUPPORT_NEAR_OPTIMAL_PARSING
# include "bt_matchfinder.h"
#endif

/*
 * The compressor always chooses a block of at least MIN_BLOCK_LENGTH bytes,
 * except if the last block has to be shorter.
 */
#define MIN_BLOCK_LENGTH 10000

/*
 * The compressor attempts to end blocks after SOFT_MAX_BLOCK_LENGTH bytes, but
 * the final length might be slightly longer due to matches extending beyond
 * this limit.
 */
#define SOFT_MAX_BLOCK_LENGTH 300000

/*
 * The number of observed matches or literals that represents sufficient data to
 * decide whether the current block should be terminated or not.
 */
#define NUM_OBSERVATIONS_PER_BLOCK_CHECK 512

#if SUPPORT_NEAR_OPTIMAL_PARSING

/* Constants specific to the near-optimal parsing algorithm */

/*
 * The maximum number of matches the matchfinder can find at a single position.
 * Since the matchfinder never finds more than one match for the same length,
 * presuming one of each possible length is sufficient for an upper bound.
 * (This says nothing about whether it is worthwhile to consider so many
 * matches; this is just defining the worst case.)
 */
# define MAX_MATCHES_PER_POS (DEFLATE_MAX_MATCH_LEN - DEFLATE_MIN_MATCH_LEN + 1)

/*
 * The number of lz_match structures in the match cache, excluding the extra
 * "overflow" entries.  This value should be high enough so that nearly all the
 * time, all matches found in a given block can fit in the match cache.
 * However, fallback behavior (immediately terminating the block) on cache
 * overflow is still required.
 *
 * With SOFT_MAX_BLOCK_LENGTH == 300000 this evaluates to 1500000 entries.
 */
# define CACHE_LENGTH (SOFT_MAX_BLOCK_LENGTH * 5)

#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */

/*
 * These are the compressor-side limits on the codeword lengths for each Huffman
 * code.
To make outputting bits slightly faster, some of these limits are * lower than the limits defined by the DEFLATE format. This does not * significantly affect the compression ratio, at least for the block lengths we * use. */ #define MAX_LITLEN_CODEWORD_LEN 14 #define MAX_OFFSET_CODEWORD_LEN DEFLATE_MAX_OFFSET_CODEWORD_LEN #define MAX_PRE_CODEWORD_LEN DEFLATE_MAX_PRE_CODEWORD_LEN /* Table: length slot => length slot base value */ static const unsigned deflate_length_slot_base[] = { 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 13 , 15 , 17 , 19 , 23 , 27 , 31 , 35 , 43 , 51 , 59 , 67 , 83 , 99 , 115 , 131 , 163 , 195 , 227 , 258 , }; /* Table: length slot => number of extra length bits */ static const u8 deflate_extra_length_bits[] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 1 , 2 , 2 , 2 , 2 , 3 , 3 , 3 , 3 , 4 , 4 , 4 , 4 , 5 , 5 , 5 , 5 , 0 , }; /* Table: offset slot => offset slot base value */ static const unsigned deflate_offset_slot_base[] = { 1 , 2 , 3 , 4 , 5 , 7 , 9 , 13 , 17 , 25 , 33 , 49 , 65 , 97 , 129 , 193 , 257 , 385 , 513 , 769 , 1025 , 1537 , 2049 , 3073 , 4097 , 6145 , 8193 , 12289 , 16385 , 24577 , }; /* Table: offset slot => number of extra offset bits */ static const u8 deflate_extra_offset_bits[] = { 0 , 0 , 0 , 0 , 1 , 1 , 2 , 2 , 3 , 3 , 4 , 4 , 5 , 5 , 6 , 6 , 7 , 7 , 8 , 8 , 9 , 9 , 10 , 10 , 11 , 11 , 12 , 12 , 13 , 13 , }; /* Table: length => length slot */ static const u8 deflate_length_slot[DEFLATE_MAX_MATCH_LEN + 1] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, }; /* The order in which precode codeword lengths are stored */ static const u8 deflate_precode_lens_permutation[DEFLATE_NUM_PRECODE_SYMS] = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; /* Codewords for the DEFLATE Huffman codes. */ struct deflate_codewords { u32 litlen[DEFLATE_NUM_LITLEN_SYMS]; u32 offset[DEFLATE_NUM_OFFSET_SYMS]; }; /* Codeword lengths (in bits) for the DEFLATE Huffman codes. * A zero length means the corresponding symbol had zero frequency. */ struct deflate_lens { u8 litlen[DEFLATE_NUM_LITLEN_SYMS]; u8 offset[DEFLATE_NUM_OFFSET_SYMS]; }; /* Codewords and lengths for the DEFLATE Huffman codes. */ struct deflate_codes { struct deflate_codewords codewords; struct deflate_lens lens; }; /* Symbol frequency counters for the DEFLATE Huffman codes. */ struct deflate_freqs { u32 litlen[DEFLATE_NUM_LITLEN_SYMS]; u32 offset[DEFLATE_NUM_OFFSET_SYMS]; }; #if SUPPORT_NEAR_OPTIMAL_PARSING /* Costs for the near-optimal parsing algorithm. */ struct deflate_costs { /* The cost to output each possible literal. */ u32 literal[DEFLATE_NUM_LITERALS]; /* The cost to output each possible match length. */ u32 length[DEFLATE_MAX_MATCH_LEN + 1]; /* The cost to output a match offset of each possible offset slot. */ u32 offset_slot[DEFLATE_NUM_OFFSET_SYMS]; }; /* * COST_SHIFT is a scaling factor that makes it possible to consider fractional * bit costs. A token requiring 'n' bits to represent has cost n << COST_SHIFT. 
* * Note: this is only useful as a statistical trick for when the true costs are * unknown. In reality, each token in DEFLATE requires a whole number of bits * to output. */ #define COST_SHIFT 3 /* * The NOSTAT_BITS value for a given alphabet is the number of bits assumed to * be needed to output a symbol that was unused in the previous optimization * pass. Assigning a default cost allows the symbol to be used in the next * optimization pass. However, the cost should be relatively high because the * symbol probably won't be used very many times (if at all). */ #define LITERAL_NOSTAT_BITS 13 #define LENGTH_NOSTAT_BITS 13 #define OFFSET_NOSTAT_BITS 10 #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */ /* * Represents a run of literals followed by a match or end-of-block. This * struct is needed to temporarily store items chosen by the parser, since items * cannot be written until all items for the block have been chosen and the * block's Huffman codes have been computed. */ struct deflate_sequence { /* Bits 0..22: the number of literals in this run. This may be 0 and * can be at most about SOFT_MAX_BLOCK_LENGTH. The literals are not * stored explicitly in this structure; instead, they are read directly * from the uncompressed data. * * Bits 23..31: the length of the match which follows the literals, or 0 * if this literal run was the last in the block, so there is no match * which follows it. */ u32 litrunlen_and_length; /* If 'length' doesn't indicate end-of-block, then this is the offset of * the match which follows the literals. */ u16 offset; /* If 'length' doesn't indicate end-of-block, then this is the offset * symbol of the match which follows the literals. */ u8 offset_symbol; /* If 'length' doesn't indicate end-of-block, then this is the length * slot of the match which follows the literals. 
*/ u8 length_slot; }; #if SUPPORT_NEAR_OPTIMAL_PARSING /* * This structure represents a byte position in the input data and a node in the * graph of possible match/literal choices for the current block. * * Logically, each incoming edge to this node is labeled with a literal or a * match that can be taken to reach this position from an earlier position; and * each outgoing edge from this node is labeled with a literal or a match that * can be taken to advance from this position to a later position. * * But these "edges" are actually stored elsewhere (in 'match_cache'). Here we * associate with each node just two pieces of information: * * 'cost_to_end' is the minimum cost to reach the end of the block from * this position. * * 'item' represents the literal or match that must be chosen from here to * reach the end of the block with the minimum cost. Equivalently, this * can be interpreted as the label of the outgoing edge on the minimum-cost * path to the "end of block" node from this node. */ struct deflate_optimum_node { u32 cost_to_end; /* * Notes on the match/literal representation used here: * * The low bits of 'item' are the length: 1 if this is a literal, * or the match length if this is a match. * * The high bits of 'item' are the actual literal byte if this is a * literal, or the match offset if this is a match. */ #define OPTIMUM_OFFSET_SHIFT 9 #define OPTIMUM_LEN_MASK (((u32)1 << OPTIMUM_OFFSET_SHIFT) - 1) u32 item; }; #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */ /* Block split statistics. See "Block splitting algorithm" below. 
*/ #define NUM_LITERAL_OBSERVATION_TYPES 8 #define NUM_MATCH_OBSERVATION_TYPES 2 #define NUM_OBSERVATION_TYPES (NUM_LITERAL_OBSERVATION_TYPES + NUM_MATCH_OBSERVATION_TYPES) struct block_split_stats { u32 new_observations[NUM_OBSERVATION_TYPES]; u32 observations[NUM_OBSERVATION_TYPES]; u32 num_new_observations; u32 num_observations; }; /* The main DEFLATE compressor structure */ struct libdeflate_compressor { /* Pointer to the compress() implementation chosen at allocation time */ size_t (*impl)(struct libdeflate_compressor *, const u8 *, size_t, u8 *, size_t); /* Frequency counters for the current block */ struct deflate_freqs freqs; /* Dynamic Huffman codes for the current block */ struct deflate_codes codes; /* Static Huffman codes */ struct deflate_codes static_codes; /* Block split statistics for the currently pending block */ struct block_split_stats split_stats; /* A table for fast lookups of offset slot by match offset. * * If the full table is being used, it is a direct mapping from offset * to offset slot. * * If the condensed table is being used, the first 256 entries map * directly to the offset slots of offsets 1 through 256. The next 256 * entries map to the offset slots for the remaining offsets, stepping * through the offsets with a stride of 128. This relies on the fact * that each of the remaining offset slots contains at least 128 offsets * and has an offset base that is a multiple of 128. */ #if USE_FULL_OFFSET_SLOT_FAST u8 offset_slot_fast[DEFLATE_MAX_MATCH_OFFSET + 1]; #else u8 offset_slot_fast[512]; #endif /* The "nice" match length: if a match of this length is found, choose * it immediately without further consideration. */ unsigned nice_match_length; /* The maximum search depth: consider at most this many potential * matches at each position. */ unsigned max_search_depth; /* The compression level with which this compressor was created. 
*/ unsigned compression_level; /* Temporary space for Huffman code output */ u32 precode_freqs[DEFLATE_NUM_PRECODE_SYMS]; u8 precode_lens[DEFLATE_NUM_PRECODE_SYMS]; u32 precode_codewords[DEFLATE_NUM_PRECODE_SYMS]; unsigned precode_items[DEFLATE_NUM_LITLEN_SYMS + DEFLATE_NUM_OFFSET_SYMS]; unsigned num_litlen_syms; unsigned num_offset_syms; unsigned num_explicit_lens; unsigned num_precode_items; union { /* Data for greedy or lazy parsing */ struct { /* Hash chain matchfinder */ struct hc_matchfinder hc_mf; /* The matches and literals that the parser has chosen * for the current block. The required length of this * array is limited by the maximum number of matches * that can ever be chosen for a single block, plus one * for the special entry at the end. */ struct deflate_sequence sequences[ DIV_ROUND_UP(SOFT_MAX_BLOCK_LENGTH, DEFLATE_MIN_MATCH_LEN) + 1]; } g; /* (g)reedy */ #if SUPPORT_NEAR_OPTIMAL_PARSING /* Data for near-optimal parsing */ struct { /* Binary tree matchfinder */ struct bt_matchfinder bt_mf; /* * Cached matches for the current block. This array * contains the matches that were found at each position * in the block. Specifically, for each position, there * is a list of matches found at that position, if any, * sorted by strictly increasing length. In addition, * following the matches for each position, there is a * special 'struct lz_match' whose 'length' member * contains the number of matches found at that * position, and whose 'offset' member contains the * literal at that position. * * Note: in rare cases, there will be a very high number * of matches in the block and this array will overflow. * If this happens, we force the end of the current * block. CACHE_LENGTH is the length at which we * actually check for overflow. 
The extra slots beyond * this are enough to absorb the worst case overflow, * which occurs if starting at &match_cache[CACHE_LENGTH * - 1], we write MAX_MATCHES_PER_POS matches and a * match count header, then skip searching for matches * at 'DEFLATE_MAX_MATCH_LEN - 1' positions and write * the match count header for each. */ struct lz_match match_cache[CACHE_LENGTH + MAX_MATCHES_PER_POS + DEFLATE_MAX_MATCH_LEN - 1]; /* * Array of nodes, one per position, for running the * minimum-cost path algorithm. * * This array must be large enough to accommodate the * worst-case number of nodes, which occurs if we find a * match of length DEFLATE_MAX_MATCH_LEN at position * SOFT_MAX_BLOCK_LENGTH - 1, producing a block of * length SOFT_MAX_BLOCK_LENGTH - 1 + * DEFLATE_MAX_MATCH_LEN. Add one for the end-of-block * node. */ struct deflate_optimum_node optimum_nodes[SOFT_MAX_BLOCK_LENGTH - 1 + DEFLATE_MAX_MATCH_LEN + 1]; /* The current cost model being used. */ struct deflate_costs costs; unsigned num_optim_passes; } n; /* (n)ear-optimal */ #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */ } p; /* (p)arser */ }; /* * The type for the bitbuffer variable, which temporarily holds bits that are * being packed into bytes and written to the output buffer. For best * performance, this should have size equal to a machine word. */ typedef machine_word_t bitbuf_t; #define BITBUF_NBITS (8 * sizeof(bitbuf_t)) /* Can the specified number of bits always be added to 'bitbuf' after any * pending bytes have been flushed? */ #define CAN_BUFFER(n) ((n) <= BITBUF_NBITS - 7) /* * Structure to keep track of the current state of sending bits to the * compressed output buffer. */ struct deflate_output_bitstream { /* Bits that haven't yet been written to the output buffer. */ bitbuf_t bitbuf; /* Number of bits currently held in @bitbuf. */ unsigned bitcount; /* Pointer to the beginning of the output buffer. */ u8 *begin; /* Pointer to the position in the output buffer at which the next byte * should be written. 
*/ u8 *next; /* Pointer just past the end of the output buffer. */ u8 *end; }; /* * OUTPUT_END_PADDING is the size, in bytes, of the extra space that must be * present following os->end, in order to not overrun the buffer when generating * output. When UNALIGNED_ACCESS_IS_FAST, we need at least sizeof(bitbuf_t) * bytes for put_unaligned_leword(). Otherwise we need only 1 byte. However, * to make the compression algorithm produce the same result on all CPU * architectures (which is sometimes desirable), we have to unconditionally use * the maximum for any CPU, which is sizeof(bitbuf_t) == 8. */ #define OUTPUT_END_PADDING 8 /* Initialize the output bitstream. 'size' is assumed to be at least * OUTPUT_END_PADDING. */ static void deflate_init_output(struct deflate_output_bitstream *os, void *buffer, size_t size) { os->bitbuf = 0; os->bitcount = 0; os->begin = buffer; os->next = os->begin; os->end = os->begin + size - OUTPUT_END_PADDING; } /* Add some bits to the bitbuffer variable of the output bitstream. The caller * must make sure there is enough room. */ static forceinline void deflate_add_bits(struct deflate_output_bitstream *os, const bitbuf_t bits, const unsigned num_bits) { os->bitbuf |= bits << os->bitcount; os->bitcount += num_bits; } /* Flush bits from the bitbuffer variable to the output buffer. */ static forceinline void deflate_flush_bits(struct deflate_output_bitstream *os) { if (UNALIGNED_ACCESS_IS_FAST) { /* Flush a whole word (branchlessly). */ put_unaligned_leword(os->bitbuf, os->next); os->bitbuf >>= os->bitcount & ~7; os->next += MIN(os->end - os->next, os->bitcount >> 3); os->bitcount &= 7; } else { /* Flush a byte at a time. */ while (os->bitcount >= 8) { *os->next = os->bitbuf; if (os->next != os->end) os->next++; os->bitcount -= 8; os->bitbuf >>= 8; } } } /* Align the bitstream on a byte boundary. 
*/ static forceinline void deflate_align_bitstream(struct deflate_output_bitstream *os) { os->bitcount += -os->bitcount & 7; deflate_flush_bits(os); } /* * Flush any remaining bits to the output buffer if needed. Return the total * number of bytes written to the output buffer, or 0 if an overflow occurred. */ static size_t deflate_flush_output(struct deflate_output_bitstream *os) { if (os->next == os->end) /* overflow? */ return 0; while ((int)os->bitcount > 0) { *os->next++ = os->bitbuf; os->bitcount -= 8; os->bitbuf >>= 8; } return os->next - os->begin; } /* Given the binary tree node A[subtree_idx] whose children already * satisfy the maxheap property, swap the node with its greater child * until it is greater than both its children, so that the maxheap * property is satisfied in the subtree rooted at A[subtree_idx]. */ static void heapify_subtree(u32 A[], unsigned length, unsigned subtree_idx) { unsigned parent_idx; unsigned child_idx; u32 v; v = A[subtree_idx]; parent_idx = subtree_idx; while ((child_idx = parent_idx * 2) <= length) { if (child_idx < length && A[child_idx + 1] > A[child_idx]) child_idx++; if (v >= A[child_idx]) break; A[parent_idx] = A[child_idx]; parent_idx = child_idx; } A[parent_idx] = v; } /* Rearrange the array 'A' so that it satisfies the maxheap property. * 'A' uses 1-based indices, so the children of A[i] are A[i*2] and A[i*2 + 1]. */ static void heapify_array(u32 A[], unsigned length) { unsigned subtree_idx; for (subtree_idx = length / 2; subtree_idx >= 1; subtree_idx--) heapify_subtree(A, length, subtree_idx); } /* * Sort the array 'A', which contains 'length' unsigned 32-bit integers. * * Note: name this function heap_sort() instead of heapsort() to avoid colliding * with heapsort() from stdlib.h on BSD-derived systems --- though this isn't * necessary when compiling with -D_ANSI_SOURCE, which is the better solution. 
*/ static void heap_sort(u32 A[], unsigned length) { A--; /* Use 1-based indices */ heapify_array(A, length); while (length >= 2) { u32 tmp = A[length]; A[length] = A[1]; A[1] = tmp; length--; heapify_subtree(A, length, 1); } } #define NUM_SYMBOL_BITS 10 #define SYMBOL_MASK ((1 << NUM_SYMBOL_BITS) - 1) #define GET_NUM_COUNTERS(num_syms) ((((num_syms) + 3 / 4) + 3) & ~3) /* * Sort the symbols primarily by frequency and secondarily by symbol * value. Discard symbols with zero frequency and fill in an array with * the remaining symbols, along with their frequencies. The low * NUM_SYMBOL_BITS bits of each array entry will contain the symbol * value, and the remaining bits will contain the frequency. * * @num_syms * Number of symbols in the alphabet. * Can't be greater than (1 << NUM_SYMBOL_BITS). * * @freqs[num_syms] * The frequency of each symbol. * * @lens[num_syms] * An array that eventually will hold the length of each codeword. * This function only fills in the codeword lengths for symbols that * have zero frequency, which are not well defined per se but will * be set to 0. * * @symout[num_syms] * The output array, described above. * * Returns the number of entries in 'symout' that were filled. This is * the number of symbols that have nonzero frequency. */ static unsigned sort_symbols(unsigned num_syms, const u32 freqs[restrict], u8 lens[restrict], u32 symout[restrict]) { unsigned sym; unsigned i; unsigned num_used_syms; unsigned num_counters; unsigned counters[GET_NUM_COUNTERS(DEFLATE_MAX_NUM_SYMS)]; /* We rely on heapsort, but with an added optimization. Since * it's common for most symbol frequencies to be low, we first do * a count sort using a limited number of counters. High * frequencies will be counted in the last counter, and only they * will be sorted with heapsort. * * Note: with more symbols, it is generally beneficial to have more * counters. About 1 counter per 4 symbols seems fast. 
* * Note: I also tested radix sort, but even for large symbol * counts (> 255) and frequencies bounded at 16 bits (enabling * radix sort by just two base-256 digits), it didn't seem any * faster than the method implemented here. * * Note: I tested the optimized quicksort implementation from * glibc (with indirection overhead removed), but it was only * marginally faster than the simple heapsort implemented here. * * Tests were done with building the codes for LZX. Results may * vary for different compression algorithms...! */ num_counters = GET_NUM_COUNTERS(num_syms); memset(counters, 0, num_counters * sizeof(counters[0])); /* Count the frequencies. */ for (sym = 0; sym < num_syms; sym++) counters[MIN(freqs[sym], num_counters - 1)]++; /* Make the counters cumulative, ignoring the zero-th, which * counted symbols with zero frequency. As a side effect, this * calculates the number of symbols with nonzero frequency. */ num_used_syms = 0; for (i = 1; i < num_counters; i++) { unsigned count = counters[i]; counters[i] = num_used_syms; num_used_syms += count; } /* Sort nonzero-frequency symbols using the counters. At the * same time, set the codeword lengths of zero-frequency symbols * to 0. */ for (sym = 0; sym < num_syms; sym++) { u32 freq = freqs[sym]; if (freq != 0) { symout[counters[MIN(freq, num_counters - 1)]++] = sym | (freq << NUM_SYMBOL_BITS); } else { lens[sym] = 0; } } /* Sort the symbols counted in the last counter. */ heap_sort(symout + counters[num_counters - 2], counters[num_counters - 1] - counters[num_counters - 2]); return num_used_syms; } /* * Build the Huffman tree. * * This is an optimized implementation that * (a) takes advantage of the frequencies being already sorted; * (b) only generates non-leaf nodes, since the non-leaf nodes of a * Huffman tree are sufficient to generate a canonical code; * (c) Only stores parent pointers, not child pointers; * (d) Produces the nodes in the same memory used for input * frequency information. 
*
 * Array 'A', which contains 'sym_count' entries, is used for both input
 * and output.  For this function, 'sym_count' must be at least 2.
 *
 * Input: 'A' holds the symbol frequencies in increasing order, each one
 * stored left shifted by NUM_SYMBOL_BITS bits.  Whatever is in the low
 * NUM_SYMBOL_BITS bits of an entry (in practice, the symbol the frequency
 * belongs to) is not interpreted by this function and is carried through
 * untouched.
 *
 * Output: the first (sym_count - 1) entries of 'A' describe the non-leaf
 * nodes of the Huffman tree, with the root at A[sym_count - 2].  Every
 * other node stores the zero-based index of its parent node in 'A', left
 * shifted by NUM_SYMBOL_BITS bits.  The low NUM_SYMBOL_BITS bits of each
 * entry are preserved, but be aware that the symbol value found there has
 * *no relationship* with the tree node sharing the slot, because only
 * non-leaf nodes are generated.
 */
static void
build_tree(u32 A[], unsigned sym_count)
{
	/* Next not-yet-consumed leaf (lowest remaining symbol frequency). */
	unsigned next_leaf = 0;

	/* Next parentless non-leaf node (lowest frequency); when equal to
	 * 'next_free', no such node exists yet. */
	unsigned next_internal = 0;

	/* Slot in which the next non-leaf node will be created. */
	unsigned next_free = 0;

	do {
		unsigned first, second;
		u32 combined_freq;
		u32 parent_link;

		/*
		 * Pick the two lowest-frequency candidates.  A leaf is taken
		 * when one remains and either no parentless non-leaf exists
		 * or the leaf's frequency does not exceed the non-leaf's.
		 */
		first = (next_leaf != sym_count &&
			 (next_internal == next_free ||
			  (A[next_leaf] >> NUM_SYMBOL_BITS) <=
			  (A[next_internal] >> NUM_SYMBOL_BITS)))
			? next_leaf++ : next_internal++;

		second = (next_leaf != sym_count &&
			  (next_internal == next_free ||
			   (A[next_leaf] >> NUM_SYMBOL_BITS) <=
			   (A[next_internal] >> NUM_SYMBOL_BITS)))
			? next_leaf++ : next_internal++;

		/*
		 * The new non-leaf node's frequency is the sum of its two
		 * children's frequencies.  This must be read out before the
		 * stores below, which overwrite the children's frequency bits.
		 */
		combined_freq = (A[first] & ~SYMBOL_MASK) +
				(A[second] & ~SYMBOL_MASK);
		parent_link = next_free << NUM_SYMBOL_BITS;

		/*
		 * Link both children to the new node, then create the node.
		 * Linking a leaf (an entry reached via 'next_leaf') has no
		 * lasting effect, since that slot is overwritten with a
		 * non-leaf once 'next_free' catches up to it; it is simply not
		 * worth a branch to skip.  The order of the three stores
		 * matters: 'first' or 'second' may be the same slot as
		 * 'next_free', and the node's own data must win.
		 */
		A[first] = (A[first] & SYMBOL_MASK) | parent_link;
		A[second] = (A[second] & SYMBOL_MASK) | parent_link;
		A[next_free] = (A[next_free] & SYMBOL_MASK) | combined_freq;
		next_free++;
	} while (sym_count - next_free > 1);

	/*
	 * Exactly one entry is left over at this point: a "leaf" that was
	 * linked to some other node.  It is ignored, because the rest of the
	 * array holds the non-leaves, which are all that is needed.  (The
	 * cases with 0 or 1 symbols are assumed to be handled separately.)
	 */
}

/*
 * Given the stripped-down Huffman tree constructed by build_tree(),
 * determine the number of codewords that should be assigned each
 * possible length, taking into account the length-limited constraint.
 *
 * @A
 *	The array produced by build_tree(), containing parent index
 *	information for the non-leaf nodes of the Huffman tree.  Each
 *	entry in this array is a node; a node's parent always has a
 *	greater index than that node itself.  This function will
 *	overwrite the parent index information in this array, so
 *	essentially it will destroy the tree.  However, the data in the
 *	low NUM_SYMBOL_BITS of each entry will be preserved.
 *
 * @root_idx
 *	The 0-based index of the root node in 'A', and consequently one
 *	less than the number of tree node entries in 'A'.
(Or, really 2 * less than the actual length of 'A'.) * * @len_counts * An array of length ('max_codeword_len' + 1) in which the number of * codewords having each length <= max_codeword_len will be * returned. * * @max_codeword_len * The maximum permissible codeword length. */ static void compute_length_counts(u32 A[restrict], unsigned root_idx, unsigned len_counts[restrict], unsigned max_codeword_len) { unsigned len; int node; /* The key observations are: * * (1) We can traverse the non-leaf nodes of the tree, always * visiting a parent before its children, by simply iterating * through the array in reverse order. Consequently, we can * compute the depth of each node in one pass, overwriting the * parent indices with depths. * * (2) We can initially assume that in the real Huffman tree, * both children of the root are leaves. This corresponds to two * codewords of length 1. Then, whenever we visit a (non-leaf) * node during the traversal, we modify this assumption to * account for the current node *not* being a leaf, but rather * its two children being leaves. This causes the loss of one * codeword for the current depth and the addition of two * codewords for the current depth plus one. * * (3) We can handle the length-limited constraint fairly easily * by simply using the largest length available when a depth * exceeds max_codeword_len. */ for (len = 0; len <= max_codeword_len; len++) len_counts[len] = 0; len_counts[1] = 2; /* Set the root node's depth to 0. */ A[root_idx] &= SYMBOL_MASK; for (node = root_idx - 1; node >= 0; node--) { /* Calculate the depth of this node. */ unsigned parent = A[node] >> NUM_SYMBOL_BITS; unsigned parent_depth = A[parent] >> NUM_SYMBOL_BITS; unsigned depth = parent_depth + 1; unsigned len = depth; /* Set the depth of this node so that it is available * when its children (if any) are processed. */ A[node] = (A[node] & SYMBOL_MASK) | (depth << NUM_SYMBOL_BITS); /* If needed, decrease the length to meet the * length-limited constraint. 
This is not the optimal * method for generating length-limited Huffman codes! * But it should be good enough. */ if (len >= max_codeword_len) { len = max_codeword_len; do { len--; } while (len_counts[len] == 0); } /* Account for the fact that we have a non-leaf node at * the current depth. */ len_counts[len]--; len_counts[len + 1] += 2; } } /* * Generate the codewords for a canonical Huffman code. * * @A * The output array for codewords. In addition, initially this * array must contain the symbols, sorted primarily by frequency and * secondarily by symbol value, in the low NUM_SYMBOL_BITS bits of * each entry. * * @len * Output array for codeword lengths. * * @len_counts * An array that provides the number of codewords that will have * each possible length <= max_codeword_len. * * @max_codeword_len * Maximum length, in bits, of each codeword. * * @num_syms * Number of symbols in the alphabet, including symbols with zero * frequency. This is the length of the 'A' and 'len' arrays. */ static void gen_codewords(u32 A[restrict], u8 lens[restrict], const unsigned len_counts[restrict], unsigned max_codeword_len, unsigned num_syms) { u32 next_codewords[DEFLATE_MAX_CODEWORD_LEN + 1]; unsigned i; unsigned len; unsigned sym; /* Given the number of codewords that will have each length, * assign codeword lengths to symbols. We do this by assigning * the lengths in decreasing order to the symbols sorted * primarily by increasing frequency and secondarily by * increasing symbol value. */ for (i = 0, len = max_codeword_len; len >= 1; len--) { unsigned count = len_counts[len]; while (count--) lens[A[i++] & SYMBOL_MASK] = len; } /* Generate the codewords themselves. We initialize the * 'next_codewords' array to provide the lexicographically first * codeword of each length, then assign codewords in symbol * order. This produces a canonical code. 
*/ next_codewords[0] = 0; next_codewords[1] = 0; for (len = 2; len <= max_codeword_len; len++) next_codewords[len] = (next_codewords[len - 1] + len_counts[len - 1]) << 1; for (sym = 0; sym < num_syms; sym++) A[sym] = next_codewords[lens[sym]]++; } /* * --------------------------------------------------------------------- * make_canonical_huffman_code() * --------------------------------------------------------------------- * * Given an alphabet and the frequency of each symbol in it, construct a * length-limited canonical Huffman code. * * @num_syms * The number of symbols in the alphabet. The symbols are the * integers in the range [0, num_syms - 1]. This parameter must be * at least 2 and can't be greater than (1 << NUM_SYMBOL_BITS). * * @max_codeword_len * The maximum permissible codeword length. * * @freqs * An array of @num_syms entries, each of which specifies the * frequency of the corresponding symbol. It is valid for some, * none, or all of the frequencies to be 0. * * @lens * An array of @num_syms entries in which this function will return * the length, in bits, of the codeword assigned to each symbol. * Symbols with 0 frequency will not have codewords per se, but * their entries in this array will be set to 0. No lengths greater * than @max_codeword_len will be assigned. * * @codewords * An array of @num_syms entries in which this function will return * the codeword for each symbol, right-justified and padded on the * left with zeroes. Codewords for symbols with 0 frequency will be * undefined. * * --------------------------------------------------------------------- * * This function builds a length-limited canonical Huffman code. * * A length-limited Huffman code contains no codewords longer than some * specified length, and has exactly (with some algorithms) or * approximately (with the algorithm used here) the minimum weighted path * length from the root, given this constraint. 
* * A canonical Huffman code satisfies the properties that a longer * codeword never lexicographically precedes a shorter codeword, and the * lexicographic ordering of codewords of the same length is the same as * the lexicographic ordering of the corresponding symbols. A canonical * Huffman code, or more generally a canonical prefix code, can be * reconstructed from only a list containing the codeword length of each * symbol. * * The classic algorithm to generate a Huffman code creates a node for * each symbol, then inserts these nodes into a min-heap keyed by symbol * frequency. Then, repeatedly, the two lowest-frequency nodes are * removed from the min-heap and added as the children of a new node * having frequency equal to the sum of its two children, which is then * inserted into the min-heap. When only a single node remains in the * min-heap, it is the root of the Huffman tree. The codeword for each * symbol is determined by the path needed to reach the corresponding * node from the root. Descending to the left child appends a 0 bit, * whereas descending to the right child appends a 1 bit. * * The classic algorithm is relatively easy to understand, but it is * subject to a number of inefficiencies. In practice, it is fastest to * first sort the symbols by frequency. (This itself can be subject to * an optimization based on the fact that most frequencies tend to be * low.) At the same time, we sort secondarily by symbol value, which * aids the process of generating a canonical code. Then, during tree * construction, no heap is necessary because both the leaf nodes and the * unparented non-leaf nodes can be easily maintained in sorted order. * Consequently, there can never be more than two possibilities for the * next-lowest-frequency node. * * In addition, because we're generating a canonical code, we actually * don't need the leaf nodes of the tree at all, only the non-leaf nodes. 
* This is because for canonical code generation we don't need to know * where the symbols are in the tree. Rather, we only need to know how * many leaf nodes have each depth (codeword length). And this * information can, in fact, be quickly generated from the tree of * non-leaves only. * * Furthermore, we can build this stripped-down Huffman tree directly in * the array in which the codewords are to be generated, provided that * these array slots are large enough to hold a symbol and frequency * value. * * Still furthermore, we don't even need to maintain explicit child * pointers. We only need the parent pointers, and even those can be * overwritten in-place with depth information as part of the process of * extracting codeword lengths from the tree. So in summary, we do NOT * need a big structure like: * * struct huffman_tree_node { * unsigned int symbol; * unsigned int frequency; * unsigned int depth; * struct huffman_tree_node *left_child; * struct huffman_tree_node *right_child; * }; * * * ... which often gets used in "naive" implementations of Huffman code * generation. * * Many of these optimizations are based on the implementation in 7-Zip * (source file: C/HuffEnc.c), which has been placed in the public domain * by Igor Pavlov. */ static void make_canonical_huffman_code(unsigned num_syms, unsigned max_codeword_len, const u32 freqs[restrict], u8 lens[restrict], u32 codewords[restrict]) { u32 *A = codewords; unsigned num_used_syms; STATIC_ASSERT(DEFLATE_MAX_NUM_SYMS <= 1 << NUM_SYMBOL_BITS); /* We begin by sorting the symbols primarily by frequency and * secondarily by symbol value. As an optimization, the array * used for this purpose ('A') shares storage with the space in * which we will eventually return the codewords. */ num_used_syms = sort_symbols(num_syms, freqs, lens, A); /* 'num_used_syms' is the number of symbols with nonzero * frequency. This may be less than @num_syms. 'num_used_syms' * is also the number of entries in 'A' that are valid. 
Each * entry consists of a distinct symbol and a nonzero frequency * packed into a 32-bit integer. */ /* Handle special cases where only 0 or 1 symbols were used (had * nonzero frequency). */ if (unlikely(num_used_syms == 0)) { /* Code is empty. sort_symbols() already set all lengths * to 0, so there is nothing more to do. */ return; } if (unlikely(num_used_syms == 1)) { /* Only one symbol was used, so we only need one * codeword. But two codewords are needed to form the * smallest complete Huffman code, which uses codewords 0 * and 1. Therefore, we choose another symbol to which * to assign a codeword. We use 0 (if the used symbol is * not 0) or 1 (if the used symbol is 0). In either * case, the lesser-valued symbol must be assigned * codeword 0 so that the resulting code is canonical. */ unsigned sym = A[0] & SYMBOL_MASK; unsigned nonzero_idx = sym ? sym : 1; codewords[0] = 0; lens[0] = 1; codewords[nonzero_idx] = 1; lens[nonzero_idx] = 1; return; } /* Build a stripped-down version of the Huffman tree, sharing the * array 'A' with the symbol values. Then extract length counts * from the tree and use them to generate the final codewords. */ build_tree(A, num_used_syms); { unsigned len_counts[DEFLATE_MAX_CODEWORD_LEN + 1]; compute_length_counts(A, num_used_syms - 2, len_counts, max_codeword_len); gen_codewords(A, lens, len_counts, max_codeword_len, num_syms); } } /* * Clear the Huffman symbol frequency counters. * This must be called when starting a new DEFLATE block. */ static void deflate_reset_symbol_frequencies(struct libdeflate_compressor *c) { memset(&c->freqs, 0, sizeof(c->freqs)); } /* Reverse the Huffman codeword 'codeword', which is 'len' bits in length. */ static u32 deflate_reverse_codeword(u32 codeword, u8 len) { /* The following branchless algorithm is faster than going bit by bit. * Note: since no codewords are longer than 16 bits, we only need to * reverse the low 16 bits of the 'u32'. 
*/ STATIC_ASSERT(DEFLATE_MAX_CODEWORD_LEN <= 16); /* Flip adjacent 1-bit fields */ codeword = ((codeword & 0x5555) << 1) | ((codeword & 0xAAAA) >> 1); /* Flip adjacent 2-bit fields */ codeword = ((codeword & 0x3333) << 2) | ((codeword & 0xCCCC) >> 2); /* Flip adjacent 4-bit fields */ codeword = ((codeword & 0x0F0F) << 4) | ((codeword & 0xF0F0) >> 4); /* Flip adjacent 8-bit fields */ codeword = ((codeword & 0x00FF) << 8) | ((codeword & 0xFF00) >> 8); /* Return the high 'len' bits of the bit-reversed 16 bit value. */ return codeword >> (16 - len); } /* Make a canonical Huffman code with bit-reversed codewords. */ static void deflate_make_huffman_code(unsigned num_syms, unsigned max_codeword_len, const u32 freqs[], u8 lens[], u32 codewords[]) { unsigned sym; make_canonical_huffman_code(num_syms, max_codeword_len, freqs, lens, codewords); for (sym = 0; sym < num_syms; sym++) codewords[sym] = deflate_reverse_codeword(codewords[sym], lens[sym]); } /* * Build the literal/length and offset Huffman codes for a DEFLATE block. * * This takes as input the frequency tables for each code and produces as output * a set of tables that map symbols to codewords and codeword lengths. */ static void deflate_make_huffman_codes(const struct deflate_freqs *freqs, struct deflate_codes *codes) { STATIC_ASSERT(MAX_LITLEN_CODEWORD_LEN <= DEFLATE_MAX_LITLEN_CODEWORD_LEN); STATIC_ASSERT(MAX_OFFSET_CODEWORD_LEN <= DEFLATE_MAX_OFFSET_CODEWORD_LEN); deflate_make_huffman_code(DEFLATE_NUM_LITLEN_SYMS, MAX_LITLEN_CODEWORD_LEN, freqs->litlen, codes->lens.litlen, codes->codewords.litlen); deflate_make_huffman_code(DEFLATE_NUM_OFFSET_SYMS, MAX_OFFSET_CODEWORD_LEN, freqs->offset, codes->lens.offset, codes->codewords.offset); } /* Initialize c->static_codes. 
*/ static void deflate_init_static_codes(struct libdeflate_compressor *c) { unsigned i; for (i = 0; i < 144; i++) c->freqs.litlen[i] = 1 << (9 - 8); for (; i < 256; i++) c->freqs.litlen[i] = 1 << (9 - 9); for (; i < 280; i++) c->freqs.litlen[i] = 1 << (9 - 7); for (; i < 288; i++) c->freqs.litlen[i] = 1 << (9 - 8); for (i = 0; i < 32; i++) c->freqs.offset[i] = 1 << (5 - 5); deflate_make_huffman_codes(&c->freqs, &c->static_codes); } /* Return the offset slot for the specified match offset. */ static forceinline unsigned deflate_get_offset_slot(struct libdeflate_compressor *c, unsigned offset) { #if USE_FULL_OFFSET_SLOT_FAST return c->offset_slot_fast[offset]; #else if (offset <= 256) return c->offset_slot_fast[offset - 1]; else return c->offset_slot_fast[256 + ((offset - 1) >> 7)]; #endif } /* Write the header fields common to all DEFLATE block types. */ static void deflate_write_block_header(struct deflate_output_bitstream *os, bool is_final_block, unsigned block_type) { deflate_add_bits(os, is_final_block, 1); deflate_add_bits(os, block_type, 2); deflate_flush_bits(os); } static unsigned deflate_compute_precode_items(const u8 lens[restrict], const unsigned num_lens, u32 precode_freqs[restrict], unsigned precode_items[restrict]) { unsigned *itemptr; unsigned run_start; unsigned run_end; unsigned extra_bits; u8 len; memset(precode_freqs, 0, DEFLATE_NUM_PRECODE_SYMS * sizeof(precode_freqs[0])); itemptr = precode_items; run_start = 0; do { /* Find the next run of codeword lengths. */ /* len = the length being repeated */ len = lens[run_start]; /* Extend the run. */ run_end = run_start; do { run_end++; } while (run_end != num_lens && len == lens[run_end]); if (len == 0) { /* Run of zeroes. */ /* Symbol 18: RLE 11 to 138 zeroes at a time. */ while ((run_end - run_start) >= 11) { extra_bits = MIN((run_end - run_start) - 11, 0x7F); precode_freqs[18]++; *itemptr++ = 18 | (extra_bits << 5); run_start += 11 + extra_bits; } /* Symbol 17: RLE 3 to 10 zeroes at a time. 
*/ if ((run_end - run_start) >= 3) { extra_bits = MIN((run_end - run_start) - 3, 0x7); precode_freqs[17]++; *itemptr++ = 17 | (extra_bits << 5); run_start += 3 + extra_bits; } } else { /* A run of nonzero lengths. */ /* Symbol 16: RLE 3 to 6 of the previous length. */ if ((run_end - run_start) >= 4) { precode_freqs[len]++; *itemptr++ = len; run_start++; do { extra_bits = MIN((run_end - run_start) - 3, 0x3); precode_freqs[16]++; *itemptr++ = 16 | (extra_bits << 5); run_start += 3 + extra_bits; } while ((run_end - run_start) >= 3); } } /* Output any remaining lengths without RLE. */ while (run_start != run_end) { precode_freqs[len]++; *itemptr++ = len; run_start++; } } while (run_start != num_lens); return itemptr - precode_items; } /* * Huffman codeword lengths for dynamic Huffman blocks are compressed using a * separate Huffman code, the "precode", which contains a symbol for each * possible codeword length in the larger code as well as several special * symbols to represent repeated codeword lengths (a form of run-length * encoding). The precode is itself constructed in canonical form, and its * codeword lengths are represented literally in 19 3-bit fields that * immediately precede the compressed codeword lengths of the larger code. */ /* Precompute the information needed to output Huffman codes. */ static void deflate_precompute_huffman_header(struct libdeflate_compressor *c) { /* Compute how many litlen and offset symbols are needed. */ for (c->num_litlen_syms = DEFLATE_NUM_LITLEN_SYMS; c->num_litlen_syms > 257; c->num_litlen_syms--) if (c->codes.lens.litlen[c->num_litlen_syms - 1] != 0) break; for (c->num_offset_syms = DEFLATE_NUM_OFFSET_SYMS; c->num_offset_syms > 1; c->num_offset_syms--) if (c->codes.lens.offset[c->num_offset_syms - 1] != 0) break; /* If we're not using the full set of literal/length codeword lengths, * then temporarily move the offset codeword lengths over so that the * literal/length and offset codeword lengths are contiguous. 
*/ STATIC_ASSERT(offsetof(struct deflate_lens, offset) == DEFLATE_NUM_LITLEN_SYMS); if (c->num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) { memmove((u8 *)&c->codes.lens + c->num_litlen_syms, (u8 *)&c->codes.lens + DEFLATE_NUM_LITLEN_SYMS, c->num_offset_syms); } /* Compute the "items" (RLE / literal tokens and extra bits) with which * the codeword lengths in the larger code will be output. */ c->num_precode_items = deflate_compute_precode_items((u8 *)&c->codes.lens, c->num_litlen_syms + c->num_offset_syms, c->precode_freqs, c->precode_items); /* Build the precode. */ STATIC_ASSERT(MAX_PRE_CODEWORD_LEN <= DEFLATE_MAX_PRE_CODEWORD_LEN); deflate_make_huffman_code(DEFLATE_NUM_PRECODE_SYMS, MAX_PRE_CODEWORD_LEN, c->precode_freqs, c->precode_lens, c->precode_codewords); /* Count how many precode lengths we actually need to output. */ for (c->num_explicit_lens = DEFLATE_NUM_PRECODE_SYMS; c->num_explicit_lens > 4; c->num_explicit_lens--) if (c->precode_lens[deflate_precode_lens_permutation[ c->num_explicit_lens - 1]] != 0) break; /* Restore the offset codeword lengths if needed. */ if (c->num_litlen_syms != DEFLATE_NUM_LITLEN_SYMS) { memmove((u8 *)&c->codes.lens + DEFLATE_NUM_LITLEN_SYMS, (u8 *)&c->codes.lens + c->num_litlen_syms, c->num_offset_syms); } } /* Output the Huffman codes. */ static void deflate_write_huffman_header(struct libdeflate_compressor *c, struct deflate_output_bitstream *os) { unsigned i; deflate_add_bits(os, c->num_litlen_syms - 257, 5); deflate_add_bits(os, c->num_offset_syms - 1, 5); deflate_add_bits(os, c->num_explicit_lens - 4, 4); deflate_flush_bits(os); /* Output the lengths of the codewords in the precode. */ for (i = 0; i < c->num_explicit_lens; i++) { deflate_add_bits(os, c->precode_lens[ deflate_precode_lens_permutation[i]], 3); deflate_flush_bits(os); } /* Output the encoded lengths of the codewords in the larger code. 
*/ for (i = 0; i < c->num_precode_items; i++) { unsigned precode_item = c->precode_items[i]; unsigned precode_sym = precode_item & 0x1F; deflate_add_bits(os, c->precode_codewords[precode_sym], c->precode_lens[precode_sym]); if (precode_sym >= 16) { if (precode_sym == 16) deflate_add_bits(os, precode_item >> 5, 2); else if (precode_sym == 17) deflate_add_bits(os, precode_item >> 5, 3); else deflate_add_bits(os, precode_item >> 5, 7); } STATIC_ASSERT(CAN_BUFFER(DEFLATE_MAX_PRE_CODEWORD_LEN + 7)); deflate_flush_bits(os); } } static void deflate_write_sequences(struct deflate_output_bitstream * restrict os, const struct deflate_codes * restrict codes, const struct deflate_sequence sequences[restrict], const u8 * restrict in_next) { const struct deflate_sequence *seq = sequences; for (;;) { u32 litrunlen = seq->litrunlen_and_length & 0x7FFFFF; unsigned length = seq->litrunlen_and_length >> 23; unsigned length_slot; unsigned litlen_symbol; unsigned offset_symbol; if (litrunlen) { #if 1 while (litrunlen >= 4) { unsigned lit0 = in_next[0]; unsigned lit1 = in_next[1]; unsigned lit2 = in_next[2]; unsigned lit3 = in_next[3]; deflate_add_bits(os, codes->codewords.litlen[lit0], codes->lens.litlen[lit0]); if (!CAN_BUFFER(2 * MAX_LITLEN_CODEWORD_LEN)) deflate_flush_bits(os); deflate_add_bits(os, codes->codewords.litlen[lit1], codes->lens.litlen[lit1]); if (!CAN_BUFFER(4 * MAX_LITLEN_CODEWORD_LEN)) deflate_flush_bits(os); deflate_add_bits(os, codes->codewords.litlen[lit2], codes->lens.litlen[lit2]); if (!CAN_BUFFER(2 * MAX_LITLEN_CODEWORD_LEN)) deflate_flush_bits(os); deflate_add_bits(os, codes->codewords.litlen[lit3], codes->lens.litlen[lit3]); deflate_flush_bits(os); in_next += 4; litrunlen -= 4; } if (litrunlen-- != 0) { deflate_add_bits(os, codes->codewords.litlen[*in_next], codes->lens.litlen[*in_next]); if (!CAN_BUFFER(3 * MAX_LITLEN_CODEWORD_LEN)) deflate_flush_bits(os); in_next++; if (litrunlen-- != 0) { deflate_add_bits(os, codes->codewords.litlen[*in_next], 
codes->lens.litlen[*in_next]); if (!CAN_BUFFER(3 * MAX_LITLEN_CODEWORD_LEN)) deflate_flush_bits(os); in_next++; if (litrunlen-- != 0) { deflate_add_bits(os, codes->codewords.litlen[*in_next], codes->lens.litlen[*in_next]); if (!CAN_BUFFER(3 * MAX_LITLEN_CODEWORD_LEN)) deflate_flush_bits(os); in_next++; } } if (CAN_BUFFER(3 * MAX_LITLEN_CODEWORD_LEN)) deflate_flush_bits(os); } #else do { unsigned lit = *in_next++; deflate_add_bits(os, codes->codewords.litlen[lit], codes->lens.litlen[lit]); deflate_flush_bits(os); } while (--litrunlen); #endif } if (length == 0) return; in_next += length; length_slot = seq->length_slot; litlen_symbol = 257 + length_slot; /* Litlen symbol */ deflate_add_bits(os, codes->codewords.litlen[litlen_symbol], codes->lens.litlen[litlen_symbol]); /* Extra length bits */ STATIC_ASSERT(CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + DEFLATE_MAX_EXTRA_LENGTH_BITS)); deflate_add_bits(os, length - deflate_length_slot_base[length_slot], deflate_extra_length_bits[length_slot]); if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + DEFLATE_MAX_EXTRA_LENGTH_BITS + MAX_OFFSET_CODEWORD_LEN + DEFLATE_MAX_EXTRA_OFFSET_BITS)) deflate_flush_bits(os); /* Offset symbol */ offset_symbol = seq->offset_symbol; deflate_add_bits(os, codes->codewords.offset[offset_symbol], codes->lens.offset[offset_symbol]); if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN + DEFLATE_MAX_EXTRA_OFFSET_BITS)) deflate_flush_bits(os); /* Extra offset bits */ deflate_add_bits(os, seq->offset - deflate_offset_slot_base[offset_symbol], deflate_extra_offset_bits[offset_symbol]); deflate_flush_bits(os); seq++; } } #if SUPPORT_NEAR_OPTIMAL_PARSING /* * Follow the minimum-cost path in the graph of possible match/literal choices * for the current block and write out the matches/literals using the specified * Huffman codes. * * Note: this is slightly duplicated with deflate_write_sequences(), the reason * being that we don't want to waste time translating between intermediate * match/literal representations. 
*/ static void deflate_write_item_list(struct deflate_output_bitstream *os, const struct deflate_codes *codes, struct libdeflate_compressor *c, u32 block_length) { struct deflate_optimum_node *cur_node = &c->p.n.optimum_nodes[0]; struct deflate_optimum_node * const end_node = &c->p.n.optimum_nodes[block_length]; do { unsigned length = cur_node->item & OPTIMUM_LEN_MASK; unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT; unsigned litlen_symbol; unsigned length_slot; unsigned offset_slot; if (length == 1) { /* Literal */ litlen_symbol = offset; deflate_add_bits(os, codes->codewords.litlen[litlen_symbol], codes->lens.litlen[litlen_symbol]); deflate_flush_bits(os); } else { /* Match length */ length_slot = deflate_length_slot[length]; litlen_symbol = 257 + length_slot; deflate_add_bits(os, codes->codewords.litlen[litlen_symbol], codes->lens.litlen[litlen_symbol]); deflate_add_bits(os, length - deflate_length_slot_base[length_slot], deflate_extra_length_bits[length_slot]); if (!CAN_BUFFER(MAX_LITLEN_CODEWORD_LEN + DEFLATE_MAX_EXTRA_LENGTH_BITS + MAX_OFFSET_CODEWORD_LEN + DEFLATE_MAX_EXTRA_OFFSET_BITS)) deflate_flush_bits(os); /* Match offset */ offset_slot = deflate_get_offset_slot(c, offset); deflate_add_bits(os, codes->codewords.offset[offset_slot], codes->lens.offset[offset_slot]); if (!CAN_BUFFER(MAX_OFFSET_CODEWORD_LEN + DEFLATE_MAX_EXTRA_OFFSET_BITS)) deflate_flush_bits(os); deflate_add_bits(os, offset - deflate_offset_slot_base[offset_slot], deflate_extra_offset_bits[offset_slot]); deflate_flush_bits(os); } cur_node += length; } while (cur_node != end_node); } #endif /* SUPPORT_NEAR_OPTIMAL_PARSING */ /* Output the end-of-block symbol. 
*/ static void deflate_write_end_of_block(struct deflate_output_bitstream *os, const struct deflate_codes *codes) { deflate_add_bits(os, codes->codewords.litlen[DEFLATE_END_OF_BLOCK], codes->lens.litlen[DEFLATE_END_OF_BLOCK]); deflate_flush_bits(os); } static void deflate_write_uncompressed_block(struct deflate_output_bitstream *os, const u8 *data, u16 len, bool is_final_block) { deflate_write_block_header(os, is_final_block, DEFLATE_BLOCKTYPE_UNCOMPRESSED); deflate_align_bitstream(os); if (4 + (u32)len >= os->end - os->next) { os->next = os->end; return; } put_unaligned_le16(len, os->next); os->next += 2; put_unaligned_le16(~len, os->next); os->next += 2; memcpy(os->next, data, len); os->next += len; } static void deflate_write_uncompressed_blocks(struct deflate_output_bitstream *os, const u8 *data, u32 data_length, bool is_final_block) { do { u16 len = MIN(data_length, UINT16_MAX); deflate_write_uncompressed_block(os, data, len, is_final_block && len == data_length); data += len; data_length -= len; } while (data_length != 0); } /* * Choose the best type of block to use (dynamic Huffman, static Huffman, or * uncompressed), then output it. */ static void deflate_flush_block(struct libdeflate_compressor * restrict c, struct deflate_output_bitstream * restrict os, const u8 * restrict block_begin, u32 block_length, bool is_final_block, bool use_item_list) { static const u8 deflate_extra_precode_bits[DEFLATE_NUM_PRECODE_SYMS] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7, }; /* Costs are measured in bits */ u32 dynamic_cost = 0; u32 static_cost = 0; u32 uncompressed_cost = 0; struct deflate_codes *codes; int block_type; unsigned sym; /* Tally the end-of-block symbol. */ c->freqs.litlen[DEFLATE_END_OF_BLOCK]++; /* Build dynamic Huffman codes. */ deflate_make_huffman_codes(&c->freqs, &c->codes); /* Account for the cost of sending dynamic Huffman codes. 
*/ deflate_precompute_huffman_header(c); dynamic_cost += 5 + 5 + 4 + (3 * c->num_explicit_lens); for (sym = 0; sym < DEFLATE_NUM_PRECODE_SYMS; sym++) { u32 extra = deflate_extra_precode_bits[sym]; dynamic_cost += c->precode_freqs[sym] * (extra + c->precode_lens[sym]); } /* Account for the cost of encoding literals. */ for (sym = 0; sym < 256; sym++) { dynamic_cost += c->freqs.litlen[sym] * c->codes.lens.litlen[sym]; } for (sym = 0; sym < 144; sym++) static_cost += c->freqs.litlen[sym] * 8; for (; sym < 256; sym++) static_cost += c->freqs.litlen[sym] * 9; /* Account for the cost of encoding the end-of-block symbol. */ dynamic_cost += c->codes.lens.litlen[256]; static_cost += 7; /* Account for the cost of encoding lengths. */ for (sym = 257; sym < 257 + ARRAY_LEN(deflate_extra_length_bits); sym++) { u32 extra = deflate_extra_length_bits[sym - 257]; dynamic_cost += c->freqs.litlen[sym] * (extra + c->codes.lens.litlen[sym]); static_cost += c->freqs.litlen[sym] * (extra + c->static_codes.lens.litlen[sym]); } /* Account for the cost of encoding offsets. */ for (sym = 0; sym < ARRAY_LEN(deflate_extra_offset_bits); sym++) { u32 extra = deflate_extra_offset_bits[sym]; dynamic_cost += c->freqs.offset[sym] * (extra + c->codes.lens.offset[sym]); static_cost += c->freqs.offset[sym] * (extra + 5); } /* Compute the cost of using uncompressed blocks. */ uncompressed_cost += (-(os->bitcount + 3) & 7) + 32 + (40 * (DIV_ROUND_UP(block_length, UINT16_MAX) - 1)) + (8 * block_length); /* Choose the cheapest block type. */ if (dynamic_cost < MIN(static_cost, uncompressed_cost)) { block_type = DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN; codes = &c->codes; } else if (static_cost < uncompressed_cost) { block_type = DEFLATE_BLOCKTYPE_STATIC_HUFFMAN; codes = &c->static_codes; } else { block_type = DEFLATE_BLOCKTYPE_UNCOMPRESSED; } /* Now actually output the block. 
*/ if (block_type == DEFLATE_BLOCKTYPE_UNCOMPRESSED) { /* Note: the length being flushed may exceed the maximum length * of an uncompressed block (65535 bytes). Therefore, more than * one uncompressed block might be needed. */ deflate_write_uncompressed_blocks(os, block_begin, block_length, is_final_block); } else { /* Output the block header. */ deflate_write_block_header(os, is_final_block, block_type); /* Output the Huffman codes (dynamic Huffman blocks only). */ if (block_type == DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN) deflate_write_huffman_header(c, os); /* Output the literals, matches, and end-of-block symbol. */ #if SUPPORT_NEAR_OPTIMAL_PARSING if (use_item_list) deflate_write_item_list(os, codes, c, block_length); else #endif deflate_write_sequences(os, codes, c->p.g.sequences, block_begin); deflate_write_end_of_block(os, codes); } } static forceinline void deflate_choose_literal(struct libdeflate_compressor *c, unsigned literal, u32 *litrunlen_p) { c->freqs.litlen[literal]++; ++*litrunlen_p; } static forceinline void deflate_choose_match(struct libdeflate_compressor *c, unsigned length, unsigned offset, u32 *litrunlen_p, struct deflate_sequence **next_seq_p) { struct deflate_sequence *seq = *next_seq_p; unsigned length_slot = deflate_length_slot[length]; unsigned offset_slot = deflate_get_offset_slot(c, offset); c->freqs.litlen[257 + length_slot]++; c->freqs.offset[offset_slot]++; seq->litrunlen_and_length = ((u32)length << 23) | *litrunlen_p; seq->offset = offset; seq->length_slot = length_slot; seq->offset_symbol = offset_slot; *litrunlen_p = 0; *next_seq_p = seq + 1; } static forceinline void deflate_finish_sequence(struct deflate_sequence *seq, u32 litrunlen) { seq->litrunlen_and_length = litrunlen; /* length = 0 */ } /******************************************************************************/ /* * Block splitting algorithm. The problem is to decide when it is worthwhile to * start a new block with new Huffman codes. 
There is a theoretically optimal * solution: recursively consider every possible block split, considering the * exact cost of each block, and choose the minimum cost approach. But this is * far too slow. Instead, as an approximation, we can count symbols and after * every N symbols, compare the expected distribution of symbols based on the * previous data with the actual distribution. If they differ "by enough", then * start a new block. * * As an optimization and heuristic, we don't distinguish between every symbol * but rather we combine many symbols into a single "observation type". For * literals we only look at the high bits and low bits, and for matches we only * look at whether the match is long or not. The assumption is that for typical * "real" data, places that are good block boundaries will tend to be noticeable * based only on changes in these aggregate frequencies, without looking for * subtle differences in individual symbols. For example, a change from ASCII * bytes to non-ASCII bytes, or from few matches (generally less compressible) * to many matches (generally more compressible), would be easily noticed based * on the aggregates. * * For determining whether the frequency distributions are "different enough" to * start a new block, the simple heuristic of splitting when the sum of absolute * differences exceeds a constant seems to be good enough. We also add a number * proportional to the block length so that the algorithm is more likely to end * long blocks than short blocks. This reflects the general expectation that it * will become increasingly beneficial to start a new block as the current * block grows longer. * * Finally, for an approximation, it is not strictly necessary that the exact * symbols being used are considered. With "near-optimal parsing", for example, * the actual symbols that will be used are unknown until after the block * boundary is chosen and the block has been optimized.
Since the final choices
 * cannot be used, we can use preliminary "greedy" choices instead.
 */

/* Initialize the block split statistics when starting a new block. */
static void
init_block_split_stats(struct block_split_stats *stats)
{
	int i;

	for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
		stats->new_observations[i] = 0;
		stats->observations[i] = 0;
	}
	stats->num_new_observations = 0;
	stats->num_observations = 0;
}

/* Literal observation. Heuristic: use the top 2 bits and low 1 bit of the
 * literal, for 8 possible literal observation types. */
static forceinline void
observe_literal(struct block_split_stats *stats, u8 lit)
{
	/* Bits 7..6 of 'lit' land in bits 2..1 of the index; bit 0 is kept. */
	stats->new_observations[((lit >> 5) & 0x6) | (lit & 1)]++;
	stats->num_new_observations++;
}

/* Match observation. Heuristic: use one observation type for "short match" and
 * one observation type for "long match". */
static forceinline void
observe_match(struct block_split_stats *stats, unsigned length)
{
	/* Matches of length >= 9 count as "long"; the match types are indexed
	 * directly after the literal types. */
	stats->new_observations[NUM_LITERAL_OBSERVATION_TYPES + (length >= 9)]++;
	stats->num_new_observations++;
}

/*
 * Compare the distribution of the new observations with the distribution of
 * the observations already accumulated for the current block.
 *
 * Returns true if the block should be ended now.  Otherwise the new
 * observations are merged into the accumulated ones, the "new" counters are
 * cleared, and false is returned.  When nothing has been accumulated yet
 * (num_observations == 0) no comparison is made and the merge always happens.
 */
static bool
do_end_block_check(struct block_split_stats *stats, u32 block_length)
{
	int i;

	if (stats->num_observations > 0) {

		/* Note: to avoid slow divisions, we do not divide by
		 * 'num_observations', but rather do all math with the numbers
		 * multiplied by 'num_observations'. */
		u32 total_delta = 0;
		for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
			u32 expected = stats->observations[i] * stats->num_new_observations;
			u32 actual = stats->new_observations[i] * stats->num_observations;
			u32 delta = (actual > expected) ? actual - expected :
							  expected - actual;
			total_delta += delta;
		}

		/* Ready to end the block?  The (block_length / 4096) term makes
		 * the threshold easier to reach as the block grows. */
		if (total_delta + (block_length / 4096) * stats->num_observations >=
		    NUM_OBSERVATIONS_PER_BLOCK_CHECK * 200 / 512 * stats->num_observations)
			return true;
	}

	/* Not ending the block: fold the new observations into the totals. */
	for (i = 0; i < NUM_OBSERVATION_TYPES; i++) {
		stats->num_observations += stats->new_observations[i];
		stats->observations[i] += stats->new_observations[i];
		stats->new_observations[i] = 0;
	}
	stats->num_new_observations = 0;
	return false;
}

/*
 * Decide whether the current block should end at @in_next.
 *
 * The statistics are only consulted once NUM_OBSERVATIONS_PER_BLOCK_CHECK new
 * observations have been gathered, and only if both the current block and the
 * remaining input are at least MIN_BLOCK_LENGTH bytes long; otherwise false is
 * returned without touching @stats.
 */
static forceinline bool
should_end_block(struct block_split_stats *stats,
		 const u8 *in_block_begin, const u8 *in_next, const u8 *in_end)
{
	/* Ready to check block split statistics? */
	if (stats->num_new_observations < NUM_OBSERVATIONS_PER_BLOCK_CHECK ||
	    in_next - in_block_begin < MIN_BLOCK_LENGTH ||
	    in_end - in_next < MIN_BLOCK_LENGTH)
		return false;

	return do_end_block_check(stats, in_next - in_block_begin);
}

/******************************************************************************/

/*
 * This is the "greedy" DEFLATE compressor. It always chooses the longest match.
 *
 * The input [in, in + in_nbytes) is compressed into 'out', block by block, and
 * the value of deflate_flush_output() is returned.
 */
static size_t
deflate_compress_greedy(struct libdeflate_compressor * restrict c,
			const u8 * restrict in, size_t in_nbytes,
			u8 * restrict out, size_t out_nbytes_avail)
{
	const u8 *in_next = in;
	const u8 *in_end = in_next + in_nbytes;
	struct deflate_output_bitstream os;
	const u8 *in_cur_base = in_next;
	unsigned max_len = DEFLATE_MAX_MATCH_LEN;
	unsigned nice_len = MIN(c->nice_match_length, max_len);
	u32 next_hashes[2] = {0, 0};

	deflate_init_output(&os, out, out_nbytes_avail);
	hc_matchfinder_init(&c->p.g.hc_mf);

	do {
		/* Starting a new DEFLATE block. */

		const u8 * const in_block_begin = in_next;
		const u8 * const in_max_block_end =
			in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
		u32 litrunlen = 0;
		struct deflate_sequence *next_seq = c->p.g.sequences;

		init_block_split_stats(&c->split_stats);
		deflate_reset_symbol_frequencies(c);

		do {
			u32 length;
			u32 offset;

			/* Decrease the maximum and nice match lengths if we're
			 * approaching the end of the input buffer. */
			if (unlikely(max_len > in_end - in_next)) {
				max_len = in_end - in_next;
				nice_len = MIN(nice_len, max_len);
			}

			length = hc_matchfinder_longest_match(&c->p.g.hc_mf,
							      &in_cur_base,
							      in_next,
							      DEFLATE_MIN_MATCH_LEN - 1,
							      max_len,
							      nice_len,
							      c->max_search_depth,
							      next_hashes,
							      &offset);

			if (length >= DEFLATE_MIN_MATCH_LEN) {
				/* Match found. */
				deflate_choose_match(c, length, offset,
						     &litrunlen, &next_seq);
				observe_match(&c->split_stats, length);
				/* The first byte of the match was already
				 * handled by longest_match(); skip the
				 * remaining length - 1 positions. */
				in_next = hc_matchfinder_skip_positions(&c->p.g.hc_mf,
									&in_cur_base,
									in_next + 1,
									in_end,
									length - 1,
									next_hashes);
			} else {
				/* No match found. */
				deflate_choose_literal(c, *in_next, &litrunlen);
				observe_literal(&c->split_stats, *in_next);
				in_next++;
			}

			/* Check if it's time to output another block. */
		} while (in_next < in_max_block_end &&
			 !should_end_block(&c->split_stats, in_block_begin, in_next, in_end));

		deflate_finish_sequence(next_seq, litrunlen);
		deflate_flush_block(c, &os, in_block_begin,
				    in_next - in_block_begin,
				    in_next == in_end, false);
	} while (in_next != in_end);

	return deflate_flush_output(&os);
}

/*
 * This is the "lazy" DEFLATE compressor. Before choosing a match, it checks to
 * see if there's a longer match at the next position. If yes, it outputs a
 * literal and continues to the next position. If no, it outputs the match.
 *
 * The input [in, in + in_nbytes) is compressed into 'out', block by block, and
 * the value of deflate_flush_output() is returned.
 */
static size_t
deflate_compress_lazy(struct libdeflate_compressor * restrict c,
		      const u8 * restrict in, size_t in_nbytes,
		      u8 * restrict out, size_t out_nbytes_avail)
{
	const u8 *in_next = in;
	const u8 *in_end = in_next + in_nbytes;
	struct deflate_output_bitstream os;
	const u8 *in_cur_base = in_next;
	unsigned max_len = DEFLATE_MAX_MATCH_LEN;
	unsigned nice_len = MIN(c->nice_match_length, max_len);
	u32 next_hashes[2] = {0, 0};

	deflate_init_output(&os, out, out_nbytes_avail);
	hc_matchfinder_init(&c->p.g.hc_mf);

	do {
		/* Starting a new DEFLATE block. */

		const u8 * const in_block_begin = in_next;
		const u8 * const in_max_block_end =
			in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
		u32 litrunlen = 0;
		struct deflate_sequence *next_seq = c->p.g.sequences;

		init_block_split_stats(&c->split_stats);
		deflate_reset_symbol_frequencies(c);

		do {
			unsigned cur_len;
			unsigned cur_offset;
			unsigned next_len;
			unsigned next_offset;

			/* Clamp the match lengths when fewer than
			 * DEFLATE_MAX_MATCH_LEN bytes of input remain. */
			if (unlikely(in_end - in_next < DEFLATE_MAX_MATCH_LEN)) {
				max_len = in_end - in_next;
				nice_len = MIN(nice_len, max_len);
			}

			/* Find the longest match at the current position. */
			cur_len = hc_matchfinder_longest_match(&c->p.g.hc_mf,
							       &in_cur_base,
							       in_next,
							       DEFLATE_MIN_MATCH_LEN - 1,
							       max_len,
							       nice_len,
							       c->max_search_depth,
							       next_hashes,
							       &cur_offset);
			in_next += 1;

			if (cur_len < DEFLATE_MIN_MATCH_LEN) {
				/* No match found. Choose a literal.  (in_next
				 * has already been advanced, hence the - 1.) */
				deflate_choose_literal(c, *(in_next - 1), &litrunlen);
				observe_literal(&c->split_stats, *(in_next - 1));
				continue;
			}

		have_cur_match:
			/* Reached both for the initial match and, via the goto
			 * below, for each longer match that replaces it; every
			 * such candidate is recorded as a match observation. */
			observe_match(&c->split_stats, cur_len);

			/* We have a match at the current position. */

			/* If the current match is very long, choose it
			 * immediately. */
			if (cur_len >= nice_len) {
				deflate_choose_match(c, cur_len, cur_offset,
						     &litrunlen, &next_seq);
				in_next = hc_matchfinder_skip_positions(&c->p.g.hc_mf,
									&in_cur_base,
									in_next,
									in_end,
									cur_len - 1,
									next_hashes);
				continue;
			}

			/*
			 * Try to find a match at the next position.
			 *
			 * Note: since we already have a match at the *current*
			 * position, we use only half the 'max_search_depth'
			 * when checking the *next* position. This is a useful
			 * trade-off because it's more worthwhile to use a
			 * greater search depth on the initial match.
			 *
			 * Note: it's possible to structure the code such that
			 * there's only one call to longest_match(), which
			 * handles both the "find the initial match" and "try to
			 * find a longer match" cases. However, it is faster to
			 * have two call sites, with longest_match() inlined at
			 * each.
			 */
			if (unlikely(in_end - in_next < DEFLATE_MAX_MATCH_LEN)) {
				max_len = in_end - in_next;
				nice_len = MIN(nice_len, max_len);
			}
			next_len = hc_matchfinder_longest_match(&c->p.g.hc_mf,
								&in_cur_base,
								in_next,
								cur_len,
								max_len,
								nice_len,
								c->max_search_depth / 2,
								next_hashes,
								&next_offset);
			in_next += 1;

			if (next_len > cur_len) {
				/* Found a longer match at the next position.
				 * Output a literal. Then the next match
				 * becomes the current match.  in_next is now
				 * two bytes past the position of the literal,
				 * hence the - 2. */
				deflate_choose_literal(c, *(in_next - 2), &litrunlen);
				cur_len = next_len;
				cur_offset = next_offset;
				goto have_cur_match;
			}

			/* No longer match at the next position.
			 * Output the current match.  Two positions of it have
			 * already been consumed by the two longest_match()
			 * calls, so only cur_len - 2 remain to be skipped. */
			deflate_choose_match(c, cur_len, cur_offset,
					     &litrunlen, &next_seq);
			in_next = hc_matchfinder_skip_positions(&c->p.g.hc_mf,
								&in_cur_base,
								in_next,
								in_end,
								cur_len - 2,
								next_hashes);

			/* Check if it's time to output another block. */
		} while (in_next < in_max_block_end &&
			 !should_end_block(&c->split_stats, in_block_begin, in_next, in_end));

		deflate_finish_sequence(next_seq, litrunlen);
		deflate_flush_block(c, &os, in_block_begin,
				    in_next - in_block_begin,
				    in_next == in_end, false);
	} while (in_next != in_end);

	return deflate_flush_output(&os);
}

#if SUPPORT_NEAR_OPTIMAL_PARSING

/*
 * Follow the minimum-cost path in the graph of possible match/literal choices
 * for the current block and compute the frequencies of the Huffman symbols that
 * would be needed to output those matches and literals.
*/ static void deflate_tally_item_list(struct libdeflate_compressor *c, u32 block_length) { struct deflate_optimum_node *cur_node = &c->p.n.optimum_nodes[0]; struct deflate_optimum_node *end_node = &c->p.n.optimum_nodes[block_length]; do { unsigned length = cur_node->item & OPTIMUM_LEN_MASK; unsigned offset = cur_node->item >> OPTIMUM_OFFSET_SHIFT; if (length == 1) { /* Literal */ c->freqs.litlen[offset]++; } else { /* Match */ c->freqs.litlen[257 + deflate_length_slot[length]]++; c->freqs.offset[deflate_get_offset_slot(c, offset)]++; } cur_node += length; } while (cur_node != end_node); } /* Set the current cost model from the codeword lengths specified in @lens. */ static void deflate_set_costs_from_codes(struct libdeflate_compressor *c, const struct deflate_lens *lens) { unsigned i; /* Literals */ for (i = 0; i < DEFLATE_NUM_LITERALS; i++) { u32 bits = (lens->litlen[i] ? lens->litlen[i] : LITERAL_NOSTAT_BITS); c->p.n.costs.literal[i] = bits << COST_SHIFT; } /* Lengths */ for (i = DEFLATE_MIN_MATCH_LEN; i <= DEFLATE_MAX_MATCH_LEN; i++) { unsigned length_slot = deflate_length_slot[i]; unsigned litlen_sym = 257 + length_slot; u32 bits = (lens->litlen[litlen_sym] ? lens->litlen[litlen_sym] : LENGTH_NOSTAT_BITS); bits += deflate_extra_length_bits[length_slot]; c->p.n.costs.length[i] = bits << COST_SHIFT; } /* Offset slots */ for (i = 0; i < ARRAY_LEN(deflate_offset_slot_base); i++) { u32 bits = (lens->offset[i] ? 
lens->offset[i] : OFFSET_NOSTAT_BITS); bits += deflate_extra_offset_bits[i]; c->p.n.costs.offset_slot[i] = bits << COST_SHIFT; } } static forceinline u32 deflate_default_literal_cost(unsigned literal) { STATIC_ASSERT(COST_SHIFT == 3); /* 66 is 8.25 bits/symbol */ return 66; } static forceinline u32 deflate_default_length_slot_cost(unsigned length_slot) { STATIC_ASSERT(COST_SHIFT == 3); /* 60 is 7.5 bits/symbol */ return 60 + ((u32)deflate_extra_length_bits[length_slot] << COST_SHIFT); } static forceinline u32 deflate_default_offset_slot_cost(unsigned offset_slot) { STATIC_ASSERT(COST_SHIFT == 3); /* 39 is 4.875 bits/symbol */ return 39 + ((u32)deflate_extra_offset_bits[offset_slot] << COST_SHIFT); } /* * Set default symbol costs for the first block's first optimization pass. * * It works well to assume that each symbol is equally probable. This results * in each symbol being assigned a cost of (-log2(1.0/num_syms) * (1 << * COST_SHIFT)) where 'num_syms' is the number of symbols in the corresponding * alphabet. However, we intentionally bias the parse towards matches rather * than literals by using a slightly lower default cost for length symbols than * for literals. This often improves the compression ratio slightly. */ static void deflate_set_default_costs(struct libdeflate_compressor *c) { unsigned i; /* Literals */ for (i = 0; i < DEFLATE_NUM_LITERALS; i++) c->p.n.costs.literal[i] = deflate_default_literal_cost(i); /* Lengths */ for (i = DEFLATE_MIN_MATCH_LEN; i <= DEFLATE_MAX_MATCH_LEN; i++) c->p.n.costs.length[i] = deflate_default_length_slot_cost( deflate_length_slot[i]); /* Offset slots */ for (i = 0; i < ARRAY_LEN(deflate_offset_slot_base); i++) c->p.n.costs.offset_slot[i] = deflate_default_offset_slot_cost(i); } static forceinline void deflate_adjust_cost(u32 *cost_p, u32 default_cost) { *cost_p += ((s32)default_cost - (s32)*cost_p) >> 1; } /* * Adjust the costs when beginning a new block. 
* * Since the current costs have been optimized for the data, it's undesirable to * throw them away and start over with the default costs. At the same time, we * don't want to bias the parse by assuming that the next block will be similar * to the current block. As a compromise, make the costs closer to the * defaults, but don't simply set them to the defaults. */ static void deflate_adjust_costs(struct libdeflate_compressor *c) { unsigned i; /* Literals */ for (i = 0; i < DEFLATE_NUM_LITERALS; i++) deflate_adjust_cost(&c->p.n.costs.literal[i], deflate_default_literal_cost(i)); /* Lengths */ for (i = DEFLATE_MIN_MATCH_LEN; i <= DEFLATE_MAX_MATCH_LEN; i++) deflate_adjust_cost(&c->p.n.costs.length[i], deflate_default_length_slot_cost( deflate_length_slot[i])); /* Offset slots */ for (i = 0; i < ARRAY_LEN(deflate_offset_slot_base); i++) deflate_adjust_cost(&c->p.n.costs.offset_slot[i], deflate_default_offset_slot_cost(i)); } /* * Find the minimum-cost path through the graph of possible match/literal * choices for this block. * * We find the minimum cost path from 'c->p.n.optimum_nodes[0]', which * represents the node at the beginning of the block, to * 'c->p.n.optimum_nodes[block_length]', which represents the node at the end of * the block. Edge costs are evaluated using the cost model 'c->p.n.costs'. * * The algorithm works backwards, starting at the end node and proceeding * backwards one node at a time. At each node, the minimum cost to reach the * end node is computed and the match/literal choice that begins that path is * saved. 
*/
static void
deflate_find_min_cost_path(struct libdeflate_compressor *c,
			   const u32 block_length,
			   const struct lz_match *cache_ptr)
{
	struct deflate_optimum_node * const first_node =
		&c->p.n.optimum_nodes[0];
	struct deflate_optimum_node *node =
		&c->p.n.optimum_nodes[block_length];

	/* Reaching the end from the end node costs nothing. */
	node->cost_to_end = 0;

	do {
		unsigned match_count;
		unsigned lit;
		u32 min_cost;

		/* Step back one position in both the node array and the match
		 * cache.  The cache entry reached this way is the position's
		 * trailer: its 'length' field holds the number of matches
		 * cached for the position and its 'offset' field holds the
		 * literal byte. */
		--node;
		--cache_ptr;
		match_count = cache_ptr->length;
		lit = cache_ptr->offset;

		/* Baseline choice: emit the literal, which is always valid. */
		min_cost = c->p.n.costs.literal[lit] + node[1].cost_to_end;
		node->item = ((u32)lit << OPTIMUM_OFFSET_SHIFT) | 1;

		if (match_count != 0) {
			/*
			 * Try every length from DEFLATE_MIN_MATCH_LEN up to
			 * the longest match cached for this position.  Each
			 * length is only tried with the smallest offset that
			 * provides it.  That is not guaranteed to be optimal,
			 * because a larger offset can occasionally be cheaper
			 * to code than a smaller one, but it is a very useful
			 * heuristic.
			 */
			const struct lz_match * const matches_end = cache_ptr;
			const struct lz_match *m = cache_ptr - match_count;
			unsigned len = DEFLATE_MIN_MATCH_LEN;

			do {
				const unsigned off = m->offset;
				const unsigned off_slot =
					deflate_get_offset_slot(c, off);
				const u32 off_cost =
					c->p.n.costs.offset_slot[off_slot];

				/* 'len' carries over from the previous match,
				 * so each match only covers the lengths the
				 * earlier ones could not reach. */
				do {
					const u32 cost = off_cost +
						c->p.n.costs.length[len] +
						node[len].cost_to_end;

					if (cost < min_cost) {
						min_cost = cost;
						node->item =
							((u32)off << OPTIMUM_OFFSET_SHIFT) | len;
					}
				} while (++len <= m->length);
			} while (++m != matches_end);

			/* Move past this position's matches in the cache. */
			cache_ptr -= match_count;
		}

		node->cost_to_end = min_cost;
	} while (node != first_node);
}

/*
 * Choose the literal/match sequence to use for the current block.
The basic * algorithm finds a minimum-cost path through the block's graph of * literal/match choices, given a cost model. However, the cost of each symbol * is unknown until the Huffman codes have been built, but at the same time the * Huffman codes depend on the frequencies of chosen symbols. Consequently, * multiple passes must be used to try to approximate an optimal solution. The * first pass uses default costs, mixed with the costs from the previous block * if any. Later passes use the Huffman codeword lengths from the previous pass * as the costs. */ static void deflate_optimize_block(struct libdeflate_compressor *c, u32 block_length, const struct lz_match *cache_ptr, bool is_first_block) { unsigned num_passes_remaining = c->p.n.num_optim_passes; u32 i; /* Force the block to really end at the desired length, even if some * matches extend beyond it. */ for (i = block_length; i <= MIN(block_length - 1 + DEFLATE_MAX_MATCH_LEN, ARRAY_LEN(c->p.n.optimum_nodes) - 1); i++) c->p.n.optimum_nodes[i].cost_to_end = 0x80000000; /* Set the initial costs. */ if (is_first_block) deflate_set_default_costs(c); else deflate_adjust_costs(c); for (;;) { /* Find the minimum cost path for this pass. */ deflate_find_min_cost_path(c, block_length, cache_ptr); /* Compute frequencies of the chosen symbols. */ deflate_reset_symbol_frequencies(c); deflate_tally_item_list(c, block_length); if (--num_passes_remaining == 0) break; /* At least one optimization pass remains; update the costs. */ deflate_make_huffman_codes(&c->freqs, &c->codes); deflate_set_costs_from_codes(c, &c->codes.lens); } } /* * This is the "near-optimal" DEFLATE compressor. It computes the optimal * representation of each DEFLATE block using a minimum-cost path search over * the graph of possible match/literal choices for that block, assuming a * certain cost for each Huffman symbol. 
*
 * For several reasons, the end result is not guaranteed to be optimal:
 *
 * - Nonoptimal choice of blocks
 * - Heuristic limitations on which matches are actually considered
 * - Symbol costs are unknown until the symbols have already been chosen
 *   (so iterative optimization must be used)
 *
 * The input [in, in + in_nbytes) is compressed into 'out', block by block, and
 * the value of deflate_flush_output() is returned.
 */
static size_t
deflate_compress_near_optimal(struct libdeflate_compressor * restrict c,
			      const u8 * restrict in, size_t in_nbytes,
			      u8 * restrict out, size_t out_nbytes_avail)
{
	const u8 *in_next = in;
	const u8 *in_end = in_next + in_nbytes;
	struct deflate_output_bitstream os;
	const u8 *in_cur_base = in_next;
	/* Position at which the matchfinder window must next be slid. */
	const u8 *in_next_slide =
		in_next + MIN(in_end - in_next, MATCHFINDER_WINDOW_SIZE);
	unsigned max_len = DEFLATE_MAX_MATCH_LEN;
	unsigned nice_len = MIN(c->nice_match_length, max_len);
	u32 next_hashes[2] = {0, 0};

	deflate_init_output(&os, out, out_nbytes_avail);
	bt_matchfinder_init(&c->p.n.bt_mf);

	do {
		/* Starting a new DEFLATE block. */

		struct lz_match *cache_ptr = c->p.n.match_cache;
		const u8 * const in_block_begin = in_next;
		const u8 * const in_max_block_end =
			in_next + MIN(in_end - in_next, SOFT_MAX_BLOCK_LENGTH);
		/* Next position at which a block-split observation is made. */
		const u8 *next_observation = in_next;

		init_block_split_stats(&c->split_stats);

		/*
		 * Find matches until we decide to end the block. We end the
		 * block if any of the following is true:
		 *
		 * (1) Maximum block length has been reached
		 * (2) Match cache may overflow.
		 * (3) Block split heuristic says to split now.
		 */
		do {
			struct lz_match *matches;
			unsigned best_len;

			/* Slide the window forward if needed. */
			if (in_next == in_next_slide) {
				bt_matchfinder_slide_window(&c->p.n.bt_mf);
				in_cur_base = in_next;
				in_next_slide = in_next + MIN(in_end - in_next,
							      MATCHFINDER_WINDOW_SIZE);
			}

			/* Decrease the maximum and nice match lengths if we're
			 * approaching the end of the input buffer. */
			if (unlikely(max_len > in_end - in_next)) {
				max_len = in_end - in_next;
				nice_len = MIN(nice_len, max_len);
			}

			/*
			 * Find matches with the current position using the
			 * binary tree matchfinder and save them in
			 * 'match_cache'.
			 *
			 * Note: the binary tree matchfinder is more suited for
			 * optimal parsing than the hash chain matchfinder. The
			 * reasons for this include:
			 *
			 * - The binary tree matchfinder can find more matches
			 *   in the same number of steps.
			 * - One of the major advantages of hash chains is that
			 *   skipping positions (not searching for matches at
			 *   them) is faster; however, with optimal parsing we
			 *   search for matches at almost all positions, so this
			 *   advantage of hash chains is negated.
			 */
			matches = cache_ptr;
			best_len = 0;
			if (likely(max_len >= BT_MATCHFINDER_REQUIRED_NBYTES)) {
				cache_ptr = bt_matchfinder_get_matches(&c->p.n.bt_mf,
								       in_cur_base,
								       in_next - in_cur_base,
								       max_len,
								       nice_len,
								       c->max_search_depth,
								       next_hashes,
								       &best_len,
								       matches);
			}

			/* Feed the block-split statistics with a preliminary
			 * choice: a match of length >= 4 counts as a match and
			 * suppresses observations for the bytes it covers;
			 * anything else counts as a literal. */
			if (in_next >= next_observation) {
				if (best_len >= 4) {
					observe_match(&c->split_stats, best_len);
					next_observation = in_next + best_len;
				} else {
					observe_literal(&c->split_stats, *in_next);
					next_observation = in_next + 1;
				}
			}

			/* Trailer entry for this position: 'length' holds the
			 * number of matches just cached and 'offset' holds the
			 * literal byte. */
			cache_ptr->length = cache_ptr - matches;
			cache_ptr->offset = *in_next;
			in_next++;
			cache_ptr++;

			/*
			 * If there was a very long match found, don't cache any
			 * matches for the bytes covered by that match. This
			 * avoids degenerate behavior when compressing highly
			 * redundant data, where the number of matches can be
			 * very large.
			 *
			 * This heuristic doesn't actually hurt the compression
			 * ratio very much. If there's a long match, then the
			 * data must be highly compressible, so it doesn't
			 * matter much what we do.
			 */
			if (best_len >= DEFLATE_MIN_MATCH_LEN && best_len >= nice_len) {
				/* The first byte of the match was handled
				 * above; best_len - 1 positions remain. */
				--best_len;
				do {
					if (in_next == in_next_slide) {
						bt_matchfinder_slide_window(&c->p.n.bt_mf);
						in_cur_base = in_next;
						in_next_slide = in_next + MIN(in_end - in_next,
									      MATCHFINDER_WINDOW_SIZE);
					}
					if (unlikely(max_len > in_end - in_next)) {
						max_len = in_end - in_next;
						nice_len = MIN(nice_len, max_len);
					}
					if (max_len >= BT_MATCHFINDER_REQUIRED_NBYTES) {
						bt_matchfinder_skip_position(&c->p.n.bt_mf,
									     in_cur_base,
									     in_next - in_cur_base,
									     nice_len,
									     c->max_search_depth,
									     next_hashes);
					}
					/* Trailer with zero matches: only the
					 * literal is recorded here. */
					cache_ptr->length = 0;
					cache_ptr->offset = *in_next;
					in_next++;
					cache_ptr++;
				} while (--best_len);
			}
		} while (in_next < in_max_block_end &&
			 cache_ptr < &c->p.n.match_cache[CACHE_LENGTH] &&
			 !should_end_block(&c->split_stats, in_block_begin, in_next, in_end));

		/* All the matches for this block have been cached. Now choose
		 * the sequence of items to output and flush the block. */
		deflate_optimize_block(c, in_next - in_block_begin, cache_ptr,
				       in_block_begin == in);
		deflate_flush_block(c, &os, in_block_begin, in_next - in_block_begin,
				    in_next == in_end, true);
	} while (in_next != in_end);

	return deflate_flush_output(&os);
}

#endif /* SUPPORT_NEAR_OPTIMAL_PARSING */

/* Initialize c->offset_slot_fast.
*/
static void
deflate_init_offset_slot_fast(struct libdeflate_compressor *c)
{
	unsigned offset_slot;
	unsigned offset;
	unsigned offset_end;

	for (offset_slot = 0;
	     offset_slot < ARRAY_LEN(deflate_offset_slot_base);
	     offset_slot++)
	{
		offset = deflate_offset_slot_base[offset_slot];
#if USE_FULL_OFFSET_SLOT_FAST
		/* Full table: one entry per offset, indexed by the offset. */
		offset_end = offset + (1 << deflate_extra_offset_bits[offset_slot]);
		do {
			c->offset_slot_fast[offset] = offset_slot;
		} while (++offset != offset_end);
#else
		/* Compact table: offsets up to 256 get one entry each at index
		 * (offset - 1); larger offsets share one entry per 128 offsets,
		 * stored from index 256 onwards. */
		if (offset <= 256) {
			offset_end = offset + (1 << deflate_extra_offset_bits[offset_slot]);
			do {
				c->offset_slot_fast[offset - 1] = offset_slot;
			} while (++offset != offset_end);
		} else {
			offset_end = offset + (1 << deflate_extra_offset_bits[offset_slot]);
			do {
				c->offset_slot_fast[256 + ((offset - 1) >> 7)] = offset_slot;
			} while ((offset += (1 << 7)) != offset_end);
		}
#endif
	}
}

/*
 * Allocate and initialize a compressor for the given compression level.
 *
 * Levels 1-4 select the greedy parser and levels 5-7 the lazy parser.  With
 * SUPPORT_NEAR_OPTIMAL_PARSING, levels 8-12 select the near-optimal parser;
 * without it, levels 8-9 select the lazy parser with larger search parameters.
 *
 * Returns the new compressor, or NULL if the allocation fails or the level is
 * not one of the supported values.
 */
LIBDEFLATEEXPORT struct libdeflate_compressor * LIBDEFLATEAPI
libdeflate_alloc_compressor(int compression_level)
{
	struct libdeflate_compressor *c;
	size_t size;

	/* Only allocate as much of the union 'p' as the chosen parser needs. */
#if SUPPORT_NEAR_OPTIMAL_PARSING
	if (compression_level >= 8)
		size = offsetof(struct libdeflate_compressor, p) + sizeof(c->p.n);
	else
#endif
		size = offsetof(struct libdeflate_compressor, p) + sizeof(c->p.g);

	c = aligned_malloc(MATCHFINDER_ALIGNMENT, size);
	if (!c)
		return NULL;

	switch (compression_level) {
	case 1:
		c->impl = deflate_compress_greedy;
		c->max_search_depth = 2;
		c->nice_match_length = 8;
		break;
	case 2:
		c->impl = deflate_compress_greedy;
		c->max_search_depth = 6;
		c->nice_match_length = 10;
		break;
	case 3:
		c->impl = deflate_compress_greedy;
		c->max_search_depth = 12;
		c->nice_match_length = 14;
		break;
	case 4:
		c->impl = deflate_compress_greedy;
		c->max_search_depth = 24;
		c->nice_match_length = 24;
		break;
	case 5:
		c->impl = deflate_compress_lazy;
		c->max_search_depth = 20;
		c->nice_match_length = 30;
		break;
	case 6:
		c->impl = deflate_compress_lazy;
		c->max_search_depth = 40;
		c->nice_match_length = 65;
		break;
	case 7:
		c->impl = deflate_compress_lazy;
		c->max_search_depth = 100;
		c->nice_match_length = 130;
		break;
#if SUPPORT_NEAR_OPTIMAL_PARSING
	case 8:
		c->impl = deflate_compress_near_optimal;
		c->max_search_depth = 12;
		c->nice_match_length = 20;
		c->p.n.num_optim_passes = 1;
		break;
	case 9:
		c->impl = deflate_compress_near_optimal;
		c->max_search_depth = 16;
		c->nice_match_length = 26;
		c->p.n.num_optim_passes = 2;
		break;
	case 10:
		c->impl = deflate_compress_near_optimal;
		c->max_search_depth = 30;
		c->nice_match_length = 50;
		c->p.n.num_optim_passes = 2;
		break;
	case 11:
		c->impl = deflate_compress_near_optimal;
		c->max_search_depth = 60;
		c->nice_match_length = 80;
		c->p.n.num_optim_passes = 3;
		break;
	case 12:
		c->impl = deflate_compress_near_optimal;
		c->max_search_depth = 100;
		c->nice_match_length = 133;
		c->p.n.num_optim_passes = 4;
		break;
#else
	case 8:
		c->impl = deflate_compress_lazy;
		c->max_search_depth = 150;
		c->nice_match_length = 200;
		break;
	case 9:
		c->impl = deflate_compress_lazy;
		c->max_search_depth = 200;
		c->nice_match_length = DEFLATE_MAX_MATCH_LEN;
		break;
#endif
	default:
		/* Unsupported level: release the allocation and fail. */
		aligned_free(c);
		return NULL;
	}

	c->compression_level = compression_level;

	deflate_init_offset_slot_fast(c);
	deflate_init_static_codes(c);

	return c;
}

/*
 * Compress 'in_nbytes' bytes from 'in' into 'out' as a raw DEFLATE stream.
 *
 * Returns 0 if 'out_nbytes_avail' is smaller than OUTPUT_END_PADDING.
 * Otherwise returns whatever the selected implementation (or, for inputs
 * shorter than 16 bytes, deflate_flush_output()) returns.
 */
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
libdeflate_deflate_compress(struct libdeflate_compressor *c,
			    const void *in, size_t in_nbytes,
			    void *out, size_t out_nbytes_avail)
{
	if (unlikely(out_nbytes_avail < OUTPUT_END_PADDING))
		return 0;

	/* For extremely small inputs just use a single uncompressed block. */
	if (unlikely(in_nbytes < 16)) {
		struct deflate_output_bitstream os;
		deflate_init_output(&os, out, out_nbytes_avail);
		if (in_nbytes == 0)
			in = &os; /* Avoid passing NULL to memcpy() */
		deflate_write_uncompressed_block(&os, in, in_nbytes, true);
		return deflate_flush_output(&os);
	}

	/* Dispatch to the parser chosen by libdeflate_alloc_compressor(). */
	return (*c->impl)(c, in, in_nbytes, out, out_nbytes_avail);
}

/* Release a compressor; the pointer is passed straight to aligned_free(). */
LIBDEFLATEEXPORT void LIBDEFLATEAPI
libdeflate_free_compressor(struct libdeflate_compressor *c)
{
	aligned_free(c);
}

/* Return the compression level that was stored in 'c' when it was allocated. */
unsigned int
deflate_get_compression_level(struct libdeflate_compressor *c)
{
	return c->compression_level;
}

/*
 * Return an upper bound on the compressed size of 'in_nbytes' bytes of input.
 * The compressor argument 'c' is not consulted.
 */
LIBDEFLATEEXPORT size_t LIBDEFLATEAPI
libdeflate_deflate_compress_bound(struct libdeflate_compressor *c,
				  size_t in_nbytes)
{
	/*
	 * The worst case is all uncompressed blocks where one block has length
	 * <= MIN_BLOCK_LENGTH and the others have length MIN_BLOCK_LENGTH.
	 * Each uncompressed block has 5 bytes of overhead: 1 for BFINAL, BTYPE,
	 * and alignment to a byte boundary; 2 for LEN; and 2 for NLEN.
	 */
	size_t max_num_blocks = MAX(DIV_ROUND_UP(in_nbytes, MIN_BLOCK_LENGTH), 1);
	return (5 * max_num_blocks) + in_nbytes + 1 + OUTPUT_END_PADDING;
}
libdeflate-1.5/lib/deflate_compress.h000066400000000000000000000006171360172702500177170ustar00rootroot00000000000000#ifndef LIB_DEFLATE_COMPRESS_H
#define LIB_DEFLATE_COMPRESS_H

#include "lib_common.h"

/* DEFLATE compression is private to deflate_compress.c, but we do need to be
 * able to query the compression level for zlib and gzip header generation.
*/ struct libdeflate_compressor; extern unsigned int deflate_get_compression_level(struct libdeflate_compressor *c); #endif /* LIB_DEFLATE_COMPRESS_H */ libdeflate-1.5/lib/deflate_constants.h000066400000000000000000000043031360172702500200740ustar00rootroot00000000000000/* * deflate_constants.h - constants for the DEFLATE compression format */ #ifndef LIB_DEFLATE_CONSTANTS_H #define LIB_DEFLATE_CONSTANTS_H /* Valid block types */ #define DEFLATE_BLOCKTYPE_UNCOMPRESSED 0 #define DEFLATE_BLOCKTYPE_STATIC_HUFFMAN 1 #define DEFLATE_BLOCKTYPE_DYNAMIC_HUFFMAN 2 /* Minimum and maximum supported match lengths (in bytes) */ #define DEFLATE_MIN_MATCH_LEN 3 #define DEFLATE_MAX_MATCH_LEN 258 /* Minimum and maximum supported match offsets (in bytes) */ #define DEFLATE_MIN_MATCH_OFFSET 1 #define DEFLATE_MAX_MATCH_OFFSET 32768 #define DEFLATE_MAX_WINDOW_SIZE 32768 /* Number of symbols in each Huffman code. Note: for the literal/length * and offset codes, these are actually the maximum values; a given block * might use fewer symbols. */ #define DEFLATE_NUM_PRECODE_SYMS 19 #define DEFLATE_NUM_LITLEN_SYMS 288 #define DEFLATE_NUM_OFFSET_SYMS 32 /* The maximum number of symbols across all codes */ #define DEFLATE_MAX_NUM_SYMS 288 /* Division of symbols in the literal/length code */ #define DEFLATE_NUM_LITERALS 256 #define DEFLATE_END_OF_BLOCK 256 #define DEFLATE_NUM_LEN_SYMS 31 /* Maximum codeword length, in bits, within each Huffman code */ #define DEFLATE_MAX_PRE_CODEWORD_LEN 7 #define DEFLATE_MAX_LITLEN_CODEWORD_LEN 15 #define DEFLATE_MAX_OFFSET_CODEWORD_LEN 15 /* The maximum codeword length across all codes */ #define DEFLATE_MAX_CODEWORD_LEN 15 /* Maximum possible overrun when decoding codeword lengths */ #define DEFLATE_MAX_LENS_OVERRUN 137 /* * Maximum number of extra bits that may be required to represent a match * length or offset. * * TODO: are we going to have full DEFLATE64 support? If so, up to 16 * length bits must be supported. 
*/ #define DEFLATE_MAX_EXTRA_LENGTH_BITS 5 #define DEFLATE_MAX_EXTRA_OFFSET_BITS 14 /* The maximum number of bits in which a match can be represented. This * is the absolute worst case, which assumes the longest possible Huffman * codewords and the maximum numbers of extra bits. */ #define DEFLATE_MAX_MATCH_BITS \ (DEFLATE_MAX_LITLEN_CODEWORD_LEN + DEFLATE_MAX_EXTRA_LENGTH_BITS + \ DEFLATE_MAX_OFFSET_CODEWORD_LEN + DEFLATE_MAX_EXTRA_OFFSET_BITS) #endif /* LIB_DEFLATE_CONSTANTS_H */ libdeflate-1.5/lib/deflate_decompress.c000066400000000000000000001137121360172702500202240ustar00rootroot00000000000000/* * deflate_decompress.c - a decompressor for DEFLATE * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * --------------------------------------------------------------------------- * * This is a highly optimized DEFLATE decompressor. 
When compiled with gcc on * x86_64, it decompresses data in about 52% of the time of zlib (48% if BMI2 * instructions are available). On other architectures it should still be * significantly faster than zlib, but the difference may be smaller. * * Why this is faster than zlib's implementation: * * - Word accesses rather than byte accesses when reading input * - Word accesses rather than byte accesses when copying matches * - Faster Huffman decoding combined with various DEFLATE-specific tricks * - Larger bitbuffer variable that doesn't need to be filled as often * - Other optimizations to remove unnecessary branches * - Only full-buffer decompression is supported, so the code doesn't need to * support stopping and resuming decompression. * - On x86_64, compile a version of the decompression routine using BMI2 * instructions and use it automatically at runtime when supported. */ #include #include #include #include "deflate_constants.h" #include "unaligned.h" #include "libdeflate.h" /* * If the expression passed to SAFETY_CHECK() evaluates to false, then the * decompression routine immediately returns LIBDEFLATE_BAD_DATA, indicating the * compressed data is invalid. * * Theoretically, these checks could be disabled for specialized applications * where all input to the decompressor will be trusted. */ #if 0 # pragma message("UNSAFE DECOMPRESSION IS ENABLED. THIS MUST ONLY BE USED IF THE DECOMPRESSOR INPUT WILL ALWAYS BE TRUSTED!") # define SAFETY_CHECK(expr) (void)(expr) #else # define SAFETY_CHECK(expr) if (unlikely(!(expr))) return LIBDEFLATE_BAD_DATA #endif /* * Each TABLEBITS number is the base-2 logarithm of the number of entries in the * main portion of the corresponding decode table. Each number should be large * enough to ensure that for typical data, the vast majority of symbols can be * decoded by a direct lookup of the next TABLEBITS bits of compressed data. 
* However, this must be balanced against the fact that a larger table requires * more memory and requires more time to fill. * * Note: you cannot change a TABLEBITS number without also changing the * corresponding ENOUGH number! */ #define PRECODE_TABLEBITS 7 #define LITLEN_TABLEBITS 10 #define OFFSET_TABLEBITS 8 /* * Each ENOUGH number is the maximum number of decode table entries that may be * required for the corresponding Huffman code, including the main table and all * subtables. Each number depends on three parameters: * * (1) the maximum number of symbols in the code (DEFLATE_NUM_*_SYMS) * (2) the number of main table bits (the TABLEBITS numbers defined above) * (3) the maximum allowed codeword length (DEFLATE_MAX_*_CODEWORD_LEN) * * The ENOUGH numbers were computed using the utility program 'enough' from * zlib. This program enumerates all possible relevant Huffman codes to find * the worst-case usage of decode table entries. */ #define PRECODE_ENOUGH 128 /* enough 19 7 7 */ #define LITLEN_ENOUGH 1334 /* enough 288 10 15 */ #define OFFSET_ENOUGH 402 /* enough 32 8 15 */ /* * Type for codeword lengths. */ typedef u8 len_t; /* * The main DEFLATE decompressor structure. Since this implementation only * supports full buffer decompression, this structure does not store the entire * decompression state, but rather only some arrays that are too large to * comfortably allocate on the stack. */ struct libdeflate_decompressor { /* * The arrays aren't all needed at the same time. 'precode_lens' and * 'precode_decode_table' are unneeded after 'lens' has been filled. * Furthermore, 'lens' need not be retained after building the litlen * and offset decode tables. In fact, 'lens' can be in union with * 'litlen_decode_table' provided that 'offset_decode_table' is separate * and is built first.
*/ union { len_t precode_lens[DEFLATE_NUM_PRECODE_SYMS]; struct { len_t lens[DEFLATE_NUM_LITLEN_SYMS + DEFLATE_NUM_OFFSET_SYMS + DEFLATE_MAX_LENS_OVERRUN]; u32 precode_decode_table[PRECODE_ENOUGH]; } l; u32 litlen_decode_table[LITLEN_ENOUGH]; } u; u32 offset_decode_table[OFFSET_ENOUGH]; /* used only during build_decode_table() */ u16 sorted_syms[DEFLATE_MAX_NUM_SYMS]; /* must start out false (see libdeflate_alloc_decompressor()); NOTE(review): presumably records that the decode tables for the static Huffman codes are already built -- confirm in decompress_template.h */ bool static_codes_loaded; }; /***************************************************************************** * Input bitstream * *****************************************************************************/ /* * The state of the "input bitstream" consists of the following variables: * * - in_next: pointer to the next unread byte in the input buffer * * - in_end: pointer just past the end of the input buffer * * - bitbuf: a word-sized variable containing bits that have been read from * the input buffer. The buffered bits are right-aligned * (they're the low-order bits). * * - bitsleft: number of bits in 'bitbuf' that are valid. * * To make it easier for the compiler to optimize the code by keeping variables * in registers, these are declared as normal variables and manipulated using * macros. */ /* * The type for the bitbuffer variable ('bitbuf' described above). For best * performance, this should have size equal to a machine word. * * 64-bit platforms have a significant advantage: they get a bigger bitbuffer * which they have to fill less often. */ typedef machine_word_t bitbuf_t; /* * Number of bits the bitbuffer variable can hold. * * This is one less than the obvious value because of the optimized arithmetic * in FILL_BITS_WORDWISE() that leaves 'bitsleft' in the range * [WORDBITS - 8, WORDBITS - 1] rather than [WORDBITS - 7, WORDBITS]. */ #define BITBUF_NBITS (8 * sizeof(bitbuf_t) - 1) /* * The maximum number of bits that can be ensured in the bitbuffer variable, * i.e. the maximum value of 'n' that can be passed to ENSURE_BITS(n).
The decoder * only reads whole bytes from memory, so this is the lowest value of 'bitsleft' * at which another byte cannot be read without first consuming some bits. */ #define MAX_ENSURE (BITBUF_NBITS - 7) /* * Evaluates to true if 'n' is a valid argument to ENSURE_BITS(n), or false if * 'n' is too large to be passed to ENSURE_BITS(n). Note: if 'n' is a compile * time constant, then this expression will be a compile-time constant. * Therefore, CAN_ENSURE() can be used to choose between alternative * implementations at compile time. */ #define CAN_ENSURE(n) ((n) <= MAX_ENSURE) /* * Fill the bitbuffer variable, reading one byte at a time. * * If we would overread the input buffer, we just don't read anything, leaving * the bits zeroed but marking them filled. This simplifies the decompressor * because it removes the need to distinguish between real overreads and * overreads that occur only because of the decompressor's own lookahead. * * The disadvantage is that real overreads are not detected immediately. * However, this is safe because the decompressor is still guaranteed to make * forward progress when presented with never-ending 0 bits. In an existing block * output will be getting generated, whereas new blocks can only be uncompressed * (since the type code for uncompressed blocks is 0), for which we check for * previous overread. But even if we didn't check, uncompressed blocks would * fail to validate because LEN would not equal ~NLEN. So the decompressor will * eventually either detect that the output buffer is full, or detect invalid * input, or finish the final block. */ #define FILL_BITS_BYTEWISE() \ do { \ if (likely(in_next != in_end)) \ bitbuf |= (bitbuf_t)*in_next++ << bitsleft; \ else \ overrun_count++; \ bitsleft += 8; \ } while (bitsleft <= BITBUF_NBITS - 8) /* * Fill the bitbuffer variable by reading the next word from the input buffer * and branchlessly updating 'in_next' and 'bitsleft' based on how many bits * were filled.
This can be significantly faster than FILL_BITS_BYTEWISE(). * However, for this to work correctly, the word must be interpreted in * little-endian format. In addition, the memory access may be unaligned. * Therefore, this method is most efficient on little-endian architectures that * support fast unaligned access, such as x86 and x86_64. * * For faster updating of 'bitsleft', we consider the bitbuffer size in bits to * be 1 less than the word size and therefore be all 1 bits. Then the number of * bits filled is the value of the 0 bits in position >= 3 when changed to 1. * E.g. if words are 64 bits and bitsleft = 16 = b010000 then we refill b101000 * = 40 bits = 5 bytes. This uses only 4 operations to update 'in_next' and * 'bitsleft': one each of +, ^, >>, and |. (Not counting operations the * compiler optimizes out.) In contrast, the alternative of: * * in_next += (BITBUF_NBITS - bitsleft) >> 3; * bitsleft += (BITBUF_NBITS - bitsleft) & ~7; * * (where BITBUF_NBITS would be WORDBITS rather than WORDBITS - 1) would on * average refill an extra bit, but uses 5 operations: two +, and one each of * -, >>, and &. Also the - and & must be completed before 'bitsleft' can be * updated, while the current solution updates 'bitsleft' with no dependencies. */ #define FILL_BITS_WORDWISE() \ do { \ /* BITBUF_NBITS must be all 1's in binary, see above */ \ STATIC_ASSERT((BITBUF_NBITS & (BITBUF_NBITS + 1)) == 0);\ \ bitbuf |= get_unaligned_leword(in_next) << bitsleft; \ in_next += (bitsleft ^ BITBUF_NBITS) >> 3; \ bitsleft |= BITBUF_NBITS & ~7; \ } while (0) /* * Does the bitbuffer variable currently contain at least 'n' bits? */ #define HAVE_BITS(n) (bitsleft >= (n)) /* * Load more bits from the input buffer until the specified number of bits is * present in the bitbuffer variable. 'n' cannot be too large; see MAX_ENSURE * and CAN_ENSURE(). 
*/ #define ENSURE_BITS(n) \ if (!HAVE_BITS(n)) { \ if (CPU_IS_LITTLE_ENDIAN() && \ UNALIGNED_ACCESS_IS_FAST && \ likely(in_end - in_next >= sizeof(bitbuf_t))) \ FILL_BITS_WORDWISE(); \ else \ FILL_BITS_BYTEWISE(); \ } /* * Return the next 'n' bits from the bitbuffer variable without removing them. */ #define BITS(n) ((u32)bitbuf & (((u32)1 << (n)) - 1)) /* * Remove the next 'n' bits from the bitbuffer variable. */ #define REMOVE_BITS(n) (bitbuf >>= (n), bitsleft -= (n)) /* * Remove and return the next 'n' bits from the bitbuffer variable. */ #define POP_BITS(n) (tmp32 = BITS(n), REMOVE_BITS(n), tmp32) /* * Verify that the input buffer hasn't been overread, then align the input to * the next byte boundary, discarding any remaining bits in the current byte. * * Note that if the bitbuffer variable currently contains more than 7 bits, then * we must rewind 'in_next', effectively putting those bits back. Only the bits * in what would be the "current" byte if we were reading one byte at a time can * be actually discarded. */ #define ALIGN_INPUT() \ do { \ SAFETY_CHECK(overrun_count <= (bitsleft >> 3)); \ in_next -= (bitsleft >> 3) - overrun_count; \ overrun_count = 0; \ bitbuf = 0; \ bitsleft = 0; \ } while(0) /* * Read a 16-bit value from the input. This must have been preceded by a call * to ALIGN_INPUT(), and the caller must have already checked for overrun. */ #define READ_U16() (tmp16 = get_unaligned_le16(in_next), in_next += 2, tmp16) /***************************************************************************** * Huffman decoding * *****************************************************************************/ /* * A decode table for order TABLEBITS consists of a main table of (1 << * TABLEBITS) entries followed by a variable number of subtables. * * The decoding algorithm takes the next TABLEBITS bits of compressed data and * uses them as an index into the decode table. 
The resulting entry is either a * "direct entry", meaning that it contains the value desired, or a "subtable * pointer", meaning that the entry references a subtable that must be indexed * using more bits of the compressed data to decode the symbol. * * Each decode table (a main table along with its subtables, if any) is * associated with a Huffman code. Logically, the result of a decode table * lookup is a symbol from the alphabet from which the corresponding Huffman * code was constructed. A symbol with codeword length n <= TABLEBITS is * associated with 2**(TABLEBITS - n) direct entries in the table, whereas a * symbol with codeword length n > TABLEBITS is associated with one or more * subtable entries. * * On top of this basic design, we implement several optimizations: * * - We store the length of each codeword directly in each of its decode table * entries. This allows the codeword length to be produced without indexing * an additional table. * * - When beneficial, we don't store the Huffman symbol itself, but instead data * generated from it. For example, when decoding an offset symbol in DEFLATE, * it's more efficient if we can decode the offset base and number of extra * offset bits directly rather than decoding the offset symbol and then * looking up both of those values in an additional table or tables. * * The size of each decode table entry is 32 bits, which provides slightly * better performance than 16-bit entries on 32-bit and 64-bit processors, provided * that the table doesn't get so large that it takes up too much memory and * starts generating cache misses. The bits of each decode table entry are * defined as follows: * * - Bits 30 -- 31: flags (see below) * - Bits 8 -- 29: decode result: a Huffman symbol or related data * - Bits 0 -- 7: codeword length */ /* * This flag is set in all main decode table entries that represent subtable * pointers.
*/ #define HUFFDEC_SUBTABLE_POINTER 0x80000000 /* * This flag is set in all entries in the litlen decode table that represent * literals. */ #define HUFFDEC_LITERAL 0x40000000 /* Mask for extracting the codeword length from a decode table entry. */ #define HUFFDEC_LENGTH_MASK 0xFF /* Shift to extract the decode result from a decode table entry. */ #define HUFFDEC_RESULT_SHIFT 8 /* Shift a decode result into its position in the decode table entry. */ #define HUFFDEC_RESULT_ENTRY(result) ((u32)(result) << HUFFDEC_RESULT_SHIFT) /* The decode result for each precode symbol. There is no special optimization * for the precode; the decode result is simply the symbol value. */ static const u32 precode_decode_results[DEFLATE_NUM_PRECODE_SYMS] = { #define ENTRY(presym) HUFFDEC_RESULT_ENTRY(presym) ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) , ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) , ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) , ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) , ENTRY(16) , ENTRY(17) , ENTRY(18) , #undef ENTRY }; /* The decode result for each litlen symbol. For literals, this is the literal * value itself and the HUFFDEC_LITERAL flag. For lengths, this is the length * base and the number of extra length bits. 
*/ static const u32 litlen_decode_results[DEFLATE_NUM_LITLEN_SYMS] = { /* Literals */ #define ENTRY(literal) (HUFFDEC_LITERAL | HUFFDEC_RESULT_ENTRY(literal)) ENTRY(0) , ENTRY(1) , ENTRY(2) , ENTRY(3) , ENTRY(4) , ENTRY(5) , ENTRY(6) , ENTRY(7) , ENTRY(8) , ENTRY(9) , ENTRY(10) , ENTRY(11) , ENTRY(12) , ENTRY(13) , ENTRY(14) , ENTRY(15) , ENTRY(16) , ENTRY(17) , ENTRY(18) , ENTRY(19) , ENTRY(20) , ENTRY(21) , ENTRY(22) , ENTRY(23) , ENTRY(24) , ENTRY(25) , ENTRY(26) , ENTRY(27) , ENTRY(28) , ENTRY(29) , ENTRY(30) , ENTRY(31) , ENTRY(32) , ENTRY(33) , ENTRY(34) , ENTRY(35) , ENTRY(36) , ENTRY(37) , ENTRY(38) , ENTRY(39) , ENTRY(40) , ENTRY(41) , ENTRY(42) , ENTRY(43) , ENTRY(44) , ENTRY(45) , ENTRY(46) , ENTRY(47) , ENTRY(48) , ENTRY(49) , ENTRY(50) , ENTRY(51) , ENTRY(52) , ENTRY(53) , ENTRY(54) , ENTRY(55) , ENTRY(56) , ENTRY(57) , ENTRY(58) , ENTRY(59) , ENTRY(60) , ENTRY(61) , ENTRY(62) , ENTRY(63) , ENTRY(64) , ENTRY(65) , ENTRY(66) , ENTRY(67) , ENTRY(68) , ENTRY(69) , ENTRY(70) , ENTRY(71) , ENTRY(72) , ENTRY(73) , ENTRY(74) , ENTRY(75) , ENTRY(76) , ENTRY(77) , ENTRY(78) , ENTRY(79) , ENTRY(80) , ENTRY(81) , ENTRY(82) , ENTRY(83) , ENTRY(84) , ENTRY(85) , ENTRY(86) , ENTRY(87) , ENTRY(88) , ENTRY(89) , ENTRY(90) , ENTRY(91) , ENTRY(92) , ENTRY(93) , ENTRY(94) , ENTRY(95) , ENTRY(96) , ENTRY(97) , ENTRY(98) , ENTRY(99) , ENTRY(100) , ENTRY(101) , ENTRY(102) , ENTRY(103) , ENTRY(104) , ENTRY(105) , ENTRY(106) , ENTRY(107) , ENTRY(108) , ENTRY(109) , ENTRY(110) , ENTRY(111) , ENTRY(112) , ENTRY(113) , ENTRY(114) , ENTRY(115) , ENTRY(116) , ENTRY(117) , ENTRY(118) , ENTRY(119) , ENTRY(120) , ENTRY(121) , ENTRY(122) , ENTRY(123) , ENTRY(124) , ENTRY(125) , ENTRY(126) , ENTRY(127) , ENTRY(128) , ENTRY(129) , ENTRY(130) , ENTRY(131) , ENTRY(132) , ENTRY(133) , ENTRY(134) , ENTRY(135) , ENTRY(136) , ENTRY(137) , ENTRY(138) , ENTRY(139) , ENTRY(140) , ENTRY(141) , ENTRY(142) , ENTRY(143) , ENTRY(144) , ENTRY(145) , ENTRY(146) , ENTRY(147) , ENTRY(148) , ENTRY(149) , 
ENTRY(150) , ENTRY(151) , ENTRY(152) , ENTRY(153) , ENTRY(154) , ENTRY(155) , ENTRY(156) , ENTRY(157) , ENTRY(158) , ENTRY(159) , ENTRY(160) , ENTRY(161) , ENTRY(162) , ENTRY(163) , ENTRY(164) , ENTRY(165) , ENTRY(166) , ENTRY(167) , ENTRY(168) , ENTRY(169) , ENTRY(170) , ENTRY(171) , ENTRY(172) , ENTRY(173) , ENTRY(174) , ENTRY(175) , ENTRY(176) , ENTRY(177) , ENTRY(178) , ENTRY(179) , ENTRY(180) , ENTRY(181) , ENTRY(182) , ENTRY(183) , ENTRY(184) , ENTRY(185) , ENTRY(186) , ENTRY(187) , ENTRY(188) , ENTRY(189) , ENTRY(190) , ENTRY(191) , ENTRY(192) , ENTRY(193) , ENTRY(194) , ENTRY(195) , ENTRY(196) , ENTRY(197) , ENTRY(198) , ENTRY(199) , ENTRY(200) , ENTRY(201) , ENTRY(202) , ENTRY(203) , ENTRY(204) , ENTRY(205) , ENTRY(206) , ENTRY(207) , ENTRY(208) , ENTRY(209) , ENTRY(210) , ENTRY(211) , ENTRY(212) , ENTRY(213) , ENTRY(214) , ENTRY(215) , ENTRY(216) , ENTRY(217) , ENTRY(218) , ENTRY(219) , ENTRY(220) , ENTRY(221) , ENTRY(222) , ENTRY(223) , ENTRY(224) , ENTRY(225) , ENTRY(226) , ENTRY(227) , ENTRY(228) , ENTRY(229) , ENTRY(230) , ENTRY(231) , ENTRY(232) , ENTRY(233) , ENTRY(234) , ENTRY(235) , ENTRY(236) , ENTRY(237) , ENTRY(238) , ENTRY(239) , ENTRY(240) , ENTRY(241) , ENTRY(242) , ENTRY(243) , ENTRY(244) , ENTRY(245) , ENTRY(246) , ENTRY(247) , ENTRY(248) , ENTRY(249) , ENTRY(250) , ENTRY(251) , ENTRY(252) , ENTRY(253) , ENTRY(254) , ENTRY(255) , #undef ENTRY #define HUFFDEC_EXTRA_LENGTH_BITS_MASK 0xFF #define HUFFDEC_LENGTH_BASE_SHIFT 8 #define HUFFDEC_END_OF_BLOCK_LENGTH 0 #define ENTRY(length_base, num_extra_bits) HUFFDEC_RESULT_ENTRY( \ ((u32)(length_base) << HUFFDEC_LENGTH_BASE_SHIFT) | (num_extra_bits)) /* End of block */ ENTRY(HUFFDEC_END_OF_BLOCK_LENGTH, 0), /* Lengths */ ENTRY(3 , 0) , ENTRY(4 , 0) , ENTRY(5 , 0) , ENTRY(6 , 0), ENTRY(7 , 0) , ENTRY(8 , 0) , ENTRY(9 , 0) , ENTRY(10 , 0), ENTRY(11 , 1) , ENTRY(13 , 1) , ENTRY(15 , 1) , ENTRY(17 , 1), ENTRY(19 , 2) , ENTRY(23 , 2) , ENTRY(27 , 2) , ENTRY(31 , 2), ENTRY(35 , 3) , ENTRY(43 , 3) , 
ENTRY(51 , 3) , ENTRY(59 , 3), ENTRY(67 , 4) , ENTRY(83 , 4) , ENTRY(99 , 4) , ENTRY(115, 4), ENTRY(131, 5) , ENTRY(163, 5) , ENTRY(195, 5) , ENTRY(227, 5), ENTRY(258, 0) , ENTRY(258, 0) , ENTRY(258, 0) , #undef ENTRY }; /* The decode result for each offset symbol. This is the offset base and the * number of extra offset bits. */ static const u32 offset_decode_results[DEFLATE_NUM_OFFSET_SYMS] = { #define HUFFDEC_EXTRA_OFFSET_BITS_SHIFT 16 #define HUFFDEC_OFFSET_BASE_MASK (((u32)1 << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT) - 1) #define ENTRY(offset_base, num_extra_bits) HUFFDEC_RESULT_ENTRY( \ ((u32)(num_extra_bits) << HUFFDEC_EXTRA_OFFSET_BITS_SHIFT) | \ (offset_base)) ENTRY(1 , 0) , ENTRY(2 , 0) , ENTRY(3 , 0) , ENTRY(4 , 0) , ENTRY(5 , 1) , ENTRY(7 , 1) , ENTRY(9 , 2) , ENTRY(13 , 2) , ENTRY(17 , 3) , ENTRY(25 , 3) , ENTRY(33 , 4) , ENTRY(49 , 4) , ENTRY(65 , 5) , ENTRY(97 , 5) , ENTRY(129 , 6) , ENTRY(193 , 6) , ENTRY(257 , 7) , ENTRY(385 , 7) , ENTRY(513 , 8) , ENTRY(769 , 8) , ENTRY(1025 , 9) , ENTRY(1537 , 9) , ENTRY(2049 , 10) , ENTRY(3073 , 10) , ENTRY(4097 , 11) , ENTRY(6145 , 11) , ENTRY(8193 , 12) , ENTRY(12289 , 12) , ENTRY(16385 , 13) , ENTRY(24577 , 13) , ENTRY(32769 , 14) , ENTRY(49153 , 14) , #undef ENTRY }; /* * Build a table for fast decoding of symbols from a Huffman code. As input, * this function takes the codeword length of each symbol which may be used in * the code. As output, it produces a decode table for the canonical Huffman * code described by the codeword lengths. The decode table is built with the * assumption that it will be indexed with "bit-reversed" codewords, where the * low-order bit is the first bit of the codeword. This format is used for all * Huffman codes in DEFLATE. * * @decode_table * The array in which the decode table will be generated. This array must * have sufficient length; see the definition of the ENOUGH numbers. 
* @lens * An array which provides, for each symbol, the length of the * corresponding codeword in bits, or 0 if the symbol is unused. This may * alias @decode_table, since nothing is written to @decode_table until all * @lens have been consumed. All codeword lengths are assumed to be <= * @max_codeword_len but are otherwise considered untrusted. If they do * not form a valid Huffman code, then the decode table is not built and * %false is returned. * @num_syms * The number of symbols in the code, including all unused symbols. * @decode_results * An array which provides, for each symbol, the actual value to store into * the decode table. This value will be directly produced as the result of * decoding that symbol, thereby moving the indirection out of the decode * loop and into the table initialization. * @table_bits * The log base-2 of the number of main table entries to use. * @max_codeword_len * The maximum allowed codeword length for this Huffman code. * Must be <= DEFLATE_MAX_CODEWORD_LEN. * @sorted_syms * A temporary array of length @num_syms. * * Returns %true if successful; %false if the codeword lengths do not form a * valid Huffman code. 
*/ static bool build_decode_table(u32 decode_table[], const len_t lens[], const unsigned num_syms, const u32 decode_results[], const unsigned table_bits, const unsigned max_codeword_len, u16 *sorted_syms) { unsigned len_counts[DEFLATE_MAX_CODEWORD_LEN + 1]; unsigned offsets[DEFLATE_MAX_CODEWORD_LEN + 1]; unsigned sym; /* current symbol */ unsigned codeword; /* current codeword, bit-reversed */ unsigned len; /* current codeword length in bits */ unsigned count; /* num codewords remaining with this length */ u32 codespace_used; /* codespace used out of '2^max_codeword_len' */ unsigned cur_table_end; /* end index of current table */ unsigned subtable_prefix; /* codeword prefix of current subtable */ unsigned subtable_start; /* start index of current subtable */ unsigned subtable_bits; /* log2 of current subtable length */ /* Count how many codewords have each length, including 0. */ for (len = 0; len <= max_codeword_len; len++) len_counts[len] = 0; for (sym = 0; sym < num_syms; sym++) len_counts[lens[sym]]++; /* * Sort the symbols primarily by increasing codeword length and * secondarily by increasing symbol value; or equivalently by their * codewords in lexicographic order, since a canonical code is assumed. * * For efficiency, also compute 'codespace_used' in the same pass over * 'len_counts[]' used to build 'offsets[]' for sorting. */ /* Ensure that 'codespace_used' cannot overflow. 
*/ STATIC_ASSERT(sizeof(codespace_used) == 4); STATIC_ASSERT(UINT32_MAX / (1U << (DEFLATE_MAX_CODEWORD_LEN - 1)) >= DEFLATE_MAX_NUM_SYMS); offsets[0] = 0; offsets[1] = len_counts[0]; codespace_used = 0; for (len = 1; len < max_codeword_len; len++) { offsets[len + 1] = offsets[len] + len_counts[len]; codespace_used = (codespace_used << 1) + len_counts[len]; } codespace_used = (codespace_used << 1) + len_counts[len]; for (sym = 0; sym < num_syms; sym++) sorted_syms[offsets[lens[sym]]++] = sym; sorted_syms += offsets[0]; /* Skip unused symbols */ /* lens[] is done being used, so we can write to decode_table[] now. */ /* * Check whether the lengths form a complete code (exactly fills the * codespace), an incomplete code (doesn't fill the codespace), or an * overfull code (overflows the codespace). A codeword of length 'n' * uses proportion '1/(2^n)' of the codespace. An overfull code is * nonsensical, so is considered invalid. An incomplete code is * considered valid only in two specific cases; see below. */ /* overfull code? */ if (unlikely(codespace_used > (1U << max_codeword_len))) return false; /* incomplete code? */ if (unlikely(codespace_used < (1U << max_codeword_len))) { u32 entry; unsigned i; if (codespace_used == 0) { /* * An empty code is allowed. This can happen for the * offset code in DEFLATE, since a dynamic Huffman block * need not contain any matches. */ /* sym=0, len=1 (arbitrary) */ entry = decode_results[0] | 1; } else { /* * Allow codes with a single used symbol, with codeword * length 1. The DEFLATE RFC is unclear regarding this * case. What zlib's decompressor does is permit this * for the litlen and offset codes and assume the * codeword is '0' rather than '1'. We do the same * except we allow this for precodes too, since there's * no convincing reason to treat the codes differently. * We also assign both codewords '0' and '1' to the * symbol to avoid having to handle '1' specially. 
*/ if (codespace_used != (1U << (max_codeword_len - 1)) || len_counts[1] != 1) return false; entry = decode_results[*sorted_syms] | 1; } /* * Note: the decode table still must be fully initialized, in * case the stream is malformed and contains bits from the part * of the codespace the incomplete code doesn't use. */ for (i = 0; i < (1U << table_bits); i++) decode_table[i] = entry; return true; } /* * The lengths form a complete code. Now, enumerate the codewords in * lexicographic order and fill the decode table entries for each one. * * First, process all codewords with len <= table_bits. Each one gets * '2^(table_bits-len)' direct entries in the table. * * Since DEFLATE uses bit-reversed codewords, these entries aren't * consecutive but rather are spaced '2^len' entries apart. This makes * filling them naively somewhat awkward and inefficient, since strided * stores are less cache-friendly and preclude the use of word or * vector-at-a-time stores to fill multiple entries per instruction. * * To optimize this, we incrementally double the table size. When * processing codewords with length 'len', the table is treated as * having only '2^len' entries, so each codeword uses just one entry. * Then, each time 'len' is incremented, the table size is doubled and * the first half is copied to the second half. This significantly * improves performance over naively doing strided stores. * * Note that some entries copied for each table doubling may not have * been initialized yet, but it doesn't matter since they're guaranteed * to be initialized later (because the Huffman code is complete). */ codeword = 0; len = 1; while ((count = len_counts[len]) == 0) len++; cur_table_end = 1U << len; while (len <= table_bits) { /* Process all 'count' codewords with length 'len' bits. */ do { unsigned bit; /* Fill the first entry for the current codeword. 
*/ decode_table[codeword] = decode_results[*sorted_syms++] | len; if (codeword == cur_table_end - 1) { /* Last codeword (all 1's) */ for (; len < table_bits; len++) { memcpy(&decode_table[cur_table_end], decode_table, cur_table_end * sizeof(decode_table[0])); cur_table_end <<= 1; } return true; } /* * To advance to the lexicographically next codeword in * the canonical code, the codeword must be incremented, * then 0's must be appended to the codeword as needed * to match the next codeword's length. * * Since the codeword is bit-reversed, appending 0's is * a no-op. However, incrementing it is nontrivial. To * do so efficiently, use the 'bsr' instruction to find * the last (highest order) 0 bit in the codeword, set * it, and clear any later (higher order) 1 bits. But * 'bsr' actually finds the highest order 1 bit, so to * use it first flip all bits in the codeword by XOR'ing * it with (1U << len) - 1 == cur_table_end - 1. */ bit = 1U << bsr32(codeword ^ (cur_table_end - 1)); codeword &= bit - 1; codeword |= bit; } while (--count); /* Advance to the next codeword length. */ do { if (++len <= table_bits) { memcpy(&decode_table[cur_table_end], decode_table, cur_table_end * sizeof(decode_table[0])); cur_table_end <<= 1; } } while ((count = len_counts[len]) == 0); } /* Process codewords with len > table_bits. These require subtables. */ cur_table_end = 1U << table_bits; subtable_prefix = -1; subtable_start = 0; for (;;) { u32 entry; unsigned i; unsigned stride; unsigned bit; /* * Start a new subtable if the first 'table_bits' bits of the * codeword don't match the prefix of the current subtable. */ if ((codeword & ((1U << table_bits) - 1)) != subtable_prefix) { subtable_prefix = (codeword & ((1U << table_bits) - 1)); subtable_start = cur_table_end; /* * Calculate the subtable length. If the codeword has * length 'table_bits + n', then the subtable needs * '2^n' entries. 
But it may need more; if fewer than * '2^n' codewords of length 'table_bits + n' remain, * then the length will need to be incremented to bring * in longer codewords until the subtable can be * completely filled. Note that because the Huffman * code is complete, it will always be possible to fill * the subtable eventually. */ subtable_bits = len - table_bits; codespace_used = count; while (codespace_used < (1U << subtable_bits)) { subtable_bits++; codespace_used = (codespace_used << 1) + len_counts[table_bits + subtable_bits]; } cur_table_end = subtable_start + (1U << subtable_bits); /* * Create the entry that points from the main table to * the subtable. This entry contains the index of the * start of the subtable and the number of bits with * which the subtable is indexed (the log base 2 of the * number of entries it contains). */ decode_table[subtable_prefix] = HUFFDEC_SUBTABLE_POINTER | HUFFDEC_RESULT_ENTRY(subtable_start) | subtable_bits; } /* Fill the subtable entries for the current codeword. */ entry = decode_results[*sorted_syms++] | (len - table_bits); i = subtable_start + (codeword >> table_bits); stride = 1U << (len - table_bits); do { decode_table[i] = entry; i += stride; } while (i < cur_table_end); /* Advance to the next codeword. */ if (codeword == (1U << len) - 1) /* last codeword (all 1's)? */ return true; bit = 1U << bsr32(codeword ^ ((1U << len) - 1)); codeword &= bit - 1; codeword |= bit; count--; while (count == 0) count = len_counts[++len]; } } /* Build the decode table for the precode. */ static bool build_precode_decode_table(struct libdeflate_decompressor *d) { /* When you change TABLEBITS, you must change ENOUGH, and vice versa! */ STATIC_ASSERT(PRECODE_TABLEBITS == 7 && PRECODE_ENOUGH == 128); return build_decode_table(d->u.l.precode_decode_table, d->u.precode_lens, DEFLATE_NUM_PRECODE_SYMS, precode_decode_results, PRECODE_TABLEBITS, DEFLATE_MAX_PRE_CODEWORD_LEN, d->sorted_syms); } /* Build the decode table for the literal/length code. 
*/ /* The litlen codeword lengths are read from the start of d->u.l.lens, which shares a union with the output table d->u.litlen_decode_table; build_decode_table() documents that 'lens' may alias 'decode_table'. 'num_offset_syms' is not used by this function. */ static bool build_litlen_decode_table(struct libdeflate_decompressor *d, unsigned num_litlen_syms, unsigned num_offset_syms) { /* When you change TABLEBITS, you must change ENOUGH, and vice versa! */ STATIC_ASSERT(LITLEN_TABLEBITS == 10 && LITLEN_ENOUGH == 1334); return build_decode_table(d->u.litlen_decode_table, d->u.l.lens, num_litlen_syms, litlen_decode_results, LITLEN_TABLEBITS, DEFLATE_MAX_LITLEN_CODEWORD_LEN, d->sorted_syms); } /* Build the decode table for the offset code. The offset codeword lengths are read starting at d->u.l.lens[num_litlen_syms]. */ static bool build_offset_decode_table(struct libdeflate_decompressor *d, unsigned num_litlen_syms, unsigned num_offset_syms) { /* When you change TABLEBITS, you must change ENOUGH, and vice versa! */ STATIC_ASSERT(OFFSET_TABLEBITS == 8 && OFFSET_ENOUGH == 402); return build_decode_table(d->offset_decode_table, d->u.l.lens + num_litlen_syms, num_offset_syms, offset_decode_results, OFFSET_TABLEBITS, DEFLATE_MAX_OFFSET_CODEWORD_LEN, d->sorted_syms); } /* Return a machine word in which every byte equals 'b'. */ static forceinline machine_word_t repeat_byte(u8 b) { machine_word_t v; STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); v = b; v |= v << 8; v |= v << 16; /* the last doubling only applies to 64-bit words; with 32-bit words the shift count is 0, so the statement changes nothing */ v |= v << ((WORDBITS == 64) ?
32 : 0); return v; } /* Copy one machine word from 'src' to 'dst' using load_word_unaligned() and store_word_unaligned(). */ static forceinline void copy_word_unaligned(const void *src, void *dst) { store_word_unaligned(load_word_unaligned(src), dst); } /***************************************************************************** * Main decompression routine *****************************************************************************/ /* Function-pointer type matching the signature of the decompression routine implementations; dispatch() uses it to hold the implementation it selected. */ typedef enum libdeflate_result (*decompress_func_t) (struct libdeflate_decompressor * restrict d, const void * restrict in, size_t in_nbytes, void * restrict out, size_t out_nbytes_avail, size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret); #undef DEFAULT_IMPL #undef DISPATCH #if defined(__i386__) || defined(__x86_64__) # include "x86/decompress_impl.h" #endif #ifndef DEFAULT_IMPL # define FUNCNAME deflate_decompress_default # define ATTRIBUTES # include "decompress_template.h" # define DEFAULT_IMPL deflate_decompress_default #endif #ifdef DISPATCH static enum libdeflate_result dispatch(struct libdeflate_decompressor * restrict d, const void * restrict in, size_t in_nbytes, void * restrict out, size_t out_nbytes_avail, size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret); /* Starts out pointing at dispatch(), which overwrites it with the selected implementation the first time it is called. */ static volatile decompress_func_t decompress_impl = dispatch; /* Choose the fastest implementation at runtime */ static enum libdeflate_result dispatch(struct libdeflate_decompressor * restrict d, const void * restrict in, size_t in_nbytes, void * restrict out, size_t out_nbytes_avail, size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret) { decompress_func_t f = arch_select_decompress_func(); /* no arch-specific implementation was selected: fall back to DEFAULT_IMPL */ if (f == NULL) f = DEFAULT_IMPL; decompress_impl = f; return (*f)(d, in, in_nbytes, out, out_nbytes_avail, actual_in_nbytes_ret, actual_out_nbytes_ret); } #else # define decompress_impl DEFAULT_IMPL /* only one implementation, use it */ #endif /* * This is the main DEFLATE decompression routine. See libdeflate.h for the * documentation. * * Note that the real code is in decompress_template.h.
The part here just * handles calling the appropriate implementation depending on the CPU features * at runtime. */ LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI libdeflate_deflate_decompress_ex(struct libdeflate_decompressor * restrict d, const void * restrict in, size_t in_nbytes, void * restrict out, size_t out_nbytes_avail, size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret) { return decompress_impl(d, in, in_nbytes, out, out_nbytes_avail, actual_in_nbytes_ret, actual_out_nbytes_ret); } /* Same as libdeflate_deflate_decompress_ex(), but passes NULL for 'actual_in_nbytes_ret'. */ LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI libdeflate_deflate_decompress(struct libdeflate_decompressor * restrict d, const void * restrict in, size_t in_nbytes, void * restrict out, size_t out_nbytes_avail, size_t *actual_out_nbytes_ret) { return libdeflate_deflate_decompress_ex(d, in, in_nbytes, out, out_nbytes_avail, NULL, actual_out_nbytes_ret); } /* Allocate a zero-filled decompressor with calloc(); the result is NULL if the allocation fails. */ LIBDEFLATEEXPORT struct libdeflate_decompressor * LIBDEFLATEAPI libdeflate_alloc_decompressor(void) { /* * Note that only certain parts of the decompressor actually must be * initialized here: * * - 'static_codes_loaded' must be initialized to false. * * - The first half of the main portion of each decode table must be * initialized to any value, to avoid reading from uninitialized * memory during table expansion in build_decode_table(). (Although, * this is really just to avoid warnings with dynamic tools like * valgrind, since build_decode_table() is guaranteed to initialize * all entries eventually anyway.) * * But for simplicity, we currently just zero the whole decompressor. */ return calloc(1, sizeof(struct libdeflate_decompressor)); } /* Release a decompressor by passing it to free(); as with free(), a NULL argument is a no-op. */ LIBDEFLATEEXPORT void LIBDEFLATEAPI libdeflate_free_decompressor(struct libdeflate_decompressor *d) { free(d); } libdeflate-1.5/lib/gzip_compress.c000066400000000000000000000053701360172702500172600ustar00rootroot00000000000000/* * gzip_compress.c - compress with a gzip wrapper * * Originally public domain; changes after 2016-09-07 are copyrighted.
* * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "deflate_compress.h" #include "gzip_constants.h" #include "unaligned.h" #include "libdeflate.h" LIBDEFLATEEXPORT size_t LIBDEFLATEAPI libdeflate_gzip_compress(struct libdeflate_compressor *c, const void *in, size_t in_size, void *out, size_t out_nbytes_avail) { u8 *out_next = out; unsigned compression_level; u8 xfl; size_t deflate_size; if (out_nbytes_avail <= GZIP_MIN_OVERHEAD) return 0; /* ID1 */ *out_next++ = GZIP_ID1; /* ID2 */ *out_next++ = GZIP_ID2; /* CM */ *out_next++ = GZIP_CM_DEFLATE; /* FLG */ *out_next++ = 0; /* MTIME */ put_unaligned_le32(GZIP_MTIME_UNAVAILABLE, out_next); out_next += 4; /* XFL */ xfl = 0; compression_level = deflate_get_compression_level(c); if (compression_level < 2) xfl |= GZIP_XFL_FASTEST_COMRESSION; else if (compression_level >= 8) xfl |= GZIP_XFL_SLOWEST_COMRESSION; *out_next++ = xfl; /* OS */ *out_next++ = GZIP_OS_UNKNOWN; /* OS */ /* Compressed data */ deflate_size = libdeflate_deflate_compress(c, in, in_size, out_next, out_nbytes_avail - GZIP_MIN_OVERHEAD); if (deflate_size == 0) return 0; out_next += deflate_size; /* CRC32 */ put_unaligned_le32(libdeflate_crc32(0, in, in_size), out_next); out_next += 4; /* ISIZE */ put_unaligned_le32((u32)in_size, out_next); out_next += 4; return out_next - (u8 *)out; } LIBDEFLATEEXPORT size_t LIBDEFLATEAPI libdeflate_gzip_compress_bound(struct libdeflate_compressor *c, size_t in_nbytes) { return GZIP_MIN_OVERHEAD + libdeflate_deflate_compress_bound(c, in_nbytes); } libdeflate-1.5/lib/gzip_constants.h000066400000000000000000000020031360172702500174340ustar00rootroot00000000000000/* * gzip_constants.h - constants for the gzip wrapper format */ #ifndef LIB_GZIP_CONSTANTS_H #define LIB_GZIP_CONSTANTS_H #define GZIP_MIN_HEADER_SIZE 10 #define GZIP_FOOTER_SIZE 8 #define GZIP_MIN_OVERHEAD (GZIP_MIN_HEADER_SIZE + GZIP_FOOTER_SIZE) #define GZIP_ID1 0x1F #define GZIP_ID2 0x8B #define GZIP_CM_DEFLATE 8 #define GZIP_FTEXT 0x01 #define GZIP_FHCRC 0x02 #define GZIP_FEXTRA 0x04 #define 
GZIP_FNAME 0x08 #define GZIP_FCOMMENT 0x10 #define GZIP_FRESERVED 0xE0 #define GZIP_MTIME_UNAVAILABLE 0 #define GZIP_XFL_SLOWEST_COMRESSION 0x02 #define GZIP_XFL_FASTEST_COMRESSION 0x04 #define GZIP_OS_FAT 0 #define GZIP_OS_AMIGA 1 #define GZIP_OS_VMS 2 #define GZIP_OS_UNIX 3 #define GZIP_OS_VM_CMS 4 #define GZIP_OS_ATARI_TOS 5 #define GZIP_OS_HPFS 6 #define GZIP_OS_MACINTOSH 7 #define GZIP_OS_Z_SYSTEM 8 #define GZIP_OS_CP_M 9 #define GZIP_OS_TOPS_20 10 #define GZIP_OS_NTFS 11 #define GZIP_OS_QDOS 12 #define GZIP_OS_RISCOS 13 #define GZIP_OS_UNKNOWN 255 #endif /* LIB_GZIP_CONSTANTS_H */ libdeflate-1.5/lib/gzip_decompress.c000066400000000000000000000100401360172702500175570ustar00rootroot00000000000000/* * gzip_decompress.c - decompress with a gzip wrapper * * Originally public domain; changes after 2016-09-07 are copyrighted. * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "gzip_constants.h" #include "unaligned.h" #include "libdeflate.h" LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *d, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail, size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret) { const u8 *in_next = in; const u8 * const in_end = in_next + in_nbytes; u8 flg; size_t actual_in_nbytes; size_t actual_out_nbytes; enum libdeflate_result result; if (in_nbytes < GZIP_MIN_OVERHEAD) return LIBDEFLATE_BAD_DATA; /* ID1 */ if (*in_next++ != GZIP_ID1) return LIBDEFLATE_BAD_DATA; /* ID2 */ if (*in_next++ != GZIP_ID2) return LIBDEFLATE_BAD_DATA; /* CM */ if (*in_next++ != GZIP_CM_DEFLATE) return LIBDEFLATE_BAD_DATA; flg = *in_next++; /* MTIME */ in_next += 4; /* XFL */ in_next += 1; /* OS */ in_next += 1; if (flg & GZIP_FRESERVED) return LIBDEFLATE_BAD_DATA; /* Extra field */ if (flg & GZIP_FEXTRA) { u16 xlen = get_unaligned_le16(in_next); in_next += 2; if (in_end - in_next < (u32)xlen + GZIP_FOOTER_SIZE) return LIBDEFLATE_BAD_DATA; in_next += xlen; } /* Original file name (zero terminated) */ if (flg & GZIP_FNAME) { while (*in_next++ != 0 && in_next != in_end) ; if (in_end - in_next < GZIP_FOOTER_SIZE) return LIBDEFLATE_BAD_DATA; } /* File comment (zero terminated) */ if (flg & GZIP_FCOMMENT) { while (*in_next++ != 0 && in_next != in_end) ; if (in_end - in_next < GZIP_FOOTER_SIZE) return LIBDEFLATE_BAD_DATA; } /* CRC16 for gzip header */ if (flg & GZIP_FHCRC) { in_next += 2; if (in_end - in_next < GZIP_FOOTER_SIZE) return LIBDEFLATE_BAD_DATA; } /* Compressed data */ result = libdeflate_deflate_decompress_ex(d, in_next, in_end - GZIP_FOOTER_SIZE - in_next, out, out_nbytes_avail, &actual_in_nbytes, actual_out_nbytes_ret); if (result != LIBDEFLATE_SUCCESS) return result; if (actual_out_nbytes_ret) actual_out_nbytes = *actual_out_nbytes_ret; else actual_out_nbytes = out_nbytes_avail; in_next += actual_in_nbytes; /* CRC32 */ if 
(libdeflate_crc32(0, out, actual_out_nbytes) != get_unaligned_le32(in_next)) return LIBDEFLATE_BAD_DATA; in_next += 4; /* ISIZE */ if ((u32)actual_out_nbytes != get_unaligned_le32(in_next)) return LIBDEFLATE_BAD_DATA; in_next += 4; if (actual_in_nbytes_ret) *actual_in_nbytes_ret = in_next - (u8 *)in; return LIBDEFLATE_SUCCESS; } LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI libdeflate_gzip_decompress(struct libdeflate_decompressor *d, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail, size_t *actual_out_nbytes_ret) { return libdeflate_gzip_decompress_ex(d, in, in_nbytes, out, out_nbytes_avail, NULL, actual_out_nbytes_ret); } libdeflate-1.5/lib/hc_matchfinder.h000066400000000000000000000334351360172702500173420ustar00rootroot00000000000000/* * hc_matchfinder.h - Lempel-Ziv matchfinding with a hash table of linked lists * * Originally public domain; changes after 2016-09-07 are copyrighted. * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. * * --------------------------------------------------------------------------- * * Algorithm * * This is a Hash Chains (hc) based matchfinder. * * The main data structure is a hash table where each hash bucket contains a * linked list (or "chain") of sequences whose first 4 bytes share the same hash * code. Each sequence is identified by its starting position in the input * buffer. * * The algorithm processes the input buffer sequentially. At each byte * position, the hash code of the first 4 bytes of the sequence beginning at * that position (the sequence being matched against) is computed. This * identifies the hash bucket to use for that position. Then, this hash * bucket's linked list is searched for matches. Then, a new linked list node * is created to represent the current sequence and is prepended to the list. * * This algorithm has several useful properties: * * - It only finds true Lempel-Ziv matches; i.e., those where the matching * sequence occurs prior to the sequence being matched against. * * - The sequences in each linked list are always sorted by decreasing starting * position. Therefore, the closest (smallest offset) matches are found * first, which in many compression formats tend to be the cheapest to encode. * * - Although fast running time is not guaranteed due to the possibility of the * lists getting very long, the worst degenerate behavior can be easily * prevented by capping the number of nodes searched at each position. * * - If the compressor decides not to search for matches at a certain position, * then that position can be quickly inserted without searching the list. 
* * - The algorithm is adaptable to sliding windows: just store the positions * relative to a "base" value that is updated from time to time, and stop * searching each list when the sequences get too far away. * * ---------------------------------------------------------------------------- * * Optimizations * * The main hash table and chains handle length 4+ matches. Length 3 matches * are handled by a separate hash table with no chains. This works well for * typical "greedy" or "lazy"-style compressors, where length 3 matches are * often only helpful if they have small offsets. Instead of searching a full * chain for length 3+ matches, the algorithm just checks for one close length 3 * match, then focuses on finding length 4+ matches. * * The longest_match() and skip_positions() functions are inlined into the * compressors that use them. This isn't just about saving the overhead of a * function call. These functions are intended to be called from the inner * loops of compressors, where giving the compiler more control over register * allocation is very helpful. There is also significant benefit to be gained * from allowing the CPU to predict branches independently at each call site. * For example, "lazy"-style compressors can be written with two calls to * longest_match(), each of which starts with a different 'best_len' and * therefore has significantly different performance characteristics. * * Although any hash function can be used, a multiplicative hash is fast and * works well. * * On some processors, it is significantly faster to extend matches by whole * words (32 or 64 bits) instead of by individual bytes. For this to be the * case, the processor must implement unaligned memory accesses efficiently and * must have either a fast "find first set bit" instruction or a fast "find last * set bit" instruction, depending on the processor's endianness. * * The code uses one loop for finding the first match and one loop for finding a * longer match. 
Each of these loops is tuned for its respective task and in
 * combination they are faster than a single generalized loop that handles both
 * tasks.
 *
 * The code also uses a tight inner loop that only compares the last and first
 * bytes of a potential match.  It is only when these bytes match that a full
 * match extension is attempted.
 *
 * ----------------------------------------------------------------------------
 */

#include "matchfinder_common.h"

/* log2 of the number of entries in the length 3 hash table (hash3_tab) */
#define HC_MATCHFINDER_HASH3_ORDER	15
/* log2 of the number of entries in the length 4+ hash table (hash4_tab) */
#define HC_MATCHFINDER_HASH4_ORDER	16

/*
 * Combined number of entries in hash3_tab and hash4_tab.  This is the entry
 * count that hc_matchfinder_init() hands to matchfinder_init() together with a
 * pointer to the start of the structure, so it relies on the two hash tables
 * being the first members of struct hc_matchfinder.  next_tab is not counted.
 */
#define HC_MATCHFINDER_TOTAL_HASH_LENGTH		\
	((1UL << HC_MATCHFINDER_HASH3_ORDER) +		\
	 (1UL << HC_MATCHFINDER_HASH4_ORDER))

/*
 * All matchfinder state.  Every member is an array of mf_pos_t; the functions
 * below cast a pointer to this structure to 'mf_pos_t *' and treat it as one
 * flat array, so the member order and element type matter.
 */
struct hc_matchfinder {

	/* The hash table for finding length 3 matches  */
	mf_pos_t hash3_tab[1UL << HC_MATCHFINDER_HASH3_ORDER];

	/* The hash table which contains the first nodes of the linked lists for
	 * finding length 4+ matches  */
	mf_pos_t hash4_tab[1UL << HC_MATCHFINDER_HASH4_ORDER];

	/* The "next node" references for the linked lists.  The "next node" of
	 * the node for the sequence with position 'pos' is 'next_tab[pos]'.  */
	mf_pos_t next_tab[MATCHFINDER_WINDOW_SIZE];

}
/* Request MATCHFINDER_ALIGNMENT-byte alignment when the compiler supports it */
#ifdef _aligned_attribute
  _aligned_attribute(MATCHFINDER_ALIGNMENT)
#endif
;

/*
 * Prepare the matchfinder for a new input buffer.
 *
 * Only the first HC_MATCHFINDER_TOTAL_HASH_LENGTH entries of the structure
 * (i.e. hash3_tab and hash4_tab) are passed to matchfinder_init(); next_tab is
 * not touched here.
 */
static forceinline void
hc_matchfinder_init(struct hc_matchfinder *mf)
{
	matchfinder_init((mf_pos_t *)mf, HC_MATCHFINDER_TOTAL_HASH_LENGTH);
}

/*
 * Rebase every position stored in the matchfinder.
 *
 * The whole structure (both hash tables and next_tab) is passed to
 * matchfinder_rebase() as a single array of mf_pos_t.  The functions in this
 * file call this when the current position reaches MATCHFINDER_WINDOW_SIZE,
 * and then advance the base pointer by the same amount.
 */
static forceinline void
hc_matchfinder_slide_window(struct hc_matchfinder *mf)
{
	matchfinder_rebase((mf_pos_t *)mf,
			   sizeof(struct hc_matchfinder) / sizeof(mf_pos_t));
}

/*
 * Find the longest match longer than 'best_len' bytes.
 *
 * @mf
 *	The matchfinder structure.
 * @in_base_p
 *	Location of a pointer which points to the place in the input data the
 *	matchfinder currently stores positions relative to.  This may be updated
 *	by this function.
 * @in_next
 *	Pointer to the next byte in the input buffer, i.e. the start of the
 *	sequence being matched against.  The current position is computed as
 *	@in_next minus the pointer stored at @in_base_p.
* @best_len
 *	Require a match longer than this length.
 * @max_len
 *	The maximum permissible match length at this position.
 * @nice_len
 *	Stop searching if a match of at least this length is found.
 *	Must be <= @max_len.
 * @max_search_depth
 *	Limit on the number of potential matches to consider.  Must be >= 1.
 * @next_hashes
 *	The precomputed hash codes for the sequence beginning at @in_next.
 *	These will be used and then updated with the precomputed hashcodes for
 *	the sequence beginning at @in_next + 1.
 * @offset_ret
 *	If a match is found, its offset is returned in this location.
 *
 * Return the length of the match found, or 'best_len' if no match longer than
 * 'best_len' was found.
 *
 * Notes:
 *  - *@offset_ret is written on every return path; it is 0 if no match pointer
 *    was recorded, because the best match pointer starts out equal to @in_next.
 *  - If @max_len < 5, the function returns before inserting the current
 *    position into the tables and before updating @next_hashes.
 */
static forceinline u32
hc_matchfinder_longest_match(struct hc_matchfinder * const restrict mf,
			     const u8 ** const restrict in_base_p,
			     const u8 * const restrict in_next,
			     u32 best_len,
			     const u32 max_len,
			     const u32 nice_len,
			     const u32 max_search_depth,
			     u32 * const restrict next_hashes,
			     u32 * const restrict offset_ret)
{
	u32 depth_remaining = max_search_depth;
	const u8 *best_matchptr = in_next;
	mf_pos_t cur_node3, cur_node4;
	u32 hash3, hash4;
	u32 next_hashseq;
	u32 seq4;
	const u8 *matchptr;
	u32 len;
	u32 cur_pos = in_next - *in_base_p;
	const u8 *in_base;
	mf_pos_t cutoff;

	/* A full window has been processed: rebase all stored positions and
	 * move the base pointer forward so that positions stay within range. */
	if (cur_pos == MATCHFINDER_WINDOW_SIZE) {
		hc_matchfinder_slide_window(mf);
		*in_base_p += MATCHFINDER_WINDOW_SIZE;
		cur_pos = 0;
	}

	in_base = *in_base_p;
	/* Any node whose position is <= cutoff ends the search; such nodes are
	 * a full window or more behind the current position. */
	cutoff = cur_pos - MATCHFINDER_WINDOW_SIZE;

	if (unlikely(max_len < 5)) /* can we read 4 bytes from 'in_next + 1'? */
		goto out;

	/* Get the precomputed hash codes.  */
	hash3 = next_hashes[0];
	hash4 = next_hashes[1];

	/* From the hash buckets, get the first node of each linked list.  */
	cur_node3 = mf->hash3_tab[hash3];
	cur_node4 = mf->hash4_tab[hash4];

	/* Update for length 3 matches.  This replaces the singleton node in the
	 * 'hash3' bucket with the node for the current sequence.  */
	mf->hash3_tab[hash3] = cur_pos;

	/* Update for length 4 matches.  This prepends the node for the current
	 * sequence to the linked list in the 'hash4' bucket.  */
	mf->hash4_tab[hash4] = cur_pos;
	mf->next_tab[cur_pos] = cur_node4;

	/* Compute the next hash codes.  */
	next_hashseq = get_unaligned_le32(in_next + 1);
	next_hashes[0] = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER);
	next_hashes[1] = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER);
	/* Prefetch the buckets that the next call will read and write. */
	prefetchw(&mf->hash3_tab[next_hashes[0]]);
	prefetchw(&mf->hash4_tab[next_hashes[1]]);

	if (best_len < 4) {  /* No match of length >= 4 found yet?  */

		/* Check for a length 3 match if needed.  */

		/* NOTE(review): this exits without looking at the hash4 chain.
		 * Presumably that is safe because hash3_tab is overwritten at
		 * every position, so its entry is at least as recent as any
		 * real 4-byte match in the chain -- confirm. */
		if (cur_node3 <= cutoff)
			goto out;

		seq4 = load_u32_unaligned(in_next);

		if (best_len < 3) {
			matchptr = &in_base[cur_node3];
			if (load_u24_unaligned(matchptr) == loaded_u32_to_u24(seq4)) {
				best_len = 3;
				best_matchptr = matchptr;
			}
		}

		/* Check for a length 4 match.  */

		if (cur_node4 <= cutoff)
			goto out;

		for (;;) {
			/* No length 4 match found yet.  Check the first 4 bytes.  */
			matchptr = &in_base[cur_node4];

			if (load_u32_unaligned(matchptr) == seq4)
				break;

			/* The first 4 bytes did not match.  Keep trying.  */
			/* The mask maps the (possibly negative) node position
			 * to an index within next_tab. */
			cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
			if (cur_node4 <= cutoff || !--depth_remaining)
				goto out;
		}

		/* Found a match of length >= 4.  Extend it to its full length.  */
		best_matchptr = matchptr;
		best_len = lz_extend(in_next, best_matchptr, 4, max_len);
		if (best_len >= nice_len)
			goto out;
		cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
		if (cur_node4 <= cutoff || !--depth_remaining)
			goto out;
	} else {
		/* The caller already holds a match of length >= 4; only keep
		 * going if the chain is non-empty and that match is not
		 * already "nice" enough. */
		if (cur_node4 <= cutoff || best_len >= nice_len)
			goto out;
	}

	/* Check for matches of length >= 5.  */

	for (;;) {
		for (;;) {
			matchptr = &in_base[cur_node4];

			/* Already found a length 4 match.  Try for a longer
			 * match; start by checking either the last 4 bytes and
			 * the first 4 bytes, or the last byte.  (The last byte,
			 * the one which would extend the match length by 1, is
			 * the most important.)  */
		#if UNALIGNED_ACCESS_IS_FAST
			if ((load_u32_unaligned(matchptr + best_len - 3) ==
			     load_u32_unaligned(in_next + best_len - 3)) &&
			    (load_u32_unaligned(matchptr) ==
			     load_u32_unaligned(in_next)))
		#else
			if (matchptr[best_len] == in_next[best_len])
		#endif
				break;

			/* Continue to the next node in the list.  */
			cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
			if (cur_node4 <= cutoff || !--depth_remaining)
				goto out;
		}

		/* In the fast-unaligned path the first 4 bytes were just
		 * verified, so extension starts at 4.  In the other path only
		 * the byte at 'best_len' was compared, so the whole candidate
		 * must be checked from byte 0. */
	#if UNALIGNED_ACCESS_IS_FAST
		len = 4;
	#else
		len = 0;
	#endif
		len = lz_extend(in_next, matchptr, len, max_len);
		if (len > best_len) {
			/* This is the new longest match.  */
			best_len = len;
			best_matchptr = matchptr;
			if (best_len >= nice_len)
				goto out;
		}

		/* Continue to the next node in the list.  */
		cur_node4 = mf->next_tab[cur_node4 & (MATCHFINDER_WINDOW_SIZE - 1)];
		if (cur_node4 <= cutoff || !--depth_remaining)
			goto out;
	}
out:
	/* Distance back from the current sequence to the best match found. */
	*offset_ret = in_next - best_matchptr;
	return best_len;
}

/*
 * Advance the matchfinder, but don't search for matches.
 *
 * @mf
 *	The matchfinder structure.
 * @in_base_p
 *	Location of a pointer which points to the place in the input data the
 *	matchfinder currently stores positions relative to.  This may be updated
 *	by this function.
 * @in_next
 *	Pointer to the next byte in the input buffer, i.e. the first position
 *	to insert.
 * @in_end
 *	Pointer to the end of the input buffer.
 * @next_hashes
 *	The precomputed hash codes for the sequence beginning at @in_next.
 *	These will be used and then updated with the precomputed hashcodes for
 *	the sequence beginning at @in_next + @count.
 * @count
 *	The number of bytes to advance.  Must be > 0.
 *
 * Returns @in_next + @count.
*/ static forceinline const u8 * hc_matchfinder_skip_positions(struct hc_matchfinder * const restrict mf, const u8 ** const restrict in_base_p, const u8 *in_next, const u8 * const in_end, const u32 count, u32 * const restrict next_hashes) { u32 cur_pos; u32 hash3, hash4; u32 next_hashseq; u32 remaining = count; if (unlikely(count + 5 > in_end - in_next)) return &in_next[count]; cur_pos = in_next - *in_base_p; hash3 = next_hashes[0]; hash4 = next_hashes[1]; do { if (cur_pos == MATCHFINDER_WINDOW_SIZE) { hc_matchfinder_slide_window(mf); *in_base_p += MATCHFINDER_WINDOW_SIZE; cur_pos = 0; } mf->hash3_tab[hash3] = cur_pos; mf->next_tab[cur_pos] = mf->hash4_tab[hash4]; mf->hash4_tab[hash4] = cur_pos; next_hashseq = get_unaligned_le32(++in_next); hash3 = lz_hash(next_hashseq & 0xFFFFFF, HC_MATCHFINDER_HASH3_ORDER); hash4 = lz_hash(next_hashseq, HC_MATCHFINDER_HASH4_ORDER); cur_pos++; } while (--remaining); prefetchw(&mf->hash3_tab[hash3]); prefetchw(&mf->hash4_tab[hash4]); next_hashes[0] = hash3; next_hashes[1] = hash4; return in_next; } libdeflate-1.5/lib/lib_common.h000066400000000000000000000023231360172702500165120ustar00rootroot00000000000000/* * lib_common.h - internal header included by all library code */ #ifndef LIB_LIB_COMMON_H #define LIB_LIB_COMMON_H #ifdef LIBDEFLATE_H # error "lib_common.h must always be included before libdeflate.h" /* because BUILDING_LIBDEFLATE must be set first */ #endif #define BUILDING_LIBDEFLATE #include "common_defs.h" /* * Prefix with "_libdeflate_" all global symbols which are not part of the API. * This avoids exposing overly generic names when libdeflate is built as a * static library. * * Note that the chosen prefix is not really important and can be changed * without breaking library users. It was just chosen so that the resulting * symbol names are unlikely to conflict with those from any other software. 
* Also note that this fixup has no useful effect when libdeflate is built as a * shared library, since these symbols are not exported. */ #define SYM_FIXUP(sym) _libdeflate_##sym #define aligned_malloc SYM_FIXUP(aligned_malloc) #define aligned_free SYM_FIXUP(aligned_free) #define deflate_get_compression_level SYM_FIXUP(deflate_get_compression_level) #define _cpu_features SYM_FIXUP(_cpu_features) #define setup_cpu_features SYM_FIXUP(setup_cpu_features) #endif /* LIB_LIB_COMMON_H */ libdeflate-1.5/lib/matchfinder_common.h000066400000000000000000000111031360172702500202240ustar00rootroot00000000000000/* * matchfinder_common.h - common code for Lempel-Ziv matchfinding */ #ifndef LIB_MATCHFINDER_COMMON_H #define LIB_MATCHFINDER_COMMON_H #include "lib_common.h" #include "unaligned.h" #ifndef MATCHFINDER_WINDOW_ORDER # error "MATCHFINDER_WINDOW_ORDER must be defined!" #endif #define MATCHFINDER_WINDOW_SIZE (1UL << MATCHFINDER_WINDOW_ORDER) typedef s16 mf_pos_t; #define MATCHFINDER_INITVAL ((mf_pos_t)-MATCHFINDER_WINDOW_SIZE) #define MATCHFINDER_ALIGNMENT 8 #define arch_matchfinder_init(data, size) false #define arch_matchfinder_rebase(data, size) false #ifdef _aligned_attribute # if defined(__arm__) || defined(__aarch64__) # include "arm/matchfinder_impl.h" # elif defined(__i386__) || defined(__x86_64__) # include "x86/matchfinder_impl.h" # endif #endif /* * Initialize the hash table portion of the matchfinder. * * Essentially, this is an optimized memset(). * * 'data' must be aligned to a MATCHFINDER_ALIGNMENT boundary. */ static forceinline void matchfinder_init(mf_pos_t *data, size_t num_entries) { size_t i; if (arch_matchfinder_init(data, num_entries * sizeof(data[0]))) return; for (i = 0; i < num_entries; i++) data[i] = MATCHFINDER_INITVAL; } /* * Slide the matchfinder by WINDOW_SIZE bytes. * * This must be called just after each WINDOW_SIZE bytes have been run through * the matchfinder. * * This will subtract WINDOW_SIZE bytes from each entry in the array specified. 
* The effect is that all entries are updated to be relative to the current * position, rather than the position WINDOW_SIZE bytes prior. * * Underflow is detected and replaced with signed saturation. This ensures that * once the sliding window has passed over a position, that position forever * remains out of bounds. * * The array passed in must contain all matchfinder data that is * position-relative. Concretely, this will include the hash table as well as * the table of positions that is used to link together the sequences in each * hash bucket. Note that in the latter table, the links are 1-ary in the case * of "hash chains", and 2-ary in the case of "binary trees". In either case, * the links need to be rebased in the same way. */ static forceinline void matchfinder_rebase(mf_pos_t *data, size_t num_entries) { size_t i; if (arch_matchfinder_rebase(data, num_entries * sizeof(data[0]))) return; if (MATCHFINDER_WINDOW_SIZE == 32768) { /* Branchless version for 32768 byte windows. If the value was * already negative, clear all bits except the sign bit; this * changes the value to -32768. Otherwise, set the sign bit; * this is equivalent to subtracting 32768. */ for (i = 0; i < num_entries; i++) { u16 v = data[i]; u16 sign_bit = v & 0x8000; v &= sign_bit - ((sign_bit >> 15) ^ 1); v |= 0x8000; data[i] = v; } return; } for (i = 0; i < num_entries; i++) { if (data[i] >= 0) data[i] -= (mf_pos_t)-MATCHFINDER_WINDOW_SIZE; else data[i] = (mf_pos_t)-MATCHFINDER_WINDOW_SIZE; } } /* * The hash function: given a sequence prefix held in the low-order bits of a * 32-bit value, multiply by a carefully-chosen large constant. Discard any * bits of the product that don't fit in a 32-bit value, but take the * next-highest @num_bits bits of the product as the hash value, as those have * the most randomness. 
*/ static forceinline u32 lz_hash(u32 seq, unsigned num_bits) { return (u32)(seq * 0x1E35A7BD) >> (32 - num_bits); } /* * Return the number of bytes at @matchptr that match the bytes at @strptr, up * to a maximum of @max_len. Initially, @start_len bytes are matched. */ static forceinline unsigned lz_extend(const u8 * const strptr, const u8 * const matchptr, const unsigned start_len, const unsigned max_len) { unsigned len = start_len; machine_word_t v_word; if (UNALIGNED_ACCESS_IS_FAST) { if (likely(max_len - len >= 4 * WORDBYTES)) { #define COMPARE_WORD_STEP \ v_word = load_word_unaligned(&matchptr[len]) ^ \ load_word_unaligned(&strptr[len]); \ if (v_word != 0) \ goto word_differs; \ len += WORDBYTES; \ COMPARE_WORD_STEP COMPARE_WORD_STEP COMPARE_WORD_STEP COMPARE_WORD_STEP #undef COMPARE_WORD_STEP } while (len + WORDBYTES <= max_len) { v_word = load_word_unaligned(&matchptr[len]) ^ load_word_unaligned(&strptr[len]); if (v_word != 0) goto word_differs; len += WORDBYTES; } } while (len < max_len && matchptr[len] == strptr[len]) len++; return len; word_differs: if (CPU_IS_LITTLE_ENDIAN()) len += (bsfw(v_word) >> 3); else len += (WORDBITS - 1 - bsrw(v_word)) >> 3; return len; } #endif /* LIB_MATCHFINDER_COMMON_H */ libdeflate-1.5/lib/unaligned.h000066400000000000000000000107151360172702500163460ustar00rootroot00000000000000/* * unaligned.h - inline functions for unaligned memory accesses */ #ifndef LIB_UNALIGNED_H #define LIB_UNALIGNED_H #include "lib_common.h" /* * Naming note: * * {load,store}_*_unaligned() deal with raw bytes without endianness conversion. * {get,put}_unaligned_*() deal with a specific endianness. 
*/ DEFINE_UNALIGNED_TYPE(u16) DEFINE_UNALIGNED_TYPE(u32) DEFINE_UNALIGNED_TYPE(u64) DEFINE_UNALIGNED_TYPE(machine_word_t) #define load_word_unaligned load_machine_word_t_unaligned #define store_word_unaligned store_machine_word_t_unaligned /***** Unaligned loads *****/ static forceinline u16 get_unaligned_le16(const u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) return le16_bswap(load_u16_unaligned(p)); else return ((u16)p[1] << 8) | p[0]; } static forceinline u16 get_unaligned_be16(const u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) return be16_bswap(load_u16_unaligned(p)); else return ((u16)p[0] << 8) | p[1]; } static forceinline u32 get_unaligned_le32(const u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) return le32_bswap(load_u32_unaligned(p)); else return ((u32)p[3] << 24) | ((u32)p[2] << 16) | ((u32)p[1] << 8) | p[0]; } static forceinline u32 get_unaligned_be32(const u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) return be32_bswap(load_u32_unaligned(p)); else return ((u32)p[0] << 24) | ((u32)p[1] << 16) | ((u32)p[2] << 8) | p[3]; } static forceinline u64 get_unaligned_le64(const u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) return le64_bswap(load_u64_unaligned(p)); else return ((u64)p[7] << 56) | ((u64)p[6] << 48) | ((u64)p[5] << 40) | ((u64)p[4] << 32) | ((u64)p[3] << 24) | ((u64)p[2] << 16) | ((u64)p[1] << 8) | p[0]; } static forceinline machine_word_t get_unaligned_leword(const u8 *p) { STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); if (WORDBITS == 32) return get_unaligned_le32(p); else return get_unaligned_le64(p); } /***** Unaligned stores *****/ static forceinline void put_unaligned_le16(u16 v, u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) { store_u16_unaligned(le16_bswap(v), p); } else { p[0] = (u8)(v >> 0); p[1] = (u8)(v >> 8); } } static forceinline void put_unaligned_be16(u16 v, u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) { store_u16_unaligned(be16_bswap(v), p); } else { p[0] = (u8)(v >> 8); p[1] = (u8)(v >> 0); } } static forceinline void put_unaligned_le32(u32 v, u8 *p) { if 
(UNALIGNED_ACCESS_IS_FAST) { store_u32_unaligned(le32_bswap(v), p); } else { p[0] = (u8)(v >> 0); p[1] = (u8)(v >> 8); p[2] = (u8)(v >> 16); p[3] = (u8)(v >> 24); } } static forceinline void put_unaligned_be32(u32 v, u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) { store_u32_unaligned(be32_bswap(v), p); } else { p[0] = (u8)(v >> 24); p[1] = (u8)(v >> 16); p[2] = (u8)(v >> 8); p[3] = (u8)(v >> 0); } } static forceinline void put_unaligned_le64(u64 v, u8 *p) { if (UNALIGNED_ACCESS_IS_FAST) { store_u64_unaligned(le64_bswap(v), p); } else { p[0] = (u8)(v >> 0); p[1] = (u8)(v >> 8); p[2] = (u8)(v >> 16); p[3] = (u8)(v >> 24); p[4] = (u8)(v >> 32); p[5] = (u8)(v >> 40); p[6] = (u8)(v >> 48); p[7] = (u8)(v >> 56); } } static forceinline void put_unaligned_leword(machine_word_t v, u8 *p) { STATIC_ASSERT(WORDBITS == 32 || WORDBITS == 64); if (WORDBITS == 32) put_unaligned_le32(v, p); else put_unaligned_le64(v, p); } /***** 24-bit loads *****/ /* * Given a 32-bit value that was loaded with the platform's native endianness, * return a 32-bit value whose high-order 8 bits are 0 and whose low-order 24 * bits contain the first 3 bytes, arranged in octets in a platform-dependent * order, at the memory location from which the input 32-bit value was loaded. */ static forceinline u32 loaded_u32_to_u24(u32 v) { if (CPU_IS_LITTLE_ENDIAN()) return v & 0xFFFFFF; else return v >> 8; } /* * Load the next 3 bytes from the memory location @p into the 24 low-order bits * of a 32-bit value. The order in which the 3 bytes will be arranged as octets * in the 24 bits is platform-dependent. At least LOAD_U24_REQUIRED_NBYTES * bytes must be available at @p; note that this may be more than 3. 
*/ static forceinline u32 load_u24_unaligned(const u8 *p) { #if UNALIGNED_ACCESS_IS_FAST # define LOAD_U24_REQUIRED_NBYTES 4 return loaded_u32_to_u24(load_u32_unaligned(p)); #else # define LOAD_U24_REQUIRED_NBYTES 3 if (CPU_IS_LITTLE_ENDIAN()) return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16); else return ((u32)p[2] << 0) | ((u32)p[1] << 8) | ((u32)p[0] << 16); #endif } #endif /* LIB_UNALIGNED_H */ libdeflate-1.5/lib/x86/000077500000000000000000000000001360172702500146505ustar00rootroot00000000000000libdeflate-1.5/lib/x86/adler32_impl.h000066400000000000000000000274211360172702500173040ustar00rootroot00000000000000/* * x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "cpu_features.h" /* * The following macros horizontally sum the s1 counters and add them to the * real s1, and likewise for s2. 
They do this via a series of reductions, each * of which halves the vector length, until just one counter remains. * * The s1 reductions don't depend on the s2 reductions and vice versa, so for * efficiency they are interleaved. Also, every other s1 counter is 0 due to * the 'psadbw' instruction (_mm_sad_epu8) summing groups of 8 bytes rather than * 4; hence, one of the s1 reductions is skipped when going from 128 => 32 bits. */ #define ADLER32_FINISH_VEC_CHUNK_128(s1, s2, v_s1, v_s2) \ { \ __v4si s1_last = (v_s1), s2_last = (v_s2); \ \ /* 128 => 32 bits */ \ s2_last += (__v4si)_mm_shuffle_epi32((__m128i)s2_last, 0x31); \ s1_last += (__v4si)_mm_shuffle_epi32((__m128i)s1_last, 0x02); \ s2_last += (__v4si)_mm_shuffle_epi32((__m128i)s2_last, 0x02); \ \ *(s1) += (u32)_mm_cvtsi128_si32((__m128i)s1_last); \ *(s2) += (u32)_mm_cvtsi128_si32((__m128i)s2_last); \ } #define ADLER32_FINISH_VEC_CHUNK_256(s1, s2, v_s1, v_s2) \ { \ __v4si s1_128bit, s2_128bit; \ \ /* 256 => 128 bits */ \ s1_128bit = (__v4si)_mm256_extracti128_si256((__m256i)(v_s1), 0) + \ (__v4si)_mm256_extracti128_si256((__m256i)(v_s1), 1); \ s2_128bit = (__v4si)_mm256_extracti128_si256((__m256i)(v_s2), 0) + \ (__v4si)_mm256_extracti128_si256((__m256i)(v_s2), 1); \ \ ADLER32_FINISH_VEC_CHUNK_128((s1), (s2), s1_128bit, s2_128bit); \ } #define ADLER32_FINISH_VEC_CHUNK_512(s1, s2, v_s1, v_s2) \ { \ __v8si s1_256bit, s2_256bit; \ \ /* 512 => 256 bits */ \ s1_256bit = (__v8si)_mm512_extracti64x4_epi64((__m512i)(v_s1), 0) + \ (__v8si)_mm512_extracti64x4_epi64((__m512i)(v_s1), 1); \ s2_256bit = (__v8si)_mm512_extracti64x4_epi64((__m512i)(v_s2), 0) + \ (__v8si)_mm512_extracti64x4_epi64((__m512i)(v_s2), 1); \ \ ADLER32_FINISH_VEC_CHUNK_256((s1), (s2), s1_256bit, s2_256bit); \ } /* AVX-512BW implementation: like the AVX2 one, but does 64 bytes at a time */ #undef DISPATCH_AVX512BW #if !defined(DEFAULT_IMPL) && \ /* * clang before v3.9 is missing some AVX-512BW intrinsics including * _mm512_sad_epu8(), a.k.a. 
__builtin_ia32_psadbw512. So just make using * AVX-512BW, even when __AVX512BW__ is defined, conditional on * COMPILER_SUPPORTS_AVX512BW_TARGET where we check for that builtin. */ \ COMPILER_SUPPORTS_AVX512BW_TARGET && \ (defined(__AVX512BW__) || (X86_CPU_FEATURES_ENABLED && \ COMPILER_SUPPORTS_AVX512BW_TARGET_INTRINSICS)) # define FUNCNAME adler32_avx512bw # define FUNCNAME_CHUNK adler32_avx512bw_chunk # define IMPL_ALIGNMENT 64 # define IMPL_SEGMENT_SIZE 64 # define IMPL_MAX_CHUNK_SIZE MAX_CHUNK_SIZE # ifdef __AVX512BW__ # define ATTRIBUTES # define DEFAULT_IMPL adler32_avx512bw # else # define ATTRIBUTES __attribute__((target("avx512bw"))) # define DISPATCH 1 # define DISPATCH_AVX512BW 1 # endif # include static forceinline ATTRIBUTES void adler32_avx512bw_chunk(const __m512i *p, const __m512i *const end, u32 *s1, u32 *s2) { const __m512i zeroes = _mm512_setzero_si512(); const __v64qi multipliers = (__v64qi){ 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, }; const __v32hi ones = (__v32hi)_mm512_set1_epi16(1); __v16si v_s1 = (__v16si)zeroes; __v16si v_s1_sums = (__v16si)zeroes; __v16si v_s2 = (__v16si)zeroes; do { /* Load the next 64-byte segment */ __m512i bytes = *p++; /* Multiply the bytes by 64...1 (the number of times they need * to be added to s2) and add adjacent products */ __v32hi sums = (__v32hi)_mm512_maddubs_epi16( bytes, (__m512i)multipliers); /* Keep sum of all previous s1 counters, for adding to s2 later. * This allows delaying the multiplication by 64 to the end. 
*/ v_s1_sums += v_s1; /* Add the sum of each group of 8 bytes to the corresponding s1 * counter */ v_s1 += (__v16si)_mm512_sad_epu8(bytes, zeroes); /* Add the sum of each group of 4 products of the bytes by * 64...1 to the corresponding s2 counter */ v_s2 += (__v16si)_mm512_madd_epi16((__m512i)sums, (__m512i)ones); } while (p != end); /* Finish the s2 counters by adding the sum of the s1 values at the * beginning of each segment, multiplied by the segment size (64) */ v_s2 += (__v16si)_mm512_slli_epi32((__m512i)v_s1_sums, 6); /* Add the counters to the real s1 and s2 */ ADLER32_FINISH_VEC_CHUNK_512(s1, s2, v_s1, v_s2); } # include "../adler32_vec_template.h" #endif /* AVX-512BW implementation */ /* AVX2 implementation: like the AVX-512BW one, but does 32 bytes at a time */ #undef DISPATCH_AVX2 #if !defined(DEFAULT_IMPL) && \ (defined(__AVX2__) || (X86_CPU_FEATURES_ENABLED && \ COMPILER_SUPPORTS_AVX2_TARGET_INTRINSICS)) # define FUNCNAME adler32_avx2 # define FUNCNAME_CHUNK adler32_avx2_chunk # define IMPL_ALIGNMENT 32 # define IMPL_SEGMENT_SIZE 32 # define IMPL_MAX_CHUNK_SIZE MAX_CHUNK_SIZE # ifdef __AVX2__ # define ATTRIBUTES # define DEFAULT_IMPL adler32_avx2 # else # define ATTRIBUTES __attribute__((target("avx2"))) # define DISPATCH 1 # define DISPATCH_AVX2 1 # endif # include static forceinline ATTRIBUTES void adler32_avx2_chunk(const __m256i *p, const __m256i *const end, u32 *s1, u32 *s2) { const __m256i zeroes = _mm256_setzero_si256(); const __v32qi multipliers = (__v32qi){ 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, }; const __v16hi ones = (__v16hi)_mm256_set1_epi16(1); __v8si v_s1 = (__v8si)zeroes; __v8si v_s1_sums = (__v8si)zeroes; __v8si v_s2 = (__v8si)zeroes; do { /* Load the next 32-byte segment */ __m256i bytes = *p++; /* Multiply the bytes by 32...1 (the number of times they need * to be added to s2) and add adjacent products */ __v16hi sums = (__v16hi)_mm256_maddubs_epi16( 
bytes, (__m256i)multipliers); /* Keep sum of all previous s1 counters, for adding to s2 later. * This allows delaying the multiplication by 32 to the end. */ v_s1_sums += v_s1; /* Add the sum of each group of 8 bytes to the corresponding s1 * counter */ v_s1 += (__v8si)_mm256_sad_epu8(bytes, zeroes); /* Add the sum of each group of 4 products of the bytes by * 32...1 to the corresponding s2 counter */ v_s2 += (__v8si)_mm256_madd_epi16((__m256i)sums, (__m256i)ones); } while (p != end); /* Finish the s2 counters by adding the sum of the s1 values at the * beginning of each segment, multiplied by the segment size (32) */ v_s2 += (__v8si)_mm256_slli_epi32((__m256i)v_s1_sums, 5); /* Add the counters to the real s1 and s2 */ ADLER32_FINISH_VEC_CHUNK_256(s1, s2, v_s1, v_s2); } # include "../adler32_vec_template.h" #endif /* AVX2 implementation */ /* SSE2 implementation */ #undef DISPATCH_SSE2 #if !defined(DEFAULT_IMPL) && \ (defined(__SSE2__) || (X86_CPU_FEATURES_ENABLED && \ COMPILER_SUPPORTS_SSE2_TARGET_INTRINSICS)) # define FUNCNAME adler32_sse2 # define FUNCNAME_CHUNK adler32_sse2_chunk # define IMPL_ALIGNMENT 16 # define IMPL_SEGMENT_SIZE 32 /* * The 16-bit precision byte counters must not be allowed to undergo *signed* * overflow, otherwise the signed multiplications at the end (_mm_madd_epi16) * would behave incorrectly. */ # define IMPL_MAX_CHUNK_SIZE (32 * (0x7FFF / 0xFF)) # ifdef __SSE2__ # define ATTRIBUTES # define DEFAULT_IMPL adler32_sse2 # else # define ATTRIBUTES __attribute__((target("sse2"))) # define DISPATCH 1 # define DISPATCH_SSE2 1 # endif # include static forceinline ATTRIBUTES void adler32_sse2_chunk(const __m128i *p, const __m128i *const end, u32 *s1, u32 *s2) { const __m128i zeroes = _mm_setzero_si128(); /* s1 counters: 32-bit, sum of bytes */ __v4si v_s1 = (__v4si)zeroes; /* s2 counters: 32-bit, sum of s1 values */ __v4si v_s2 = (__v4si)zeroes; /* * Thirty-two 16-bit counters for byte sums. 
Each accumulates the bytes * that eventually need to be multiplied by a number 32...1 for addition * into s2. */ __v8hi v_byte_sums_a = (__v8hi)zeroes; __v8hi v_byte_sums_b = (__v8hi)zeroes; __v8hi v_byte_sums_c = (__v8hi)zeroes; __v8hi v_byte_sums_d = (__v8hi)zeroes; do { /* Load the next 32 bytes */ const __m128i bytes1 = *p++; const __m128i bytes2 = *p++; /* * Accumulate the previous s1 counters into the s2 counters. * Logically, this really should be v_s2 += v_s1 * 32, but we * can do the multiplication (or left shift) later. */ v_s2 += v_s1; /* * s1 update: use "Packed Sum of Absolute Differences" to add * the bytes horizontally with 8 bytes per sum. Then add the * sums to the s1 counters. */ v_s1 += (__v4si)_mm_sad_epu8(bytes1, zeroes); v_s1 += (__v4si)_mm_sad_epu8(bytes2, zeroes); /* * Also accumulate the bytes into 32 separate counters that have * 16-bit precision. */ v_byte_sums_a += (__v8hi)_mm_unpacklo_epi8(bytes1, zeroes); v_byte_sums_b += (__v8hi)_mm_unpackhi_epi8(bytes1, zeroes); v_byte_sums_c += (__v8hi)_mm_unpacklo_epi8(bytes2, zeroes); v_byte_sums_d += (__v8hi)_mm_unpackhi_epi8(bytes2, zeroes); } while (p != end); /* Finish calculating the s2 counters */ v_s2 = (__v4si)_mm_slli_epi32((__m128i)v_s2, 5); v_s2 += (__v4si)_mm_madd_epi16((__m128i)v_byte_sums_a, (__m128i)(__v8hi){ 32, 31, 30, 29, 28, 27, 26, 25 }); v_s2 += (__v4si)_mm_madd_epi16((__m128i)v_byte_sums_b, (__m128i)(__v8hi){ 24, 23, 22, 21, 20, 19, 18, 17 }); v_s2 += (__v4si)_mm_madd_epi16((__m128i)v_byte_sums_c, (__m128i)(__v8hi){ 16, 15, 14, 13, 12, 11, 10, 9 }); v_s2 += (__v4si)_mm_madd_epi16((__m128i)v_byte_sums_d, (__m128i)(__v8hi){ 8, 7, 6, 5, 4, 3, 2, 1 }); /* Add the counters to the real s1 and s2 */ ADLER32_FINISH_VEC_CHUNK_128(s1, s2, v_s1, v_s2); } # include "../adler32_vec_template.h" #endif /* SSE2 implementation */ #ifdef DISPATCH static inline adler32_func_t arch_select_adler32_func(void) { u32 features = get_cpu_features(); #ifdef DISPATCH_AVX512BW if (features & 
X86_CPU_FEATURE_AVX512BW) return adler32_avx512bw; #endif #ifdef DISPATCH_AVX2 if (features & X86_CPU_FEATURE_AVX2) return adler32_avx2; #endif #ifdef DISPATCH_SSE2 if (features & X86_CPU_FEATURE_SSE2) return adler32_sse2; #endif return NULL; } #endif /* DISPATCH */ libdeflate-1.5/lib/x86/cpu_features.c000066400000000000000000000100261360172702500175000ustar00rootroot00000000000000/* * x86/cpu_features.c - feature detection for x86 processors * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "cpu_features.h" #if X86_CPU_FEATURES_ENABLED volatile u32 _cpu_features = 0; /* With old GCC versions we have to manually save and restore the x86_32 PIC * register (ebx). See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602 */ #if defined(__i386__) && defined(__PIC__) # define EBX_CONSTRAINT "=r" #else # define EBX_CONSTRAINT "=b" #endif /* Execute the CPUID instruction. 
*/ static inline void cpuid(u32 leaf, u32 subleaf, u32 *a, u32 *b, u32 *c, u32 *d) { __asm__(".ifnc %%ebx, %1; mov %%ebx, %1; .endif\n" "cpuid \n" ".ifnc %%ebx, %1; xchg %%ebx, %1; .endif\n" : "=a" (*a), EBX_CONSTRAINT (*b), "=c" (*c), "=d" (*d) : "a" (leaf), "c" (subleaf)); } /* Read an extended control register. */ static inline u64 read_xcr(u32 index) { u32 edx, eax; /* Execute the "xgetbv" instruction. Old versions of binutils do not * recognize this instruction, so list the raw bytes instead. */ __asm__ (".byte 0x0f, 0x01, 0xd0" : "=d" (edx), "=a" (eax) : "c" (index)); return ((u64)edx << 32) | eax; } #undef BIT #define BIT(nr) (1UL << (nr)) #define XCR0_BIT_SSE BIT(1) #define XCR0_BIT_AVX BIT(2) #define XCR0_BIT_OPMASK BIT(5) #define XCR0_BIT_ZMM_HI256 BIT(6) #define XCR0_BIT_HI16_ZMM BIT(7) #define IS_SET(reg, nr) ((reg) & BIT(nr)) #define IS_ALL_SET(reg, mask) (((reg) & (mask)) == (mask)) /* Initialize _cpu_features with bits for interesting processor features. */ void setup_cpu_features(void) { u32 features = 0; u32 dummy1, dummy2, dummy3, dummy4; u32 max_function; u32 features_1, features_2, features_3, features_4; bool os_avx_support = false; bool os_avx512_support = false; /* Get maximum supported function */ cpuid(0, 0, &max_function, &dummy2, &dummy3, &dummy4); if (max_function < 1) goto out; /* Standard feature flags */ cpuid(1, 0, &dummy1, &dummy2, &features_2, &features_1); if (IS_SET(features_1, 26)) features |= X86_CPU_FEATURE_SSE2; if (IS_SET(features_2, 1)) features |= X86_CPU_FEATURE_PCLMULQDQ; if (IS_SET(features_2, 27)) { /* OSXSAVE set? 
*/ u64 xcr0 = read_xcr(0); os_avx_support = IS_ALL_SET(xcr0, XCR0_BIT_SSE | XCR0_BIT_AVX); os_avx512_support = IS_ALL_SET(xcr0, XCR0_BIT_SSE | XCR0_BIT_AVX | XCR0_BIT_OPMASK | XCR0_BIT_ZMM_HI256 | XCR0_BIT_HI16_ZMM); } if (os_avx_support && IS_SET(features_2, 28)) features |= X86_CPU_FEATURE_AVX; if (max_function < 7) goto out; /* Extended feature flags */ cpuid(7, 0, &dummy1, &features_3, &features_4, &dummy4); if (os_avx_support && IS_SET(features_3, 5)) features |= X86_CPU_FEATURE_AVX2; if (IS_SET(features_3, 8)) features |= X86_CPU_FEATURE_BMI2; if (os_avx512_support && IS_SET(features_3, 30)) features |= X86_CPU_FEATURE_AVX512BW; out: _cpu_features = features | X86_CPU_FEATURES_KNOWN; } #endif /* X86_CPU_FEATURES_ENABLED */ libdeflate-1.5/lib/x86/cpu_features.h000066400000000000000000000016541360172702500175140ustar00rootroot00000000000000/* * x86/cpu_features.h - feature detection for x86 processors */ #ifndef LIB_X86_CPU_FEATURES_H #define LIB_X86_CPU_FEATURES_H #include "../lib_common.h" #if (defined(__i386__) || defined(__x86_64__)) && \ COMPILER_SUPPORTS_TARGET_FUNCTION_ATTRIBUTE # define X86_CPU_FEATURES_ENABLED 1 #else # define X86_CPU_FEATURES_ENABLED 0 #endif #if X86_CPU_FEATURES_ENABLED #define X86_CPU_FEATURE_SSE2 0x00000001 #define X86_CPU_FEATURE_PCLMULQDQ 0x00000002 #define X86_CPU_FEATURE_AVX 0x00000004 #define X86_CPU_FEATURE_AVX2 0x00000008 #define X86_CPU_FEATURE_BMI2 0x00000010 #define X86_CPU_FEATURE_AVX512BW 0x00000020 #define X86_CPU_FEATURES_KNOWN 0x80000000 extern volatile u32 _cpu_features; extern void setup_cpu_features(void); static inline u32 get_cpu_features(void) { if (_cpu_features == 0) setup_cpu_features(); return _cpu_features; } #endif /* X86_CPU_FEATURES_ENABLED */ #endif /* LIB_X86_CPU_FEATURES_H */ libdeflate-1.5/lib/x86/crc32_impl.h000066400000000000000000000061111360172702500167550ustar00rootroot00000000000000/* * x86/crc32_impl.h - x86 implementations of CRC-32 checksum algorithm * * Copyright 2016 Eric Biggers * * 
Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "cpu_features.h" /* * Include the PCLMUL/AVX implementation? Although our PCLMUL-optimized CRC-32 * function doesn't use any AVX intrinsics specifically, it can benefit a lot * from being compiled for an AVX target: on Skylake, ~16700 MB/s vs. ~10100 * MB/s. I expect this is related to the PCLMULQDQ instructions being assembled * in the newer three-operand form rather than the older two-operand form. * * Note: this is only needed if __AVX__ is *not* defined, since otherwise the * "regular" PCLMUL implementation would already be AVX enabled. 
*/ #undef DISPATCH_PCLMUL_AVX #if !defined(DEFAULT_IMPL) && !defined(__AVX__) && \ X86_CPU_FEATURES_ENABLED && COMPILER_SUPPORTS_AVX_TARGET && \ (defined(__PCLMUL__) || COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS) # define FUNCNAME crc32_pclmul_avx # define FUNCNAME_ALIGNED crc32_pclmul_avx_aligned # define ATTRIBUTES __attribute__((target("pclmul,avx"))) # define DISPATCH 1 # define DISPATCH_PCLMUL_AVX 1 # include "crc32_pclmul_template.h" #endif /* PCLMUL implementation */ #undef DISPATCH_PCLMUL #if !defined(DEFAULT_IMPL) && \ (defined(__PCLMUL__) || (X86_CPU_FEATURES_ENABLED && \ COMPILER_SUPPORTS_PCLMUL_TARGET_INTRINSICS)) # define FUNCNAME crc32_pclmul # define FUNCNAME_ALIGNED crc32_pclmul_aligned # ifdef __PCLMUL__ # define ATTRIBUTES # define DEFAULT_IMPL crc32_pclmul # else # define ATTRIBUTES __attribute__((target("pclmul"))) # define DISPATCH 1 # define DISPATCH_PCLMUL 1 # endif # include "crc32_pclmul_template.h" #endif #ifdef DISPATCH static inline crc32_func_t arch_select_crc32_func(void) { u32 features = get_cpu_features(); #ifdef DISPATCH_PCLMUL_AVX if ((features & X86_CPU_FEATURE_PCLMULQDQ) && (features & X86_CPU_FEATURE_AVX)) return crc32_pclmul_avx; #endif #ifdef DISPATCH_PCLMUL if (features & X86_CPU_FEATURE_PCLMULQDQ) return crc32_pclmul; #endif return NULL; } #endif /* DISPATCH */ libdeflate-1.5/lib/x86/crc32_pclmul_template.h000066400000000000000000000243231360172702500212100ustar00rootroot00000000000000/* * x86/crc32_pclmul_template.h * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and 
this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include /* * CRC-32 folding with PCLMULQDQ. * * The basic idea is to repeatedly "fold" each 512 bits into the next 512 bits, * producing an abbreviated message which is congruent the original message * modulo the generator polynomial G(x). * * Folding each 512 bits is implemented as eight 64-bit folds, each of which * uses one carryless multiplication instruction. It's expected that CPUs may * be able to execute some of these multiplications in parallel. * * Explanation of "folding": let A(x) be 64 bits from the message, and let B(x) * be 95 bits from a constant distance D later in the message. The relevant * portion of the message can be written as: * * M(x) = A(x)*x^D + B(x) * * ... where + and * represent addition and multiplication, respectively, of * polynomials over GF(2). Note that when implemented on a computer, these * operations are equivalent to XOR and carryless multiplication, respectively. * * For the purpose of CRC calculation, only the remainder modulo the generator * polynomial G(x) matters: * * M(x) mod G(x) = (A(x)*x^D + B(x)) mod G(x) * * Since the modulo operation can be applied anywhere in a sequence of additions * and multiplications without affecting the result, this is equivalent to: * * M(x) mod G(x) = (A(x)*(x^D mod G(x)) + B(x)) mod G(x) * * For any D, 'x^D mod G(x)' will be a polynomial with maximum degree 31, i.e. * a 32-bit quantity. 
So 'A(x) * (x^D mod G(x))' is equivalent to a carryless * multiplication of a 64-bit quantity by a 32-bit quantity, producing a 95-bit * product. Then, adding (XOR-ing) the product to B(x) produces a polynomial * with the same length as B(x) but with the same remainder as 'A(x)*x^D + * B(x)'. This is the basic fold operation with 64 bits. * * Note that the carryless multiplication instruction PCLMULQDQ actually takes * two 64-bit inputs and produces a 127-bit product in the low-order bits of a * 128-bit XMM register. This works fine, but care must be taken to account for * "bit endianness". With the CRC version implemented here, bits are always * ordered such that the lowest-order bit represents the coefficient of highest * power of x and the highest-order bit represents the coefficient of the lowest * power of x. This is backwards from the more intuitive order. Still, * carryless multiplication works essentially the same either way. It just must * be accounted for that when we XOR the 95-bit product in the low-order 95 bits * of a 128-bit XMM register into 128-bits of later data held in another XMM * register, we'll really be XOR-ing the product into the mathematically higher * degree end of those later bits, not the lower degree end as may be expected. * * So given that caveat and the fact that we process 512 bits per iteration, the * 'D' values we need for the two 64-bit halves of each 128 bits of data are: * * D = (512 + 95) - 64 for the higher-degree half of each 128 bits, * i.e. the lower order bits in the XMM register * * D = (512 + 95) - 128 for the lower-degree half of each 128 bits, * i.e. the higher order bits in the XMM register * * The required 'x^D mod G(x)' values were precomputed. * * When <= 512 bits remain in the message, we finish up by folding across * smaller distances. This works similarly; the distance D is just different, * so different constant multipliers must be used. 
Finally, once the remaining * message is just 64 bits, it is reduced to the CRC-32 using Barrett reduction * (explained later). * * For more information see the original paper from Intel: * "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" * December 2009 * http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf */ static u32 ATTRIBUTES FUNCNAME_ALIGNED(u32 remainder, const __m128i *p, size_t nr_segs) { /* Constants precomputed by gen_crc32_multipliers.c. Do not edit! */ const __v2di multipliers_4 = (__v2di){ 0x8F352D95, 0x1D9513D7 }; const __v2di multipliers_2 = (__v2di){ 0xF1DA05AA, 0x81256527 }; const __v2di multipliers_1 = (__v2di){ 0xAE689191, 0xCCAA009E }; const __v2di final_multiplier = (__v2di){ 0xB8BC6765 }; const __m128i mask32 = (__m128i)(__v4si){ 0xFFFFFFFF }; const __v2di barrett_reduction_constants = (__v2di){ 0x00000001F7011641, 0x00000001DB710641 }; const __m128i * const end = p + nr_segs; const __m128i * const end512 = p + (nr_segs & ~3); __m128i x0, x1, x2, x3; /* * Account for the current 'remainder', i.e. the CRC of the part of the * message already processed. Explanation: rewrite the message * polynomial M(x) in terms of the first part A(x), the second part * B(x), and the length of the second part in bits |B(x)| >= 32: * * M(x) = A(x)*x^|B(x)| + B(x) * * Then the CRC of M(x) is: * * CRC(M(x)) = CRC(A(x)*x^|B(x)| + B(x)) * = CRC(A(x)*x^32*x^(|B(x)| - 32) + B(x)) * = CRC(CRC(A(x))*x^(|B(x)| - 32) + B(x)) * * Note: all arithmetic is modulo G(x), the generator polynomial; that's * why A(x)*x^32 can be replaced with CRC(A(x)) = A(x)*x^32 mod G(x). * * So the CRC of the full message is the CRC of the second part of the * message where the first 32 bits of the second part of the message * have been XOR'ed with the CRC of the first part of the message. 
*/ x0 = *p++; x0 ^= (__m128i)(__v4si){ remainder }; if (p > end512) /* only 128, 256, or 384 bits of input? */ goto _128_bits_at_a_time; x1 = *p++; x2 = *p++; x3 = *p++; /* Fold 512 bits at a time */ for (; p != end512; p += 4) { __m128i y0, y1, y2, y3; y0 = p[0]; y1 = p[1]; y2 = p[2]; y3 = p[3]; /* * Note: the immediate constant for PCLMULQDQ specifies which * 64-bit halves of the 128-bit vectors to multiply: * * 0x00 means low halves (higher degree polynomial terms for us) * 0x11 means high halves (lower degree polynomial terms for us) */ y0 ^= _mm_clmulepi64_si128(x0, multipliers_4, 0x00); y1 ^= _mm_clmulepi64_si128(x1, multipliers_4, 0x00); y2 ^= _mm_clmulepi64_si128(x2, multipliers_4, 0x00); y3 ^= _mm_clmulepi64_si128(x3, multipliers_4, 0x00); y0 ^= _mm_clmulepi64_si128(x0, multipliers_4, 0x11); y1 ^= _mm_clmulepi64_si128(x1, multipliers_4, 0x11); y2 ^= _mm_clmulepi64_si128(x2, multipliers_4, 0x11); y3 ^= _mm_clmulepi64_si128(x3, multipliers_4, 0x11); x0 = y0; x1 = y1; x2 = y2; x3 = y3; } /* Fold 512 bits => 128 bits */ x2 ^= _mm_clmulepi64_si128(x0, multipliers_2, 0x00); x3 ^= _mm_clmulepi64_si128(x1, multipliers_2, 0x00); x2 ^= _mm_clmulepi64_si128(x0, multipliers_2, 0x11); x3 ^= _mm_clmulepi64_si128(x1, multipliers_2, 0x11); x3 ^= _mm_clmulepi64_si128(x2, multipliers_1, 0x00); x3 ^= _mm_clmulepi64_si128(x2, multipliers_1, 0x11); x0 = x3; _128_bits_at_a_time: while (p != end) { /* Fold 128 bits into next 128 bits */ x1 = *p++; x1 ^= _mm_clmulepi64_si128(x0, multipliers_1, 0x00); x1 ^= _mm_clmulepi64_si128(x0, multipliers_1, 0x11); x0 = x1; } /* Now there are just 128 bits left, stored in 'x0'. */ /* * Fold 128 => 96 bits. This also implicitly appends 32 zero bits, * which is equivalent to multiplying by x^32. This is needed because * the CRC is defined as M(x)*x^32 mod G(x), not just M(x) mod G(x). 
*/ x0 = _mm_srli_si128(x0, 8) ^ _mm_clmulepi64_si128(x0, multipliers_1, 0x10); /* Fold 96 => 64 bits */ x0 = _mm_srli_si128(x0, 4) ^ _mm_clmulepi64_si128(x0 & mask32, final_multiplier, 0x00); /* * Finally, reduce 64 => 32 bits using Barrett reduction. * * Let M(x) = A(x)*x^32 + B(x) be the remaining message. The goal is to * compute R(x) = M(x) mod G(x). Since degree(B(x)) < degree(G(x)): * * R(x) = (A(x)*x^32 + B(x)) mod G(x) * = (A(x)*x^32) mod G(x) + B(x) * * Then, by the Division Algorithm there exists a unique q(x) such that: * * A(x)*x^32 mod G(x) = A(x)*x^32 - q(x)*G(x) * * Since the left-hand side is of maximum degree 31, the right-hand side * must be too. This implies that we can apply 'mod x^32' to the * right-hand side without changing its value: * * (A(x)*x^32 - q(x)*G(x)) mod x^32 = q(x)*G(x) mod x^32 * * Note that '+' is equivalent to '-' in polynomials over GF(2). * * We also know that: * * / A(x)*x^32 \ * q(x) = floor ( --------- ) * \ G(x) / * * To compute this efficiently, we can multiply the top and bottom by * x^32 and move the division by G(x) to the top: * * / A(x) * floor(x^64 / G(x)) \ * q(x) = floor ( ------------------------- ) * \ x^32 / * * Note that floor(x^64 / G(x)) is a constant. * * So finally we have: * * / A(x) * floor(x^64 / G(x)) \ * R(x) = B(x) + G(x)*floor ( ------------------------- ) * \ x^32 / */ x1 = x0; x0 = _mm_clmulepi64_si128(x0 & mask32, barrett_reduction_constants, 0x00); x0 = _mm_clmulepi64_si128(x0 & mask32, barrett_reduction_constants, 0x10); return _mm_cvtsi128_si32(_mm_srli_si128(x0 ^ x1, 4)); } #define IMPL_ALIGNMENT 16 #define IMPL_SEGMENT_SIZE 16 #include "../crc32_vec_template.h" libdeflate-1.5/lib/x86/decompress_impl.h000066400000000000000000000011611360172702500202050ustar00rootroot00000000000000#include "cpu_features.h" /* Include the BMI2-optimized version? 
*/ #undef DISPATCH_BMI2 #if !defined(__BMI2__) && X86_CPU_FEATURES_ENABLED && \ COMPILER_SUPPORTS_BMI2_TARGET # define FUNCNAME deflate_decompress_bmi2 # define ATTRIBUTES __attribute__((target("bmi2"))) # define DISPATCH 1 # define DISPATCH_BMI2 1 # include "../decompress_template.h" #endif #ifdef DISPATCH static inline decompress_func_t arch_select_decompress_func(void) { u32 features = get_cpu_features(); #ifdef DISPATCH_BMI2 if (features & X86_CPU_FEATURE_BMI2) return deflate_decompress_bmi2; #endif return NULL; } #endif /* DISPATCH */ libdeflate-1.5/lib/x86/matchfinder_impl.h000066400000000000000000000077301360172702500203350ustar00rootroot00000000000000/* * x86/matchfinder_impl.h - x86 implementations of matchfinder functions * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifdef __AVX2__ # if MATCHFINDER_ALIGNMENT < 32 # undef MATCHFINDER_ALIGNMENT # define MATCHFINDER_ALIGNMENT 32 # endif # include static forceinline bool matchfinder_init_avx2(mf_pos_t *data, size_t size) { __m256i v, *p; size_t n; if (size % (sizeof(__m256i) * 4) != 0) return false; STATIC_ASSERT(sizeof(mf_pos_t) == 2); v = _mm256_set1_epi16(MATCHFINDER_INITVAL); p = (__m256i *)data; n = size / (sizeof(__m256i) * 4); do { p[0] = v; p[1] = v; p[2] = v; p[3] = v; p += 4; } while (--n); return true; } static forceinline bool matchfinder_rebase_avx2(mf_pos_t *data, size_t size) { __m256i v, *p; size_t n; if (size % (sizeof(__m256i) * 4) != 0) return false; STATIC_ASSERT(sizeof(mf_pos_t) == 2); v = _mm256_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE); p = (__m256i *)data; n = size / (sizeof(__m256i) * 4); do { /* PADDSW: Add Packed Signed Integers With Signed Saturation */ p[0] = _mm256_adds_epi16(p[0], v); p[1] = _mm256_adds_epi16(p[1], v); p[2] = _mm256_adds_epi16(p[2], v); p[3] = _mm256_adds_epi16(p[3], v); p += 4; } while (--n); return true; } #endif /* __AVX2__ */ #ifdef __SSE2__ # if MATCHFINDER_ALIGNMENT < 16 # undef MATCHFINDER_ALIGNMENT # define MATCHFINDER_ALIGNMENT 16 # endif # include static forceinline bool matchfinder_init_sse2(mf_pos_t *data, size_t size) { __m128i v, *p; size_t n; if (size % (sizeof(__m128i) * 4) != 0) return false; STATIC_ASSERT(sizeof(mf_pos_t) == 2); v = _mm_set1_epi16(MATCHFINDER_INITVAL); p = (__m128i *)data; n = size / (sizeof(__m128i) * 4); do { p[0] = v; p[1] = v; p[2] = v; p[3] = v; p += 4; } while (--n); return true; } static forceinline bool matchfinder_rebase_sse2(mf_pos_t *data, size_t size) { __m128i v, *p; size_t n; if (size % (sizeof(__m128i) * 4) != 0) return false; STATIC_ASSERT(sizeof(mf_pos_t) == 2); v = _mm_set1_epi16((u16)-MATCHFINDER_WINDOW_SIZE); p = (__m128i *)data; n = size / (sizeof(__m128i) * 4); do { /* PADDSW: Add Packed Signed Integers With Signed Saturation */ p[0] = _mm_adds_epi16(p[0], v); p[1] = 
_mm_adds_epi16(p[1], v); p[2] = _mm_adds_epi16(p[2], v); p[3] = _mm_adds_epi16(p[3], v); p += 4; } while (--n); return true; } #endif /* __SSE2__ */ #undef arch_matchfinder_init static forceinline bool arch_matchfinder_init(mf_pos_t *data, size_t size) { #ifdef __AVX2__ if (matchfinder_init_avx2(data, size)) return true; #endif #ifdef __SSE2__ if (matchfinder_init_sse2(data, size)) return true; #endif return false; } #undef arch_matchfinder_rebase static forceinline bool arch_matchfinder_rebase(mf_pos_t *data, size_t size) { #ifdef __AVX2__ if (matchfinder_rebase_avx2(data, size)) return true; #endif #ifdef __SSE2__ if (matchfinder_rebase_sse2(data, size)) return true; #endif return false; } libdeflate-1.5/lib/zlib_compress.c000066400000000000000000000053151360172702500172460ustar00rootroot00000000000000/* * zlib_compress.c - compress with a zlib wrapper * * Originally public domain; changes after 2016-09-07 are copyrighted. * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "deflate_compress.h" #include "unaligned.h" #include "zlib_constants.h" #include "libdeflate.h" LIBDEFLATEEXPORT size_t LIBDEFLATEAPI libdeflate_zlib_compress(struct libdeflate_compressor *c, const void *in, size_t in_size, void *out, size_t out_nbytes_avail) { u8 *out_next = out; u16 hdr; unsigned compression_level; unsigned level_hint; size_t deflate_size; if (out_nbytes_avail <= ZLIB_MIN_OVERHEAD) return 0; /* 2 byte header: CMF and FLG */ hdr = (ZLIB_CM_DEFLATE << 8) | (ZLIB_CINFO_32K_WINDOW << 12); compression_level = deflate_get_compression_level(c); if (compression_level < 2) level_hint = ZLIB_FASTEST_COMPRESSION; else if (compression_level < 6) level_hint = ZLIB_FAST_COMPRESSION; else if (compression_level < 8) level_hint = ZLIB_DEFAULT_COMPRESSION; else level_hint = ZLIB_SLOWEST_COMPRESSION; hdr |= level_hint << 6; hdr |= 31 - (hdr % 31); put_unaligned_be16(hdr, out_next); out_next += 2; /* Compressed data */ deflate_size = libdeflate_deflate_compress(c, in, in_size, out_next, out_nbytes_avail - ZLIB_MIN_OVERHEAD); if (deflate_size == 0) return 0; out_next += deflate_size; /* ADLER32 */ put_unaligned_be32(libdeflate_adler32(1, in, in_size), out_next); out_next += 4; return out_next - (u8 *)out; } LIBDEFLATEEXPORT size_t LIBDEFLATEAPI libdeflate_zlib_compress_bound(struct libdeflate_compressor *c, size_t in_nbytes) { return ZLIB_MIN_OVERHEAD + libdeflate_deflate_compress_bound(c, in_nbytes); } libdeflate-1.5/lib/zlib_constants.h000066400000000000000000000007501360172702500174320ustar00rootroot00000000000000/* * zlib_constants.h - constants for the zlib wrapper format */ #ifndef LIB_ZLIB_CONSTANTS_H #define LIB_ZLIB_CONSTANTS_H #define ZLIB_MIN_HEADER_SIZE 2 #define ZLIB_FOOTER_SIZE 4 
#define ZLIB_MIN_OVERHEAD (ZLIB_MIN_HEADER_SIZE + ZLIB_FOOTER_SIZE) #define ZLIB_CM_DEFLATE 8 #define ZLIB_CINFO_32K_WINDOW 7 #define ZLIB_FASTEST_COMPRESSION 0 #define ZLIB_FAST_COMPRESSION 1 #define ZLIB_DEFAULT_COMPRESSION 2 #define ZLIB_SLOWEST_COMPRESSION 3 #endif /* LIB_ZLIB_CONSTANTS_H */ libdeflate-1.5/lib/zlib_decompress.c000066400000000000000000000052411360172702500175550ustar00rootroot00000000000000/* * zlib_decompress.c - decompress with a zlib wrapper * * Originally public domain; changes after 2016-09-07 are copyrighted. * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "unaligned.h" #include "zlib_constants.h" #include "libdeflate.h" LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI libdeflate_zlib_decompress(struct libdeflate_decompressor *d, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail, size_t *actual_out_nbytes_ret) { const u8 *in_next = in; const u8 * const in_end = in_next + in_nbytes; u16 hdr; size_t actual_out_nbytes; enum libdeflate_result result; if (in_nbytes < ZLIB_MIN_OVERHEAD) return LIBDEFLATE_BAD_DATA; /* 2 byte header: CMF and FLG */ hdr = get_unaligned_be16(in_next); in_next += 2; /* FCHECK */ if ((hdr % 31) != 0) return LIBDEFLATE_BAD_DATA; /* CM */ if (((hdr >> 8) & 0xF) != ZLIB_CM_DEFLATE) return LIBDEFLATE_BAD_DATA; /* CINFO */ if ((hdr >> 12) > ZLIB_CINFO_32K_WINDOW) return LIBDEFLATE_BAD_DATA; /* FDICT */ if ((hdr >> 5) & 1) return LIBDEFLATE_BAD_DATA; /* Compressed data */ result = libdeflate_deflate_decompress(d, in_next, in_end - ZLIB_FOOTER_SIZE - in_next, out, out_nbytes_avail, actual_out_nbytes_ret); if (result != LIBDEFLATE_SUCCESS) return result; if (actual_out_nbytes_ret) actual_out_nbytes = *actual_out_nbytes_ret; else actual_out_nbytes = out_nbytes_avail; in_next = in_end - ZLIB_FOOTER_SIZE; /* ADLER32 */ if (libdeflate_adler32(1, out, actual_out_nbytes) != get_unaligned_be32(in_next)) return LIBDEFLATE_BAD_DATA; return LIBDEFLATE_SUCCESS; } libdeflate-1.5/libdeflate.h000066400000000000000000000320171360172702500157240ustar00rootroot00000000000000/* * libdeflate.h - public header for libdeflate */ #ifndef LIBDEFLATE_H #define LIBDEFLATE_H #ifdef __cplusplus extern "C" { #endif #define LIBDEFLATE_VERSION_MAJOR 1 #define LIBDEFLATE_VERSION_MINOR 5 #define LIBDEFLATE_VERSION_STRING "1.5" #include #include /* * On Windows, if you want to link to the DLL version of libdeflate, then * #define LIBDEFLATE_DLL. Note that the calling convention is "stdcall". 
*/ #ifdef LIBDEFLATE_DLL # ifdef BUILDING_LIBDEFLATE # define LIBDEFLATEEXPORT LIBEXPORT # elif defined(_WIN32) || defined(__CYGWIN__) # define LIBDEFLATEEXPORT __declspec(dllimport) # endif #endif #ifndef LIBDEFLATEEXPORT # define LIBDEFLATEEXPORT #endif #if defined(_WIN32) && !defined(_WIN64) # define LIBDEFLATEAPI_ABI __stdcall #else # define LIBDEFLATEAPI_ABI #endif #if defined(BUILDING_LIBDEFLATE) && defined(__GNUC__) && \ defined(_WIN32) && !defined(_WIN64) /* * On 32-bit Windows, gcc assumes 16-byte stack alignment but MSVC only 4. * Realign the stack when entering libdeflate to avoid crashing in SSE/AVX * code when called from an MSVC-compiled application. */ # define LIBDEFLATEAPI_STACKALIGN __attribute__((force_align_arg_pointer)) #else # define LIBDEFLATEAPI_STACKALIGN #endif #define LIBDEFLATEAPI LIBDEFLATEAPI_ABI LIBDEFLATEAPI_STACKALIGN /* ========================================================================== */ /* Compression */ /* ========================================================================== */ struct libdeflate_compressor; /* * libdeflate_alloc_compressor() allocates a new compressor that supports * DEFLATE, zlib, and gzip compression. 'compression_level' is the compression * level on a zlib-like scale but with a higher maximum value (1 = fastest, 6 = * medium/default, 9 = slow, 12 = slowest). The return value is a pointer to * the new compressor, or NULL if out of memory. * * Note: for compression, the sliding window size is defined at compilation time * to 32768, the largest size permissible in the DEFLATE format. It cannot be * changed at runtime. * * A single compressor is not safe to use by multiple threads concurrently. * However, different threads may use different compressors concurrently. */ LIBDEFLATEEXPORT struct libdeflate_compressor * LIBDEFLATEAPI libdeflate_alloc_compressor(int compression_level); /* * libdeflate_deflate_compress() performs raw DEFLATE compression on a buffer of * data. 
The function attempts to compress 'in_nbytes' bytes of data located at * 'in' and write the results to 'out', which has space for 'out_nbytes_avail' * bytes. The return value is the compressed size in bytes, or 0 if the data * could not be compressed to 'out_nbytes_avail' bytes or fewer. */ LIBDEFLATEEXPORT size_t LIBDEFLATEAPI libdeflate_deflate_compress(struct libdeflate_compressor *compressor, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail); /* * libdeflate_deflate_compress_bound() returns a worst-case upper bound on the * number of bytes of compressed data that may be produced by compressing any * buffer of length less than or equal to 'in_nbytes' using * libdeflate_deflate_compress() with the specified compressor. Mathematically, * this bound will necessarily be a number greater than or equal to 'in_nbytes'. * It may be an overestimate of the true upper bound. The return value is * guaranteed to be the same for all invocations with the same compressor and * same 'in_nbytes'. * * As a special case, 'compressor' may be NULL. This causes the bound to be * taken across *any* libdeflate_compressor that could ever be allocated with * this build of the library, with any options. * * Note that this function is not necessary in many applications. With * block-based compression, it is usually preferable to separately store the * uncompressed size of each block and to store any blocks that did not compress * to less than their original size uncompressed. In that scenario, there is no * need to know the worst-case compressed size, since the maximum number of * bytes of compressed data that may be used would always be one less than the * input length. You can just pass a buffer of that size to * libdeflate_deflate_compress() and store the data uncompressed if * libdeflate_deflate_compress() returns 0, indicating that the compressed data * did not fit into the provided output buffer. 
*/ LIBDEFLATEEXPORT size_t LIBDEFLATEAPI libdeflate_deflate_compress_bound(struct libdeflate_compressor *compressor, size_t in_nbytes); /* * Like libdeflate_deflate_compress(), but stores the data in the zlib wrapper * format. */ LIBDEFLATEEXPORT size_t LIBDEFLATEAPI libdeflate_zlib_compress(struct libdeflate_compressor *compressor, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail); /* * Like libdeflate_deflate_compress_bound(), but assumes the data will be * compressed with libdeflate_zlib_compress() rather than with * libdeflate_deflate_compress(). */ LIBDEFLATEEXPORT size_t LIBDEFLATEAPI libdeflate_zlib_compress_bound(struct libdeflate_compressor *compressor, size_t in_nbytes); /* * Like libdeflate_deflate_compress(), but stores the data in the gzip wrapper * format. */ LIBDEFLATEEXPORT size_t LIBDEFLATEAPI libdeflate_gzip_compress(struct libdeflate_compressor *compressor, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail); /* * Like libdeflate_deflate_compress_bound(), but assumes the data will be * compressed with libdeflate_gzip_compress() rather than with * libdeflate_deflate_compress(). */ LIBDEFLATEEXPORT size_t LIBDEFLATEAPI libdeflate_gzip_compress_bound(struct libdeflate_compressor *compressor, size_t in_nbytes); /* * libdeflate_free_compressor() frees a compressor that was allocated with * libdeflate_alloc_compressor(). If a NULL pointer is passed in, no action is * taken. */ LIBDEFLATEEXPORT void LIBDEFLATEAPI libdeflate_free_compressor(struct libdeflate_compressor *compressor); /* ========================================================================== */ /* Decompression */ /* ========================================================================== */ struct libdeflate_decompressor; /* * libdeflate_alloc_decompressor() allocates a new decompressor that can be used * for DEFLATE, zlib, and gzip decompression. The return value is a pointer to * the new decompressor, or NULL if out of memory. 
* * This function takes no parameters, and the returned decompressor is valid for * decompressing data that was compressed at any compression level and with any * sliding window size. * * A single decompressor is not safe to use by multiple threads concurrently. * However, different threads may use different decompressors concurrently. */ LIBDEFLATEEXPORT struct libdeflate_decompressor * LIBDEFLATEAPI libdeflate_alloc_decompressor(void); /* * Result of a call to libdeflate_deflate_decompress(), * libdeflate_zlib_decompress(), or libdeflate_gzip_decompress(). */ enum libdeflate_result { /* Decompression was successful. */ LIBDEFLATE_SUCCESS = 0, /* Decompressed failed because the compressed data was invalid, corrupt, * or otherwise unsupported. */ LIBDEFLATE_BAD_DATA = 1, /* A NULL 'actual_out_nbytes_ret' was provided, but the data would have * decompressed to fewer than 'out_nbytes_avail' bytes. */ LIBDEFLATE_SHORT_OUTPUT = 2, /* The data would have decompressed to more than 'out_nbytes_avail' * bytes. */ LIBDEFLATE_INSUFFICIENT_SPACE = 3, }; /* * libdeflate_deflate_decompress() decompresses the DEFLATE-compressed stream * from the buffer 'in' with compressed size up to 'in_nbytes' bytes. The * uncompressed data is written to 'out', a buffer with size 'out_nbytes_avail' * bytes. If decompression succeeds, then 0 (LIBDEFLATE_SUCCESS) is returned. * Otherwise, a nonzero result code such as LIBDEFLATE_BAD_DATA is returned. If * a nonzero result code is returned, then the contents of the output buffer are * undefined. * * Decompression stops at the end of the DEFLATE stream (as indicated by the * BFINAL flag), even if it is actually shorter than 'in_nbytes' bytes. * * libdeflate_deflate_decompress() can be used in cases where the actual * uncompressed size is known (recommended) or unknown (not recommended): * * - If the actual uncompressed size is known, then pass the actual * uncompressed size as 'out_nbytes_avail' and pass NULL for * 'actual_out_nbytes_ret'. 
This makes libdeflate_deflate_decompress() fail * with LIBDEFLATE_SHORT_OUTPUT if the data decompressed to fewer than the * specified number of bytes. * * - If the actual uncompressed size is unknown, then provide a non-NULL * 'actual_out_nbytes_ret' and provide a buffer with some size * 'out_nbytes_avail' that you think is large enough to hold all the * uncompressed data. In this case, if the data decompresses to less than * or equal to 'out_nbytes_avail' bytes, then * libdeflate_deflate_decompress() will write the actual uncompressed size * to *actual_out_nbytes_ret and return 0 (LIBDEFLATE_SUCCESS). Otherwise, * it will return LIBDEFLATE_INSUFFICIENT_SPACE if the provided buffer was * not large enough but no other problems were encountered, or another * nonzero result code if decompression failed for another reason. */ LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI libdeflate_deflate_decompress(struct libdeflate_decompressor *decompressor, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail, size_t *actual_out_nbytes_ret); /* * Like libdeflate_deflate_decompress(), but adds the 'actual_in_nbytes_ret' * argument. If decompression succeeds and 'actual_in_nbytes_ret' is not NULL, * then the actual compressed size of the DEFLATE stream (aligned to the next * byte boundary) is written to *actual_in_nbytes_ret. */ LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI libdeflate_deflate_decompress_ex(struct libdeflate_decompressor *decompressor, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail, size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret); /* * Like libdeflate_deflate_decompress(), but assumes the zlib wrapper format * instead of raw DEFLATE. 
*/ LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI libdeflate_zlib_decompress(struct libdeflate_decompressor *decompressor, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail, size_t *actual_out_nbytes_ret); /* * Like libdeflate_deflate_decompress(), but assumes the gzip wrapper format * instead of raw DEFLATE. * * If multiple gzip-compressed members are concatenated, then only the first * will be decompressed. Use libdeflate_gzip_decompress_ex() if you need * multi-member support. */ LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI libdeflate_gzip_decompress(struct libdeflate_decompressor *decompressor, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail, size_t *actual_out_nbytes_ret); /* * Like libdeflate_gzip_decompress(), but adds the 'actual_in_nbytes_ret' * argument. If 'actual_in_nbytes_ret' is not NULL and the decompression * succeeds (indicating that the first gzip-compressed member in the input * buffer was decompressed), then the actual number of input bytes consumed is * written to *actual_in_nbytes_ret. */ LIBDEFLATEEXPORT enum libdeflate_result LIBDEFLATEAPI libdeflate_gzip_decompress_ex(struct libdeflate_decompressor *decompressor, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail, size_t *actual_in_nbytes_ret, size_t *actual_out_nbytes_ret); /* * libdeflate_free_decompressor() frees a decompressor that was allocated with * libdeflate_alloc_decompressor(). If a NULL pointer is passed in, no action * is taken. */ LIBDEFLATEEXPORT void LIBDEFLATEAPI libdeflate_free_decompressor(struct libdeflate_decompressor *decompressor); /* ========================================================================== */ /* Checksums */ /* ========================================================================== */ /* * libdeflate_adler32() updates a running Adler-32 checksum with 'len' bytes of * data and returns the updated checksum. 
When starting a new checksum, the * required initial value for 'adler' is 1. This value is also returned when * 'buffer' is specified as NULL. */ LIBDEFLATEEXPORT uint32_t LIBDEFLATEAPI libdeflate_adler32(uint32_t adler32, const void *buffer, size_t len); /* * libdeflate_crc32() updates a running CRC-32 checksum with 'len' bytes of data * and returns the updated checksum. When starting a new checksum, the required * initial value for 'crc' is 0. This value is also returned when 'buffer' is * specified as NULL. */ LIBDEFLATEEXPORT uint32_t LIBDEFLATEAPI libdeflate_crc32(uint32_t crc, const void *buffer, size_t len); #ifdef __cplusplus } #endif #endif /* LIBDEFLATE_H */ libdeflate-1.5/programs/000077500000000000000000000000001360172702500153075ustar00rootroot00000000000000libdeflate-1.5/programs/benchmark.c000066400000000000000000000364131360172702500174140ustar00rootroot00000000000000/* * benchmark.c - a compression testing and benchmark program * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "test_util.h" static const tchar *const optstring = T("1::2::3::4::5::6::7::8::9::C:D:ghs:VYZz"); enum wrapper { NO_WRAPPER, ZLIB_WRAPPER, GZIP_WRAPPER, }; struct compressor { int level; enum wrapper wrapper; const struct engine *engine; void *private; }; struct decompressor { enum wrapper wrapper; const struct engine *engine; void *private; }; struct engine { const tchar *name; bool (*init_compressor)(struct compressor *); size_t (*compress)(struct compressor *, const void *, size_t, void *, size_t); void (*destroy_compressor)(struct compressor *); bool (*init_decompressor)(struct decompressor *); bool (*decompress)(struct decompressor *, const void *, size_t, void *, size_t); void (*destroy_decompressor)(struct decompressor *); }; /******************************************************************************/ static bool libdeflate_engine_init_compressor(struct compressor *c) { c->private = alloc_compressor(c->level); return c->private != NULL; } static size_t libdeflate_engine_compress(struct compressor *c, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail) { switch (c->wrapper) { case ZLIB_WRAPPER: return libdeflate_zlib_compress(c->private, in, in_nbytes, out, out_nbytes_avail); case GZIP_WRAPPER: return libdeflate_gzip_compress(c->private, in, in_nbytes, out, out_nbytes_avail); default: return libdeflate_deflate_compress(c->private, in, in_nbytes, out, out_nbytes_avail); } } static void libdeflate_engine_destroy_compressor(struct compressor *c) { libdeflate_free_compressor(c->private); } static bool libdeflate_engine_init_decompressor(struct decompressor *d) { d->private = alloc_decompressor(); return d->private != NULL; } static bool 
libdeflate_engine_decompress(struct decompressor *d, const void *in, size_t in_nbytes, void *out, size_t out_nbytes) { switch (d->wrapper) { case ZLIB_WRAPPER: return !libdeflate_zlib_decompress(d->private, in, in_nbytes, out, out_nbytes, NULL); case GZIP_WRAPPER: return !libdeflate_gzip_decompress(d->private, in, in_nbytes, out, out_nbytes, NULL); default: return !libdeflate_deflate_decompress(d->private, in, in_nbytes, out, out_nbytes, NULL); } } static void libdeflate_engine_destroy_decompressor(struct decompressor *d) { libdeflate_free_decompressor(d->private); } static const struct engine libdeflate_engine = { .name = T("libdeflate"), .init_compressor = libdeflate_engine_init_compressor, .compress = libdeflate_engine_compress, .destroy_compressor = libdeflate_engine_destroy_compressor, .init_decompressor = libdeflate_engine_init_decompressor, .decompress = libdeflate_engine_decompress, .destroy_decompressor = libdeflate_engine_destroy_decompressor, }; /******************************************************************************/ static int get_libz_window_bits(enum wrapper wrapper) { const int windowBits = 15; switch (wrapper) { case ZLIB_WRAPPER: return windowBits; case GZIP_WRAPPER: return windowBits + 16; default: return -windowBits; } } static bool libz_engine_init_compressor(struct compressor *c) { z_stream *z; if (c->level > 9) { msg("libz only supports up to compression level 9"); return false; } z = xmalloc(sizeof(*z)); if (z == NULL) return false; z->next_in = NULL; z->avail_in = 0; z->zalloc = NULL; z->zfree = NULL; z->opaque = NULL; if (deflateInit2(z, c->level, Z_DEFLATED, get_libz_window_bits(c->wrapper), 8, Z_DEFAULT_STRATEGY) != Z_OK) { msg("unable to initialize deflater"); free(z); return false; } c->private = z; return true; } static size_t libz_engine_compress(struct compressor *c, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail) { z_stream *z = c->private; deflateReset(z); z->next_in = (void *)in; z->avail_in = 
in_nbytes; z->next_out = out; z->avail_out = out_nbytes_avail; if (deflate(z, Z_FINISH) != Z_STREAM_END) return 0; return out_nbytes_avail - z->avail_out; } static void libz_engine_destroy_compressor(struct compressor *c) { z_stream *z = c->private; deflateEnd(z); free(z); } static bool libz_engine_init_decompressor(struct decompressor *d) { z_stream *z; z = xmalloc(sizeof(*z)); if (z == NULL) return false; z->next_in = NULL; z->avail_in = 0; z->zalloc = NULL; z->zfree = NULL; z->opaque = NULL; if (inflateInit2(z, get_libz_window_bits(d->wrapper)) != Z_OK) { msg("unable to initialize inflater"); free(z); return false; } d->private = z; return true; } static bool libz_engine_decompress(struct decompressor *d, const void *in, size_t in_nbytes, void *out, size_t out_nbytes) { z_stream *z = d->private; inflateReset(z); z->next_in = (void *)in; z->avail_in = in_nbytes; z->next_out = out; z->avail_out = out_nbytes; return inflate(z, Z_FINISH) == Z_STREAM_END && z->avail_out == 0; } static void libz_engine_destroy_decompressor(struct decompressor *d) { z_stream *z = d->private; inflateEnd(z); free(z); } static const struct engine libz_engine = { .name = T("libz"), .init_compressor = libz_engine_init_compressor, .compress = libz_engine_compress, .destroy_compressor = libz_engine_destroy_compressor, .init_decompressor = libz_engine_init_decompressor, .decompress = libz_engine_decompress, .destroy_decompressor = libz_engine_destroy_decompressor, }; /******************************************************************************/ static const struct engine * const all_engines[] = { &libdeflate_engine, &libz_engine, }; #define DEFAULT_ENGINE libdeflate_engine static const struct engine * name_to_engine(const tchar *name) { size_t i; for (i = 0; i < ARRAY_LEN(all_engines); i++) if (tstrcmp(all_engines[i]->name, name) == 0) return all_engines[i]; return NULL; } /******************************************************************************/ static bool compressor_init(struct 
compressor *c, int level, enum wrapper wrapper, const struct engine *engine) { c->level = level; c->wrapper = wrapper; c->engine = engine; return engine->init_compressor(c); } static size_t do_compress(struct compressor *c, const void *in, size_t in_nbytes, void *out, size_t out_nbytes_avail) { return c->engine->compress(c, in, in_nbytes, out, out_nbytes_avail); } static void compressor_destroy(struct compressor *c) { c->engine->destroy_compressor(c); } static bool decompressor_init(struct decompressor *d, enum wrapper wrapper, const struct engine *engine) { d->wrapper = wrapper; d->engine = engine; return engine->init_decompressor(d); } static bool do_decompress(struct decompressor *d, const void *in, size_t in_nbytes, void *out, size_t out_nbytes) { return d->engine->decompress(d, in, in_nbytes, out, out_nbytes); } static void decompressor_destroy(struct decompressor *d) { d->engine->destroy_decompressor(d); } /******************************************************************************/ static void show_available_engines(FILE *fp) { size_t i; fprintf(fp, "Available ENGINEs are: "); for (i = 0; i < ARRAY_LEN(all_engines); i++) { fprintf(fp, "%"TS, all_engines[i]->name); if (i < ARRAY_LEN(all_engines) - 1) fprintf(fp, ", "); } fprintf(fp, ". 
Default is %"TS"\n", DEFAULT_ENGINE.name); } static void show_usage(FILE *fp) { fprintf(fp, "Usage: %"TS" [-LVL] [-C ENGINE] [-D ENGINE] [-ghVz] [-s SIZE] [FILE]...\n" "Benchmark DEFLATE compression and decompression on the specified FILEs.\n" "\n" "Options:\n" " -1 fastest (worst) compression\n" " -6 medium compression (default)\n" " -12 slowest (best) compression\n" " -C ENGINE compression engine\n" " -D ENGINE decompression engine\n" " -g use gzip wrapper\n" " -h print this help\n" " -s SIZE chunk size\n" " -V show version and legal information\n" " -z use zlib wrapper\n" "\n", program_invocation_name); show_available_engines(fp); } static void show_version(void) { printf( "libdeflate compression benchmark program v" LIBDEFLATE_VERSION_STRING "\n" "Copyright 2016 Eric Biggers\n" "\n" "This program is free software which may be modified and/or redistributed\n" "under the terms of the MIT license. There is NO WARRANTY, to the extent\n" "permitted by law. See the COPYING file for details.\n" ); } /******************************************************************************/ static int do_benchmark(struct file_stream *in, void *original_buf, void *compressed_buf, void *decompressed_buf, u32 chunk_size, struct compressor *compressor, struct decompressor *decompressor) { u64 total_uncompressed_size = 0; u64 total_compressed_size = 0; u64 total_compress_time = 0; u64 total_decompress_time = 0; ssize_t ret; while ((ret = xread(in, original_buf, chunk_size)) > 0) { u32 original_size = ret; u32 compressed_size; u64 start_time; bool ok; total_uncompressed_size += original_size; /* Compress the chunk of data. */ start_time = timer_ticks(); compressed_size = do_compress(compressor, original_buf, original_size, compressed_buf, original_size - 1); total_compress_time += timer_ticks() - start_time; if (compressed_size) { /* Successfully compressed the chunk of data. */ /* Decompress the data we just compressed and compare * the result with the original. 
*/ start_time = timer_ticks(); ok = do_decompress(decompressor, compressed_buf, compressed_size, decompressed_buf, original_size); total_decompress_time += timer_ticks() - start_time; if (!ok) { msg("%"TS": failed to decompress data", in->name); return -1; } if (memcmp(original_buf, decompressed_buf, original_size) != 0) { msg("%"TS": data did not decompress to " "original", in->name); return -1; } total_compressed_size += compressed_size; } else { /* Compression did not make the chunk smaller. */ total_compressed_size += original_size; } } if (ret < 0) return ret; if (total_uncompressed_size == 0) { printf("\tFile was empty.\n"); return 0; } if (total_compress_time == 0) total_compress_time = 1; if (total_decompress_time == 0) total_decompress_time = 1; printf("\tCompressed %"PRIu64 " => %"PRIu64" bytes (%u.%03u%%)\n", total_uncompressed_size, total_compressed_size, (unsigned int)(total_compressed_size * 100 / total_uncompressed_size), (unsigned int)(total_compressed_size * 100000 / total_uncompressed_size % 1000)); printf("\tCompression time: %"PRIu64" ms (%"PRIu64" MB/s)\n", timer_ticks_to_ms(total_compress_time), timer_MB_per_s(total_uncompressed_size, total_compress_time)); printf("\tDecompression time: %"PRIu64" ms (%"PRIu64" MB/s)\n", timer_ticks_to_ms(total_decompress_time), timer_MB_per_s(total_uncompressed_size, total_decompress_time)); return 0; } int tmain(int argc, tchar *argv[]) { u32 chunk_size = 1048576; int level = 6; enum wrapper wrapper = NO_WRAPPER; const struct engine *compress_engine = &DEFAULT_ENGINE; const struct engine *decompress_engine = &DEFAULT_ENGINE; void *original_buf = NULL; void *compressed_buf = NULL; void *decompressed_buf = NULL; struct compressor compressor; struct decompressor decompressor; tchar *default_file_list[] = { NULL }; int opt_char; int i; int ret; program_invocation_name = get_filename(argv[0]); while ((opt_char = tgetopt(argc, argv, optstring)) != -1) { switch (opt_char) { case '1': case '2': case '3': case '4': 
case '5': case '6': case '7': case '8': case '9': level = parse_compression_level(opt_char, toptarg); if (level == 0) return 1; break; case 'C': compress_engine = name_to_engine(toptarg); if (compress_engine == NULL) { msg("invalid compression engine: \"%"TS"\"", toptarg); show_available_engines(stderr); return 1; } break; case 'D': decompress_engine = name_to_engine(toptarg); if (decompress_engine == NULL) { msg("invalid decompression engine: \"%"TS"\"", toptarg); show_available_engines(stderr); return 1; } break; case 'g': wrapper = GZIP_WRAPPER; break; case 'h': show_usage(stdout); return 0; case 's': chunk_size = tstrtoul(toptarg, NULL, 10); if (chunk_size == 0) { msg("invalid chunk size: \"%"TS"\"", toptarg); return 1; } break; case 'V': show_version(); return 0; case 'Y': /* deprecated, use '-C libz' instead */ compress_engine = &libz_engine; break; case 'Z': /* deprecated, use '-D libz' instead */ decompress_engine = &libz_engine; break; case 'z': wrapper = ZLIB_WRAPPER; break; default: show_usage(stderr); return 1; } } argc -= toptind; argv += toptind; original_buf = xmalloc(chunk_size); compressed_buf = xmalloc(chunk_size - 1); decompressed_buf = xmalloc(chunk_size); ret = -1; if (original_buf == NULL || compressed_buf == NULL || decompressed_buf == NULL) goto out0; if (!compressor_init(&compressor, level, wrapper, compress_engine)) goto out0; if (!decompressor_init(&decompressor, wrapper, decompress_engine)) goto out1; if (argc == 0) { argv = default_file_list; argc = ARRAY_LEN(default_file_list); } else { for (i = 0; i < argc; i++) if (argv[i][0] == '-' && argv[i][1] == '\0') argv[i] = NULL; } printf("Benchmarking DEFLATE compression:\n"); printf("\tCompression level: %d\n", level); printf("\tChunk size: %"PRIu32"\n", chunk_size); printf("\tWrapper: %s\n", wrapper == NO_WRAPPER ? "None" : wrapper == ZLIB_WRAPPER ? 
"zlib" : "gzip"); printf("\tCompression engine: %"TS"\n", compress_engine->name); printf("\tDecompression engine: %"TS"\n", decompress_engine->name); for (i = 0; i < argc; i++) { struct file_stream in; ret = xopen_for_read(argv[i], true, &in); if (ret != 0) goto out2; printf("Processing %"TS"...\n", in.name); ret = do_benchmark(&in, original_buf, compressed_buf, decompressed_buf, chunk_size, &compressor, &decompressor); xclose(&in); if (ret != 0) goto out2; } ret = 0; out2: decompressor_destroy(&decompressor); out1: compressor_destroy(&compressor); out0: free(decompressed_buf); free(compressed_buf); free(original_buf); return -ret; } libdeflate-1.5/programs/checksum.c000066400000000000000000000110341360172702500172540ustar00rootroot00000000000000/* * checksum.c - Adler-32 and CRC-32 checksumming program * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ #include "test_util.h" static const tchar *const optstring = T("Ahs:tZ"); static void show_usage(FILE *fp) { fprintf(fp, "Usage: %"TS" [-A] [-h] [-s SIZE] [-t] [-Z] [FILE]...\n" "Calculate Adler-32 or CRC-32 checksums of the specified FILEs.\n" "\n" "Options:\n" " -A use Adler-32 (default is CRC-32)\n" " -h print this help\n" " -s SIZE chunk size\n" " -t show checksum speed, excluding I/O\n" " -Z use zlib implementation instead of libdeflate\n", program_invocation_name); } typedef u32 (*cksum_fn_t)(u32, const void *, size_t); static u32 adler32_libdeflate(u32 adler, const void *buf, size_t len) { return libdeflate_adler32(adler, buf, len); } static u32 crc32_libdeflate(u32 crc, const void *buf, size_t len) { return libdeflate_crc32(crc, buf, len); } static u32 adler32_zlib(u32 adler, const void *buf, size_t len) { return adler32(adler, buf, len); } static u32 crc32_zlib(u32 crc, const void *buf, size_t len) { return crc32(crc, buf, len); } static int checksum_stream(struct file_stream *in, cksum_fn_t cksum, u32 *sum, void *buf, size_t bufsize, u64 *size_ret, u64 *elapsed_ret) { u64 size = 0; u64 elapsed = 0; for (;;) { ssize_t ret; u64 start_time; ret = xread(in, buf, bufsize); if (ret < 0) return ret; if (ret == 0) break; size += ret; start_time = timer_ticks(); *sum = cksum(*sum, buf, ret); elapsed += timer_ticks() - start_time; } if (elapsed == 0) elapsed = 1; *size_ret = size; *elapsed_ret = elapsed; return 0; } int tmain(int argc, tchar *argv[]) { bool use_adler32 = false; bool use_zlib_impl = false; bool do_timing = false; void *buf; size_t bufsize = 131072; tchar *default_file_list[] = { NULL }; cksum_fn_t cksum; int opt_char; int i; int ret; program_invocation_name = get_filename(argv[0]); while ((opt_char = tgetopt(argc, argv, optstring)) != -1) { switch (opt_char) { case 'A': use_adler32 = true; break; case 'h': show_usage(stdout); return 0; case 's': bufsize = tstrtoul(toptarg, NULL, 10); if (bufsize == 0) { msg("invalid chunk size: \"%"TS"\"", 
toptarg); return 1; } break; case 't': do_timing = true; break; case 'Z': use_zlib_impl = true; break; default: show_usage(stderr); return 1; } } argc -= toptind; argv += toptind; if (use_adler32) { if (use_zlib_impl) cksum = adler32_zlib; else cksum = adler32_libdeflate; } else { if (use_zlib_impl) cksum = crc32_zlib; else cksum = crc32_libdeflate; } buf = xmalloc(bufsize); if (buf == NULL) return 1; if (argc == 0) { argv = default_file_list; argc = ARRAY_LEN(default_file_list); } else { for (i = 0; i < argc; i++) if (argv[i][0] == '-' && argv[i][1] == '\0') argv[i] = NULL; } for (i = 0; i < argc; i++) { struct file_stream in; u32 sum = cksum(0, NULL, 0); u64 size = 0; u64 elapsed = 0; ret = xopen_for_read(argv[i], true, &in); if (ret != 0) goto out; ret = checksum_stream(&in, cksum, &sum, buf, bufsize, &size, &elapsed); if (ret == 0) { if (do_timing) { printf("%08"PRIx32"\t%"TS"\t" "%"PRIu64" ms\t%"PRIu64" MB/s\n", sum, in.name, timer_ticks_to_ms(elapsed), timer_MB_per_s(size, elapsed)); } else { printf("%08"PRIx32"\t%"TS"\t\n", sum, in.name); } } xclose(&in); if (ret != 0) goto out; } ret = 0; out: free(buf); return -ret; } libdeflate-1.5/programs/detect.sh000077500000000000000000000027201360172702500171170ustar00rootroot00000000000000#!/bin/sh if [ -z "$CC" ]; then CC=cc fi echo "/* THIS FILE WAS AUTOMATICALLY GENERATED. DO NOT EDIT. */" echo "#ifndef CONFIG_H" echo "#define CONFIG_H" tmpfile="$(mktemp -t libdeflate_config.XXXXXXXX)" trap "rm -f \"$tmpfile\"" EXIT program_compiles() { echo "$1" > "$tmpfile" $CC $CFLAGS -Wno-error -x c "$tmpfile" -o /dev/null > /dev/null 2>&1 } check_function() { funcname="$1" macro="HAVE_$(echo $funcname | tr a-z A-Z)" echo echo "/* Is the $funcname() function available? 
# Succeed if 'struct stat' has a member named "$1".
#
# The probe is a tiny C program that merely references the member; it is
# handed to program_compiles, so the result is that function's exit status.
# The two #include directives must each sit on their own line and must name
# the headers that declare 'struct stat', otherwise the probe can never
# compile and every field is reported as missing.
have_stat_field() {
	program_compiles "#include <sys/types.h>
#include <sys/stat.h>
int main() { struct stat st; st.$1; }"
}
/* Settings collected from the command line by tmain() */
struct options {
	bool to_stdout;		/* -c: write to standard output */
	bool decompress;	/* -d, or defaulted from is_gunzip() */
	bool force;		/* -f: overwrite existing output files */
	bool keep;		/* -k: don't delete input files */
	int compression_level;	/* -1 ... -12; tmain() defaults it to 6 */
	const tchar *suffix;	/* -S SUF; tmain() defaults it to ".gz" */
};
*/ static bool is_gunzip(void) { if (tstrxcmp(program_invocation_name, T("gunzip")) == 0) return true; if (tstrxcmp(program_invocation_name, T("libdeflate-gunzip")) == 0) return true; #ifdef _WIN32 if (tstrxcmp(program_invocation_name, T("gunzip.exe")) == 0) return true; if (tstrxcmp(program_invocation_name, T("libdeflate-gunzip.exe")) == 0) return true; #endif return false; } static const tchar * get_suffix(const tchar *path, const tchar *suffix) { size_t path_len = tstrlen(path); size_t suffix_len = tstrlen(suffix); const tchar *p; if (path_len <= suffix_len) return NULL; p = &path[path_len - suffix_len]; if (tstrxcmp(p, suffix) == 0) return p; return NULL; } static bool has_suffix(const tchar *path, const tchar *suffix) { return get_suffix(path, suffix) != NULL; } static tchar * append_suffix(const tchar *path, const tchar *suffix) { size_t path_len = tstrlen(path); size_t suffix_len = tstrlen(suffix); tchar *suffixed_path; suffixed_path = xmalloc((path_len + suffix_len + 1) * sizeof(tchar)); if (suffixed_path == NULL) return NULL; tmemcpy(suffixed_path, path, path_len); tmemcpy(&suffixed_path[path_len], suffix, suffix_len + 1); return suffixed_path; } static int do_compress(struct libdeflate_compressor *compressor, struct file_stream *in, struct file_stream *out) { const void *uncompressed_data = in->mmap_mem; size_t uncompressed_size = in->mmap_size; void *compressed_data; size_t actual_compressed_size; size_t max_compressed_size; int ret; max_compressed_size = libdeflate_gzip_compress_bound(compressor, uncompressed_size); compressed_data = xmalloc(max_compressed_size); if (compressed_data == NULL) { msg("%"TS": file is probably too large to be processed by this " "program", in->name); ret = -1; goto out; } actual_compressed_size = libdeflate_gzip_compress(compressor, uncompressed_data, uncompressed_size, compressed_data, max_compressed_size); if (actual_compressed_size == 0) { msg("Bug in libdeflate_gzip_compress_bound()!"); ret = -1; goto out; } ret = 
full_write(out, compressed_data, actual_compressed_size); out: free(compressed_data); return ret; } static u32 load_u32_gzip(const u8 *p) { return ((u32)p[0] << 0) | ((u32)p[1] << 8) | ((u32)p[2] << 16) | ((u32)p[3] << 24); } static int do_decompress(struct libdeflate_decompressor *decompressor, struct file_stream *in, struct file_stream *out) { const u8 *compressed_data = in->mmap_mem; size_t compressed_size = in->mmap_size; void *uncompressed_data = NULL; size_t uncompressed_size; size_t actual_in_nbytes; size_t actual_out_nbytes; enum libdeflate_result result; int ret = 0; if (compressed_size < sizeof(u32)) { msg("%"TS": not in gzip format", in->name); ret = -1; goto out; } /* * Use the ISIZE field as a hint for the decompressed data size. It may * need to be increased later, however, because the file may contain * multiple gzip members and the particular ISIZE we happen to use may * not be the largest; or the real size may be >= 4 GiB, causing ISIZE * to overflow. In any case, make sure to allocate at least one byte. 
*/ uncompressed_size = load_u32_gzip(&compressed_data[compressed_size - 4]); if (uncompressed_size == 0) uncompressed_size = 1; do { if (uncompressed_data == NULL) { uncompressed_data = xmalloc(uncompressed_size); if (uncompressed_data == NULL) { msg("%"TS": file is probably too large to be " "processed by this program", in->name); ret = -1; goto out; } } result = libdeflate_gzip_decompress_ex(decompressor, compressed_data, compressed_size, uncompressed_data, uncompressed_size, &actual_in_nbytes, &actual_out_nbytes); if (result == LIBDEFLATE_INSUFFICIENT_SPACE) { if (uncompressed_size * 2 <= uncompressed_size) { msg("%"TS": file corrupt or too large to be " "processed by this program", in->name); ret = -1; goto out; } uncompressed_size *= 2; free(uncompressed_data); uncompressed_data = NULL; continue; } if (result != LIBDEFLATE_SUCCESS) { msg("%"TS": file corrupt or not in gzip format", in->name); ret = -1; goto out; } if (actual_in_nbytes == 0 || actual_in_nbytes > compressed_size || actual_out_nbytes > uncompressed_size) { msg("Bug in libdeflate_gzip_decompress_ex()!"); ret = -1; goto out; } ret = full_write(out, uncompressed_data, actual_out_nbytes); if (ret != 0) goto out; compressed_data += actual_in_nbytes; compressed_size -= actual_in_nbytes; } while (compressed_size != 0); out: free(uncompressed_data); return ret; } static int stat_file(struct file_stream *in, stat_t *stbuf, bool allow_hard_links) { if (tfstat(in->fd, stbuf) != 0) { msg("%"TS": unable to stat file", in->name); return -1; } if (!S_ISREG(stbuf->st_mode) && !in->is_standard_stream) { msg("%"TS" is %s -- skipping", in->name, S_ISDIR(stbuf->st_mode) ? 
"a directory" : "not a regular file"); return -2; } if (stbuf->st_nlink > 1 && !allow_hard_links) { msg("%"TS" has multiple hard links -- skipping " "(use -f to process anyway)", in->name); return -2; } return 0; } static void restore_mode(struct file_stream *out, const stat_t *stbuf) { #ifndef _WIN32 if (fchmod(out->fd, stbuf->st_mode) != 0) msg_errno("%"TS": unable to preserve mode", out->name); #endif } static void restore_owner_and_group(struct file_stream *out, const stat_t *stbuf) { #ifndef _WIN32 if (fchown(out->fd, stbuf->st_uid, stbuf->st_gid) != 0) { msg_errno("%"TS": unable to preserve owner and group", out->name); } #endif } static void restore_timestamps(struct file_stream *out, const tchar *newpath, const stat_t *stbuf) { int ret; #if defined(HAVE_FUTIMENS) && defined(HAVE_STAT_NANOSECOND_PRECISION) struct timespec times[2] = { stbuf->st_atim, stbuf->st_mtim, }; ret = futimens(out->fd, times); #elif defined(HAVE_FUTIMES) && defined(HAVE_STAT_NANOSECOND_PRECISION) struct timeval times[2] = { { stbuf->st_atim.tv_sec, stbuf->st_atim.tv_nsec / 1000, }, { stbuf->st_mtim.tv_sec, stbuf->st_mtim.tv_nsec / 1000, }, }; ret = futimes(out->fd, times); #else struct tutimbuf times = { stbuf->st_atime, stbuf->st_mtime, }; ret = tutime(newpath, ×); #endif if (ret != 0) msg_errno("%"TS": unable to preserve timestamps", out->name); } static void restore_metadata(struct file_stream *out, const tchar *newpath, const stat_t *stbuf) { restore_mode(out, stbuf); restore_owner_and_group(out, stbuf); restore_timestamps(out, newpath, stbuf); } static int decompress_file(struct libdeflate_decompressor *decompressor, const tchar *path, const struct options *options) { tchar *oldpath = (tchar *)path; tchar *newpath = NULL; struct file_stream in; struct file_stream out; stat_t stbuf; int ret; int ret2; if (path != NULL) { const tchar *suffix = get_suffix(path, options->suffix); if (suffix == NULL) { /* * Input file is unsuffixed. If the file doesn't exist, * then try it suffixed. 
Otherwise, if we're not * writing to stdout, skip the file with warning status. * Otherwise, go ahead and try to open the file anyway * (which will very likely fail). */ if (tstat(path, &stbuf) != 0 && errno == ENOENT) { oldpath = append_suffix(path, options->suffix); if (oldpath == NULL) return -1; if (!options->to_stdout) newpath = (tchar *)path; } else if (!options->to_stdout) { msg("\"%"TS"\" does not end with the %"TS" " "suffix -- skipping", path, options->suffix); return -2; } } else if (!options->to_stdout) { /* * Input file is suffixed, and we're not writing to * stdout. Strip the suffix to get the path to the * output file. */ newpath = xmalloc((suffix - oldpath + 1) * sizeof(tchar)); if (newpath == NULL) return -1; tmemcpy(newpath, oldpath, suffix - oldpath); newpath[suffix - oldpath] = '\0'; } } ret = xopen_for_read(oldpath, options->force || options->to_stdout, &in); if (ret != 0) goto out_free_paths; if (!options->force && isatty(in.fd)) { msg("Refusing to read compressed data from terminal. 
" "Use -f to override.\nFor help, use -h."); ret = -1; goto out_close_in; } ret = stat_file(&in, &stbuf, options->force || options->keep || oldpath == NULL || newpath == NULL); if (ret != 0) goto out_close_in; ret = xopen_for_write(newpath, options->force, &out); if (ret != 0) goto out_close_in; /* TODO: need a streaming-friendly solution */ ret = map_file_contents(&in, stbuf.st_size); if (ret != 0) goto out_close_out; ret = do_decompress(decompressor, &in, &out); if (ret != 0) goto out_close_out; if (oldpath != NULL && newpath != NULL) restore_metadata(&out, newpath, &stbuf); ret = 0; out_close_out: ret2 = xclose(&out); if (ret == 0) ret = ret2; if (ret != 0 && newpath != NULL) tunlink(newpath); out_close_in: xclose(&in); if (ret == 0 && oldpath != NULL && newpath != NULL && !options->keep) tunlink(oldpath); out_free_paths: if (newpath != path) free(newpath); if (oldpath != path) free(oldpath); return ret; } static int compress_file(struct libdeflate_compressor *compressor, const tchar *path, const struct options *options) { tchar *newpath = NULL; struct file_stream in; struct file_stream out; stat_t stbuf; int ret; int ret2; if (path != NULL && !options->to_stdout) { if (!options->force && has_suffix(path, options->suffix)) { msg("%"TS": already has %"TS" suffix -- skipping", path, options->suffix); return 0; } newpath = append_suffix(path, options->suffix); if (newpath == NULL) return -1; } ret = xopen_for_read(path, options->force || options->to_stdout, &in); if (ret != 0) goto out_free_newpath; ret = stat_file(&in, &stbuf, options->force || options->keep || path == NULL || newpath == NULL); if (ret != 0) goto out_close_in; ret = xopen_for_write(newpath, options->force, &out); if (ret != 0) goto out_close_in; if (!options->force && isatty(out.fd)) { msg("Refusing to write compressed data to terminal. 
" "Use -f to override.\nFor help, use -h."); ret = -1; goto out_close_out; } /* TODO: need a streaming-friendly solution */ ret = map_file_contents(&in, stbuf.st_size); if (ret != 0) goto out_close_out; ret = do_compress(compressor, &in, &out); if (ret != 0) goto out_close_out; if (path != NULL && newpath != NULL) restore_metadata(&out, newpath, &stbuf); ret = 0; out_close_out: ret2 = xclose(&out); if (ret == 0) ret = ret2; if (ret != 0 && newpath != NULL) tunlink(newpath); out_close_in: xclose(&in); if (ret == 0 && path != NULL && newpath != NULL && !options->keep) tunlink(path); out_free_newpath: free(newpath); return ret; } int tmain(int argc, tchar *argv[]) { tchar *default_file_list[] = { NULL }; struct options options; int opt_char; int i; int ret; program_invocation_name = get_filename(argv[0]); options.to_stdout = false; options.decompress = is_gunzip(); options.force = false; options.keep = false; options.compression_level = 6; options.suffix = T(".gz"); while ((opt_char = tgetopt(argc, argv, optstring)) != -1) { switch (opt_char) { case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': options.compression_level = parse_compression_level(opt_char, toptarg); if (options.compression_level == 0) return 1; break; case 'c': options.to_stdout = true; break; case 'd': options.decompress = true; break; case 'f': options.force = true; break; case 'h': show_usage(stdout); return 0; case 'k': options.keep = true; break; case 'n': /* * -n means don't save or restore the original filename * in the gzip header. Currently this implementation * already behaves this way by default, so accept the * option as a no-op. 
*/ break; case 'S': options.suffix = toptarg; if (options.suffix[0] == T('\0')) { msg("invalid suffix"); return 1; } break; case 'V': show_version(); return 0; default: show_usage(stderr); return 1; } } argv += toptind; argc -= toptind; if (argc == 0) { argv = default_file_list; argc = ARRAY_LEN(default_file_list); } else { for (i = 0; i < argc; i++) if (argv[i][0] == '-' && argv[i][1] == '\0') argv[i] = NULL; } ret = 0; if (options.decompress) { struct libdeflate_decompressor *d; d = alloc_decompressor(); if (d == NULL) return 1; for (i = 0; i < argc; i++) ret |= -decompress_file(d, argv[i], &options); libdeflate_free_decompressor(d); } else { struct libdeflate_compressor *c; c = alloc_compressor(options.compression_level); if (c == NULL) return 1; for (i = 0; i < argc; i++) ret |= -compress_file(c, argv[i], &options); libdeflate_free_compressor(c); } /* * If ret=0, there were no warnings or errors. Exit with status 0. * If ret=2, there was at least one warning. Exit with status 2. * Else, there was at least one error. Exit with status 1. */ if (ret != 0 && ret != 2) ret = 1; return ret; } libdeflate-1.5/programs/prog_util.c000066400000000000000000000251031360172702500174600ustar00rootroot00000000000000/* * prog_util.c - utility functions for programs * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
/*
 * malloc() wrapper.  Returns the allocation, or NULL after printing
 * "Out of memory".  A zero-byte request that malloc() answers with NULL is
 * retried as a one-byte request before it is treated as a failure.
 */
void *
xmalloc(size_t size)
{
	void *mem = malloc(size);

	if (mem != NULL)
		return mem;

	if (size == 0) {
		mem = malloc(1);
		if (mem != NULL)
			return mem;
	}

	msg("Out of memory");
	return NULL;
}
/*
 * Return a pointer to the part of 'path' that follows its last path
 * separator, or 'path' itself if it contains no separator.  The separator is
 * '/', and on Windows additionally '\\'.  The returned pointer points into
 * 'path'; nothing is copied or modified.
 */
const tchar *
get_filename(const tchar *path)
{
	const tchar *last_sep = tstrrchr(path, '/');
#ifdef _WIN32
	const tchar *last_bslash = tstrrchr(path, '\\');

	/* Use whichever separator occurs later in the string */
	if (last_bslash != NULL) {
		if (last_sep == NULL || last_sep < last_bslash)
			last_sep = last_bslash;
	}
#endif
	return (last_sep == NULL) ? path : last_sep + 1;
}
0 : O_NOFOLLOW) | O_SEQUENTIAL); if (strm->fd < 0) { msg_errno("Can't open %"TS" for reading", strm->name); free(strm->name); return -1; } #if defined(HAVE_POSIX_FADVISE) && (O_SEQUENTIAL == 0) posix_fadvise(strm->fd, 0, 0, POSIX_FADV_SEQUENTIAL); #endif return 0; } /* Open a file for writing, or set up standard output for writing */ int xopen_for_write(const tchar *path, bool overwrite, struct file_stream *strm) { int ret = -1; strm->mmap_token = NULL; strm->mmap_mem = NULL; if (path == NULL) { strm->is_standard_stream = true; strm->name = T("standard output"); strm->fd = STDOUT_FILENO; #ifdef _WIN32 _setmode(strm->fd, O_BINARY); #endif return 0; } strm->is_standard_stream = false; strm->name = quote_path(path); if (strm->name == NULL) goto err; retry: strm->fd = topen(path, O_WRONLY | O_BINARY | O_NOFOLLOW | O_CREAT | O_EXCL, 0644); if (strm->fd < 0) { if (errno != EEXIST) { msg_errno("Can't open %"TS" for writing", strm->name); goto err; } if (!overwrite) { if (!isatty(STDERR_FILENO) || !isatty(STDIN_FILENO)) { msg("%"TS" already exists; use -f to overwrite", strm->name); ret = -2; /* warning only */ goto err; } fprintf(stderr, "%"TS": %"TS" already exists; " "overwrite? 
(y/n) ", program_invocation_name, strm->name); if (getchar() != 'y') { msg("Not overwriting."); goto err; } } if (tunlink(path) != 0) { msg_errno("Unable to delete %"TS, strm->name); goto err; } goto retry; } return 0; err: free(strm->name); return ret; } /* Read the full contents of a file into memory */ static int read_full_contents(struct file_stream *strm) { size_t filled = 0; size_t capacity = 4096; char *buf; int ret; buf = xmalloc(capacity); if (buf == NULL) return -1; do { if (filled == capacity) { char *newbuf; if (capacity == SIZE_MAX) goto oom; capacity += MIN(SIZE_MAX - capacity, capacity); newbuf = realloc(buf, capacity); if (newbuf == NULL) goto oom; buf = newbuf; } ret = xread(strm, &buf[filled], capacity - filled); if (ret < 0) goto err; filled += ret; } while (ret != 0); strm->mmap_mem = buf; strm->mmap_size = filled; return 0; err: free(buf); return ret; oom: msg("Out of memory! %"TS" is too large to be processed by " "this program as currently implemented.", strm->name); ret = -1; goto err; } /* Map the contents of a file into memory */ int map_file_contents(struct file_stream *strm, u64 size) { if (size == 0) /* mmap isn't supported on empty files */ return read_full_contents(strm); if (size > SIZE_MAX) { msg("%"TS" is too large to be processed by this program", strm->name); return -1; } #ifdef _WIN32 strm->mmap_token = CreateFileMapping( (HANDLE)(intptr_t)_get_osfhandle(strm->fd), NULL, PAGE_READONLY, 0, 0, NULL); if (strm->mmap_token == NULL) { DWORD err = GetLastError(); if (err == ERROR_BAD_EXE_FORMAT) /* mmap unsupported */ return read_full_contents(strm); msg("Unable create file mapping for %"TS": Windows error %u", strm->name, (unsigned int)err); return -1; } strm->mmap_mem = MapViewOfFile((HANDLE)strm->mmap_token, FILE_MAP_READ, 0, 0, size); if (strm->mmap_mem == NULL) { msg("Unable to map %"TS" into memory: Windows error %u", strm->name, (unsigned int)GetLastError()); CloseHandle((HANDLE)strm->mmap_token); return -1; } #else /* _WIN32 
*/ strm->mmap_mem = mmap(NULL, size, PROT_READ, MAP_SHARED, strm->fd, 0); if (strm->mmap_mem == MAP_FAILED) { strm->mmap_mem = NULL; if (errno == ENODEV) /* mmap isn't supported on this file */ return read_full_contents(strm); if (errno == ENOMEM) { msg("%"TS" is too large to be processed by this " "program", strm->name); } else { msg_errno("Unable to map %"TS" into memory", strm->name); } return -1; } #ifdef HAVE_POSIX_MADVISE posix_madvise(strm->mmap_mem, size, POSIX_MADV_SEQUENTIAL); #endif strm->mmap_token = strm; /* anything that's not NULL */ #endif /* !_WIN32 */ strm->mmap_size = size; return 0; } /* * Read from a file, returning the full count to indicate all bytes were read, a * short count (possibly 0) to indicate EOF, or -1 to indicate error. */ ssize_t xread(struct file_stream *strm, void *buf, size_t count) { char *p = buf; size_t orig_count = count; while (count != 0) { ssize_t res = read(strm->fd, p, MIN(count, INT_MAX)); if (res == 0) break; if (res < 0) { if (errno == EAGAIN || errno == EINTR) continue; msg_errno("Error reading from %"TS, strm->name); return -1; } p += res; count -= res; } return orig_count - count; } /* Write to a file, returning 0 if all bytes were written or -1 on error */ int full_write(struct file_stream *strm, const void *buf, size_t count) { const char *p = buf; while (count != 0) { ssize_t res = write(strm->fd, p, MIN(count, INT_MAX)); if (res <= 0) { msg_errno("Error writing to %"TS, strm->name); return -1; } p += res; count -= res; } return 0; } /* Close a file, returning 0 on success or -1 on error */ int xclose(struct file_stream *strm) { int ret = 0; if (!strm->is_standard_stream) { if (close(strm->fd) != 0) { msg_errno("Error closing %"TS, strm->name); ret = -1; } free(strm->name); } if (strm->mmap_token != NULL) { #ifdef _WIN32 UnmapViewOfFile(strm->mmap_mem); CloseHandle((HANDLE)strm->mmap_token); #else munmap(strm->mmap_mem, strm->mmap_size); #endif strm->mmap_token = NULL; } else { free(strm->mmap_mem); } 
strm->mmap_mem = NULL; strm->fd = -1; strm->name = NULL; return ret; } /* * Parse the compression level given on the command line, returning the * compression level on success or 0 on error */ int parse_compression_level(tchar opt_char, const tchar *arg) { unsigned long level = opt_char - '0'; const tchar *p; if (arg == NULL) arg = T(""); for (p = arg; *p >= '0' && *p <= '9'; p++) level = (level * 10) + (*p - '0'); if (level < 1 || level > 12 || *p != '\0') { msg("Invalid compression level: \"%"TC"%"TS"\". " "Must be an integer in the range [1, 12].", opt_char, arg); return 0; } return level; } /* Allocate a new DEFLATE compressor */ struct libdeflate_compressor * alloc_compressor(int level) { struct libdeflate_compressor *c; c = libdeflate_alloc_compressor(level); if (c == NULL) { msg_errno("Unable to allocate compressor with " "compression level %d", level); } return c; } /* Allocate a new DEFLATE decompressor */ struct libdeflate_decompressor * alloc_decompressor(void) { struct libdeflate_decompressor *d; d = libdeflate_alloc_decompressor(); if (d == NULL) msg_errno("Unable to allocate decompressor"); return d; } libdeflate-1.5/programs/prog_util.h000066400000000000000000000105121360172702500174630ustar00rootroot00000000000000/* * prog_util.h - utility functions for programs * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #ifndef PROGRAMS_PROG_UTIL_H #define PROGRAMS_PROG_UTIL_H #ifdef HAVE_CONFIG_H # include "config.h" #endif #include "libdeflate.h" #include #include #include #include #include "common_defs.h" #ifdef __GNUC__ # define _printf(str_idx, args_idx) \ __attribute__((format(printf, str_idx, args_idx))) #else # define _printf(str_idx, args_idx) #endif #ifdef _WIN32 /* * Definitions for Windows builds. Mainly, 'tchar' is defined to be the 2-byte * 'wchar_t' type instead of 'char'. This is the only "easy" way I know of to * get full Unicode support on Windows... 
*/ #include extern int wmain(int argc, wchar_t **argv); # define tmain wmain # define tchar wchar_t # define _T(text) L##text # define T(text) _T(text) # define TS "ls" # define TC "lc" # define tmemcpy wmemcpy # define topen _wopen # define tstrchr wcschr # define tstrcmp wcscmp # define tstrcpy wcscpy # define tstrlen wcslen # define tstrrchr wcsrchr # define tstrtoul wcstoul # define tstrxcmp wcsicmp # define tunlink _wunlink # define tutimbuf __utimbuf64 # define tutime _wutime64 # define tstat _wstat64 # define tfstat _fstat64 # define stat_t struct _stat64 # ifdef _MSC_VER # define STDIN_FILENO 0 # define STDOUT_FILENO 1 # define STDERR_FILENO 2 # define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) # define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) # endif #else /* _WIN32 */ /* Standard definitions for everyone else */ # define tmain main # define tchar char # define T(text) text # define TS "s" # define TC "c" # define tmemcpy memcpy # define topen open # define tstrchr strchr # define tstrcmp strcmp # define tstrcpy strcpy # define tstrlen strlen # define tstrrchr strrchr # define tstrtoul strtoul # define tstrxcmp strcmp # define tunlink unlink # define tutimbuf utimbuf # define tutime utime # define tstat stat # define tfstat fstat # define stat_t struct stat #endif /* !_WIN32 */ extern const tchar *program_invocation_name; extern void _printf(1, 2) msg(const char *fmt, ...); extern void _printf(1, 2) msg_errno(const char *fmt, ...); extern void *xmalloc(size_t size); extern const tchar *get_filename(const tchar *path); struct file_stream { int fd; tchar *name; bool is_standard_stream; void *mmap_token; void *mmap_mem; size_t mmap_size; }; extern int xopen_for_read(const tchar *path, bool symlink_ok, struct file_stream *strm); extern int xopen_for_write(const tchar *path, bool force, struct file_stream *strm); extern int map_file_contents(struct file_stream *strm, u64 size); extern ssize_t xread(struct file_stream *strm, void *buf, size_t count); extern int 
full_write(struct file_stream *strm, const void *buf, size_t count); extern int xclose(struct file_stream *strm); extern int parse_compression_level(tchar opt_char, const tchar *arg); extern struct libdeflate_compressor *alloc_compressor(int level); extern struct libdeflate_decompressor *alloc_decompressor(void); /* tgetopt.c */ extern tchar *toptarg; extern int toptind, topterr, toptopt; extern int tgetopt(int argc, tchar *argv[], const tchar *optstring); #endif /* PROGRAMS_PROG_UTIL_H */ libdeflate-1.5/programs/test_checksums.c000066400000000000000000000117521360172702500205050ustar00rootroot00000000000000/* * test_checksums.c * * Verify that libdeflate's Adler-32 and CRC-32 functions produce the same * results as their zlib equivalents. */ #include #include #include "test_util.h" static unsigned int rng_seed; typedef u32 (*cksum_fn_t)(u32, const void *, size_t); static u32 adler32_libdeflate(u32 adler, const void *buf, size_t len) { return libdeflate_adler32(adler, buf, len); } static u32 crc32_libdeflate(u32 crc, const void *buf, size_t len) { return libdeflate_crc32(crc, buf, len); } static u32 adler32_zlib(u32 adler, const void *buf, size_t len) { return adler32(adler, buf, len); } static u32 crc32_zlib(u32 crc, const void *buf, size_t len) { return crc32(crc, buf, len); } static u32 select_initial_crc(void) { if (rand() & 1) return 0; return ((u32)rand() << 16) | rand(); } static u32 select_initial_adler(void) { u32 lo, hi; if (rand() & 1) return 1; lo = (rand() % 4 == 0 ? 65520 : rand() % 65521); hi = (rand() % 4 == 0 ? 
65520 : rand() % 65521); return (hi << 16) | lo; } static void test_initial_values(cksum_fn_t cksum, u32 expected) { ASSERT(cksum(0, NULL, 0) == expected); if (cksum != adler32_zlib) /* broken */ ASSERT(cksum(0, NULL, 1) == expected); ASSERT(cksum(0, NULL, 1234) == expected); ASSERT(cksum(1234, NULL, 0) == expected); ASSERT(cksum(1234, NULL, 1234) == expected); } static void test_multipart(const u8 *buffer, size_t size, const char *name, cksum_fn_t cksum, u32 v, u32 expected) { size_t division = rand() % (size + 1); v = cksum(v, buffer, division); v = cksum(v, buffer + division, size - division); if (v != expected) { fprintf(stderr, "%s checksum failed multipart test\n", name); ASSERT(0); } } static void test_checksums(const void *buffer, size_t size, const char *name, cksum_fn_t cksum1, cksum_fn_t cksum2, u32 initial_value) { u32 v1 = cksum1(initial_value, buffer, size); u32 v2 = cksum2(initial_value, buffer, size); if (v1 != v2) { fprintf(stderr, "%s checksum mismatch\n", name); fprintf(stderr, "initial_value=0x%08"PRIx32", buffer=%p, " "size=%zu, buffer=", initial_value, buffer, size); for (size_t i = 0; i < MIN(size, 256); i++) fprintf(stderr, "%02x", ((const u8 *)buffer)[i]); if (size > 256) fprintf(stderr, "..."); fprintf(stderr, "\n"); ASSERT(0); } if ((rand() & 15) == 0) { test_multipart(buffer, size, name, cksum1, initial_value, v1); test_multipart(buffer, size, name, cksum2, initial_value, v1); } } static void test_crc32(const void *buffer, size_t size, u32 initial_value) { test_checksums(buffer, size, "CRC-32", crc32_libdeflate, crc32_zlib, initial_value); } static void test_adler32(const void *buffer, size_t size, u32 initial_value) { test_checksums(buffer, size, "Adler-32", adler32_libdeflate, adler32_zlib, initial_value); } static void test_random_buffers(u8 *buffer, u8 *guarded_buf_end, size_t limit, u32 num_iter) { for (u32 i = 0; i < num_iter; i++) { size_t start = rand() % limit; size_t len = rand() % (limit - start); u32 a0 = 
select_initial_adler(); u32 c0 = select_initial_crc(); for (size_t j = start; j < start + len; j++) buffer[j] = rand(); /* Test with chosen size and alignment */ test_adler32(&buffer[start], len, a0); test_crc32(&buffer[start], len, c0); /* Test with chosen size, with guard page after input buffer */ memcpy(guarded_buf_end - len, &buffer[start], len); test_adler32(guarded_buf_end - len, len, a0); test_crc32(guarded_buf_end - len, len, c0); } } int tmain(int argc, tchar *argv[]) { u8 *buffer = xmalloc(32768); u8 *guarded_buf_start, *guarded_buf_end; program_invocation_name = get_filename(argv[0]); alloc_guarded_buffer(32768, &guarded_buf_start, &guarded_buf_end); rng_seed = time(NULL); srand(rng_seed); test_initial_values(adler32_libdeflate, 1); test_initial_values(adler32_zlib, 1); test_initial_values(crc32_libdeflate, 0); test_initial_values(crc32_zlib, 0); /* Test different buffer sizes and alignments */ test_random_buffers(buffer, guarded_buf_end, 256, 5000); test_random_buffers(buffer, guarded_buf_end, 1024, 500); test_random_buffers(buffer, guarded_buf_end, 32768, 50); /* * Test Adler-32 overflow cases. For example, given all 0xFF bytes and * the highest possible initial (s1, s2) of (65520, 65520), then s2 if * stored as a 32-bit unsigned integer will overflow if > 5552 bytes are * processed. Implementations must make sure to reduce s2 modulo 65521 * before that point. Also, some implementations make use of 16-bit * counters which can overflow earlier. 
*/ memset(buffer, 0xFF, 32768); for (u32 i = 0; i < 20; i++) { u32 initial_value; if (i == 0) initial_value = ((u32)65520 << 16) | 65520; else initial_value = select_initial_adler(); test_adler32(buffer, 5553, initial_value); test_adler32(buffer, rand() % 32769, initial_value); buffer[rand() % 32768] = 0xFE; } printf("Adler-32 and CRC-32 checksum tests passed!\n"); free(buffer); free_guarded_buffer(guarded_buf_start, guarded_buf_end); return 0; } libdeflate-1.5/programs/test_incomplete_codes.c000066400000000000000000000272161360172702500220360ustar00rootroot00000000000000/* * test_incomplete_codes.c * * Test that the decompressor accepts incomplete Huffman codes in certain * specific cases. */ #include "test_util.h" static void verify_decompression_libdeflate(const u8 *in, size_t in_nbytes, u8 *out, size_t out_nbytes_avail, const u8 *expected_out, size_t expected_out_nbytes) { struct libdeflate_decompressor *d; enum libdeflate_result res; size_t actual_out_nbytes; d = libdeflate_alloc_decompressor(); ASSERT(d != NULL); res = libdeflate_deflate_decompress(d, in, in_nbytes, out, out_nbytes_avail, &actual_out_nbytes); ASSERT(res == LIBDEFLATE_SUCCESS); ASSERT(actual_out_nbytes == expected_out_nbytes); ASSERT(memcmp(out, expected_out, actual_out_nbytes) == 0); libdeflate_free_decompressor(d); } static void verify_decompression_zlib(const u8 *in, size_t in_nbytes, u8 *out, size_t out_nbytes_avail, const u8 *expected_out, size_t expected_out_nbytes) { z_stream z; int res; size_t actual_out_nbytes; memset(&z, 0, sizeof(z)); res = inflateInit2(&z, -15); ASSERT(res == Z_OK); z.next_in = (void *)in; z.avail_in = in_nbytes; z.next_out = (void *)out; z.avail_out = out_nbytes_avail; res = inflate(&z, Z_FINISH); ASSERT(res == Z_STREAM_END); actual_out_nbytes = out_nbytes_avail - z.avail_out; ASSERT(actual_out_nbytes == expected_out_nbytes); ASSERT(memcmp(out, expected_out, actual_out_nbytes) == 0); inflateEnd(&z); } static void verify_decompression(const u8 *in, size_t 
in_nbytes, u8 *out, size_t out_nbytes_avail, const u8 *expected_out, size_t expected_out_nbytes) { verify_decompression_libdeflate(in, in_nbytes, out, out_nbytes_avail, expected_out, expected_out_nbytes); verify_decompression_zlib(in, in_nbytes, out, out_nbytes_avail, expected_out, expected_out_nbytes); } /* Test that an empty offset code is accepted. */ static void test_empty_offset_code(void) { static const u8 expected_out[] = { 'A', 'B', 'A', 'A' }; u8 in[128]; u8 out[128]; struct output_bitstream os = { .next = in, .end = in + sizeof(in) }; int i; /* * Generate a DEFLATE stream containing a "dynamic Huffman" block * containing literals, but no offsets; and having an empty offset code * (all codeword lengths set to 0). * * Litlen code: * litlensym_A freq=3 len=1 codeword= 0 * litlensym_B freq=1 len=2 codeword=01 * litlensym_256 (end-of-block) freq=1 len=2 codeword=11 * Offset code: * (empty) * * Litlen and offset codeword lengths: * [0..'A'-1] = 0 presym_18 * ['A'] = 1 presym_1 * ['B'] = 2 presym_2 * ['B'+1..255] = 0 presym_18 presym_18 * [256] = 2 presym_2 * [257] = 0 presym_0 * * Precode: * presym_0 freq=1 len=3 codeword=011 * presym_1 freq=1 len=3 codeword=111 * presym_2 freq=2 len=2 codeword= 01 * presym_18 freq=3 len=1 codeword= 0 */ ASSERT(put_bits(&os, 1, 1)); /* BFINAL: 1 */ ASSERT(put_bits(&os, 2, 2)); /* BTYPE: DYNAMIC_HUFFMAN */ ASSERT(put_bits(&os, 0, 5)); /* num_litlen_syms: 0 + 257 */ ASSERT(put_bits(&os, 0, 5)); /* num_offset_syms: 0 + 1 */ ASSERT(put_bits(&os, 14, 4)); /* num_explicit_precode_lens: 14 + 4 */ /* * Precode codeword lengths: order is * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] */ for (i = 0; i < 2; i++) /* presym_{16,17}: len=0 */ ASSERT(put_bits(&os, 0, 3)); ASSERT(put_bits(&os, 1, 3)); /* presym_18: len=1 */ ASSERT(put_bits(&os, 3, 3)); /* presym_0: len=3 */ for (i = 0; i < 11; i++) /* presym_{8,...,13}: len=0 */ ASSERT(put_bits(&os, 0, 3)); ASSERT(put_bits(&os, 2, 3)); /* presym_2: len=2 */ 
ASSERT(put_bits(&os, 0, 3)); /* presym_14: len=0 */ ASSERT(put_bits(&os, 3, 3)); /* presym_1: len=3 */ /* Litlen and offset codeword lengths */ ASSERT(put_bits(&os, 0x0, 1) && put_bits(&os, 54, 7)); /* presym_18, 65 zeroes */ ASSERT(put_bits(&os, 0x7, 3)); /* presym_1 */ ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */ ASSERT(put_bits(&os, 0x0, 1) && put_bits(&os, 89, 7)); /* presym_18, 100 zeroes */ ASSERT(put_bits(&os, 0x0, 1) && put_bits(&os, 78, 7)); /* presym_18, 89 zeroes */ ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */ ASSERT(put_bits(&os, 0x3, 3)); /* presym_0 */ /* Litlen symbols */ ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_A */ ASSERT(put_bits(&os, 0x1, 2)); /* litlensym_B */ ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_A */ ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_A */ ASSERT(put_bits(&os, 0x3, 2)); /* litlensym_256 (end-of-block) */ ASSERT(flush_bits(&os)); verify_decompression(in, os.next - in, out, sizeof(out), expected_out, sizeof(expected_out)); } /* Test that a litrunlen code containing only one symbol is accepted. 
*/ static void test_singleton_litrunlen_code(void) { u8 in[128]; u8 out[128]; struct output_bitstream os = { .next = in, .end = in + sizeof(in) }; int i; /* * Litlen code: * litlensym_256 (end-of-block) freq=1 len=1 codeword=0 * Offset code: * (empty) * * Litlen and offset codeword lengths: * [0..256] = 0 presym_18 presym_18 * [256] = 1 presym_1 * [257] = 0 presym_0 * * Precode: * presym_0 freq=1 len=2 codeword=01 * presym_1 freq=1 len=2 codeword=11 * presym_18 freq=2 len=1 codeword= 0 */ ASSERT(put_bits(&os, 1, 1)); /* BFINAL: 1 */ ASSERT(put_bits(&os, 2, 2)); /* BTYPE: DYNAMIC_HUFFMAN */ ASSERT(put_bits(&os, 0, 5)); /* num_litlen_syms: 0 + 257 */ ASSERT(put_bits(&os, 0, 5)); /* num_offset_syms: 0 + 1 */ ASSERT(put_bits(&os, 14, 4)); /* num_explicit_precode_lens: 14 + 4 */ /* * Precode codeword lengths: order is * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] */ for (i = 0; i < 2; i++) /* presym_{16,17}: len=0 */ ASSERT(put_bits(&os, 0, 3)); ASSERT(put_bits(&os, 1, 3)); /* presym_18: len=1 */ ASSERT(put_bits(&os, 2, 3)); /* presym_0: len=2 */ for (i = 0; i < 13; i++) /* presym_{8,...,14}: len=0 */ ASSERT(put_bits(&os, 0, 3)); ASSERT(put_bits(&os, 2, 3)); /* presym_1: len=2 */ /* Litlen and offset codeword lengths */ for (i = 0; i < 2; i++) { ASSERT(put_bits(&os, 0, 1) && /* presym_18, 128 zeroes */ put_bits(&os, 117, 7)); } ASSERT(put_bits(&os, 0x3, 2)); /* presym_1 */ ASSERT(put_bits(&os, 0x1, 2)); /* presym_0 */ /* Litlen symbols */ ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_256 (end-of-block) */ ASSERT(flush_bits(&os)); verify_decompression(in, os.next - in, out, sizeof(out), in, 0); } /* Test that an offset code containing only one symbol is accepted. 
*/ static void test_singleton_offset_code(void) { static const u8 expected_out[] = { 255, 255, 255, 255 }; u8 in[128]; u8 out[128]; struct output_bitstream os = { .next = in, .end = in + sizeof(in) }; int i; ASSERT(put_bits(&os, 1, 1)); /* BFINAL: 1 */ ASSERT(put_bits(&os, 2, 2)); /* BTYPE: DYNAMIC_HUFFMAN */ /* * Litlen code: * litlensym_255 freq=1 len=1 codeword= 0 * litlensym_256 (end-of-block) freq=1 len=2 codeword=01 * litlensym_257 (len 3) freq=1 len=2 codeword=11 * Offset code: * offsetsym_0 (offset 0) freq=1 len=1 codeword=0 * * Litlen and offset codeword lengths: * [0..254] = 0 presym_{18,18} * [255] = 1 presym_1 * [256] = 1 presym_2 * [257] = 1 presym_2 * [258] = 1 presym_1 * * Precode: * presym_1 freq=2 len=2 codeword=01 * presym_2 freq=2 len=2 codeword=11 * presym_18 freq=2 len=1 codeword= 0 */ ASSERT(put_bits(&os, 1, 5)); /* num_litlen_syms: 1 + 257 */ ASSERT(put_bits(&os, 0, 5)); /* num_offset_syms: 0 + 1 */ ASSERT(put_bits(&os, 14, 4)); /* num_explicit_precode_lens: 14 + 4 */ /* * Precode codeword lengths: order is * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] */ for (i = 0; i < 2; i++) /* presym_{16,17}: len=0 */ ASSERT(put_bits(&os, 0, 3)); ASSERT(put_bits(&os, 1, 3)); /* presym_18: len=1 */ for (i = 0; i < 12; i++) /* presym_{0,...,13}: len=0 */ ASSERT(put_bits(&os, 0, 3)); ASSERT(put_bits(&os, 2, 3)); /* presym_2: len=2 */ ASSERT(put_bits(&os, 0, 3)); /* presym_14: len=0 */ ASSERT(put_bits(&os, 2, 3)); /* presym_1: len=2 */ /* Litlen and offset codeword lengths */ ASSERT(put_bits(&os, 0x0, 1) && /* presym_18, 128 zeroes */ put_bits(&os, 117, 7)); ASSERT(put_bits(&os, 0x0, 1) && /* presym_18, 127 zeroes */ put_bits(&os, 116, 7)); ASSERT(put_bits(&os, 0x1, 2)); /* presym_1 */ ASSERT(put_bits(&os, 0x3, 2)); /* presym_2 */ ASSERT(put_bits(&os, 0x3, 2)); /* presym_2 */ ASSERT(put_bits(&os, 0x1, 2)); /* presym_1 */ /* Literal */ ASSERT(put_bits(&os, 0x0, 1)); /* litlensym_255 */ /* Match */ ASSERT(put_bits(&os, 0x3, 2)); /* 
litlensym_257 */ ASSERT(put_bits(&os, 0x0, 1)); /* offsetsym_0 */ /* End of block */ ASSERT(put_bits(&os, 0x1, 2)); /* litlensym_256 */ ASSERT(flush_bits(&os)); verify_decompression(in, os.next - in, out, sizeof(out), expected_out, sizeof(expected_out)); } /* Test that an offset code containing only one symbol is accepted, even if that * symbol is not symbol 0. The codeword should be '0' in either case. */ static void test_singleton_offset_code_notsymzero(void) { static const u8 expected_out[] = { 254, 255, 254, 255, 254 }; u8 in[128]; u8 out[128]; struct output_bitstream os = { .next = in, .end = in + sizeof(in) }; int i; ASSERT(put_bits(&os, 1, 1)); /* BFINAL: 1 */ ASSERT(put_bits(&os, 2, 2)); /* BTYPE: DYNAMIC_HUFFMAN */ /* * Litlen code: * litlensym_254 len=2 codeword=00 * litlensym_255 len=2 codeword=10 * litlensym_256 (end-of-block) len=2 codeword=01 * litlensym_257 (len 3) len=2 codeword=11 * Offset code: * offsetsym_1 (offset 2) len=1 codeword=0 * * Litlen and offset codeword lengths: * [0..253] = 0 presym_{18,18} * [254] = 2 presym_2 * [255] = 2 presym_2 * [256] = 2 presym_2 * [257] = 2 presym_2 * [258] = 0 presym_0 * [259] = 1 presym_1 * * Precode: * presym_0 len=2 codeword=00 * presym_1 len=2 codeword=10 * presym_2 len=2 codeword=01 * presym_18 len=2 codeword=11 */ ASSERT(put_bits(&os, 1, 5)); /* num_litlen_syms: 1 + 257 */ ASSERT(put_bits(&os, 1, 5)); /* num_offset_syms: 1 + 1 */ ASSERT(put_bits(&os, 14, 4)); /* num_explicit_precode_lens: 14 + 4 */ /* * Precode codeword lengths: order is * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] */ for (i = 0; i < 2; i++) /* presym_{16,17}: len=0 */ ASSERT(put_bits(&os, 0, 3)); ASSERT(put_bits(&os, 2, 3)); /* presym_18: len=2 */ ASSERT(put_bits(&os, 2, 3)); /* presym_0: len=2 */ for (i = 0; i < 11; i++) /* presym_{8,...,13}: len=0 */ ASSERT(put_bits(&os, 0, 3)); ASSERT(put_bits(&os, 2, 3)); /* presym_2: len=2 */ ASSERT(put_bits(&os, 0, 3)); /* presym_14: len=0 */ ASSERT(put_bits(&os, 2, 3)); 
/* presym_1: len=2 */ /* Litlen and offset codeword lengths */ ASSERT(put_bits(&os, 0x3, 2) && /* presym_18, 128 zeroes */ put_bits(&os, 117, 7)); ASSERT(put_bits(&os, 0x3, 2) && /* presym_18, 126 zeroes */ put_bits(&os, 115, 7)); ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */ ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */ ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */ ASSERT(put_bits(&os, 0x1, 2)); /* presym_2 */ ASSERT(put_bits(&os, 0x0, 2)); /* presym_0 */ ASSERT(put_bits(&os, 0x2, 2)); /* presym_1 */ /* Literals */ ASSERT(put_bits(&os, 0x0, 2)); /* litlensym_254 */ ASSERT(put_bits(&os, 0x2, 2)); /* litlensym_255 */ /* Match */ ASSERT(put_bits(&os, 0x3, 2)); /* litlensym_257 */ ASSERT(put_bits(&os, 0x0, 1)); /* offsetsym_1 */ /* End of block */ ASSERT(put_bits(&os, 0x1, 2)); /* litlensym_256 */ ASSERT(flush_bits(&os)); verify_decompression(in, os.next - in, out, sizeof(out), expected_out, sizeof(expected_out)); } int tmain(int argc, tchar *argv[]) { program_invocation_name = get_filename(argv[0]); test_empty_offset_code(); test_singleton_litrunlen_code(); test_singleton_offset_code(); test_singleton_offset_code_notsymzero(); return 0; } libdeflate-1.5/programs/test_slow_decompression.c000066400000000000000000000543621360172702500224420ustar00rootroot00000000000000/* * test_slow_decompression.c * * Test how quickly libdeflate decompresses degenerate/malicious compressed data * streams that start new Huffman blocks extremely frequently. */ #include "test_util.h" /* * Generate a DEFLATE stream containing all empty "static Huffman" blocks. * * libdeflate used to decompress this very slowly (~1000x slower than typical * data), but now it's much faster (only ~2x slower than typical data) because * now it skips rebuilding the decode tables for the static Huffman codes when * they're already loaded into the decompressor. 
*/ static void generate_empty_static_huffman_blocks(u8 *p, size_t len) { struct output_bitstream os = { .next = p, .end = p + len }; while (put_bits(&os, 0, 1) && /* BFINAL: 0 */ put_bits(&os, 1, 2) && /* BTYPE: STATIC_HUFFMAN */ put_bits(&os, 0, 7)) /* litlensym_256 (end-of-block) */ ; } static bool generate_empty_dynamic_huffman_block(struct output_bitstream *os) { int i; if (!put_bits(os, 0, 1)) /* BFINAL: 0 */ return false; if (!put_bits(os, 2, 2)) /* BTYPE: DYNAMIC_HUFFMAN */ return false; /* * Write a minimal Huffman code, then the end-of-block symbol. * * Litlen code: * litlensym_256 (end-of-block) freq=1 len=1 codeword=0 * Offset code: * offsetsym_0 (unused) freq=0 len=1 codeword=0 * * Litlen and offset codeword lengths: * [0..255] = 0 presym_{18,18} * [256] = 1 presym_1 * [257] = 1 presym_1 * * Precode: * presym_1 freq=2 len=1 codeword=0 * presym_18 freq=2 len=1 codeword=1 */ if (!put_bits(os, 0, 5)) /* num_litlen_syms: 0 + 257 */ return false; if (!put_bits(os, 0, 5)) /* num_offset_syms: 0 + 1 */ return false; if (!put_bits(os, 14, 4)) /* num_explicit_precode_lens: 14 + 4 */ return false; /* * Precode codeword lengths: order is * [16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15] */ for (i = 0; i < 2; i++) { /* presym_{16,17}: len=0 */ if (!put_bits(os, 0, 3)) return false; } if (!put_bits(os, 1, 3)) /* presym_18: len=1 */ return false; for (i = 0; i < 14; i++) { /* presym_{0,...,14}: len=0 */ if (!put_bits(os, 0, 3)) return false; } if (!put_bits(os, 1, 3)) /* presym_1: len=1 */ return false; /* Litlen and offset codeword lengths */ for (i = 0; i < 2; i++) { if (!put_bits(os, 1, 1) || /* presym_18, 128 zeroes */ !put_bits(os, 117, 7)) return false; } if (!put_bits(os, 0, 1)) /* presym_1 */ return false; if (!put_bits(os, 0, 1)) /* presym_1 */ return false; /* Done writing the Huffman codes */ return put_bits(os, 0, 1); /* litlensym_256 (end-of-block) */ } /* * Generate a DEFLATE stream containing all empty "dynamic Huffman" blocks. 
* * This is the worst known case currently, being ~100x slower to decompress than * typical data. */ static void generate_empty_dynamic_huffman_blocks(u8 *p, size_t len) { struct output_bitstream os = { .next = p, .end = p + len }; while (generate_empty_dynamic_huffman_block(&os)) ; } #define NUM_ITERATIONS 100 static u64 do_test_libdeflate(const char *input_type, const u8 *in, size_t in_nbytes, u8 *out, size_t out_nbytes_avail) { struct libdeflate_decompressor *d; enum libdeflate_result res; u64 t; int i; d = libdeflate_alloc_decompressor(); ASSERT(d != NULL); t = timer_ticks(); for (i = 0; i < NUM_ITERATIONS; i++) { res = libdeflate_deflate_decompress(d, in, in_nbytes, out, out_nbytes_avail, NULL); ASSERT(res == LIBDEFLATE_BAD_DATA || res == LIBDEFLATE_INSUFFICIENT_SPACE); } t = timer_ticks() - t; printf("[%s, libdeflate]: %"PRIu64" KB/s\n", input_type, timer_KB_per_s((u64)in_nbytes * NUM_ITERATIONS, t)); libdeflate_free_decompressor(d); return t; } static u64 do_test_zlib(const char *input_type, const u8 *in, size_t in_nbytes, u8 *out, size_t out_nbytes_avail) { z_stream z; int res; u64 t; int i; memset(&z, 0, sizeof(z)); res = inflateInit2(&z, -15); ASSERT(res == Z_OK); t = timer_ticks(); for (i = 0; i < NUM_ITERATIONS; i++) { inflateReset(&z); z.next_in = (void *)in; z.avail_in = in_nbytes; z.next_out = out; z.avail_out = out_nbytes_avail; res = inflate(&z, Z_FINISH); ASSERT(res == Z_BUF_ERROR || res == Z_DATA_ERROR); } t = timer_ticks() - t; printf("[%s, zlib ]: %"PRIu64" KB/s\n", input_type, timer_KB_per_s((u64)in_nbytes * NUM_ITERATIONS, t)); inflateEnd(&z); return t; } /* * Test case from https://github.com/ebiggers/libdeflate/issues/33 * with the gzip header and footer removed to leave just the DEFLATE stream */ static const u8 orig_repro[3962] = "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" 
"\x1b\x1b\x1b\x1b\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a" "\x6a\x6a\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20" "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28" "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11" "\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48" "\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80" "\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00" "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea" "\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea" "\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b" 
"\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48" "\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20" "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00" "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11" "\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x63" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92" "\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00" "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48" "\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20" "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" 
"\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00" "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea" "\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48" "\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11" "\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00" "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11" "\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" 
"\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63" "\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea" "\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x92\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a" "\x6a\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80" "\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00" "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00" "\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x92\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a\x6a" "\x6a\x6a\x6a\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b" 
"\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00" "\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80" "\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00" "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04" "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20" "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28" "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00" "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04" "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00" "\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" 
"\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28" "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00" "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04" "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00" "\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28" "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" 
"\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00" "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04" "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00" "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28" "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x92\x63\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00" "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92" "\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00" "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" 
"\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x63\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00" "\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80" "\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00" "\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92" "\x63\x00\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00" "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04" "\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" 
"\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00\x20" "\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x00\xea\x04\x48\x00\x20\x80\x28" "\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1a\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00" "\xea\x04\x48\x00\x20\x80\x28\x00\x00\x11\x00\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b" "\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x1b\x92\x63\x00\x04\xea\x48\x00\x20" "\x80\x28\x00\x00\x11\x1b\x1b\x1b\x1b\x92\x63\x00\xea\x04\x48\x00" "\x20\x80\x28\x00\x00\x11\x00\x00\x01\x04\x00\x3f\x00\x00\x00\x00" "\x28\xf7\xff\x00\xff\xff\xff\xff\x00\x00"; int tmain(int argc, tchar *argv[]) { u8 in[4096]; u8 out[10000]; u64 t, tz; program_invocation_name = get_filename(argv[0]); begin_performance_test(); /* static huffman case */ generate_empty_static_huffman_blocks(in, sizeof(in)); t = do_test_libdeflate("static huffman", in, sizeof(in), out, sizeof(out)); tz = do_test_zlib("static huffman", in, sizeof(in), out, sizeof(out)); /* * libdeflate is faster than zlib in this case, e.g. * [static huffman, libdeflate]: 215861 KB/s * [static huffman, zlib ]: 73651 KB/s */ putchar('\n'); ASSERT(t < tz); /* dynamic huffman case */ generate_empty_dynamic_huffman_blocks(in, sizeof(in)); t = do_test_libdeflate("dynamic huffman", in, sizeof(in), out, sizeof(out)); tz = do_test_zlib("dynamic huffman", in, sizeof(in), out, sizeof(out)); /* * libdeflate is slower than zlib in this case, though not super bad. * [dynamic huffman, libdeflate]: 6277 KB/s * [dynamic huffman, zlib ]: 10419 KB/s * FIXME: make it faster. 
*/ putchar('\n'); ASSERT(t < 4 * tz); /* original reproducer */ t = do_test_libdeflate("original repro", orig_repro, sizeof(orig_repro), out, sizeof(out)); tz = do_test_zlib("original repro", orig_repro, sizeof(orig_repro), out, sizeof(out)); ASSERT(t < tz); return 0; } libdeflate-1.5/programs/test_util.c000066400000000000000000000131661360172702500174760ustar00rootroot00000000000000/* * test_util.c - utility functions for test programs * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #ifndef _WIN32 /* for MAP_ANONYMOUS or MAP_ANON, which unfortunately aren't part of POSIX... 
*/ # undef _POSIX_C_SOURCE # ifdef __APPLE__ # define _DARWIN_C_SOURCE # elif defined(__linux__) # define _GNU_SOURCE # endif #endif #include "test_util.h" #include #include #ifdef _WIN32 # include #else # include # include # include #endif #ifndef MAP_ANONYMOUS # define MAP_ANONYMOUS MAP_ANON #endif /* Abort with an error message */ _noreturn void assertion_failed(const char *expr, const char *file, int line) { msg("Assertion failed: %s at %s:%d", expr, file, line); abort(); } void begin_performance_test(void) { if (getenv("INCLUDE_PERF_TESTS") == NULL) { printf("Skipping '%"TS"' since it's a performance test, which may be flaky.\n", program_invocation_name); exit(0); } } static size_t get_page_size(void) { #ifdef _WIN32 SYSTEM_INFO info; GetSystemInfo(&info); return info.dwPageSize; #else return sysconf(_SC_PAGESIZE); #endif } /* Allocate a buffer with guard pages */ void alloc_guarded_buffer(size_t size, u8 **start_ret, u8 **end_ret) { const size_t pagesize = get_page_size(); const size_t nr_pages = (size + pagesize - 1) / pagesize; u8 *base_addr; u8 *start, *end; #ifdef _WIN32 DWORD oldProtect; #endif *start_ret = NULL; *end_ret = NULL; #ifdef _WIN32 /* Allocate buffer and guard pages with no access. */ base_addr = VirtualAlloc(NULL, (nr_pages + 2) * pagesize, MEM_COMMIT | MEM_RESERVE, PAGE_NOACCESS); if (!base_addr) { msg("Unable to allocate memory (VirtualAlloc): Windows error %u", (unsigned int)GetLastError()); ASSERT(0); } start = base_addr + pagesize; end = start + (nr_pages * pagesize); /* Grant read+write access to just the buffer. */ if (!VirtualProtect(start, end - start, PAGE_READWRITE, &oldProtect)) { msg("Unable to protect memory (VirtualProtect): Windows error %u", (unsigned int)GetLastError()); VirtualFree(base_addr, 0, MEM_RELEASE); ASSERT(0); } #else /* Allocate buffer and guard pages. 
*/ base_addr = mmap(NULL, (nr_pages + 2) * pagesize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (base_addr == (u8 *)MAP_FAILED) { msg_errno("Unable to allocate memory (anonymous mmap)"); ASSERT(0); } start = base_addr + pagesize; end = start + (nr_pages * pagesize); /* Unmap the guard pages. */ munmap(base_addr, pagesize); munmap(end, pagesize); #endif *start_ret = start; *end_ret = end; } /* Free a buffer that was allocated by alloc_guarded_buffer() */ void free_guarded_buffer(u8 *start, u8 *end) { if (!start) return; #ifdef _WIN32 VirtualFree(start - get_page_size(), 0, MEM_RELEASE); #else munmap(start, end - start); #endif } /* * Return the number of timer ticks that have elapsed since some unspecified * point fixed at the start of program execution */ u64 timer_ticks(void) { #ifdef _WIN32 LARGE_INTEGER count; QueryPerformanceCounter(&count); return count.QuadPart; #elif defined(HAVE_CLOCK_GETTIME) struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return (1000000000 * (u64)ts.tv_sec) + ts.tv_nsec; #else struct timeval tv; gettimeofday(&tv, NULL); return (1000000 * (u64)tv.tv_sec) + tv.tv_usec; #endif } /* * Return the number of timer ticks per second */ static u64 timer_frequency(void) { #ifdef _WIN32 LARGE_INTEGER freq; QueryPerformanceFrequency(&freq); return freq.QuadPart; #elif defined(HAVE_CLOCK_GETTIME) return 1000000000; #else return 1000000; #endif } /* * Convert a number of elapsed timer ticks to milliseconds */ u64 timer_ticks_to_ms(u64 ticks) { return ticks * 1000 / timer_frequency(); } /* * Convert a byte count and a number of elapsed timer ticks to MB/s */ u64 timer_MB_per_s(u64 bytes, u64 ticks) { return bytes * timer_frequency() / ticks / 1000000; } /* * Convert a byte count and a number of elapsed timer ticks to KB/s */ u64 timer_KB_per_s(u64 bytes, u64 ticks) { return bytes * timer_frequency() / ticks / 1000; } bool put_bits(struct output_bitstream *os, machine_word_t bits, int num_bits) { os->bitbuf |= bits << 
os->bitcount; os->bitcount += num_bits; while (os->bitcount >= 8) { if (os->next == os->end) return false; *os->next++ = os->bitbuf; os->bitcount -= 8; os->bitbuf >>= 8; } return true; } bool flush_bits(struct output_bitstream *os) { while (os->bitcount > 0) { if (os->next == os->end) return false; *os->next++ = os->bitbuf; os->bitcount -= 8; os->bitbuf >>= 8; } os->bitcount = 0; return true; } libdeflate-1.5/programs/test_util.h000066400000000000000000000042341360172702500174770ustar00rootroot00000000000000/* * test_util.h - utility functions for test programs * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. 
*/ #ifndef PROGRAMS_TEST_UTIL_H #define PROGRAMS_TEST_UTIL_H #include "prog_util.h" #include /* for comparison purposes */ #ifdef __GNUC__ # define _noreturn __attribute__((noreturn)) #else # define _noreturn #endif extern void _noreturn assertion_failed(const char *expr, const char *file, int line); #define ASSERT(expr) { if (unlikely(!(expr))) \ assertion_failed(#expr, __FILE__, __LINE__); } extern void begin_performance_test(void); extern void alloc_guarded_buffer(size_t size, u8 **start_ret, u8 **end_ret); extern void free_guarded_buffer(u8 *start, u8 *end); extern u64 timer_ticks(void); extern u64 timer_ticks_to_ms(u64 ticks); extern u64 timer_MB_per_s(u64 bytes, u64 ticks); extern u64 timer_KB_per_s(u64 bytes, u64 ticks); struct output_bitstream { machine_word_t bitbuf; int bitcount; u8 *next; u8 *end; }; extern bool put_bits(struct output_bitstream *os, machine_word_t bits, int num_bits); extern bool flush_bits(struct output_bitstream *os); #endif /* PROGRAMS_TEST_UTIL_H */ libdeflate-1.5/programs/tgetopt.c000066400000000000000000000067121360172702500171470ustar00rootroot00000000000000/* * tgetopt.c - portable replacement for GNU getopt() * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include "prog_util.h" tchar *toptarg; int toptind = 1, topterr = 1, toptopt; /* * This is a simple implementation of getopt(). It can be compiled with either * 'char' or 'wchar_t' as the character type. * * Do *not* use this implementation if you need any of the following features, * as they are not supported: * - Long options * - Option-related arguments retained in argv, not nulled out * - '+' and '-' characters in optstring */ int tgetopt(int argc, tchar *argv[], const tchar *optstring) { static tchar empty[1]; static tchar *nextchar; static bool done; if (toptind == 1) { /* Starting to scan a new argument vector */ nextchar = NULL; done = false; } while (!done && (nextchar != NULL || toptind < argc)) { if (nextchar == NULL) { /* Scanning a new argument */ tchar *arg = argv[toptind++]; if (arg[0] == '-' && arg[1] != '\0') { if (arg[1] == '-' && arg[2] == '\0') { /* All args after "--" are nonoptions */ argv[toptind - 1] = NULL; done = true; } else { /* Start of short option characters */ nextchar = &arg[1]; } } } else { /* More short options in previous arg */ tchar opt = *nextchar; tchar *p = tstrchr(optstring, opt); if (p == NULL) { if (topterr) msg("invalid option -- '%"TC"'", opt); toptopt = opt; return '?'; } /* 'opt' is a valid short option character */ nextchar++; toptarg = NULL; if (*(p + 1) == ':') { /* 'opt' can take an argument */ if (*nextchar != '\0') { /* Optarg is in same argv argument */ toptarg = nextchar; nextchar = empty; } else if (toptind < argc && *(p + 2) != ':') { /* Optarg is next argv argument */ argv[toptind - 1] = NULL; toptarg = argv[toptind++]; } else if (*(p + 2) != ':') { if (topterr && *optstring != ':') { msg("option requires an " "argument -- '%"TC"'", opt); } 
toptopt = opt; opt = (*optstring == ':') ? ':' : '?'; } } if (*nextchar == '\0') { argv[toptind - 1] = NULL; nextchar = NULL; } return opt; } } /* Done scanning. Move all nonoptions to the end, set optind to the * index of the first nonoption, and return -1. */ toptind = argc; while (--argc > 0) if (argv[argc] != NULL) argv[--toptind] = argv[argc]; done = true; return -1; } libdeflate-1.5/tools/000077500000000000000000000000001360172702500146155ustar00rootroot00000000000000libdeflate-1.5/tools/afl-fuzz/000077500000000000000000000000001360172702500163535ustar00rootroot00000000000000libdeflate-1.5/tools/afl-fuzz/Makefile000066400000000000000000000002321360172702500200100ustar00rootroot00000000000000SRC := $(wildcard */*.c) EXE := $(SRC:.c=) CFLAGS := -O2 -s LDLIBS := -ldeflate LDFLAGS := -L../.. CPPFLAGS := -I../.. all:$(EXE) clean: rm -f $(EXE) libdeflate-1.5/tools/afl-fuzz/deflate_compress/000077500000000000000000000000001360172702500216725ustar00rootroot00000000000000libdeflate-1.5/tools/afl-fuzz/deflate_compress/fuzz.c000066400000000000000000000016311360172702500230350ustar00rootroot00000000000000#include #include #include #include #include #include int main(int argc, char **argv) { struct libdeflate_decompressor *d; struct libdeflate_compressor *c; int ret; int fd = open(argv[1], O_RDONLY); struct stat stbuf; assert(fd >= 0); ret = fstat(fd, &stbuf); assert(!ret); char in[stbuf.st_size]; ret = read(fd, in, sizeof in); assert(ret == sizeof in); c = libdeflate_alloc_compressor(6); d = libdeflate_alloc_decompressor(); char out[sizeof(in)]; char checkarray[sizeof(in)]; size_t csize = libdeflate_deflate_compress(c, in,sizeof in, out, sizeof out); if (csize) { enum libdeflate_result res; res = libdeflate_deflate_decompress(d, out, csize, checkarray, sizeof in, NULL); assert(!res); assert(!memcmp(in, checkarray, sizeof in)); } libdeflate_free_compressor(c); libdeflate_free_decompressor(d); return 0; } 
libdeflate-1.5/tools/afl-fuzz/deflate_compress/inputs/000077500000000000000000000000001360172702500232145ustar00rootroot00000000000000libdeflate-1.5/tools/afl-fuzz/deflate_compress/inputs/0000066400000000000000000000007641360172702500233050ustar00rootroot00000000000000_01#2#.3 Z ^V ` 2` 1 @U@@U 0T0-5T-5=T=>T>?T?@T >Q>g>Qg>?Q?@Qlibdeflate-1.5/tools/afl-fuzz/deflate_decompress/000077500000000000000000000000001360172702500222035ustar00rootroot00000000000000libdeflate-1.5/tools/afl-fuzz/deflate_decompress/fuzz.c000066400000000000000000000010751360172702500233500ustar00rootroot00000000000000#include #include #include #include #include int main(int argc, char **argv) { struct libdeflate_decompressor *d; int ret; int fd = open(argv[1], O_RDONLY); struct stat stbuf; assert(fd >= 0); ret = fstat(fd, &stbuf); assert(!ret); char in[stbuf.st_size]; ret = read(fd, in, sizeof in); assert(ret == sizeof in); char out[sizeof(in) * 3]; d = libdeflate_alloc_decompressor(); libdeflate_deflate_decompress(d, in, sizeof in, out, sizeof out, NULL); libdeflate_free_decompressor(d); return 0; } libdeflate-1.5/tools/afl-fuzz/deflate_decompress/inputs/000077500000000000000000000000001360172702500235255ustar00rootroot00000000000000libdeflate-1.5/tools/afl-fuzz/deflate_decompress/inputs/0000066400000000000000000000002511360172702500236050ustar00rootroot00000000000000u1 @EgBl5 V6jX{i=l=Οl?tD =G% 2xԇ7eDs[Ukq |R/뮰*FMzv`r1B,lDuYj#0<՞20hE`IWlibdeflate-1.5/tools/afl-fuzz/gzip_decompress/000077500000000000000000000000001360172702500215505ustar00rootroot00000000000000libdeflate-1.5/tools/afl-fuzz/gzip_decompress/fuzz.c000066400000000000000000000010721360172702500227120ustar00rootroot00000000000000#include #include #include #include #include int main(int argc, char **argv) { struct libdeflate_decompressor *d; int ret; int fd = open(argv[1], O_RDONLY); struct stat stbuf; assert(fd >= 0); ret = fstat(fd, &stbuf); assert(!ret); char in[stbuf.st_size]; ret = read(fd, in, sizeof in); assert(ret == sizeof 
in); char out[sizeof(in) * 3]; d = libdeflate_alloc_decompressor(); libdeflate_gzip_decompress(d, in, sizeof in, out, sizeof out, NULL); libdeflate_free_decompressor(d); return 0; } libdeflate-1.5/tools/afl-fuzz/gzip_decompress/inputs/000077500000000000000000000000001360172702500230725ustar00rootroot00000000000000libdeflate-1.5/tools/afl-fuzz/gzip_decompress/inputs/0000066400000000000000000000002731360172702500231560ustar00rootroot00000000000000u1 @EgBl5 V6jX{i=l=Οl?tD =G% 2xԇ7eDs[Ukq |R/뮰*FMzv`r1B,lDuYj#0<՞20hE`IW `libdeflate-1.5/tools/afl-fuzz/prepare_for_fuzz.sh000077500000000000000000000003731360172702500222770ustar00rootroot00000000000000#!/bin/sh set -e make -C ../../ clean make clean AFL_HARDEN=1 make CC=afl-gcc -C ../../ AFL_HARDEN=1 make CC=afl-gcc for dir in $(find . -mindepth 1 -maxdepth 1 -type d); do rm -rf /tmp/$dir cp -va $dir /tmp/$dir mkdir -p /tmp/$dir/outputs done libdeflate-1.5/tools/afl-fuzz/zlib_decompress/000077500000000000000000000000001360172702500215375ustar00rootroot00000000000000libdeflate-1.5/tools/afl-fuzz/zlib_decompress/fuzz.c000066400000000000000000000010721360172702500227010ustar00rootroot00000000000000#include #include #include #include #include int main(int argc, char **argv) { struct libdeflate_decompressor *d; int ret; int fd = open(argv[1], O_RDONLY); struct stat stbuf; assert(fd >= 0); ret = fstat(fd, &stbuf); assert(!ret); char in[stbuf.st_size]; ret = read(fd, in, sizeof in); assert(ret == sizeof in); char out[sizeof(in) * 3]; d = libdeflate_alloc_decompressor(); libdeflate_zlib_decompress(d, in, sizeof in, out, sizeof out, NULL); libdeflate_free_decompressor(d); return 0; } libdeflate-1.5/tools/afl-fuzz/zlib_decompress/inputs/000077500000000000000000000000001360172702500230615ustar00rootroot00000000000000libdeflate-1.5/tools/afl-fuzz/zlib_decompress/inputs/0000066400000000000000000000002571360172702500231470ustar00rootroot00000000000000xu1 @EgBl5 V6jX{i=l=Οl?tD =G% 2xԇ7eDs[Ukq 
|R/뮰*FMzv`r1B,lDuYj#0<՞20hE`IW-libdeflate-1.5/tools/android_build.sh000077500000000000000000000044271360172702500177620ustar00rootroot00000000000000#!/bin/bash set -eu ARCH="arm32" COMPILER="gcc" NDKDIR="/opt/android-ndk" ENABLE_NEON=false ENABLE_CRYPTO=false usage() { cat << EOF Usage: $0 [OPTION]... -- [BENCHMARK_PROGRAM_ARG]... Build the libdeflate test programs for Android --arch=ARCH Architecture: arm32|arm64 (default: $ARCH) --compiler=COMPILER Compiler: gcc|clang (default: $COMPILER) --ndkdir=NDKDIR Android NDK directory (default: $NDKDIR) --enable-neon Enable NEON instructions --enable-crypto Enable crypto extensions (implies NEON too) EOF } if ! options=$(getopt -o '' \ -l 'arch:,compiler:,ndkdir:,enable-neon,enable-crypto,help' -- "$@"); then usage exit 1 fi eval set -- "$options" while [ $# -gt 0 ]; do case "$1" in --arch) ARCH="$2" shift ;; --compiler) COMPILER="$2" shift ;; --ndkdir) NDKDIR="$2" shift ;; --enable-neon) ENABLE_NEON=true ;; --enable-crypto) ENABLE_CRYPTO=true ;; --help) usage exit 0 ;; --) shift break ;; *) echo 1>&2 "Unknown option \"$1\"" usage exit 1 esac shift done CFLAGS="-fPIC" case "$ARCH" in arm|arm32|aarch32) GCC_TOOLCHAIN="arm-linux-androideabi-4.9" CLANG_TARGET="armv7-none-linux-androideabi" if $ENABLE_CRYPTO; then CFLAGS+=" -march=armv7-a -mfloat-abi=softfp -mfpu=crypto-neon-fp-armv8" elif $ENABLE_NEON; then CFLAGS+=" -march=armv7-a -mfloat-abi=softfp -mfpu=neon" else CFLAGS+=" -march=armv6" fi CFLAGS+=" --sysroot=\"$NDKDIR/platforms/android-12/arch-arm\"" ;; arm64|aarch64) GCC_TOOLCHAIN="aarch64-linux-android-4.9" CLANG_TARGET="aarch64-none-linux-android" if $ENABLE_CRYPTO; then CFLAGS+=" -march=armv8-a+crypto" else CFLAGS+=" -march=armv8-a" fi CFLAGS+=" --sysroot=\"$NDKDIR/platforms/android-21/arch-arm64\"" ;; *) echo 1>&2 "Unknown architecture: \"$ARCH\"" usage exit 1 esac case "$COMPILER" in gcc) CC="\"$NDKDIR/toolchains/$GCC_TOOLCHAIN/prebuilt/linux-x86_64/bin/${GCC_TOOLCHAIN%-*}-gcc\"" ;; clang) 
CC="\"$NDKDIR/toolchains/llvm/prebuilt/linux-x86_64/bin/clang\"" CFLAGS+=" -target \"$CLANG_TARGET\"" CFLAGS+=" -gcc-toolchain \"$NDKDIR/toolchains/$GCC_TOOLCHAIN/prebuilt/linux-x86_64\"" ;; *) echo 1>&2 "Unknown compiler: \"$COMPILER\"" usage exit 1 esac make -j$(grep -c processor /proc/cpuinfo) test_programs \ CC="$CC" CFLAGS="$CFLAGS" LDFLAGS="-pie" libdeflate-1.5/tools/checksum_benchmarks.sh000077500000000000000000000060101360172702500211500ustar00rootroot00000000000000#!/bin/bash set -eu -o pipefail have_cpu_feature() { local feature="$1" local tag case $ARCH in arm*|aarch*) tag="Features" ;; *) tag="flags" ;; esac grep -q "^$tag"$'[ \t]'"*:.*\<$feature\>" /proc/cpuinfo } make_and_test() { make "$@" checksum test_checksums > /dev/null ./test_checksums > /dev/null } __do_benchmark() { local impl="$1" speed shift local flags="$CKSUM_FLAGS $*" speed=$(./checksum $flags -t "$FILE" | \ grep -o '[0-9]\+ MB/s' | grep -o '[0-9]\+') printf "%-45s%-10s\n" "$CKSUM_NAME ($impl)" "$speed" } do_benchmark() { local impl="$1" if [ "$impl" = zlib ]; then __do_benchmark "$impl" "-Z" else make_and_test CFLAGS="$EXTRA_CFLAGS" __do_benchmark "libdeflate, $impl" if [ "$ARCH" = x86_64 ]; then make_and_test CFLAGS="-m32 $EXTRA_CFLAGS" __do_benchmark "libdeflate, $impl, 32-bit" fi fi } sort_by_speed() { awk '{print $NF, $0}' | sort -nr | cut -f2- -d' ' } disable_impl() { local name="$1" local extra_cflags="$2" sed -i '/^\#ifdef DISPATCH_'"$name"'$/aif (0)' lib/*/{adler,crc}32_impl.h EXTRA_CFLAGS+=" $extra_cflags" } cleanup() { git checkout -f lib/*/{adler,crc}32_impl.h if $USING_TMPFILE; then rm "$FILE" fi } ARCH="$(uname -m)" USING_TMPFILE=false if (( $# > 1 )); then echo "Usage: $0 [FILE]" 1>&2 exit 1 fi if git status -s | grep -E -q 'adler32_impl.h|crc32_impl.h'; then echo "This script will overwrite adler32_impl.h and crc32_impl.h," \ "which have uncommitted changes. Refusing to run." 1>&2 exit 1 fi trap cleanup EXIT if (( $# == 0 )); then # Generate default test data file. 
FILE=$(mktemp -t checksum_testdata.XXXXXXXXXX) USING_TMPFILE=true echo "Generating 100 MB test file: $FILE" head -c 100000000 /dev/urandom > "$FILE" else FILE="$1" fi cat << EOF Method Speed (MB/s) ------ ------------ EOF # CRC-32 CKSUM_NAME="CRC-32" CKSUM_FLAGS="" EXTRA_CFLAGS="" { case $ARCH in i386|x86_64) if have_cpu_feature pclmulqdq && have_cpu_feature avx; then do_benchmark "PCLMUL/AVX" disable_impl "PCLMUL_AVX" "-mno-avx" fi if have_cpu_feature pclmulqdq; then do_benchmark "PCLMUL" disable_impl "PCLMUL" "-mno-pclmul" fi ;; arm*|aarch*) if have_cpu_feature pmull; then do_benchmark "PMULL" disable_impl "PMULL" "" fi ;; esac do_benchmark "generic" do_benchmark "zlib" } | sort_by_speed # Adler-32 CKSUM_NAME="Adler-32" CKSUM_FLAGS="-A" EXTRA_CFLAGS="" echo { case $ARCH in i386|x86_64) if have_cpu_feature avx512bw; then do_benchmark "AVX-512BW" disable_impl "AVX512BW" "-mno-avx512bw" fi if have_cpu_feature avx2; then do_benchmark "AVX2" disable_impl "AVX2" "-mno-avx2" fi if have_cpu_feature sse2; then do_benchmark "SSE2" disable_impl "SSE2" "-mno-sse2" fi ;; arm*) if have_cpu_feature neon; then do_benchmark "NEON" disable_impl "NEON" "-mfpu=vfpv3" fi ;; aarch*) if have_cpu_feature asimd; then do_benchmark "NEON" disable_impl "NEON" "-march=armv8-a+nosimd" fi ;; esac do_benchmark "generic" do_benchmark "zlib" } | sort_by_speed libdeflate-1.5/tools/exec_tests.sh000066400000000000000000000011541360172702500173200ustar00rootroot00000000000000# # Helper script used by run_tests.sh, not intended to be run directly # set -eu run_cmd() { echo "$WRAPPER $@" $WRAPPER "$@" > /dev/null } for prog in ./test_*; do run_cmd $prog done for format in '' '-g' '-z'; do for ref_impl in '' '-Y' '-Z'; do run_cmd ./benchmark $format $ref_impl $SMOKEDATA done done for level in 1 3 7 9; do for ref_impl in '' '-Y'; do run_cmd ./benchmark -$level $ref_impl $SMOKEDATA done done for level in 1 3 7 9 12; do for ref_impl in '' '-Z'; do run_cmd ./benchmark -$level $ref_impl $SMOKEDATA done done 
echo "exec_tests finished successfully" # Needed for 'adb shell' libdeflate-1.5/tools/gen_crc32_multipliers.c000066400000000000000000000070651360172702500211670ustar00rootroot00000000000000/* * gen_crc32_multipliers.c * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include #include /* generator polynomial G(x) */ #define CRCPOLY 0xEDB88320 /* G(x) without x^32 term */ #define CRCPOLY_FULL (((uint64_t)CRCPOLY << 1) | 1) /* G(x) */ /* Compute x^D mod G(x) */ static uint32_t compute_multiplier(int D) { /* Start with x^0 mod G(x) */ uint32_t remainder = 0x80000000; /* Each iteration, 'remainder' becomes x^i mod G(x) */ for (int i = 1; i <= D; i++) remainder = (remainder >> 1) ^ ((remainder & 1) ? 
CRCPOLY : 0); /* Now 'remainder' is x^D mod G(x) */ return remainder; } /* Compute floor(x^64 / G(x)) */ static uint64_t compute_barrett_reduction_constant(void) { uint64_t quotient = 0; uint64_t dividend = 0x1; for (int i = 0; i < 64 - 32 + 1; i++) { if ((dividend >> i) & 1) { quotient |= (uint64_t)1 << i; dividend ^= CRCPOLY_FULL << i; } } return quotient; } /* * This program computes the constant multipliers needed for carryless * multiplication accelerated CRC-32. It assumes 128-bit vectors divided into * two 64-bit halves which are multiplied separately with different 32-bit * multipliers, producing two 95-bit products. For a given number of 128-bit * vectors per iteration, the program outputs a pair of multipliers, one for * each 64-bit half. * * Careful: all polynomials are "bit-reversed", meaning that the low-order bits * have the highest degree and the high-order bits have the lowest degree! */ int main(void) { printf("\t/* Constants precomputed by gen_crc32_multipliers.c. " "Do not edit! 
*/\n"); /* High and low multipliers for each needed vector count */ for (int order = 2; order >= 0; order--) { int vecs_per_iteration = 1 << order; int right = (128 * vecs_per_iteration) + 95; printf("\tconst __v2di multipliers_%d = (__v2di)" "{ 0x%08"PRIX32", 0x%08"PRIX32" };\n", vecs_per_iteration, compute_multiplier(right - 64) /* higher degree half */, compute_multiplier(right - 128) /* lower degree half */); } /* Multiplier for final 96 => 64 bit fold */ printf("\tconst __v2di final_multiplier = (__v2di){ 0x%08"PRIX32" };\n", compute_multiplier(63)); /* 32-bit mask */ printf("\tconst __m128i mask32 = (__m128i)(__v4si){ 0xFFFFFFFF };\n"); /* Constants for final 64 => 32 bit reduction */ printf("\tconst __v2di barrett_reduction_constants =\n" "\t\t\t(__v2di){ 0x%016"PRIX64", 0x%016"PRIX64" };\n", compute_barrett_reduction_constant(), CRCPOLY_FULL); return 0; } libdeflate-1.5/tools/gen_crc32_table.c000066400000000000000000000062361360172702500177040ustar00rootroot00000000000000/* * gen_crc32_table.c - a program for CRC-32 table generation * * Originally public domain; changes after 2016-09-07 are copyrighted. * * Copyright 2016 Eric Biggers * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include #include static uint32_t crc32_table[0x800]; static uint32_t crc32_update_bit(uint32_t remainder, uint8_t next_bit) { return (remainder >> 1) ^ (((remainder ^ next_bit) & 1) ? 0xEDB88320 : 0); } static uint32_t crc32_update_byte(uint32_t remainder, uint8_t next_byte) { for (int j = 0; j < 8; j++, next_byte >>= 1) remainder = crc32_update_bit(remainder, next_byte & 1); return remainder; } static void print_256_entries(const uint32_t *entries) { for (size_t i = 0; i < 256 / 4; i++) { printf("\t"); for (size_t j = 0; j < 4; j++) { printf("0x%08x,", entries[i * 4 + j]); if (j != 3) printf(" "); } printf("\n"); } } int main(void) { /* crc32_table[i] for 0 <= i < 0x100 is the CRC-32 of byte i. */ for (int i = 0; i < 0x100; i++) crc32_table[i] = crc32_update_byte(0, i); /* crc32_table[i] for 0x100 <= i < 0x800 is the CRC-32 of byte i % 0x100 * followed by i / 0x100 zero bytes. */ for (int i = 0x100; i < 0x800; i++) crc32_table[i] = crc32_update_byte(crc32_table[i - 0x100], 0); printf("/*\n"); printf(" * crc32_table.h - data table to accelerate CRC-32 computation\n"); printf(" *\n"); printf(" * THIS FILE WAS AUTOMATICALLY GENERATED " "BY gen_crc32_table.c. 
DO NOT EDIT.\n"); printf(" */\n"); printf("\n"); printf("#include \n"); printf("\n"); printf("static const uint32_t crc32_table[] = {\n"); print_256_entries(&crc32_table[0x000]); printf("#if defined(CRC32_SLICE4) || defined(CRC32_SLICE8)\n"); print_256_entries(&crc32_table[0x100]); print_256_entries(&crc32_table[0x200]); print_256_entries(&crc32_table[0x300]); printf("#endif /* CRC32_SLICE4 || CRC32_SLICE8 */\n"); printf("#if defined(CRC32_SLICE8)\n"); print_256_entries(&crc32_table[0x400]); print_256_entries(&crc32_table[0x500]); print_256_entries(&crc32_table[0x600]); print_256_entries(&crc32_table[0x700]); printf("#endif /* CRC32_SLICE8 */\n"); printf("};\n"); return 0; } libdeflate-1.5/tools/gzip_tests.sh000077500000000000000000000227251360172702500173570ustar00rootroot00000000000000#!/bin/bash # # Test script for libdeflate's gzip and gunzip programs. # # To run, you must set GZIP and GUNZIP in the environment to the absolute paths # to the gzip and gunzip programs to test. All tests should pass regardless of # whether the GNU versions or the libdeflate versions, or a combination, of # these programs are used. # # The environmental variable SMOKEDATA must also be set to a file containing # test data. # set -eu -o pipefail export -n GZIP GUNZIP SMOKEDATA TMPDIR="$(mktemp -d)" CURRENT_TEST= cleanup() { if [ -n "$CURRENT_TEST" ]; then echo "TEST FAILED: \"$CURRENT_TEST\"" fi rm -rf -- "$TMPDIR" } trap cleanup EXIT SMOKEDATA="$(realpath "$SMOKEDATA")" cd "$TMPDIR" begin_test() { CURRENT_TEST="$1" rm -rf -- "${TMPDIR:?}"/* cp "$SMOKEDATA" file } gzip() { $GZIP "$@" } gunzip() { $GUNZIP "$@" } assert_status() { local expected_status="$1" local expected_msg="$2" shift 2 ( set +e { eval "$*" > /dev/null; } 2>&1 local actual_status=$? if [ "$actual_status" != "$expected_status" ]; then echo 1>&2 "Command '$*' exited with status" \ "$actual_status but expected status" \ "$expected_status" exit 1 fi exit 0 ) > command_output if ! 
grep -E -q "$expected_msg" command_output; then echo 1>&2 "Expected output of command '$*' to match regex" \ "'$expected_msg'" echo 1>&2 "Actual output was:" echo 1>&2 "---------------------------------------------------" cat 1>&2 command_output echo 1>&2 "---------------------------------------------------" return 1 fi } assert_error() { assert_status 1 "$@" } assert_warning() { assert_status 2 "$@" } assert_skipped() { assert_warning '\<(ignored|skipping|unchanged)\>' "$@" } assert_equals() { local expected="$1" local actual="$2" if [ "$expected" != "$actual" ]; then echo 1>&2 "Expected '$expected', but got '$actual'" return 1 fi } begin_test 'Basic compression and decompression works' cp file orig gzip file [ ! -e file ] && [ -e file.gz ] gunzip file.gz [ -e file ] && [ ! -e file.gz ] cmp file orig begin_test 'gzip -d is gunzip' cp file orig gzip file gzip -d file.gz cmp file orig begin_test '-k (keep original file) works' cp file orig gzip -k file cmp file orig rm file cp file.gz orig.gz gunzip -k file.gz cmp file.gz orig.gz begin_test '-c (write to stdout) works' cp file orig gzip -k file gzip -c file > 2.gz cmp file orig cmp file.gz 2.gz gunzip -c 2.gz > file cmp file.gz 2.gz cmp file orig begin_test 'Reading from stdin works' gzip < file > 1.gz gzip - < file > 2.gz cat file | gzip > 3.gz cat file | gzip - > 4.gz cmp file <(gunzip < 1.gz) cmp file <(gunzip - < 2.gz) cmp file <(cat 3.gz | gunzip) cmp file <(cat 4.gz | gunzip -) begin_test '-n option is accepted' gzip -n file gunzip -n file.gz begin_test 'can specify multiple options' gzip -fk1 file cmp <(gzip -c -1 file) file.gz gunzip -kfd file.gz begin_test 'Compression levels' if [ "$GZIP" = /bin/gzip ]; then assert_error '\' gzip -10 max_level=9 else for level in 13 99999 1a; do assert_error '\' gzip -$level done max_level=12 fi for level in $(seq 1 $max_level); do gzip -c "-$level" file > "file$level" cmp file <(gunzip -c "file$level") done rm file command_output cmp <(ls -S) <(ls -v) # 
file,file{1..max_level} have decreasing size begin_test 'Overwriting output file requires -f' cp file orig echo -n > file.gz gzip -c file > 2.gz assert_warning 'already exists' gzip file file assert_warning 'already exists' gunzip file.gz c.gz gzip file.gz 2>&1 >/dev/null | grep -q 'already has .gz suffix' [ -e file.gz ] && [ ! -e file.gz.gz ] gzip -f file.gz [ ! -e file.gz ] && [ -e file.gz.gz ] cmp file.gz.gz c.gz begin_test 'Decompressing unsuffixed file only works with -c' gzip file && mv file.gz file assert_skipped gunzip file assert_skipped gunzip -f file gunzip -c file > orig mv file file.gz && gunzip file.gz && cmp file orig begin_test '... unless there is a corresponding suffixed file' cp file orig gzip file [ ! -e file ] && [ -e file.gz ] gunzip -c file > tmp cmp tmp orig rm tmp ln -s NONEXISTENT file gunzip -c file > tmp cmp tmp orig rm tmp file gunzip file [ -e file ] && [ ! -e file.gz ] cmp file orig begin_test 'Directory is skipped, even with -f' mkdir dir mkdir dir.gz for opt in '' '-f' '-c'; do assert_skipped gzip $opt dir done #assert_skipped gzip dir.gz # XXX: GNU gzip warns, libdeflate gzip no-ops for opt in '' '-f' '-c'; do for name in dir dir.gz; do assert_skipped gunzip $opt $name done done begin_test '(gzip) symlink is rejected without -f or -c' ln -s file symlink1 ln -s file symlink2 assert_error 'Too many levels of symbolic links' gzip symlink1 [ -e file ] && [ -e symlink1 ] && [ ! -e symlink1.gz ] gzip -f symlink1 [ -e file ] && [ ! -e symlink1 ] && [ -e symlink1.gz ] gzip -c symlink2 > /dev/null begin_test '(gunzip) symlink is rejected without -f or -c' gzip file ln -s file.gz symlink1.gz ln -s file.gz symlink2.gz assert_error 'Too many levels of symbolic links' gunzip symlink1 [ -e file.gz ] && [ -e symlink1.gz ] && [ ! -e symlink1 ] gunzip -f symlink1.gz [ -e file.gz ] && [ ! 
-e symlink1.gz ] && [ -e symlink1 ] gunzip -c symlink2.gz > /dev/null begin_test 'FIFO is skipped, even with -f' mkfifo foo mkfifo foo.gz assert_skipped gzip foo assert_skipped gzip -f foo #assert_skipped gzip -c foo # XXX: works with GNU gzip, not libdeflate's assert_skipped gunzip foo.gz assert_skipped gunzip -f foo.gz #assert_skipped gunzip -c foo.gz # XXX: works with GNU gzip, not libdeflate's begin_test '(gzip) overwriting symlink does not follow symlink' echo 1 > 1 echo 2 > 2 gzip 1 ln -s 1.gz 2.gz gzip -f 2 gunzip 1.gz cmp <(echo 1) 1 begin_test '(gunzip) overwriting symlink does not follow symlink' echo 1 > 1 echo 2 > 2 gzip 2 ln -s 1 2 gunzip -f 2.gz cmp <(echo 1) 1 cmp <(echo 2) 2 begin_test '(gzip) hard linked file skipped without -f or -c' cp file orig ln file link assert_equals 2 "$(stat -c %h file)" assert_skipped gzip file gzip -c file > /dev/null assert_equals 2 "$(stat -c %h file)" gzip -f file assert_equals 1 "$(stat -c %h link)" assert_equals 1 "$(stat -c %h file.gz)" cmp link orig # XXX: GNU gzip skips hard linked files with -k, libdeflate's doesn't begin_test '(gunzip) hard linked file skipped without -f or -c' gzip file ln file.gz link.gz cp file.gz orig.gz assert_equals 2 "$(stat -c %h file.gz)" assert_skipped gunzip file.gz gunzip -c file.gz > /dev/null assert_equals 2 "$(stat -c %h file.gz)" gunzip -f file assert_equals 1 "$(stat -c %h link.gz)" assert_equals 1 "$(stat -c %h file)" cmp link.gz orig.gz begin_test 'Multiple files' cp file file2 gzip file file2 [ ! -e file ] && [ ! -e file2 ] && [ -e file.gz ] && [ -e file2.gz ] gunzip file.gz file2.gz [ -e file ] && [ -e file2 ] && [ ! -e file.gz ] && [ ! -e file2.gz ] begin_test 'Multiple files, continue on warning' mkdir 1 cp file 2 assert_skipped gzip 1 2 [ ! -e 1.gz ] cmp file <(gunzip -c 2.gz) rmdir 1 mkdir 1.gz assert_skipped gunzip 1.gz 2.gz [ ! -e 1 ] cmp 2 file begin_test 'Multiple files, continue on error' cp file 1 cp file 2 chmod a-r 1 assert_error 'Permission denied' gzip 1 2 [ ! 
-e 1.gz ] cmp file <(gunzip -c 2.gz) rm -f 1 cp 2.gz 1.gz chmod a-r 1.gz assert_error 'Permission denied' gunzip 1.gz 2.gz [ ! -e 1 ] cmp 2 file begin_test 'Compressing empty file' echo -n > empty gzip empty gunzip empty.gz cmp /dev/null empty begin_test 'Decompressing malformed file' echo -n > foo.gz assert_error '\<(not in gzip format|unexpected end of file)\>' \ gunzip foo.gz echo 1 > foo.gz assert_error '\' gunzip foo.gz echo abcdefgh > foo.gz assert_error '\' gunzip foo.gz xxd -r > foo.gz <<-EOF 00000000: 1f8b 0800 0000 0000 00ff 4b4c 4a4e 4924 ..........KLJNI$ 00000010: 1673 0100 6c5b a262 2e00 0000 .s..l[.b.... EOF assert_error '\<(not in gzip format|crc error)\>' gunzip foo.gz for suf in .foo foo .blaaaaaaaaaaaaaaaargh; do begin_test "Custom suffix: $suf" gzip -S $suf file [ ! -e file ] && [ ! -e file.gz ] && [ -e file$suf ] assert_skipped gunzip file$suf gunzip -S $suf file$suf [ -e file ] && [ ! -e file.gz ] && [ ! -e file$suf ] done # DIFFERENCE: GNU gzip lower cases suffix, we don't begin_test 'Empty suffix is rejected' assert_error '\' gzip -S '""' file assert_error '\' gunzip -S '""' file begin_test 'Timestamps and mode are preserved' chmod 777 file orig_stat="$(stat -c '%a;%x;%y' file)" gzip file sleep 1 gunzip file.gz assert_equals "$orig_stat" "$(stat -c '%a;%x;%y' file)" begin_test 'Decompressing multi-member gzip file' cat file file > orig gzip -c file > file.gz gzip -c file >> file.gz gunzip -f file.gz cmp file orig begin_test 'Decompressing multi-member gzip file (final member smaller)' echo 'hello world' > 2 cat file 2 > orig gzip -c file > file.gz gzip -c 2 >> file.gz gunzip -f file.gz cmp file orig begin_test 'Help option' gzip -h 2>&1 | grep -q 'Usage' gunzip -h 2>&1 | grep -q 'Usage' begin_test 'Incorrect usage' for prog in gzip gunzip; do for opt in '--invalid-option' '-0'; do assert_error '\<(unrecognized|invalid) option\>' $prog $opt done done begin_test 'Version information' gzip -V | grep -q Copyright gunzip -V | grep -q Copyright 
CURRENT_TEST= libdeflate-1.5/tools/make-windows-releases000077500000000000000000000011321360172702500207460ustar00rootroot00000000000000#!/bin/bash set -eu for arch in 'i686' 'x86_64'; do make clean make -j CC=${arch}-w64-mingw32-gcc CFLAGS="-Werror" all \ benchmark.exe checksum.exe dir=libdeflate-$(git describe --tags | tr -d v)-windows-${arch}-bin rm -rf $dir ${dir}.zip mkdir $dir cp libdeflate.{dll,lib,def} libdeflatestatic.lib libdeflate.h *.exe $dir ${arch}-w64-mingw32-strip ${dir}/libdeflate.dll ${dir}/*.exe for file in COPYING NEWS; do sed < $file > ${dir}/${file}.txt -e 's/$/\r/g' done for file in README.md; do sed < $file > ${dir}/${file} -e 's/$/\r/g' done (cd ${dir} && zip -r ../${dir}.zip .) done libdeflate-1.5/tools/mips_build.sh000077500000000000000000000003761360172702500173110ustar00rootroot00000000000000#!/bin/bash set -eu TOOLCHAIN_DIR=$HOME/src/ddwrt-toolchains/toolchain-mips_34kc_gcc-5.1.0_musl-1.1.9 make -j$(grep -c processor /proc/cpuinfo) all test_programs \ CC="$TOOLCHAIN_DIR/bin/mips-openwrt-linux-musl-gcc" \ CFLAGS="-DNEED_PRINTF -Werror" libdeflate-1.5/tools/msc_test.bat000077500000000000000000000001131360172702500171240ustar00rootroot00000000000000nmake /f Makefile.msc clean nmake /f Makefile.msc copy /y *.exe j:\exe\ libdeflate-1.5/tools/pgo_build.sh000077500000000000000000000011511360172702500171160ustar00rootroot00000000000000#!/bin/bash # Try gcc profile-guided optimizations set -eu MAKE="make -j$(grep -c processor /proc/cpuinfo)" DATAFILE="$HOME/data/silesia" $MAKE benchmark > /dev/null echo "=====================" echo "Original performance:" echo "---------------------" ./benchmark "$@" "$DATAFILE" $MAKE CFLAGS=-fprofile-generate LDFLAGS=-fprofile-generate benchmark > /dev/null ./benchmark "$@" "$DATAFILE" > /dev/null $MAKE CFLAGS=-fprofile-use benchmark > /dev/null rm -f {lib,programs}/*.gcda echo "==========================" echo "PGO-optimized performance:" echo "--------------------------" ./benchmark "$@" "$DATAFILE" 
libdeflate-1.5/tools/produce_gzip_benchmark_table.sh000077500000000000000000000014651360172702500230350ustar00rootroot00000000000000#!/bin/bash set -e do_benchmark() { usize=$(stat -c %s "$file") "$HOME/proj/libdeflate/benchmark" -g -s $usize "$@" "$file" \ | grep Compressed | cut -f 4 -d ' ' } echo "File | zlib -6 | zlib -9 | libdeflate -6 | libdeflate -9 | libdeflate -12" echo "-----|---------|---------|---------------|---------------|---------------" for file in "$@"; do echo -n "$(basename "$file")" results=() results+=($(do_benchmark -Y -6)) results+=($(do_benchmark -Y -9)) results+=($(do_benchmark -6)) results+=($(do_benchmark -9)) results+=($(do_benchmark -12)) best=2000000000 for result in "${results[@]}"; do if (( result < best)); then best=$result fi done for result in "${results[@]}"; do if (( result == best )); then em="**" else em="" fi echo -n " | ${em}${result}${em}" done echo done libdeflate-1.5/tools/run_tests.sh000077500000000000000000000231671360172702500172130ustar00rootroot00000000000000#!/bin/bash # # Test script for libdeflate # # Usage: ./tools/run_tests.sh [TESTGROUP]... [-TESTGROUP]... # # By default all tests are run, but it is possible to explicitly include or # exclude specific test groups. # set -eu -o pipefail cd "$(dirname "$0")/.." TESTGROUPS=(all) set_test_groups() { TESTGROUPS=("$@") local have_exclusion=0 local have_all=0 for group in "${TESTGROUPS[@]}"; do if [[ $group == -* ]]; then have_exclusion=1 elif [[ $group == all ]]; then have_all=1 fi done if (( have_exclusion && !have_all )); then TESTGROUPS=(all "${TESTGROUPS[@]}") fi } if [ $# -gt 0 ]; then set_test_groups "$@" fi TMPFILE="$(mktemp)" USING_TMP_SMOKEDATA=false cleanup() { rm "$TMPFILE" if $USING_TMP_SMOKEDATA; then rm "$SMOKEDATA" fi } trap cleanup EXIT if [ -z "${SMOKEDATA:-}" ]; then # Generate default SMOKEDATA file. SMOKEDATA=$(mktemp -t smokedata.XXXXXXXXXX) USING_TMP_SMOKEDATA=true cat $(find . 
-name '*.c' -o -name '*.h' -o -name '*.sh') \ | head -c 1000000 > "$SMOKEDATA" fi NDKDIR="${NDKDIR:=/opt/android-ndk}" FILES=("$SMOKEDATA" ./tools/exec_tests.sh benchmark 'test_*') EXEC_TESTS_CMD="WRAPPER= SMOKEDATA=\"$(basename $SMOKEDATA)\" sh exec_tests.sh" NPROC=$(grep -c processor /proc/cpuinfo) VALGRIND="valgrind --quiet --error-exitcode=100 --leak-check=full --errors-for-leak-kinds=all" SANITIZE_CFLAGS="-fsanitize=undefined -fno-sanitize-recover=undefined,integer" ############################################################################### rm -f run_tests.log exec > >(tee -ia run_tests.log) exec 2> >(tee -ia run_tests.log >&2) TESTS_SKIPPED=0 log_skip() { log "[WARNING, TEST SKIPPED]: $@" TESTS_SKIPPED=1 } log() { echo "[$(date)] $@" } run_cmd() { log "$@" "$@" > /dev/null } test_group_included() { local included=0 group for group in "${TESTGROUPS[@]}"; do if [ "$group" = "$1" ]; then included=1 # explicitly included break fi if [ "$group" = "-$1" ]; then included=0 # explicitly excluded break fi if [ "$group" = "all" ]; then # implicitly included included=1 fi done if (( included )); then log "Starting test group: $1" fi (( included )) } have_valgrind() { if ! type -P valgrind > /dev/null; then log_skip "valgrind not found; can't run tests with valgrind" return 1 fi } have_ubsan() { if ! type -P clang > /dev/null; then log_skip "clang not found; can't run tests with UBSAN" return 1 fi } have_python() { if ! 
type -P python3 > /dev/null; then log_skip "Python not found" return 1 fi } ############################################################################### native_build_and_test() { make "$@" -j$NPROC all test_programs > /dev/null WRAPPER="$WRAPPER" SMOKEDATA="$SMOKEDATA" sh ./tools/exec_tests.sh \ > /dev/null } native_tests() { test_group_included native || return 0 local compiler compilers_to_try=(gcc) local cflags cflags_to_try=("") shopt -s nullglob compilers_to_try+=(/usr/bin/gcc-[0-9]*) compilers_to_try+=(/usr/bin/clang-[0-9]*) compilers_to_try+=(/opt/gcc*/bin/gcc) compilers_to_try+=(/opt/clang*/bin/clang) shopt -u nullglob if [ "$(uname -m)" = "x86_64" ]; then cflags_to_try+=("-march=native") cflags_to_try+=("-m32") fi for compiler in ${compilers_to_try[@]}; do for cflags in "${cflags_to_try[@]}"; do if [ "$cflags" = "-m32" ] && \ $compiler -v |& grep -q -- '--disable-multilib' then continue fi log "Running tests with CC=$compiler," \ "CFLAGS=$cflags" WRAPPER= native_build_and_test \ CC=$compiler CFLAGS="$cflags -Werror" done done if have_valgrind; then log "Running tests with Valgrind" WRAPPER="$VALGRIND" native_build_and_test fi if have_ubsan; then log "Running tests with undefined behavior sanitizer" WRAPPER= native_build_and_test CC=clang CFLAGS="$SANITIZE_CFLAGS" fi } ############################################################################### checksum_benchmarks() { test_group_included checksum_benchmarks || return 0 ./tools/checksum_benchmarks.sh } ############################################################################### android_build_and_test() { run_cmd ./tools/android_build.sh --ndkdir="$NDKDIR" "$@" run_cmd adb push ${FILES[@]} /data/local/tmp/ # Note: adb shell always returns 0, even if the shell command fails... log "adb shell \"cd /data/local/tmp && $EXEC_TESTS_CMD\"" adb shell "cd /data/local/tmp && $EXEC_TESTS_CMD" > "$TMPFILE" if ! grep -q "exec_tests finished successfully" "$TMPFILE"; then log "Android test failure! 
adb shell output:" cat "$TMPFILE" return 1 fi } android_tests() { local compiler test_group_included android || return 0 if [ ! -e $NDKDIR ]; then log_skip "Android NDK was not found in NDKDIR=$NDKDIR!" \ "If you want to run the Android tests, set the" \ "environmental variable NDKDIR to the location of" \ "your Android NDK installation" return 0 fi if ! type -P adb > /dev/null; then log_skip "adb (android-tools) is not installed" return 0 fi if ! adb devices | grep -q 'device$'; then log_skip "No Android device is currently attached" return 0 fi for compiler in gcc clang; do for flags in "" "--enable-neon" "--enable-crypto"; do for arch in arm32 arm64; do android_build_and_test --arch=$arch \ --compiler=$compiler $flags done done done } ############################################################################### mips_tests() { test_group_included mips || return 0 if [ "$(hostname)" != "zzz" ] && [ "$(hostname)" != "sol" ]; then log_skip "MIPS tests are not supported on this host" return 0 fi if ! ping -c 1 dd-wrt > /dev/null; then log_skip "Can't run MIPS tests: dd-wrt system not available" return 0 fi run_cmd ./tools/mips_build.sh run_cmd scp ${FILES[@]} root@dd-wrt: run_cmd ssh root@dd-wrt "$EXEC_TESTS_CMD" log "Checking that compression on big endian CPU produces same output" run_cmd scp gzip root@dd-wrt: run_cmd ssh root@dd-wrt \ "rm -f big*.gz; ./gzip -c -6 $(basename $SMOKEDATA) > big6.gz; ./gzip -c -10 $(basename $SMOKEDATA) > big10.gz" run_cmd scp root@dd-wrt:big*.gz . make -j$NPROC gzip > /dev/null ./gzip -c -6 "$SMOKEDATA" > little6.gz ./gzip -c -10 "$SMOKEDATA" > little10.gz if ! cmp big6.gz little6.gz || ! cmp big10.gz little10.gz; then echo 1>&2 "Compressed data differed on big endian vs. little endian!" 
return 1 fi rm big*.gz little*.gz } ############################################################################### windows_tests() { local arch test_group_included windows || return 0 # Windows: currently compiled but not run for arch in i686 x86_64; do local compiler=${arch}-w64-mingw32-gcc if ! type -P $compiler > /dev/null; then log_skip "$compiler not found" continue fi run_cmd make CC=$compiler CFLAGS=-Werror -j$NPROC \ all test_programs done } ############################################################################### static_analysis_tests() { test_group_included static_analysis || return 0 if ! type -P scan-build > /dev/null; then log_skip "clang static analyzer (scan-build) not found" return 0 fi run_cmd scan-build --status-bugs make -j$NPROC all test_programs } ############################################################################### gzip_tests() { test_group_included gzip || return 0 local gzip gunzip run_cmd make -j$NPROC gzip gunzip for gzip in "$PWD/gzip" /bin/gzip; do for gunzip in "$PWD/gunzip" /bin/gunzip; do log "Running gzip program tests with GZIP=$gzip," \ "GUNZIP=$gunzip" GZIP="$gzip" GUNZIP="$gunzip" SMOKEDATA="$SMOKEDATA" \ ./tools/gzip_tests.sh done done if have_valgrind; then log "Running gzip program tests with Valgrind" GZIP="$VALGRIND $PWD/gzip" GUNZIP="$VALGRIND $PWD/gunzip" \ SMOKEDATA="$SMOKEDATA" ./tools/gzip_tests.sh fi if have_ubsan; then log "Running gzip program tests with undefined behavior sanitizer" run_cmd make -j$NPROC CC=clang CFLAGS="$SANITIZE_CFLAGS" gzip gunzip GZIP="$PWD/gzip" GUNZIP="$PWD/gunzip" \ SMOKEDATA="$SMOKEDATA" ./tools/gzip_tests.sh fi } ############################################################################### edge_case_tests() { test_group_included edge_case || return 0 # Regression test for "deflate_compress: fix corruption with long # literal run". 
Try to compress a file longer than 65535 bytes where no # 2-byte sequence (3 would be sufficient) is repeated <= 32768 bytes # apart, and the distribution of bytes remains constant throughout, and # yet not all bytes are used so the data is still slightly compressible. # There will be no matches in this data, but the compressor should still # output a compressed block, and this block should contain more than # 65535 consecutive literals, which triggered the bug. # # Note: on random data, this situation is extremely unlikely if the # compressor uses all matches it finds, since random data will on # average have a 3-byte match every (256**3)/32768 = 512 bytes. if have_python; then python3 > "$TMPFILE" << EOF import sys for i in range(2): for stride in range(1,251): b = bytes(stride*multiple % 251 for multiple in range(251)) sys.stdout.buffer.write(b) EOF run_cmd make -j$NPROC benchmark run_cmd ./benchmark -3 "$TMPFILE" run_cmd ./benchmark -6 "$TMPFILE" run_cmd ./benchmark -12 "$TMPFILE" fi } ############################################################################### log "Starting libdeflate tests" log " TESTGROUPS=(${TESTGROUPS[@]})" log " SMOKEDATA=$SMOKEDATA" log " NDKDIR=$NDKDIR" native_tests checksum_benchmarks android_tests mips_tests windows_tests static_analysis_tests gzip_tests edge_case_tests if (( TESTS_SKIPPED )); then log "No tests failed, but some tests were skipped. See above." else log "All tests passed!" fi libdeflate-1.5/tools/windows_build.sh000077500000000000000000000003121360172702500200210ustar00rootroot00000000000000#!/bin/bash set -eu make -j CC=i686-w64-mingw32-gcc all test_programs cp -vf *.exe /j/exe/ make -j CC=x86_64-w64-mingw32-gcc all test_programs cp -vf *.exe /j/exe64/ sudo $HOME/bin/sudo/restart-smbd