pax_global_header00006660000000000000000000000064150372200260014507gustar00rootroot0000000000000052 comment=371d9f0cf74ce31c5d210897c2fb366b830558c0 DaemonEngine-crunch-ef4d32f/000077500000000000000000000000001503722002600160175ustar00rootroot00000000000000DaemonEngine-crunch-ef4d32f/.appveyor.yml000066400000000000000000000027751503722002600205000ustar00rootroot00000000000000# Documentation: https://wiki.unvanquished.net/wiki/Continuous_integration skip_branch_with_pr: true only_commits: files: - .appveyor.yml # The rest of this list should stay in sync with azure-pipelines.yml - crunch/ - crnlib/ - example1/ - example2/ - example3/ - inc/ - test/ - cmake/ - CMakeLists.txt environment: matrix: - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019 generator: Visual Studio 16 2019 platform: x64 - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2019 generator: Visual Studio 16 2019 platform: win32 build: parallel: true build_script: # simple line break: space # double line break: line break # The CFLAGS/CXXFLAGS env vars *prepend* to CMAKE__FLAGS rather than overwriting. # /Wv pins warnings to a specific compiler version so that new ones # don't make the build error after Appveyor updates the compiler. # The CMAKE_CONFIGURATION_TYPES CMake option make sure to not uselessly provide # build configurations that will not be used. - cmd: > pip install colorama echo %NUMBER_OF_PROCESSORS% set CMAKE_BUILD_PARALLEL_LEVEL=%NUMBER_OF_PROCESSORS% cmake --version set CFLAGS=/Wv:19.29.30037 set CXXFLAGS=/Wv:19.29.30037 cmake -Wdev -G"%generator%" -A"%platform%" -S. 
-Bbuild -DCMAKE_CONFIGURATION_TYPES=Release -DBUILD_CRUNCH=ON -DBUILD_EXAMPLES=ON -DUSE_FAST_MATH=OFF cmake --build build --config Release python test\test.py DaemonEngine-crunch-ef4d32f/.azure-pipelines.yml000066400000000000000000000115461503722002600217430ustar00rootroot00000000000000# Documentation: https://wiki.unvanquished.net/wiki/Continuous_integration trigger: branches: include: - master pr: branches: include: - '*' paths: include: - .azure-pipelines.yml # The rest of this list should stay in sync with .appveyor.yml - crunch/ - crnlib/ - inc/ - example1/ - example2/ - example3/ - test/ - cmake/ - CMakeLists.txt strategy: matrix: Linux amd64 GCC: VM_IMAGE: 'ubuntu-22.04' APT_PACKAGES: ninja-build g++-10 CXX_COMPILER: g++-10 Linux i686 GCC: VM_IMAGE: 'ubuntu-22.04' APT_PACKAGES: ninja-build g++-i686-linux-gnu CXX_COMPILER: i686-linux-gnu-g++ COMPILER_FLAGS: -mfpmath=sse -msse Linux arm64 GCC: VM_IMAGE: 'ubuntu-22.04' APT_PACKAGES: ninja-build g++-aarch64-linux-gnu qemu-user CXX_COMPILER: aarch64-linux-gnu-g++ CRUNCH_EXE_RUNNER: qemu-aarch64 -L /usr/aarch64-linux-gnu Linux armhf GCC: # There is an IO bug in qemu-arm from ubuntu-22.04. 
VM_IMAGE: 'ubuntu-24.04' APT_PACKAGES: ninja-build g++-arm-linux-gnueabihf qemu-user CXX_COMPILER: arm-linux-gnueabihf-g++ CRUNCH_EXE_RUNNER: qemu-arm -L /usr/arm-linux-gnueabihf Linux amd64 Clang: VM_IMAGE: 'ubuntu-22.04' APT_PACKAGES: ninja-build CXX_COMPILER: clang++ Windows amd64 MinGW: VM_IMAGE: 'ubuntu-22.04' APT_PACKAGES: ninja-build g++-mingw-w64-x86-64 mingw-w64-x86-64-dev gcc-mingw-w64-x86-64-posix-runtime wine SETUP_COMMANDS: sudo update-alternatives --set x86_64-w64-mingw32-g++ /usr/bin/x86_64-w64-mingw32-g++-posix TOOLCHAIN_FILE: cmake/cross-toolchain-mingw64.cmake CRUNCH_EXE_RUNNER: wine CRUNCH_EXTENSION: .exe RUNTIME_FILES: /usr/lib/gcc/x86_64-w64-mingw32/10-posix/libgcc_s_seh-1.dll /usr/lib/gcc/x86_64-w64-mingw32/10-posix/libstdc++-6.dll /usr/x86_64-w64-mingw32/lib/libwinpthread-1.dll Windows i686 MinGW: VM_IMAGE: 'ubuntu-22.04' APT_ARCHITECTURE: i386 APT_PACKAGES: ninja-build g++-mingw-w64-i686 mingw-w64-i686-dev gcc-mingw-w64-i686-posix-runtime wine wine32 SETUP_COMMANDS: sudo update-alternatives --set i686-w64-mingw32-g++ /usr/bin/i686-w64-mingw32-g++-posix TOOLCHAIN_FILE: cmake/cross-toolchain-mingw32.cmake COMPILER_FLAGS: -mfpmath=sse -msse CRUNCH_EXE_RUNNER: wine CRUNCH_EXTENSION: .exe RUNTIME_FILES: /usr/lib/gcc/i686-w64-mingw32/10-posix/libgcc_s_dw2-1.dll /usr/lib/gcc/i686-w64-mingw32/10-posix/libstdc++-6.dll /usr/i686-w64-mingw32/lib/libwinpthread-1.dll macOS amd64 AppleClang: VM_IMAGE: 'macOS-13' PIP_PACKAGES: colorama CMAKE_GENERATOR: Unix Makefiles NPROC_COMMAND: sysctl -n hw.logicalcpu macOS arm64 AppleClang: VM_IMAGE: 'macOS-13' PIP_PACKAGES: colorama CMAKE_GENERATOR: Unix Makefiles COMPILER_FLAGS: -target arm64-apple-macos11 -Wno-overriding-t-option NPROC_COMMAND: sysctl -n hw.logicalcpu RUN_TESTS: false Web Asm.js Emscripten: VM_IMAGE: 'ubuntu-22.04' APT_PACKAGES: ninja-build emscripten TOOLCHAIN_FILE: /usr/share/emscripten/cmake/Modules/Platform/Emscripten.cmake SOURCE_DIR: emscripten EXECUTABLES: crunch.js crunch.wasm RUN_TESTS: 
false pool: vmImage: $(VM_IMAGE) steps: - bash: | set -xue if [ -n "${APT_ARCHITECTURE:-}" ]; then sudo dpkg --add-architecture "${APT_ARCHITECTURE}" fi if [ -n "${APT_PACKAGES:-}" ]; then sudo apt-get update && sudo apt-get -y -q --no-install-recommends install ${APT_PACKAGES} fi if [ -n "${PIP_PACKAGES:-}" ]; then pip install ${PIP_PACKAGES} fi if [ -n "${SETUP_COMMANDS:-}" ]; then $(SETUP_COMMANDS) fi displayName: 'Setup' - bash: | set -xue export CMAKE_BUILD_PARALLEL_LEVEL="$(${NPROC_COMMAND:-nproc})" echo "${CMAKE_BUILD_PARALLEL_LEVEL}" cmake_args=(-G"${CMAKE_GENERATOR:-Ninja}") if [ -n "${TOOLCHAIN_FILE:-}" ]; then cmake_args+=(-DCMAKE_TOOLCHAIN_FILE="${TOOLCHAIN_FILE}") fi if [ -n "${CXX_COMPILER:-}" ]; then cmake_args+=(-DCMAKE_CXX_COMPILER="${CXX_COMPILER}") fi if [ -n "${COMPILER_FLAGS:-}" ]; then cmake_args+=(-DCMAKE_CXX_FLAGS="${COMPILER_FLAGS}") fi if [ -z "${SOURCE_DIR:-}" ]; then cmake_args+=(-DBUILD_CRUNCH=ON -DBUILD_EXAMPLES=ON -DBUILD_SHARED_LIBS=ON) fi cmake_args+=(-DUSE_FAST_MATH=OFF) cmake -S"${SOURCE_DIR:-.}" -Bbuild "${cmake_args[@]}" cmake --build build --config Release displayName: 'Build' - bash: | set -xue EXECUTABLES="${EXECUTABLES:-crunch example1 example2 example3}" for exe_file in ${EXECUTABLES}; do file "build/${exe_file}${CRUNCH_EXTENSION:-}" done if [ -n "${RUNTIME_FILES:-}" ]; then cp -av ${RUNTIME_FILES} build/ fi if "${RUN_TESTS:-true}"; then test/test.py fi displayName: 'Test' DaemonEngine-crunch-ef4d32f/.github/000077500000000000000000000000001503722002600173575ustar00rootroot00000000000000DaemonEngine-crunch-ef4d32f/.github/workflows/000077500000000000000000000000001503722002600214145ustar00rootroot00000000000000DaemonEngine-crunch-ef4d32f/.github/workflows/codeql.yml000066400000000000000000000075351503722002600234200ustar00rootroot00000000000000name: "CodeQL" on: push: branches: [ "master" ] pull_request: branches: [ "master" ] schedule: - cron: '19 10 5 * *' jobs: analyze: name: Analyze (${{ matrix.language }}) # Runner 
size impacts CodeQL analysis time. To learn more, please see: # - https://gh.io/recommended-hardware-resources-for-running-codeql # - https://gh.io/supported-runners-and-hardware-resources # - https://gh.io/using-larger-runners (GitHub.com only) # Consider using larger runners or machines with greater resources for possible analysis time improvements. runs-on: ${{ 'ubuntu-latest' }} timeout-minutes: ${{ 360 }} permissions: # required for all workflows security-events: write # required to fetch internal or private CodeQL packs packages: read # only required for workflows in private repositories actions: read contents: read strategy: fail-fast: false matrix: include: - language: c-cpp build-mode: manual # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' # Use `c-cpp` to analyze code written in C, C++ or both # Use 'java-kotlin' to analyze code written in Java, Kotlin or both # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages steps: - name: Checkout repository uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} # If you wish to specify custom queries, you can do so here or in a config file. 
# By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs # queries: security-extended,security-and-quality # If the analyze step fails for one of the languages you are analyzing with # "We were unable to automatically build your code", modify the matrix above # to set the build mode to "manual" for that language. Then modify this step # to build your code. # ℹ️ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - if: matrix.build-mode == 'manual' shell: bash run: | set -xe sudo apt-get update sudo apt-get -y -q --no-install-recommends install ninja-build export CMAKE_BUILD_PARALLEL_LEVEL="$(nproc)" echo "${CMAKE_BUILD_PARALLEL_LEVEL}" cmake -Wdev -S. 
-Bbuild -GNinja -DCMAKE_BUILD_TYPE=Release \ -DBUILD_CRUNCH=ON -DBUILD_EXAMPLES=OFF -DBUILD_SHARED_LIBS=ON cmake --build build - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v3 with: category: "/language:${{matrix.language}}" DaemonEngine-crunch-ef4d32f/.gitignore000066400000000000000000000003631503722002600200110ustar00rootroot00000000000000*.o *.js *.wasm *.2010.vcxproj.user *.2010.suo /crnlib/crunch /crnlib/Win32 /crnlib/x64 /crunch/Win32 /crunch/x64 /example1/Win32 /example1/x64 /example2/Win32 /example2/x64 /example3/Win32 /example3/x64 /lib /bin/* !bin/crunch_x64.exe build* DaemonEngine-crunch-ef4d32f/CHANGELOG.md000066400000000000000000000006251503722002600176330ustar00rootroot00000000000000# Change Log ## [0.1.4] - 2012-11-24 ### Added * KTX file format * Basic ETC1 support * Simple makefile ### Fixed * Various DDS format fixes ## [0.1.3] - 2012-04-26 ### Added * Ported to Linux (tested on Ubuntu x86 w/Codeblocks). Note that a few features of the cmd line tool don't work yet (eg. 
-timestamp) [0.1.4]: https://github.com/BinomialLLC/crunch [0.1.3]: https://github.com/BinomialLLC/crunchDaemonEngine-crunch-ef4d32f/CMakeLists.txt000066400000000000000000000127231503722002600205640ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.5) set(CMAKE_CXX_STANDARD 11) set(CRUNCH_PROJECT_NAME crunch) set(CRUNCH_LIBRARY_NAME crn) set(CRUNCH_EXE_NAME crunch) project(${CRUNCH_PROJECT_NAME} LANGUAGES CXX) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel" FORCE) endif() find_package(Git) if (Git_FOUND) execute_process( COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} OUTPUT_VARIABLE GIT_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE ) if (GIT_VERSION) add_definitions(-DCOMPUTED_VERSION_SUFFIX="Built from git-${GIT_VERSION}") endif() endif() macro(set_cxx_flag FLAG) if (${ARGC} GREATER 1) set(CMAKE_CXX_FLAGS_${ARGV1} "${CMAKE_CXX_FLAGS_${ARGV1}} ${FLAG}") else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}") endif() endmacro() macro(set_linker_flag FLAG) if (${ARGC} GREATER 1) set(CMAKE_EXE_LINKER_FLAGS_${ARGV1} "${CMAKE_EXE_LINKER_FLAGS_${ARGV1}} ${FLAG}") set(CMAKE_SHARED_LINKER_FLAGS_${ARGV1} "${CMAKE_SHARED_LINKER_FLAGS_${ARGV1}} ${FLAG}") set(CMAKE_MODULE_LINKER_FLAGS_${ARGV1} "${CMAKE_MODULE_LINKER_FLAGS_${ARGV1}} ${FLAG}") else() set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${FLAG}") set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} ${FLAG}") set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} ${FLAG}") endif() endmacro() # This option decides if crunch is dynamically linked against libcrn.so or # statically linked against libcrn.o; enabling it always builds libcrn.so. # This option is a builtin CMake one, the name means “build executables # against shared libraries”, not “build the shared libraries”. 
option(BUILD_SHARED_LIBS "Link executables against shared library" OFF) # Always build libcrn.so even if crunch is linked to libcrn statically. option(BUILD_SHARED_LIBCRN "Build shared libcrn" OFF) # Always build libcrn.a even if crunch is linked to libcrn dynamically. option(BUILD_STATIC_LIBCRN "Build static libcrn" OFF) # Build the crunch tool, implies the build of libcrn.o or libcrn.so. option(BUILD_CRUNCH "Build crunch" ON) # Build the provided examples, they only build on Windows for now. option(BUILD_EXAMPLES "Build examples" OFF) # Enable extra optimization flags, like using -O3 even in RelWithDebInfo build. option(USE_EXTRA_OPTIMIZATION "Enable extra optimization" ON) # Enable link time optimization, slows down the build but produce faster and smaller binaries. option(USE_LTO "Enable link-time optimization" OFF) # Enabling fast math makes generated images less likely to be reproducible. # See https://github.com/DaemonEngine/crunch/issues/29 option(USE_FAST_MATH "Enable fast math (generated images are less likely to be reproducible)" OFF) if (BUILD_STATIC_LIBCRN) option(INSTALL_STATIC_LIBCRN "Install static libraries" OFF) endif() if (MSVC) # Enable MSVC parallel compilation. set_cxx_flag("/MP") # MSVC doesn't implement strict aliasing so there is nothing else to do. # CMake already sets the /O2 flag on Release and RelWithDebInfo build and /O[1-2] already sets the /Oy flag. if (USE_FAST_MATH) # By default, the MSVC /fp:fast option enables /fp:contract (introduced in VS 2022). # See https://learn.microsoft.com/en-us/cpp/build/reference/fp-specify-floating-point-behavior # and https://devblogs.microsoft.com/cppblog/the-fpcontract-flag-and-changes-to-fp-modes-in-vs2022/ # By default, MSVC doesn't enable the /fp:fast option. 
set_cxx_flag("/fp:fast") endif() if (USE_LTO) set_cxx_flag("/GL" RELEASE) set_cxx_flag("/GL" RELWITHDEBINFO) set_cxx_flag("/GL" MINSIZEREL) set_linker_flag("/LTCG" RELEASE) set_linker_flag("/LTCG" RELWITHDEBINFO) set_linker_flag("/LTCG" MINSIZEREL) endif() else() # As written in crnlib.h and stb_image.h, strict aliasing should always be disabled. set_cxx_flag("-fno-strict-aliasing") # Generate the maximum amount of debug information, including macro definitions. set_cxx_flag("-g3" DEBUG) set_cxx_flag("-g3" RELWITHDEBINFO) set_cxx_flag("-pthread") set_linker_flag("-pthread") if (USE_EXTRA_OPTIMIZATION) # CMake already sets the -O3 flag on Release build and -O[1-3s] already sets the -fomit-frame-pointer flag. set_cxx_flag("-Og" DEBUG) set_cxx_flag("-O3" RELWITHDEBINFO) endif() if (USE_FAST_MATH) # By default, GCC uses -ffp-contract=fast with -std=gnu* and uses -ffp-contract=off with -std=c*. # See https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html # By default, GCC doesn't enable the -ffast-math option. set_cxx_flag("-ffast-math -fno-math-errno -ffp-contract=fast") else() # By default, GCC uses -std=gnu* and then enables -ffp-contract=fast even if -ffast-math is not enabled. set_cxx_flag("-ffp-contract=off") endif() # It should be done at the very end because it copies all compiler flags # to the linker flags. 
if (USE_LTO) set_cxx_flag("-flto" RELEASE) set_cxx_flag("-flto" RELWITHDEBINFO) set_cxx_flag("-flto" MINSIZEREL) set_linker_flag("${CMAKE_CXX_FLAGS}" RELEASE) set_linker_flag("${CMAKE_CXX_FLAGS}" RELWITHDEBINFO) set_linker_flag("${CMAKE_CXX_FLAGS}" MINSIZEREL) endif() endif() if (BUILD_SHARED_LIBCRN OR BUILD_STATIC_LIBCRN OR BUILD_CRUNCH OR BUILD_EXAMPLES) add_subdirectory(crnlib crnlib) endif() if (BUILD_CRUNCH) add_subdirectory(crunch _crunch) endif() if (BUILD_EXAMPLES) add_subdirectory(example1 _example1) add_subdirectory(example2 _example2) add_subdirectory(example3 _example3) endif() DaemonEngine-crunch-ef4d32f/README.md000066400000000000000000000572361503722002600173130ustar00rootroot00000000000000# Crunch **crunch/crnlib v1.04U** — Advanced DXTn texture compression library (Dæmon branch, Unity format variant) - Upstream for the Dæmon branch: https://github.com/DaemonEngine/crunch - Bug tracker for the Dæmon branch: https://github.com/DaemonEngine/crunch/issues Target & compiler|Build status|Target & compiler|Build status -|-|-|- Linux amd64 GCC|[![Build Status](https://dev.azure.com/UnvanquishedDevelopment/crunch/_apis/build/status%2FDaemonEngine.crunch?branchName=master&jobName=Job&configuration=Job%20Linux%20amd64%20GCC)](https://dev.azure.com/UnvanquishedDevelopment/crunch/_build/latest?definitionId=3&branchName=master)|Windows amd64 MSVC|[![Build status](https://ci.appveyor.com/api/projects/status/github/DaemonEngine/crunch?branch=master&svg=true)](https://ci.appveyor.com/project/DolceTriade/crunch/history) Linux amd64 Clang|[![Build Status](https://dev.azure.com/UnvanquishedDevelopment/crunch/_apis/build/status%2FDaemonEngine.crunch?branchName=master&jobName=Job&configuration=Job%20Linux%20amd64%20Clang)](https://dev.azure.com/UnvanquishedDevelopment/crunch/_build/latest?definitionId=3&branchName=master)|Windows i686 MSVC|[![Build 
status](https://ci.appveyor.com/api/projects/status/github/DaemonEngine/crunch?branch=master&svg=true)](https://ci.appveyor.com/project/DolceTriade/crunch/history) Linux i686 GCC|[![Build Status](https://dev.azure.com/UnvanquishedDevelopment/crunch/_apis/build/status%2FDaemonEngine.crunch?branchName=master&jobName=Job&configuration=Job%20Linux%20i686%20GCC)](https://dev.azure.com/UnvanquishedDevelopment/crunch/_build/latest?definitionId=3&branchName=master)|Windows amd64 MinGW|[![Build Status](https://dev.azure.com/UnvanquishedDevelopment/crunch/_apis/build/status%2FDaemonEngine.crunch?branchName=master&jobName=Job&configuration=Job%20Windows%20amd64%20MinGW)](https://dev.azure.com/UnvanquishedDevelopment/crunch/_build/latest?definitionId=3&branchName=master) Linux arm64 GCC|[![Build Status](https://dev.azure.com/UnvanquishedDevelopment/crunch/_apis/build/status%2FDaemonEngine.crunch?branchName=master&jobName=Job&configuration=Job%20Linux%20arm64%20GCC)](https://dev.azure.com/UnvanquishedDevelopment/crunch/_build/latest?definitionId=3&branchName=master)|Windows i686 MinGW|[![Build Status](https://dev.azure.com/UnvanquishedDevelopment/crunch/_apis/build/status%2FDaemonEngine.crunch?branchName=master&jobName=Job&configuration=Job%20Windows%20i686%20MinGW)](https://dev.azure.com/UnvanquishedDevelopment/crunch/_build/latest?definitionId=3&branchName=master) Linux armhf GCC|[![Build Status](https://dev.azure.com/UnvanquishedDevelopment/crunch/_apis/build/status%2FDaemonEngine.crunch?branchName=master&jobName=Job&configuration=Job%20Linux%20armhf%20GCC)](https://dev.azure.com/UnvanquishedDevelopment/crunch/_build/latest?definitionId=3&branchName=master)|macOS amd64 AppleClang|[![Build Status](https://dev.azure.com/UnvanquishedDevelopment/crunch/_apis/build/status%2FDaemonEngine.crunch?branchName=master&jobName=Job&configuration=Job%20macOS%20amd64%20AppleClang)](https://dev.azure.com/UnvanquishedDevelopment/crunch/_build/latest?definitionId=3&branchName=master) Web Asm.js 
Emscripten|[![Build Status](https://dev.azure.com/UnvanquishedDevelopment/crunch/_apis/build/status%2FDaemonEngine.crunch?branchName=master&jobName=Job&configuration=Job%20Web%20Asm.js%20Emscripten)](https://dev.azure.com/UnvanquishedDevelopment/crunch/_build/latest?definitionId=3&branchName=master)|macOS arm64 AppleClang|[![Build Status](https://dev.azure.com/UnvanquishedDevelopment/crunch/_apis/build/status%2FDaemonEngine.crunch?branchName=master&jobName=Job&configuration=Job%20macOS%20arm64%20AppleClang)](https://dev.azure.com/UnvanquishedDevelopment/crunch/_build/latest?definitionId=3&branchName=master) ## Dæmon crunch tool ℹ️ The [Dæmon engine](https://github.com/DaemonEngine/Daemon) uses the [`master`](https://github.com/DaemonEngine/crunch/tree/master) branch, which tracks the [Unity fork](https://github.com/Unity-Technologies/crunch/tree/unity), with additional fixes and features. The Dæmon engine is the open source game engine powering the [Unvanquished game](https://unvanquished.net). The produced `.crn` files are compatible with both the Daemon game engine and the Unity game engine. Dæmon crunch is brought to you by: - **2014-2024**: Dæmon Developers and contributors https://github.com/DaemonEngine/crunch - **2017-2018**: Alexander Suvorov and Unity Software Inc. https://github.com/Unity-Technologies/crunch/tree/unity - **2010-2017**: Richard Geldreich, Jr. and Binomial LLC and contributors https://github.com/BinomialLLC/crunch The Dæmon crunch is known to be used by: - The [Dæmon game engine](https://github.com/DaemonEngine/Daemon), - The [Urcheon game data build automation tool](https://github.com/DaemonEngine/Urcheon), - The [NetRadiant game level editor](https://netradiant.gitlab.io/) and the `q3map2` map compiler and light mapper, - The [Unvanquished game](https://unvanquished.net), - The [Xonotic game](https://xonotic.org), - Some games using the Unity game engine. 
This branch provides many improvements over the original crunch: - ✅️ Unity crunch format (runs many times faster and produces smaller files), - ✅️ Unity crunch metadata (the header is compatible with Unity), - ✅️ Improved image compatibility (1-bit PNG images are now supported), - ✅️ Added features and command line options (top mip renormalization and more), - ✅️ Network file system compatibility, - ✅️ Optional header-only checksumming, - ✅️ Multisystem and multiplatform (runs almost everywhere), - ✅️ CMake toolchain (with many useful build options). ## Unity crunch format This repository merges the improvements made by Unity Technologies: it produces smaller files, and produces them faster, than the original code by Binomial. Unity also modified the format, which makes it incompatible with earlier versions of the tool. Quote from [unvanquished.net](https://unvanquished.net/unvanquished-area-51): > [Unity guys said](https://blog.unity.com/technology/crunch-compression-of-etc-textures) that their modified crunch tool “can compress up to 2.5 > times faster, while providing about 10% better compression ratio”. So we > did a test on our own asset repository, re-crunching all the ressources > and textures packages. At the time we did the test the given corpus > produced 1797 .crn files. > The Unity’s crunch tool reduced compression time by 4.31 and reduced size > by 11.15%. They said “up to 2.5 time faster” but we’ve seen some random > textures being compressed 6 time faster and the average of the whole is > 4.3 time faster, and yes the tool compresses more than 10% more. ## Unity crunch metadata Unlike the Unity `crunch` tool built from their public repository, this tool produces files loadable by the Unity engine. Since [February 11th 2023](faf5127b8c69dfd1ae554f4f3bf8168048b48d9f), this branch sets the CRN `m_userdata0` field to `1` to make CRN files loadable by the Unity engine, which now refuses to load files with this value set to `0`. 
Files with this value set to `1` are expected to use the new format. Files with this value set to `0` may or may not use the new format. When Unity updated the tool and modified the format in an incompatible way, no bit was modified to detect if a CRN file was using the old or the new format. Unity has not updated that field in their public repository of Crunch so there may be files in the wild using the new format with this value set to `0`. ## Improved image compatibility The `stb_image` library was updated from version 1.18 to version 2.30, increasing the number of image format variants that can be converted, like 1-bit PNG formats. ## Added features and command line options In addition to the original `crunch` features and command line options this branch brings: - `-rtopmip`: option to use with `-renormalize` to also renormalize on the top mip-level. - `-noNormalDetection`: do not attempt to detect normal maps, to avoid selecting formats intended for normal maps when it's known that an image is not a normal map. It may prevent the tool from using the heavier and less-supported `DXT5_AGBR` format when `DXT1` is good enough. - `-h` or `--help`: print the command line built-in help. ## Network file system compatibility The original `crunch` tool used I/O functions not working on some network file systems like NFS or SSHFS. This limitation is now gone and `crunch` can now process image files stored over the network. ## Optional header-only checksumming For applications distributing their files in containers already providing a checksum mechanism for the whole contained file, it's now possible to only validate the CRN header checksum instead of the whole file checksum to not checksum the whole file twice. ## Multisystem and multiplatform portability Unlike upstream branches from Binomial LLC and Unity Software Inc., this branch focuses on keeping the code buildable outside of Windows and Visual Studio, and adds a CMake build option alongside the legacy Makefile. 
This `crunch` tool and the related `crnlib` library are known to build with and run on: - Compilers: GCC, MinGW, MSVC, Clang, Apple Clang, and more. - Systems: Linux, Windows, macOS, FreeBSD. - Architectures: amd64, arm64, i686, armhf. ## CMake toolchain CMake is now preferred to the old Makefile. The supplied CMake configuration provides many useful build options. Toolchain files for MinGW cross-compilation are also supplied. ## How to build ```sh git clone https://github.com/DaemonEngine/crunch.git cd crunch cmake -S. -Bbuild cmake --build build --parallel $(nproc) ``` Where `$(nproc)` is the number of cores of your computer. You'll then find a `crunch` binary in the `build/` folder. Some CMake build options are available (explore more with `ccmake`). ## Licensing This software uses the ZLIB license, which is located in license.txt. http://opensource.org/licenses/Zlib Richard Geldreich removed copyright on all his work on Crunch in order to put it in public domain on 2020-09-15 with commit [`crunch@57353fa`](https://github.com/BinomialLLC/crunch/commit/57353fa9ac0908893215bc30ba106adfb80c4c95) but this repository also contains commits by Alexander Suvorov from Unity Technologies and from Dæmon Developers and contributors. Portions of this software make use of public domain code originally written by Igor Pavlov (LZMA), RYG (`crn_ryg_dxt*`), and Sean Barrett (`stb_image*.h`). If you use this software in a product, an acknowledgment in the product documentation would be highly appreciated but is not required. Note: crunch originally used to live on Google Code: https://code.google.com/p/crunch/ ## Overview crnlib is a lossy texture compression library for developers that ship content using the DXT1/5/N or 3DC compressed color/normal map/cubemap mipmapped texture formats. It was written by the same author as the open source [LZHAM compression library](http://code.google.com/p/lzham/). It can compress mipmapped 2D textures, normal maps, and cubemaps to approx. 
1-1.25 bits/texel, and normal maps to 1.75-2 bits/texel. The actual bitrate depends on the complexity of the texture itself, the specified quality factor/target bitrate, and ultimately on the desired quality needed for a particular texture. crnlib differs significantly from other approaches because its compressed texture data format was carefully designed to be quickly transcodable directly to DXTn with no intermediate recompression step. The typical (single threaded) transcode to DXTn rate is generally between 100-250 megatexels/sec. The current library supports PC (Win32/x64) and Xbox 360. Fast random access to individual mipmap levels is supported. crnlib can also generate standard DDS files at a specified quality setting, which results in files that are much more compressible by LZMA/Deflate/etc. compared to files generated by standard DXTn texture tools (see below). This feature allows easy integration into any engine or graphics library that already supports DDS files. The CRN file format supports the following core DXTn texture formats: DXT1 (but not DXT1A), DXT5, DXT5A, and DXN/3DC It also supports several popular swizzled variants (several are also supported by AMD's Compressonator): DXT5_xGBR, DXT5_xGxR, DXT5_AGBR, and DXT5_CCxY (experimental luma-chroma YCoCg). ## Recommended software AMD's [Compressonator tool](https://github.com/GPUOpen-Tools/Compressonator) is recommended to view the DDS files created by the crunch tool and the included example projects. Note: Some of the swizzled DXTn DDS output formats (such as DXT5_xGBR) read/written by the crunch tool or examples deviate from the DX9 DDS standard, so DXSDK tools such as `DXTEX.EXE` won't load them at all or they won't be properly displayed. 
## Compression algorithm details The compression process employed in creating both CRN and clustered DDS files utilizes a very high quality, scalable DXTn endpoint optimizer capable of processing any number of pixels (instead of the typical hard coded 16), optional adaptive switching between several macroblock sizes/configurations (currently any combination of 4x4, 8x4, 4x8, and 8x8 pixel blocks), endpoint clusterization using top-down cluster analysis, vector quantization (VQ) of the selector indices, and several custom algorithms for compressing the resulting endpoint/selector codebooks and macroblock indices. Multiple feedback passes are performed between the clusterization and VQ steps to optimize quality, and several steps use a brute force refinement approach to improve quality. The majority of compression steps are multithreaded. The CRN format currently utilizes canonical Huffman coding for speed (similar to Deflate but with much larger tables), but the next major version will also utilize adaptive binary arithmetic coding and higher order context modeling using already developed tech from my LZHAM compression library. ## Supported file formats crnlib supports three compressed texture file formats. The first format (clustered DDS) is simple to integrate into an existing project (typically, no code changes are required), but it doesn't offer the highest quality/compression ratio that crnlib is capable of. Integrating the second, higher quality custom format (CRN) requires a few typically straightforward engine modifications to integrate the CRN→DXTn transcoder header file library into your tools/engine. ### DDS crnlib can compress textures to standard DX9-style `.dds` files using clustered DXTn compression, which is a subset of the approach used to create CRN files. (For completeness, crnlib also supports vanilla, block by block DXTn compression too, but that's not very interesting.) 
Clustered DXTn compressed DDS files are much more compressible than files created by other libraries/tools. Apart from increased compressibility, the DDS files generated by this process are completely standard so they should be fairly easy to add to a project with little to no code changes. To actually benefit from clustered DXTn DDS files, your engine needs to further losslessly compress the DDS data generated by crnlib using a lossless codec such as zlib, lzo, LZMA, LZHAM, etc. Most likely, your engine does this already. (If not, you definitely should because DXTn compressed textures generally contain a large amount of highly redundant data.) Clustered DDS files are intended to be the simplest/fastest way to integrate crnlib's tech into a project. ### CRN The second, better, option is to compress your textures to `.crn` files using crnlib. To read the resulting CRN data, you must add the CRN transcoder library (located in the included single file, stand-alone header file library inc/crn_decomp.h) into your application. CRN files provide noticeably higher quality at the same effective bitrate compared to clustered DXTn compressed DDS files. Also, CRN files don't require further lossless compression because they're already highly compressed. CRN files are a bit more difficult/risky to integrate into a project, but the resulting compression ratio and quality is superior vs. clustered DDS files. ### KTX crnlib and crunch can read/write the `.ktx` file format in various pixel formats. Rate distortion optimization (clustered DXTc compression) is not yet supported when writing KTX files. The KTX file format is just like DDS, except it's a fairly well specified standard created by the Khronos Group. Unfortunately, almost all of the tools the original Crunch author found that supported KTX were fairly (to very) buggy, or were limited to only a handful of pixel formats, so there's no guarantee that the KTX files written by crnlib can be reliably read by other tools. 
## Building the examples This release contains the source code and projects for three simple example projects: ### example1 Demonstrates how to use crnlib's high-level C-helper compression/decompression/transcoding functions in `inc/crnlib.h`. It's a fairly complete example of crnlib's functionality. ### example2 Shows how to transcode CRN files to DDS using **only** the functionality in `inc/crn_decomp.h`. It does not link against `crnlib.lib` or depend on it in any way. (Note: The complete source code, approx. 4800 lines, to the CRN transcoder is included in `inc/crn_decomp.h`.) `example2` is intended to show how simple it is to integrate CRN textures into your application. ### example3 Shows how to use the regular, low-level DXTn block compressor functions in `inc/crnlib.h`. This functionality is included for completeness. (Your engine or toolchain most likely already has its own DXTn compressor. crnlib's compressor is typically very competitive with or superior to most available closed and open source CPU-based compressors.) ## Creating compressed textures with crunch The simplest way to create compressed textures using crnlib is to integrate the `crunch` (or `crunch.exe`) command line tool into your texture build toolchain or export process. It can write DXTn compressed 2D/cubemap textures to regular DXTn compressed DDS, clustered (or reduced entropy) DXTn compressed DDS, or CRN files. It can also transcode or decompress files to several standard image formats, such as TGA or BMP. Run `crunch --help` for help. The `.crn` files created by `crunch` can be efficiently transcoded to DXTn using the included CRN transcoding library, located in full source form under `inc/crn_decomp.h`. Here are a few example crunch.exe command lines: 1. Compress blah.tga to blah.dds using normal DXT1 compression: * `crunch -file blah.tga -fileformat dds -dxt1` 2. 
Compress blah.tga to blah.dds using clustered DXT1 at an effective bitrate of 1.5 bits/texel, display image statistics: * `crunch -file blah.tga -fileformat dds -dxt1 -bitrate 1.5 -imagestats` 3. Compress blah.tga to blah.dds using clustered DXT1 at quality level 100 (from [0,255]), with no mipmaps, display LZMA statistics: * `crunch -file blah.tga -fileformat dds -dxt1 -quality 100 -mipmode none -lzmastats` 4. Compress blah.tga to blah.crn using clustered DXT1 at a bitrate of 1.2 bits/texel, no mipmaps: * `crunch -file blah.tga -dxt1 -bitrate 1.2 -mipmode none` 5. Decompress blah.dds to a .tga file: * `crunch -file blah.dds -fileformat tga` 6. Transcode blah.crn to a .dds file: * `crunch -file blah.crn` 7. Decompress blah.crn, writing each mipmap level to a separate .tga file: * `crunch -split -file blah.crn -fileformat tga` crunch.exe can do a lot more, like rescale/crop images before compression, convert images from one file format to another, compare images, process multiple images, etc. Note: I would have included the full source to crunch.exe, but it still has some low-level dependencies on crnlib internals which I didn't have time to address. This version of crunch.exe has some reduced functionality compared to an earlier eval release. For example, XML file support is not included in this version. ## Using crnlib The most flexible and powerful way of using crnlib is to integrate the library into your editor/toolchain/etc. and directly supply it with your raw/source texture bits. See the C-style APIs and comments in inc/crnlib.h. 
To compress, you basically fill in a few structs and call one function: ```c void *crn_compress( const crn_comp_params &comp_params, crn_uint32 &compressed_size, crn_uint32 *pActual_quality_level = NULL, float *pActual_bitrate = NULL); ``` Or, if you want crnlib to also generate mipmaps, you call this function: ```c void *crn_compress( const crn_comp_params &comp_params, const crn_mipmap_params &mip_params, crn_uint32 &compressed_size, crn_uint32 *pActual_quality_level = NULL, float *pActual_bitrate = NULL); ``` You can also transcode/uncompress DDS/CRN files to raw 32bpp images using `crn_decompress_crn_to_dds()` and `crn_decompress_dds_to_images()`. Internally, crnlib just uses inc/crn_decomp.h to transcode textures to DXTn. If you only need to transcode CRN format files to raw DXTn bits at runtime (and not compress), you don't actually need to compile or link against crnlib at all. Just include inc/crn_decomp.h, which contains a completely self-contained CRN transcoder in the "crnd" namespace. The `crnd_get_texture_info()`, `crnd_unpack_begin()`, `crnd_unpack_level()`, etc. functions are all you need to efficiently get at the raw DXTn bits, which can be directly supplied to whatever API or GPU you're using. (See example2.) Important note: When compiling under Native Client, be sure to define the `PLATFORM_NACL` macro before including the `inc/crn_decomp.h` header file library. ## Known issues/Bugs * crnlib currently assumes you'll be further losslessly compressing its output DDS files using LZMA. However, some engines use weaker codecs such as LZO, zlib, or custom codecs, so crnlib's bitrate measurements will be inaccurate. It should be easy to allow the caller to plug in custom lossless compressors for bitrate measurement. * Compressing to a desired bitrate can be time consuming, especially when processing large (2k or 4k) images to the CRN format. 
There are several high-level optimizations employed when compressing to clustered DXTn DDS files using multiple trials, but not so for CRN. * The CRN compressor does not currently use 3 color (transparent) DXT1 blocks at all, only 4 color blocks. So it doesn't support DXT1A transparency, and its output quality suffers a little due to this limitation. (Note that the clustered DXTn compressor used when writing clustered DDS files does _not_ have this limitation.) * The clustered DXT5/DXT5A compressor is able to group DXT5A blocks into clusters only if they use absolute (black/white) selector indices. This hurts performance at very low bitrates, because too many bits are effectively given to alpha. * DXT3 is not supported when writing CRN or clustered DXTn DDS files. (DXT3 is supported by crnlib when compressing to regular DXTn DDS files.) You'll get DXT5 files if you request DXT3. However, DXT3 is supported by the regular DXTn block compressor. (DXT3's 4bpp fixed alpha sucks versus DXT5 alpha blocks, so I don't see this as a big deal.) * The DXT5_CCXY format uses a simple YCoCg encoding that is workable but hasn't been tuned for max. quality yet. * Clustered (or rate distortion optimized) DXTc compression is only supported when writing to DDS, not KTX. Also, only plain block by block compression is supported when writing to ETC1, and CRN does not support ETC1. ## Compile to JavaScript and WebAssembly with Emscripten Download and install Emscripten: - https://emscripten.org/docs/getting_started/downloads.html From the repository directory, run: ```sh cmake -Semscripten -Bbuild-emscripten cmake --build build-emscripten ``` You’ll then find a `crunch.js` script and a `crunch.wasm` binary in the `build-emscripten/` folder. 
DaemonEngine-crunch-ef4d32f/cmake/000077500000000000000000000000001503722002600170775ustar00rootroot00000000000000DaemonEngine-crunch-ef4d32f/cmake/cross-toolchain-mingw32.cmake000066400000000000000000000010501503722002600244700ustar00rootroot00000000000000# Target operating system and architecture set( CMAKE_SYSTEM_NAME Windows ) set( CMAKE_SYSTEM_PROCESSOR x86 ) # C/C++ compilers set( CMAKE_C_COMPILER i686-w64-mingw32-gcc ) set( CMAKE_CXX_COMPILER i686-w64-mingw32-g++ ) set( CMAKE_RC_COMPILER i686-w64-mingw32-windres ) # Target prefix set( CMAKE_FIND_ROOT_PATH /usr/i686-w64-mingw32 ) # Find programs using host paths and headers/libraries using target paths set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER ) set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY ) set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY ) DaemonEngine-crunch-ef4d32f/cmake/cross-toolchain-mingw64.cmake000066400000000000000000000010631503722002600245010ustar00rootroot00000000000000# Target operating system and architecture set( CMAKE_SYSTEM_NAME Windows ) set( CMAKE_SYSTEM_PROCESSOR x86_64 ) # C/C++ compilers set( CMAKE_C_COMPILER x86_64-w64-mingw32-gcc ) set( CMAKE_CXX_COMPILER x86_64-w64-mingw32-g++ ) set( CMAKE_RC_COMPILER x86_64-w64-mingw32-windres ) # Target prefix set( CMAKE_FIND_ROOT_PATH /usr/x86_64-w64-mingw32 ) # Find programs using host paths and headers/libraries using target paths set( CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER ) set( CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY ) set( CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY ) DaemonEngine-crunch-ef4d32f/crnlib/000077500000000000000000000000001503722002600172705ustar00rootroot00000000000000DaemonEngine-crunch-ef4d32f/crnlib/CMakeLists.txt000066400000000000000000000234461503722002600220410ustar00rootroot00000000000000include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/../inc ) # Defines the source code for the library set(CRNLIB_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/crn_arealist.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_arealist.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_assert.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/crn_assert.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_buffer_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_cfile_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_checksum.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_checksum.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_clusterizer.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_color.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_colorized_console.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_colorized_console.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_command_line_params.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_command_line_params.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_comp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_comp.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_console.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_console.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_core.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_data_stream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_data_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_data_stream_serializer.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_dds_comp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_dds_comp.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_decomp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt1.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt5a.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt5a.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt_endpoint_refiner.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt_endpoint_refiner.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt_fast.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt_fast.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt_hc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt_hc.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt_hc_common.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt_hc_common.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt_image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_dxt_image.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_dynamic_stream.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_dynamic_string.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_dynamic_string.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_etc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_etc.h 
${CMAKE_CURRENT_SOURCE_DIR}/crn_file_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_file_utils.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_find_files.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_find_files.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_hash.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_hash.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_hash_map.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_hash_map.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_helpers.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_huffman_codes.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_huffman_codes.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_image.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_image_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_image_utils.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_intersect.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_jpgd.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_jpgd.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_jpge.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_jpge.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_ktx_texture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_ktx_texture.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_lzma_codec.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_lzma_codec.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_math.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_math.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_matrix.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_mem.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_miniz.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_miniz.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_mipmapped_texture.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_mipmapped_texture.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_packed_uint.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_pixel_format.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_pixel_format.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_platform.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_platform.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_prefix_coding.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_prefix_coding.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_qdxt1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_qdxt1.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_qdxt5.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_qdxt5.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_rand.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_rand.h 
${CMAKE_CURRENT_SOURCE_DIR}/crn_ray.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_rect.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_resample_filters.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_resample_filters.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_resampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_resampler.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_rg_etc1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_rg_etc1.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_ryg_dxt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_ryg_dxt.hpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_ryg_types.hpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_sparse_array.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_sparse_bit_array.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_sparse_bit_array.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_stb_image.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_strutils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_strutils.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_symbol_codec.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_symbol_codec.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_texture_comp.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_texture_comp.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_texture_conversion.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_texture_conversion.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_texture_file_types.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_texture_file_types.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_threaded_clusterizer.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_threaded_resampler.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_threaded_resampler.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_threading.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_threading_null.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_timer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_timer.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_traits.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_tree_clusterizer.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_types.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_utils.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_value.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_value.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_vec.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_vec_interval.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_vector.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_vector.h 
${CMAKE_CURRENT_SOURCE_DIR}/crn_vector2d.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_winhdr.h ${CMAKE_CURRENT_SOURCE_DIR}/crnlib.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zBuf.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zBuf.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zBuf2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zCrc.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zFile.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zFile.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zStream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_7zVersion.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Alloc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Alloc.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Bcj2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Bcj2.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Bra.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Bra.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Bra86.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_BraIA64.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_CpuArch.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzFind.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzFind.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzHash.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzmaDec.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzmaDec.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzmaEnc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzmaEnc.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzmaLib.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzmaLib.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_MyVersion.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Types.h ${CMAKE_CURRENT_SOURCE_DIR}/stb_image.h ${CMAKE_CURRENT_SOURCE_DIR}/stb_image_write.h ${CMAKE_CURRENT_SOURCE_DIR}/../inc/crn_decomp.h ${CMAKE_CURRENT_SOURCE_DIR}/../inc/crnlib.h ${CMAKE_CURRENT_SOURCE_DIR}/../inc/dds_defs.h ) if(WIN32) set(CRNLIB_THREAD_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzFindMt.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_LzFindMt.h ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Threads.cpp ${CMAKE_CURRENT_SOURCE_DIR}/lzma_Threads.h ${CMAKE_CURRENT_SOURCE_DIR}/crn_threading_win32.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crn_threading_win32.h ) else() set(CRNLIB_THREAD_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/crn_threading_pthreads.cpp 
${CMAKE_CURRENT_SOURCE_DIR}/crn_threading_pthreads.h
	)
endif()

# Compile every source exactly once into an object library; the static and
# shared flavours below are both assembled from these same objects.
set(CRUNCH_OBJECT_NAME ${CRUNCH_LIBRARY_NAME}-obj)
add_library(${CRUNCH_OBJECT_NAME} OBJECT ${CRNLIB_SRCS} ${CRNLIB_THREAD_SRCS})
# The objects may end up in a shared library, so they must be position independent.
set_property(TARGET ${CRUNCH_OBJECT_NAME} PROPERTY POSITION_INDEPENDENT_CODE 1)

# BUILD_SHARED_LIBS selects the flavour of the default library; the "other"
# flavour gets a distinguishing target-name suffix.
if(BUILD_SHARED_LIBS)
	set(CRUNCH_DEFAULT_LIBRARY_TYPE SHARED)
	set(CRUNCH_OTHER_LIBRARY_TYPE STATIC)
	set(CRUNCH_OTHER_LIBRARY_SUFFIX -static)
else()
	set(CRUNCH_DEFAULT_LIBRARY_TYPE STATIC)
	set(CRUNCH_OTHER_LIBRARY_TYPE SHARED)
	set(CRUNCH_OTHER_LIBRARY_SUFFIX -shared)
endif()

# Default library, built from the object library's objects.
add_library(${CRUNCH_LIBRARY_NAME} ${CRUNCH_DEFAULT_LIBRARY_TYPE} $<TARGET_OBJECTS:${CRUNCH_OBJECT_NAME}>)
set_property(TARGET ${CRUNCH_LIBRARY_NAME} PROPERTY POSITION_INDEPENDENT_CODE 1)

# A shared default library is always installed; a static one only on request.
if (BUILD_SHARED_LIBS OR (BUILD_STATIC_LIBCRN AND INSTALL_STATIC_LIBCRN))
	install(TARGETS ${CRUNCH_LIBRARY_NAME} DESTINATION lib)
endif()

# Optionally build the other flavour as well, from the same objects.
if ((BUILD_SHARED_LIBS AND BUILD_STATIC_LIBCRN) OR (NOT BUILD_SHARED_LIBS AND BUILD_SHARED_LIBCRN))
	set(CRUNCH_OTHER_LIBRARY_NAME ${CRUNCH_LIBRARY_NAME}${CRUNCH_OTHER_LIBRARY_SUFFIX})
	add_library(${CRUNCH_OTHER_LIBRARY_NAME} ${CRUNCH_OTHER_LIBRARY_TYPE} $<TARGET_OBJECTS:${CRUNCH_OBJECT_NAME}>)
	set_property(TARGET ${CRUNCH_OTHER_LIBRARY_NAME} PROPERTY POSITION_INDEPENDENT_CODE 1)

	# Outside Windows both flavours share one base file name.
	if(NOT WIN32)
		set_target_properties(${CRUNCH_OTHER_LIBRARY_NAME} PROPERTIES OUTPUT_NAME ${CRUNCH_LIBRARY_NAME})
	endif()

	install(TARGETS ${CRUNCH_OTHER_LIBRARY_NAME} DESTINATION lib)
endif()
DaemonEngine-crunch-ef4d32f/crnlib/Makefile000066400000000000000000000046201503722002600207320ustar00rootroot00000000000000.DEFAULT_GOAL := all .PHONY: all clean CXX = g++ COMPILE_CPU_OPTIONS = -march=core2 COMPILE_DEBUG_OPTIONS = -g COMPILE_OPTIMIZATION_OPTIONS = -O3 -fomit-frame-pointer -ffast-math -fno-math-errno -fno-strict-aliasing COMPILE_WARN_OPTIONS = -Wall -Wno-unused-value -Wno-unused COMPILE_OPTIONS = -std=c++11 $(COMPILE_CPU_OPTIONS) $(COMPILE_DEBUG_OPTIONS) $(COMPILE_OPTIMIZATION_OPTIONS) $(COMPILE_WARN_OPTIONS) LINKER_OPTIONS = -lpthread -g OBJECTS = \ crn_arealist.o \ crn_assert.o \ crn_checksum.o \
crn_colorized_console.o \ crn_command_line_params.o \ crn_comp.o \ crn_console.o \ crn_core.o \ crn_data_stream.o \ crn_mipmapped_texture.o \ crn_decomp.o \ crn_dxt1.o \ crn_dxt5a.o \ crn_dxt.o \ crn_dxt_endpoint_refiner.o \ crn_dxt_fast.o \ crn_dxt_hc_common.o \ crn_dxt_hc.o \ crn_dxt_image.o \ crn_dynamic_string.o \ crn_file_utils.o \ crn_find_files.o \ crn_hash.o \ crn_hash_map.o \ crn_huffman_codes.o \ crn_image_utils.o \ crnlib.o \ crn_math.o \ crn_mem.o \ crn_pixel_format.o \ crn_platform.o \ crn_prefix_coding.o \ crn_qdxt1.o \ crn_qdxt5.o \ crn_rand.o \ crn_resample_filters.o \ crn_resampler.o \ crn_ryg_dxt.o \ crn_sparse_bit_array.o \ crn_stb_image.o \ crn_strutils.o \ crn_symbol_codec.o \ crn_texture_file_types.o \ crn_threaded_resampler.o \ crn_threading_pthreads.o \ crn_timer.o \ crn_utils.o \ crn_value.o \ crn_vector.o \ crn_texture_comp.o \ crn_texture_conversion.o \ crn_dds_comp.o \ crn_lzma_codec.o \ crn_ktx_texture.o \ crn_etc.o \ crn_rg_etc1.o \ crn_miniz.o \ crn_jpge.o \ crn_jpgd.o \ lzma_7zBuf2.o \ lzma_7zBuf.o \ lzma_7zCrc.o \ lzma_7zFile.o \ lzma_7zStream.o \ lzma_Alloc.o \ lzma_Bcj2.o \ lzma_Bra86.o \ lzma_Bra.o \ lzma_BraIA64.o \ lzma_LzFind.o \ lzma_LzmaDec.o \ lzma_LzmaEnc.o \ lzma_LzmaLib.o all: crunch %.o: %.cpp $(CXX) $< -o $@ -c $(COMPILE_OPTIONS) crunch.o: ../crunch/crunch.cpp $(CXX) $< -o $@ -c -I../inc -I../crnlib $(COMPILE_OPTIONS) corpus_gen.o: ../crunch/corpus_gen.cpp $(CXX) $< -o $@ -c -I../inc -I../crnlib $(COMPILE_OPTIONS) corpus_test.o: ../crunch/corpus_test.cpp $(CXX) $< -o $@ -c -I../inc -I../crnlib $(COMPILE_OPTIONS) crunch: $(OBJECTS) crunch.o corpus_gen.o corpus_test.o $(CXX) $(OBJECTS) crunch.o corpus_gen.o corpus_test.o -o crunch $(LINKER_OPTIONS) clean: rm $(OBJECTS) crunch.o corpus_gen.o corpus_test.o crunch DaemonEngine-crunch-ef4d32f/crnlib/crn_arealist.cpp000066400000000000000000000415521503722002600224510ustar00rootroot00000000000000// File: crn_arealist.cpp - 2D shape algebra (currently unused) // See Copyright 
Notice and license at the end of inc/crnlib.h // Ported from the PowerView DOS image viewer, a product I wrote back in 1993. Not currently used in the open source release of crnlib. #include "crn_core.h" #include "crn_arealist.h" #define RECT_DEBUG namespace crnlib { static void area_fatal_error(const char*, const char* pMsg, ...) { va_list args; va_start(args, pMsg); char buf[512]; crnlib_vsnprintf(buf, sizeof(buf), pMsg, args); va_end(args); CRNLIB_FAIL(buf); } static Area* delete_area(Area_List* Plist, Area* Parea) { Area *p, *q; #ifdef RECT_DEBUG if ((Parea == Plist->Phead) || (Parea == Plist->Ptail)) area_fatal_error("delete_area", "tried to remove head or tail"); #endif p = Parea->Pprev; q = Parea->Pnext; p->Pnext = q; q->Pprev = p; Parea->Pnext = Plist->Pfree; Parea->Pprev = NULL; Plist->Pfree = Parea; return (q); } static Area* alloc_area(Area_List* Plist) { Area* p = Plist->Pfree; if (p == NULL) { if (Plist->next_free == Plist->total_areas) area_fatal_error("alloc_area", "Out of areas!"); p = Plist->Phead + Plist->next_free; Plist->next_free++; } else Plist->Pfree = p->Pnext; return (p); } static Area* insert_area_before(Area_List* Plist, Area* Parea, int x1, int y1, int x2, int y2) { Area *p, *Pnew_area = alloc_area(Plist); p = Parea->Pprev; p->Pnext = Pnew_area; Pnew_area->Pprev = p; Pnew_area->Pnext = Parea; Parea->Pprev = Pnew_area; Pnew_area->x1 = x1; Pnew_area->y1 = y1; Pnew_area->x2 = x2; Pnew_area->y2 = y2; return (Pnew_area); } static Area* insert_area_after(Area_List* Plist, Area* Parea, int x1, int y1, int x2, int y2) { Area *p, *Pnew_area = alloc_area(Plist); p = Parea->Pnext; p->Pprev = Pnew_area; Pnew_area->Pnext = p; Pnew_area->Pprev = Parea; Parea->Pnext = Pnew_area; Pnew_area->x1 = x1; Pnew_area->y1 = y1; Pnew_area->x2 = x2; Pnew_area->y2 = y2; return (Pnew_area); } void Area_List_deinit(Area_List* Pobj_base) { Area_List* Plist = (Area_List*)Pobj_base; if (!Plist) return; if (Plist->Phead) { crnlib_free(Plist->Phead); Plist->Phead = NULL; 
} crnlib_free(Plist); } Area_List* Area_List_init(int max_areas) { Area_List* Plist = (Area_List*)crnlib_calloc(1, sizeof(Area_List)); Plist->total_areas = max_areas + 2; Plist->Phead = (Area*)crnlib_calloc(max_areas + 2, sizeof(Area)); Plist->Ptail = Plist->Phead + 1; Plist->Phead->Pprev = NULL; Plist->Phead->Pnext = Plist->Ptail; Plist->Ptail->Pprev = Plist->Phead; Plist->Ptail->Pnext = NULL; Plist->Pfree = NULL; Plist->next_free = 2; return (Plist); } void Area_List_print(Area_List* Plist) { Area* Parea = Plist->Phead->Pnext; while (Parea != Plist->Ptail) { printf("%04i %04i : %04i %04i\n", Parea->x1, Parea->y1, Parea->x2, Parea->y2); Parea = Parea->Pnext; } } Area_List* Area_List_dup_new(Area_List* Plist, int x_ofs, int y_ofs) { int i; Area_List* Pnew_list = (Area_List*)crnlib_calloc(1, sizeof(Area_List)); Pnew_list->total_areas = Plist->total_areas; Pnew_list->Phead = (Area*)crnlib_malloc(sizeof(Area) * Plist->total_areas); Pnew_list->Ptail = Pnew_list->Phead + 1; Pnew_list->Pfree = (Plist->Pfree) ? ((Plist->Pfree - Plist->Phead) + Pnew_list->Phead) : NULL; Pnew_list->next_free = Plist->next_free; memcpy(Pnew_list->Phead, Plist->Phead, sizeof(Area) * Plist->total_areas); for (i = 0; i < Plist->total_areas; i++) { Pnew_list->Phead[i].Pnext = (Plist->Phead[i].Pnext == NULL) ? NULL : (Plist->Phead[i].Pnext - Plist->Phead) + Pnew_list->Phead; Pnew_list->Phead[i].Pprev = (Plist->Phead[i].Pprev == NULL) ? 
NULL : (Plist->Phead[i].Pprev - Plist->Phead) + Pnew_list->Phead; Pnew_list->Phead[i].x1 += x_ofs; Pnew_list->Phead[i].y1 += y_ofs; Pnew_list->Phead[i].x2 += x_ofs; Pnew_list->Phead[i].y2 += y_ofs; } return (Pnew_list); } uint Area_List_get_num(Area_List* Plist) { uint num = 0; Area* Parea = Plist->Phead->Pnext; while (Parea != Plist->Ptail) { num++; Parea = Parea->Pnext; } return num; } void Area_List_dup(Area_List* Psrc_list, Area_List* Pdst_list, int x_ofs, int y_ofs) { int i; if (Psrc_list->total_areas != Pdst_list->total_areas) area_fatal_error("Area_List_dup", "Src and Dst total_areas must be equal!"); Pdst_list->Pfree = (Psrc_list->Pfree) ? ((Psrc_list->Pfree - Psrc_list->Phead) + Pdst_list->Phead) : NULL; Pdst_list->next_free = Psrc_list->next_free; memcpy(Pdst_list->Phead, Psrc_list->Phead, sizeof(Area) * Psrc_list->total_areas); if ((x_ofs) || (y_ofs)) { for (i = 0; i < Psrc_list->total_areas; i++) { Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == NULL) ? NULL : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == NULL) ? NULL : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; Pdst_list->Phead[i].x1 += x_ofs; Pdst_list->Phead[i].y1 += y_ofs; Pdst_list->Phead[i].x2 += x_ofs; Pdst_list->Phead[i].y2 += y_ofs; } } else { for (i = 0; i < Psrc_list->total_areas; i++) { Pdst_list->Phead[i].Pnext = (Psrc_list->Phead[i].Pnext == NULL) ? NULL : (Psrc_list->Phead[i].Pnext - Psrc_list->Phead) + Pdst_list->Phead; Pdst_list->Phead[i].Pprev = (Psrc_list->Phead[i].Pprev == NULL) ? 
NULL : (Psrc_list->Phead[i].Pprev - Psrc_list->Phead) + Pdst_list->Phead; } } } void Area_List_copy( Area_List* Psrc_list, Area_List* Pdst_list, int x_ofs, int y_ofs) { Area* Parea = Psrc_list->Phead->Pnext; Area_List_clear(Pdst_list); if ((x_ofs) || (y_ofs)) { Area* Pprev_area = Pdst_list->Phead; while (Parea != Psrc_list->Ptail) { // Area *p, *Pnew_area; Area* Pnew_area; if (Pdst_list->next_free == Pdst_list->total_areas) area_fatal_error("Area_List_copy", "Out of areas!"); Pnew_area = Pdst_list->Phead + Pdst_list->next_free; Pdst_list->next_free++; Pnew_area->Pprev = Pprev_area; Pprev_area->Pnext = Pnew_area; Pnew_area->x1 = Parea->x1 + x_ofs; Pnew_area->y1 = Parea->y1 + y_ofs; Pnew_area->x2 = Parea->x2 + x_ofs; Pnew_area->y2 = Parea->y2 + y_ofs; Pprev_area = Pnew_area; Parea = Parea->Pnext; } Pprev_area->Pnext = Pdst_list->Ptail; } else { #if 0 while (Parea != Psrc_list->Ptail) { insert_area_after(Pdst_list, Pdst_list->Phead, Parea->x1, Parea->y1, Parea->x2, Parea->y2); Parea = Parea->Pnext; } #endif Area* Pprev_area = Pdst_list->Phead; while (Parea != Psrc_list->Ptail) { // Area *p, *Pnew_area; Area* Pnew_area; if (Pdst_list->next_free == Pdst_list->total_areas) area_fatal_error("Area_List_copy", "Out of areas!"); Pnew_area = Pdst_list->Phead + Pdst_list->next_free; Pdst_list->next_free++; Pnew_area->Pprev = Pprev_area; Pprev_area->Pnext = Pnew_area; Pnew_area->x1 = Parea->x1; Pnew_area->y1 = Parea->y1; Pnew_area->x2 = Parea->x2; Pnew_area->y2 = Parea->y2; Pprev_area = Pnew_area; Parea = Parea->Pnext; } Pprev_area->Pnext = Pdst_list->Ptail; } } void Area_List_clear(Area_List* Plist) { Plist->Phead->Pnext = Plist->Ptail; Plist->Ptail->Pprev = Plist->Phead; Plist->Pfree = NULL; Plist->next_free = 2; } void Area_List_set(Area_List* Plist, int x1, int y1, int x2, int y2) { Plist->Pfree = NULL; Plist->Phead[2].x1 = x1; Plist->Phead[2].y1 = y1; Plist->Phead[2].x2 = x2; Plist->Phead[2].y2 = y2; Plist->Phead[2].Pprev = Plist->Phead; Plist->Phead->Pnext = Plist->Phead 
+ 2; Plist->Phead[2].Pnext = Plist->Ptail; Plist->Ptail->Pprev = Plist->Phead + 2; Plist->next_free = 3; } void Area_List_remove(Area_List* Plist, int x1, int y1, int x2, int y2) { int l, h; Area* Parea = Plist->Phead->Pnext; #ifdef RECT_DEBUG if ((x1 > x2) || (y1 > y2)) area_fatal_error("area_list_remove", "invalid coords: %i %i %i %i", x1, y1, x2, y2); #endif while (Parea != Plist->Ptail) { // Not touching if ((x2 < Parea->x1) || (x1 > Parea->x2) || (y2 < Parea->y1) || (y1 > Parea->y2)) { Parea = Parea->Pnext; continue; } // Completely covers if ((x1 <= Parea->x1) && (x2 >= Parea->x2) && (y1 <= Parea->y1) && (y2 >= Parea->y2)) { if ((x1 == Parea->x1) && (x2 == Parea->x2) && (y1 == Parea->y1) && (y2 == Parea->y2)) { delete_area(Plist, Parea); return; } Parea = delete_area(Plist, Parea); continue; } // top if (y1 > Parea->y1) { insert_area_before(Plist, Parea, Parea->x1, Parea->y1, Parea->x2, y1 - 1); } // bottom if (y2 < Parea->y2) { insert_area_before(Plist, Parea, Parea->x1, y2 + 1, Parea->x2, Parea->y2); } l = math::maximum(y1, Parea->y1); h = math::minimum(y2, Parea->y2); // left middle if (x1 > Parea->x1) { insert_area_before(Plist, Parea, Parea->x1, l, x1 - 1, h); } // right middle if (x2 < Parea->x2) { insert_area_before(Plist, Parea, x2 + 1, l, Parea->x2, h); } // early out - we know there's nothing else to remove, as areas can // never overlap if ((x1 >= Parea->x1) && (x2 <= Parea->x2) && (y1 >= Parea->y1) && (y2 <= Parea->y2)) { delete_area(Plist, Parea); return; } Parea = delete_area(Plist, Parea); } } void Area_List_insert(Area_List* Plist, int x1, int y1, int x2, int y2, bool combine) { Area* Parea = Plist->Phead->Pnext; #ifdef RECT_DEBUG if ((x1 > x2) || (y1 > y2)) area_fatal_error("Area_List_insert", "invalid coords: %i %i %i %i", x1, y1, x2, y2); #endif while (Parea != Plist->Ptail) { // totally covers if ((x1 <= Parea->x1) && (x2 >= Parea->x2) && (y1 <= Parea->y1) && (y2 >= Parea->y2)) { Parea = delete_area(Plist, Parea); continue; } // intersects 
if ((x2 >= Parea->x1) && (x1 <= Parea->x2) && (y2 >= Parea->y1) && (y1 <= Parea->y2)) { int ax1, ay1, ax2, ay2; ax1 = Parea->x1; ay1 = Parea->y1; ax2 = Parea->x2; ay2 = Parea->y2; if (x1 < ax1) Area_List_insert(Plist, x1, math::maximum(y1, ay1), ax1 - 1, math::minimum(y2, ay2), combine); if (x2 > ax2) Area_List_insert(Plist, ax2 + 1, math::maximum(y1, ay1), x2, math::minimum(y2, ay2), combine); if (y1 < ay1) Area_List_insert(Plist, x1, y1, x2, ay1 - 1, combine); if (y2 > ay2) Area_List_insert(Plist, x1, ay2 + 1, x2, y2, combine); return; } if (combine) { if ((x1 == Parea->x1) && (x2 == Parea->x2)) { if ((y2 == Parea->y1 - 1) || (y1 == Parea->y2 + 1)) { delete_area(Plist, Parea); Area_List_insert(Plist, x1, math::minimum(y1, Parea->y1), x2, math::maximum(y2, Parea->y2), CRNLIB_TRUE); return; } } else if ((y1 == Parea->y1) && (y2 == Parea->y2)) { if ((x2 == Parea->x1 - 1) || (x1 == Parea->x2 + 1)) { delete_area(Plist, Parea); Area_List_insert(Plist, math::minimum(x1, Parea->x1), y1, math::maximum(x2, Parea->x2), y2, CRNLIB_TRUE); return; } } } Parea = Parea->Pnext; } insert_area_before(Plist, Parea, x1, y1, x2, y2); } void Area_List_intersect_area(Area_List* Plist, int x1, int y1, int x2, int y2) { Area* Parea = Plist->Phead->Pnext; while (Parea != Plist->Ptail) { // doesn't cover if ((x2 < Parea->x1) || (x1 > Parea->x2) || (y2 < Parea->y1) || (y1 > Parea->y2)) { Parea = delete_area(Plist, Parea); continue; } // totally covers if ((x1 <= Parea->x1) && (x2 >= Parea->x2) && (y1 <= Parea->y1) && (y2 >= Parea->y2)) { Parea = Parea->Pnext; continue; } // Oct 21- should insert after, because deleted area will access the NEXT area! 
// insert_area_after(Plist, Parea, // math::maximum(x1, Parea->x1), // math::maximum(y1, Parea->y1), // math::minimum(x2, Parea->x2), // math::minimum(y2, Parea->y2)); insert_area_before(Plist, Parea, math::maximum(x1, Parea->x1), math::maximum(y1, Parea->y1), math::minimum(x2, Parea->x2), math::minimum(y2, Parea->y2)); Parea = delete_area(Plist, Parea); } } #if 0 void Area_List_intersect_Area_List( Area_List *Pouter_list, Area_List *Pinner_list, Area_List *Pdst_list) { Area *Parea1 = Pouter_list->Phead->Pnext; while (Parea1 != Pouter_list->Ptail) { Area *Parea2 = Pinner_list->Phead->Pnext; int x1, y1, x2, y2; x1 = Parea1->x1; x2 = Parea1->x2; y1 = Parea1->y1; y2 = Parea1->y2; while (Parea2 != Pinner_list->Ptail) { if ((x1 <= Parea2->x2) && (x2 >= Parea2->x1) && (y1 <= Parea2->y2) && (y2 >= Parea2->y1)) { insert_area_after(Pdst_list, Pdst_list->Phead, math::maximum(x1, Parea2->x1), math::maximum(y1, Parea2->y1), math::minimum(x2, Parea2->x2), math::minimum(y2, Parea2->y2)); } Parea2 = Parea2->Pnext; } Parea1 = Parea1->Pnext; } } #endif #if 1 void Area_List_intersect_Area_List(Area_List* Pouter_list, Area_List* Pinner_list, Area_List* Pdst_list) { Area* Parea1 = Pouter_list->Phead->Pnext; while (Parea1 != Pouter_list->Ptail) { Area* Parea2 = Pinner_list->Phead->Pnext; int x1, y1, x2, y2; x1 = Parea1->x1; x2 = Parea1->x2; y1 = Parea1->y1; y2 = Parea1->y2; while (Parea2 != Pinner_list->Ptail) { if ((x1 <= Parea2->x2) && (x2 >= Parea2->x1) && (y1 <= Parea2->y2) && (y2 >= Parea2->y1)) { int nx1, ny1, nx2, ny2; nx1 = math::maximum(x1, Parea2->x1); ny1 = math::maximum(y1, Parea2->y1); nx2 = math::minimum(x2, Parea2->x2); ny2 = math::minimum(y2, Parea2->y2); if (Pdst_list->Phead->Pnext == Pdst_list->Ptail) { insert_area_after(Pdst_list, Pdst_list->Phead, nx1, ny1, nx2, ny2); } else { Area_Ptr Ptemp = Pdst_list->Phead->Pnext; if ((Ptemp->x1 == nx1) && (Ptemp->x2 == nx2)) { if (Ptemp->y1 == (ny2 + 1)) { Ptemp->y1 = ny1; goto next; } else if (Ptemp->y2 == (ny1 - 1)) { 
Ptemp->y2 = ny2; goto next; } } else if ((Ptemp->y1 == ny1) && (Ptemp->y2 == ny2)) { if (Ptemp->x1 == (nx2 + 1)) { Ptemp->x1 = nx1; goto next; } else if (Ptemp->x2 == (nx1 - 1)) { Ptemp->x2 = nx2; goto next; } } insert_area_after(Pdst_list, Pdst_list->Phead, nx1, ny1, nx2, ny2); } } next: Parea2 = Parea2->Pnext; } Parea1 = Parea1->Pnext; } } #endif Area_List_Ptr Area_List_create_optimal(Area_List_Ptr Plist) { Area_Ptr Parea = Plist->Phead->Pnext, Parea_after; int num = 2; Area_List_Ptr Pnew_list; while (Parea != Plist->Ptail) { num++; Parea = Parea->Pnext; } Pnew_list = Area_List_init(num); Parea = Plist->Phead->Pnext; Parea_after = Pnew_list->Phead; while (Parea != Plist->Ptail) { Parea_after = insert_area_after(Pnew_list, Parea_after, Parea->x1, Parea->y1, Parea->x2, Parea->y2); Parea = Parea->Pnext; } return (Pnew_list); } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_arealist.h000066400000000000000000000037261503722002600221170ustar00rootroot00000000000000// File: crn_arealist.h - 2D shape algebra // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { struct Area { struct Area *Pprev, *Pnext; int x1, y1, x2, y2; uint get_width() const { return x2 - x1 + 1; } uint get_height() const { return y2 - y1 + 1; } uint get_area() const { return get_width() * get_height(); } }; typedef Area* Area_Ptr; struct Area_List { int total_areas; int next_free; Area *Phead, *Ptail, *Pfree; }; typedef Area_List* Area_List_Ptr; Area_List* Area_List_init(int max_areas); void Area_List_deinit(Area_List* Pobj_base); void Area_List_print(Area_List* Plist); Area_List* Area_List_dup_new(Area_List* Plist, int x_ofs, int y_ofs); uint Area_List_get_num(Area_List* Plist); // src and dst area lists must have the same number of total areas. 
void Area_List_dup(Area_List* Psrc_list, Area_List* Pdst_list, int x_ofs, int y_ofs); void Area_List_copy(Area_List* Psrc_list, Area_List* Pdst_list, int x_ofs, int y_ofs); void Area_List_clear(Area_List* Plist); void Area_List_set(Area_List* Plist, int x1, int y1, int x2, int y2); // logical: x and (not y) void Area_List_remove(Area_List* Plist, int x1, int y1, int x2, int y2); // logical: x or y void Area_List_insert(Area_List* Plist, int x1, int y1, int x2, int y2, bool combine); // logical: x and y void Area_List_intersect_area(Area_List* Plist, int x1, int y1, int x2, int y2); // logical: x and y void Area_List_intersect_Area_List(Area_List* Pouter_list, Area_List* Pinner_list, Area_List* Pdst_list); Area_List_Ptr Area_List_create_optimal(Area_List_Ptr Plist); } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_assert.cpp000066400000000000000000000027161503722002600221450ustar00rootroot00000000000000// File: crn_assert.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #if CRNLIB_USE_WIN32_API #include "crn_winhdr.h" #endif static bool g_fail_exceptions; static bool g_exit_on_failure = true; void crnlib_enable_fail_exceptions(bool enabled) { g_fail_exceptions = enabled; } void crnlib_assert(const char* pExp, const char* pFile, unsigned line) { char buf[512]; crnlib_snprintf(buf, sizeof(buf), "%s(%u): Assertion failed: \"%s\"\n", pFile, line, pExp); crnlib_output_debug_string(buf); fputs(buf, stderr); if (crnlib_is_debugger_present()) crnlib_debug_break(); } void crnlib_fail(const char* pExp, const char* pFile, unsigned line) { char buf[512]; crnlib_snprintf(buf, sizeof(buf), "%s(%u): Failure: \"%s\"\n", pFile, line, pExp); crnlib_output_debug_string(buf); fputs(buf, stderr); if (crnlib_is_debugger_present()) crnlib_debug_break(); #if CRNLIB_USE_WIN32_API if (g_fail_exceptions) RaiseException(CRNLIB_FAIL_EXCEPTION_CODE, 0, 0, NULL); else #endif if (g_exit_on_failure) exit(EXIT_FAILURE); } void trace(const char* 
pFmt, va_list args) { if (crnlib_is_debugger_present()) { char buf[512]; crnlib_snprintf(buf, sizeof(buf), pFmt, args); crnlib_output_debug_string(buf); } }; void trace(const char* pFmt, ...) { va_list args; va_start(args, pFmt); trace(pFmt, args); va_end(args); }; DaemonEngine-crunch-ef4d32f/crnlib/crn_assert.h000066400000000000000000000030431503722002600216040ustar00rootroot00000000000000// File: crn_assert.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once const unsigned int CRNLIB_FAIL_EXCEPTION_CODE = 256U; void crnlib_enable_fail_exceptions(bool enabled); void crnlib_assert(const char* pExp, const char* pFile, unsigned line); void crnlib_fail(const char* pExp, const char* pFile, unsigned line); #ifdef NDEBUG #define CRNLIB_ASSERT(x) ((void)0) #undef CRNLIB_ASSERTS_ENABLED #else #define CRNLIB_ASSERT(_exp) (void)((!!(_exp)) || (crnlib_assert(#_exp, __FILE__, __LINE__), 0)) #define CRNLIB_ASSERTS_ENABLED #endif #define CRNLIB_VERIFY(_exp) (void)((!!(_exp)) || (crnlib_assert(#_exp, __FILE__, __LINE__), 0)) #define CRNLIB_FAIL(msg) \ do { \ crnlib_fail(#msg, __FILE__, __LINE__); \ } while (0) #define CRNLIB_ASSERT_OPEN_RANGE(x, l, h) CRNLIB_ASSERT((x >= l) && (x < h)) #define CRNLIB_ASSERT_CLOSED_RANGE(x, l, h) CRNLIB_ASSERT((x >= l) && (x <= h)) void trace(const char* pFmt, va_list args); void trace(const char* pFmt, ...); #define CRNLIB_ASSUME(p) static_assert(p, "") #ifdef NDEBUG template inline T crnlib_assert_range(T i, T) { return i; } template inline T crnlib_assert_range_incl(T i, T) { return i; } #else template inline T crnlib_assert_range(T i, T m) { CRNLIB_ASSERT((i >= 0) && (i < m)); return i; } template inline T crnlib_assert_range_incl(T i, T m) { CRNLIB_ASSERT((i >= 0) && (i <= m)); return i; } #endif DaemonEngine-crunch-ef4d32f/crnlib/crn_atomics.h000066400000000000000000000141621503722002600217460ustar00rootroot00000000000000// File: crn_atomics.h #ifndef CRN_ATOMICS_H #define CRN_ATOMICS_H #ifdef WIN32 #pragma once 
#endif #ifdef WIN32 #include "crn_winhdr.h" #endif #if defined(__GNUC__) && CRNLIB_PLATFORM_PC extern __inline__ __attribute__((__always_inline__, __gnu_inline__)) void crnlib_yield_processor() { __asm__ __volatile__("pause"); } #else CRNLIB_FORCE_INLINE void crnlib_yield_processor() { #if CRNLIB_USE_MSVC_INTRINSICS #if CRNLIB_PLATFORM_PC_X64 _mm_pause(); #else YieldProcessor(); #endif #else // No implementation #endif } #endif #if CRNLIB_USE_WIN32_ATOMIC_FUNCTIONS extern "C" __int64 _InterlockedCompareExchange64(__int64 volatile* Destination, __int64 Exchange, __int64 Comperand); #if defined(_MSC_VER) #pragma intrinsic(_InterlockedCompareExchange64) #endif #endif // CRNLIB_USE_WIN32_ATOMIC_FUNCTIONS namespace crnlib { #if CRNLIB_USE_WIN32_ATOMIC_FUNCTIONS typedef LONG atomic32_t; typedef LONGLONG atomic64_t; // Returns the original value. inline atomic32_t atomic_compare_exchange32(atomic32_t volatile* pDest, atomic32_t exchange, atomic32_t comparand) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return InterlockedCompareExchange(pDest, exchange, comparand); } // Returns the original value. inline atomic64_t atomic_compare_exchange64(atomic64_t volatile* pDest, atomic64_t exchange, atomic64_t comparand) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 7) == 0); return _InterlockedCompareExchange64(pDest, exchange, comparand); } // Returns the resulting incremented value. inline atomic32_t atomic_increment32(atomic32_t volatile* pDest) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return InterlockedIncrement(pDest); } // Returns the resulting decremented value. inline atomic32_t atomic_decrement32(atomic32_t volatile* pDest) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return InterlockedDecrement(pDest); } // Returns the original value. inline atomic32_t atomic_exchange32(atomic32_t volatile* pDest, atomic32_t val) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return InterlockedExchange(pDest, val); } // Returns the resulting value. 
inline atomic32_t atomic_add32(atomic32_t volatile* pDest, atomic32_t val) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return InterlockedExchangeAdd(pDest, val) + val; } // Returns the original value. inline atomic32_t atomic_exchange_add32(atomic32_t volatile* pDest, atomic32_t val) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return InterlockedExchangeAdd(pDest, val); } #elif CRNLIB_USE_GCC_ATOMIC_BUILTINS typedef long atomic32_t; typedef long long atomic64_t; // Returns the original value. inline atomic32_t atomic_compare_exchange32(atomic32_t volatile* pDest, atomic32_t exchange, atomic32_t comparand) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return __sync_val_compare_and_swap(pDest, comparand, exchange); } // Returns the original value. inline atomic64_t atomic_compare_exchange64(atomic64_t volatile* pDest, atomic64_t exchange, atomic64_t comparand) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 7) == 0); return __sync_val_compare_and_swap(pDest, comparand, exchange); } // Returns the resulting incremented value. inline atomic32_t atomic_increment32(atomic32_t volatile* pDest) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return __sync_add_and_fetch(pDest, 1); } // Returns the resulting decremented value. inline atomic32_t atomic_decrement32(atomic32_t volatile* pDest) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return __sync_sub_and_fetch(pDest, 1); } // Returns the original value. inline atomic32_t atomic_exchange32(atomic32_t volatile* pDest, atomic32_t val) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return __sync_lock_test_and_set(pDest, val); } // Returns the resulting value. inline atomic32_t atomic_add32(atomic32_t volatile* pDest, atomic32_t val) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return __sync_add_and_fetch(pDest, val); } // Returns the original value. 
inline atomic32_t atomic_exchange_add32(atomic32_t volatile* pDest, atomic32_t val) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return __sync_fetch_and_add(pDest, val); } #else #define CRNLIB_NO_ATOMICS 1 // Atomic ops not supported - but try to do something reasonable. Assumes no threading at all. typedef long atomic32_t; typedef long long atomic64_t; inline atomic32_t atomic_compare_exchange32(atomic32_t volatile* pDest, atomic32_t exchange, atomic32_t comparand) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); atomic32_t cur = *pDest; if (cur == comparand) *pDest = exchange; return cur; } inline atomic64_t atomic_compare_exchange64(atomic64_t volatile* pDest, atomic64_t exchange, atomic64_t comparand) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 7) == 0); atomic64_t cur = *pDest; if (cur == comparand) *pDest = exchange; return cur; } inline atomic32_t atomic_increment32(atomic32_t volatile* pDest) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return (*pDest += 1); } inline atomic32_t atomic_decrement32(atomic32_t volatile* pDest) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return (*pDest -= 1); } inline atomic32_t atomic_exchange32(atomic32_t volatile* pDest, atomic32_t val) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); atomic32_t cur = *pDest; *pDest = val; return cur; } inline atomic32_t atomic_add32(atomic32_t volatile* pDest, atomic32_t val) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); return (*pDest += val); } inline atomic32_t atomic_exchange_add32(atomic32_t volatile* pDest, atomic32_t val) { CRNLIB_ASSERT((reinterpret_cast(pDest) & 3) == 0); atomic32_t cur = *pDest; *pDest += val; return cur; } #endif } // namespace crnlib #endif // CRN_ATOMICS_H DaemonEngine-crunch-ef4d32f/crnlib/crn_buffer_stream.h000066400000000000000000000063761503722002600231430ustar00rootroot00000000000000// File: crn_buffer_stream.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_data_stream.h" 
namespace crnlib { class buffer_stream : public data_stream { public: buffer_stream() : data_stream(), m_pBuf(NULL), m_size(0), m_ofs(0) { } buffer_stream(void* p, uint size) : data_stream(), m_pBuf(NULL), m_size(0), m_ofs(0) { open(p, size); } buffer_stream(const void* p, uint size) : data_stream(), m_pBuf(NULL), m_size(0), m_ofs(0) { open(p, size); } virtual ~buffer_stream() { } bool open(const void* p, uint size) { CRNLIB_ASSERT(p); close(); if ((!p) || (!size)) return false; m_opened = true; m_pBuf = (uint8*)(p); m_size = size; m_ofs = 0; m_attribs = cDataStreamSeekable | cDataStreamReadable; return true; } bool open(void* p, uint size) { CRNLIB_ASSERT(p); close(); if ((!p) || (!size)) return false; m_opened = true; m_pBuf = static_cast(p); m_size = size; m_ofs = 0; m_attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable; return true; } virtual bool close() { if (m_opened) { m_opened = false; m_pBuf = NULL; m_size = 0; m_ofs = 0; return true; } return false; } const void* get_buf() const { return m_pBuf; } void* get_buf() { return m_pBuf; } virtual const void* get_ptr() const { return m_pBuf; } virtual uint read(void* pBuf, uint len) { CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); if ((!m_opened) || (!is_readable()) || (!len)) return 0; CRNLIB_ASSERT(m_ofs <= m_size); uint bytes_left = m_size - m_ofs; len = math::minimum(len, bytes_left); if (len) memcpy(pBuf, &m_pBuf[m_ofs], len); m_ofs += len; return len; } virtual uint write(const void* pBuf, uint len) { CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); if ((!m_opened) || (!is_writable()) || (!len)) return 0; CRNLIB_ASSERT(m_ofs <= m_size); uint bytes_left = m_size - m_ofs; len = math::minimum(len, bytes_left); if (len) memcpy(&m_pBuf[m_ofs], pBuf, len); m_ofs += len; return len; } virtual bool flush() { if (!m_opened) return false; return true; } virtual uint64 get_size() { if (!m_opened) return 0; return m_size; } virtual uint64 get_remaining() { if (!m_opened) return 0; CRNLIB_ASSERT(m_ofs <= 
m_size); return m_size - m_ofs; } virtual uint64 get_ofs() { if (!m_opened) return 0; return m_ofs; } virtual bool seek(int64 ofs, bool relative) { if ((!m_opened) || (!is_seekable())) return false; int64 new_ofs = relative ? (m_ofs + ofs) : ofs; if (new_ofs < 0) return false; else if (new_ofs > m_size) return false; m_ofs = static_cast(new_ofs); post_seek(); return true; } private: uint8* m_pBuf; uint m_size; uint m_ofs; }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_cfile_stream.h000066400000000000000000000115641503722002600227470ustar00rootroot00000000000000// File: crn_cfile_stream.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_data_stream.h" namespace crnlib { class cfile_stream : public data_stream { public: cfile_stream() : data_stream(), m_pFile(NULL), m_size(0), m_ofs(0), m_has_ownership(false) { } cfile_stream(FILE* pFile, const char* pFilename, uint attribs, bool has_ownership) : data_stream(), m_pFile(NULL), m_size(0), m_ofs(0), m_has_ownership(false) { open(pFile, pFilename, attribs, has_ownership); } cfile_stream(const char* pFilename, uint attribs = cDataStreamReadable | cDataStreamSeekable, bool open_existing = false) : data_stream(), m_pFile(NULL), m_size(0), m_ofs(0), m_has_ownership(false) { open(pFilename, attribs, open_existing); } virtual ~cfile_stream() { close(); } virtual bool close() { clear_error(); if (m_opened) { bool status = true; if (m_has_ownership) { if (EOF == fclose(m_pFile)) status = false; } m_pFile = NULL; m_opened = false; m_size = 0; m_ofs = 0; m_has_ownership = false; return status; } return false; } bool open(FILE* pFile, const char* pFilename, uint attribs, bool has_ownership) { CRNLIB_ASSERT(pFile); CRNLIB_ASSERT(pFilename); close(); set_name(pFilename); m_pFile = pFile; m_has_ownership = has_ownership; m_attribs = static_cast(attribs); m_ofs = crn_ftell(m_pFile); crn_fseek(m_pFile, 0, SEEK_END); m_size = crn_ftell(m_pFile); crn_fseek(m_pFile, m_ofs, 
SEEK_SET); m_opened = true; return true; } bool open(const char* pFilename, uint attribs = cDataStreamReadable | cDataStreamSeekable, bool open_existing = false) { CRNLIB_ASSERT(pFilename); close(); m_attribs = static_cast(attribs); const char* pMode; if ((is_readable()) && (is_writable())) pMode = open_existing ? "r+b" : "w+b"; else if (is_writable()) pMode = open_existing ? "ab" : "wb"; else if (is_readable()) pMode = "rb"; else { set_error(); return false; } FILE* pFile = NULL; crn_fopen(&pFile, pFilename, pMode); m_has_ownership = true; if (!pFile) { set_error(); return false; } // TODO: Change stream class to support UCS2 filenames. return open(pFile, pFilename, attribs, true); } FILE* get_file() const { return m_pFile; } virtual uint read(void* pBuf, uint len) { CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); if (!m_opened || (!is_readable()) || (!len)) return 0; len = static_cast(math::minimum(len, get_remaining())); if (fread(pBuf, 1, len, m_pFile) != len) { set_error(); return 0; } m_ofs += len; return len; } virtual uint write(const void* pBuf, uint len) { CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); if (!m_opened || (!is_writable()) || (!len)) return 0; if (fwrite(pBuf, 1, len, m_pFile) != len) { set_error(); return 0; } m_ofs += len; m_size = math::maximum(m_size, m_ofs); return len; } virtual bool flush() { if ((!m_opened) || (!is_writable())) return false; if (EOF == fflush(m_pFile)) { set_error(); return false; } return true; } virtual uint64 get_size() { if (!m_opened) return 0; return m_size; } virtual uint64 get_remaining() { if (!m_opened) return 0; CRNLIB_ASSERT(m_ofs <= m_size); return m_size - m_ofs; } virtual uint64 get_ofs() { if (!m_opened) return 0; return m_ofs; } virtual bool seek(int64 ofs, bool relative) { if ((!m_opened) || (!is_seekable())) return false; int64 new_ofs = relative ? 
(m_ofs + ofs) : ofs; if (new_ofs < 0) return false; else if (static_cast(new_ofs) > m_size) return false; if (static_cast(new_ofs) != m_ofs) { if (crn_fseek(m_pFile, new_ofs, SEEK_SET) != 0) { set_error(); return false; } m_ofs = new_ofs; } return true; } static bool read_file_into_array(const char* pFilename, vector& buf) { cfile_stream in_stream(pFilename); if (!in_stream.is_opened()) return false; return in_stream.read_array(buf); } static bool write_array_to_file(const char* pFilename, const vector& buf) { cfile_stream out_stream(pFilename, cDataStreamWritable | cDataStreamSeekable); if (!out_stream.is_opened()) return false; return out_stream.write_array(buf); } private: FILE* m_pFile; uint64 m_size, m_ofs; bool m_has_ownership; }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_checksum.cpp000066400000000000000000000025251503722002600224440ustar00rootroot00000000000000// File: crn_checksum.cpp #include "crn_core.h" namespace crnlib { // From the public domain stb.h header. 
uint adler32(const void* pBuf, size_t buflen, uint adler32) { const uint8* buffer = static_cast(pBuf); const unsigned long ADLER_MOD = 65521; unsigned long s1 = adler32 & 0xffff, s2 = adler32 >> 16; size_t blocklen; unsigned long i; blocklen = buflen % 5552; while (buflen) { for (i = 0; i + 7 < blocklen; i += 8) { s1 += buffer[0], s2 += s1; s1 += buffer[1], s2 += s1; s1 += buffer[2], s2 += s1; s1 += buffer[3], s2 += s1; s1 += buffer[4], s2 += s1; s1 += buffer[5], s2 += s1; s1 += buffer[6], s2 += s1; s1 += buffer[7], s2 += s1; buffer += 8; } for (; i < blocklen; ++i) s1 += *buffer++, s2 += s1; s1 %= ADLER_MOD, s2 %= ADLER_MOD; buflen -= blocklen; blocklen = 5552; } return (s2 << 16) + s1; } uint16 crc16(const void* pBuf, size_t len, uint16 crc) { crc = ~crc; const uint8* p = reinterpret_cast(pBuf); while (len) { const uint16 q = *p++ ^ (crc >> 8); crc <<= 8U; uint16 r = (q >> 4) ^ q; crc ^= r; r <<= 5U; crc ^= r; r <<= 7U; crc ^= r; len--; } return static_cast(~crc); } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_checksum.h000066400000000000000000000005631503722002600221110ustar00rootroot00000000000000// File: crn_checksum.h #pragma once namespace crnlib { const uint cInitAdler32 = 1U; uint adler32(const void* pBuf, size_t buflen, uint adler32 = cInitAdler32); // crc16() intended for small buffers - doesn't use an acceleration table. 
const uint cInitCRC16 = 0; uint16 crc16(const void* pBuf, size_t len, uint16 crc = cInitCRC16); } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_clusterizer.h000066400000000000000000000476131503722002600226710ustar00rootroot00000000000000// File: crn_clusterizer.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_matrix.h" namespace crnlib { template class clusterizer { public: clusterizer() : m_overall_variance(0.0f), m_split_index(0), m_heap_size(0), m_quick(false) { } void clear() { m_training_vecs.clear(); m_codebook.clear(); m_nodes.clear(); m_overall_variance = 0.0f; m_split_index = 0; m_heap_size = 0; m_quick = false; } void reserve_training_vecs(uint num_expected) { m_training_vecs.reserve(num_expected); } void add_training_vec(const VectorType& v, uint weight) { m_training_vecs.push_back(std::make_pair(v, weight)); } typedef bool (*progress_callback_func_ptr)(uint percentage_completed, void* pData); bool generate_codebook(uint max_size, progress_callback_func_ptr pProgress_callback = NULL, void* pProgress_data = NULL, bool quick = false) { if (m_training_vecs.empty()) return false; m_quick = quick; double ttsum = 0.0f; vq_node root; root.m_vectors.reserve(m_training_vecs.size()); for (uint i = 0; i < m_training_vecs.size(); i++) { const VectorType& v = m_training_vecs[i].first; const uint weight = m_training_vecs[i].second; root.m_centroid += (v * (float)weight); root.m_total_weight += weight; root.m_vectors.push_back(i); ttsum += v.dot(v) * weight; } root.m_variance = (float)(ttsum - (root.m_centroid.dot(root.m_centroid) / root.m_total_weight)); root.m_centroid *= (1.0f / root.m_total_weight); m_nodes.clear(); m_nodes.reserve(max_size * 2 + 1); m_nodes.push_back(root); m_heap.resize(max_size + 1); m_heap[1] = 0; m_heap_size = 1; m_split_index = 0; uint total_leaves = 1; m_left_children.reserve(m_training_vecs.size() + 1); m_right_children.reserve(m_training_vecs.size() + 1); int prev_percentage = -1; 
while ((total_leaves < max_size) && (m_heap_size)) { int worst_node_index = m_heap[1]; m_heap[1] = m_heap[m_heap_size]; m_heap_size--; if (m_heap_size) down_heap(1); split_node(worst_node_index); total_leaves++; if ((pProgress_callback) && ((total_leaves & 63) == 0) && (max_size)) { int cur_percentage = (total_leaves * 100U + (max_size / 2U)) / max_size; if (cur_percentage != prev_percentage) { if (!(*pProgress_callback)(cur_percentage, pProgress_data)) return false; prev_percentage = cur_percentage; } } } m_codebook.clear(); m_overall_variance = 0.0f; for (uint i = 0; i < m_nodes.size(); i++) { vq_node& node = m_nodes[i]; if (node.m_left != -1) { CRNLIB_ASSERT(node.m_right != -1); continue; } CRNLIB_ASSERT((node.m_left == -1) && (node.m_right == -1)); node.m_codebook_index = m_codebook.size(); m_codebook.push_back(node.m_centroid); m_overall_variance += node.m_variance; } m_heap.clear(); m_left_children.clear(); m_right_children.clear(); return true; } inline uint get_num_training_vecs() const { return m_training_vecs.size(); } const VectorType& get_training_vec(uint index) const { return m_training_vecs[index].first; } uint get_training_vec_weight(uint index) const { return m_training_vecs[index].second; } typedef crnlib::vector > training_vec_array; const training_vec_array& get_training_vecs() const { return m_training_vecs; } training_vec_array& get_training_vecs() { return m_training_vecs; } inline float get_overall_variance() const { return m_overall_variance; } inline uint get_codebook_size() const { return m_codebook.size(); } inline const VectorType& get_codebook_entry(uint index) const { return m_codebook[index]; } VectorType& get_codebook_entry(uint index) { return m_codebook[index]; } typedef crnlib::vector vector_vec_type; inline const vector_vec_type& get_codebook() const { return m_codebook; } uint find_best_codebook_entry(const VectorType& v) const { uint cur_node_index = 0; for (;;) { const vq_node& cur_node = m_nodes[cur_node_index]; if 
(cur_node.m_left == -1) return cur_node.m_codebook_index; const vq_node& left_node = m_nodes[cur_node.m_left]; const vq_node& right_node = m_nodes[cur_node.m_right]; float left_dist = left_node.m_centroid.squared_distance(v); float right_dist = right_node.m_centroid.squared_distance(v); if (left_dist < right_dist) cur_node_index = cur_node.m_left; else cur_node_index = cur_node.m_right; } } const VectorType& find_best_codebook_entry(const VectorType& v, uint max_codebook_size) const { uint cur_node_index = 0; for (;;) { const vq_node& cur_node = m_nodes[cur_node_index]; if ((cur_node.m_left == -1) || ((cur_node.m_codebook_index + 1) >= (int)max_codebook_size)) return cur_node.m_centroid; const vq_node& left_node = m_nodes[cur_node.m_left]; const vq_node& right_node = m_nodes[cur_node.m_right]; float left_dist = left_node.m_centroid.squared_distance(v); float right_dist = right_node.m_centroid.squared_distance(v); if (left_dist < right_dist) cur_node_index = cur_node.m_left; else cur_node_index = cur_node.m_right; } } uint find_best_codebook_entry_fs(const VectorType& v) const { float best_dist = math::cNearlyInfinite; uint best_index = 0; for (uint i = 0; i < m_codebook.size(); i++) { float dist = m_codebook[i].squared_distance(v); if (dist < best_dist) { best_dist = dist; best_index = i; if (best_dist == 0.0f) break; } } return best_index; } void retrieve_clusters(uint max_clusters, crnlib::vector >& clusters) const { clusters.resize(0); clusters.reserve(max_clusters); crnlib::vector stack; stack.reserve(512); uint cur_node_index = 0; for (;;) { const vq_node& cur_node = m_nodes[cur_node_index]; if ((cur_node.is_leaf()) || ((cur_node.m_codebook_index + 2) > (int)max_clusters)) { clusters.resize(clusters.size() + 1); clusters.back() = cur_node.m_vectors; if (stack.empty()) break; cur_node_index = stack.back(); stack.pop_back(); continue; } cur_node_index = cur_node.m_left; stack.push_back(cur_node.m_right); } } private: training_vec_array m_training_vecs; struct 
vq_node { vq_node() : m_centroid(cClear), m_total_weight(0), m_left(-1), m_right(-1), m_codebook_index(-1), m_unsplittable(false) {} VectorType m_centroid; uint64 m_total_weight; float m_variance; crnlib::vector m_vectors; int m_left; int m_right; int m_codebook_index; bool m_unsplittable; bool is_leaf() const { return m_left < 0; } }; typedef crnlib::vector node_vec_type; node_vec_type m_nodes; vector_vec_type m_codebook; float m_overall_variance; uint m_split_index; crnlib::vector m_heap; uint m_heap_size; bool m_quick; void insert_heap(uint node_index) { const float variance = m_nodes[node_index].m_variance; uint pos = ++m_heap_size; if (m_heap_size >= m_heap.size()) m_heap.resize(m_heap_size + 1); for (;;) { uint parent = pos >> 1; if (!parent) break; float parent_variance = m_nodes[m_heap[parent]].m_variance; if (parent_variance > variance) break; m_heap[pos] = m_heap[parent]; pos = parent; } m_heap[pos] = node_index; } void down_heap(uint pos) { uint child; uint orig = m_heap[pos]; const float orig_variance = m_nodes[orig].m_variance; while ((child = (pos << 1)) <= m_heap_size) { if (child < m_heap_size) { if (m_nodes[m_heap[child]].m_variance < m_nodes[m_heap[child + 1]].m_variance) child++; } if (orig_variance > m_nodes[m_heap[child]].m_variance) break; m_heap[pos] = m_heap[child]; pos = child; } m_heap[pos] = orig; } void compute_split_estimate(VectorType& left_child_res, VectorType& right_child_res, const vq_node& parent_node) { VectorType furthest(0); double furthest_dist = -1.0f; for (uint i = 0; i < parent_node.m_vectors.size(); i++) { const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; double dist = v.squared_distance(parent_node.m_centroid); if (dist > furthest_dist) { furthest_dist = dist; furthest = v; } } VectorType opposite(0); double opposite_dist = -1.0f; for (uint i = 0; i < parent_node.m_vectors.size(); i++) { const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; double dist = v.squared_distance(furthest); 
if (dist > opposite_dist) { opposite_dist = dist; opposite = v; } } left_child_res = (furthest + parent_node.m_centroid) * .5f; right_child_res = (opposite + parent_node.m_centroid) * .5f; } void compute_split_pca(VectorType& left_child_res, VectorType& right_child_res, const vq_node& parent_node) { if (parent_node.m_vectors.size() == 2) { left_child_res = m_training_vecs[parent_node.m_vectors[0]].first; right_child_res = m_training_vecs[parent_node.m_vectors[1]].first; return; } const uint N = VectorType::num_elements; matrix covar; covar.clear(); for (uint i = 0; i < parent_node.m_vectors.size(); i++) { const VectorType v(m_training_vecs[parent_node.m_vectors[i]].first - parent_node.m_centroid); const VectorType w(v * (float)m_training_vecs[parent_node.m_vectors[i]].second); for (uint x = 0; x < N; x++) for (uint y = x; y < N; y++) covar[x][y] = covar[x][y] + v[x] * w[y]; } float one_over_total_weight = 1.0f / parent_node.m_total_weight; for (uint x = 0; x < N; x++) for (uint y = x; y < N; y++) covar[x][y] *= one_over_total_weight; for (uint x = 0; x < (N - 1); x++) for (uint y = x + 1; y < N; y++) covar[y][x] = covar[x][y]; VectorType axis; //(1.0f); if (N == 1) axis.set(1.0f); else { for (uint i = 0; i < N; i++) axis[i] = math::lerp(.75f, 1.25f, i * (1.0f / math::maximum(N - 1, 1))); } VectorType prev_axis(axis); for (uint iter = 0; iter < 10; iter++) { VectorType x; double max_sum = 0; for (uint i = 0; i < N; i++) { double sum = 0; for (uint j = 0; j < N; j++) sum += axis[j] * covar[i][j]; x[i] = static_cast(sum); max_sum = math::maximum(max_sum, fabs(sum)); } if (max_sum != 0.0f) x *= static_cast(1.0f / max_sum); VectorType delta_axis(prev_axis - x); prev_axis = axis; axis = x; if (delta_axis.norm() < .0025f) break; } axis.normalize(); VectorType left_child(0.0f); VectorType right_child(0.0f); double left_weight = 0.0f; double right_weight = 0.0f; for (uint i = 0; i < parent_node.m_vectors.size(); i++) { const float weight = 
(float)m_training_vecs[parent_node.m_vectors[i]].second; const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; double t = (v - parent_node.m_centroid) * axis; if (t < 0.0f) { left_child += v * weight; left_weight += weight; } else { right_child += v * weight; right_weight += weight; } } if ((left_weight > 0.0f) && (right_weight > 0.0f)) { left_child_res = left_child * (float)(1.0f / left_weight); right_child_res = right_child * (float)(1.0f / right_weight); } else { compute_split_estimate(left_child_res, right_child_res, parent_node); } } #if 0 void compute_split_pca2(VectorType& left_child_res, VectorType& right_child_res, const vq_node& parent_node) { if (parent_node.m_vectors.size() == 2) { left_child_res = m_training_vecs[parent_node.m_vectors[0]].first; right_child_res = m_training_vecs[parent_node.m_vectors[1]].first; return; } const uint N = VectorType::num_elements; VectorType furthest; double furthest_dist = -1.0f; for (uint i = 0; i < parent_node.m_vectors.size(); i++) { const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; double dist = v.squared_distance(parent_node.m_centroid); if (dist > furthest_dist) { furthest_dist = dist; furthest = v; } } VectorType opposite; double opposite_dist = -1.0f; for (uint i = 0; i < parent_node.m_vectors.size(); i++) { const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; double dist = v.squared_distance(furthest); if (dist > opposite_dist) { opposite_dist = dist; opposite = v; } } VectorType axis(opposite - furthest); if (axis.normalize() < .000125f) { left_child_res = (furthest + parent_node.m_centroid) * .5f; right_child_res = (opposite + parent_node.m_centroid) * .5f; return; } for (uint iter = 0; iter < 2; iter++) { double next_axis[N]; utils::zero_object(next_axis); for (uint i = 0; i < parent_node.m_vectors.size(); i++) { const double weight = m_training_vecs[parent_node.m_vectors[i]].second; VectorType v(m_training_vecs[parent_node.m_vectors[i]].first - 
parent_node.m_centroid); double dot = (v * axis) * weight; for (uint j = 0; j < N; j++) next_axis[j] += dot * v[j]; } double w = 0.0f; for (uint j = 0; j < N; j++) w += next_axis[j] * next_axis[j]; if (w > 0.0f) { w = 1.0f / sqrt(w); for (uint j = 0; j < N; j++) axis[j] = static_cast(next_axis[j] * w); } else break; } VectorType left_child(0.0f); VectorType right_child(0.0f); double left_weight = 0.0f; double right_weight = 0.0f; for (uint i = 0; i < parent_node.m_vectors.size(); i++) { const float weight = (float)m_training_vecs[parent_node.m_vectors[i]].second; const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; double t = (v - parent_node.m_centroid) * axis; if (t < 0.0f) { left_child += v * weight; left_weight += weight; } else { right_child += v * weight; right_weight += weight; } } if ((left_weight > 0.0f) && (right_weight > 0.0f)) { left_child_res = left_child * (float)(1.0f / left_weight); right_child_res = right_child * (float)(1.0f / right_weight); } else { left_child_res = (furthest + parent_node.m_centroid) * .5f; right_child_res = (opposite + parent_node.m_centroid) * .5f; } } #endif // thread safety warning: shared state! crnlib::vector m_left_children; crnlib::vector m_right_children; void split_node(uint index) { vq_node& parent_node = m_nodes[index]; if (parent_node.m_vectors.size() == 1) return; VectorType left_child, right_child; if (m_quick) compute_split_estimate(left_child, right_child, parent_node); else compute_split_pca(left_child, right_child, parent_node); uint64 left_weight = 0; uint64 right_weight = 0; float prev_total_variance = 1e+10f; float left_variance = 0.0f; float right_variance = 0.0f; const uint cMaxLoops = m_quick ? 
2 : 8; for (uint total_loops = 0; total_loops < cMaxLoops; total_loops++) { m_left_children.resize(0); m_right_children.resize(0); VectorType new_left_child(cClear); VectorType new_right_child(cClear); double left_ttsum = 0.0f; double right_ttsum = 0.0f; left_weight = 0; right_weight = 0; for (uint i = 0; i < parent_node.m_vectors.size(); i++) { const VectorType& v = m_training_vecs[parent_node.m_vectors[i]].first; const uint weight = m_training_vecs[parent_node.m_vectors[i]].second; double left_dist2 = left_child.squared_distance(v); double right_dist2 = right_child.squared_distance(v); if (left_dist2 < right_dist2) { m_left_children.push_back(parent_node.m_vectors[i]); new_left_child += (v * (float)weight); left_weight += weight; left_ttsum += v.dot(v) * weight; } else { m_right_children.push_back(parent_node.m_vectors[i]); new_right_child += (v * (float)weight); right_weight += weight; right_ttsum += v.dot(v) * weight; } } if ((!left_weight) || (!right_weight)) { parent_node.m_unsplittable = true; return; } left_variance = (float)(left_ttsum - (new_left_child.dot(new_left_child) / left_weight)); right_variance = (float)(right_ttsum - (new_right_child.dot(new_right_child) / right_weight)); new_left_child *= (1.0f / left_weight); new_right_child *= (1.0f / right_weight); left_child = new_left_child; right_child = new_right_child; float total_variance = left_variance + right_variance; if (total_variance < .00001f) break; //const float variance_delta_thresh = .00001f; const float variance_delta_thresh = .00125f; if (((prev_total_variance - total_variance) / total_variance) < variance_delta_thresh) break; prev_total_variance = total_variance; } const uint left_child_index = m_nodes.size(); const uint right_child_index = m_nodes.size() + 1; parent_node.m_left = m_nodes.size(); parent_node.m_right = m_nodes.size() + 1; parent_node.m_codebook_index = m_split_index; m_split_index++; m_nodes.resize(m_nodes.size() + 2); // parent_node is invalid now, because m_nodes has 
been changed vq_node& left_child_node = m_nodes[left_child_index]; vq_node& right_child_node = m_nodes[right_child_index]; left_child_node.m_centroid = left_child; left_child_node.m_total_weight = left_weight; left_child_node.m_vectors.swap(m_left_children); left_child_node.m_variance = left_variance; if ((left_child_node.m_vectors.size() > 1) && (left_child_node.m_variance > 0.0f)) insert_heap(left_child_index); right_child_node.m_centroid = right_child; right_child_node.m_total_weight = right_weight; right_child_node.m_vectors.swap(m_right_children); right_child_node.m_variance = right_variance; if ((right_child_node.m_vectors.size() > 1) && (right_child_node.m_variance > 0.0f)) insert_heap(right_child_index); } }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_color.h000066400000000000000000000703131503722002600214250ustar00rootroot00000000000000// File: crn_color.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_core.h" namespace crnlib { template struct color_quad_component_traits { enum { cSigned = false, cFloat = false, cMin = cUINT8_MIN, cMax = cUINT8_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = true, cFloat = false, cMin = cINT8_MIN, cMax = cINT8_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = true, cFloat = false, cMin = cINT16_MIN, cMax = cINT16_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = false, cFloat = false, cMin = cUINT16_MIN, cMax = cUINT16_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = true, cFloat = false, cMin = cINT32_MIN, cMax = cINT32_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = false, cFloat = false, cMin = cUINT32_MIN, cMax = cUINT32_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = false, cFloat = true, cMin = cINT32_MIN, cMax = cINT32_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = 
false, cFloat = true, cMin = cINT32_MIN, cMax = cINT32_MAX }; }; template class color_quad : public helpers::rel_ops > { template static inline parameter_type clamp(T v) { parameter_type result = static_cast(v); if (!component_traits::cFloat) { if (v < component_traits::cMin) result = static_cast(component_traits::cMin); else if (v > component_traits::cMax) result = static_cast(component_traits::cMax); } return result; } #ifdef _MSC_VER template <> static inline parameter_type clamp(int v) { if (!component_traits::cFloat) { if ((!component_traits::cSigned) && (component_traits::cMin == 0) && (component_traits::cMax == 0xFF)) { if (v & 0xFFFFFF00U) v = (~(static_cast(v) >> 31)) & 0xFF; } else { if (v < component_traits::cMin) v = component_traits::cMin; else if (v > component_traits::cMax) v = component_traits::cMax; } } return static_cast(v); } #endif public: typedef component_type component_t; typedef parameter_type parameter_t; typedef color_quad_component_traits component_traits; enum { cNumComps = 4 }; union { struct { component_type r; component_type g; component_type b; component_type a; }; component_type c[cNumComps]; uint32 m_u32; }; inline color_quad() { } inline color_quad(eClear) : r(0), g(0), b(0), a(0) { } inline color_quad(const color_quad& other) : r(other.r), g(other.g), b(other.b), a(other.a) { } explicit inline color_quad(parameter_type y, parameter_type alpha = component_traits::cMax) { set(y, alpha); } inline color_quad(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) { set(red, green, blue, alpha); } explicit inline color_quad(eNoClamp, parameter_type y, parameter_type alpha = component_traits::cMax) { set_noclamp_y_alpha(y, alpha); } inline color_quad(eNoClamp, parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) { set_noclamp_rgba(red, green, blue, alpha); } template inline color_quad(const color_quad& other) : 
r(static_cast(clamp(other.r))), g(static_cast(clamp(other.g))), b(static_cast(clamp(other.b))), a(static_cast(clamp(other.a))) { } inline void clear() { r = 0; g = 0; b = 0; a = 0; } inline color_quad& operator=(const color_quad& other) { r = other.r; g = other.g; b = other.b; a = other.a; return *this; } inline color_quad& set_rgb(const color_quad& other) { r = other.r; g = other.g; b = other.b; return *this; } template inline color_quad& operator=(const color_quad& other) { r = static_cast(clamp(other.r)); g = static_cast(clamp(other.g)); b = static_cast(clamp(other.b)); a = static_cast(clamp(other.a)); return *this; } inline color_quad& operator=(parameter_type y) { set(y, component_traits::cMax); return *this; } inline color_quad& set(parameter_type y, parameter_type alpha = component_traits::cMax) { y = clamp(y); alpha = clamp(alpha); r = static_cast(y); g = static_cast(y); b = static_cast(y); a = static_cast(alpha); return *this; } inline color_quad& set_noclamp_y_alpha(parameter_type y, parameter_type alpha = component_traits::cMax) { CRNLIB_ASSERT((y >= component_traits::cMin) && (y <= component_traits::cMax)); CRNLIB_ASSERT((alpha >= component_traits::cMin) && (alpha <= component_traits::cMax)); r = static_cast(y); g = static_cast(y); b = static_cast(y); a = static_cast(alpha); return *this; } inline color_quad& set(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) { r = static_cast(clamp(red)); g = static_cast(clamp(green)); b = static_cast(clamp(blue)); a = static_cast(clamp(alpha)); return *this; } inline color_quad& set_noclamp_rgba(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha) { CRNLIB_ASSERT((red >= component_traits::cMin) && (red <= component_traits::cMax)); CRNLIB_ASSERT((green >= component_traits::cMin) && (green <= component_traits::cMax)); CRNLIB_ASSERT((blue >= component_traits::cMin) && (blue <= component_traits::cMax)); CRNLIB_ASSERT((alpha >= 
component_traits::cMin) && (alpha <= component_traits::cMax)); r = static_cast(red); g = static_cast(green); b = static_cast(blue); a = static_cast(alpha); return *this; } inline color_quad& set_noclamp_rgb(parameter_type red, parameter_type green, parameter_type blue) { CRNLIB_ASSERT((red >= component_traits::cMin) && (red <= component_traits::cMax)); CRNLIB_ASSERT((green >= component_traits::cMin) && (green <= component_traits::cMax)); CRNLIB_ASSERT((blue >= component_traits::cMin) && (blue <= component_traits::cMax)); r = static_cast(red); g = static_cast(green); b = static_cast(blue); return *this; } static inline parameter_type get_min_comp() { return component_traits::cMin; } static inline parameter_type get_max_comp() { return component_traits::cMax; } static inline bool get_comps_are_signed() { return component_traits::cSigned; } inline component_type operator[](uint i) const { CRNLIB_ASSERT(i < cNumComps); return c[i]; } inline component_type& operator[](uint i) { CRNLIB_ASSERT(i < cNumComps); return c[i]; } inline color_quad& set_component(uint i, parameter_type f) { CRNLIB_ASSERT(i < cNumComps); c[i] = static_cast(clamp(f)); return *this; } inline color_quad& set_grayscale(parameter_t l) { component_t x = static_cast(clamp(l)); c[0] = x; c[1] = x; c[2] = x; return *this; } inline color_quad& clamp(const color_quad& l, const color_quad& h) { for (uint i = 0; i < cNumComps; i++) c[i] = static_cast(math::clamp(c[i], l[i], h[i])); return *this; } inline color_quad& clamp(parameter_type l, parameter_type h) { for (uint i = 0; i < cNumComps; i++) c[i] = static_cast(math::clamp(c[i], l, h)); return *this; } // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). inline parameter_type get_luma() const { return static_cast((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U); } // Returns REC 709 luma. inline parameter_type get_luma_rec709() const { return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); } // Beware of endianness! 
inline uint32 get_uint32() const { CRNLIB_ASSERT(sizeof(*this) == sizeof(uint32)); return *reinterpret_cast(this); } // Beware of endianness! inline uint64 get_uint64() const { CRNLIB_ASSERT(sizeof(*this) == sizeof(uint64)); return *reinterpret_cast(this); } inline uint squared_distance(const color_quad& c, bool alpha = true) const { return math::square(r - c.r) + math::square(g - c.g) + math::square(b - c.b) + (alpha ? math::square(a - c.a) : 0); } inline bool rgb_equals(const color_quad& rhs) const { return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); } inline bool operator==(const color_quad& rhs) const { if (sizeof(color_quad) == sizeof(uint32)) return m_u32 == rhs.m_u32; else return (r == rhs.r) && (g == rhs.g) && (b == rhs.b) && (a == rhs.a); } inline bool operator<(const color_quad& rhs) const { for (uint i = 0; i < cNumComps; i++) { if (c[i] < rhs.c[i]) return true; else if (!(c[i] == rhs.c[i])) return false; } return false; } color_quad& operator+=(const color_quad& other) { for (uint i = 0; i < 4; i++) c[i] = static_cast(clamp(c[i] + other.c[i])); return *this; } color_quad& operator-=(const color_quad& other) { for (uint i = 0; i < 4; i++) c[i] = static_cast(clamp(c[i] - other.c[i])); return *this; } color_quad& operator*=(parameter_type v) { for (uint i = 0; i < 4; i++) c[i] = static_cast(clamp(c[i] * v)); return *this; } color_quad& operator/=(parameter_type v) { for (uint i = 0; i < 4; i++) c[i] = static_cast(c[i] / v); return *this; } color_quad get_swizzled(uint x, uint y, uint z, uint w) const { CRNLIB_ASSERT((x | y | z | w) < 4); return color_quad(c[x], c[y], c[z], c[w]); } friend color_quad operator+(const color_quad& lhs, const color_quad& rhs) { color_quad result(lhs); result += rhs; return result; } friend color_quad operator-(const color_quad& lhs, const color_quad& rhs) { color_quad result(lhs); result -= rhs; return result; } friend color_quad operator*(const color_quad& lhs, parameter_type v) { color_quad result(lhs); result *= v; return 
result; } friend color_quad operator/(const color_quad& lhs, parameter_type v) { color_quad result(lhs); result /= v; return result; } friend color_quad operator*(parameter_type v, const color_quad& rhs) { color_quad result(rhs); result *= v; return result; } inline bool is_grayscale() const { return (c[0] == c[1]) && (c[1] == c[2]); } uint get_min_component_index(bool alpha = true) const { uint index = 0; uint limit = alpha ? cNumComps : (cNumComps - 1); for (uint i = 1; i < limit; i++) if (c[i] < c[index]) index = i; return index; } uint get_max_component_index(bool alpha = true) const { uint index = 0; uint limit = alpha ? cNumComps : (cNumComps - 1); for (uint i = 1; i < limit; i++) if (c[i] > c[index]) index = i; return index; } operator size_t() const { return (size_t)fast_hash(this, sizeof(*this)); } void get_float4(float* pDst) { for (uint i = 0; i < 4; i++) pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); } void get_float3(float* pDst) { for (uint i = 0; i < 3; i++) pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); } static color_quad component_min(const color_quad& a, const color_quad& b) { color_quad result; for (uint i = 0; i < 4; i++) result[i] = static_cast(math::minimum(a[i], b[i])); return result; } static color_quad component_max(const color_quad& a, const color_quad& b) { color_quad result; for (uint i = 0; i < 4; i++) result[i] = static_cast(math::maximum(a[i], b[i])); return result; } static color_quad make_black() { return color_quad(0, 0, 0, component_traits::cMax); } static color_quad make_white() { return color_quad(component_traits::cMax, component_traits::cMax, component_traits::cMax, component_traits::cMax); } }; // class color_quad template struct scalar_type > { enum { cFlag = true }; static inline void construct(color_quad* p) {} static inline void construct(color_quad* p, const color_quad& init) { memcpy(p, &init, 
sizeof(color_quad)); } static inline void construct_array(color_quad*, uint) {} static inline void destruct(color_quad*) {} static inline void destruct_array(color_quad*, uint) {} }; typedef color_quad color_quad_u8; typedef color_quad color_quad_i8; typedef color_quad color_quad_i16; typedef color_quad color_quad_u16; typedef color_quad color_quad_i32; typedef color_quad color_quad_u32; typedef color_quad color_quad_f; typedef color_quad color_quad_d; namespace color { inline uint elucidian_distance(uint r0, uint g0, uint b0, uint r1, uint g1, uint b1) { int dr = (int)r0 - (int)r1; int dg = (int)g0 - (int)g1; int db = (int)b0 - (int)b1; return static_cast(dr * dr + dg * dg + db * db); } inline uint elucidian_distance(uint r0, uint g0, uint b0, uint a0, uint r1, uint g1, uint b1, uint a1) { int dr = (int)r0 - (int)r1; int dg = (int)g0 - (int)g1; int db = (int)b0 - (int)b1; int da = (int)a0 - (int)a1; return static_cast(dr * dr + dg * dg + db * db + da * da); } inline uint elucidian_distance(const color_quad_u8& c0, const color_quad_u8& c1, bool alpha) { if (alpha) return elucidian_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a); else return elucidian_distance(c0.r, c0.g, c0.b, c1.r, c1.g, c1.b); } inline uint weighted_elucidian_distance(uint r0, uint g0, uint b0, uint r1, uint g1, uint b1, uint wr, uint wg, uint wb) { int dr = (int)r0 - (int)r1; int dg = (int)g0 - (int)g1; int db = (int)b0 - (int)b1; return static_cast((wr * dr * dr) + (wg * dg * dg) + (wb * db * db)); } inline uint weighted_elucidian_distance( uint r0, uint g0, uint b0, uint a0, uint r1, uint g1, uint b1, uint a1, uint wr, uint wg, uint wb, uint wa) { int dr = (int)r0 - (int)r1; int dg = (int)g0 - (int)g1; int db = (int)b0 - (int)b1; int da = (int)a0 - (int)a1; return static_cast((wr * dr * dr) + (wg * dg * dg) + (wb * db * db) + (wa * da * da)); } inline uint weighted_elucidian_distance(const color_quad_u8& c0, const color_quad_u8& c1, uint wr, uint wg, uint wb, uint wa) { return 
weighted_elucidian_distance(c0.r, c0.g, c0.b, c0.a, c1.r, c1.g, c1.b, c1.a, wr, wg, wb, wa); } //const uint cRWeight = 8;//24; //const uint cGWeight = 24;//73; //const uint cBWeight = 1;//3; const uint cRWeight = 8; //24; const uint cGWeight = 25; //73; const uint cBWeight = 1; //3; inline uint color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha) { if (perceptual) { if (alpha) return weighted_elucidian_distance(e1, e2, cRWeight, cGWeight, cBWeight, cRWeight + cGWeight + cBWeight); else return weighted_elucidian_distance(e1, e2, cRWeight, cGWeight, cBWeight, 0); } else return elucidian_distance(e1, e2, alpha); } inline uint peak_color_error(const color_quad_u8& e1, const color_quad_u8& e2) { return math::maximum(labs(e1[0] - e2[0]), labs(e1[1] - e2[1]), labs(e1[2] - e2[2])); //return math::square(e1[0] - e2[0]) + math::square(e1[1] - e2[1]) + math::square(e1[2] - e2[2]); } // y - [0,255] // co - [-127,127] // cg - [-126,127] inline void RGB_to_YCoCg(int r, int g, int b, int& y, int& co, int& cg) { y = (r >> 2) + (g >> 1) + (b >> 2); co = (r >> 1) - (b >> 1); cg = -(r >> 2) + (g >> 1) - (b >> 2); } inline void YCoCg_to_RGB(int y, int co, int cg, int& r, int& g, int& b) { int tmp = y - cg; g = y + cg; r = tmp + co; b = tmp - co; } static inline uint8 clamp_component(int i) { if (static_cast(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return static_cast(i); } // RGB->YCbCr constants, scaled by 2^16 const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329; // YCbCr->RGB constants, scaled by 2^16 const int R_CR = 91881, B_CB = 116130, G_CR = -46802, G_CB = -22554; inline int RGB_to_Y(const color_quad_u8& rgb) { const int r = rgb[0], g = rgb[1], b = rgb[2]; return (r * YR + g * YG + b * YB + 32768) >> 16; } // RGB to YCbCr (same as JFIF JPEG). // Odd default biases account for 565 endpoint packing. 
inline void RGB_to_YCC(color_quad_u8& ycc, const color_quad_u8& rgb, int cb_bias = 123, int cr_bias = 125) { const int r = rgb[0], g = rgb[1], b = rgb[2]; ycc.a = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); ycc.r = clamp_component(cb_bias + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16)); ycc.g = clamp_component(cr_bias + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16)); ycc.b = 0; } // YCbCr to RGB. // Odd biases account for 565 endpoint packing. inline void YCC_to_RGB(color_quad_u8& rgb, const color_quad_u8& ycc, int cb_bias = 123, int cr_bias = 125) { const int y = ycc.a; const int cb = ycc.r - cb_bias; const int cr = ycc.g - cr_bias; rgb.r = clamp_component(y + ((R_CR * cr + 32768) >> 16)); rgb.g = clamp_component(y + ((G_CR * cr + G_CB * cb + 32768) >> 16)); rgb.b = clamp_component(y + ((B_CB * cb + 32768) >> 16)); rgb.a = 255; } // Float RGB->YCbCr constants const float S = 1.0f / 65536.0f; const float F_YR = S * YR, F_YG = S * YG, F_YB = S * YB, F_CB_R = S * CB_R, F_CB_G = S * CB_G, F_CB_B = S * CB_B, F_CR_R = S * CR_R, F_CR_G = S * CR_G, F_CR_B = S * CR_B; // Float YCbCr->RGB constants const float F_R_CR = S * R_CR, F_B_CB = S * B_CB, F_G_CR = S * G_CR, F_G_CB = S * G_CB; inline void RGB_to_YCC_float(color_quad_f& ycc, const color_quad_u8& rgb) { const int r = rgb[0], g = rgb[1], b = rgb[2]; ycc.a = r * F_YR + g * F_YG + b * F_YB; ycc.r = r * F_CB_R + g * F_CB_G + b * F_CB_B; ycc.g = r * F_CR_R + g * F_CR_G + b * F_CR_B; ycc.b = 0; } inline void YCC_float_to_RGB(color_quad_u8& rgb, const color_quad_f& ycc) { float y = ycc.a, cb = ycc.r, cr = ycc.g; rgb.r = color::clamp_component(static_cast(.5f + y + F_R_CR * cr)); rgb.g = color::clamp_component(static_cast(.5f + y + F_G_CR * cr + F_G_CB * cb)); rgb.b = color::clamp_component(static_cast(.5f + y + F_B_CB * cb)); rgb.a = 255; } } // namespace color // This class purposely trades off speed for extremely flexibility. 
It can handle any component swizzle, any pixel type from 1-4 components and 1-32 bits/component, // any pixel size between 1-16 bytes/pixel, any pixel stride, any color_quad data type (signed/unsigned/float 8/16/32 bits/component), and scaled/non-scaled components. // On the downside, it's freaking slow. class pixel_packer { public: pixel_packer() { clear(); } pixel_packer(uint num_comps, uint bits_per_comp, int pixel_stride = -1, bool reversed = false) { init(num_comps, bits_per_comp, pixel_stride, reversed); } pixel_packer(const char* pComp_map, int pixel_stride = -1, int force_comp_size = -1) { init(pComp_map, pixel_stride, force_comp_size); } void clear() { utils::zero_this(this); } inline bool is_valid() const { return m_pixel_stride > 0; } inline uint get_pixel_stride() const { return m_pixel_stride; } void set_pixel_stride(uint n) { m_pixel_stride = n; } uint get_num_comps() const { return m_num_comps; } uint get_comp_size(uint index) const { CRNLIB_ASSERT(index < 4); return m_comp_size[index]; } uint get_comp_ofs(uint index) const { CRNLIB_ASSERT(index < 4); return m_comp_ofs[index]; } uint get_comp_max(uint index) const { CRNLIB_ASSERT(index < 4); return m_comp_max[index]; } bool get_rgb_is_luma() const { return m_rgb_is_luma; } template const void* unpack(const void* p, color_quad_type& color, bool rescale = true) const { const uint8* pSrc = static_cast(p); for (uint i = 0; i < 4; i++) { const uint comp_size = m_comp_size[i]; if (!comp_size) { if (color_quad_type::component_traits::cFloat) color[i] = static_cast((i == 3) ? 1 : 0); else color[i] = static_cast((i == 3) ? 
color_quad_type::component_traits::cMax : 0); continue; } uint n = 0, dst_bit_ofs = 0; uint src_bit_ofs = m_comp_ofs[i]; while (dst_bit_ofs < comp_size) { const uint byte_bit_ofs = src_bit_ofs & 7; n |= ((pSrc[src_bit_ofs >> 3] >> byte_bit_ofs) << dst_bit_ofs); const uint bits_read = 8 - byte_bit_ofs; src_bit_ofs += bits_read; dst_bit_ofs += bits_read; } const uint32 mx = m_comp_max[i]; n &= mx; const uint32 h = static_cast(color_quad_type::component_traits::cMax); if (color_quad_type::component_traits::cFloat) color.set_component(i, static_cast(n)); else if (rescale) color.set_component(i, static_cast((static_cast(n) * h + (mx >> 1U)) / mx)); else if (color_quad_type::component_traits::cSigned) color.set_component(i, static_cast(math::minimum(n, h))); else color.set_component(i, static_cast(n)); } if (m_rgb_is_luma) { color[0] = color[1]; color[2] = color[1]; } return pSrc + m_pixel_stride; } template void* pack(const color_quad_type& color, void* p, bool rescale = true) const { uint8* pDst = static_cast(p); for (uint i = 0; i < 4; i++) { const uint comp_size = m_comp_size[i]; if (!comp_size) continue; uint32 mx = m_comp_max[i]; uint32 n; if (color_quad_type::component_traits::cFloat) { typename color_quad_type::parameter_t t = color[i]; if (t < 0.0f) n = 0; else if (t > static_cast(mx)) n = mx; else n = math::minimum(static_cast(floor(t + .5f)), mx); } else if (rescale) { if (color_quad_type::component_traits::cSigned) n = math::maximum(static_cast(color[i]), 0); else n = static_cast(color[i]); const uint32 h = static_cast(color_quad_type::component_traits::cMax); n = static_cast((static_cast(n) * mx + (h >> 1)) / h); } else { if (color_quad_type::component_traits::cSigned) n = math::minimum(static_cast(math::maximum(static_cast(color[i]), 0)), mx); else n = math::minimum(static_cast(color[i]), mx); } uint src_bit_ofs = 0; uint dst_bit_ofs = m_comp_ofs[i]; while (src_bit_ofs < comp_size) { const uint cur_byte_bit_ofs = (dst_bit_ofs & 7); const uint cur_byte_bits 
= 8 - cur_byte_bit_ofs; uint byte_val = pDst[dst_bit_ofs >> 3]; uint bit_mask = (mx << cur_byte_bit_ofs) & 0xFF; byte_val &= ~bit_mask; byte_val |= (n << cur_byte_bit_ofs); pDst[dst_bit_ofs >> 3] = static_cast(byte_val); mx >>= cur_byte_bits; n >>= cur_byte_bits; dst_bit_ofs += cur_byte_bits; src_bit_ofs += cur_byte_bits; } } return pDst + m_pixel_stride; } bool init(uint num_comps, uint bits_per_comp, int pixel_stride = -1, bool reversed = false) { clear(); if ((num_comps < 1) || (num_comps > 4) || (bits_per_comp < 1) || (bits_per_comp > 32)) { CRNLIB_ASSERT(0); return false; } for (uint i = 0; i < num_comps; i++) { m_comp_size[i] = bits_per_comp; m_comp_ofs[i] = i * bits_per_comp; if (reversed) m_comp_ofs[i] = ((num_comps - 1) * bits_per_comp) - m_comp_ofs[i]; } for (uint i = 0; i < 4; i++) m_comp_max[i] = static_cast((1ULL << m_comp_size[i]) - 1ULL); m_pixel_stride = (pixel_stride >= 0) ? pixel_stride : (num_comps * bits_per_comp + 7) / 8; return true; } // Format examples: // R16G16B16 // B5G6R5 // B5G5R5x1 // Y8A8 // A8R8G8B8 // First component is at LSB in memory. Assumes unsigned integer components, 1-32bits each. 
bool init(const char* pComp_map, int pixel_stride = -1, int force_comp_size = -1) { clear(); uint cur_bit_ofs = 0; while (*pComp_map) { char c = *pComp_map++; int comp_index = -1; if (c == 'R') comp_index = 0; else if (c == 'G') comp_index = 1; else if (c == 'B') comp_index = 2; else if (c == 'A') comp_index = 3; else if (c == 'Y') comp_index = 4; else if (c != 'x') return false; uint comp_size = 0; uint n = *pComp_map; if ((n >= '0') && (n <= '9')) { comp_size = n - '0'; pComp_map++; n = *pComp_map; if ((n >= '0') && (n <= '9')) { comp_size = (comp_size * 10) + (n - '0'); pComp_map++; } } if (force_comp_size != -1) comp_size = force_comp_size; if ((!comp_size) || (comp_size > 32)) return false; if (comp_index == 4) { if (m_comp_size[0] || m_comp_size[1] || m_comp_size[2]) return false; //m_comp_ofs[0] = m_comp_ofs[1] = m_comp_ofs[2] = cur_bit_ofs; //m_comp_size[0] = m_comp_size[1] = m_comp_size[2] = comp_size; m_comp_ofs[1] = cur_bit_ofs; m_comp_size[1] = comp_size; m_rgb_is_luma = true; m_num_comps++; } else if (comp_index >= 0) { if (m_comp_size[comp_index]) return false; m_comp_ofs[comp_index] = cur_bit_ofs; m_comp_size[comp_index] = comp_size; m_num_comps++; } cur_bit_ofs += comp_size; } for (uint i = 0; i < 4; i++) m_comp_max[i] = static_cast((1ULL << m_comp_size[i]) - 1ULL); if (pixel_stride >= 0) m_pixel_stride = pixel_stride; else m_pixel_stride = (cur_bit_ofs + 7) / 8; return true; } private: uint m_pixel_stride; uint m_num_comps; uint m_comp_size[4]; uint m_comp_ofs[4]; uint m_comp_max[4]; bool m_rgb_is_luma; }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_colorized_console.cpp000066400000000000000000000054071503722002600243600ustar00rootroot00000000000000// File: crn_colorized_console.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_colorized_console.h" #ifdef CRNLIB_USE_WIN32_API #include "crn_winhdr.h" #endif namespace crnlib { void colorized_console::init() { console::init(); 
console::add_console_output_func(console_output_func, NULL);
}

// Unregisters the colorizing output callback, then shuts the console down.
void colorized_console::deinit() {
  console::remove_console_output_func(console_output_func);
  console::deinit();
}

// Intentionally empty.
void colorized_console::tick() {
}

#ifdef CRNLIB_USE_WIN32_API
// Win32 output callback: prints pMsg to stdout, colored by message type.
//   type - selects the text color and, when prefixes are enabled and the console is at the
//          beginning of a line, the "Debug: " / "Warning: " / "Error: " prefix.
//   pMsg - message text, printed via "%s".
// Always returns true.
bool colorized_console::console_output_func(eConsoleMessageType type, const char* pMsg, void*) {
  if (console::get_output_disabled())
    return true;

  HANDLE cons = GetStdHandle(STD_OUTPUT_HANDLE);

  // Default attribute is red|green|blue; specific message types override it below.
  DWORD attr = FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE;
  switch (type) {
    case cDebugConsoleMessage:
      attr = FOREGROUND_BLUE | FOREGROUND_INTENSITY;
      break;
    case cMessageConsoleMessage:
      attr = FOREGROUND_GREEN | FOREGROUND_BLUE | FOREGROUND_INTENSITY;
      break;
    case cWarningConsoleMessage:
      attr = FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_INTENSITY;
      break;
    case cErrorConsoleMessage:
      attr = FOREGROUND_RED | FOREGROUND_INTENSITY;
      break;
    default:
      break;
  }

  if (INVALID_HANDLE_VALUE != cons)
    SetConsoleTextAttribute(cons, (WORD)attr);

  if ((console::get_prefixes()) && (console::get_at_beginning_of_line())) {
    switch (type) {
      case cDebugConsoleMessage:
        printf("Debug: %s", pMsg);
        break;
      case cWarningConsoleMessage:
        printf("Warning: %s", pMsg);
        break;
      case cErrorConsoleMessage:
        printf("Error: %s", pMsg);
        break;
      default:
        printf("%s", pMsg);
        break;
    }
  } else {
    printf("%s", pMsg);
  }

  if (console::get_crlf())
    printf("\n");

  // Resets to a fixed red|green|blue attribute; the attribute that was active before
  // this call is not saved or restored.
  if (INVALID_HANDLE_VALUE != cons)
    SetConsoleTextAttribute(cons, FOREGROUND_RED | FOREGROUND_GREEN | FOREGROUND_BLUE);

  return true;
}
#else
// Portable output callback: same prefix handling as the Win32 variant, without coloring.
bool colorized_console::console_output_func(eConsoleMessageType type, const char* pMsg, void*) {
  if (console::get_output_disabled())
    return true;

  if ((console::get_prefixes()) && (console::get_at_beginning_of_line())) {
    switch (type) {
      case cDebugConsoleMessage:
        printf("Debug: %s", pMsg);
        break;
      case cWarningConsoleMessage:
        printf("Warning: %s", pMsg);
        break;
      case cErrorConsoleMessage:
        printf("Error: %s", pMsg);
        break;
      default:
        printf("%s", pMsg);
        break;
    }
  } else {
    printf("%s", pMsg);
  }

  if
(console::get_crlf()) printf("\n"); return true; } #endif } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_colorized_console.h000066400000000000000000000006241503722002600240210ustar00rootroot00000000000000// File: crn_colorized_console.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_console.h" namespace crnlib { class colorized_console { public: static void init(); static void deinit(); static void tick(); private: static bool console_output_func(eConsoleMessageType type, const char* pMsg, void* pData); }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_command_line_params.cpp000066400000000000000000000262271503722002600246370ustar00rootroot00000000000000// File: crn_command_line_params.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_command_line_params.h" #include "crn_console.h" #include "crn_cfile_stream.h" #ifdef WIN32 #define CRNLIB_CMD_LINE_ALLOW_SLASH_PARAMS 1 #endif #if CRNLIB_USE_WIN32_API #include "crn_winhdr.h" #endif namespace crnlib { #if CRNLIB_USE_WIN32_API void get_command_line_as_single_string(dynamic_string& cmd_line, int /* argc */, char** /* argv */) { cmd_line.set(GetCommandLineA()); #else void get_command_line_as_single_string(dynamic_string& cmd_line, int argc, char* argv[]) { cmd_line.clear(); for (int i = 0; i < argc; i++) { dynamic_string tmp(argv[i]); if ((tmp.front() != '"') && (tmp.front() != '-') && (tmp.front() != '@')) tmp = "\"" + tmp + "\""; if (cmd_line.get_len()) cmd_line += " "; cmd_line += tmp; } #endif } command_line_params::command_line_params() { } void command_line_params::clear() { m_params.clear(); m_param_map.clear(); } bool command_line_params::split_params(const char* p, dynamic_string_array& params) { bool within_param = false; bool within_quote = false; uint ofs = 0; dynamic_string str; while (p[ofs]) { const char c = p[ofs]; if (within_param) { if (within_quote) { if (c == '"') within_quote = 
false; str.append_char(c); } else if ((c == ' ') || (c == '\t')) { if (!str.is_empty()) { params.push_back(str); str.clear(); } within_param = false; } else { if (c == '"') within_quote = true; str.append_char(c); } } else if ((c != ' ') && (c != '\t')) { within_param = true; if (c == '"') within_quote = true; str.append_char(c); } ofs++; } if (within_quote) { console::error("Unmatched quote in command line \"%s\"", p); return false; } if (!str.is_empty()) params.push_back(str); return true; } bool command_line_params::load_string_file(const char* pFilename, dynamic_string_array& strings) { cfile_stream in_stream; if (!in_stream.open(pFilename, cDataStreamReadable | cDataStreamSeekable)) { console::error("Unable to open file \"%s\" for reading!", pFilename); return false; } dynamic_string ansi_str; for (;;) { if (!in_stream.read_line(ansi_str)) break; ansi_str.trim(); if (ansi_str.is_empty()) continue; strings.push_back(dynamic_string(ansi_str.get_ptr())); } return true; } bool command_line_params::parse(const dynamic_string_array& params, uint n, const param_desc* pParam_desc) { CRNLIB_ASSERT(n && pParam_desc); m_params = params; uint arg_index = 0; while (arg_index < params.size()) { const uint cur_arg_index = arg_index; const dynamic_string& src_param = params[arg_index++]; if (src_param.is_empty()) continue; #if CRNLIB_CMD_LINE_ALLOW_SLASH_PARAMS if ((src_param[0] == '/') || (src_param[0] == '-')) #else if (src_param[0] == '-') #endif { if (src_param.get_len() < 2) { console::error("Invalid command line parameter: \"%s\"", src_param.get_ptr()); return false; } dynamic_string key_str(src_param); key_str.right(1); #if CRNLIB_CMD_LINE_ALLOW_SLASH_PARAMS if ((src_param == "/?") || (src_param == "--help")) #else if (src_param == "--help") #endif { key_str.set("h"); } int modifier = 0; char c = key_str[key_str.get_len() - 1]; if (c == '+') modifier = 1; else if (c == '-') modifier = -1; if (modifier) key_str.left(key_str.get_len() - 1); uint param_index; for 
(param_index = 0; param_index < n; param_index++) if (key_str == pParam_desc[param_index].m_pName) break; if (param_index == n) { console::error("Unrecognized command line parameter: \"%s\"", src_param.get_ptr()); return false; } const param_desc& desc = pParam_desc[param_index]; const uint cMaxValues = 16; dynamic_string val_str[cMaxValues]; uint num_val_strs = 0; if (desc.m_num_values) { CRNLIB_ASSERT(desc.m_num_values <= cMaxValues); if ((arg_index + desc.m_num_values) > params.size()) { console::error("Expected %u value(s) after command line parameter: \"%s\"", desc.m_num_values, src_param.get_ptr()); return false; } for (uint v = 0; v < desc.m_num_values; v++) val_str[num_val_strs++] = params[arg_index++]; } dynamic_string_array strings; if ((desc.m_support_listing_file) && (val_str[0].get_len() >= 2) && (val_str[0][0] == '@')) { dynamic_string filename(val_str[0]); filename.right(1); filename.unquote(); if (!load_string_file(filename.get_ptr(), strings)) { console::error("Failed loading listing file \"%s\"!", filename.get_ptr()); return false; } } else { for (uint v = 0; v < num_val_strs; v++) { val_str[v].unquote(); strings.push_back(val_str[v]); } } param_value pv; pv.m_values.swap(strings); pv.m_index = cur_arg_index; pv.m_modifier = (int8)modifier; m_param_map.insert(std::make_pair(key_str, pv)); } else { param_value pv; pv.m_values.push_back(src_param); pv.m_values.back().unquote(); pv.m_index = cur_arg_index; m_param_map.insert(std::make_pair(g_empty_dynamic_string, pv)); } } return true; } bool command_line_params::parse(const char* pCmd_line, uint n, const param_desc* pParam_desc, bool skip_first_param) { CRNLIB_ASSERT(n && pParam_desc); dynamic_string_array p; if (!split_params(pCmd_line, p)) return 0; if (p.empty()) return 0; if (skip_first_param) p.erase(0U); return parse(p, n, pParam_desc); } bool command_line_params::is_param(uint index) const { CRNLIB_ASSERT(index < m_params.size()); if (index >= m_params.size()) return false; const 
dynamic_string& w = m_params[index];
  // (Tail of is_param().) A switch needs at least two characters and a leading '-'
  // (or '/' when slash parameters are enabled).
  if (w.is_empty())
    return false;

#if CRNLIB_CMD_LINE_ALLOW_SLASH_PARAMS
  return (w.get_len() >= 2) && ((w[0] == '-') || (w[0] == '/'));
#else
  return (w.get_len() >= 2) && (w[0] == '-');
#endif
}

// Looks up every occurrence of each key in ppKeys.
//   num_keys           - number of entries in ppKeys.
//   ppKeys             - keys to look up (must not be NULL).
//   pIterators         - optional; receives an iterator for every match found.
//   pUnmatched_indices - optional; first filled with 0..m_params.size()-1, then the
//                        argument index of every match is removed (order not preserved).
// Returns the total number of matches.
// NOTE(review): the element types of the two vector parameters are not visible here
// (template arguments appear to be missing) - confirm against the header.
uint command_line_params::find(uint num_keys, const char** ppKeys, crnlib::vector* pIterators, crnlib::vector* pUnmatched_indices) const {
  CRNLIB_ASSERT(ppKeys);

  if (pUnmatched_indices) {
    pUnmatched_indices->resize(m_params.size());
    for (uint i = 0; i < m_params.size(); i++)
      (*pUnmatched_indices)[i] = i;
  }

  uint n = 0;
  for (uint i = 0; i < num_keys; i++) {
    const char* pKey = ppKeys[i];

    param_map_const_iterator begin, end;
    find(pKey, begin, end);

    while (begin != end) {
      if (pIterators)
        pIterators->push_back(begin);

      if (pUnmatched_indices) {
        int k = pUnmatched_indices->find(begin->second.m_index);
        if (k >= 0)
          pUnmatched_indices->erase_unordered(k);
      }

      n++;
      begin++;
    }
  }

  return n;
}

// Sets [begin, end) to the range of map entries whose key equals pKey.
void command_line_params::find(const char* pKey, param_map_const_iterator& begin, param_map_const_iterator& end) const {
  dynamic_string key(pKey);
  begin = m_param_map.lower_bound(key);
  end = m_param_map.upper_bound(key);
}

// Returns how many times pKey occurs in the parsed parameters.
uint command_line_params::get_count(const char* pKey) const {
  param_map_const_iterator begin, end;
  find(pKey, begin, end);

  uint n = 0;

  while (begin != end) {
    n++;
    begin++;
  }

  return n;
}

// Returns an iterator to the index'th occurrence of pKey, or m_param_map.end() when the
// key is absent or has fewer than index+1 occurrences.
command_line_params::param_map_const_iterator command_line_params::get_param(const char* pKey, uint index) const {
  param_map_const_iterator begin, end;
  find(pKey, begin, end);

  if (begin == end)
    return m_param_map.end();

  // Advance to the requested occurrence, stopping early if the range runs out.
  uint n = 0;

  while ((begin != end) && (n != index)) {
    n++;
    begin++;
  }

  if (begin == end)
    return m_param_map.end();

  return begin;
}

// True when the index'th occurrence of pKey exists and carries at least one value.
bool command_line_params::has_value(const char* pKey, uint index) const {
  return get_num_values(pKey, index) != 0;
}

// Number of values stored for the index'th occurrence of pKey; 0 when it does not exist.
uint command_line_params::get_num_values(const char* pKey, uint index) const {
  param_map_const_iterator it = get_param(pKey, index);

  if (it == end())
    return 0;

  return it->second.m_values.size();
}

// Interprets the index'th occurrence of pKey as a flag.
// Returns def when the parameter is absent. When present, a non-zero modifier decides the
// result (positive -> true, negative -> false); with no modifier the result is true.
bool command_line_params::get_value_as_bool(const char* pKey, uint index, bool def) const {
  param_map_const_iterator it = get_param(pKey, index);
  if (it == end())
    return def;

  if (it->second.m_modifier)
    return it->second.m_modifier > 0;
  else
    return true;
}

// Returns value `value_index` of the index'th occurrence of pKey as an int.
// Returns def when the parameter or value is absent, or (with a warning) when the text
// does not convert. Converted values outside [l, h] are clamped with a warning.
int command_line_params::get_value_as_int(const char* pKey, uint index, int def, int l, int h, uint value_index) const {
  param_map_const_iterator it = get_param(pKey, index);
  if ((it == end()) || (value_index >= it->second.m_values.size()))
    return def;

  int val;
  const char* p = it->second.m_values[value_index].get_ptr();
  if (!string_to_int(p, val)) {
    crnlib::console::warning("Invalid value specified for parameter \"%s\", using default value of %i", pKey, def);
    return def;
  }

  if (val < l) {
    crnlib::console::warning("Value %i for parameter \"%s\" is out of range, clamping to %i", val, pKey, l);
    val = l;
  } else if (val > h) {
    crnlib::console::warning("Value %i for parameter \"%s\" is out of range, clamping to %i", val, pKey, h);
    val = h;
  }

  return val;
}

// Float counterpart of get_value_as_int(): same lookup, default and clamping rules.
float command_line_params::get_value_as_float(const char* pKey, uint index, float def, float l, float h, uint value_index) const {
  param_map_const_iterator it = get_param(pKey, index);
  if ((it == end()) || (value_index >= it->second.m_values.size()))
    return def;

  float val;
  const char* p = it->second.m_values[value_index].get_ptr();
  if (!string_to_float(p, val)) {
    crnlib::console::warning("Invalid value specified for float parameter \"%s\", using default value of %f", pKey, def);
    return def;
  }

  if (val < l) {
    crnlib::console::warning("Value %f for parameter \"%s\" is out of range, clamping to %f", val, pKey, l);
    val = l;
  } else if (val > h) {
    crnlib::console::warning("Value %f for parameter \"%s\" is out of range, clamping to %f", val, pKey, h);
    val = h;
  }

  return val;
}

// Copies value `value_index` of the index'th occurrence of pKey into `value`.
// (The body continues on the following line of the file.)
bool command_line_params::get_value_as_string(const char* pKey, uint index, dynamic_string& value, uint value_index) const {
  param_map_const_iterator it = get_param(pKey, index);
  if ((it == end()) ||
(value_index >= it->second.m_values.size())) { value.empty(); return false; } value = it->second.m_values[value_index]; return true; } const dynamic_string& command_line_params::get_value_as_string_or_empty(const char* pKey, uint index, uint value_index) const { param_map_const_iterator it = get_param(pKey, index); if ((it == end()) || (value_index >= it->second.m_values.size())) return g_empty_dynamic_string; return it->second.m_values[value_index]; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_command_line_params.h000066400000000000000000000061251503722002600242770ustar00rootroot00000000000000// File: crn_command_line_params.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_value.h" #include namespace crnlib { // Returns the command line passed to the app as a string. // On systems where this isn't trivial, this function combines together the separate arguments, quoting and adding spaces as needed. void get_command_line_as_single_string(dynamic_string& cmd_line, int argc, char* argv[]); class command_line_params { public: struct param_value { inline param_value() : m_index(0), m_modifier(0) {} dynamic_string_array m_values; uint m_index; int8 m_modifier; }; typedef std::multimap param_map; typedef param_map::const_iterator param_map_const_iterator; typedef param_map::iterator param_map_iterator; command_line_params(); void clear(); static bool split_params(const char* p, dynamic_string_array& params); struct param_desc { const char* m_pName; uint m_num_values; bool m_support_listing_file; }; bool parse(const dynamic_string_array& params, uint n, const param_desc* pParam_desc); bool parse(const char* pCmd_line, uint n, const param_desc* pParam_desc, bool skip_first_param = true); const dynamic_string_array& get_array() const { return m_params; } bool is_param(uint index) const; const param_map& get_map() const { return m_param_map; } uint get_num_params() const { return static_cast(m_param_map.size()); } 
param_map_const_iterator begin() const { return m_param_map.begin(); } param_map_const_iterator end() const { return m_param_map.end(); } uint find(uint num_keys, const char** ppKeys, crnlib::vector* pIterators, crnlib::vector* pUnmatched_indices) const; void find(const char* pKey, param_map_const_iterator& begin, param_map_const_iterator& end) const; uint get_count(const char* pKey) const; // Returns end() if param cannot be found, or index is out of range. param_map_const_iterator get_param(const char* pKey, uint index) const; bool has_key(const char* pKey) const { return get_param(pKey, 0) != end(); } bool has_value(const char* pKey, uint index) const; uint get_num_values(const char* pKey, uint index) const; bool get_value_as_bool(const char* pKey, uint index = 0, bool def = false) const; int get_value_as_int(const char* pKey, uint index, int def, int l = INT_MIN, int h = INT_MAX, uint value_index = 0) const; float get_value_as_float(const char* pKey, uint index, float def = 0.0f, float l = -math::cNearlyInfinite, float h = math::cNearlyInfinite, uint value_index = 0) const; bool get_value_as_string(const char* pKey, uint index, dynamic_string& value, uint value_index = 0) const; const dynamic_string& get_value_as_string_or_empty(const char* pKey, uint index = 0, uint value_index = 0) const; private: dynamic_string_array m_params; param_map m_param_map; static bool load_string_file(const char* pFilename, dynamic_string_array& strings); }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_comp.cpp000066400000000000000000001416511503722002600216040ustar00rootroot00000000000000// File: crn_comp.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_console.h" #include "crn_comp.h" #include "crn_checksum.h" #define CRNLIB_CREATE_DEBUG_IMAGES 0 #define CRNLIB_ENABLE_DEBUG_MESSAGES 0 namespace crnlib { crn_comp::crn_comp() : m_pParams(NULL) { } crn_comp::~crn_comp() { } bool 
crn_comp::pack_color_endpoints(crnlib::vector& packed_data, const crnlib::vector& remapping) { crnlib::vector remapped_endpoints(m_color_endpoints.size()); for (uint i = 0; i < m_color_endpoints.size(); i++) remapped_endpoints[remapping[i]] = m_color_endpoints[i]; const uint component_limits[6] = {31, 63, 31, 31, 63, 31}; symbol_histogram hist[2]; hist[0].resize(32); hist[1].resize(64); crnlib::vector residual_syms; residual_syms.reserve(m_color_endpoints.size() * 2 * 3); color_quad_u8 prev[2]; prev[0].clear(); prev[1].clear(); for (uint endpoint_index = 0; endpoint_index < m_color_endpoints.size(); endpoint_index++) { const uint endpoint = remapped_endpoints[endpoint_index]; color_quad_u8 cur[2]; cur[0] = dxt1_block::unpack_color((uint16)(endpoint & 0xFFFF), false); cur[1] = dxt1_block::unpack_color((uint16)((endpoint >> 16) & 0xFFFF), false); for (uint j = 0; j < 2; j++) { for (uint k = 0; k < 3; k++) { int delta = cur[j][k] - prev[j][k]; int sym = delta & component_limits[j * 3 + k]; int table = (k == 1) ? 1 : 0; hist[table].inc_freq(sym); residual_syms.push_back(sym); } } prev[0] = cur[0]; prev[1] = cur[1]; } static_huffman_data_model residual_dm[2]; symbol_codec codec; codec.start_encoding(1024 * 1024); // Transmit residuals for (uint i = 0; i < 2; i++) { if (!residual_dm[i].init(true, hist[i], 15)) return false; if (!codec.encode_transmit_static_huffman_data_model(residual_dm[i], false)) return false; } for (uint i = 0; i < residual_syms.size(); i++) { const uint sym = residual_syms[i]; const uint table = ((i % 3) == 1) ? 
1 : 0; codec.encode(sym, residual_dm[table]); } codec.stop_encoding(false); packed_data.swap(codec.get_encoding_buf()); return true; } bool crn_comp::pack_color_endpoints_etc(crnlib::vector& packed_data, const crnlib::vector& remapping) { crnlib::vector remapped_endpoints(m_color_endpoints.size()); for (uint i = 0; i < m_color_endpoints.size(); i++) remapped_endpoints[remapping[i]] = (m_color_endpoints[i] & 0x07000000) | (m_color_endpoints[i] >> 3 & 0x001F1F1F); symbol_histogram hist(32); for (uint32 prev_endpoint = 0, p = 0; p < remapped_endpoints.size(); p++) { for (uint32 _e = prev_endpoint, e = prev_endpoint = remapped_endpoints[p], c = 0; c < 4; c++, _e >>= 8, e >>= 8) hist.inc_freq((e - _e) & 0x1F); } static_huffman_data_model dm; dm.init(true, hist, 15); symbol_codec codec; codec.start_encoding(1024 * 1024); codec.encode_transmit_static_huffman_data_model(dm, false); for (uint32 prev_endpoint = 0, p = 0; p < remapped_endpoints.size(); p++) { for (uint32 _e = prev_endpoint, e = prev_endpoint = remapped_endpoints[p], c = 0; c < 4; c++, _e >>= 8, e >>= 8) codec.encode((e - _e) & 0x1F, dm); } codec.stop_encoding(false); packed_data.swap(codec.get_encoding_buf()); return true; } bool crn_comp::pack_alpha_endpoints(crnlib::vector& packed_data, const crnlib::vector& remapping) { crnlib::vector remapped_endpoints(m_alpha_endpoints.size()); for (uint i = 0; i < m_alpha_endpoints.size(); i++) remapped_endpoints[remapping[i]] = m_alpha_endpoints[i]; symbol_histogram hist; hist.resize(256); crnlib::vector residual_syms; residual_syms.reserve(m_alpha_endpoints.size() * 2 * 3); uint prev[2]; utils::zero_object(prev); for (uint endpoint_index = 0; endpoint_index < m_alpha_endpoints.size(); endpoint_index++) { const uint endpoint = remapped_endpoints[endpoint_index]; uint cur[2]; cur[0] = dxt5_block::unpack_endpoint(endpoint, 0); cur[1] = dxt5_block::unpack_endpoint(endpoint, 1); for (uint j = 0; j < 2; j++) { int delta = cur[j] - prev[j]; int sym = delta & 255; 
hist.inc_freq(sym); residual_syms.push_back(sym); } prev[0] = cur[0]; prev[1] = cur[1]; } static_huffman_data_model residual_dm; symbol_codec codec; codec.start_encoding(1024 * 1024); // Transmit residuals if (!residual_dm.init(true, hist, 15)) return false; if (!codec.encode_transmit_static_huffman_data_model(residual_dm, false)) return false; for (uint i = 0; i < residual_syms.size(); i++) { const uint sym = residual_syms[i]; codec.encode(sym, residual_dm); } codec.stop_encoding(false); packed_data.swap(codec.get_encoding_buf()); return true; } bool crn_comp::pack_color_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping) { crnlib::vector remapped_selectors(m_color_selectors.size()); for (uint i = 0; i < m_color_selectors.size(); i++) remapped_selectors[remapping[i]] = m_color_selectors[i]; symbol_histogram hist(16); for (uint32 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) { for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 4) hist.inc_freq(selector & 0xF); } static_huffman_data_model dm; dm.init(true, hist, 15); symbol_codec codec; codec.start_encoding(1024 * 1024); codec.encode_transmit_static_huffman_data_model(dm, false); for (uint32 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) { for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 4) codec.encode(selector & 0xF, dm); } codec.stop_encoding(false); packed_data.swap(codec.get_encoding_buf()); return true; } bool crn_comp::pack_alpha_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping) { crnlib::vector remapped_selectors(m_alpha_selectors.size()); for (uint i = 0; i < m_alpha_selectors.size(); i++) remapped_selectors[remapping[i]] = m_alpha_selectors[i]; symbol_histogram hist(64); for (uint64 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) { for (selector = prev_selector ^ 
remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 6) hist.inc_freq(selector & 0x3F); } static_huffman_data_model dm; dm.init(true, hist, 15); symbol_codec codec; codec.start_encoding(1024 * 1024); codec.encode_transmit_static_huffman_data_model(dm, false); for (uint64 c, selector, prev_selector = 0, i = 0; i < remapped_selectors.size(); i++) { for (selector = prev_selector ^ remapped_selectors[i], prev_selector ^= selector, c = 8; c; c--, selector >>= 6) codec.encode(selector & 0x3F, dm); } codec.stop_encoding(false); packed_data.swap(codec.get_encoding_buf()); return true; } bool crn_comp::pack_blocks( uint group, bool clear_histograms, symbol_codec* pCodec, const crnlib::vector* pColor_endpoint_remap, const crnlib::vector* pColor_selector_remap, const crnlib::vector* pAlpha_endpoint_remap, const crnlib::vector* pAlpha_selector_remap ) { if (!pCodec) { m_reference_hist.resize(256); if (clear_histograms) m_reference_hist.set_all(0); if (pColor_endpoint_remap) { m_endpoint_index_hist[0].resize(pColor_endpoint_remap->size()); if (clear_histograms) m_endpoint_index_hist[0].set_all(0); } if (pColor_selector_remap) { m_selector_index_hist[0].resize(pColor_selector_remap->size()); if (clear_histograms) m_selector_index_hist[0].set_all(0); } if (pAlpha_endpoint_remap) { m_endpoint_index_hist[1].resize(pAlpha_endpoint_remap->size()); if (clear_histograms) m_endpoint_index_hist[1].set_all(0); } if (pAlpha_selector_remap) { m_selector_index_hist[1].resize(pAlpha_selector_remap->size()); if (clear_histograms) m_selector_index_hist[1].set_all(0); } } uint endpoint_index[cNumComps] = {}; const crnlib::vector* endpoint_remap[cNumComps] = {}; const crnlib::vector* selector_remap[cNumComps] = {}; for (uint c = 0; c < cNumComps; c++) { if (m_has_comp[c]) { endpoint_remap[c] = c ? pAlpha_endpoint_remap : pColor_endpoint_remap; selector_remap[c] = c ? 
pAlpha_selector_remap : pColor_selector_remap;
    }
  }

  // (Remainder of pack_blocks().)
  uint block_width = m_levels[group].block_width;
  // b counts blocks linearly across the level; by counts rows and bx counts columns.
  for (uint by = 0, b = m_levels[group].first_block, bEnd = b + m_levels[group].num_blocks; b < bEnd; by++) {
    for (uint bx = 0; bx < block_width; bx++, b++) {
      // '&' binds tighter than '&&': this is m_has_subblocks && (bx & 1).
      const bool secondary_etc_subblock = m_has_subblocks && bx & 1;

      // At the top-left block of every 2x2 group, pack the four 2-bit reference fields
      // into one byte and code it as a single symbol. This reads b + block_width + 1, so
      // it relies on the level having an even number of block rows and columns
      // (presumably guaranteed by the rounding in alias_images() - verify).
      if (!(by & 1) && !(bx & 1)) {
        uint8 reference_group = m_endpoint_indices[b].reference | m_endpoint_indices[b + block_width].reference << 2 |
          m_endpoint_indices[b + 1].reference << 4 | m_endpoint_indices[b + block_width + 1].reference << 6;
        if (pCodec)
          pCodec->encode(reference_group, m_reference_dm);
        else
          m_reference_hist.inc_freq(reference_group);
      }

      // Endpoint indices: a secondary subblock only visits components below cAlpha0.
      for (uint c = 0, cEnd = secondary_etc_subblock ? cAlpha0 : cNumComps; c < cEnd; c++) {
        if (endpoint_remap[c]) {
          uint index = (*endpoint_remap[c])[m_endpoint_indices[b].component[c]];
          // An index is coded for a primary block when its reference field is zero, and
          // for a secondary subblock when its reference field is non-zero.
          if (secondary_etc_subblock ? m_endpoint_indices[b].reference : !m_endpoint_indices[b].reference) {
            // Code the difference from the previous index, wrapped into [0, table size).
            int sym = index - endpoint_index[c];
            if (sym < 0)
              sym += endpoint_remap[c]->size();
            if (!pCodec)
              m_endpoint_index_hist[c ? 1 : 0].inc_freq(sym);
            else
              pCodec->encode(sym, m_endpoint_index_dm[c ? 1 : 0]);
          }
          // The running predictor is updated even when no symbol was emitted.
          endpoint_index[c] = index;
        }
      }

      // Selector indices: coded directly (no delta); skipped for secondary subblocks.
      for (uint c = 0, cEnd = secondary_etc_subblock ? 0 : cNumComps; c < cEnd; c++) {
        if (selector_remap[c]) {
          uint index = (*selector_remap[c])[m_selector_indices[b].component[c]];
          if (!pCodec)
            m_selector_index_hist[c ? 1 : 0].inc_freq(index);
          else
            pCodec->encode(index, m_selector_index_dm[c ? 1 : 0]);
        }
      }
    }
  }
  return true;
}

// Wraps the caller's source images, optionally converts them for the target format, and
// computes the per-level block layout.
// Returns false if any face/level image pointer in the parameters is NULL.
bool crn_comp::alias_images() {
  // Alias (no copy) every face/level image at its mip-reduced size, minimum 1x1.
  for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) {
    for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) {
      const uint width = math::maximum(1U, m_pParams->m_width >> level_index);
      const uint height = math::maximum(1U, m_pParams->m_height >> level_index);

      if (!m_pParams->m_pImages[face_index][level_index])
        return false;

      m_images[face_index][level_index].alias((color_quad_u8*)m_pParams->m_pImages[face_index][level_index], width, height);
    }
  }

  // Formats that need a conversion get a converted copy in place of the alias.
  image_utils::conversion_type conv_type = image_utils::get_image_conversion_type_from_crn_format((crn_format)m_pParams->m_format);
  if (conv_type != image_utils::cConversion_Invalid) {
    for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) {
      for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) {
        image_u8 cooked_image(m_images[face_index][level_index]);

        image_utils::convert_image(cooked_image, conv_type);

        m_images[face_index][level_index].swap(cooked_image);
      }
    }
  }

  // Block layout: dimensions are rounded up to a multiple of 8 pixels, then divided by 4
  // (by 2 horizontally when m_has_subblocks is set). first_block is the running total, so
  // levels are laid out consecutively.
  m_levels.resize(m_pParams->m_levels);
  m_total_blocks = 0;
  for (uint level = 0; level < m_pParams->m_levels; level++) {
    uint blockHeight = ((math::maximum(1U, m_pParams->m_height >> level) + 7) & ~7) >> 2;
    m_levels[level].block_width = ((math::maximum(1U, m_pParams->m_width >> level) + 7) & ~7) >> (m_has_subblocks ? 1 : 2);
    m_levels[level].first_block = m_total_blocks;
    m_levels[level].num_blocks = m_pParams->m_faces * m_levels[level].block_width * blockHeight;
    m_total_blocks += m_levels[level].num_blocks;
  }

  return true;
}

// Resets the members listed below to their empty/zero state.
void crn_comp::clear() {
  m_pParams = NULL;

  for (uint f = 0; f < cCRNMaxFaces; f++)
    for (uint l = 0; l < cCRNMaxLevels; l++)
      m_images[f][l].clear();

  utils::zero_object(m_has_comp);
  m_has_etc_color_blocks = false;
  m_has_subblocks = false;

  m_levels.clear();

  m_total_blocks = 0;
  m_color_endpoints.clear();
  m_alpha_endpoints.clear();
  m_color_selectors.clear();
  m_alpha_selectors.clear();
  m_endpoint_indices.clear();
  m_selector_indices.clear();

  utils::zero_object(m_crn_header);

  m_comp_data.clear();

  m_hvq.clear();

  m_reference_hist.clear();
  m_reference_dm.clear();
  // Index 0 and 1 of these arrays are the two streams used by pack_blocks().
  for (uint i = 0; i < 2; i++) {
    m_endpoint_remaping[i].clear();
    m_endpoint_index_hist[i].clear();
    m_endpoint_index_dm[i].clear();
    m_selector_remaping[i].clear();
    m_selector_index_hist[i].clear();
    m_selector_index_dm[i].clear();
  }

  for (uint i = 0; i < cCRNMaxLevels; i++)
    m_packed_blocks[i].clear();

  m_packed_data_models.clear();

  m_packed_color_endpoints.clear();
  m_packed_color_selectors.clear();
  m_packed_alpha_endpoints.clear();
  m_packed_alpha_selectors.clear();
}

// Builds the dxt_hc quantizer parameters from the compression parameters.
// (Only the head of this function is on this line; it continues on the following lines.)
bool crn_comp::quantize_images() {
  dxt_hc::params params;

  params.m_adaptive_tile_alpha_psnr_derating = m_pParams->m_crn_adaptive_tile_alpha_psnr_derating;
  params.m_adaptive_tile_color_psnr_derating = m_pParams->m_crn_adaptive_tile_color_psnr_derating;

  if (m_pParams->m_flags & cCRNCompFlagManualPaletteSizes) {
    // Caller-supplied palette sizes, clamped to the supported range.
    params.m_color_endpoint_codebook_size = math::clamp(m_pParams->m_crn_color_endpoint_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize);
    params.m_color_selector_codebook_size = math::clamp(m_pParams->m_crn_color_selector_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize);
    params.m_alpha_endpoint_codebook_size = math::clamp(m_pParams->m_crn_alpha_endpoint_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize);
params.m_alpha_selector_codebook_size = math::clamp(m_pParams->m_crn_alpha_selector_palette_size, cCRNMinPaletteSize, cCRNMaxPaletteSize); } else { uint max_codebook_entries = ((m_pParams->m_width + 3) / 4) * ((m_pParams->m_height + 3) / 4); max_codebook_entries = math::clamp(max_codebook_entries, cCRNMinPaletteSize, cCRNMaxPaletteSize); float quality = math::clamp((float)m_pParams->m_quality_level / cCRNMaxQualityLevel, 0.0f, 1.0f); float color_quality_power_mul = 1.0f; float alpha_quality_power_mul = 1.0f; if (m_has_etc_color_blocks) { color_quality_power_mul = m_has_subblocks ? 1.31f : 0.7f; params.m_adaptive_tile_color_psnr_derating = m_has_subblocks ? 5.0f : 2.0f; } if (m_pParams->m_format == cCRNFmtDXT5_CCxY) { color_quality_power_mul = 3.5f; alpha_quality_power_mul = .35f; params.m_adaptive_tile_color_psnr_derating = 5.0f; } else if (m_pParams->m_format == cCRNFmtDXT5) { color_quality_power_mul = .75f; } else if (m_pParams->m_format == cCRNFmtETC2A) { alpha_quality_power_mul = .9f; } float color_endpoint_quality = powf(quality, 1.8f * color_quality_power_mul); float color_selector_quality = powf(quality, 1.65f * color_quality_power_mul); params.m_color_endpoint_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(64, cCRNMinPaletteSize), (float)max_codebook_entries, color_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); params.m_color_selector_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(96, cCRNMinPaletteSize), (float)max_codebook_entries, color_selector_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); float alpha_endpoint_quality = powf(quality, 2.1f * alpha_quality_power_mul); float alpha_selector_quality = powf(quality, 1.65f * alpha_quality_power_mul); params.m_alpha_endpoint_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(24, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_endpoint_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); 
params.m_alpha_selector_codebook_size = math::clamp(math::float_to_uint(.5f + math::lerp(math::maximum(48, cCRNMinPaletteSize), (float)max_codebook_entries, alpha_selector_quality)), cCRNMinPaletteSize, cCRNMaxPaletteSize); } if (m_pParams->m_flags & cCRNCompFlagDebugging) { console::debug("Color endpoints: %u", params.m_color_endpoint_codebook_size); console::debug("Color selectors: %u", params.m_color_selector_codebook_size); console::debug("Alpha endpoints: %u", params.m_alpha_endpoint_codebook_size); console::debug("Alpha selectors: %u", params.m_alpha_selector_codebook_size); } params.m_hierarchical = (m_pParams->m_flags & cCRNCompFlagHierarchical) != 0; params.m_perceptual = (m_pParams->m_flags & cCRNCompFlagPerceptual) != 0; params.m_pProgress_func = m_pParams->m_pProgress_func; params.m_pProgress_func_data = m_pParams->m_pProgress_func_data; switch (m_pParams->m_format) { case cCRNFmtDXT1: { params.m_format = cDXT1; m_has_comp[cColor] = true; break; } case cCRNFmtDXT3: { m_has_comp[cAlpha0] = true; return false; } case cCRNFmtDXT5: { params.m_format = cDXT5; params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; m_has_comp[cColor] = true; m_has_comp[cAlpha0] = true; break; } case cCRNFmtDXT5_CCxY: { params.m_format = cDXT5; params.m_alpha_component_indices[0] = 3; m_has_comp[cColor] = true; m_has_comp[cAlpha0] = true; params.m_perceptual = false; //params.m_adaptive_tile_color_alpha_weighting_ratio = 1.0f; params.m_adaptive_tile_color_alpha_weighting_ratio = 1.5f; break; } case cCRNFmtDXT5_xGBR: case cCRNFmtDXT5_AGBR: case cCRNFmtDXT5_xGxR: { params.m_format = cDXT5; params.m_alpha_component_indices[0] = 3; m_has_comp[cColor] = true; m_has_comp[cAlpha0] = true; params.m_perceptual = false; break; } case cCRNFmtDXN_XY: { params.m_format = cDXN_XY; params.m_alpha_component_indices[0] = 0; params.m_alpha_component_indices[1] = 1; m_has_comp[cAlpha0] = true; m_has_comp[cAlpha1] = true; params.m_perceptual = false; break; } case cCRNFmtDXN_YX: { 
params.m_format = cDXN_YX; params.m_alpha_component_indices[0] = 1; params.m_alpha_component_indices[1] = 0; m_has_comp[cAlpha0] = true; m_has_comp[cAlpha1] = true; params.m_perceptual = false; break; } case cCRNFmtDXT5A: { params.m_format = cDXT5A; params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; m_has_comp[cAlpha0] = true; params.m_perceptual = false; break; } case cCRNFmtETC1: { params.m_format = cETC1; m_has_comp[cColor] = true; break; } case cCRNFmtETC2: { params.m_format = cETC2; m_has_comp[cColor] = true; break; } case cCRNFmtETC2A: { params.m_format = cETC2A; params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; m_has_comp[cColor] = true; m_has_comp[cAlpha0] = true; break; } case cCRNFmtETC1S: { params.m_format = cETC1S; m_has_comp[cColor] = true; break; } case cCRNFmtETC2AS: { params.m_format = cETC2AS; params.m_alpha_component_indices[0] = m_pParams->m_alpha_component; m_has_comp[cColor] = true; m_has_comp[cAlpha0] = true; break; } default: { return false; } } params.m_debugging = (m_pParams->m_flags & cCRNCompFlagDebugging) != 0; params.m_pTask_pool = &m_task_pool; params.m_num_levels = m_pParams->m_levels; for (uint i = 0; i < m_pParams->m_levels; i++) { params.m_levels[i].m_first_block = m_levels[i].first_block; params.m_levels[i].m_num_blocks = m_levels[i].num_blocks; params.m_levels[i].m_block_width = m_levels[i].block_width; params.m_levels[i].m_weight = math::minimum(12.0f, powf(1.3f, (float)i)); } params.m_num_faces = m_pParams->m_faces; params.m_num_blocks = m_total_blocks; color_quad_u8 (*blocks)[16] = (color_quad_u8(*)[16])crnlib_malloc(params.m_num_blocks * 16 * sizeof(color_quad_u8)); for (uint b = 0, level = 0; level < m_pParams->m_levels; level++) { for (uint face = 0; face < m_pParams->m_faces; face++) { image_u8& image = m_images[face][level]; uint width = image.get_width(); uint height = image.get_height(); uint blockWidth = ((width + 7) & ~7) >> 2; uint blockHeight = ((height + 7) & ~7) >> 2; for (uint 
by = 0; by < blockHeight; by++) { for (uint y0 = by << 2, bx = 0; bx < blockWidth; bx++, b++) { for (uint t = 0, x0 = bx << 2, dy = 0; dy < 4; dy++) { for (uint y = math::minimum(y0 + dy, height - 1), dx = 0; dx < 4; dx++, t++) blocks[b][t] = image(math::minimum(x0 + dx, width - 1), y); } } } } } bool result = m_hvq.compress(blocks, m_endpoint_indices, m_selector_indices, m_color_endpoints, m_alpha_endpoints, m_color_selectors, m_alpha_selectors, params); crnlib_free(blocks); return result; } struct optimize_color_params { struct unpacked_endpoint { color_quad_u8 low, high; }; const unpacked_endpoint* unpacked_endpoints; const uint* hist; uint16 n; uint16 selected; float weight; struct result { crnlib::vector endpoint_remapping; crnlib::vector packed_endpoints; uint total_bits; } *pResult; }; static void sort_color_endpoints(crnlib::vector& remapping, const optimize_color_params::unpacked_endpoint* unpacked_endpoints, uint16 n) { remapping.resize(n); crnlib::vector endpoints(n); crnlib::vector indices(n); for (uint16 i = 0; i < n; i++) { endpoints[i] = unpacked_endpoints[i]; indices[i] = i; } optimize_color_params::unpacked_endpoint selected_endpoint = {color_quad_u8(0), color_quad_u8(0)}; for (uint16 left = n; left;) { uint16 selected_index = 0; uint min_error = cUINT32_MAX; for (uint16 i = 0; i < left; i++) { optimize_color_params::unpacked_endpoint& endpoint = endpoints[i]; uint error = color::elucidian_distance(endpoint.low, selected_endpoint.low, false) + color::elucidian_distance(endpoint.high, selected_endpoint.high, false); if (error < min_error) { min_error = error; selected_index = i; } } selected_endpoint = endpoints[selected_index]; remapping[indices[selected_index]] = n - left; left--; endpoints[selected_index] = endpoints[left]; indices[selected_index] = indices[left]; } } static void remap_color_endpoints(uint16* remapping, const optimize_color_params::unpacked_endpoint* unpacked_endpoints, const uint* hist, uint16 n, uint16 selected, float weight) { 
struct Node { uint index, frequency, front_similarity, back_similarity; optimize_color_params::unpacked_endpoint e; Node() { utils::zero_object(*this); } }; crnlib::vector remaining(n); for (uint16 i = 0; i < n; i++) { remaining[i].index = i; remaining[i].e = unpacked_endpoints[i]; } crnlib::vector chosen(n << 1); uint remaining_count = n, chosen_front = n, chosen_back = chosen_front; chosen[chosen_front] = selected; optimize_color_params::unpacked_endpoint front_e = remaining[selected].e, back_e = front_e; bool front_updated = true, back_updated = true; remaining[selected] = remaining[--remaining_count]; const uint* frequency = hist + selected * n; for (uint similarity_base = (uint)(4000 * (1.0f + weight)), frequency_normalizer = 0; remaining_count;) { uint64 best_value = 0; uint best_index = 0; for (uint i = 0; i < remaining_count; i++) { Node& node = remaining[i]; node.frequency += frequency[node.index]; if (front_updated) node.front_similarity = similarity_base - math::minimum(4000, color::elucidian_distance(node.e.low, front_e.low, false) + color::elucidian_distance(node.e.high, front_e.high, false)); if (back_updated) node.back_similarity = similarity_base - math::minimum(4000, color::elucidian_distance(node.e.low, back_e.low, false) + color::elucidian_distance(node.e.high, back_e.high, false)); uint64 value = math::maximum(node.front_similarity, node.back_similarity) * (node.frequency + frequency_normalizer) + 1; if (value > best_value || (value == best_value && node.index < selected)) { best_value = value; best_index = i; selected = node.index; } } frequency = hist + selected * n; uint frequency_front = 0, frequency_back = 0; for (int front = chosen_front, back = chosen_back, scale = back - front; scale > 0; front++, back--, scale -= 2) { frequency_front += scale * frequency[chosen[front]]; frequency_back += scale * frequency[chosen[back]]; } front_updated = back_updated = false; Node& best_node = remaining[best_index]; frequency_normalizer = 
best_node.frequency << 3; if ((uint64)best_node.front_similarity * frequency_front > (uint64)best_node.back_similarity * frequency_back) { chosen[--chosen_front] = selected; front_e = best_node.e; front_updated = true; } else { chosen[++chosen_back] = selected; back_e = best_node.e; back_updated = true; } best_node = remaining[--remaining_count]; } for (uint16 i = chosen_front; i <= chosen_back; i++) remapping[chosen[i]] = i - chosen_front; } void crn_comp::optimize_color_endpoints_task(uint64 data, void* pData_ptr) { optimize_color_params* pParams = reinterpret_cast(pData_ptr); crnlib::vector& remapping = pParams->pResult->endpoint_remapping; uint16 n = pParams->n; remapping.resize(n); if (data) { remap_color_endpoints(remapping.get_ptr(), pParams->unpacked_endpoints, pParams->hist, n, pParams->selected, pParams->weight); } else { sort_color_endpoints(remapping, pParams->unpacked_endpoints, n); optimize_color_selectors(); } m_has_etc_color_blocks ? pack_color_endpoints_etc(pParams->pResult->packed_endpoints, remapping) : pack_color_endpoints(pParams->pResult->packed_endpoints, remapping); uint total_bits = pParams->pResult->packed_endpoints.size() << 3; crnlib::vector hist(n); for (uint level = 0; level < m_levels.size(); level++) { for (uint endpoint_index = 0, b = m_levels[level].first_block, bEnd = b + m_levels[level].num_blocks; b < bEnd; b++) { uint index = remapping[m_endpoint_indices[b].component[cColor]]; if (m_has_subblocks && b & 1 ? m_endpoint_indices[b].reference : !m_endpoint_indices[b].reference) { int sym = index - endpoint_index; hist[sym < 0 ? 
sym + n : sym]++; } endpoint_index = index; } } static_huffman_data_model dm; dm.init(true, n, hist.get_ptr(), 16); const uint8* code_sizes = dm.get_code_sizes(); for (uint16 s = 0; s < n; s++) total_bits += hist[s] * code_sizes[s]; symbol_codec codec; codec.start_encoding(64 * 1024); codec.encode_enable_simulation(true); codec.encode_transmit_static_huffman_data_model(dm, false); codec.stop_encoding(false); total_bits += codec.encode_get_total_bits_written(); pParams->pResult->total_bits = total_bits; crnlib_delete(pParams); } void crn_comp::optimize_color_selectors() { crnlib::vector& remapping = m_selector_remaping[cColor]; uint16 n = m_color_selectors.size(); remapping.resize(n); uint8 d[] = {0, 5, 14, 10}; uint8 D4[0x100]; for (uint16 i = 0; i < 0x100; i++) D4[i] = d[(i ^ i >> 4) & 3] + d[(i >> 2 ^ i >> 6) & 3]; uint8 D8[0x10000]; for (uint32 i = 0; i < 0x10000; i++) D8[i] = D4[(i >> 8 & 0xF0) | (i >> 4 & 0xF)] + D4[(i >> 4 & 0xF0) | (i & 0xF)]; crnlib::vector selectors(n); crnlib::vector indices(n); for (uint16 i = 0; i < n; i++) { selectors[i] = m_color_selectors[i]; indices[i] = i; } uint32 selected_selector = 0; for (uint16 left = n; left;) { uint16 selected_index = 0; uint min_error = cUINT32_MAX; for (uint16 i = 0; i < left; i++) { uint32 selector = selectors[i]; uint8 d0 = D8[(selector >> 16 & 0xFF00) | (selected_selector >> 24 & 0xFF)]; uint8 d1 = D8[(selector >> 8 & 0xFF00) | (selected_selector >> 16 & 0xFF)]; uint8 d2 = D8[(selector & 0xFF00) | (selected_selector >> 8 & 0xFF)]; uint8 d3 = D8[(selector << 8 & 0xFF00) | (selected_selector & 0xFF)]; uint error = d0 + d1 + d2 + d3; if (error < min_error) { min_error = error; selected_index = i; } } selected_selector = selectors[selected_index]; remapping[indices[selected_index]] = n - left; left--; selectors[selected_index] = selectors[left]; indices[selected_index] = indices[left]; } pack_color_selectors(m_packed_color_selectors, remapping); } void crn_comp::optimize_color() { uint16 n = 
m_color_endpoints.size(); crnlib::vector hist(n * n); crnlib::vector sum(n); for (uint i, i_prev = 0, b = 0; b < m_endpoint_indices.size(); b++, i_prev = i) { i = m_endpoint_indices[b].color; if ((m_has_subblocks && b & 1 ? m_endpoint_indices[b].reference : !m_endpoint_indices[b].reference) && i != i_prev) { hist[i * n + i_prev]++; hist[i_prev * n + i]++; sum[i]++; sum[i_prev]++; } } uint16 selected = 0; uint best_sum = 0; for (uint16 i = 0; i < n; i++) { if (best_sum < sum[i]) { best_sum = sum[i]; selected = i; } } crnlib::vector unpacked_endpoints(n); for (uint16 i = 0; i < n; i++) { unpacked_endpoints[i].low.m_u32 = m_has_etc_color_blocks ? m_color_endpoints[i] & 0xFFFFFF : dxt1_block::unpack_color(m_color_endpoints[i] & 0xFFFF, true).m_u32; unpacked_endpoints[i].high.m_u32 = m_has_etc_color_blocks ? m_color_endpoints[i] >> 24 : dxt1_block::unpack_color(m_color_endpoints[i] >> 16, true).m_u32; } optimize_color_params::result remapping_trial[4]; float weights[4] = {0, 0, 1.0f / 6.0f, 0.5f}; for (uint i = 0; i < 4; i++) { optimize_color_params* pParams = crnlib_new(); pParams->unpacked_endpoints = unpacked_endpoints.get_ptr(); pParams->hist = hist.get_ptr(); pParams->n = n; pParams->selected = selected; pParams->weight = weights[i]; pParams->pResult = remapping_trial + i; m_task_pool.queue_object_task(this, &crn_comp::optimize_color_endpoints_task, i, pParams); } m_task_pool.join(); for (uint best_bits = cUINT32_MAX, i = 0; i < 4; i++) { if (remapping_trial[i].total_bits < best_bits) { m_packed_color_endpoints.swap(remapping_trial[i].packed_endpoints); m_endpoint_remaping[cColor].swap(remapping_trial[i].endpoint_remapping); best_bits = remapping_trial[i].total_bits; } } } struct optimize_alpha_params { struct unpacked_endpoint { uint8 low, high; }; const unpacked_endpoint* unpacked_endpoints; const uint* hist; uint16 n; uint16 selected; float weight; struct result { crnlib::vector endpoint_remapping; crnlib::vector packed_endpoints; uint total_bits; } *pResult; }; 
static void sort_alpha_endpoints(crnlib::vector& remapping, const optimize_alpha_params::unpacked_endpoint* unpacked_endpoints, uint16 n) { remapping.resize(n); crnlib::vector endpoints(n); crnlib::vector indices(n); for (uint16 i = 0; i < n; i++) { endpoints[i] = unpacked_endpoints[i]; indices[i] = i; } optimize_alpha_params::unpacked_endpoint selected_endpoint = {0, 0}; for (uint16 left = n; left;) { uint16 selected_index = 0; uint min_error = cUINT32_MAX; for (uint16 i = 0; i < left; i++) { optimize_alpha_params::unpacked_endpoint& endpoint = endpoints[i]; uint error = math::square(endpoint.low - selected_endpoint.low) + math::square(endpoint.high - selected_endpoint.high); if (error < min_error) { min_error = error; selected_index = i; } } selected_endpoint = endpoints[selected_index]; remapping[indices[selected_index]] = n - left; left--; endpoints[selected_index] = endpoints[left]; indices[selected_index] = indices[left]; } } static void remap_alpha_endpoints(uint16* remapping, const optimize_alpha_params::unpacked_endpoint* unpacked_endpoints, const uint* hist, uint16 n, uint16 selected, float weight) { const uint* frequency = hist + selected * n; crnlib::vector chosen, remaining; crnlib::vector total_frequency(n); chosen.push_back(selected); for (uint16 i = 0; i < n; i++) { if (i != selected) { remaining.push_back(i); total_frequency[i] = frequency[i]; } } for (uint similarity_base = (uint)(1000 * (1.0f + weight)), total_frequency_normalizer = 0; remaining.size();) { const optimize_alpha_params::unpacked_endpoint& e_front = unpacked_endpoints[chosen.front()]; const optimize_alpha_params::unpacked_endpoint& e_back = unpacked_endpoints[chosen.back()]; uint16 selected_index = 0; uint64 best_value = 0, selected_similarity_front = 0, selected_similarity_back = 0; for (uint16 i = 0; i < remaining.size(); i++) { uint remaining_index = remaining[i]; const optimize_alpha_params::unpacked_endpoint& e_remaining = unpacked_endpoints[remaining_index]; uint error_front = 
math::square(e_remaining.low - e_front.low) + math::square(e_remaining.high - e_front.high); uint error_back = math::square(e_remaining.low - e_back.low) + math::square(e_remaining.high - e_back.high); uint64 similarity_front = similarity_base - math::minimum(error_front, 1000); uint64 similarity_back = similarity_base - math::minimum(error_back, 1000); uint64 value = math::maximum(similarity_front, similarity_back) * (total_frequency[remaining_index] + total_frequency_normalizer) + 1; if (value > best_value) { best_value = value; selected_index = i; selected_similarity_front = similarity_front; selected_similarity_back = similarity_back; } } selected = remaining[selected_index]; frequency = hist + selected * n; total_frequency_normalizer = total_frequency[selected]; uint frequency_front = 0, frequency_back = 0; for (int front = 0, back = chosen.size() - 1, scale = back; scale > 0; front++, back--, scale -= 2) { frequency_front += scale * frequency[chosen[front]]; frequency_back += scale * frequency[chosen[back]]; } if (selected_similarity_front * frequency_front > selected_similarity_back * frequency_back) { chosen.push_front(selected); } else { chosen.push_back(selected); } remaining.erase(remaining.begin() + selected_index); for (uint16 i = 0; i < remaining.size(); i++) total_frequency[remaining[i]] += frequency[remaining[i]]; } for (uint16 i = 0; i < n; i++) remapping[chosen[i]] = i; } void crn_comp::optimize_alpha_endpoints_task(uint64 data, void* pData_ptr) { optimize_alpha_params* pParams = reinterpret_cast(pData_ptr); crnlib::vector& remapping = pParams->pResult->endpoint_remapping; uint16 n = pParams->n; remapping.resize(n); if (data) { remap_alpha_endpoints(remapping.get_ptr(), pParams->unpacked_endpoints, pParams->hist, n, pParams->selected, pParams->weight); } else { sort_alpha_endpoints(remapping, pParams->unpacked_endpoints, n); optimize_alpha_selectors(); } pack_alpha_endpoints(pParams->pResult->packed_endpoints, remapping); uint total_bits = 
pParams->pResult->packed_endpoints.size() << 3; crnlib::vector hist(n); bool hasAlpha0 = m_has_comp[cAlpha0], hasAlpha1 = m_has_comp[cAlpha1]; for (uint level = 0; level < m_levels.size(); level++) { for (uint alpha0_index = 0, alpha1_index = 0, b = m_levels[level].first_block, bEnd = b + m_levels[level].num_blocks; b < bEnd; b++) { if (hasAlpha0) { uint index = remapping[m_endpoint_indices[b].component[cAlpha0]]; if (!m_endpoint_indices[b].reference) { int sym = index - alpha0_index; hist[sym < 0 ? sym + n : sym]++; } alpha0_index = index; } if (hasAlpha1) { uint index = remapping[m_endpoint_indices[b].component[cAlpha1]]; if (!m_endpoint_indices[b].reference) { int sym = index - alpha1_index; hist[sym < 0 ? sym + n : sym]++; } alpha1_index = index; } } } static_huffman_data_model dm; dm.init(true, n, hist.get_ptr(), 16); const uint8* code_sizes = dm.get_code_sizes(); for (uint16 s = 0; s < n; s++) total_bits += hist[s] * code_sizes[s]; symbol_codec codec; codec.start_encoding(64 * 1024); codec.encode_enable_simulation(true); codec.encode_transmit_static_huffman_data_model(dm, false); codec.stop_encoding(false); total_bits += codec.encode_get_total_bits_written(); pParams->pResult->total_bits = total_bits; crnlib_delete(pParams); } void crn_comp::optimize_alpha_selectors() { crnlib::vector& remapping = m_selector_remaping[cAlpha0]; uint16 n = m_alpha_selectors.size(); remapping.resize(n); uint8 d[] = {0, 2, 3, 3, 5, 5, 4, 4}; uint8 D6[0x1000]; for (uint16 i = 0; i < 0x1000; i++) D6[i] = d[(i ^ i >> 6) & 7] + d[(i >> 3 ^ i >> 9) & 7]; crnlib::vector selectors(n); crnlib::vector indices(n); for (uint16 i = 0; i < n; i++) { selectors[i] = m_alpha_selectors[i]; indices[i] = i; } uint64 selected_selector = 0; for (uint16 left = n; left;) { uint16 selected_index = 0; uint min_error = cUINT32_MAX; for (uint16 i = 0; i < left; i++) { uint error = 0; for (uint64 selector = selectors[i] << 6, delta_selector = selected_selector, j = 0; j < 8; j++, selector >>= 6, 
delta_selector >>= 6) error += D6[(selector & 0xFC0) | (delta_selector & 0x3F)]; if (error < min_error) { min_error = error; selected_index = i; } } selected_selector = selectors[selected_index]; remapping[indices[selected_index]] = n - left; left--; selectors[selected_index] = selectors[left]; indices[selected_index] = indices[left]; } pack_alpha_selectors(m_packed_alpha_selectors, remapping); } void crn_comp::optimize_alpha() { uint16 n = m_alpha_endpoints.size(); crnlib::vector hist(n * n); crnlib::vector sum(n); bool hasAlpha0 = m_has_comp[cAlpha0], hasAlpha1 = m_has_comp[cAlpha1]; for (uint i0, i1, i0_prev = 0, i1_prev = 0, b = 0; b < m_endpoint_indices.size(); b++, i0_prev = i0, i1_prev = i1) { i0 = m_endpoint_indices[b].alpha0; i1 = m_endpoint_indices[b].alpha1; if (!m_endpoint_indices[b].reference) { if (hasAlpha0 && i0 != i0_prev) { hist[i0 * n + i0_prev]++; hist[i0_prev * n + i0]++; sum[i0]++; sum[i0_prev]++; } if (hasAlpha1 && i1 != i1_prev) { hist[i1 * n + i1_prev]++; hist[i1_prev * n + i1]++; sum[i1]++; sum[i1_prev]++; } } } uint16 selected = 0; uint best_sum = 0; for (uint16 i = 0; i < n; i++) { if (best_sum < sum[i]) { best_sum = sum[i]; selected = i; } } crnlib::vector unpacked_endpoints(n); for (uint16 i = 0; i < n; i++) { unpacked_endpoints[i].low = dxt5_block::unpack_endpoint(m_alpha_endpoints[i], 0); unpacked_endpoints[i].high = dxt5_block::unpack_endpoint(m_alpha_endpoints[i], 1); } optimize_alpha_params::result remapping_trial[4]; float weights[4] = {0, 0, 1.0f / 6.0f, 0.5f}; for (uint i = 0; i < 4; i++) { optimize_alpha_params* pParams = crnlib_new(); pParams->unpacked_endpoints = unpacked_endpoints.get_ptr(); pParams->hist = hist.get_ptr(); pParams->n = n; pParams->selected = selected; pParams->weight = weights[i]; pParams->pResult = remapping_trial + i; m_task_pool.queue_object_task(this, &crn_comp::optimize_alpha_endpoints_task, i, pParams); } m_task_pool.join(); for (uint best_bits = cUINT32_MAX, i = 0; i < 4; i++) { if 
(remapping_trial[i].total_bits < best_bits) { m_packed_alpha_endpoints.swap(remapping_trial[i].packed_endpoints); m_endpoint_remaping[cAlpha0].swap(remapping_trial[i].endpoint_remapping); best_bits = remapping_trial[i].total_bits; } } } bool crn_comp::pack_data_models() { symbol_codec codec; codec.start_encoding(1024 * 1024); if (!codec.encode_transmit_static_huffman_data_model(m_reference_dm, false)) return false; for (uint i = 0; i < 2; i++) { if (m_endpoint_index_dm[i].get_total_syms()) { if (!codec.encode_transmit_static_huffman_data_model(m_endpoint_index_dm[i], false)) return false; } if (m_selector_index_dm[i].get_total_syms()) { if (!codec.encode_transmit_static_huffman_data_model(m_selector_index_dm[i], false)) return false; } } codec.stop_encoding(false); m_packed_data_models.swap(codec.get_encoding_buf()); return true; } void crn_comp::append_vec(crnlib::vector& a, const void* p, uint size) { if (size) { uint ofs = a.size(); a.resize(ofs + size); memcpy(&a[ofs], p, size); } } void crn_comp::append_vec(crnlib::vector& a, const crnlib::vector& b) { if (!b.empty()) { uint ofs = a.size(); a.resize(ofs + b.size()); memcpy(&a[ofs], &b[0], b.size()); } } bool crn_comp::create_comp_data() { utils::zero_object(m_crn_header); m_crn_header.m_width = static_cast(m_pParams->m_width); m_crn_header.m_height = static_cast(m_pParams->m_height); m_crn_header.m_levels = static_cast(m_pParams->m_levels); m_crn_header.m_faces = static_cast(m_pParams->m_faces); m_crn_header.m_format = static_cast(m_pParams->m_format); m_crn_header.m_userdata0 = m_pParams->m_userdata0; m_crn_header.m_userdata1 = m_pParams->m_userdata1; m_comp_data.clear(); m_comp_data.reserve(2 * 1024 * 1024); append_vec(m_comp_data, &m_crn_header, sizeof(m_crn_header)); // tack on the rest of the variable size m_level_ofs array m_comp_data.resize(m_comp_data.size() + sizeof(m_crn_header.m_level_ofs[0]) * (m_pParams->m_levels - 1)); if (m_packed_color_endpoints.size()) { m_crn_header.m_color_endpoints.m_num = 
static_cast(m_color_endpoints.size()); m_crn_header.m_color_endpoints.m_size = m_packed_color_endpoints.size(); m_crn_header.m_color_endpoints.m_ofs = m_comp_data.size(); append_vec(m_comp_data, m_packed_color_endpoints); } if (m_packed_color_selectors.size()) { m_crn_header.m_color_selectors.m_num = static_cast(m_color_selectors.size()); m_crn_header.m_color_selectors.m_size = m_packed_color_selectors.size(); m_crn_header.m_color_selectors.m_ofs = m_comp_data.size(); append_vec(m_comp_data, m_packed_color_selectors); } if (m_packed_alpha_endpoints.size()) { m_crn_header.m_alpha_endpoints.m_num = static_cast(m_alpha_endpoints.size()); m_crn_header.m_alpha_endpoints.m_size = m_packed_alpha_endpoints.size(); m_crn_header.m_alpha_endpoints.m_ofs = m_comp_data.size(); append_vec(m_comp_data, m_packed_alpha_endpoints); } if (m_packed_alpha_selectors.size()) { m_crn_header.m_alpha_selectors.m_num = static_cast(m_alpha_selectors.size()); m_crn_header.m_alpha_selectors.m_size = m_packed_alpha_selectors.size(); m_crn_header.m_alpha_selectors.m_ofs = m_comp_data.size(); append_vec(m_comp_data, m_packed_alpha_selectors); } m_crn_header.m_tables_ofs = m_comp_data.size(); m_crn_header.m_tables_size = m_packed_data_models.size(); append_vec(m_comp_data, m_packed_data_models); uint level_ofs[cCRNMaxLevels]; for (uint i = 0; i < m_levels.size(); i++) { level_ofs[i] = m_comp_data.size(); append_vec(m_comp_data, m_packed_blocks[i]); } crnd::crn_header& dst_header = *(crnd::crn_header*)&m_comp_data[0]; // don't change the m_comp_data vector - or dst_header will be invalidated! 
memcpy(&dst_header, &m_crn_header, sizeof(dst_header)); for (uint i = 0; i < m_levels.size(); i++) dst_header.m_level_ofs[i] = level_ofs[i]; const uint actual_header_size = sizeof(crnd::crn_header) + sizeof(dst_header.m_level_ofs[0]) * (m_levels.size() - 1); dst_header.m_sig = crnd::crn_header::cCRNSigValue; dst_header.m_data_size = m_comp_data.size(); dst_header.m_data_crc16 = crc16(&m_comp_data[actual_header_size], m_comp_data.size() - actual_header_size); dst_header.m_header_size = actual_header_size; dst_header.m_header_crc16 = crc16(&dst_header.m_data_size, actual_header_size - (uint)((uint8*)&dst_header.m_data_size - (uint8*)&dst_header)); return true; } bool crn_comp::update_progress(uint phase_index, uint subphase_index, uint subphase_total) { if (!m_pParams->m_pProgress_func) return true; #if CRNLIB_ENABLE_DEBUG_MESSAGES if (m_pParams->m_flags & cCRNCompFlagDebugging) return true; #endif return (*m_pParams->m_pProgress_func)(phase_index, cTotalCompressionPhases, subphase_index, subphase_total, m_pParams->m_pProgress_func_data) != 0; } bool crn_comp::compress_internal() { if (!alias_images()) return false; if (!quantize_images()) return false; m_reference_hist.clear(); for (uint i = 0; i < 2; i++) { m_endpoint_remaping[i].clear(); m_endpoint_index_hist[i].clear(); m_endpoint_index_dm[i].clear(); m_selector_remaping[i].clear(); m_selector_index_hist[i].clear(); m_selector_index_dm[i].clear(); } if (m_has_comp[cColor]) optimize_color(); if (m_has_comp[cAlpha0]) optimize_alpha(); for (uint pass = 0; pass < 2; pass++) { for (uint level = 0; level < m_levels.size(); level++) { symbol_codec codec; codec.start_encoding(2 * 1024 * 1024); if (!pack_blocks( level, !pass && !level, pass ? &codec : NULL, m_has_comp[cColor] ? &m_endpoint_remaping[cColor] : NULL, m_has_comp[cColor] ? &m_selector_remaping[cColor] : NULL, m_has_comp[cAlpha0] ? &m_endpoint_remaping[cAlpha0] : NULL, m_has_comp[cAlpha0] ? 
&m_selector_remaping[cAlpha0] : NULL)) { return false; } codec.stop_encoding(false); if (pass) m_packed_blocks[level].swap(codec.get_encoding_buf()); } if (!pass) { m_reference_dm.init(true, m_reference_hist, 16); for (uint i = 0; i < 2; i++) { if (m_endpoint_index_hist[i].size()) m_endpoint_index_dm[i].init(true, m_endpoint_index_hist[i], 16); if (m_selector_index_hist[i].size()) m_selector_index_dm[i].init(true, m_selector_index_hist[i], 16); } } } if (!pack_data_models()) return false; if (!create_comp_data()) return false; if (!update_progress(24, 1, 1)) return false; if (m_pParams->m_flags & cCRNCompFlagDebugging) { crnlib_print_mem_stats(); } return true; } bool crn_comp::compress_pass(const crn_comp_params& params, float* pEffective_bitrate) { clear(); if (pEffective_bitrate) *pEffective_bitrate = 0.0f; m_pParams = ¶ms; m_has_etc_color_blocks = params.m_format == cCRNFmtETC1 || params.m_format == cCRNFmtETC2 || params.m_format == cCRNFmtETC2A || params.m_format == cCRNFmtETC1S || params.m_format == cCRNFmtETC2AS; m_has_subblocks = params.m_format == cCRNFmtETC1 || params.m_format == cCRNFmtETC2 || params.m_format == cCRNFmtETC2A; if ((math::minimum(m_pParams->m_width, m_pParams->m_height) < 1) || (math::maximum(m_pParams->m_width, m_pParams->m_height) > cCRNMaxLevelResolution)) return false; if (!m_task_pool.init(params.m_num_helper_threads)) return false; bool status = compress_internal(); m_task_pool.deinit(); if ((status) && (pEffective_bitrate)) { uint total_pixels = 0; for (uint f = 0; f < m_pParams->m_faces; f++) for (uint l = 0; l < m_pParams->m_levels; l++) total_pixels += m_images[f][l].get_total_pixels(); *pEffective_bitrate = (m_comp_data.size() * 8.0f) / total_pixels; } return status; } void crn_comp::compress_deinit() { } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_comp.h000066400000000000000000000101551503722002600212430ustar00rootroot00000000000000// File: crn_comp.h // See Copyright Notice and license at the end of 
inc/crnlib.h #pragma once #include "../inc/crn_defs.h" #include "../inc/crnlib.h" #include "crn_symbol_codec.h" #include "crn_dxt_hc.h" #include "crn_image.h" #include "crn_image_utils.h" #include "crn_texture_comp.h" namespace crnlib { class crn_comp : public itexture_comp { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crn_comp); public: crn_comp(); virtual ~crn_comp(); virtual const char* get_ext() const { return "CRN"; } virtual bool compress_init(const crn_comp_params&) { return true; }; virtual bool compress_pass(const crn_comp_params& params, float* pEffective_bitrate); virtual void compress_deinit(); virtual const crnlib::vector& get_comp_data() const { return m_comp_data; } virtual crnlib::vector& get_comp_data() { return m_comp_data; } uint get_comp_data_size() const { return m_comp_data.size(); } const uint8* get_comp_data_ptr() const { return m_comp_data.size() ? &m_comp_data[0] : NULL; } private: task_pool m_task_pool; const crn_comp_params* m_pParams; image_u8 m_images[cCRNMaxFaces][cCRNMaxLevels]; enum comp { cColor, cAlpha0, cAlpha1, cNumComps }; bool m_has_comp[cNumComps]; bool m_has_etc_color_blocks; bool m_has_subblocks; struct level_details { uint first_block; uint num_blocks; uint block_width; }; crnlib::vector m_levels; uint m_total_blocks; crnlib::vector m_color_endpoints; crnlib::vector m_alpha_endpoints; crnlib::vector m_color_selectors; crnlib::vector m_alpha_selectors; crnlib::vector m_endpoint_indices; crnlib::vector m_selector_indices; crnd::crn_header m_crn_header; crnlib::vector m_comp_data; dxt_hc m_hvq; symbol_histogram m_reference_hist; static_huffman_data_model m_reference_dm; crnlib::vector m_endpoint_remaping[2]; symbol_histogram m_endpoint_index_hist[2]; static_huffman_data_model m_endpoint_index_dm[2]; crnlib::vector m_selector_remaping[2]; symbol_histogram m_selector_index_hist[2]; static_huffman_data_model m_selector_index_dm[2]; crnlib::vector m_packed_blocks[cCRNMaxLevels]; crnlib::vector m_packed_data_models; crnlib::vector 
m_packed_color_endpoints; crnlib::vector m_packed_color_selectors; crnlib::vector m_packed_alpha_endpoints; crnlib::vector m_packed_alpha_selectors; bool pack_color_endpoints(crnlib::vector& packed_data, const crnlib::vector& remapping); bool pack_color_endpoints_etc(crnlib::vector& packed_data, const crnlib::vector& remapping); bool pack_color_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping); bool pack_alpha_endpoints(crnlib::vector& packed_data, const crnlib::vector& remapping); bool pack_alpha_selectors(crnlib::vector& packed_data, const crnlib::vector& remapping); bool pack_blocks( uint group, bool clear_histograms, symbol_codec* pCodec, const crnlib::vector* pColor_endpoint_remap, const crnlib::vector* pColor_selector_remap, const crnlib::vector* pAlpha_endpoint_remap, const crnlib::vector* pAlpha_selector_remap ); bool alias_images(); void clear(); bool quantize_images(); void optimize_color_endpoints_task(uint64 data, void* pData_ptr); void optimize_color_selectors(); void optimize_color(); void optimize_alpha_endpoints_task(uint64 data, void* pData_ptr); void optimize_alpha_selectors(); void optimize_alpha(); bool pack_data_models(); static void append_vec(crnlib::vector& a, const void* p, uint size); static void append_vec(crnlib::vector& a, const crnlib::vector& b); bool create_comp_data(); bool update_progress(uint phase_index, uint subphase_index, uint subphase_total); bool compress_internal(); }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_console.cpp000066400000000000000000000106271503722002600223060ustar00rootroot00000000000000// File: crn_console.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_console.h" #include "crn_data_stream.h" #include "crn_threading.h" namespace crnlib { eConsoleMessageType console::m_default_category = cInfoConsoleMessage; crnlib::vector console::m_output_funcs; bool console::m_crlf = true; bool console::m_prefixes = true; bool 
console::m_output_disabled; data_stream* console::m_pLog_stream; mutex* console::m_pMutex; uint console::m_num_messages[cCMTTotal]; bool console::m_at_beginning_of_line = true; const uint cConsoleBufSize = 4096; void console::init() { if (!m_pMutex) { m_pMutex = crnlib_new(); } } void console::deinit() { if (m_pMutex) { crnlib_delete(m_pMutex); m_pMutex = NULL; } } void console::disable_crlf() { init(); m_crlf = false; } void console::enable_crlf() { init(); m_crlf = true; } void console::vprintf(eConsoleMessageType type, const char* p, va_list args) { init(); scoped_mutex lock(*m_pMutex); m_num_messages[type]++; char buf[cConsoleBufSize]; crnlib_vsnprintf(buf, cConsoleBufSize, p, args); bool handled = false; if (m_output_funcs.size()) { for (uint i = 0; i < m_output_funcs.size(); i++) if (m_output_funcs[i].m_func(type, buf, m_output_funcs[i].m_pData)) handled = true; } const char* pPrefix = NULL; if ((m_prefixes) && (m_at_beginning_of_line)) { switch (type) { case cDebugConsoleMessage: pPrefix = "Debug: "; break; case cWarningConsoleMessage: pPrefix = "Warning: "; break; case cErrorConsoleMessage: pPrefix = "Error: "; break; default: break; } } if ((!m_output_disabled) && (!handled)) { if (pPrefix) ::printf("%s", pPrefix); ::printf(m_crlf ? "%s\n" : "%s", buf); } uint n = strlen(buf); m_at_beginning_of_line = (m_crlf) || ((n) && (buf[n - 1] == '\n')); if ((type != cProgressConsoleMessage) && (m_pLog_stream)) { // Yes this is bad. dynamic_string tmp_buf(buf); tmp_buf.translate_lf_to_crlf(); m_pLog_stream->printf(m_crlf ? "%s\r\n" : "%s", tmp_buf.get_ptr()); m_pLog_stream->flush(); } } void console::printf(eConsoleMessageType type, const char* p, ...) { va_list args; va_start(args, p); vprintf(type, p, args); va_end(args); } void console::printf(const char* p, ...) 
{ va_list args; va_start(args, p); vprintf(m_default_category, p, args); va_end(args); } void console::set_default_category(eConsoleMessageType category) { init(); m_default_category = category; } eConsoleMessageType console::get_default_category() { init(); return m_default_category; } void console::add_console_output_func(console_output_func pFunc, void* pData) { init(); scoped_mutex lock(*m_pMutex); m_output_funcs.push_back(console_func(pFunc, pData)); } void console::remove_console_output_func(console_output_func pFunc) { init(); scoped_mutex lock(*m_pMutex); for (int i = m_output_funcs.size() - 1; i >= 0; i--) { if (m_output_funcs[i].m_func == pFunc) { m_output_funcs.erase(m_output_funcs.begin() + i); } } if (!m_output_funcs.size()) { m_output_funcs.clear(); } } void console::progress(const char* p, ...) { va_list args; va_start(args, p); vprintf(cProgressConsoleMessage, p, args); va_end(args); } void console::info(const char* p, ...) { va_list args; va_start(args, p); vprintf(cInfoConsoleMessage, p, args); va_end(args); } void console::message(const char* p, ...) { va_list args; va_start(args, p); vprintf(cMessageConsoleMessage, p, args); va_end(args); } void console::cons(const char* p, ...) { va_list args; va_start(args, p); vprintf(cConsoleConsoleMessage, p, args); va_end(args); } void console::debug(const char* p, ...) { va_list args; va_start(args, p); vprintf(cDebugConsoleMessage, p, args); va_end(args); } void console::warning(const char* p, ...) { va_list args; va_start(args, p); vprintf(cWarningConsoleMessage, p, args); va_end(args); } void console::error(const char* p, ...) 
{ va_list args; va_start(args, p); vprintf(cErrorConsoleMessage, p, args); va_end(args); } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_console.h000066400000000000000000000067561503722002600217630ustar00rootroot00000000000000// File: crn_console.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dynamic_string.h" #ifdef WIN32 #include #include #elif defined(__GNUC__) #include #include #endif namespace crnlib { class dynamic_string; class data_stream; class mutex; enum eConsoleMessageType { cDebugConsoleMessage, // debugging messages cProgressConsoleMessage, // progress messages cInfoConsoleMessage, // ordinary messages cConsoleConsoleMessage, // user console output cMessageConsoleMessage, // high importance messages cWarningConsoleMessage, // warnings cErrorConsoleMessage, // errors cCMTTotal }; typedef bool (*console_output_func)(eConsoleMessageType type, const char* pMsg, void* pData); class console { public: static void init(); static void deinit(); static bool is_initialized() { return m_pMutex != NULL; } static void set_default_category(eConsoleMessageType category); static eConsoleMessageType get_default_category(); static void add_console_output_func(console_output_func pFunc, void* pData); static void remove_console_output_func(console_output_func pFunc); static void printf(const char* p, ...); static void vprintf(eConsoleMessageType type, const char* p, va_list args); static void printf(eConsoleMessageType type, const char* p, ...); static void cons(const char* p, ...); static void debug(const char* p, ...); static void progress(const char* p, ...); static void info(const char* p, ...); static void message(const char* p, ...); static void warning(const char* p, ...); static void error(const char* p, ...); // FIXME: All console state is currently global! 
static void disable_prefixes(); static void enable_prefixes(); static bool get_prefixes() { return m_prefixes; } static bool get_at_beginning_of_line() { return m_at_beginning_of_line; } static void disable_crlf(); static void enable_crlf(); static bool get_crlf() { return m_crlf; } static void disable_output() { m_output_disabled = true; } static void enable_output() { m_output_disabled = false; } static bool get_output_disabled() { return m_output_disabled; } static void set_log_stream(data_stream* pStream) { m_pLog_stream = pStream; } static data_stream* get_log_stream() { return m_pLog_stream; } static uint get_num_messages(eConsoleMessageType type) { return m_num_messages[type]; } private: static eConsoleMessageType m_default_category; struct console_func { console_func(console_output_func func = NULL, void* pData = NULL) : m_func(func), m_pData(pData) {} console_output_func m_func; void* m_pData; }; static crnlib::vector m_output_funcs; static bool m_crlf, m_prefixes, m_output_disabled; static data_stream* m_pLog_stream; static mutex* m_pMutex; static uint m_num_messages[cCMTTotal]; static bool m_at_beginning_of_line; }; #if defined(WIN32) inline int crn_getch() { return _getch(); } #elif defined(__GNUC__) inline int crn_getch() { struct termios oldt, newt; int ch; tcgetattr(STDIN_FILENO, &oldt); newt = oldt; newt.c_lflag &= ~(ICANON | ECHO); tcsetattr(STDIN_FILENO, TCSANOW, &newt); ch = getchar(); tcsetattr(STDIN_FILENO, TCSANOW, &oldt); return ch; } #else inline int crn_getch() { printf("crn_getch: Unimplemented"); return 0; } #endif } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_core.cpp000066400000000000000000000006021503722002600215640ustar00rootroot00000000000000// File: crn_core.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #if CRNLIB_USE_WIN32_API #include "crn_winhdr.h" #endif namespace crnlib { const char* g_copyright_str = "Copyright (c) 2010-2016 Richard Geldreich, Jr. 
and Binomial LLC"; const char* g_sig_str = "C8cfRlaorj0wLtnMSxrBJxTC85rho2L9hUZKHcBL"; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_core.h000066400000000000000000000115271503722002600212410ustar00rootroot00000000000000// File: crn_core.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #if defined(WIN32) && defined(_MSC_VER) #pragma warning(disable : 4201) // nonstandard extension used : nameless struct/union #pragma warning(disable : 4127) // conditional expression is constant #pragma warning(disable : 4793) // function compiled as native #pragma warning(disable : 4324) // structure was padded due to __declspec(align()) #endif #if defined(WIN32) && !defined(CRNLIB_ANSI_CPLUSPLUS) // MSVC or MinGW, x86 or x64, Win32 API's for threading and Win32 Interlocked API's or GCC built-ins for atomic ops. #ifdef NDEBUG // Ensure checked iterators are disabled. Note: Be sure anything else that links against this lib also #define's this stuff, or remove this crap! #define _SECURE_SCL 0 #define _HAS_ITERATOR_DEBUGGING 0 #endif #ifndef _DLL // If we're using the DLL form of the run-time libs, we're also going to be enabling exceptions because we'll be building CLR apps. // Otherwise, we disable exceptions for a small speed boost. 
#define _HAS_EXCEPTIONS 0 #endif #define NOMINMAX #define CRNLIB_USE_WIN32_API 1 #if defined(__MINGW32__) || defined(__MINGW64__) #define CRNLIB_USE_GCC_ATOMIC_BUILTINS 1 #else #define CRNLIB_USE_WIN32_ATOMIC_FUNCTIONS 1 #endif #define CRNLIB_PLATFORM_PC 1 #if defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) #define CRNLIB_PLATFORM_PC_X64 1 #define CRNLIB_64BIT_POINTERS 1 #define CRNLIB_CPU_HAS_64BIT_REGISTERS 1 #define CRNLIB_LITTLE_ENDIAN_CPU 1 #else #define CRNLIB_PLATFORM_PC_X86 1 #define CRNLIB_64BIT_POINTERS 0 #define CRNLIB_CPU_HAS_64BIT_REGISTERS 0 #define CRNLIB_LITTLE_ENDIAN_CPU 1 #endif #define CRNLIB_USE_UNALIGNED_INT_LOADS 1 #define CRNLIB_RESTRICT __restrict #define CRNLIB_FORCE_INLINE __forceinline #if defined(_MSC_VER) || defined(__MINGW32__) || defined(__MINGW64__) #define CRNLIB_USE_MSVC_INTRINSICS 1 #endif #define CRNLIB_INT64_FORMAT_SPECIFIER "%I64i" #define CRNLIB_UINT64_FORMAT_SPECIFIER "%I64u" #define CRNLIB_STDCALL __stdcall #define CRNLIB_MEMORY_IMPORT_BARRIER #define CRNLIB_MEMORY_EXPORT_BARRIER #elif defined(__GNUC__) && !defined(CRNLIB_ANSI_CPLUSPLUS) // GCC x86 or x64, pthreads for threading and GCC built-ins for atomic ops. 
#define CRNLIB_PLATFORM_PC 1 #if defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) #define CRNLIB_PLATFORM_PC_X64 1 #define CRNLIB_64BIT_POINTERS 1 #define CRNLIB_CPU_HAS_64BIT_REGISTERS 1 #else #define CRNLIB_PLATFORM_PC_X86 1 #define CRNLIB_64BIT_POINTERS 0 #define CRNLIB_CPU_HAS_64BIT_REGISTERS 0 #endif #define CRNLIB_USE_UNALIGNED_INT_LOADS 1 #define CRNLIB_LITTLE_ENDIAN_CPU 1 #define CRNLIB_USE_PTHREADS_API 1 #define CRNLIB_USE_GCC_ATOMIC_BUILTINS 1 #define CRNLIB_RESTRICT #define CRNLIB_FORCE_INLINE inline __attribute__((__always_inline__, __gnu_inline__)) #define CRNLIB_INT64_FORMAT_SPECIFIER "%lli" #define CRNLIB_UINT64_FORMAT_SPECIFIER "%llu" #define CRNLIB_STDCALL #define CRNLIB_MEMORY_IMPORT_BARRIER #define CRNLIB_MEMORY_EXPORT_BARRIER #else // Vanilla ANSI-C/C++ // No threading support, unaligned loads are NOT okay. #if defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) #define CRNLIB_64BIT_POINTERS 1 #define CRNLIB_CPU_HAS_64BIT_REGISTERS 1 #else #define CRNLIB_64BIT_POINTERS 0 #define CRNLIB_CPU_HAS_64BIT_REGISTERS 0 #endif #define CRNLIB_USE_UNALIGNED_INT_LOADS 0 #if __BIG_ENDIAN__ #define CRNLIB_BIG_ENDIAN_CPU 1 #else #define CRNLIB_LITTLE_ENDIAN_CPU 1 #endif #define CRNLIB_USE_GCC_ATOMIC_BUILTINS 0 #define CRNLIB_USE_WIN32_ATOMIC_FUNCTIONS 0 #define CRNLIB_RESTRICT #define CRNLIB_FORCE_INLINE inline #define CRNLIB_INT64_FORMAT_SPECIFIER "%I64i" #define CRNLIB_UINT64_FORMAT_SPECIFIER "%I64u" #define CRNLIB_STDCALL #define CRNLIB_MEMORY_IMPORT_BARRIER #define CRNLIB_MEMORY_EXPORT_BARRIER #endif #define CRNLIB_SLOW_STRING_LEN_CHECKS 1 #include #include #include #include #include #include #include #include #include #include #include #include #ifdef min #undef min #endif #ifdef max #undef max #endif #define CRNLIB_FALSE (0) #define CRNLIB_TRUE (1) #define CRNLIB_MAX_PATH (260) #ifdef _DEBUG #define CRNLIB_BUILD_DEBUG #else #define CRNLIB_BUILD_RELEASE #ifndef NDEBUG #define NDEBUG #endif #ifdef 
DEBUG #error DEBUG cannot be defined in CRNLIB_BUILD_RELEASE #endif #endif #include "crn_types.h" #include "crn_assert.h" #include "crn_platform.h" #include "crn_helpers.h" #include "crn_traits.h" #include "crn_mem.h" #include "crn_math.h" #include "crn_utils.h" #include "crn_hash.h" #include "crn_vector.h" #include "crn_timer.h" #include "crn_dynamic_string.h" DaemonEngine-crunch-ef4d32f/crnlib/crn_data_stream.cpp000066400000000000000000000046301503722002600231250ustar00rootroot00000000000000// File: crn_data_stream.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_data_stream.h" namespace crnlib { data_stream::data_stream() : m_attribs(0), m_opened(false), m_error(false), m_got_cr(false) { } data_stream::data_stream(const char* pName, uint attribs) : m_name(pName), m_attribs(static_cast(attribs)), m_opened(false), m_error(false), m_got_cr(false) { } uint64 data_stream::skip(uint64 len) { uint64 total_bytes_read = 0; const uint cBufSize = 1024; uint8 buf[cBufSize]; while (len) { const uint64 bytes_to_read = math::minimum(sizeof(buf), len); const uint64 bytes_read = read(buf, static_cast(bytes_to_read)); total_bytes_read += bytes_read; if (bytes_read != bytes_to_read) break; len -= bytes_read; } return total_bytes_read; } bool data_stream::read_line(dynamic_string& str) { str.empty(); for (;;) { const int c = read_byte(); const bool prev_got_cr = m_got_cr; m_got_cr = false; if (c < 0) { if (!str.is_empty()) break; return false; } else if ((26 == c) || (!c)) continue; else if (13 == c) { m_got_cr = true; break; } else if (10 == c) { if (prev_got_cr) continue; break; } str.append_char(static_cast(c)); } return true; } bool data_stream::printf(const char* p, ...) 
{ va_list args; va_start(args, p); dynamic_string buf; buf.format_args(p, args); va_end(args); return write(buf.get_ptr(), buf.get_len() * sizeof(char)) == buf.get_len() * sizeof(char); } bool data_stream::write_line(const dynamic_string& str) { if (!str.is_empty()) return write(str.get_ptr(), str.get_len()) == str.get_len(); return true; } bool data_stream::read_array(vector& buf) { if (buf.size() < get_remaining()) { if (get_remaining() > 1024U * 1024U * 1024U) return false; buf.resize((uint)get_remaining()); } if (!get_remaining()) { buf.resize(0); return true; } return read(&buf[0], buf.size()) == buf.size(); } bool data_stream::write_array(const vector& buf) { if (!buf.empty()) return write(&buf[0], buf.size()) == buf.size(); return true; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_data_stream.h000066400000000000000000000053041503722002600225710ustar00rootroot00000000000000// File: crn_data_stream.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { enum data_stream_attribs { cDataStreamReadable = 1, cDataStreamWritable = 2, cDataStreamSeekable = 4 }; const int64 DATA_STREAM_SIZE_UNKNOWN = cINT64_MAX; const int64 DATA_STREAM_SIZE_INFINITE = cUINT64_MAX; class data_stream { data_stream(const data_stream&); data_stream& operator=(const data_stream&); public: data_stream(); data_stream(const char* pName, uint attribs); virtual ~data_stream() {} virtual data_stream* get_parent() { return NULL; } virtual bool close() { m_opened = false; m_error = false; m_got_cr = false; return true; } typedef uint16 attribs_t; inline attribs_t get_attribs() const { return m_attribs; } inline bool is_opened() const { return m_opened; } inline bool is_readable() const { return utils::is_bit_set(m_attribs, cDataStreamReadable); } inline bool is_writable() const { return utils::is_bit_set(m_attribs, cDataStreamWritable); } inline bool is_seekable() const { return utils::is_bit_set(m_attribs, cDataStreamSeekable); } inline 
bool get_error() const { return m_error; } inline const dynamic_string& get_name() const { return m_name; } inline void set_name(const char* pName) { m_name.set(pName); } virtual uint read(void* pBuf, uint len) = 0; virtual uint64 skip(uint64 len); virtual uint write(const void* pBuf, uint len) = 0; virtual bool flush() = 0; virtual bool is_size_known() const { return true; } // Returns DATA_STREAM_SIZE_UNKNOWN if size hasn't been determined yet, or DATA_STREAM_SIZE_INFINITE for infinite streams. virtual uint64 get_size() = 0; virtual uint64 get_remaining() = 0; virtual uint64 get_ofs() = 0; virtual bool seek(int64 ofs, bool relative) = 0; virtual const void* get_ptr() const { return NULL; } inline int read_byte() { uint8 c; if (read(&c, 1) != 1) return -1; return c; } inline bool write_byte(uint8 c) { return write(&c, 1) == 1; } bool read_line(dynamic_string& str); bool printf(const char* p, ...); bool write_line(const dynamic_string& str); bool write_bom() { uint16 bom = 0xFEFF; return write(&bom, sizeof(bom)) == sizeof(bom); } bool read_array(vector& buf); bool write_array(const vector& buf); protected: dynamic_string m_name; attribs_t m_attribs; bool m_opened : 1; bool m_error : 1; bool m_got_cr : 1; inline void set_error() { m_error = true; } inline void clear_error() { m_error = false; } inline void post_seek() { m_got_cr = false; } }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_data_stream_serializer.h000066400000000000000000000323341503722002600250250ustar00rootroot00000000000000// File: data_stream_serializer.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_data_stream.h" namespace crnlib { // Defaults to little endian mode. 
class data_stream_serializer { public: data_stream_serializer() : m_pStream(NULL), m_little_endian(true) {} data_stream_serializer(data_stream* pStream) : m_pStream(pStream), m_little_endian(true) {} data_stream_serializer(data_stream& stream) : m_pStream(&stream), m_little_endian(true) {} data_stream_serializer(const data_stream_serializer& other) : m_pStream(other.m_pStream), m_little_endian(other.m_little_endian) {} data_stream_serializer& operator=(const data_stream_serializer& rhs) { m_pStream = rhs.m_pStream; m_little_endian = rhs.m_little_endian; return *this; } data_stream* get_stream() const { return m_pStream; } void set_stream(data_stream* pStream) { m_pStream = pStream; } const dynamic_string& get_name() const { return m_pStream ? m_pStream->get_name() : g_empty_dynamic_string; } bool get_error() { return m_pStream ? m_pStream->get_error() : false; } bool get_little_endian() const { return m_little_endian; } void set_little_endian(bool little_endian) { m_little_endian = little_endian; } bool write(const void* pBuf, uint len) { return m_pStream->write(pBuf, len) == len; } bool read(void* pBuf, uint len) { return m_pStream->read(pBuf, len) == len; } // size = size of each element, count = number of elements, returns actual count of elements written uint write(const void* pBuf, uint size, uint count) { uint actual_size = size * count; if (!actual_size) return 0; uint n = m_pStream->write(pBuf, actual_size); if (n == actual_size) return count; return n / size; } // size = size of each element, count = number of elements, returns actual count of elements read uint read(void* pBuf, uint size, uint count) { uint actual_size = size * count; if (!actual_size) return 0; uint n = m_pStream->read(pBuf, actual_size); if (n == actual_size) return count; return n / size; } bool write_chars(const char* pBuf, uint len) { return write(pBuf, len); } bool read_chars(char* pBuf, uint len) { return read(pBuf, len); } bool skip(uint len) { return m_pStream->skip(len) == len; 
} template bool write_object(const T& obj) { if (m_little_endian == c_crnlib_little_endian_platform) return write(&obj, sizeof(obj)); else { uint8 buf[sizeof(T)]; uint buf_size = sizeof(T); void* pBuf = buf; utils::write_obj(obj, pBuf, buf_size, m_little_endian); return write(buf, sizeof(T)); } } template bool read_object(T& obj) { if (m_little_endian == c_crnlib_little_endian_platform) return read(&obj, sizeof(obj)); else { uint8 buf[sizeof(T)]; if (!read(buf, sizeof(T))) return false; uint buf_size = sizeof(T); const void* pBuf = buf; utils::read_obj(obj, pBuf, buf_size, m_little_endian); return true; } } template bool write_value(T value) { return write_object(value); } template T read_value(const T& on_error_value = T()) { T result; if (!read_object(result)) result = on_error_value; return result; } template bool write_enum(T e) { int val = static_cast(e); return write_object(val); } template T read_enum() { return static_cast(read_value()); } // Writes uint using a simple variable length code (VLC). bool write_uint_vlc(uint val) { do { uint8 c = static_cast(val) & 0x7F; if (val <= 0x7F) c |= 0x80; if (!write_value(c)) return false; val >>= 7; } while (val); return true; } // Reads uint using a simple variable length code (VLC). 
bool read_uint_vlc(uint& val) { val = 0; uint shift = 0; for (;;) { if (shift >= 32) return false; uint8 c; if (!read_object(c)) return false; val |= ((c & 0x7F) << shift); shift += 7; if (c & 0x80) break; } return true; } bool write_c_str(const char* p) { uint len = static_cast(strlen(p)); if (!write_uint_vlc(len)) return false; return write_chars(p, len); } bool read_c_str(char* pBuf, uint buf_size) { uint len; if (!read_uint_vlc(len)) return false; if ((len + 1) > buf_size) return false; pBuf[len] = '\0'; return read_chars(pBuf, len); } bool write_string(const dynamic_string& str) { if (!write_uint_vlc(str.get_len())) return false; return write_chars(str.get_ptr(), str.get_len()); } bool read_string(dynamic_string& str) { uint len; if (!read_uint_vlc(len)) return false; if (!str.set_len(len)) return false; if (len) { if (!read_chars(str.get_ptr_raw(), len)) return false; if (memchr(str.get_ptr(), 0, len) != NULL) { str.truncate(0); return false; } } return true; } template bool write_vector(const T& vec) { if (!write_uint_vlc(vec.size())) return false; for (uint i = 0; i < vec.size(); i++) { *this << vec[i]; if (get_error()) return false; } return true; } template bool read_vector(T& vec, uint num_expected = UINT_MAX) { uint size; if (!read_uint_vlc(size)) return false; if ((size * sizeof(T::value_type)) >= 2U * 1024U * 1024U * 1024U) return false; if ((num_expected != UINT_MAX) && (size != num_expected)) return false; vec.resize(size); for (uint i = 0; i < vec.size(); i++) { *this >> vec[i]; if (get_error()) return false; } return true; } bool read_entire_file(crnlib::vector& buf) { return m_pStream->read_array(buf); } bool write_entire_file(const crnlib::vector& buf) { return m_pStream->write_array(buf); } // Got this idea from the Molly Rocket forums. // fmt may contain the characters "1", "2", or "4". bool writef(char* fmt, ...) 
{ va_list v; va_start(v, fmt); while (*fmt) { switch (*fmt++) { case '1': { const uint8 x = static_cast(va_arg(v, uint)); if (!write_value(x)) return false; } case '2': { const uint16 x = static_cast(va_arg(v, uint)); if (!write_value(x)) return false; } case '4': { const uint32 x = static_cast(va_arg(v, uint)); if (!write_value(x)) return false; } case ' ': case ',': { break; } default: { CRNLIB_ASSERT(0); return false; } } } va_end(v); return true; } // Got this idea from the Molly Rocket forums. // fmt may contain the characters "1", "2", or "4". bool readf(char* fmt, ...) { va_list v; va_start(v, fmt); while (*fmt) { switch (*fmt++) { case '1': { uint8* x = va_arg(v, uint8*); CRNLIB_ASSERT(x); if (!read_object(*x)) return false; } case '2': { uint16* x = va_arg(v, uint16*); CRNLIB_ASSERT(x); if (!read_object(*x)) return false; } case '4': { uint32* x = va_arg(v, uint32*); CRNLIB_ASSERT(x); if (!read_object(*x)) return false; } case ' ': case ',': { break; } default: { CRNLIB_ASSERT(0); return false; } } } va_end(v); return true; } private: data_stream* m_pStream; bool m_little_endian; }; // Write operators inline data_stream_serializer& operator<<(data_stream_serializer& serializer, bool val) { serializer.write_value(val); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, int8 val) { serializer.write_value(val); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, uint8 val) { serializer.write_value(val); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, int16 val) { serializer.write_value(val); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, uint16 val) { serializer.write_value(val); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, int32 val) { serializer.write_value(val); return serializer; } inline 
data_stream_serializer& operator<<(data_stream_serializer& serializer, uint32 val) { serializer.write_uint_vlc(val); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, int64 val) { serializer.write_value(val); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, uint64 val) { serializer.write_value(val); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, long val) { serializer.write_value(val); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, unsigned long val) { serializer.write_value(val); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, float val) { serializer.write_value(val); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, double val) { serializer.write_value(val); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const char* p) { serializer.write_c_str(p); return serializer; } inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const dynamic_string& str) { serializer.write_string(str); return serializer; } template inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const crnlib::vector& vec) { serializer.write_vector(vec); return serializer; } template inline data_stream_serializer& operator<<(data_stream_serializer& serializer, const T* p) { serializer.write_object(*p); return serializer; } // Read operators inline data_stream_serializer& operator>>(data_stream_serializer& serializer, bool& val) { serializer.read_object(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, int8& val) { serializer.read_object(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, uint8& 
val) { serializer.read_object(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, int16& val) { serializer.read_object(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, uint16& val) { serializer.read_object(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, int32& val) { serializer.read_object(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, uint32& val) { serializer.read_uint_vlc(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, int64& val) { serializer.read_object(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, uint64& val) { serializer.read_object(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, long& val) { serializer.read_object(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, unsigned long& val) { serializer.read_object(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, float& val) { serializer.read_object(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, double& val) { serializer.read_object(val); return serializer; } inline data_stream_serializer& operator>>(data_stream_serializer& serializer, dynamic_string& str) { serializer.read_string(str); return serializer; } template inline data_stream_serializer& operator>>(data_stream_serializer& serializer, crnlib::vector& vec) { serializer.read_vector(vec); return serializer; } template inline data_stream_serializer& operator>>(data_stream_serializer& serializer, T* p) { serializer.read_object(*p); return serializer; } } // namespace crnlib 
DaemonEngine-crunch-ef4d32f/crnlib/crn_dds_comp.cpp000066400000000000000000000172241503722002600224340ustar00rootroot00000000000000// File: crn_dds_comp.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_dds_comp.h" #include "crn_dynamic_stream.h" #include "crn_lzma_codec.h" namespace crnlib { dds_comp::dds_comp() : m_pParams(NULL), m_pixel_fmt(PIXEL_FMT_INVALID), m_pQDXT_state(NULL) { } dds_comp::~dds_comp() { crnlib_delete(m_pQDXT_state); } void dds_comp::clear() { m_src_tex.clear(); m_packed_tex.clear(); m_comp_data.clear(); m_pParams = NULL; m_pixel_fmt = PIXEL_FMT_INVALID; m_task_pool.deinit(); if (m_pQDXT_state) { crnlib_delete(m_pQDXT_state); m_pQDXT_state = NULL; } } bool dds_comp::create_dds_tex(mipmapped_texture& dds_tex) { image_u8 images[cCRNMaxFaces][cCRNMaxLevels]; bool has_alpha = false; for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) { for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) { const uint width = math::maximum(1U, m_pParams->m_width >> level_index); const uint height = math::maximum(1U, m_pParams->m_height >> level_index); if (!m_pParams->m_pImages[face_index][level_index]) return false; images[face_index][level_index].alias((color_quad_u8*)m_pParams->m_pImages[face_index][level_index], width, height); if (!has_alpha) has_alpha = image_utils::has_alpha(images[face_index][level_index]); } } for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) images[face_index][level_index].set_component_valid(3, has_alpha); image_utils::conversion_type conv_type = image_utils::get_image_conversion_type_from_crn_format((crn_format)m_pParams->m_format); if (conv_type != image_utils::cConversion_Invalid) { for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) { for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) { 
image_u8 cooked_image(images[face_index][level_index]); image_utils::convert_image(cooked_image, conv_type); images[face_index][level_index].swap(cooked_image); } } } face_vec faces(m_pParams->m_faces); for (uint face_index = 0; face_index < m_pParams->m_faces; face_index++) { for (uint level_index = 0; level_index < m_pParams->m_levels; level_index++) { mip_level* pMip = crnlib_new(); image_u8* pImage = crnlib_new(); pImage->swap(images[face_index][level_index]); pMip->assign(pImage); faces[face_index].push_back(pMip); } } dds_tex.assign(faces); #ifdef CRNLIB_BUILD_DEBUG CRNLIB_ASSERT(dds_tex.check()); #endif return true; } static bool progress_callback_func(uint percentage_complete, void* pUser_data_ptr) { const crn_comp_params& params = *(const crn_comp_params*)pUser_data_ptr; return params.m_pProgress_func(0, 1, percentage_complete, 100, params.m_pProgress_func_data) != 0; } static bool progress_callback_func_phase_0(uint percentage_complete, void* pUser_data_ptr) { const crn_comp_params& params = *(const crn_comp_params*)pUser_data_ptr; return params.m_pProgress_func(0, 2, percentage_complete, 100, params.m_pProgress_func_data) != 0; } static bool progress_callback_func_phase_1(uint percentage_complete, void* pUser_data_ptr) { const crn_comp_params& params = *(const crn_comp_params*)pUser_data_ptr; return params.m_pProgress_func(1, 2, percentage_complete, 100, params.m_pProgress_func_data) != 0; } bool dds_comp::convert_to_dxt(const crn_comp_params& params) { if ((params.m_quality_level == cCRNMaxQualityLevel) || (params.m_format == cCRNFmtDXT3)) { m_packed_tex = m_src_tex; if (!m_packed_tex.convert(m_pixel_fmt, false, m_pack_params)) return false; } else { const bool hierarchical = (params.m_flags & cCRNCompFlagHierarchical) != 0; m_q1_params.m_quality_level = params.m_quality_level; m_q1_params.m_hierarchical = hierarchical; m_q5_params.m_quality_level = params.m_quality_level; m_q5_params.m_hierarchical = hierarchical; if (!m_pQDXT_state) { m_pQDXT_state = 
crnlib_new(m_task_pool); if (params.m_pProgress_func) { m_q1_params.m_pProgress_func = progress_callback_func_phase_0; m_q1_params.m_pProgress_data = (void*)¶ms; m_q5_params.m_pProgress_func = progress_callback_func_phase_0; m_q5_params.m_pProgress_data = (void*)¶ms; } if (!m_src_tex.qdxt_pack_init(*m_pQDXT_state, m_packed_tex, m_q1_params, m_q5_params, m_pixel_fmt, false)) return false; if (params.m_pProgress_func) { m_q1_params.m_pProgress_func = progress_callback_func_phase_1; m_q5_params.m_pProgress_func = progress_callback_func_phase_1; } } else { if (params.m_pProgress_func) { m_q1_params.m_pProgress_func = progress_callback_func; m_q1_params.m_pProgress_data = (void*)¶ms; m_q5_params.m_pProgress_func = progress_callback_func; m_q5_params.m_pProgress_data = (void*)¶ms; } } if (!m_src_tex.qdxt_pack(*m_pQDXT_state, m_packed_tex, m_q1_params, m_q5_params)) return false; } return true; } bool dds_comp::compress_init(const crn_comp_params& params) { clear(); m_pParams = ¶ms; if ((math::minimum(m_pParams->m_width, m_pParams->m_height) < 1) || (math::maximum(m_pParams->m_width, m_pParams->m_height) > cCRNMaxLevelResolution)) return false; if (math::minimum(m_pParams->m_faces, m_pParams->m_levels) < 1) return false; if (!create_dds_tex(m_src_tex)) return false; m_pack_params.init(*m_pParams); if (params.m_pProgress_func) { m_pack_params.m_pProgress_callback = progress_callback_func; m_pack_params.m_pProgress_callback_user_data_ptr = (void*)¶ms; } m_pixel_fmt = pixel_format_helpers::convert_crn_format_to_pixel_format(static_cast(m_pParams->m_format)); if (m_pixel_fmt == PIXEL_FMT_INVALID) return false; if ((m_pixel_fmt == PIXEL_FMT_DXT1) && (m_src_tex.has_alpha()) && (m_pack_params.m_use_both_block_types) && (m_pParams->m_flags & cCRNCompFlagDXT1AForTransparency)) m_pixel_fmt = PIXEL_FMT_DXT1A; if (!m_task_pool.init(m_pParams->m_num_helper_threads)) return false; m_pack_params.m_pTask_pool = &m_task_pool; const bool hierarchical = (params.m_flags & 
cCRNCompFlagHierarchical) != 0; m_q1_params.init(m_pack_params, params.m_quality_level, hierarchical); m_q5_params.init(m_pack_params, params.m_quality_level, hierarchical); return true; } bool dds_comp::compress_pass(const crn_comp_params& params, float* pEffective_bitrate) { if (pEffective_bitrate) *pEffective_bitrate = 0.0f; if (!m_pParams) return false; if (!convert_to_dxt(params)) return false; dynamic_stream out_stream; out_stream.reserve(512 * 1024); data_stream_serializer serializer(out_stream); if (!m_packed_tex.write_dds(serializer)) return false; out_stream.reserve(0); m_comp_data.swap(out_stream.get_buf()); if (pEffective_bitrate) { lzma_codec lossless_codec; crnlib::vector cmp_tex_bytes; if (lossless_codec.pack(m_comp_data.get_ptr(), m_comp_data.size(), cmp_tex_bytes)) { uint comp_size = cmp_tex_bytes.size(); if (comp_size) { *pEffective_bitrate = (comp_size * 8.0f) / m_src_tex.get_total_pixels_in_all_faces_and_mips(); } } } return true; } void dds_comp::compress_deinit() { clear(); } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dds_comp.h000066400000000000000000000024001503722002600220670ustar00rootroot00000000000000// File: crn_comp.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_comp.h" #include "crn_mipmapped_texture.h" #include "crn_texture_comp.h" namespace crnlib { class dds_comp : public itexture_comp { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(dds_comp); public: dds_comp(); virtual ~dds_comp(); virtual const char* get_ext() const { return "DDS"; } virtual bool compress_init(const crn_comp_params& params); virtual bool compress_pass(const crn_comp_params& params, float* pEffective_bitrate); virtual void compress_deinit(); virtual const crnlib::vector& get_comp_data() const { return m_comp_data; } virtual crnlib::vector& get_comp_data() { return m_comp_data; } private: mipmapped_texture m_src_tex; mipmapped_texture m_packed_tex; crnlib::vector m_comp_data; const crn_comp_params* m_pParams; 
pixel_format m_pixel_fmt; dxt_image::pack_params m_pack_params; task_pool m_task_pool; qdxt1_params m_q1_params; qdxt5_params m_q5_params; mipmapped_texture::qdxt_state* m_pQDXT_state; void clear(); bool create_dds_tex(mipmapped_texture& dds_tex); bool convert_to_dxt(const crn_comp_params& params); }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_decomp.cpp000066400000000000000000000003711503722002600221060ustar00rootroot00000000000000// File: crn_decomp.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" // Include the single-file header library with no defines, which brings in the full CRN decompressor. #include "../inc/crn_decomp.h" DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt.cpp000066400000000000000000000253321503722002600214420ustar00rootroot00000000000000// File: crn_dxt.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_dxt.h" #include "crn_dxt1.h" #include "crn_ryg_dxt.hpp" #include "crn_dxt_fast.h" #include "crn_intersect.h" namespace crnlib { const uint8 g_dxt5_from_linear[cDXT5SelectorValues] = {0U, 2U, 3U, 4U, 5U, 6U, 7U, 1U}; const uint8 g_dxt5_to_linear[cDXT5SelectorValues] = {0U, 7U, 1U, 2U, 3U, 4U, 5U, 6U}; const uint8 g_dxt5_alpha6_to_linear[cDXT5SelectorValues] = {0U, 5U, 1U, 2U, 3U, 4U, 0U, 0U}; const uint8 g_dxt1_from_linear[cDXT1SelectorValues] = {0U, 2U, 3U, 1U}; const uint8 g_dxt1_to_linear[cDXT1SelectorValues] = {0U, 3U, 1U, 2U}; const uint8 g_six_alpha_invert_table[cDXT5SelectorValues] = {1, 0, 5, 4, 3, 2, 6, 7}; const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues] = {1, 0, 7, 6, 5, 4, 3, 2}; const char* get_dxt_format_string(dxt_format fmt) { switch (fmt) { case cDXT1: return "DXT1"; case cDXT1A: return "DXT1A"; case cDXT3: return "DXT3"; case cDXT5: return "DXT5"; case cDXT5A: return "DXT5A"; case cDXN_XY: return "DXN_XY"; case cDXN_YX: return "DXN_YX"; case cETC1: return "ETC1"; case cETC2: return "ETC2"; case cETC2A: return 
"ETC2A"; case cETC1S: return "ETC1S"; case cETC2AS: return "ETC2AS"; default: break; } CRNLIB_ASSERT(false); return "?"; } const char* get_dxt_compressor_name(crn_dxt_compressor_type c) { switch (c) { case cCRNDXTCompressorCRN: return "CRN"; case cCRNDXTCompressorCRNF: return "CRNF"; case cCRNDXTCompressorRYG: return "RYG"; #if CRNLIB_SUPPORT_ATI_COMPRESS case cCRNDXTCompressorATI: return "ATI"; #endif default: break; } CRNLIB_ASSERT(false); return "?"; } uint get_dxt_format_bits_per_pixel(dxt_format fmt) { switch (fmt) { case cDXT1: case cDXT1A: case cDXT5A: case cETC1: case cETC2: case cETC1S: return 4; case cDXT3: case cDXT5: case cDXN_XY: case cDXN_YX: case cETC2A: case cETC2AS: return 8; default: break; } CRNLIB_ASSERT(false); return 0; } bool get_dxt_format_has_alpha(dxt_format fmt) { switch (fmt) { case cDXT1A: case cDXT3: case cDXT5: case cDXT5A: case cETC2A: case cETC2AS: return true; default: break; } return false; } uint16 dxt1_block::pack_color(const color_quad_u8& color, bool scaled, uint bias) { uint r = color.r; uint g = color.g; uint b = color.b; if (scaled) { r = (r * 31U + bias) / 255U; g = (g * 63U + bias) / 255U; b = (b * 31U + bias) / 255U; } r = math::minimum(r, 31U); g = math::minimum(g, 63U); b = math::minimum(b, 31U); return static_cast(b | (g << 5U) | (r << 11U)); } uint16 dxt1_block::pack_color(uint r, uint g, uint b, bool scaled, uint bias) { return pack_color(color_quad_u8(r, g, b, 0), scaled, bias); } color_quad_u8 dxt1_block::unpack_color(uint16 packed_color, bool scaled, uint alpha) { uint b = packed_color & 31U; uint g = (packed_color >> 5U) & 63U; uint r = (packed_color >> 11U) & 31U; if (scaled) { b = (b << 3U) | (b >> 2U); g = (g << 2U) | (g >> 4U); r = (r << 3U) | (r >> 2U); } return color_quad_u8(cNoClamp, r, g, b, math::minimum(alpha, 255U)); } void dxt1_block::unpack_color(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled) { color_quad_u8 c(unpack_color(packed_color, scaled, 0)); r = c.r; g = c.g; b = c.b; } void 
dxt1_block::get_block_colors_NV5x(color_quad_u8* pDst, uint16 packed_col0, uint16 packed_col1, bool color4) { color_quad_u8 col0(unpack_color(packed_col0, false)); color_quad_u8 col1(unpack_color(packed_col1, false)); pDst[0].r = (3 * col0.r * 22) / 8; pDst[0].b = (3 * col0.b * 22) / 8; pDst[0].g = (col0.g << 2) | (col0.g >> 4); pDst[0].a = 0xFF; pDst[1].r = (3 * col1.r * 22) / 8; pDst[1].g = (col1.g << 2) | (col1.g >> 4); pDst[1].b = (3 * col1.b * 22) / 8; pDst[1].a = 0xFF; int gdiff = pDst[1].g - pDst[0].g; if (color4) //(packed_col0 > packed_col1) { pDst[2].r = static_cast(((2 * col0.r + col1.r) * 22) / 8); pDst[2].g = static_cast((256 * pDst[0].g + gdiff / 4 + 128 + gdiff * 80) / 256); pDst[2].b = static_cast(((2 * col0.b + col1.b) * 22) / 8); pDst[2].a = 0xFF; pDst[3].r = static_cast(((2 * col1.r + col0.r) * 22) / 8); pDst[3].g = static_cast((256 * pDst[1].g - gdiff / 4 + 128 - gdiff * 80) / 256); pDst[3].b = static_cast(((2 * col1.b + col0.b) * 22) / 8); pDst[3].a = 0xFF; } else { pDst[2].r = static_cast(((col0.r + col1.r) * 33) / 8); pDst[2].g = static_cast((256 * pDst[0].g + gdiff / 4 + 128 + gdiff * 128) / 256); pDst[2].b = static_cast(((col0.b + col1.b) * 33) / 8); pDst[2].a = 0xFF; pDst[3].r = 0x00; pDst[3].g = 0x00; pDst[3].b = 0x00; pDst[3].a = 0x00; } } uint dxt1_block::get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1) { color_quad_u8 c0(unpack_color(color0, true)); color_quad_u8 c1(unpack_color(color1, true)); pDst[0] = c0; pDst[1] = c1; pDst[2].set_noclamp_rgba((c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U, 255U); pDst[3].set_noclamp_rgba(0, 0, 0, 0); return 3; } uint dxt1_block::get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1) { color_quad_u8 c0(unpack_color(color0, true)); color_quad_u8 c1(unpack_color(color1, true)); pDst[0] = c0; pDst[1] = c1; // The compiler changes the div3 into a mul by recip+shift. 
pDst[2].set_noclamp_rgba((c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3, 255U); pDst[3].set_noclamp_rgba((c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3, 255U); return 4; } uint dxt1_block::get_block_colors3_round(color_quad_u8* pDst, uint16 color0, uint16 color1) { color_quad_u8 c0(unpack_color(color0, true)); color_quad_u8 c1(unpack_color(color1, true)); pDst[0] = c0; pDst[1] = c1; pDst[2].set_noclamp_rgba((c0.r + c1.r + 1) >> 1U, (c0.g + c1.g + 1) >> 1U, (c0.b + c1.b + 1) >> 1U, 255U); pDst[3].set_noclamp_rgba(0, 0, 0, 0); return 3; } uint dxt1_block::get_block_colors4_round(color_quad_u8* pDst, uint16 color0, uint16 color1) { color_quad_u8 c0(unpack_color(color0, true)); color_quad_u8 c1(unpack_color(color1, true)); pDst[0] = c0; pDst[1] = c1; // 12/14/08 - Supposed to round according to DX docs, but this conflicts with the OpenGL S3TC spec. ? // The compiler changes the div3 into a mul by recip+shift. pDst[2].set_noclamp_rgba((c0.r * 2 + c1.r + 1) / 3, (c0.g * 2 + c1.g + 1) / 3, (c0.b * 2 + c1.b + 1) / 3, 255U); pDst[3].set_noclamp_rgba((c1.r * 2 + c0.r + 1) / 3, (c1.g * 2 + c0.g + 1) / 3, (c1.b * 2 + c0.b + 1) / 3, 255U); return 4; } uint dxt1_block::get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1) { if (color0 > color1) return get_block_colors4(pDst, color0, color1); else return get_block_colors3(pDst, color0, color1); } uint dxt1_block::get_block_colors_round(color_quad_u8* pDst, uint16 color0, uint16 color1) { if (color0 > color1) return get_block_colors4_round(pDst, color0, color1); else return get_block_colors3_round(pDst, color0, color1); } color_quad_u8 dxt1_block::unpack_endpoint(uint32 endpoints, uint index, bool scaled, uint alpha) { CRNLIB_ASSERT(index < 2); return unpack_color(static_cast((endpoints >> (index * 16U)) & 0xFFFFU), scaled, alpha); } uint dxt1_block::pack_endpoints(uint lo, uint hi) { CRNLIB_ASSERT((lo <= 0xFFFFU) && (hi <= 0xFFFFU)); return lo | (hi << 16U); } void 
dxt3_block::set_alpha(uint x, uint y, uint value, bool scaled) { CRNLIB_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); if (scaled) { CRNLIB_ASSERT(value <= 0xFF); value = (value * 15U + 128U) / 255U; } else { CRNLIB_ASSERT(value <= 0xF); } uint ofs = (y << 1U) + (x >> 1U); uint c = m_alpha[ofs]; c &= ~(0xF << ((x & 1U) << 2U)); c |= (value << ((x & 1U) << 2U)); m_alpha[ofs] = static_cast(c); } uint dxt3_block::get_alpha(uint x, uint y, bool scaled) const { CRNLIB_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); uint value = m_alpha[(y << 1U) + (x >> 1U)]; if (x & 1) value >>= 4; value &= 0xF; if (scaled) value = (value << 4U) | value; return value; } uint dxt5_block::get_block_values6(color_quad_u8* pDst, uint l, uint h) { pDst[0].a = static_cast(l); pDst[1].a = static_cast(h); pDst[2].a = static_cast((l * 4 + h) / 5); pDst[3].a = static_cast((l * 3 + h * 2) / 5); pDst[4].a = static_cast((l * 2 + h * 3) / 5); pDst[5].a = static_cast((l + h * 4) / 5); pDst[6].a = 0; pDst[7].a = 255; return 6; } uint dxt5_block::get_block_values8(color_quad_u8* pDst, uint l, uint h) { pDst[0].a = static_cast(l); pDst[1].a = static_cast(h); pDst[2].a = static_cast((l * 6 + h) / 7); pDst[3].a = static_cast((l * 5 + h * 2) / 7); pDst[4].a = static_cast((l * 4 + h * 3) / 7); pDst[5].a = static_cast((l * 3 + h * 4) / 7); pDst[6].a = static_cast((l * 2 + h * 5) / 7); pDst[7].a = static_cast((l + h * 6) / 7); return 8; } uint dxt5_block::get_block_values(color_quad_u8* pDst, uint l, uint h) { if (l > h) return get_block_values8(pDst, l, h); else return get_block_values6(pDst, l, h); } uint dxt5_block::get_block_values6(uint* pDst, uint l, uint h) { pDst[0] = l; pDst[1] = h; pDst[2] = (l * 4 + h) / 5; pDst[3] = (l * 3 + h * 2) / 5; pDst[4] = (l * 2 + h * 3) / 5; pDst[5] = (l + h * 4) / 5; pDst[6] = 0; pDst[7] = 255; return 6; } uint dxt5_block::get_block_values8(uint* pDst, uint l, uint h) { pDst[0] = l; pDst[1] = h; pDst[2] = (l * 6 + h) / 7; pDst[3] = (l * 5 + h * 2) / 7; pDst[4] = 
(l * 4 + h * 3) / 7; pDst[5] = (l * 3 + h * 4) / 7; pDst[6] = (l * 2 + h * 5) / 7; pDst[7] = (l + h * 6) / 7; return 8; } uint dxt5_block::unpack_endpoint(uint packed, uint index) { CRNLIB_ASSERT(index < 2); return (packed >> (8 * index)) & 0xFF; } uint dxt5_block::pack_endpoints(uint lo, uint hi) { CRNLIB_ASSERT((lo <= 0xFF) && (hi <= 0xFF)); return lo | (hi << 8U); } uint dxt5_block::get_block_values(uint* pDst, uint l, uint h) { if (l > h) return get_block_values8(pDst, l, h); else return get_block_values6(pDst, l, h); } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt.h000066400000000000000000000235171503722002600211120ustar00rootroot00000000000000// File: crn_dxt.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "../inc/crnlib.h" #include "crn_color.h" #include "crn_vec.h" #include "crn_rand.h" #include "crn_sparse_bit_array.h" #include "crn_hash_map.h" #include #define CRNLIB_DXT_ALT_ROUNDING 1 namespace crnlib { enum dxt_constants { cDXT1BytesPerBlock = 8U, cDXT5NBytesPerBlock = 16U, cDXT5SelectorBits = 3U, cDXT5SelectorValues = 1U << cDXT5SelectorBits, cDXT5SelectorMask = cDXT5SelectorValues - 1U, cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, cDXT1SelectorMask = cDXT1SelectorValues - 1U, cDXTBlockShift = 2U, cDXTBlockSize = 1U << cDXTBlockShift }; enum dxt_format { cDXTInvalid = -1, // cDXT1/1A must appear first! cDXT1, cDXT1A, cDXT3, cDXT5, cDXT5A, cDXN_XY, // inverted relative to standard ATI2, 360's DXN cDXN_YX, // standard ATI2, cETC1, cETC2, cETC2A, cETC1S, cETC2AS, }; const float cDXT1MaxLinearValue = 3.0f; const float cDXT1InvMaxLinearValue = 1.0f / 3.0f; const float cDXT5MaxLinearValue = 7.0f; const float cDXT5InvMaxLinearValue = 1.0f / 7.0f; // Converts DXT1 raw color selector index to a linear value. extern const uint8 g_dxt1_to_linear[cDXT1SelectorValues]; // Converts DXT5 raw alpha selector index to a linear value. 
extern const uint8 g_dxt5_to_linear[cDXT5SelectorValues]; // Converts DXT1 linear color selector index to a raw value (inverse of g_dxt1_to_linear). extern const uint8 g_dxt1_from_linear[cDXT1SelectorValues]; // Converts DXT5 linear alpha selector index to a raw value (inverse of g_dxt5_to_linear). extern const uint8 g_dxt5_from_linear[cDXT5SelectorValues]; extern const uint8 g_dxt5_alpha6_to_linear[cDXT5SelectorValues]; extern const uint8 g_six_alpha_invert_table[cDXT5SelectorValues]; extern const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues]; const char* get_dxt_format_string(dxt_format fmt); uint get_dxt_format_bits_per_pixel(dxt_format fmt); bool get_dxt_format_has_alpha(dxt_format fmt); const char* get_dxt_quality_string(crn_dxt_quality q); const char* get_dxt_compressor_name(crn_dxt_compressor_type c); struct dxt1_block { uint8 m_low_color[2]; uint8 m_high_color[2]; enum { cNumSelectorBytes = 4 }; uint8 m_selectors[cNumSelectorBytes]; inline void clear() { utils::zero_this(this); } // These methods assume the in-memory rep is in LE byte order. 
inline uint get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } inline uint get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } inline void set_low_color(uint16 c) { m_low_color[0] = static_cast(c & 0xFF); m_low_color[1] = static_cast((c >> 8) & 0xFF); } inline void set_high_color(uint16 c) { m_high_color[0] = static_cast(c & 0xFF); m_high_color[1] = static_cast((c >> 8) & 0xFF); } inline bool is_constant_color_block() const { return get_low_color() == get_high_color(); } inline bool is_alpha_block() const { return get_low_color() <= get_high_color(); } inline bool is_non_alpha_block() const { return !is_alpha_block(); } inline uint get_selector(uint x, uint y) const { CRNLIB_ASSERT((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; } inline void set_selector(uint x, uint y, uint val) { CRNLIB_ASSERT((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); } inline void flip_x(uint w = 4, uint h = 4) { for (uint x = 0; x < (w / 2); x++) { for (uint y = 0; y < h; y++) { const uint c = get_selector(x, y); set_selector(x, y, get_selector((w - 1) - x, y)); set_selector((w - 1) - x, y, c); } } } inline void flip_y(uint w = 4, uint h = 4) { for (uint y = 0; y < (h / 2); y++) { for (uint x = 0; x < w; x++) { const uint c = get_selector(x, y); set_selector(x, y, get_selector(x, (h - 1) - y)); set_selector(x, (h - 1) - y, c); } } } static uint16 pack_color(const color_quad_u8& color, bool scaled, uint bias = 127U); static uint16 pack_color(uint r, uint g, uint b, bool scaled, uint bias = 127U); static color_quad_u8 unpack_color(uint16 packed_color, bool scaled, uint alpha = 255U); static void unpack_color(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled); static uint get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1); static uint 
get_block_colors3_round(color_quad_u8* pDst, uint16 color0, uint16 color1); static uint get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1); static uint get_block_colors4_round(color_quad_u8* pDst, uint16 color0, uint16 color1); // pDst must point to an array at least cDXT1SelectorValues long. static uint get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1); static uint get_block_colors_round(color_quad_u8* pDst, uint16 color0, uint16 color1); static color_quad_u8 unpack_endpoint(uint32 endpoints, uint index, bool scaled, uint alpha = 255U); static uint pack_endpoints(uint lo, uint hi); static void get_block_colors_NV5x(color_quad_u8* pDst, uint16 packed_col0, uint16 packed_col1, bool color4); }; CRNLIB_DEFINE_BITWISE_COPYABLE(dxt1_block); struct dxt3_block { enum { cNumAlphaBytes = 8 }; uint8 m_alpha[cNumAlphaBytes]; void set_alpha(uint x, uint y, uint value, bool scaled); uint get_alpha(uint x, uint y, bool scaled) const; inline void flip_x(uint w = 4, uint h = 4) { for (uint x = 0; x < (w / 2); x++) { for (uint y = 0; y < h; y++) { const uint c = get_alpha(x, y, false); set_alpha(x, y, get_alpha((w - 1) - x, y, false), false); set_alpha((w - 1) - x, y, c, false); } } } inline void flip_y(uint w = 4, uint h = 4) { for (uint y = 0; y < (h / 2); y++) { for (uint x = 0; x < w; x++) { const uint c = get_alpha(x, y, false); set_alpha(x, y, get_alpha(x, (h - 1) - y, false), false); set_alpha(x, (h - 1) - y, c, false); } } } }; CRNLIB_DEFINE_BITWISE_COPYABLE(dxt3_block); struct dxt5_block { uint8 m_endpoints[2]; enum { cNumSelectorBytes = 6 }; uint8 m_selectors[cNumSelectorBytes]; inline void clear() { utils::zero_this(this); } inline uint get_low_alpha() const { return m_endpoints[0]; } inline uint get_high_alpha() const { return m_endpoints[1]; } inline void set_low_alpha(uint i) { CRNLIB_ASSERT(i <= cUINT8_MAX); m_endpoints[0] = static_cast(i); } inline void set_high_alpha(uint i) { CRNLIB_ASSERT(i <= cUINT8_MAX); m_endpoints[1] = 
static_cast(i); } inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } uint get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); } uint get_selectors_as_word(uint index) { CRNLIB_ASSERT(index < 3); return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); } inline uint get_selector(uint x, uint y) const { CRNLIB_ASSERT((x < 4U) && (y < 4U)); uint selector_index = (y * 4) + x; uint bit_index = selector_index * cDXT5SelectorBits; uint byte_index = bit_index >> 3; uint bit_ofs = bit_index & 7; uint v = m_selectors[byte_index]; if (byte_index < (cNumSelectorBytes - 1)) v |= (m_selectors[byte_index + 1] << 8); return (v >> bit_ofs) & 7; } inline void set_selector(uint x, uint y, uint val) { CRNLIB_ASSERT((x < 4U) && (y < 4U) && (val < 8U)); uint selector_index = (y * 4) + x; uint bit_index = selector_index * cDXT5SelectorBits; uint byte_index = bit_index >> 3; uint bit_ofs = bit_index & 7; uint v = m_selectors[byte_index]; if (byte_index < (cNumSelectorBytes - 1)) v |= (m_selectors[byte_index + 1] << 8); v &= (~(7 << bit_ofs)); v |= (val << bit_ofs); m_selectors[byte_index] = static_cast(v); if (byte_index < (cNumSelectorBytes - 1)) m_selectors[byte_index + 1] = static_cast(v >> 8); } inline void flip_x(uint w = 4, uint h = 4) { for (uint x = 0; x < (w / 2); x++) { for (uint y = 0; y < h; y++) { const uint c = get_selector(x, y); set_selector(x, y, get_selector((w - 1) - x, y)); set_selector((w - 1) - x, y, c); } } } inline void flip_y(uint w = 4, uint h = 4) { for (uint y = 0; y < (h / 2); y++) { for (uint x = 0; x < w; x++) { const uint c = get_selector(x, y); set_selector(x, y, get_selector(x, (h - 1) - y)); set_selector(x, (h - 1) - y, c); } } } enum { cMaxSelectorValues = 8 }; // Results written to alpha channel. 
static uint get_block_values6(color_quad_u8* pDst, uint l, uint h); static uint get_block_values8(color_quad_u8* pDst, uint l, uint h); static uint get_block_values(color_quad_u8* pDst, uint l, uint h); static uint get_block_values6(uint* pDst, uint l, uint h); static uint get_block_values8(uint* pDst, uint l, uint h); // pDst must point to an array at least cDXT5SelectorValues long. static uint get_block_values(uint* pDst, uint l, uint h); static uint unpack_endpoint(uint packed, uint index); static uint pack_endpoints(uint lo, uint hi); }; CRNLIB_DEFINE_BITWISE_COPYABLE(dxt5_block); struct dxt_pixel_block { color_quad_u8 m_pixels[cDXTBlockSize][cDXTBlockSize]; // [y][x] inline void clear() { utils::zero_object(*this); } }; CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_pixel_block); } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt1.cpp000066400000000000000000002201751503722002600215250ustar00rootroot00000000000000// File: crn_dxt1.cpp // See Copyright Notice and license at the end of inc/crnlib.h // // Notes: // This class is not optimized for performance on small blocks, unlike typical DXT1 compressors. It's optimized for scalability and quality: // - Very high quality in terms of avg. RMSE or Luma RMSE. Goal is to always match or beat every other known offline DXTc compressor: ATI_Compress, squish, NVidia texture tools, nvdxt.exe, etc. // - Reasonable scalability and stability with hundreds to many thousands of input colors (including inputs with many thousands of equal/nearly equal colors). // - Any quality optimization which results in even a tiny improvement is worth it -- as long as it's either a constant or linear slowdown. // Tiny quality improvements can be extremely valuable in large clusters. // - Quality should scale well vs. CPU time cost, i.e. the more time you spend the higher the quality. 
#include "crn_core.h"
#include "crn_dxt1.h"
#include "crn_ryg_dxt.hpp"
#include "crn_dxt_fast.h"
#include "crn_intersect.h"
#include "crn_vec_interval.h"

namespace crnlib {
//-----------------------------------------------------------------------------------------------------------------------------------------

// Probe tables, one per quality tier name (fast/normal/better/uber), each
// followed by its compile-time element count.
static const int16 g_fast_probe_table[] = {0, 1, 2, 3};
static const uint cFastProbeTableSize = sizeof(g_fast_probe_table) / sizeof(*g_fast_probe_table);

static const int16 g_normal_probe_table[] = {0, 1, 3, 5, 7};
static const uint cNormalProbeTableSize = sizeof(g_normal_probe_table) / sizeof(*g_normal_probe_table);

static const int16 g_better_probe_table[] = {0, 1, 2, 3, 5, 9, 15, 19, 27, 43};
static const uint cBetterProbeTableSize = sizeof(g_better_probe_table) / sizeof(*g_better_probe_table);

static const int16 g_uber_probe_table[] = {0, 1, 2, 3, 5, 7, 9, 10, 13, 15, 19, 27, 43, 59, 91};
static const uint cUberProbeTableSize = sizeof(g_uber_probe_table) / sizeof(*g_uber_probe_table);

// A unique color paired with a scalar projection value used as its sort key.
struct unique_color_projection {
  unique_color color;
  int64 projection;
};

// Strict-weak-ordering functor: orders unique_color_projection by ascending projection.
static struct {
  bool operator()(unique_color_projection lhs, unique_color_projection rhs) const {
    return lhs.projection < rhs.projection;
  }
} g_unique_color_projection_sort;

//-----------------------------------------------------------------------------------------------------------------------------------------

// Starts with no params/results attached and pre-sizes the working arrays so
// that they do not have to grow from empty on first use.
dxt1_endpoint_optimizer::dxt1_endpoint_optimizer()
    : m_pParams(NULL),
      m_pResults(NULL),
      m_perceptual(false),
      m_num_prev_results(0) {
  const uint cColorArrayCapacity = 512;
  const uint cCellArrayCapacity = 128;

  m_low_coords.reserve(cColorArrayCapacity);
  m_high_coords.reserve(cColorArrayCapacity);

  m_unique_colors.reserve(cColorArrayCapacity);
  m_temp_unique_colors.reserve(cColorArrayCapacity);
  m_unique_packed_colors.reserve(cColorArrayCapacity);

  m_norm_unique_colors.reserve(cColorArrayCapacity);
  m_norm_unique_colors_weighted.reserve(cColorArrayCapacity);

  m_lo_cells.reserve(cCellArrayCapacity);
  m_hi_cells.reserve(cCellArrayCapacity);
}

// All selectors are equal. Try compressing as if it was solid, using the block's average color, using ryg's optimal single color compression tables.
bool dxt1_endpoint_optimizer::try_average_block_as_solid() { uint64 tot_r = 0; uint64 tot_g = 0; uint64 tot_b = 0; uint total_weight = 0; for (uint i = 0; i < m_unique_colors.size(); i++) { uint weight = m_unique_colors[i].m_weight; total_weight += weight; tot_r += m_unique_colors[i].m_color.r * static_cast(weight); tot_g += m_unique_colors[i].m_color.g * static_cast(weight); tot_b += m_unique_colors[i].m_color.b * static_cast(weight); } const uint half_total_weight = total_weight >> 1; uint ave_r = static_cast((tot_r + half_total_weight) / total_weight); uint ave_g = static_cast((tot_g + half_total_weight) / total_weight); uint ave_b = static_cast((tot_b + half_total_weight) / total_weight); uint low_color = (ryg_dxt::OMatch5[ave_r][0] << 11) | (ryg_dxt::OMatch6[ave_g][0] << 5) | ryg_dxt::OMatch5[ave_b][0]; uint high_color = (ryg_dxt::OMatch5[ave_r][1] << 11) | (ryg_dxt::OMatch6[ave_g][1] << 5) | ryg_dxt::OMatch5[ave_b][1]; bool improved = evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error)) { low_color = (ryg_dxt::OMatch5_3[ave_r][0] << 11) | (ryg_dxt::OMatch6_3[ave_g][0] << 5) | ryg_dxt::OMatch5_3[ave_b][0]; high_color = (ryg_dxt::OMatch5_3[ave_r][1] << 11) | (ryg_dxt::OMatch6_3[ave_g][1] << 5) | ryg_dxt::OMatch5_3[ave_b][1]; improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); } if (m_pParams->m_quality == cCRNDXTQualityUber) { // Try compressing as all-solid using the other (non-average) colors in the block in uber. 
for (uint i = 0; i < m_unique_colors.size(); i++) { uint r = m_unique_colors[i].m_color[0]; uint g = m_unique_colors[i].m_color[1]; uint b = m_unique_colors[i].m_color[2]; if ((r == ave_r) && (g == ave_g) && (b == ave_b)) continue; uint low_color = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] << 5) | ryg_dxt::OMatch5[b][0]; uint high_color = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1]; improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); if ((m_pParams->m_use_alpha_blocks) && (m_best_solution.m_error)) { low_color = (ryg_dxt::OMatch5_3[r][0] << 11) | (ryg_dxt::OMatch6_3[g][0] << 5) | ryg_dxt::OMatch5_3[b][0]; high_color = (ryg_dxt::OMatch5_3[r][1] << 11) | (ryg_dxt::OMatch6_3[g][1] << 5) | ryg_dxt::OMatch5_3[b][1]; improved |= evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); } } } return improved; } void dxt1_endpoint_optimizer::compute_vectors(const vec3F& perceptual_weights) { m_norm_unique_colors.resize(0); m_norm_unique_colors_weighted.resize(0); m_mean_norm_color.clear(); m_mean_norm_color_weighted.clear(); for (uint i = 0; i < m_unique_colors.size(); i++) { const color_quad_u8& color = m_unique_colors[i].m_color; const uint weight = m_unique_colors[i].m_weight; vec3F norm_color(color.r * 1.0f / 255.0f, color.g * 1.0f / 255.0f, color.b * 1.0f / 255.0f); vec3F norm_color_weighted(vec3F::mul_components(perceptual_weights, norm_color)); m_norm_unique_colors.push_back(norm_color); m_norm_unique_colors_weighted.push_back(norm_color_weighted); m_mean_norm_color += norm_color * (float)weight; m_mean_norm_color_weighted += norm_color_weighted * (float)weight; } if (m_total_unique_color_weight) { m_mean_norm_color *= (1.0f / m_total_unique_color_weight); m_mean_norm_color_weighted *= (1.0f / m_total_unique_color_weight); } for (uint i = 0; i < m_unique_colors.size(); i++) { m_norm_unique_colors[i] -= m_mean_norm_color; 
m_norm_unique_colors_weighted[i] -= m_mean_norm_color_weighted; } } // Compute PCA (principle axis, i.e. direction of largest variance) of input vectors. void dxt1_endpoint_optimizer::compute_pca(vec3F& axis, const vec3F_array& norm_colors, const vec3F& def) { double cov[6] = {0, 0, 0, 0, 0, 0}; for (uint i = 0; i < norm_colors.size(); i++) { const vec3F& v = norm_colors[i]; float r = v[0]; float g = v[1]; float b = v[2]; if (m_unique_colors[i].m_weight > 1) { const double weight = m_unique_colors[i].m_weight; cov[0] += r * r * weight; cov[1] += r * g * weight; cov[2] += r * b * weight; cov[3] += g * g * weight; cov[4] += g * b * weight; cov[5] += b * b * weight; } else { cov[0] += r * r; cov[1] += r * g; cov[2] += r * b; cov[3] += g * g; cov[4] += g * b; cov[5] += b * b; } } double vfr = .9f; double vfg = 1.0f; double vfb = .7f; for (uint iter = 0; iter < 8; iter++) { double r = vfr * cov[0] + vfg * cov[1] + vfb * cov[2]; double g = vfr * cov[1] + vfg * cov[3] + vfb * cov[4]; double b = vfr * cov[2] + vfg * cov[4] + vfb * cov[5]; double m = math::maximum(fabs(r), fabs(g), fabs(b)); if (m > 1e-10) { m = 1.0f / m; r *= m; g *= m; b *= m; } double delta = math::square(vfr - r) + math::square(vfg - g) + math::square(vfb - b); vfr = r; vfg = g; vfb = b; if ((iter > 2) && (delta < 1e-8)) break; } double len = vfr * vfr + vfg * vfg + vfb * vfb; if (len < 1e-10) { axis = def; } else { len = 1.0f / sqrt(len); axis.set(static_cast(vfr * len), static_cast(vfg * len), static_cast(vfb * len)); } } static const uint8 g_invTableNull[4] = {0, 1, 2, 3}; static const uint8 g_invTableAlpha[4] = {1, 0, 2, 3}; static const uint8 g_invTableColor[4] = {1, 0, 3, 2}; // Computes a valid (encodable) DXT1 solution (low/high colors, swizzled selectors) from input. 
void dxt1_endpoint_optimizer::return_solution() { compute_selectors(); bool invert_selectors; if (m_best_solution.m_alpha_block) invert_selectors = (m_best_solution.m_coords.m_low_color > m_best_solution.m_coords.m_high_color); else { CRNLIB_ASSERT(m_best_solution.m_coords.m_low_color != m_best_solution.m_coords.m_high_color); invert_selectors = (m_best_solution.m_coords.m_low_color < m_best_solution.m_coords.m_high_color); } m_pResults->m_alternate_rounding = m_best_solution.m_alternate_rounding; m_pResults->m_enforce_selector = m_best_solution.m_enforce_selector; m_pResults->m_enforced_selector = m_best_solution.m_enforced_selector; m_pResults->m_reordered = invert_selectors; if (invert_selectors) { m_pResults->m_low_color = m_best_solution.m_coords.m_high_color; m_pResults->m_high_color = m_best_solution.m_coords.m_low_color; } else { m_pResults->m_low_color = m_best_solution.m_coords.m_low_color; m_pResults->m_high_color = m_best_solution.m_coords.m_high_color; } const uint8* pInvert_table = g_invTableNull; if (invert_selectors) pInvert_table = m_best_solution.m_alpha_block ? g_invTableAlpha : g_invTableColor; const uint alpha_thresh = m_pParams->m_pixels_have_alpha ? 
(m_pParams->m_dxt1a_alpha_threshold << 24U) : 0; const uint32* pSrc_pixels = reinterpret_cast(m_pParams->m_pPixels); uint8* pDst_selectors = m_pResults->m_pSelectors; if ((m_unique_colors.size() == 1) && (!m_pParams->m_pixels_have_alpha)) { uint32 c = utils::read_le32(pSrc_pixels); CRNLIB_ASSERT(c >= alpha_thresh); c |= 0xFF000000U; unique_color_hash_map::const_iterator it(m_unique_color_hash_map.find(c)); CRNLIB_ASSERT(it != m_unique_color_hash_map.end()); uint unique_color_index = it->second; uint selector = pInvert_table[m_best_solution.m_selectors[unique_color_index]]; memset(pDst_selectors, selector, m_pParams->m_num_pixels); } else { uint8* pDst_selectors_end = pDst_selectors + m_pParams->m_num_pixels; uint8 prev_selector = 0; uint32 prev_color = 0; do { uint32 c = utils::read_le32(pSrc_pixels); pSrc_pixels++; uint8 selector = 3; if (c >= alpha_thresh) { c |= 0xFF000000U; if (c == prev_color) selector = prev_selector; else { unique_color_hash_map::const_iterator it(m_unique_color_hash_map.find(c)); CRNLIB_ASSERT(it != m_unique_color_hash_map.end()); uint unique_color_index = it->second; selector = pInvert_table[m_best_solution.m_selectors[unique_color_index]]; prev_color = c; prev_selector = selector; } } *pDst_selectors++ = selector; } while (pDst_selectors != pDst_selectors_end); } m_pResults->m_alpha_block = m_best_solution.m_alpha_block; m_pResults->m_error = m_best_solution.m_error; } // Per-component 1D endpoint optimization. void dxt1_endpoint_optimizer::compute_endpoint_component_errors(uint comp_index, uint64 (&error)[4][256], uint64 (&best_remaining_error)[4]) { uint64 W[4] = {}, WP2[4] = {}, WPP[4] = {}; for (uint i = 0; i < m_unique_colors.size(); i++) { uint p = m_unique_colors[i].m_color[comp_index]; uint w = m_unique_colors[i].m_weight; uint8 s = m_best_solution.m_selectors[i]; W[s] += (int64)w; WP2[s] += (int64)w * p * 2; WPP[s] += (int64)w * p * p; } const uint comp_limit = comp_index == 1 ? 
64 : 32; for (uint8 s = 0; s < 2; s++) { uint64 best_error = error[s][0] = WPP[s]; for (uint8 c = 1; c < comp_limit; c++) { uint8 p = comp_index == 1 ? c << 2 | c >> 4 : c << 3 | c >> 2; error[s][c] = W[s] * p * p - WP2[s] * p + WPP[s]; if (error[s][c] < best_error) best_error = error[s][c]; } best_remaining_error[s] = best_error; } for (uint8 s = 2; s < 4; s++) { uint64 best_error = error[s][0] = WPP[s], d = W[s] - WP2[s], dd = W[s] << 1, e = WPP[s] + d; for (uint p = 1; p < 256; p++, d += dd, e += d) { error[s][p] = e; if (e < best_error) best_error = e; } best_remaining_error[s] = best_error; } for (uint8 s = 3; s; s--) best_remaining_error[s - 1] += best_remaining_error[s]; } void dxt1_endpoint_optimizer::optimize_endpoint_comps() { compute_selectors(); if (m_best_solution.m_alpha_block || !m_best_solution.m_error) return; color_quad_u8 source_low(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, true)); color_quad_u8 source_high(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, true)); uint64 error[4][256], best_remaining_error[4]; for (uint comp_index = 0; comp_index < 3; comp_index++) { uint8 p0 = source_low[comp_index]; uint8 p1 = source_high[comp_index]; color_quad_u8 low(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); color_quad_u8 high(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); compute_endpoint_component_errors(comp_index, error, best_remaining_error); uint64 best_error = error[0][low[comp_index]] + error[1][high[comp_index]] + error[2][(p0 * 2 + p1) / 3] + error[3][(p0 + p1 * 2) / 3]; if (best_remaining_error[0] >= best_error) continue; const uint comp_limit = comp_index == 1 ? 64 : 32; for (uint8 c0 = 0; c0 < comp_limit; c0++) { uint64 e0 = error[0][c0]; if (e0 + best_remaining_error[1] >= best_error) continue; low[comp_index] = c0; uint16 packed_low = dxt1_block::pack_color(low, false); p0 = comp_index == 1 ? 
c0 << 2 | c0 >> 4 : c0 << 3 | c0 >> 2; for (uint8 c1 = 0; c1 < comp_limit; c1++) { uint64 e = e0 + error[1][c1]; if (e + best_remaining_error[2] >= best_error) continue; p1 = comp_index == 1 ? c1 << 2 | c1 >> 4 : c1 << 3 | c1 >> 2; e += error[2][(p0 * 2 + p1) / 3]; if (e + best_remaining_error[3] >= best_error) continue; e += error[3][(p0 + p1 * 2) / 3]; if (e >= best_error) continue; high[comp_index] = c1; if (!evaluate_solution(dxt1_solution_coordinates(packed_low, dxt1_block::pack_color(high, false)))) continue; if (!m_best_solution.m_error) return; compute_selectors(); compute_endpoint_component_errors(comp_index, error, best_remaining_error); best_error = error[0][c0] + error[1][c1] + error[2][(p0 * 2 + p1) / 3] + error[3][(p0 + p1 * 2) / 3]; e0 = error[0][c0]; if (e0 + best_remaining_error[1] >= best_error) break; } } } } // Voxel adjacency delta coordinations. static const struct adjacent_coords { int8 x, y, z; } g_adjacency[26] = { {-1, -1, -1}, {0, -1, -1}, {1, -1, -1}, {-1, 0, -1}, {0, 0, -1}, {1, 0, -1}, {-1, 1, -1}, {0, 1, -1}, {1, 1, -1}, {-1, -1, 0}, {0, -1, 0}, {1, -1, 0}, {-1, 0, 0}, {1, 0, 0}, {-1, 1, 0}, {0, 1, 0}, {1, 1, 0}, {-1, -1, 1}, {0, -1, 1}, {1, -1, 1}, {-1, 0, 1}, {0, 0, 1}, {1, 0, 1}, {-1, 1, 1}, {0, 1, 1}, {1, 1, 1}}; // Attempt to refine current solution's endpoints given the current selectors using least squares. 
bool dxt1_endpoint_optimizer::refine_solution(int refinement_level) { compute_selectors(); static const int w1Tab[4] = {3, 0, 2, 1}; static const int prods_0[4] = {0x00, 0x00, 0x02, 0x02}; static const int prods_1[4] = {0x00, 0x09, 0x01, 0x04}; static const int prods_2[4] = {0x09, 0x00, 0x04, 0x01}; double akku_0 = 0; double akku_1 = 0; double akku_2 = 0; double At1_r, At1_g, At1_b; double At2_r, At2_g, At2_b; At1_r = At1_g = At1_b = 0; At2_r = At2_g = At2_b = 0; for (uint i = 0; i < m_unique_colors.size(); i++) { const color_quad_u8& c = m_unique_colors[i].m_color; const double weight = m_unique_colors[i].m_weight; double r = c.r * weight; double g = c.g * weight; double b = c.b * weight; int step = m_best_solution.m_selectors[i] ^ 1; int w1 = w1Tab[step]; akku_0 += prods_0[step] * weight; akku_1 += prods_1[step] * weight; akku_2 += prods_2[step] * weight; At1_r += w1 * r; At1_g += w1 * g; At1_b += w1 * b; At2_r += r; At2_g += g; At2_b += b; } At2_r = 3 * At2_r - At1_r; At2_g = 3 * At2_g - At1_g; At2_b = 3 * At2_b - At1_b; double xx = akku_2; double yy = akku_1; double xy = akku_0; double t = xx * yy - xy * xy; if (!yy || !xx || (fabs(t) < .0000125f)) return false; double frb = (3.0f * 31.0f / 255.0f) / t; double fg = frb * (63.0f / 31.0f); bool improved = false; if (refinement_level == 0) { uint max16; max16 = math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31) << 11; max16 |= math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63) << 5; max16 |= math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31) << 0; uint min16; min16 = math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31) << 11; min16 |= math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63) << 5; min16 |= math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31) << 0; dxt1_solution_coordinates nc((uint16)min16, (uint16)max16); nc.canonicalize(); improved |= evaluate_solution(nc); } else if (refinement_level 
== 1) { // Try exploring the local lattice neighbors of the least squares optimized result. color_quad_u8 e[2]; e[0].clear(); e[0][0] = (uint8)math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31); e[0][1] = (uint8)math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63); e[0][2] = (uint8)math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31); e[1].clear(); e[1][0] = (uint8)math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31); e[1][1] = (uint8)math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63); e[1][2] = (uint8)math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31); for (uint i = 0; i < 2; i++) { for (int rr = -1; rr <= 1; rr++) { for (int gr = -1; gr <= 1; gr++) { for (int br = -1; br <= 1; br++) { dxt1_solution_coordinates nc; color_quad_u8 c[2]; c[0] = e[0]; c[1] = e[1]; c[i][0] = (uint8)math::clamp(c[i][0] + rr, 0, 31); c[i][1] = (uint8)math::clamp(c[i][1] + gr, 0, 63); c[i][2] = (uint8)math::clamp(c[i][2] + br, 0, 31); nc.m_low_color = dxt1_block::pack_color(c[0], false); nc.m_high_color = dxt1_block::pack_color(c[1], false); nc.canonicalize(); improved |= evaluate_solution(nc); } } } } } else { // Try even harder to explore the local lattice neighbors of the least squares optimized result. 
color_quad_u8 e[2]; e[0].clear(); e[0][0] = (uint8)math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31); e[0][1] = (uint8)math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63); e[0][2] = (uint8)math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31); e[1].clear(); e[1][0] = (uint8)math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31); e[1][1] = (uint8)math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63); e[1][2] = (uint8)math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31); for (int orr = -1; orr <= 1; orr++) { for (int ogr = -1; ogr <= 1; ogr++) { for (int obr = -1; obr <= 1; obr++) { dxt1_solution_coordinates nc; color_quad_u8 c[2]; c[0] = e[0]; c[1] = e[1]; c[0][0] = (uint8)math::clamp(c[0][0] + orr, 0, 31); c[0][1] = (uint8)math::clamp(c[0][1] + ogr, 0, 63); c[0][2] = (uint8)math::clamp(c[0][2] + obr, 0, 31); for (int rr = -1; rr <= 1; rr++) { for (int gr = -1; gr <= 1; gr++) { for (int br = -1; br <= 1; br++) { c[1][0] = (uint8)math::clamp(c[1][0] + rr, 0, 31); c[1][1] = (uint8)math::clamp(c[1][1] + gr, 0, 63); c[1][2] = (uint8)math::clamp(c[1][2] + br, 0, 31); nc.m_low_color = dxt1_block::pack_color(c[0], false); nc.m_high_color = dxt1_block::pack_color(c[1], false); nc.canonicalize(); improved |= evaluate_solution(nc); } } } } } } } return improved; } //----------------------------------------------------------------------------------------------------------------------------------------- // Primary endpoint optimization entrypoint. void dxt1_endpoint_optimizer::optimize_endpoints(vec3F& low_color, vec3F& high_color) { vec3F orig_low_color(low_color); vec3F orig_high_color(high_color); m_trial_solution.clear(); uint num_passes; const int16* pProbe_table = g_uber_probe_table; uint probe_range; float dist_per_trial = .015625f; // How many probes, and the distance between each probe depends on the quality level. 
switch (m_pParams->m_quality) { case cCRNDXTQualitySuperFast: pProbe_table = g_fast_probe_table; probe_range = cFastProbeTableSize; dist_per_trial = .027063293f; num_passes = 1; break; case cCRNDXTQualityFast: pProbe_table = g_fast_probe_table; probe_range = cFastProbeTableSize; dist_per_trial = .027063293f; num_passes = 2; break; case cCRNDXTQualityNormal: pProbe_table = g_normal_probe_table; probe_range = cNormalProbeTableSize; dist_per_trial = .027063293f; num_passes = 2; break; case cCRNDXTQualityBetter: pProbe_table = g_better_probe_table; probe_range = cBetterProbeTableSize; num_passes = 2; break; default: pProbe_table = g_uber_probe_table; probe_range = cUberProbeTableSize; num_passes = 4; break; } if (m_pParams->m_endpoint_caching) { // Try the previous X winning endpoints. This may not give us optimal results, but it may increase the probability of early outs while evaluating potential solutions. const uint num_prev_results = math::minimum(cMaxPrevResults, m_num_prev_results); for (uint i = 0; i < num_prev_results; i++) evaluate_solution(m_prev_results[i]); if (!m_best_solution.m_error) { // Got lucky - one of the previous endpoints is optimal. 
return_solution(); return; } } if (m_pParams->m_quality >= cCRNDXTQualityBetter) { //evaluate_solution(dxt1_solution_coordinates(low_color, high_color), true, &m_best_solution); //refine_solution(); try_median4(orig_low_color, orig_high_color); } uint probe_low[cUberProbeTableSize * 2 + 1]; uint probe_high[cUberProbeTableSize * 2 + 1]; vec3F scaled_principle_axis[2]; scaled_principle_axis[1] = m_principle_axis * dist_per_trial; scaled_principle_axis[1][0] *= 31.0f; scaled_principle_axis[1][1] *= 63.0f; scaled_principle_axis[1][2] *= 31.0f; scaled_principle_axis[0] = -scaled_principle_axis[1]; //vec3F initial_ofs(scaled_principle_axis * (float)-probe_range); //initial_ofs[0] += .5f; //initial_ofs[1] += .5f; //initial_ofs[2] += .5f; low_color[0] = math::clamp(low_color[0] * 31.0f, 0.0f, 31.0f); low_color[1] = math::clamp(low_color[1] * 63.0f, 0.0f, 63.0f); low_color[2] = math::clamp(low_color[2] * 31.0f, 0.0f, 31.0f); high_color[0] = math::clamp(high_color[0] * 31.0f, 0.0f, 31.0f); high_color[1] = math::clamp(high_color[1] * 63.0f, 0.0f, 63.0f); high_color[2] = math::clamp(high_color[2] * 31.0f, 0.0f, 31.0f); int d[3]; for (uint c = 0; c < 3; c++) d[c] = math::float_to_int_round((high_color[c] - low_color[c]) * (c == 0 ? m_perceptual ? 16 : 2 : c == 1 ? m_perceptual ? 
25 : 1 : 2)); crnlib::vector evaluated_color_projections(m_evaluated_colors.size()); int64 average_projection = d[0] * (high_color[0] + low_color[0]) * 4 + d[1] * (high_color[1] + low_color[1]) * 2 + d[2] * (high_color[2] + low_color[2]) * 4; for (uint i = 0; i < m_evaluated_colors.size(); i++) { int64 delta = d[0] * m_evaluated_colors[i].m_color[0] + d[1] * m_evaluated_colors[i].m_color[1] + d[2] * m_evaluated_colors[i].m_color[2] - average_projection; evaluated_color_projections[i].projection = delta * m_evaluated_colors[i].m_weight; evaluated_color_projections[i].color = m_evaluated_colors[i]; } std::sort(evaluated_color_projections.begin(), evaluated_color_projections.end(), g_unique_color_projection_sort); for (uint i = 0, iEnd = m_evaluated_colors.size(); i < iEnd; i++) m_evaluated_colors[i] = evaluated_color_projections[i & 1 ? i >> 1 : iEnd - 1 - (i >> 1)].color; for (uint pass = 0; pass < num_passes; pass++) { // Now separately sweep or probe the low and high colors along the principle axis, both positively and negatively. // This results in two arrays of candidate low/high endpoints. Every unique combination of candidate endpoints is tried as a potential solution. // In higher quality modes, the various nearby lattice neighbors of each candidate endpoint are also explored, which allows the current solution to "wobble" or "migrate" // to areas with lower error. // This entire process can be repeated up to X times (depending on the quality level) until a local minimum is established. // This method is very stable and scalable. It could be implemented more elegantly, but I'm now very cautious of touching this code. 
if (pass) { color_quad_u8 low(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); low_color = vec3F(low.r, low.g, low.b); color_quad_u8 high(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); high_color = vec3F(high.r, high.g, high.b); } const uint64 prev_best_error = m_best_solution.m_error; if (!prev_best_error) break; // Sweep low endpoint along principle axis, record positions int prev_packed_color[2] = {-1, -1}; uint num_low_trials = 0; vec3F initial_probe_low_color(low_color + vec3F(.5f)); for (uint i = 0; i < probe_range; i++) { const int ls = i ? 0 : 1; int x = pProbe_table[i]; for (int s = ls; s < 2; s++) { vec3F probe_low_color(initial_probe_low_color + scaled_principle_axis[s] * (float)x); int r = math::clamp((int)floor(probe_low_color[0]), 0, 31); int g = math::clamp((int)floor(probe_low_color[1]), 0, 63); int b = math::clamp((int)floor(probe_low_color[2]), 0, 31); int packed_color = b | (g << 5U) | (r << 11U); if (packed_color != prev_packed_color[s]) { probe_low[num_low_trials++] = packed_color; prev_packed_color[s] = packed_color; } } } prev_packed_color[0] = -1; prev_packed_color[1] = -1; // Sweep high endpoint along principle axis, record positions uint num_high_trials = 0; vec3F initial_probe_high_color(high_color + vec3F(.5f)); for (uint i = 0; i < probe_range; i++) { const int ls = i ? 0 : 1; int x = pProbe_table[i]; for (int s = ls; s < 2; s++) { vec3F probe_high_color(initial_probe_high_color + scaled_principle_axis[s] * (float)x); int r = math::clamp((int)floor(probe_high_color[0]), 0, 31); int g = math::clamp((int)floor(probe_high_color[1]), 0, 63); int b = math::clamp((int)floor(probe_high_color[2]), 0, 31); int packed_color = b | (g << 5U) | (r << 11U); if (packed_color != prev_packed_color[s]) { probe_high[num_high_trials++] = packed_color; prev_packed_color[s] = packed_color; } } } // Now try all unique combinations. 
for (uint i = 0; i < num_low_trials; i++) { for (uint j = 0; j < num_high_trials; j++) { dxt1_solution_coordinates coords((uint16)probe_low[i], (uint16)probe_high[j]); coords.canonicalize(); evaluate_solution(coords); } } if (m_pParams->m_quality >= cCRNDXTQualityNormal) { // Generate new candidates by exploring the low color's direct lattice neighbors color_quad_u8 lc(dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false)); for (int i = 0; i < 26; i++) { int r = lc.r + g_adjacency[i].x; if ((r < 0) || (r > 31)) continue; int g = lc.g + g_adjacency[i].y; if ((g < 0) || (g > 63)) continue; int b = lc.b + g_adjacency[i].z; if ((b < 0) || (b > 31)) continue; dxt1_solution_coordinates coords(dxt1_block::pack_color(r, g, b, false), m_best_solution.m_coords.m_high_color); coords.canonicalize(); evaluate_solution(coords); } if (m_pParams->m_quality == cCRNDXTQualityUber) { // Generate new candidates by exploring the low color's direct lattice neighbors - this time, explore much further separately on each axis. lc = dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, false); for (int a = 0; a < 3; a++) { int limit = (a == 1) ? 
63 : 31; for (int s = -2; s <= 2; s += 4) { color_quad_u8 c(lc); int q = c[a] + s; if ((q < 0) || (q > limit)) continue; c[a] = (uint8)q; dxt1_solution_coordinates coords(dxt1_block::pack_color(c, false), m_best_solution.m_coords.m_high_color); coords.canonicalize(); evaluate_solution(coords); } } } // Generate new candidates by exploring the high color's direct lattice neighbors color_quad_u8 hc(dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false)); for (int i = 0; i < 26; i++) { int r = hc.r + g_adjacency[i].x; if ((r < 0) || (r > 31)) continue; int g = hc.g + g_adjacency[i].y; if ((g < 0) || (g > 63)) continue; int b = hc.b + g_adjacency[i].z; if ((b < 0) || (b > 31)) continue; dxt1_solution_coordinates coords(m_best_solution.m_coords.m_low_color, dxt1_block::pack_color(r, g, b, false)); coords.canonicalize(); evaluate_solution(coords); } if (m_pParams->m_quality == cCRNDXTQualityUber) { // Generate new candidates by exploring the high color's direct lattice neighbors - this time, explore much further separately on each axis. hc = dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, false); for (int a = 0; a < 3; a++) { int limit = (a == 1) ? 63 : 31; for (int s = -2; s <= 2; s += 4) { color_quad_u8 c(hc); int q = c[a] + s; if ((q < 0) || (q > limit)) continue; c[a] = (uint8)q; dxt1_solution_coordinates coords(m_best_solution.m_coords.m_low_color, dxt1_block::pack_color(c, false)); coords.canonicalize(); evaluate_solution(coords); } } } } if ((!m_best_solution.m_error) || ((pass) && (m_best_solution.m_error == prev_best_error))) break; if (m_pParams->m_quality >= cCRNDXTQualityUber) { // Attempt to refine current solution's endpoints given the current selectors using least squares. 
refine_solution(1); } } if (m_pParams->m_quality >= cCRNDXTQualityNormal) { if ((m_best_solution.m_error) && (!m_pParams->m_pixels_have_alpha)) { bool choose_solid_block = false; if (m_best_solution.are_selectors_all_equal()) { // All selectors equal - try various solid-block optimizations choose_solid_block = try_average_block_as_solid(); } if ((!choose_solid_block) && (m_pParams->m_quality == cCRNDXTQualityUber)) { // Per-component 1D endpoint optimization. optimize_endpoint_comps(); } } if (m_pParams->m_quality == cCRNDXTQualityUber) { if (m_best_solution.m_error) { // The pixels may have already been DXTc compressed by another compressor. // It's usually possible to recover the endpoints used to previously pack the block. try_combinatorial_encoding(); } } } return_solution(); if (m_pParams->m_endpoint_caching) { // Remember result for later reruse. m_prev_results[m_num_prev_results & (cMaxPrevResults - 1)] = m_best_solution.m_coords; m_num_prev_results++; } } void dxt1_endpoint_optimizer::handle_multicolor_block() { uint num_passes = 1; vec3F perceptual_weights(1.0f); if (m_perceptual) { // Compute RGB weighting for use in perceptual mode. // The more saturated the block, the more the weights deviate from (1,1,1). 
float ave_redness = 0; float ave_blueness = 0; float ave_l = 0; for (uint i = 0; i < m_unique_colors.size(); i++) { const color_quad_u8& c = m_unique_colors[i].m_color; int l = (c.r + c.g + c.b + 1) / 3; float scale = (float)m_unique_colors[i].m_weight / math::maximum(1.0f, l); ave_redness += scale * c.r; ave_blueness += scale * c.b; ave_l += l; } ave_redness /= m_total_unique_color_weight; ave_blueness /= m_total_unique_color_weight; ave_l /= m_total_unique_color_weight; ave_l = math::minimum(1.0f, ave_l * 16.0f / 255.0f); float p = ave_l * powf(math::saturate(math::maximum(ave_redness, ave_blueness) * 1.0f / 3.0f), 2.75f); if (p >= 1.0f) num_passes = 1; else { num_passes = 2; perceptual_weights = vec3F::lerp(vec3F(.212f, .72f, .072f), perceptual_weights, p); } } for (uint pass_index = 0; pass_index < num_passes; pass_index++) { compute_vectors(perceptual_weights); compute_pca(m_principle_axis, m_norm_unique_colors_weighted, vec3F(.2837149f, 0.9540631f, 0.096277453f)); m_principle_axis[0] /= perceptual_weights[0]; m_principle_axis[1] /= perceptual_weights[1]; m_principle_axis[2] /= perceptual_weights[2]; m_principle_axis.normalize_in_place(); if (num_passes > 1) { // Check for obviously wild principle axes and try to compensate by backing off the component weightings. if (fabs(m_principle_axis[0]) >= .795f) perceptual_weights.set(.424f, .6f, .072f); else if (fabs(m_principle_axis[2]) >= .795f) perceptual_weights.set(.212f, .6f, .212f); else break; } } // Find bounds of projection onto (potentially skewed) principle axis. float l = 1e+9; float h = -1e+9; for (uint i = 0; i < m_norm_unique_colors.size(); i++) { float d = m_norm_unique_colors[i] * m_principle_axis; l = math::minimum(l, d); h = math::maximum(h, d); } vec3F low_color(m_mean_norm_color + l * m_principle_axis); vec3F high_color(m_mean_norm_color + h * m_principle_axis); if (!low_color.is_within_bounds(0.0f, 1.0f)) { // Low color is outside the lattice, so bring it back in by casting a ray. 
vec3F coord; float t; aabb3F bounds(vec3F(0.0f), vec3F(1.0f)); intersection::result res = intersection::ray_aabb(coord, t, ray3F(low_color, m_principle_axis), bounds); if (res == intersection::cSuccess) low_color = coord; } if (!high_color.is_within_bounds(0.0f, 1.0f)) { // High color is outside the lattice, so bring it back in by casting a ray. vec3F coord; float t; aabb3F bounds(vec3F(0.0f), vec3F(1.0f)); intersection::result res = intersection::ray_aabb(coord, t, ray3F(high_color, -m_principle_axis), bounds); if (res == intersection::cSuccess) high_color = coord; } // Now optimize the endpoints using the projection bounds on the (potentially skewed) principle axis as a starting point. optimize_endpoints(low_color, high_color); } // Tries quantizing the block to 4 colors using vanilla LBG. It tries all combinations of the quantized results as potential endpoints. bool dxt1_endpoint_optimizer::try_median4(const vec3F& low_color, const vec3F& high_color) { vec3F means[4]; if (m_unique_colors.size() <= 4) { for (uint i = 0; i < 4; i++) means[i] = m_norm_unique_colors[math::minimum(m_norm_unique_colors.size() - 1, i)]; } else { means[0] = low_color - m_mean_norm_color; means[3] = high_color - m_mean_norm_color; means[1] = vec3F::lerp(means[0], means[3], 1.0f / 3.0f); means[2] = vec3F::lerp(means[0], means[3], 2.0f / 3.0f); fast_random rm; const uint cMaxIters = 8; uint reassign_rover = 0; float prev_total_dist = math::cNearlyInfinite; for (uint iter = 0; iter < cMaxIters; iter++) { vec3F new_means[4]; float new_weights[4]; utils::zero_object(new_means); utils::zero_object(new_weights); float total_dist = 0; for (uint i = 0; i < m_unique_colors.size(); i++) { const vec3F& v = m_norm_unique_colors[i]; float best_dist = means[0].squared_distance(v); int best_index = 0; for (uint j = 1; j < 4; j++) { float dist = means[j].squared_distance(v); if (dist < best_dist) { best_dist = dist; best_index = j; } } total_dist += best_dist; new_means[best_index] += v * 
(float)m_unique_colors[i].m_weight; new_weights[best_index] += (float)m_unique_colors[i].m_weight; } uint highest_index = 0; float highest_weight = 0; bool empty_cell = false; for (uint j = 0; j < 4; j++) { if (new_weights[j] > 0.0f) { means[j] = new_means[j] / new_weights[j]; if (new_weights[j] > highest_weight) { highest_weight = new_weights[j]; highest_index = j; } } else empty_cell = true; } if (!empty_cell) { if (fabs(total_dist - prev_total_dist) < .00001f) break; prev_total_dist = total_dist; } else prev_total_dist = math::cNearlyInfinite; if ((empty_cell) && (iter != (cMaxIters - 1))) { const uint ri = (highest_index + reassign_rover) & 3; reassign_rover++; for (uint j = 0; j < 4; j++) { if (new_weights[j] == 0.0f) { means[j] = means[ri]; means[j] += vec3F::make_random(rm, -.00196f, .00196f); } } } } } bool improved = false; for (uint i = 0; i < 3; i++) { for (uint j = i + 1; j < 4; j++) { const vec3F v0(means[i] + m_mean_norm_color); const vec3F v1(means[j] + m_mean_norm_color); dxt1_solution_coordinates sc( color_quad_u8((int)floor(.5f + v0[0] * 31.0f), (int)floor(.5f + v0[1] * 63.0f), (int)floor(.5f + v0[2] * 31.0f), 255), color_quad_u8((int)floor(.5f + v1[0] * 31.0f), (int)floor(.5f + v1[1] * 63.0f), (int)floor(.5f + v1[2] * 31.0f), 255), false); sc.canonicalize(); improved |= evaluate_solution(sc); } } improved |= refine_solution((m_pParams->m_quality == cCRNDXTQualityUber) ? 1 : 0); return improved; } // Given candidate low/high endpoints, find the optimal selectors for 3 and 4 color blocks, compute the resulting error, // and use the candidate if it results in less error than the best found result so far. bool dxt1_endpoint_optimizer::evaluate_solution(const dxt1_solution_coordinates& coords, bool alternate_rounding) { color_quad_u8 c0 = dxt1_block::unpack_color(coords.m_low_color, false); color_quad_u8 c1 = dxt1_block::unpack_color(coords.m_high_color, false); uint64 rError = c0.r < c1.r ? 
m_rDist[c0.r].low + m_rDist[c1.r].high : m_rDist[c0.r].high + m_rDist[c1.r].low; uint64 gError = c0.g < c1.g ? m_gDist[c0.g].low + m_gDist[c1.g].high : m_gDist[c0.g].high + m_gDist[c1.g].low; uint64 bError = c0.b < c1.b ? m_bDist[c0.b].low + m_bDist[c1.b].high : m_bDist[c0.b].high + m_bDist[c1.b].low; if (rError + gError + bError >= m_best_solution.m_error) return false; if (!alternate_rounding) { solution_hash_map::insert_result solution_res(m_solutions_tried.insert(coords.m_low_color | coords.m_high_color << 16)); if (!solution_res.second) return false; } if (m_evaluate_hc) return m_perceptual ? evaluate_solution_hc_perceptual(coords, alternate_rounding) : evaluate_solution_hc_uniform(coords, alternate_rounding); if (m_pParams->m_quality >= cCRNDXTQualityBetter) return evaluate_solution_uber(coords, alternate_rounding); return evaluate_solution_fast(coords, alternate_rounding); } inline uint dxt1_endpoint_optimizer::color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha) { if (perceptual) { return color::color_distance(true, e1, e2, alpha); } else if (m_pParams->m_grayscale_sampling) { // Computes error assuming shader will be converting the result to grayscale. 
int y0 = color::RGB_to_Y(e1); int y1 = color::RGB_to_Y(e2); int yd = y0 - y1; if (alpha) { int da = (int)e1[3] - (int)e2[3]; return yd * yd + da * da; } else { return yd * yd; } } else { return color::color_distance(false, e1, e2, alpha); } } bool dxt1_endpoint_optimizer::evaluate_solution_uber(const dxt1_solution_coordinates& coords, bool alternate_rounding) { m_trial_solution.m_coords = coords; m_trial_solution.m_selectors.resize(m_unique_colors.size()); m_trial_solution.m_error = m_best_solution.m_error; m_trial_solution.m_alpha_block = false; uint first_block_type = 0; uint last_block_type = 1; if ((m_pParams->m_pixels_have_alpha) || (m_pParams->m_force_alpha_blocks)) first_block_type = 1; else if (!m_pParams->m_use_alpha_blocks) last_block_type = 0; m_trial_selectors.resize(m_unique_colors.size()); color_quad_u8 colors[cDXT1SelectorValues]; colors[0] = dxt1_block::unpack_color(coords.m_low_color, true); colors[1] = dxt1_block::unpack_color(coords.m_high_color, true); for (uint block_type = first_block_type; block_type <= last_block_type; block_type++) { uint64 trial_error = 0; if (!block_type) { colors[2].set_noclamp_rgba((colors[0].r * 2 + colors[1].r + alternate_rounding) / 3, (colors[0].g * 2 + colors[1].g + alternate_rounding) / 3, (colors[0].b * 2 + colors[1].b + alternate_rounding) / 3, 0); colors[3].set_noclamp_rgba((colors[1].r * 2 + colors[0].r + alternate_rounding) / 3, (colors[1].g * 2 + colors[0].g + alternate_rounding) / 3, (colors[1].b * 2 + colors[0].b + alternate_rounding) / 3, 0); if (m_perceptual) { for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; uint best_error = color_distance(true, c, colors[0], false); uint best_color_index = 0; uint err = color_distance(true, c, colors[1], false); if (err < best_error) { best_error = err; best_color_index = 1; } err = color_distance(true, c, colors[2], false); if (err < 
best_error) { best_error = err; best_color_index = 2; } err = color_distance(true, c, colors[3], false); if (err < best_error) { best_error = err; best_color_index = 3; } trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); if (trial_error >= m_trial_solution.m_error) break; m_trial_selectors[unique_color_index] = static_cast(best_color_index); } } else { for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; uint best_error = color_distance(false, c, colors[0], false); uint best_color_index = 0; uint err = color_distance(false, c, colors[1], false); if (err < best_error) { best_error = err; best_color_index = 1; } err = color_distance(false, c, colors[2], false); if (err < best_error) { best_error = err; best_color_index = 2; } err = color_distance(false, c, colors[3], false); if (err < best_error) { best_error = err; best_color_index = 3; } trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); if (trial_error >= m_trial_solution.m_error) break; m_trial_selectors[unique_color_index] = static_cast(best_color_index); } } } else { colors[2].set_noclamp_rgba((colors[0].r + colors[1].r + alternate_rounding) >> 1, (colors[0].g + colors[1].g + alternate_rounding) >> 1, (colors[0].b + colors[1].b + alternate_rounding) >> 1, 255U); if (m_perceptual) { for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; uint best_error = color_distance(true, c, colors[0], false); uint best_color_index = 0; uint err = color_distance(true, c, colors[1], false); if (err < best_error) { best_error = err; best_color_index = 1; } err = color_distance(true, c, colors[2], false); if (err < best_error) { best_error = err; best_color_index = 2; } trial_error += best_error * 
static_cast(m_unique_colors[unique_color_index].m_weight); if (trial_error >= m_trial_solution.m_error) break; m_trial_selectors[unique_color_index] = static_cast(best_color_index); } } else { for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; uint best_error = color_distance(false, c, colors[0], false); uint best_color_index = 0; uint err = color_distance(false, c, colors[1], false); if (err < best_error) { best_error = err; best_color_index = 1; } err = color_distance(false, c, colors[2], false); if (err < best_error) { best_error = err; best_color_index = 2; } trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); if (trial_error >= m_trial_solution.m_error) break; m_trial_selectors[unique_color_index] = static_cast(best_color_index); } } } if (trial_error < m_trial_solution.m_error) { m_trial_solution.m_error = trial_error; m_trial_solution.m_alpha_block = (block_type != 0); m_trial_solution.m_selectors = m_trial_selectors; m_trial_solution.m_alternate_rounding = alternate_rounding; } } m_trial_solution.m_enforce_selector = !m_trial_solution.m_alpha_block && m_trial_solution.m_coords.m_low_color == m_trial_solution.m_coords.m_high_color; if (m_trial_solution.m_enforce_selector) { uint s; if ((m_trial_solution.m_coords.m_low_color & 31) != 31) { m_trial_solution.m_coords.m_low_color++; s = 1; } else { m_trial_solution.m_coords.m_high_color--; s = 0; } for (uint i = 0; i < m_unique_colors.size(); i++) m_trial_solution.m_selectors[i] = static_cast(s); m_trial_solution.m_enforced_selector = s; } if (m_trial_solution.m_error < m_best_solution.m_error) { m_best_solution = m_trial_solution; return true; } return false; } bool dxt1_endpoint_optimizer::evaluate_solution_fast(const dxt1_solution_coordinates& coords, bool alternate_rounding) { m_trial_solution.m_coords = coords; 
m_trial_solution.m_selectors.resize(m_unique_colors.size()); m_trial_solution.m_error = m_best_solution.m_error; m_trial_solution.m_alpha_block = false; uint first_block_type = 0; uint last_block_type = 1; if ((m_pParams->m_pixels_have_alpha) || (m_pParams->m_force_alpha_blocks)) first_block_type = 1; else if (!m_pParams->m_use_alpha_blocks) last_block_type = 0; m_trial_selectors.resize(m_unique_colors.size()); color_quad_u8 colors[cDXT1SelectorValues]; colors[0] = dxt1_block::unpack_color(coords.m_low_color, true); colors[1] = dxt1_block::unpack_color(coords.m_high_color, true); int vr = colors[1].r - colors[0].r; int vg = colors[1].g - colors[0].g; int vb = colors[1].b - colors[0].b; if (m_perceptual) { vr *= 8; vg *= 24; } int stops[4]; stops[0] = colors[0].r * vr + colors[0].g * vg + colors[0].b * vb; stops[1] = colors[1].r * vr + colors[1].g * vg + colors[1].b * vb; int dirr = vr * 2; int dirg = vg * 2; int dirb = vb * 2; for (uint block_type = first_block_type; block_type <= last_block_type; block_type++) { uint64 trial_error = 0; if (!block_type) { colors[2].set_noclamp_rgba((colors[0].r * 2 + colors[1].r + alternate_rounding) / 3, (colors[0].g * 2 + colors[1].g + alternate_rounding) / 3, (colors[0].b * 2 + colors[1].b + alternate_rounding) / 3, 255U); colors[3].set_noclamp_rgba((colors[1].r * 2 + colors[0].r + alternate_rounding) / 3, (colors[1].g * 2 + colors[0].g + alternate_rounding) / 3, (colors[1].b * 2 + colors[0].b + alternate_rounding) / 3, 255U); stops[2] = colors[2].r * vr + colors[2].g * vg + colors[2].b * vb; stops[3] = colors[3].r * vr + colors[3].g * vg + colors[3].b * vb; // 0 2 3 1 int c0Point = stops[1] + stops[3]; int halfPoint = stops[3] + stops[2]; int c3Point = stops[2] + stops[0]; for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; int dot = c.r * dirr + c.g * dirg + c.b * dirb; uint8 best_color_index; if 
(dot < halfPoint) best_color_index = (dot < c3Point) ? 0 : 2; else best_color_index = (dot < c0Point) ? 3 : 1; uint best_error = color_distance(m_perceptual, c, colors[best_color_index], false); trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); if (trial_error >= m_trial_solution.m_error) break; m_trial_selectors[unique_color_index] = static_cast(best_color_index); } } else { colors[2].set_noclamp_rgba((colors[0].r + colors[1].r + alternate_rounding) >> 1, (colors[0].g + colors[1].g + alternate_rounding) >> 1, (colors[0].b + colors[1].b + alternate_rounding) >> 1, 255U); stops[2] = colors[2].r * vr + colors[2].g * vg + colors[2].b * vb; // 0 2 1 int c02Point = stops[0] + stops[2]; int c21Point = stops[2] + stops[1]; for (int unique_color_index = (int)m_unique_colors.size() - 1; unique_color_index >= 0; unique_color_index--) { const color_quad_u8& c = m_unique_colors[unique_color_index].m_color; int dot = c.r * dirr + c.g * dirg + c.b * dirb; uint8 best_color_index; if (dot < c02Point) best_color_index = 0; else if (dot < c21Point) best_color_index = 2; else best_color_index = 1; uint best_error = color_distance(m_perceptual, c, colors[best_color_index], false); trial_error += best_error * static_cast(m_unique_colors[unique_color_index].m_weight); if (trial_error >= m_trial_solution.m_error) break; m_trial_selectors[unique_color_index] = static_cast(best_color_index); } } if (trial_error < m_trial_solution.m_error) { m_trial_solution.m_error = trial_error; m_trial_solution.m_alpha_block = (block_type != 0); m_trial_solution.m_selectors = m_trial_selectors; } } if ((!m_trial_solution.m_alpha_block) && (m_trial_solution.m_coords.m_low_color == m_trial_solution.m_coords.m_high_color)) { uint s; if ((m_trial_solution.m_coords.m_low_color & 31) != 31) { m_trial_solution.m_coords.m_low_color++; s = 1; } else { m_trial_solution.m_coords.m_high_color--; s = 0; } for (uint i = 0; i < m_unique_colors.size(); i++) 
m_trial_solution.m_selectors[i] = static_cast(s); } if (m_trial_solution.m_error < m_best_solution.m_error) { m_best_solution = m_trial_solution; return true; } return false; } bool dxt1_endpoint_optimizer::evaluate_solution_hc_perceptual(const dxt1_solution_coordinates& coords, bool alternate_rounding) { color_quad_u8 c0 = dxt1_block::unpack_color(coords.m_low_color, true); color_quad_u8 c1 = dxt1_block::unpack_color(coords.m_high_color, true); color_quad_u8 c2((c0.r * 2 + c1.r + alternate_rounding) / 3, (c0.g * 2 + c1.g + alternate_rounding) / 3, (c0.b * 2 + c1.b + alternate_rounding) / 3, 0); color_quad_u8 c3((c1.r * 2 + c0.r + alternate_rounding) / 3, (c1.g * 2 + c0.g + alternate_rounding) / 3, (c1.b * 2 + c0.b + alternate_rounding) / 3, 0); uint64 error = 0; unique_color* color = m_evaluated_colors.get_ptr(); for (uint count = m_evaluated_colors.size(); count; color++, error < m_best_solution.m_error ? count-- : count = 0) { uint e01 = math::minimum(color::color_distance(true, color->m_color, c0, false), color::color_distance(true, color->m_color, c1, false)); uint e23 = math::minimum(color::color_distance(true, color->m_color, c2, false), color::color_distance(true, color->m_color, c3, false)); error += math::minimum(e01, e23) * (uint64)color->m_weight; } if (error >= m_best_solution.m_error) return false; m_best_solution.m_coords = coords; m_best_solution.m_error = error; m_best_solution.m_alpha_block = false; m_best_solution.m_alternate_rounding = alternate_rounding; m_best_solution.m_enforce_selector = m_best_solution.m_coords.m_low_color == m_best_solution.m_coords.m_high_color; if (m_best_solution.m_enforce_selector) { if ((m_best_solution.m_coords.m_low_color & 31) != 31) { m_best_solution.m_coords.m_low_color++; m_best_solution.m_enforced_selector = 1; } else { m_best_solution.m_coords.m_high_color--; m_best_solution.m_enforced_selector = 0; } } return true; } bool dxt1_endpoint_optimizer::evaluate_solution_hc_uniform(const dxt1_solution_coordinates& 
coords, bool alternate_rounding) { color_quad_u8 c0 = dxt1_block::unpack_color(coords.m_low_color, true); color_quad_u8 c1 = dxt1_block::unpack_color(coords.m_high_color, true); color_quad_u8 c2((c0.r * 2 + c1.r + alternate_rounding) / 3, (c0.g * 2 + c1.g + alternate_rounding) / 3, (c0.b * 2 + c1.b + alternate_rounding) / 3, 0); color_quad_u8 c3((c1.r * 2 + c0.r + alternate_rounding) / 3, (c1.g * 2 + c0.g + alternate_rounding) / 3, (c1.b * 2 + c0.b + alternate_rounding) / 3, 0); uint64 error = 0; unique_color* color = m_evaluated_colors.get_ptr(); for (uint count = m_evaluated_colors.size(); count; color++, error < m_best_solution.m_error ? count-- : count = 0) { uint e01 = math::minimum(color::color_distance(false, color->m_color, c0, false), color::color_distance(false, color->m_color, c1, false)); uint e23 = math::minimum(color::color_distance(false, color->m_color, c2, false), color::color_distance(false, color->m_color, c3, false)); error += math::minimum(e01, e23) * (uint64)color->m_weight; } if (error >= m_best_solution.m_error) return false; m_best_solution.m_coords = coords; m_best_solution.m_error = error; m_best_solution.m_alpha_block = false; m_best_solution.m_alternate_rounding = alternate_rounding; m_best_solution.m_enforce_selector = m_best_solution.m_coords.m_low_color == m_best_solution.m_coords.m_high_color; if (m_best_solution.m_enforce_selector) { if ((m_best_solution.m_coords.m_low_color & 31) != 31) { m_best_solution.m_coords.m_low_color++; m_best_solution.m_enforced_selector = 1; } else { m_best_solution.m_coords.m_high_color--; m_best_solution.m_enforced_selector = 0; } } return true; } void dxt1_endpoint_optimizer::compute_selectors() { if (m_evaluate_hc) compute_selectors_hc(); } void dxt1_endpoint_optimizer::compute_selectors_hc() { m_best_solution.m_selectors.resize(m_unique_colors.size()); if (m_best_solution.m_enforce_selector) { memset(m_best_solution.m_selectors.get_ptr(), m_best_solution.m_enforced_selector, 
m_best_solution.m_selectors.size()); return; } color_quad_u8 c0 = dxt1_block::unpack_color(m_best_solution.m_coords.m_low_color, true); color_quad_u8 c1 = dxt1_block::unpack_color(m_best_solution.m_coords.m_high_color, true); color_quad_u8 c2((c0.r * 2 + c1.r + m_best_solution.m_alternate_rounding) / 3, (c0.g * 2 + c1.g + m_best_solution.m_alternate_rounding) / 3, (c0.b * 2 + c1.b + m_best_solution.m_alternate_rounding) / 3, 0); color_quad_u8 c3((c1.r * 2 + c0.r + m_best_solution.m_alternate_rounding) / 3, (c1.g * 2 + c0.g + m_best_solution.m_alternate_rounding) / 3, (c1.b * 2 + c0.b + m_best_solution.m_alternate_rounding) / 3, 0); for (uint i = 0, iEnd = m_unique_colors.size(); i < iEnd; i++) { const color_quad_u8& c = m_unique_colors[i].m_color; uint e0 = color::color_distance(m_perceptual, c, c0, false); uint e1 = color::color_distance(m_perceptual, c, c1, false); uint e2 = color::color_distance(m_perceptual, c, c2, false); uint e3 = color::color_distance(m_perceptual, c, c3, false); uint e01 = math::minimum(e0, e1); uint e23 = math::minimum(e2, e3); m_best_solution.m_selectors[i] = e01 <= e23 ? e01 == e0 ? 0 : 1 : e23 == e2 ? 2 : 3; } } unique_color dxt1_endpoint_optimizer::lerp_color(const color_quad_u8& a, const color_quad_u8& b, float f, int rounding) { color_quad_u8 res; float r = rounding ? 1.0f : 0.0f; res[0] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[0], b[0], f)), 0, 255)); res[1] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[1], b[1], f)), 0, 255)); res[2] = static_cast(math::clamp(math::float_to_int(r + math::lerp(a[2], b[2], f)), 0, 255)); res[3] = 255; return unique_color(res, 1); } // The block may have been already compressed using another DXTc compressor, such as squish, ATI_Compress, ryg_dxt, etc. // Attempt to recover the endpoints used by that block compressor. 
void dxt1_endpoint_optimizer::try_combinatorial_encoding() { if ((m_unique_colors.size() < 2) || (m_unique_colors.size() > 4)) return; m_temp_unique_colors = m_unique_colors; if (m_temp_unique_colors.size() == 2) { // a b c d // 0.0 1/3 2/3 1.0 for (uint k = 0; k < 2; k++) { for (uint q = 0; q < 2; q++) { const uint r = q ^ 1; // a b m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 2.0f, k)); m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 3.0f, k)); // a c m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, .5f, k)); m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 1.5f, k)); // a d // b c m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -1.0f, k)); m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, 2.0f, k)); // b d m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -.5f, k)); m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, .5f, k)); // c d m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -2.0f, k)); m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[q].m_color, m_temp_unique_colors[r].m_color, -1.0f, k)); } } } else if (m_temp_unique_colors.size() == 3) { // a b c d // 0.0 1/3 2/3 1.0 for (uint i = 0; i <= 2; i++) { for (uint j = 0; j <= 2; j++) { if (i == j) continue; // a b c m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 1.5f)); // a b d m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 2.0f / 3.0f)); // a c d 
m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, 1.0f / 3.0f)); // b c d m_temp_unique_colors.push_back(lerp_color(m_temp_unique_colors[i].m_color, m_temp_unique_colors[j].m_color, -.5f)); } } } m_unique_packed_colors.resize(0); for (uint i = 0; i < m_temp_unique_colors.size(); i++) { const color_quad_u8& unique_color = m_temp_unique_colors[i].m_color; const uint16 packed_color = dxt1_block::pack_color(unique_color, true); if (std::find(m_unique_packed_colors.begin(), m_unique_packed_colors.end(), packed_color) != m_unique_packed_colors.end()) continue; m_unique_packed_colors.push_back(packed_color); } for (uint i = 0; m_best_solution.m_error && i < m_unique_packed_colors.size() - 1; i++) { for (uint j = i + 1; m_best_solution.m_error && j < m_unique_packed_colors.size(); j++) evaluate_solution(dxt1_solution_coordinates(m_unique_packed_colors[i], m_unique_packed_colors[j])); } uint64 error = m_best_solution.m_error; if (error) m_best_solution.m_error = 1; for (uint i = 0; m_best_solution.m_error && i < m_unique_packed_colors.size() - 1; i++) { for (uint j = i + 1; m_best_solution.m_error && j < m_unique_packed_colors.size(); j++) evaluate_solution(dxt1_solution_coordinates(m_unique_packed_colors[i], m_unique_packed_colors[j]), true); } if (m_best_solution.m_error) m_best_solution.m_error = error; } // The fourth (transparent) color in 3 color "transparent" blocks is black, which can be optionally exploited for small gains in DXT1 mode if the caller // doesn't actually use alpha. (But not in DXT5 mode, because 3-color blocks aren't permitted by GPU's for DXT5.) 
bool dxt1_endpoint_optimizer::try_alpha_as_black_optimization() { results* pOrig_results = m_pResults; uint num_dark_colors = 0; for (uint i = 0; i < m_unique_colors.size(); i++) if ((m_unique_colors[i].m_color[0] <= 4) && (m_unique_colors[i].m_color[1] <= 4) && (m_unique_colors[i].m_color[2] <= 4)) num_dark_colors++; if ((!num_dark_colors) || (num_dark_colors == m_unique_colors.size())) return true; params trial_params(*m_pParams); crnlib::vector trial_colors; trial_colors.insert(0, m_pParams->m_pPixels, m_pParams->m_num_pixels); trial_params.m_pPixels = trial_colors.get_ptr(); trial_params.m_pixels_have_alpha = true; for (uint i = 0; i < trial_colors.size(); i++) if ((trial_colors[i][0] <= 4) && (trial_colors[i][1] <= 4) && (trial_colors[i][2] <= 4)) trial_colors[i][3] = 0; results trial_results; crnlib::vector trial_selectors(m_pParams->m_num_pixels); trial_results.m_pSelectors = trial_selectors.get_ptr(); compute_internal(trial_params, trial_results); CRNLIB_ASSERT(trial_results.m_alpha_block); color_quad_u8 c[4]; dxt1_block::get_block_colors3(c, trial_results.m_low_color, trial_results.m_high_color); uint64 trial_error = 0; for (uint i = 0; i < trial_colors.size(); i++) { if (trial_colors[i][3] == 0) { CRNLIB_ASSERT(trial_selectors[i] == 3); } else { CRNLIB_ASSERT(trial_selectors[i] != 3); } trial_error += color_distance(m_perceptual, trial_colors[i], c[trial_selectors[i]], false); } if (trial_error < pOrig_results->m_error) { pOrig_results->m_error = trial_error; pOrig_results->m_low_color = trial_results.m_low_color; pOrig_results->m_high_color = trial_results.m_high_color; if (pOrig_results->m_pSelectors) memcpy(pOrig_results->m_pSelectors, trial_results.m_pSelectors, m_pParams->m_num_pixels); pOrig_results->m_alpha_block = true; } return true; } void dxt1_endpoint_optimizer::compute_internal(const params& p, results& r) { m_pParams = &p; m_pResults = &r; m_evaluate_hc = m_pParams->m_quality == cCRNDXTQualityUber && !m_pParams->m_pixels_have_alpha && 
!m_pParams->m_force_alpha_blocks && !m_pParams->m_use_alpha_blocks && !m_pParams->m_grayscale_sampling; m_perceptual = m_pParams->m_perceptual && !m_pParams->m_grayscale_sampling; if (m_unique_color_hash_map.get_table_size() > 8192) m_unique_color_hash_map.clear(); else m_unique_color_hash_map.reset(); if (m_solutions_tried.get_table_size() > 8192) m_solutions_tried.clear(); else m_solutions_tried.reset(); m_unique_colors.clear(); m_norm_unique_colors.clear(); m_mean_norm_color.clear(); m_norm_unique_colors_weighted.clear(); m_mean_norm_color_weighted.clear(); m_principle_axis.clear(); m_best_solution.clear(); m_total_unique_color_weight = 0; m_unique_colors.reserve(m_pParams->m_num_pixels); unique_color color(color_quad_u8(0), 1); for (uint i = 0; i < m_pParams->m_num_pixels; i++) { if (!m_pParams->m_pixels_have_alpha || m_pParams->m_pPixels[i].a >= m_pParams->m_dxt1a_alpha_threshold) { color.m_color.m_u32 = m_pParams->m_pPixels[i].m_u32 | 0xFF000000; unique_color_hash_map::insert_result ins_result(m_unique_color_hash_map.insert(color.m_color.m_u32, m_unique_colors.size())); if (ins_result.second) { m_unique_colors.push_back(color); } else { m_unique_colors[ins_result.first->second].m_weight++; } m_total_unique_color_weight++; } } m_has_transparent_pixels = m_total_unique_color_weight != m_pParams->m_num_pixels; m_evaluated_colors = m_unique_colors; struct { uint64 weight, weightedColor, weightedSquaredColor; } rPlane[32] = {}, gPlane[64] = {}, bPlane[32] = {}; for (uint i = 0; i < m_unique_colors.size(); i++) { const unique_color& color = m_unique_colors[i]; uint8 R = color.m_color.r, r = (R >> 3) + ((R & 7) > (R >> 5) ? 1 : 0); rPlane[r].weight += color.m_weight; rPlane[r].weightedColor += (uint64)color.m_weight * R; rPlane[r].weightedSquaredColor += (uint64)color.m_weight * R * R; uint8 G = color.m_color.g, g = (G >> 2) + ((G & 3) > (G >> 6) ? 
1 : 0); gPlane[g].weight += color.m_weight; gPlane[g].weightedColor += (uint64)color.m_weight * G; gPlane[g].weightedSquaredColor += (uint64)color.m_weight * G * G; uint8 B = color.m_color.b, b = (B >> 3) + ((B & 7) > (B >> 5) ? 1 : 0); bPlane[b].weight += color.m_weight; bPlane[b].weightedColor += (uint64)color.m_weight * B; bPlane[b].weightedSquaredColor += (uint64)color.m_weight * B * B; } if (m_perceptual) { for (uint c = 0; c < 32; c++) { rPlane[c].weight *= 8; rPlane[c].weightedColor *= 8; rPlane[c].weightedSquaredColor *= 8; } for (uint c = 0; c < 64; c++) { gPlane[c].weight *= 25; gPlane[c].weightedColor *= 25; gPlane[c].weightedSquaredColor *= 25; } } for (uint c = 1; c < 32; c++) { rPlane[c].weight += rPlane[c - 1].weight; rPlane[c].weightedColor += rPlane[c - 1].weightedColor; rPlane[c].weightedSquaredColor += rPlane[c - 1].weightedSquaredColor; bPlane[c].weight += bPlane[c - 1].weight; bPlane[c].weightedColor += bPlane[c - 1].weightedColor; bPlane[c].weightedSquaredColor += bPlane[c - 1].weightedSquaredColor; } for (uint c = 1; c < 64; c++) { gPlane[c].weight += gPlane[c - 1].weight; gPlane[c].weightedColor += gPlane[c - 1].weightedColor; gPlane[c].weightedSquaredColor += gPlane[c - 1].weightedSquaredColor; } for (uint c = 0; c < 32; c++) { uint8 C = c << 3 | c >> 2; m_rDist[c].low = rPlane[c].weightedSquaredColor + C * C * rPlane[c].weight - 2 * C * rPlane[c].weightedColor; m_rDist[c].high = rPlane[31].weightedSquaredColor + C * C * rPlane[31].weight - 2 * C * rPlane[31].weightedColor - m_rDist[c].low; m_bDist[c].low = bPlane[c].weightedSquaredColor + C * C * bPlane[c].weight - 2 * C * bPlane[c].weightedColor; m_bDist[c].high = bPlane[31].weightedSquaredColor + C * C * bPlane[31].weight - 2 * C * bPlane[31].weightedColor - m_bDist[c].low; } for (uint c = 0; c < 64; c++) { uint8 C = c << 2 | c >> 4; m_gDist[c].low = gPlane[c].weightedSquaredColor + C * C * gPlane[c].weight - 2 * C * gPlane[c].weightedColor; m_gDist[c].high = 
gPlane[63].weightedSquaredColor + C * C * gPlane[63].weight - 2 * C * gPlane[63].weightedColor - m_gDist[c].low; } if (!m_unique_colors.size()) { m_pResults->m_low_color = 0; m_pResults->m_high_color = 0; m_pResults->m_alpha_block = true; memset(m_pResults->m_pSelectors, 3, m_pParams->m_num_pixels); } else if (m_unique_colors.size() == 1 && !m_has_transparent_pixels) { int r = m_unique_colors[0].m_color.r; int g = m_unique_colors[0].m_color.g; int b = m_unique_colors[0].m_color.b; uint low_color = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] << 5) | ryg_dxt::OMatch5[b][0]; uint high_color = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1]; evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); if (m_pParams->m_use_alpha_blocks && m_best_solution.m_error) { low_color = (ryg_dxt::OMatch5_3[r][0] << 11) | (ryg_dxt::OMatch6_3[g][0] << 5) | ryg_dxt::OMatch5_3[b][0]; high_color = (ryg_dxt::OMatch5_3[r][1] << 11) | (ryg_dxt::OMatch6_3[g][1] << 5) | ryg_dxt::OMatch5_3[b][1]; evaluate_solution(dxt1_solution_coordinates((uint16)low_color, (uint16)high_color)); } return_solution(); } else { handle_multicolor_block(); } } bool dxt1_endpoint_optimizer::compute(const params& p, results& r) { if (!p.m_pPixels) return false; compute_internal(p, r); if (m_pParams->m_use_alpha_blocks && m_pParams->m_use_transparent_indices_for_black && !m_pParams->m_pixels_have_alpha) return try_alpha_as_black_optimization(); return true; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt1.h000066400000000000000000000206221503722002600211650ustar00rootroot00000000000000// File: crn_dxt1.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dxt.h" namespace crnlib { struct dxt1_solution_coordinates { inline dxt1_solution_coordinates() : m_low_color(0), m_high_color(0) {} inline dxt1_solution_coordinates(uint16 l, uint16 h) : m_low_color(l), m_high_color(h) {} inline 
dxt1_solution_coordinates(const color_quad_u8& l, const color_quad_u8& h, bool scaled = true) : m_low_color(dxt1_block::pack_color(l, scaled)), m_high_color(dxt1_block::pack_color(h, scaled)) { } inline dxt1_solution_coordinates(vec3F nl, vec3F nh) { #if CRNLIB_DXT_ALT_ROUNDING // Umm, wtf? nl.clamp(0.0f, .999f); nh.clamp(0.0f, .999f); color_quad_u8 l((int)floor(nl[0] * 32.0f), (int)floor(nl[1] * 64.0f), (int)floor(nl[2] * 32.0f), 255); color_quad_u8 h((int)floor(nh[0] * 32.0f), (int)floor(nh[1] * 64.0f), (int)floor(nh[2] * 32.0f), 255); #else // Fixes the bins color_quad_u8 l((int)floor(.5f + nl[0] * 31.0f), (int)floor(.5f + nl[1] * 63.0f), (int)floor(.5f + nl[2] * 31.0f), 255); color_quad_u8 h((int)floor(.5f + nh[0] * 31.0f), (int)floor(.5f + nh[1] * 63.0f), (int)floor(.5f + nh[2] * 31.0f), 255); #endif m_low_color = dxt1_block::pack_color(l, false); m_high_color = dxt1_block::pack_color(h, false); } uint16 m_low_color; uint16 m_high_color; inline void clear() { m_low_color = 0; m_high_color = 0; } inline dxt1_solution_coordinates& canonicalize() { if (m_low_color < m_high_color) utils::swap(m_low_color, m_high_color); return *this; } inline operator size_t() const { return fast_hash(this, sizeof(*this)); } inline bool operator==(const dxt1_solution_coordinates& other) const { uint16 l0 = math::minimum(m_low_color, m_high_color); uint16 h0 = math::maximum(m_low_color, m_high_color); uint16 l1 = math::minimum(other.m_low_color, other.m_high_color); uint16 h1 = math::maximum(other.m_low_color, other.m_high_color); return (l0 == l1) && (h0 == h1); } inline bool operator!=(const dxt1_solution_coordinates& other) const { return !(*this == other); } inline bool operator<(const dxt1_solution_coordinates& other) const { uint16 l0 = math::minimum(m_low_color, m_high_color); uint16 h0 = math::maximum(m_low_color, m_high_color); uint16 l1 = math::minimum(other.m_low_color, other.m_high_color); uint16 h1 = math::maximum(other.m_low_color, other.m_high_color); if (l0 < l1) 
return true; else if (l0 == l1) { if (h0 < h1) return true; } return false; } }; typedef crnlib::vector dxt1_solution_coordinates_vec; CRNLIB_DEFINE_BITWISE_COPYABLE(dxt1_solution_coordinates); struct unique_color { inline unique_color() {} inline unique_color(const color_quad_u8& color, uint weight) : m_color(color), m_weight(weight) {} color_quad_u8 m_color; uint m_weight; inline bool operator<(const unique_color& c) const { return *reinterpret_cast(&m_color) < *reinterpret_cast(&c.m_color); } inline bool operator==(const unique_color& c) const { return *reinterpret_cast(&m_color) == *reinterpret_cast(&c.m_color); } }; CRNLIB_DEFINE_BITWISE_COPYABLE(unique_color); class dxt1_endpoint_optimizer { public: dxt1_endpoint_optimizer(); struct params { params() : m_block_index(0), m_pPixels(NULL), m_num_pixels(0), m_dxt1a_alpha_threshold(128U), m_quality(cCRNDXTQualityUber), m_pixels_have_alpha(false), m_use_alpha_blocks(true), m_perceptual(true), m_grayscale_sampling(false), m_endpoint_caching(true), m_use_transparent_indices_for_black(false), m_force_alpha_blocks(false) { } uint m_block_index; const color_quad_u8* m_pPixels; uint m_num_pixels; uint m_dxt1a_alpha_threshold; crn_dxt_quality m_quality; bool m_pixels_have_alpha; bool m_use_alpha_blocks; bool m_perceptual; bool m_grayscale_sampling; bool m_endpoint_caching; bool m_use_transparent_indices_for_black; bool m_force_alpha_blocks; }; struct results { inline results() : m_pSelectors(NULL) {} uint64 m_error; uint16 m_low_color; uint16 m_high_color; uint8* m_pSelectors; bool m_alpha_block; bool m_reordered; bool m_alternate_rounding; bool m_enforce_selector; uint8 m_enforced_selector; }; bool compute(const params& p, results& r); private: const params* m_pParams; results* m_pResults; bool m_perceptual; bool m_evaluate_hc; typedef crnlib::vector unique_color_vec; //typedef crnlib::hash_map > unique_color_hash_map; typedef crnlib::hash_map unique_color_hash_map; unique_color_hash_map m_unique_color_hash_map; 
unique_color_vec m_unique_colors; // excludes transparent colors! unique_color_vec m_evaluated_colors; unique_color_vec m_temp_unique_colors; struct { uint64 low, high; } m_rDist[32], m_gDist[64], m_bDist[32]; uint m_total_unique_color_weight; bool m_has_transparent_pixels; vec3F_array m_norm_unique_colors; vec3F m_mean_norm_color; vec3F_array m_norm_unique_colors_weighted; vec3F m_mean_norm_color_weighted; vec3F m_principle_axis; crnlib::vector m_unique_packed_colors; crnlib::vector m_trial_selectors; crnlib::vector m_low_coords; crnlib::vector m_high_coords; enum { cMaxPrevResults = 4 }; dxt1_solution_coordinates m_prev_results[cMaxPrevResults]; uint m_num_prev_results; crnlib::vector m_lo_cells; crnlib::vector m_hi_cells; struct potential_solution { potential_solution() : m_coords(), m_error(cUINT64_MAX), m_alpha_block(false) { } dxt1_solution_coordinates m_coords; crnlib::vector m_selectors; uint64 m_error; bool m_alpha_block; bool m_alternate_rounding; bool m_enforce_selector; uint8 m_enforced_selector; void clear() { m_coords.clear(); m_selectors.resize(0); m_error = cUINT64_MAX; m_alpha_block = false; } bool are_selectors_all_equal() const { if (m_selectors.empty()) return false; const uint s = m_selectors[0]; for (uint i = 1; i < m_selectors.size(); i++) if (m_selectors[i] != s) return false; return true; } }; potential_solution m_trial_solution; potential_solution m_best_solution; typedef crnlib::hash_map solution_hash_map; solution_hash_map m_solutions_tried; bool refine_solution(int refinement_level = 0); bool evaluate_solution(const dxt1_solution_coordinates& coords, bool alternate_rounding = false); bool evaluate_solution_uber(const dxt1_solution_coordinates& coords, bool alternate_rounding); bool evaluate_solution_fast(const dxt1_solution_coordinates& coords, bool alternate_rounding); bool evaluate_solution_hc_perceptual(const dxt1_solution_coordinates& coords, bool alternate_rounding); bool evaluate_solution_hc_uniform(const 
dxt1_solution_coordinates& coords, bool alternate_rounding); void compute_selectors(); void compute_selectors_hc(); void find_unique_colors(); void handle_multicolor_block(); void compute_pca(vec3F& axis, const vec3F_array& norm_colors, const vec3F& def); void compute_vectors(const vec3F& perceptual_weights); void return_solution(); void try_combinatorial_encoding(); void compute_endpoint_component_errors(uint comp_index, uint64 (&error)[4][256], uint64 (&best_remaining_error)[4]); void optimize_endpoint_comps(); void optimize_endpoints(vec3F& low_color, vec3F& high_color); bool try_alpha_as_black_optimization(); bool try_average_block_as_solid(); bool try_median4(const vec3F& low_color, const vec3F& high_color); void compute_internal(const params& p, results& r); unique_color lerp_color(const color_quad_u8& a, const color_quad_u8& b, float f, int rounding = 1); inline uint color_distance(bool perceptual, const color_quad_u8& e1, const color_quad_u8& e2, bool alpha); }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt5a.cpp000066400000000000000000000133261503722002600216700ustar00rootroot00000000000000// File: crn_dxt5a.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_dxt5a.h" #include "crn_ryg_dxt.hpp" #include "crn_dxt_fast.h" #include "crn_intersect.h" namespace crnlib { dxt5_endpoint_optimizer::dxt5_endpoint_optimizer() : m_pParams(NULL), m_pResults(NULL) { m_unique_values.reserve(16); m_unique_value_weights.reserve(16); } bool dxt5_endpoint_optimizer::compute(const params& p, results& r) { m_pParams = &p; m_pResults = &r; if ((!p.m_num_pixels) || (!p.m_pPixels)) return false; m_unique_values.resize(0); m_unique_value_weights.resize(0); for (uint i = 0; i < 256; i++) m_unique_value_map[i] = -1; for (uint i = 0; i < p.m_num_pixels; i++) { uint alpha = p.m_pPixels[i][p.m_comp_index]; int index = m_unique_value_map[alpha]; if (index == -1) { index = m_unique_values.size(); 
m_unique_value_map[alpha] = index; m_unique_values.push_back(static_cast(alpha)); m_unique_value_weights.push_back(0); } m_unique_value_weights[index]++; } if (m_unique_values.size() == 1) { r.m_block_type = 0; r.m_reordered = false; r.m_error = 0; r.m_first_endpoint = m_unique_values[0]; r.m_second_endpoint = m_unique_values[0]; memset(r.m_pSelectors, 0, p.m_num_pixels); return true; } m_trial_selectors.resize(m_unique_values.size()); m_best_selectors.resize(m_unique_values.size()); r.m_error = cUINT64_MAX; for (uint i = 0; i < m_unique_values.size() - 1; i++) { const uint low_endpoint = m_unique_values[i]; for (uint j = i + 1; j < m_unique_values.size(); j++) { const uint high_endpoint = m_unique_values[j]; evaluate_solution(low_endpoint, high_endpoint); } } if ((m_pParams->m_quality >= cCRNDXTQualityBetter) && (m_pResults->m_error)) { m_flags.resize(256 * 256); m_flags.clear_all_bits(); const int cProbeAmount = (m_pParams->m_quality == cCRNDXTQualityUber) ? 16 : 8; for (int l_delta = -cProbeAmount; l_delta <= cProbeAmount; l_delta++) { const int l = m_pResults->m_first_endpoint + l_delta; if (l < 0) continue; else if (l > 255) break; const uint bit_index = l * 256; for (int h_delta = -cProbeAmount; h_delta <= cProbeAmount; h_delta++) { const int h = m_pResults->m_second_endpoint + h_delta; if (h < 0) continue; else if (h > 255) break; //if (m_flags.get_bit(bit_index + h)) // continue; if ((m_flags.get_bit(bit_index + h)) || (m_flags.get_bit(h * 256 + l))) continue; m_flags.set_bit(bit_index + h); evaluate_solution(static_cast(l), static_cast(h)); } } } m_pResults->m_reordered = false; if (m_pResults->m_first_endpoint == m_pResults->m_second_endpoint) { for (uint i = 0; i < m_best_selectors.size(); i++) m_best_selectors[i] = 0; } else if (m_pResults->m_block_type) { //if (l > h) // eight alpha // else // six alpha if (m_pResults->m_first_endpoint > m_pResults->m_second_endpoint) { utils::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); 
m_pResults->m_reordered = true; for (uint i = 0; i < m_best_selectors.size(); i++) m_best_selectors[i] = g_six_alpha_invert_table[m_best_selectors[i]]; } } else if (!(m_pResults->m_first_endpoint > m_pResults->m_second_endpoint)) { utils::swap(m_pResults->m_first_endpoint, m_pResults->m_second_endpoint); m_pResults->m_reordered = true; for (uint i = 0; i < m_best_selectors.size(); i++) m_best_selectors[i] = g_eight_alpha_invert_table[m_best_selectors[i]]; } for (uint i = 0; i < m_pParams->m_num_pixels; i++) { uint alpha = m_pParams->m_pPixels[i][m_pParams->m_comp_index]; int index = m_unique_value_map[alpha]; m_pResults->m_pSelectors[i] = m_best_selectors[index]; } return true; } void dxt5_endpoint_optimizer::evaluate_solution(uint low_endpoint, uint high_endpoint) { for (uint block_type = 0; block_type < (m_pParams->m_use_both_block_types ? 2U : 1U); block_type++) { uint selector_values[8]; if (!block_type) dxt5_block::get_block_values8(selector_values, low_endpoint, high_endpoint); else dxt5_block::get_block_values6(selector_values, low_endpoint, high_endpoint); uint64 trial_error = 0; for (uint i = 0; i < m_unique_values.size(); i++) { const uint val = m_unique_values[i]; const uint weight = m_unique_value_weights[i]; uint best_selector_error = UINT_MAX; uint best_selector = 0; for (uint j = 0; j < 8; j++) { int selector_error = val - selector_values[j]; selector_error = selector_error * selector_error * (int)weight; if (static_cast(selector_error) < best_selector_error) { best_selector_error = selector_error; best_selector = j; if (!best_selector_error) break; } } m_trial_selectors[i] = static_cast(best_selector); trial_error += best_selector_error; if (trial_error > m_pResults->m_error) break; } if (trial_error < m_pResults->m_error) { m_pResults->m_error = trial_error; m_pResults->m_first_endpoint = static_cast(low_endpoint); m_pResults->m_second_endpoint = static_cast(high_endpoint); m_pResults->m_block_type = static_cast(block_type); 
m_best_selectors.swap(m_trial_selectors); if (!trial_error) break; } } } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt5a.h000066400000000000000000000024551503722002600213360ustar00rootroot00000000000000// File: crn_dxt5a.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dxt.h" namespace crnlib { class dxt5_endpoint_optimizer { public: dxt5_endpoint_optimizer(); struct params { params() : m_block_index(0), m_pPixels(NULL), m_num_pixels(0), m_comp_index(3), m_quality(cCRNDXTQualityUber), m_use_both_block_types(true) { } uint m_block_index; const color_quad_u8* m_pPixels; uint m_num_pixels; uint m_comp_index; crn_dxt_quality m_quality; bool m_use_both_block_types; }; struct results { uint8* m_pSelectors; uint64 m_error; uint8 m_first_endpoint; uint8 m_second_endpoint; uint8 m_block_type; // 1 if 6-alpha, otherwise 8-alpha bool m_reordered; }; bool compute(const params& p, results& r); private: const params* m_pParams; results* m_pResults; crnlib::vector m_unique_values; crnlib::vector m_unique_value_weights; crnlib::vector m_trial_selectors; crnlib::vector m_best_selectors; int m_unique_value_map[256]; sparse_bit_array m_flags; void evaluate_solution(uint low_endpoint, uint high_endpoint); }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt_endpoint_refiner.cpp000066400000000000000000000157131503722002600250560ustar00rootroot00000000000000// File: crn_dxt_endpoint_refiner.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_dxt_endpoint_refiner.h" #include "crn_dxt1.h" namespace crnlib { dxt_endpoint_refiner::dxt_endpoint_refiner() : m_pParams(NULL), m_pResults(NULL) { } bool dxt_endpoint_refiner::refine(const params& p, results& r) { if (!p.m_num_pixels) return false; m_pParams = &p; m_pResults = &r; r.m_error = cUINT64_MAX; r.m_low_color = 0; r.m_high_color = 0; double alpha2_sum = 0.0f; double beta2_sum = 0.0f; double alphabeta_sum = 0.0f; 
vec<3, double> alphax_sum(0.0f); vec<3, double> betax_sum(0.0f); vec<3, double> first_color(0.0f); // This linear solver is from Squish. for (uint i = 0; i < p.m_num_pixels; ++i) { uint8 c = p.m_pSelectors[i]; double k; if (p.m_dxt1_selectors) k = g_dxt1_to_linear[c] * 1.0f / 3.0f; else k = g_dxt5_to_linear[c] * 1.0f / 7.0f; double alpha = 1.0f - k; double beta = k; vec<3, double> x; if (p.m_dxt1_selectors) x.set(p.m_pPixels[i][0] * 1.0f / 255.0f, p.m_pPixels[i][1] * 1.0f / 255.0f, p.m_pPixels[i][2] * 1.0f / 255.0f); else x.set(p.m_pPixels[i][p.m_alpha_comp_index] / 255.0f); if (!i) first_color = x; alpha2_sum += alpha * alpha; beta2_sum += beta * beta; alphabeta_sum += alpha * beta; alphax_sum += alpha * x; betax_sum += beta * x; } // zero where non-determinate vec<3, double> a, b; if (beta2_sum == 0.0f) { a = alphax_sum / alpha2_sum; b.clear(); } else if (alpha2_sum == 0.0f) { a.clear(); b = betax_sum / beta2_sum; } else { double factor = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; if (factor != 0.0f) { a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) / factor; b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) / factor; } else { a = first_color; b = first_color; } } vec3F l(0.0f), h(0.0f); l = a; h = b; l.clamp(0.0f, 1.0f); h.clamp(0.0f, 1.0f); if (p.m_dxt1_selectors) optimize_dxt1(l, h); else optimize_dxt5(l, h); return r.m_error < p.m_error_to_beat; } void dxt_endpoint_refiner::optimize_dxt5(vec3F low_color, vec3F high_color) { uint8 L0 = math::clamp(low_color[0] * 256.0f, 0, 255); uint8 H0 = math::clamp(high_color[0] * 256.0f, 0, 255); uint64 hist[8] = {}, D2[8] = {}, DD[8] = {}; for (uint c = m_pParams->m_alpha_comp_index, i = 0; i < m_pParams->m_num_pixels; i++) { uint8 a = m_pParams->m_pPixels[i][c]; uint8 s = m_pParams->m_pSelectors[i]; hist[s]++; D2[s] += a * 2; DD[s] += a * a; } uint16 solutions[529]; uint solutions_count = 0; solutions[solutions_count++] = L0 == H0 ? H0 ? (H0 - 1) << 8 | L0 : 1 : L0 > H0 ? 
H0 << 8 | L0 : L0 << 8 | H0; uint8 minL = L0 <= 11 ? 0 : L0 - 11, maxL = L0 >= 244 ? 255 : L0 + 11; uint8 minH = H0 <= 11 ? 0 : H0 - 11, maxH = H0 >= 244 ? 255 : H0 + 11; for (uint16 L = minL; L <= maxL; L++) { for (uint16 H = minH; H <= maxH; H++) { if ((maxH < L || L <= H || H < minL) && (L != L0 || H != H0) && (L != H0 || H != L0)) solutions[solutions_count++] = L == H ? H ? (H - 1) << 8 | L : 1 : L > H ? H << 8 | L : L << 8 | H; } } for (uint i = 0; i < solutions_count; i++) { uint8 L = solutions[i] & 0xFF; uint8 H = solutions[i] >> 8; uint values[8]; dxt5_block::get_block_values8(values, L, H); uint64 error = 0; for (uint64 s = 0; s < 8; s++) error += hist[s] * values[s] * values[s] - D2[s] * values[s] + DD[s]; if (error < m_pResults->m_error) { m_pResults->m_low_color = L; m_pResults->m_high_color = H; m_pResults->m_error = error; if (!m_pResults->m_error) return; } } } void dxt_endpoint_refiner::optimize_dxt1(vec3F low_color, vec3F high_color) { uint16 L0 = math::clamp(low_color[0] * 32.0f, 0, 31) << 11 | math::clamp(low_color[1] * 64.0f, 0, 63) << 5 | math::clamp(low_color[2] * 32.0f, 0, 31); uint16 H0 = math::clamp(high_color[0] * 32.0f, 0, 31) << 11 | math::clamp(high_color[1] * 64.0f, 0, 63) << 5 | math::clamp(high_color[2] * 32.0f, 0, 31); uint64 hist[4] = {}, D2[4][3] = {}, DD[4][3] = {}; for (uint i = 0; i < m_pParams->m_num_pixels; i++) { const color_quad_u8& pixel = m_pParams->m_pPixels[i]; uint8 s = m_pParams->m_pSelectors[i]; hist[s]++; for (uint c = 0; c < 3; c++) { D2[s][c] += pixel[c] * 2; DD[s][c] += pixel[c] * pixel[c]; } } crnlib::vector solutions(54); bool preserveL = hist[0] + hist[2] > hist[1] + hist[3]; bool improved = true; for (uint iterations = 8; improved && iterations; iterations--) { improved = false; uint solutions_count = 0; for (uint16 b0 = L0 & 31, g0 = L0 >> 5 & 63, r0 = L0 >> 11 & 31, b = b0 ? b0 - 1 : b0; b <= b0 + 1 && b <= 31; b++) { for (uint16 g = g0 ? g0 - 1 : g0; g <= g0 + 1 && g <= 63; g++) { for (uint16 r = r0 ? 
r0 - 1 : r0; r <= r0 + 1 && r <= 31; r++) { uint16 L = r << 11 | g << 5 | b; if (L != L0) solutions[solutions_count++] = L > H0 ? L | H0 << 16 : H0 | L << 16; } } } for (uint16 b0 = H0 & 31, g0 = H0 >> 5 & 63, r0 = H0 >> 11 & 31, b = b0 ? b0 - 1 : b0; b <= b0 + 1 && b <= 31; b++) { for (uint16 g = g0 ? g0 - 1 : g0; g <= g0 + 1 && g <= 63; g++) { for (uint16 r = r0 ? r0 - 1 : r0; r <= r0 + 1 && r <= 31; r++) { uint16 H = r << 11 | g << 5 | b; if (H != H0) solutions[solutions_count++] = H > L0 ? H | L0 << 16 : L0 | H << 16; } } } std::sort(solutions.begin(), solutions.begin() + solutions_count); for (uint i = 0; i < solutions_count; i++) { if (i && solutions[i] == solutions[i - 1]) continue; uint16 L = solutions[i] & 0xFFFF; uint16 H = solutions[i] >> 16; if (L == H) { L += !preserveL ? ~L & 0x1F ? 0x1 : ~L & 0xF800 ? 0x800 : ~L & 0x7E0 ? 0x20 : 0 : !L ? 0x1 : 0; H -= preserveL ? H & 0x1F ? 0x1 : H & 0xF800 ? 0x800 : H & 0x7E0 ? 0x20 : 0 : H == 0xFFFF ? 0x1 : 0; } color_quad_u8 block_colors[4]; dxt1_block::get_block_colors4(block_colors, L, H); uint64 error = 0; for (uint64 s = 0, d[3]; s < 4; s++) { for (uint c = 0; c < 3; c++) d[c] = hist[s] * block_colors[s][c] * block_colors[s][c] - D2[s][c] * block_colors[s][c] + DD[s][c]; error += m_pParams->m_perceptual ? 
d[0] * 8 + d[1] * 25 + d[2] : d[0] + d[1] + d[2]; } if (error < m_pResults->m_error) { m_pResults->m_low_color = L0 = L; m_pResults->m_high_color = H0 = H; m_pResults->m_error = error; if (!m_pResults->m_error) return; improved = true; } } } } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt_endpoint_refiner.h000066400000000000000000000023331503722002600245150ustar00rootroot00000000000000// File: crn_dxt_endpoint_refiner.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dxt.h" namespace crnlib { // TODO: Experimental/Not fully implemented class dxt_endpoint_refiner { public: dxt_endpoint_refiner(); struct params { params() : m_block_index(0), m_pPixels(NULL), m_num_pixels(0), m_pSelectors(NULL), m_alpha_comp_index(0), m_error_to_beat(cUINT64_MAX), m_dxt1_selectors(true), m_perceptual(true), m_highest_quality(true) { } uint m_block_index; const color_quad_u8* m_pPixels; uint m_num_pixels; const uint8* m_pSelectors; uint m_alpha_comp_index; uint64 m_error_to_beat; bool m_dxt1_selectors; bool m_perceptual; bool m_highest_quality; }; struct results { uint16 m_low_color; uint16 m_high_color; uint64 m_error; }; bool refine(const params& p, results& r); private: const params* m_pParams; results* m_pResults; void optimize_dxt1(vec3F low_color, vec3F high_color); void optimize_dxt5(vec3F low_color, vec3F high_color); }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt_fast.cpp000066400000000000000000000557041503722002600224650ustar00rootroot00000000000000// File: crn_dxt_fast.cpp // See Copyright Notice and license at the end of inc/crnlib.h // Parts of this module are derived from RYG's excellent public domain DXTx compressor. 
#include "crn_core.h" #include "crn_dxt_fast.h" #include "crn_ryg_dxt.hpp" namespace crnlib { namespace dxt_fast { static inline int mul_8bit(int a, int b) { int t = a * b + 128; return (t + (t >> 8)) >> 8; } static inline color_quad_u8& unpack_color(color_quad_u8& c, uint v) { uint rv = (v & 0xf800) >> 11; uint gv = (v & 0x07e0) >> 5; uint bv = (v & 0x001f) >> 0; c.r = ryg_dxt::Expand5[rv]; c.g = ryg_dxt::Expand6[gv]; c.b = ryg_dxt::Expand5[bv]; c.a = 0; return c; } static inline uint pack_color(const color_quad_u8& c) { return (mul_8bit(c.r, 31) << 11) + (mul_8bit(c.g, 63) << 5) + mul_8bit(c.b, 31); } #if 0 static inline void lerp_color(color_quad_u8& result, const color_quad_u8& p1, const color_quad_u8& p2, uint f) { CRNLIB_ASSERT(f <= 255); result.r = static_cast(p1.r + mul_8bit(p2.r - p1.r, f)); result.g = static_cast(p1.g + mul_8bit(p2.g - p1.g, f)); result.b = static_cast(p1.b + mul_8bit(p2.b - p1.b, f)); } #endif static inline void eval_colors(color_quad_u8* pColors, uint c0, uint c1) { unpack_color(pColors[0], c0); unpack_color(pColors[1], c1); #if 0 lerp_color(pColors[2], pColors[0], pColors[1], 0x55); lerp_color(pColors[3], pColors[0], pColors[1], 0xAA); #else pColors[2].r = (pColors[0].r * 2 + pColors[1].r) / 3; pColors[2].g = (pColors[0].g * 2 + pColors[1].g) / 3; pColors[2].b = (pColors[0].b * 2 + pColors[1].b) / 3; pColors[3].r = (pColors[1].r * 2 + pColors[0].r) / 3; pColors[3].g = (pColors[1].g * 2 + pColors[0].g) / 3; pColors[3].b = (pColors[1].b * 2 + pColors[0].b) / 3; #endif } // false if all selectors equal static bool match_block_colors(uint n, const color_quad_u8* pBlock, const color_quad_u8* pColors, uint8* pSelectors) { int dirr = pColors[0].r - pColors[1].r; int dirg = pColors[0].g - pColors[1].g; int dirb = pColors[0].b - pColors[1].b; int stops[4]; for (int i = 0; i < 4; i++) stops[i] = pColors[i].r * dirr + pColors[i].g * dirg + pColors[i].b * dirb; // 0 2 3 1 int c0Point = stops[1] + stops[3]; int halfPoint = stops[3] + stops[2]; int 
c3Point = stops[2] + stops[0]; //dirr *= 2; //dirg *= 2; //dirb *= 2; c0Point >>= 1; halfPoint >>= 1; c3Point >>= 1; bool status = false; for (uint i = 0; i < n; i++) { int dot = pBlock[i].r * dirr + pBlock[i].g * dirg + pBlock[i].b * dirb; uint8 s; if (dot < halfPoint) s = (dot < c0Point) ? 1 : 3; else s = (dot < c3Point) ? 2 : 0; pSelectors[i] = s; if (s != pSelectors[0]) status = true; } return status; } static bool optimize_block_colors(uint n, const color_quad_u8* block, uint& max16, uint& min16, uint ave_color[3], float axis[3]) { int min[3], max[3]; for (uint ch = 0; ch < 3; ch++) { const uint8* bp = ((const uint8*)block) + ch; int minv, maxv; int64 muv = bp[0]; minv = maxv = bp[0]; const uint l = n << 2; for (uint i = 4; i < l; i += 4) { muv += bp[i]; minv = math::minimum(minv, bp[i]); maxv = math::maximum(maxv, bp[i]); } ave_color[ch] = static_cast((muv + (n / 2)) / n); min[ch] = minv; max[ch] = maxv; } if ((min[0] == max[0]) && (min[1] == max[1]) && (min[2] == max[2])) return false; // determine covariance matrix double cov[6]; for (int i = 0; i < 6; i++) cov[i] = 0; for (uint i = 0; i < n; i++) { double r = (int)block[i].r - (int)ave_color[0]; double g = (int)block[i].g - (int)ave_color[1]; double b = (int)block[i].b - (int)ave_color[2]; cov[0] += r * r; cov[1] += r * g; cov[2] += r * b; cov[3] += g * g; cov[4] += g * b; cov[5] += b * b; } double covf[6], vfr, vfg, vfb; for (int i = 0; i < 6; i++) covf[i] = cov[i] * (1.0f / 255.0f); vfr = max[0] - min[0]; vfg = max[1] - min[1]; vfb = max[2] - min[2]; static const uint nIterPower = 4; for (uint iter = 0; iter < nIterPower; iter++) { double r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2]; double g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4]; double b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5]; vfr = r; vfg = g; vfb = b; } double magn = math::maximum(math::maximum(fabs(vfr), fabs(vfg)), fabs(vfb)); int v_r, v_g, v_b; if (magn < 4.0f) // too small, default to luminance { v_r = 148; v_g = 300; v_b 
= 58; axis[0] = (float)v_r; axis[1] = (float)v_g; axis[2] = (float)v_b; } else { magn = 512.0f / magn; vfr *= magn; vfg *= magn; vfb *= magn; v_r = static_cast(vfr); v_g = static_cast(vfg); v_b = static_cast(vfb); axis[0] = (float)vfr; axis[1] = (float)vfg; axis[2] = (float)vfb; } int mind = block[0].r * v_r + block[0].g * v_g + block[0].b * v_b; int maxd = mind; color_quad_u8 minp(block[0]); color_quad_u8 maxp(block[0]); for (uint i = 1; i < n; i++) { int dot = block[i].r * v_r + block[i].g * v_g + block[i].b * v_b; if (dot < mind) { mind = dot; minp = block[i]; } if (dot > maxd) { maxd = dot; maxp = block[i]; } } max16 = pack_color(maxp); min16 = pack_color(minp); return true; } // The refinement function. (Clever code, part 2) // Tries to optimize colors to suit block contents better. // (By solving a least squares system via normal equations+Cramer's rule) static bool refine_block(uint n, const color_quad_u8* block, uint& max16, uint& min16, const uint8* pSelectors) { static const int w1Tab[4] = {3, 0, 2, 1}; static const int prods_0[4] = {0x00, 0x00, 0x02, 0x02}; static const int prods_1[4] = {0x00, 0x09, 0x01, 0x04}; static const int prods_2[4] = {0x09, 0x00, 0x04, 0x01}; double akku_0 = 0; double akku_1 = 0; double akku_2 = 0; double At1_r, At1_g, At1_b; double At2_r, At2_g, At2_b; At1_r = At1_g = At1_b = 0; At2_r = At2_g = At2_b = 0; for (uint i = 0; i < n; i++) { double r = block[i].r; double g = block[i].g; double b = block[i].b; int step = pSelectors[i]; int w1 = w1Tab[step]; akku_0 += prods_0[step]; akku_1 += prods_1[step]; akku_2 += prods_2[step]; At1_r += w1 * r; At1_g += w1 * g; At1_b += w1 * b; At2_r += r; At2_g += g; At2_b += b; } At2_r = 3 * At2_r - At1_r; At2_g = 3 * At2_g - At1_g; At2_b = 3 * At2_b - At1_b; double xx = akku_2; double yy = akku_1; double xy = akku_0; double t = xx * yy - xy * xy; if (!yy || !xx || (fabs(t) < .0000125f)) return false; double frb = (3.0f * 31.0f / 255.0f) / t; double fg = frb * (63.0f / 31.0f); uint oldMin = min16; 
uint oldMax = max16; // solve. max16 = math::clamp(static_cast((At1_r * yy - At2_r * xy) * frb + 0.5f), 0, 31) << 11; max16 |= math::clamp(static_cast((At1_g * yy - At2_g * xy) * fg + 0.5f), 0, 63) << 5; max16 |= math::clamp(static_cast((At1_b * yy - At2_b * xy) * frb + 0.5f), 0, 31) << 0; min16 = math::clamp(static_cast((At2_r * xx - At1_r * xy) * frb + 0.5f), 0, 31) << 11; min16 |= math::clamp(static_cast((At2_g * xx - At1_g * xy) * fg + 0.5f), 0, 63) << 5; min16 |= math::clamp(static_cast((At2_b * xx - At1_b * xy) * frb + 0.5f), 0, 31) << 0; return (oldMin != min16) || (oldMax != max16); } // false if all selectors equal static bool determine_selectors(uint n, const color_quad_u8* block, uint min16, uint max16, uint8* pSelectors) { color_quad_u8 color[4]; if (max16 != min16) { eval_colors(color, min16, max16); return match_block_colors(n, block, color, pSelectors); } memset(pSelectors, 0, n); return false; } static uint64 determine_error(uint n, const color_quad_u8* block, uint min16, uint max16, uint64 early_out_error) { color_quad_u8 color[4]; eval_colors(color, min16, max16); int dirr = color[0].r - color[1].r; int dirg = color[0].g - color[1].g; int dirb = color[0].b - color[1].b; int stops[4]; for (int i = 0; i < 4; i++) stops[i] = color[i].r * dirr + color[i].g * dirg + color[i].b * dirb; // 0 2 3 1 int c0Point = stops[1] + stops[3]; int halfPoint = stops[3] + stops[2]; int c3Point = stops[2] + stops[0]; c0Point >>= 1; halfPoint >>= 1; c3Point >>= 1; uint64 total_error = 0; for (uint i = 0; i < n; i++) { const color_quad_u8& a = block[i]; uint s = 0; if (min16 != max16) { int dot = a.r * dirr + a.g * dirg + a.b * dirb; if (dot < halfPoint) s = (dot < c0Point) ? 1 : 3; else s = (dot < c3Point) ? 
2 : 0; } const color_quad_u8& b = color[s]; int e = a[0] - b[0]; total_error += e * e; e = a[1] - b[1]; total_error += e * e; e = a[2] - b[2]; total_error += e * e; if (total_error >= early_out_error) break; } return total_error; } static bool refine_endpoints(uint n, const color_quad_u8* pBlock, uint& low16, uint& high16, uint8* pSelectors) { bool optimized = false; const int limits[3] = {31, 63, 31}; for (uint trial = 0; trial < 2; trial++) { color_quad_u8 color[4]; eval_colors(color, low16, high16); uint64 total_error[3] = {0, 0, 0}; for (uint i = 0; i < n; i++) { const color_quad_u8& a = pBlock[i]; const uint s = pSelectors[i]; const color_quad_u8& b = color[s]; int e = a[0] - b[0]; total_error[0] += e * e; e = a[1] - b[1]; total_error[1] += e * e; e = a[2] - b[2]; total_error[2] += e * e; } color_quad_u8 endpoints[2]; endpoints[0] = dxt1_block::unpack_color((uint16)low16, false); endpoints[1] = dxt1_block::unpack_color((uint16)high16, false); color_quad_u8 expanded_endpoints[2]; expanded_endpoints[0] = dxt1_block::unpack_color((uint16)low16, true); expanded_endpoints[1] = dxt1_block::unpack_color((uint16)high16, true); bool trial_optimized = false; for (uint axis = 0; axis < 3; axis++) { if (!total_error[axis]) continue; const sU8* const pExpand = (axis == 1) ? 
ryg_dxt::Expand6 : ryg_dxt::Expand5; for (uint e = 0; e < 2; e++) { uint v[4]; v[e ^ 1] = expanded_endpoints[e ^ 1][axis]; for (int t = -1; t <= 1; t += 2) { int a = endpoints[e][axis] + t; if ((a < 0) || (a > limits[axis])) continue; v[e] = pExpand[a]; //int delta = v[1] - v[0]; //v[2] = v[0] + mul_8bit(delta, 0x55); //v[3] = v[0] + mul_8bit(delta, 0xAA); v[2] = (v[0] * 2 + v[1]) / 3; v[3] = (v[0] + v[1] * 2) / 3; uint64 axis_error = 0; for (uint i = 0; i < n; i++) { const color_quad_u8& p = pBlock[i]; int e = v[pSelectors[i]] - p[axis]; axis_error += e * e; if (axis_error >= total_error[axis]) break; } if (axis_error < total_error[axis]) { //total_error[axis] = axis_error; endpoints[e][axis] = (uint8)a; expanded_endpoints[e][axis] = (uint8)v[e]; if (e) high16 = dxt1_block::pack_color(endpoints[1], false); else low16 = dxt1_block::pack_color(endpoints[0], false); determine_selectors(n, pBlock, low16, high16, pSelectors); eval_colors(color, low16, high16); utils::zero_object(total_error); for (uint i = 0; i < n; i++) { const color_quad_u8& a = pBlock[i]; const uint s = pSelectors[i]; const color_quad_u8& b = color[s]; int e = a[0] - b[0]; total_error[0] += e * e; e = a[1] - b[1]; total_error[1] += e * e; e = a[2] - b[2]; total_error[2] += e * e; } trial_optimized = true; } } // t } // e } // axis if (!trial_optimized) break; optimized = true; } // for ( ; ; ) return optimized; } static void refine_endpoints2(uint n, const color_quad_u8* pBlock, uint& low16, uint& high16, uint8* pSelectors, float axis[3]) { uint64 orig_error = determine_error(n, pBlock, low16, high16, cUINT64_MAX); if (!orig_error) return; float l = 1.0f / sqrt(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]); vec3F principle_axis(axis[0] * l, axis[1] * l, axis[2] * l); const float dist_per_trial = 0.027063293f; const uint cMaxProbeRange = 8; uint probe_low[cMaxProbeRange * 2 + 1]; uint probe_high[cMaxProbeRange * 2 + 1]; int probe_range = 8; uint num_iters = 4; const uint num_trials = 
probe_range * 2 + 1; vec3F scaled_principle_axis(principle_axis * dist_per_trial); scaled_principle_axis[0] *= 31.0f; scaled_principle_axis[1] *= 63.0f; scaled_principle_axis[2] *= 31.0f; vec3F initial_ofs(scaled_principle_axis * (float)-probe_range); initial_ofs[0] += .5f; initial_ofs[1] += .5f; initial_ofs[2] += .5f; uint64 cur_error = orig_error; for (uint iter = 0; iter < num_iters; iter++) { color_quad_u8 endpoints[2]; endpoints[0] = dxt1_block::unpack_color((uint16)low16, false); endpoints[1] = dxt1_block::unpack_color((uint16)high16, false); vec3F low_color(endpoints[0][0], endpoints[0][1], endpoints[0][2]); vec3F high_color(endpoints[1][0], endpoints[1][1], endpoints[1][2]); vec3F probe_low_color(low_color + initial_ofs); for (uint i = 0; i < num_trials; i++) { int r = math::clamp((int)floor(probe_low_color[0]), 0, 31); int g = math::clamp((int)floor(probe_low_color[1]), 0, 63); int b = math::clamp((int)floor(probe_low_color[2]), 0, 31); probe_low[i] = b | (g << 5U) | (r << 11U); probe_low_color += scaled_principle_axis; } vec3F probe_high_color(high_color + initial_ofs); for (uint i = 0; i < num_trials; i++) { int r = math::clamp((int)floor(probe_high_color[0]), 0, 31); int g = math::clamp((int)floor(probe_high_color[1]), 0, 63); int b = math::clamp((int)floor(probe_high_color[2]), 0, 31); probe_high[i] = b | (g << 5U) | (r << 11U); probe_high_color += scaled_principle_axis; } uint best_l = low16; uint best_h = high16; enum { cMaxHash = 4 }; uint64 hash[cMaxHash]; for (uint i = 0; i < cMaxHash; i++) hash[i] = 0; uint c = best_l | (best_h << 16); c = fast_hash(&c, sizeof(c)); hash[(c >> 6) & 3] = 1ULL << (c & 63); for (uint i = 0; i < num_trials; i++) { for (uint j = 0; j < num_trials; j++) { uint l = probe_low[i]; uint h = probe_high[j]; if (l < h) utils::swap(l, h); uint c = l | (h << 16); c = fast_hash(&c, sizeof(c)); uint64 mask = 1ULL << (c & 63); uint ofs = (c >> 6) & 3; if (hash[ofs] & mask) continue; hash[ofs] |= mask; uint64 new_error = 
determine_error(n, pBlock, l, h, cur_error); if (new_error < cur_error) { best_l = l; best_h = h; cur_error = new_error; } } } bool improved = false; if ((best_l != low16) || (best_h != high16)) { low16 = best_l; high16 = best_h; determine_selectors(n, pBlock, low16, high16, pSelectors); improved = true; } if (refine_endpoints(n, pBlock, low16, high16, pSelectors)) { improved = true; uint64 cur_error = determine_error(n, pBlock, low16, high16, cUINT64_MAX); if (!cur_error) return; } if (!improved) break; } // iter //uint64 end_error = determine_error(n, pBlock, low16, high16, UINT64_MAX); //if (end_error > orig_error) DebugBreak(); } static void compress_solid_block(uint n, uint ave_color[3], uint& low16, uint& high16, uint8* pSelectors) { uint r = ave_color[0]; uint g = ave_color[1]; uint b = ave_color[2]; memset(pSelectors, 2, n); low16 = (ryg_dxt::OMatch5[r][0] << 11) | (ryg_dxt::OMatch6[g][0] << 5) | ryg_dxt::OMatch5[b][0]; high16 = (ryg_dxt::OMatch5[r][1] << 11) | (ryg_dxt::OMatch6[g][1] << 5) | ryg_dxt::OMatch5[b][1]; } void compress_color_block(uint n, const color_quad_u8* block, uint& low16, uint& high16, uint8* pSelectors, bool refine) { CRNLIB_ASSERT((n & 15) == 0); uint ave_color[3]; float axis[3]; if (!optimize_block_colors(n, block, low16, high16, ave_color, axis)) { compress_solid_block(n, ave_color, low16, high16, pSelectors); } else { if (!determine_selectors(n, block, low16, high16, pSelectors)) compress_solid_block(n, ave_color, low16, high16, pSelectors); else { if (refine_block(n, block, low16, high16, pSelectors)) determine_selectors(n, block, low16, high16, pSelectors); if (refine) refine_endpoints2(n, block, low16, high16, pSelectors, axis); } } if (low16 < high16) { utils::swap(low16, high16); for (uint i = 0; i < n; i++) pSelectors[i] ^= 1; } } void compress_color_block(dxt1_block* pDXT1_block, const color_quad_u8* pBlock, bool refine) { uint8 color_selectors[16]; uint low16, high16; dxt_fast::compress_color_block(16, pBlock, low16, high16, 
color_selectors, refine); pDXT1_block->set_low_color(static_cast(low16)); pDXT1_block->set_high_color(static_cast(high16)); uint mask = 0; for (int i = 15; i >= 0; i--) { mask <<= 2; mask |= color_selectors[i]; } pDXT1_block->m_selectors[0] = (uint8)(mask & 0xFF); pDXT1_block->m_selectors[1] = (uint8)((mask >> 8) & 0xFF); pDXT1_block->m_selectors[2] = (uint8)((mask >> 16) & 0xFF); pDXT1_block->m_selectors[3] = (uint8)((mask >> 24) & 0xFF); } void compress_alpha_block(uint n, const color_quad_u8* block, uint& low8, uint& high8, uint8* pSelectors, uint comp_index) { int min, max; min = max = block[0][comp_index]; for (uint i = 1; i < n; i++) { min = math::minimum(min, block[i][comp_index]); max = math::maximum(max, block[i][comp_index]); } low8 = max; high8 = min; int dist = max - min; int bias = min * 7 - (dist >> 1); int dist4 = dist * 4; int dist2 = dist * 2; for (uint i = 0; i < n; i++) { int a = block[i][comp_index] * 7 - bias; int ind, t; t = (dist4 - a) >> 31; ind = t & 4; a -= dist4 & t; t = (dist2 - a) >> 31; ind += t & 2; a -= dist2 & t; t = (dist - a) >> 31; ind += t & 1; ind = -ind & 7; ind ^= (2 > ind); pSelectors[i] = static_cast(ind); } } void compress_alpha_block(dxt5_block* pDXT5_block, const color_quad_u8* pBlock, uint comp_index) { uint8 selectors[16]; uint low8, high8; compress_alpha_block(16, pBlock, low8, high8, selectors, comp_index); pDXT5_block->set_low_alpha(low8); pDXT5_block->set_high_alpha(high8); uint mask = 0; uint bits = 0; uint8* pDst = pDXT5_block->m_selectors; for (uint i = 0; i < 16; i++) { mask |= (selectors[i] << bits); if ((bits += 3) >= 8) { *pDst++ = static_cast(mask); mask >>= 8; bits -= 8; } } } void find_representative_colors(uint n, const color_quad_u8* pBlock, color_quad_u8& lo, color_quad_u8& hi) { uint64 ave64[3]; ave64[0] = 0; ave64[1] = 0; ave64[2] = 0; for (uint i = 0; i < n; i++) { ave64[0] += pBlock[i].r; ave64[1] += pBlock[i].g; ave64[2] += pBlock[i].b; } uint ave[3]; ave[0] = static_cast((ave64[0] + (n / 2)) / 
n); ave[1] = static_cast((ave64[1] + (n / 2)) / n); ave[2] = static_cast((ave64[2] + (n / 2)) / n); int furthest_dist = -1; uint furthest_index = 0; for (uint i = 0; i < n; i++) { int r = pBlock[i].r - ave[0]; int g = pBlock[i].g - ave[1]; int b = pBlock[i].b - ave[2]; int dist = r * r + g * g + b * b; if (dist > furthest_dist) { furthest_dist = dist; furthest_index = i; } } color_quad_u8 lo_color(pBlock[furthest_index]); int opp_dist = -1; uint opp_index = 0; for (uint i = 0; i < n; i++) { int r = pBlock[i].r - lo_color.r; int g = pBlock[i].g - lo_color.g; int b = pBlock[i].b - lo_color.b; int dist = r * r + g * g + b * b; if (dist > opp_dist) { opp_dist = dist; opp_index = i; } } color_quad_u8 hi_color(pBlock[opp_index]); for (uint i = 0; i < 3; i++) { lo_color[i] = static_cast((lo_color[i] + ave[i]) >> 1); hi_color[i] = static_cast((hi_color[i] + ave[i]) >> 1); } const uint cMaxIters = 4; for (uint iter_index = 0; iter_index < cMaxIters; iter_index++) { if ((lo_color[0] == hi_color[0]) && (lo_color[1] == hi_color[1]) && (lo_color[2] == hi_color[2])) break; uint64 new_color[2][3]; uint weight[2]; utils::zero_object(new_color); utils::zero_object(weight); int vec_r = hi_color[0] - lo_color[0]; int vec_g = hi_color[1] - lo_color[1]; int vec_b = hi_color[2] - lo_color[2]; int lo_dot = vec_r * lo_color[0] + vec_g * lo_color[1] + vec_b * lo_color[2]; int hi_dot = vec_r * hi_color[0] + vec_g * hi_color[1] + vec_b * hi_color[2]; int mid_dot = lo_dot + hi_dot; vec_r *= 2; vec_g *= 2; vec_b *= 2; for (uint i = 0; i < n; i++) { const color_quad_u8& c = pBlock[i]; const int dot = c[0] * vec_r + c[1] * vec_g + c[2] * vec_b; const uint match_index = (dot > mid_dot); new_color[match_index][0] += c.r; new_color[match_index][1] += c.g; new_color[match_index][2] += c.b; weight[match_index]++; } if ((!weight[0]) || (!weight[1])) break; uint8 new_color8[2][3]; for (uint j = 0; j < 2; j++) for (uint i = 0; i < 3; i++) new_color8[j][i] = static_cast((new_color[j][i] + (weight[j] / 
2)) / weight[j]); if ((new_color8[0][0] == lo_color[0]) && (new_color8[0][1] == lo_color[1]) && (new_color8[0][2] == lo_color[2]) && (new_color8[1][0] == hi_color[0]) && (new_color8[1][1] == hi_color[1]) && (new_color8[1][2] == hi_color[2])) break; for (uint i = 0; i < 3; i++) { lo_color[i] = new_color8[0][i]; hi_color[i] = new_color8[1][i]; } } uint energy[2] = {0, 0}; for (uint i = 0; i < 3; i++) { energy[0] += lo_color[i] * lo_color[i]; energy[1] += hi_color[i] * hi_color[i]; } if (energy[0] > energy[1]) utils::swap(lo_color, hi_color); lo = lo_color; hi = hi_color; } } // namespace dxt_fast } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt_fast.h000066400000000000000000000014621503722002600221220ustar00rootroot00000000000000// File: crn_dxt_fast.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_color.h" #include "crn_dxt.h" namespace crnlib { namespace dxt_fast { void compress_color_block(uint n, const color_quad_u8* block, uint& low16, uint& high16, uint8* pSelectors, bool refine = false); void compress_color_block(dxt1_block* pDXT1_block, const color_quad_u8* pBlock, bool refine = false); void compress_alpha_block(uint n, const color_quad_u8* block, uint& low8, uint& high8, uint8* pSelectors, uint comp_index); void compress_alpha_block(dxt5_block* pDXT5_block, const color_quad_u8* pBlock, uint comp_index); void find_representative_colors(uint n, const color_quad_u8* pBlock, color_quad_u8& lo, color_quad_u8& hi); } // namespace dxt_fast } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt_hc.cpp000066400000000000000000001531171503722002600221170ustar00rootroot00000000000000// File: crn_dxt_hc.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_dxt_hc.h" #include "crn_image_utils.h" #include "crn_console.h" #include "crn_dxt_fast.h" #include "crn_etc.h" namespace crnlib { typedef vec<6, float> vec6F; typedef vec<16, float> vec16F; static uint8 
g_tile_map[8][2][2] = {
  // Indexed as [encoding][by][bx]: the tile slot (0..3) used by each block of
  // a 2x2 block group under each of the 8 tile encodings.
  {{ 0, 0 }, { 0, 0 }},
  {{ 0, 0 }, { 1, 1 }},
  {{ 0, 1 }, { 0, 1 }},
  {{ 0, 0 }, { 1, 2 }},
  {{ 1, 2 }, { 0, 0 }},
  {{ 0, 1 }, { 0, 2 }},
  {{ 1, 0 }, { 2, 0 }},
  {{ 0, 1 }, { 2, 3 }},
};

// Constructs an empty compressor; the creating thread is recorded as the main thread.
dxt_hc::dxt_hc()
    : m_num_blocks(0),
      m_num_alpha_blocks(0),
      m_has_color_blocks(false),
      m_has_etc_color_blocks(false),
      m_has_subblocks(false),
      m_main_thread_id(crn_get_current_thread_id()),
      m_canceled(false),
      m_pTask_pool(NULL),
      m_prev_phase_index(-1),
      m_prev_percentage_complete(-1) {
}

dxt_hc::~dxt_hc() {
}

// Resets the per-compression state. m_has_etc_color_blocks, m_has_subblocks
// and m_pTask_pool are not touched here; compress() assigns them itself.
void dxt_hc::clear() {
  m_blocks = 0;
  m_num_blocks = 0;
  m_num_alpha_blocks = 0;
  m_has_color_blocks = false;

  m_color_clusters.clear();
  m_alpha_clusters.clear();

  m_canceled = false;

  m_prev_phase_index = -1;
  m_prev_percentage_complete = -1;

  m_block_weights.clear();
  m_block_encodings.clear();
  for (uint c = 0; c < 3; c++)
    m_block_selectors[c].clear();
  m_color_selectors.clear();
  m_alpha_selectors.clear();
  m_color_selectors_used.clear();
  m_alpha_selectors_used.clear();
  m_tile_indices.clear();
  m_endpoint_indices.clear();
  m_selector_indices.clear();
  m_tiles.clear();
  m_num_tiles = 0;
}

// Runs the whole pipeline on `blocks`: tile selection, endpoint clustering,
// selector codebooks, then de-duplication into the caller's vectors.
//   blocks            - source pixels, 16 per block; the ETC sub-block path may rewrite them in place.
//   endpoint_indices  - resized to the block count; receives per-block endpoint indices and the
//                       `reference` code (1 = left, 2 = top, 3 = diagonal, 0 = none, or the
//                       sub-block reference for odd blocks when sub-blocks are in use).
//   selector_indices  - resized to the block count; receives per-block selector indices.
//   color_endpoints, alpha_endpoints, color_selectors, alpha_selectors
//                     - appended to; the indices written above are positions in these vectors.
//   p                 - parameters; p.m_pTask_pool is dereferenced unconditionally.
// Returns false when p.m_format yields neither color nor alpha blocks, true otherwise.
// NOTE(review): several template argument lists look stripped in this text
// (`crnlib::vector&`, `hash_map`, `static_cast(level)`) - confirm against the repository.
bool dxt_hc::compress(
    color_quad_u8 (*blocks)[16],
    crnlib::vector& endpoint_indices,
    crnlib::vector& selector_indices,
    crnlib::vector& color_endpoints,
    crnlib::vector& alpha_endpoints,
    crnlib::vector& color_selectors,
    crnlib::vector& alpha_selectors,
    const params& p
) {
  clear();
  // Derive the block layout from the target format.
  m_has_etc_color_blocks = p.m_format == cETC1 || p.m_format == cETC2 || p.m_format == cETC2A || p.m_format == cETC1S || p.m_format == cETC2AS;
  m_has_subblocks = p.m_format == cETC1 || p.m_format == cETC2 || p.m_format == cETC2A;
  m_has_color_blocks = p.m_format == cDXT1 || p.m_format == cDXT5 || m_has_etc_color_blocks;
  m_num_alpha_blocks = p.m_format == cDXT5 || p.m_format == cDXT5A || p.m_format == cETC2A || p.m_format == cETC2AS ? 1 : p.m_format == cDXN_XY || p.m_format == cDXN_YX ? 2 : 0;
  if (!m_has_color_blocks && !m_num_alpha_blocks)
    return false;
  m_blocks = blocks;
  m_main_thread_id = crn_get_current_thread_id();
  m_pTask_pool = p.m_pTask_pool;
  m_params = p;

  // Per-encoding PSNR derating, scaled by tile_derating[e] / 3. For levels
  // above 0 the color derating is divided by 3^level but not taken below 0.25.
  uint tile_derating[8] = {0, 1, 1, 2, 2, 2, 2, 3};
  for (uint level = 0; level < p.m_num_levels; level++) {
    float adaptive_tile_color_psnr_derating = p.m_adaptive_tile_color_psnr_derating;
    if (level && adaptive_tile_color_psnr_derating > .25f)
      adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.0f, static_cast(level)));
    for (uint e = 0; e < 8; e++)
      m_color_derating[level][e] = math::lerp(0.0f, adaptive_tile_color_psnr_derating, tile_derating[e] / 3.0f);
  }
  for (uint e = 0; e < 8; e++)
    m_alpha_derating[e] = math::lerp(0.0f, m_params.m_adaptive_tile_alpha_psnr_derating, tile_derating[e] / 3.0f);
  // Lookup table mapping 0..255 to 0..1.
  for (uint i = 0; i < 256; i++)
    m_uint8_to_float[i] = i * 1.0f / 255.0f;

  m_num_blocks = m_params.m_num_blocks;
  m_block_weights.resize(m_num_blocks);
  m_block_encodings.resize(m_num_blocks);
  for (uint c = 0; c < 3; c++)
    m_block_selectors[c].resize(m_num_blocks);
  m_tile_indices.resize(m_num_blocks);
  m_endpoint_indices.resize(m_num_blocks);
  m_selector_indices.resize(m_num_blocks);
  m_tiles.resize(m_num_blocks);

  // Every block inherits the weight of the level it belongs to.
  for (uint level = 0; level < p.m_num_levels; level++) {
    float weight = p.m_levels[level].m_weight;
    for (uint b = p.m_levels[level].m_first_block, bEnd = b + p.m_levels[level].m_num_blocks; b < bEnd; b++)
      m_block_weights[b] = weight;
  }

  // Queue get_num_threads() + 1 tile-selection tasks and wait for them.
  for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
    m_pTask_pool->queue_object_task(this, m_has_subblocks ? &dxt_hc::determine_tiles_task_etc : &dxt_hc::determine_tiles_task, i);
  m_pTask_pool->join();

  // Count the tile slots that actually received pixels.
  m_num_tiles = 0;
  for (uint t = 0; t < m_tiles.size(); t++) {
    if (m_tiles[t].pixels.size())
      m_num_tiles++;
  }

  if (m_has_color_blocks)
    determine_color_endpoints();

  if (m_num_alpha_blocks)
    determine_alpha_endpoints();

  if (m_has_color_blocks)
    create_color_selector_codebook();

  if (m_num_alpha_blocks)
    create_alpha_selector_codebook();

  // De-duplicate color endpoints: *_remap[i] is the position of cluster i's
  // packed endpoint in the output vector; clusters without pixels are skipped.
  color_endpoints.reserve(color_endpoints.size() + m_color_clusters.size());
  crnlib::vector color_endpoints_remap(m_color_clusters.size());
  hash_map color_endpoints_map;
  for (uint i = 0; i < m_color_clusters.size(); i++) {
    if (m_color_clusters[i].pixels.size()) {
      uint32 endpoint = m_has_etc_color_blocks ? m_color_clusters[i].first_endpoint : dxt1_block::pack_endpoints(m_color_clusters[i].first_endpoint, m_color_clusters[i].second_endpoint);
      hash_map::insert_result insert_result = color_endpoints_map.insert(endpoint, color_endpoints.size());
      if (insert_result.second) {
        color_endpoints_remap[i] = color_endpoints.size();
        color_endpoints.push_back(endpoint);
      } else {
        color_endpoints_remap[i] = insert_result.first->second;
      }
    }
  }

  // Same de-duplication for alpha endpoints.
  alpha_endpoints.reserve(alpha_endpoints.size() + m_alpha_clusters.size());
  crnlib::vector alpha_endpoints_remap(m_alpha_clusters.size());
  hash_map alpha_endpoints_map;
  for (uint i = 0; i < m_alpha_clusters.size(); i++) {
    if (m_alpha_clusters[i].pixels.size()) {
      uint32 endpoint = dxt5_block::pack_endpoints(m_alpha_clusters[i].first_endpoint, m_alpha_clusters[i].second_endpoint);
      hash_map::insert_result insert_result = alpha_endpoints_map.insert(endpoint, alpha_endpoints.size());
      if (insert_result.second) {
        alpha_endpoints_remap[i] = alpha_endpoints.size();
        alpha_endpoints.push_back(endpoint);
      } else {
        alpha_endpoints_remap[i] = insert_result.first->second;
      }
    }
  }

  // Color selectors: only entries flagged in m_color_selectors_used are emitted.
  color_selectors.reserve(color_selectors.size() + m_color_selectors.size());
  crnlib::vector color_selectors_remap(m_color_selectors.size());
  hash_map color_selectors_map;
  for (uint i = 0; i < m_color_selectors.size(); i++) {
    if (m_color_selectors_used[i]) {
      hash_map::insert_result insert_result = color_selectors_map.insert(m_color_selectors[i], color_selectors.size());
      if (insert_result.second) {
        color_selectors_remap[i] = color_selectors.size();
        color_selectors.push_back(m_color_selectors[i]);
      } else {
        color_selectors_remap[i] = insert_result.first->second;
      }
    }
  }

  // Alpha selectors, same scheme.
  alpha_selectors.reserve(alpha_selectors.size() + m_alpha_selectors.size());
  crnlib::vector alpha_selectors_remap(m_alpha_selectors.size());
  hash_map alpha_selectors_map;
  for (uint i = 0; i < m_alpha_selectors.size(); i++) {
    if (m_alpha_selectors_used[i]) {
      hash_map::insert_result insert_result = alpha_selectors_map.insert(m_alpha_selectors[i], alpha_selectors.size());
      if (insert_result.second) {
        alpha_selectors_remap[i] = alpha_selectors.size();
        alpha_selectors.push_back(m_alpha_selectors[i]);
      } else {
        alpha_selectors_remap[i] = insert_result.first->second;
      }
    }
  }

  // Write the remapped per-block indices and work out which already-written
  // neighbour, if any, has identical endpoint indices in every component.
  endpoint_indices.resize(m_num_blocks);
  selector_indices.resize(m_num_blocks);
  for (uint level = 0; level < p.m_num_levels; level++) {
    uint first_block = p.m_levels[level].m_first_block;
    uint end_block = first_block + p.m_levels[level].m_num_blocks;
    uint block_width = p.m_levels[level].m_block_width;
    for (uint by = 0, b = first_block; b < end_block; by++) {
      for (uint bx = 0; bx < block_width; bx++, b++) {
        // The initial flags say whether the neighbour index is usable at all;
        // the && short-circuit below keeps out-of-range reads from happening.
        // With bx == 0 on a row other than the first, "left" is block b - 1,
        // i.e. the last block of the previous row.
        bool top_match = by != 0;
        bool left_match = top_match || bx;
        bool diag_match = m_has_subblocks && top_match && bx;
        for (uint c = m_has_color_blocks ? 0 : cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) {
          uint16 endpoint_index = (c ? alpha_endpoints_remap : color_endpoints_remap)[m_endpoint_indices[b].component[c]];
          left_match = left_match && endpoint_index == endpoint_indices[b - 1].component[c];
          top_match = top_match && endpoint_index == endpoint_indices[b - block_width].component[c];
          diag_match = diag_match && endpoint_index == endpoint_indices[b - block_width - 1].component[c];
          endpoint_indices[b].component[c] = endpoint_index;
          uint16 selector_index = (c ? alpha_selectors_remap : color_selectors_remap)[m_selector_indices[b].component[c]];
          selector_indices[b].component[c] = selector_index;
        }
        // `b & 1` binds tighter than &&: odd blocks keep their sub-block reference when sub-blocks are used.
        endpoint_indices[b].reference = m_has_subblocks && b & 1 ? m_endpoint_indices[b].reference : left_match ? 1 : top_match ? 2 : diag_match ? 3 : 0;
      }
    }
  }

  m_pTask_pool = NULL;
  return true;
}

// Reduces up to 64 pixels to a pair of RGB vectors (returned as one vec6F)
// by collapsing duplicate colors into weighted entries and calling split_vectors.
vec6F dxt_hc::palettize_color(color_quad_u8* pixels, uint pixels_count) {
  // Pack r, g, b into one integer so that duplicates become adjacent after sorting.
  uint color[64];
  for (uint i = 0; i < pixels_count; i++)
    color[i] = pixels[i][0] << 16 | pixels[i][1] << 8 | pixels[i][2];
  std::sort(color, color + pixels_count);
  vec3F vectors[64];
  uint weights[64];
  uint size = 0;
  for (uint i = 0; i < pixels_count; i++) {
    if (!i || color[i] != color[i - 1]) {
      // In perceptual mode the first channel is scaled by 0.5 and the third by 0.25.
      vectors[size][0] = m_params.m_perceptual ? m_uint8_to_float[color[i] >> 16] * 0.5f : m_uint8_to_float[color[i] >> 16];
      vectors[size][1] = m_uint8_to_float[color[i] >> 8 & 0xFF];
      vectors[size][2] = m_params.m_perceptual ?
m_uint8_to_float[color[i] & 0xFF] * 0.25f : m_uint8_to_float[color[i] & 0xFF];
      weights[size] = 1;
      size++;
    } else {
      // Duplicate of the previous color: only bump its weight.
      weights[size - 1]++;
    }
  }
  vec3F result[2];
  split_vectors(vectors, weights, size, result);
  // Return the shorter vector first.
  if (result[0].length() > result[1].length())
    utils::swap(result[0], result[1]);
  return *(vec6F*)result;
}

// Reduces one component of up to 64 pixels to a pair of scalar values
// (returned as one vec2F, smaller value first), using the same
// sort / de-duplicate / split_vectors scheme as palettize_color.
vec2F dxt_hc::palettize_alpha(color_quad_u8* pixels, uint pixels_count, uint comp_index) {
  uint8 alpha[64];
  for (uint p = 0; p < pixels_count; p++)
    alpha[p] = pixels[p][comp_index];
  std::sort(alpha, alpha + pixels_count);
  vec1F vectors[64];
  uint weights[64];
  uint size = 0;
  for (uint i = 0; i < pixels_count; i++) {
    if (!i || alpha[i] != alpha[i - 1]) {
      vectors[size][0] = m_uint8_to_float[alpha[i]];
      weights[size] = 1;
      size++;
    } else {
      weights[size - 1]++;
    }
  }
  vec1F result[2];
  split_vectors(vectors, weights, size, result);
  if (result[0] > result[1])
    utils::swap(result[0], result[1]);
  return *(vec2F*)result;
}

// Task body for formats without sub-blocks. `data` is the task index in
// 0..get_num_threads(). For every 2x2 group of blocks in this task's share of
// rows it scores the 8 tile encodings and records the best one in
// m_block_encodings, m_tile_indices and m_tiles.
void dxt_hc::determine_tiles_task(uint64 data, void*) {
  uint num_tasks = m_pTask_pool->get_num_threads() + 1;
  // Candidate regions inside tilePixels: region t starts at offsets[t] and
  // holds 16 << (t >> 2) pixels. 0..3 are the four single blocks, 4..5 are
  // pairs taken from the block-ordered copy, 6..7 are the two halves of the
  // row-interleaved copy, and 8 is the whole 64-pixel group.
  uint offsets[9] = {0, 16, 32, 48, 0, 32, 64, 96, 64};
  // Regions making up each encoding; the loops below consume entries while
  // `s` (starting at e + 1, halved each step) is non-zero.
  uint8 tiles[8][4] = {{8}, {6, 7}, {4, 5}, {6, 1, 3}, {7, 0, 2}, {4, 2, 3}, {5, 0, 1}, {0, 2, 1, 3}};
  color_quad_u8 tilePixels[128];
  uint8 selectors[64];
  uint tile_error[3][9];
  uint total_error[3][8];

  etc1_optimizer optimizer;
  etc1_optimizer::params params;
  params.m_use_color4 = false;
  params.m_constrain_against_base_color5 = false;
  etc1_optimizer::results results;
  results.m_pSelectors = selectors;
  int scan[] = {-1, 0, 1};
  int refine[] = {-3, -2, 2, 3};

  for (uint level = 0; level < m_params.m_num_levels; level++) {
    float weight = m_params.m_levels[level].m_weight;
    uint width = m_params.m_levels[level].m_block_width;
    uint height = m_params.m_levels[level].m_num_blocks / width;
    uint faceHeight = height / m_params.m_num_faces;
    // This task's row range, rounded down to even rows (`& ~1` applies to the whole quotient).
    uint h = height * data / num_tasks & ~1;
    uint hEnd = height * (data + 1) / num_tasks & ~1;
    uint hFace = h % faceHeight;
    uint b = m_params.m_levels[level].m_first_block + h * width;

    for (; h < hEnd; h += 2, hFace += 2, b += width) {
      // Every other pair of rows walks its tile slots backwards; the order
      // restarts at each face boundary. The -4 is stored in a uint and
      // relies on unsigned wrap-around.
      uint tile_offset = b;
      uint tile_offset_delta = 4;
      if (hFace == faceHeight) {
        hFace = 0;
      } else if (hFace & 2) {
        tile_offset_delta = -4;
        tile_offset += (width << 1) + tile_offset_delta;
      }
      for (uint bNext = b + width; b < bNext; b += 2, tile_offset += tile_offset_delta) {
        // tilePixels[0..63]: blocks b, b + width, b + 1, b + width + 1, 64 bytes each.
        for (int t = 0; t < 64; t += 16)
          memcpy(tilePixels + t, m_blocks[b + (t & 16 ? width : 0) + (t & 32 ? 1 : 0)], 64);
        // tilePixels[64..127]: the same four blocks copied 16 bytes at a time,
        // alternating between the left and the right block.
        for (int t = 0; t < 64; t += 4)
          memcpy(tilePixels + 64 + t, m_blocks[b + (t & 32 ? width : 0) + (t & 4 ? 1 : 0)] + (t >> 1 & 12), 16);

        // Error of each of the 9 candidate regions, per component.
        for (uint t = 0; t < 9; t++) {
          color_quad_u8* pixels = tilePixels + offsets[t];
          uint size = 16 << (t >> 2);
          if (m_has_etc_color_blocks) {
            params.m_pSrc_pixels = pixels;
            params.m_num_src_pixels = results.m_n = size;
            optimizer.init(params, results);
            params.m_pScan_deltas = scan;
            params.m_scan_delta_size = sizeof(scan) / sizeof(*scan);
            optimizer.compute();
            // Second pass with the wider deltas when the error exceeds 375 per pixel.
            if (results.m_error > 375 * params.m_num_src_pixels) {
              params.m_pScan_deltas = refine;
              params.m_scan_delta_size = sizeof(refine) / sizeof(*refine);
              optimizer.compute();
            }
            tile_error[cColor][t] = results.m_error;
          } else if (m_has_color_blocks) {
            uint low16, high16;
            dxt_fast::compress_color_block(size, pixels, low16, high16, selectors);
            color_quad_u8 block_colors[4];
            dxt1_block::get_block_colors4(block_colors, low16, high16);
            uint error = 0;
            for (uint p = 0; p < size; p++) {
              for (uint8 c = 0; c < 3; c++) {
                // delta is unsigned; a wrapped negative difference still squares
                // to the right value modulo 2^32.
                uint delta = pixels[p][c] - block_colors[selectors[p]][c];
                error += delta * delta;
              }
            }
            tile_error[cColor][t] = error;
          }

          for (uint a = 0; a < m_num_alpha_blocks; a++) {
            uint8 component = m_params.m_alpha_component_indices[a];
            // These three locals shadow the ETC1 optimizer/params/results declared above.
            dxt5_endpoint_optimizer optimizer;
            dxt5_endpoint_optimizer::params params;
            dxt5_endpoint_optimizer::results results;
            params.m_pPixels = pixels;
            params.m_num_pixels = size;
            params.m_comp_index = component;
            params.m_use_both_block_types = false;
            params.m_quality = cCRNDXTQualityNormal;
            results.m_pSelectors = selectors;
            optimizer.compute(params, results);
            // NOTE(review): block_values is filled but never read afterwards.
            uint block_values[cDXT5SelectorValues];
            dxt5_block::get_block_values8(block_values, results.m_first_endpoint, results.m_second_endpoint);
            tile_error[cAlpha0 + a][t] = results.m_error;
          }
        }

        // Error of an encoding = sum of the errors of its regions.
        for (uint8 c = m_has_color_blocks ? 0 : cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) {
          for (uint8 e = 0; e < 8; e++) {
            total_error[c][e] = 0;
            for (uint8 t = 0, s = e + 1; s; s >>= 1, t++)
              total_error[c][e] += tile_error[c][tiles[e][t]];
          }
        }

        // Pick the encoding with the highest derated PSNR-based quality;
        // encoding 0 is kept unless another one scores strictly higher.
        float best_quality = 0.0f;
        uint best_encoding = 0;
        for (uint e = 0; e < 8; e++) {
          float quality = 0;
          if (m_has_color_blocks) {
            double peakSNR = total_error[cColor][e] ? log10(255.0f / sqrt(total_error[cColor][e] / 192.0)) * 20.0f : 999999.0f;
            quality = (float)math::maximum(peakSNR - m_color_derating[level][e], 0.0f);
            if (m_num_alpha_blocks)
              quality *= m_params.m_adaptive_tile_color_alpha_weighting_ratio;
          }
          for (uint a = 0; a < m_num_alpha_blocks; a++) {
            double peakSNR = total_error[cAlpha0 + a][e] ? log10(255.0f / sqrt(total_error[cAlpha0 + a][e] / 64.0)) * 20.0f : 999999.0f;
            quality += (float)math::maximum(peakSNR - m_alpha_derating[e], 0.0f);
          }
          if (quality > best_quality) {
            best_quality = quality;
            best_encoding = e;
          }
        }

        // Store the winning encoding's tiles in slots tile_offset | 0..3.
        for (uint tile_index = 0, s = best_encoding + 1; s; s >>= 1, tile_index++) {
          tile_details& tile = m_tiles[tile_offset | tile_index];
          uint t = tiles[best_encoding][tile_index];
          tile.pixels.append(tilePixels + offsets[t], 16 << (t >> 2));
          tile.weight = weight;
          if (m_has_color_blocks)
            tile.color_endpoint = palettize_color(tile.pixels.get_ptr(), tile.pixels.size());
          for (uint a = 0; a < m_num_alpha_blocks; a++)
            tile.alpha_endpoints[a] = palettize_alpha(tile.pixels.get_ptr(), tile.pixels.size(), m_params.m_alpha_component_indices[a]);
        }

        // Point each of the four blocks at its tile slot.
        for (uint by = 0; by < 2; by++) {
          for (uint bx = 0; bx < 2; bx++) {
            m_block_encodings[b + (by ? width : 0) + bx] = best_encoding;
            m_tile_indices[b + (by ? width : 0) + bx] = tile_offset | g_tile_map[best_encoding][by][bx];
          }
        }
      }
    }
  }
}

// Task body for formats with sub-blocks. Each source block is treated as two
// 8-pixel sub-blocks (entries b and b | 1); three encodings are scored: the
// whole block as one tile, or a split into halves taken from either of two
// pixel orderings.
void dxt_hc::determine_tiles_task_etc(uint64 data, void*) {
  uint num_tasks = m_pTask_pool->get_num_threads() + 1;
  // Region t starts at offsets[t] and holds 8 << (t >> 2) pixels.
  uint offsets[5] = {0, 8, 16, 24, 16};
  uint8 tiles[3][2] = {{4}, {2, 3}, {0, 1}};
  uint8 tile_map[3][2] = {{ 0, 0 }, { 0, 1 }, { 0, 1 }};
  color_quad_u8 tilePixels[32];
  uint8 selectors[32];
  uint tile_error[5];
  uint total_error[3];

  etc1_optimizer optimizer;
  etc1_optimizer::params params;
  params.m_use_color4 = false;
  params.m_constrain_against_base_color5 = false;
  etc1_optimizer::results results;
  results.m_pSelectors = selectors;
  int scan[] = {-1, 0, 1};
  int refine[] = {-3, -2, 2, 3};

  for (uint level = 0; level < m_params.m_num_levels; level++) {
    float weight = m_params.m_levels[level].m_weight;
    // This task's share of the level, rounded down to an even sub-block index.
    uint b = (m_params.m_levels[level].m_first_block + m_params.m_levels[level].m_num_blocks * data / num_tasks) & ~1;
    uint bEnd = (m_params.m_levels[level].m_first_block + m_params.m_levels[level].m_num_blocks * (data + 1) / num_tasks) & ~1;
    for (; b < bEnd; b += 2) {
      // tilePixels[0..15]: the block with its 4x4 pixel grid transposed;
      // tilePixels[16..31]: the block as stored.
      for (uint p = 0; p < 16; p++)
        tilePixels[p] = m_blocks[b >> 1][(p << 2 & 12) | p >> 2];
      memcpy(tilePixels + 16, m_blocks[b >> 1], 64);
      for (uint t = 0; t < 5; t++) {
        params.m_pSrc_pixels = tilePixels + offsets[t];
        params.m_num_src_pixels = results.m_n = 8 << (t >> 2);
        optimizer.init(params, results);
        params.m_pScan_deltas = scan;
        params.m_scan_delta_size = sizeof(scan) / sizeof(*scan);
        optimizer.compute();
        // Second pass with the wider deltas when the error exceeds 375 per pixel.
        if (results.m_error > 375 * params.m_num_src_pixels) {
          params.m_pScan_deltas = refine;
          params.m_scan_delta_size = sizeof(refine) / sizeof(*refine);
          optimizer.compute();
        }
        tile_error[t] = results.m_error;
      }

      for (uint8 e = 0; e < 3; e++) {
        total_error[e] = 0;
        for (uint8 t = 0, s = e + 1; s; s >>= 1, t++)
          total_error[e] += tile_error[tiles[e][t]];
      }

      float best_quality = 0.0f;
      uint best_encoding = 0;
      for (uint e = 0; e < 3; e++) {
        float quality = 0;
        double peakSNR = total_error[e] ?
log10(255.0f / sqrt(total_error[e] / 48.0)) * 20.0f : 999999.0f;
        quality = (float)math::maximum(peakSNR - m_color_derating[level][e], 0.0f);
        if (quality > best_quality) {
          best_quality = quality;
          best_encoding = e;
        }
      }

      // Alpha endpoints are always taken from the first 16 entries of
      // tilePixels, component 3, whatever color encoding won.
      vec2F alpha_endpoints = m_num_alpha_blocks ? palettize_alpha(tilePixels, 16, 3) : vec2F(cClear);

      for (uint tile_index = 0, s = best_encoding + 1; s; s >>= 1, tile_index++) {
        tile_details& tile = m_tiles[b | tile_index];
        uint t = tiles[best_encoding][tile_index];
        tile.pixels.append(tilePixels + offsets[t], 8 << (t >> 2));
        tile.weight = weight;
        tile.color_endpoint = palettize_color(tile.pixels.get_ptr(), tile.pixels.size());
        if (m_num_alpha_blocks)
          tile.alpha_endpoints[0] = alpha_endpoints;
      }

      for (uint bx = 0; bx < 2; bx++) {
        m_block_encodings[b | bx] = best_encoding;
        m_tile_indices[b | bx] = b | tile_map[best_encoding][bx];
        // The second sub-block carries the encoding in `reference`; the first carries 0.
        m_endpoint_indices[b | bx].reference = bx ? best_encoding : 0;
      }
      // Encoding 2 was scored on the transposed copy, so the source block is
      // overwritten with that copy.
      if (best_encoding >> 1)
        memcpy(m_blocks[b >> 1], tilePixels, 64);
    }
  }
}

// Task body (DXT path): for every color cluster assigned to this task
// (cluster indices data, data + num_tasks, ...), optimizes the cluster's
// endpoints, computes a packed selector/weight word for each member block,
// then lets the refiner try to improve the endpoints.
// NOTE(review): `crnlib::vector selectors` / `crnlib::vector& blocks` and the
// math::clamp / math::lerp calls look like they lost template arguments in
// this text - confirm against the repository.
void dxt_hc::determine_color_endpoint_codebook_task(uint64 data, void*) {
  const uint num_tasks = m_pTask_pool->get_num_threads() + 1;
  dxt1_endpoint_optimizer optimizer;
  dxt_endpoint_refiner refiner;
  crnlib::vector selectors;

  for (uint cluster_index = (uint)data; cluster_index < m_color_clusters.size(); cluster_index += num_tasks) {
    color_cluster& cluster = m_color_clusters[cluster_index];
    if (cluster.pixels.empty())
      continue;

    dxt1_endpoint_optimizer::params params;
    params.m_block_index = cluster_index;
    params.m_pPixels = cluster.pixels.get_ptr();
    params.m_num_pixels = cluster.pixels.size();
    params.m_pixels_have_alpha = false;
    params.m_use_alpha_blocks = false;
    params.m_perceptual = m_params.m_perceptual;
    params.m_quality = cCRNDXTQualityUber;
    params.m_endpoint_caching = false;

    dxt1_endpoint_optimizer::results results;
    selectors.resize(params.m_num_pixels);
    results.m_pSelectors = selectors.get_ptr();
    optimizer.compute(params, results);
    cluster.first_endpoint = results.m_low_color;
    cluster.second_endpoint = results.m_high_color;

    // The four block colors, reordered through g_dxt1_from_linear.
    color_quad_u8 block_values[4], color_values[4];
    dxt1_block::get_block_colors4(block_values, cluster.first_endpoint, cluster.second_endpoint);
    for (uint i = 0; i < 4; i++)
      color_values[i] = cluster.color_values[i] = block_values[g_dxt1_from_linear[i]];
    // When the optimizer reports alternate rounding, the two middle colors of
    // the local copy are recomputed; cluster.color_values keeps the originals.
    for (uint c = 0; results.m_alternate_rounding && c < 3; c++) {
      color_values[1].c[c] = ((color_values[0].c[c] << 1) + color_values[3].c[c] + 1) / 3;
      color_values[2].c[c] = ((color_values[3].c[c] << 1) + color_values[0].c[c] + 1) / 3;
    }

    // Block weight grows with the distance between the outer colors; the
    // per-encoding factor runs from 1.15 (encoding 0) down to 1.0 (encoding 7).
    uint endpoint_weight = color::color_distance(m_params.m_perceptual, color_values[0], color_values[3], false) / 2000;
    float encoding_weight[8];
    for (uint i = 0; i < 8; i++)
      encoding_weight[i] = math::lerp(1.15f, 1.0f, i / 7.0f);

    crnlib::vector& blocks = cluster.blocks[cColor];
    for (uint i = 0; i < blocks.size(); i++) {
      uint b = blocks[i];
      uint weight = (uint)(math::clamp(endpoint_weight * m_block_weights[b], 1, 2048) * encoding_weight[m_block_encodings[b]]);
      uint32 selector = 0;
      for (uint p = 0; p < 16; p++) {
        // Nearest of the four colors; ties keep the first candidate tried.
        uint error_best = cUINT32_MAX;
        uint8 s_best = 0;
        for (uint8 t = 0; t < 4; t++) {
          uint8 s = results.m_reordered ? 3 - g_dxt1_to_linear[t] : g_dxt1_to_linear[t];
          uint error = color::color_distance(m_params.m_perceptual, (color_quad_u8&)m_blocks[b][p], color_values[s], false);
          if (error < error_best) {
            s_best = s;
            error_best = error;
          }
        }
        selector = selector << 2 | s_best;
      }
      // High 32 bits: sixteen 2-bit selectors (pixel 0 in the top bits); low 32 bits: weight.
      m_block_selectors[cColor][b] = (uint64)selector << 32 | weight;
    }

    dxt_endpoint_refiner::params refinerParams;
    dxt_endpoint_refiner::results refinerResults;
    refinerParams.m_perceptual = m_params.m_perceptual;
    refinerParams.m_pSelectors = selectors.get_ptr();
    refinerParams.m_pPixels = cluster.pixels.get_ptr();
    refinerParams.m_num_pixels = cluster.pixels.size();
    refinerParams.m_dxt1_selectors = true;
    refinerParams.m_error_to_beat = results.m_error;
    refinerParams.m_block_index = cluster_index;
    // The refined endpoints replace the optimizer's only when refine() returns true.
    if (refiner.refine(refinerParams, refinerResults)) {
      cluster.first_endpoint = refinerResults.m_low_color;
      cluster.second_endpoint = refinerResults.m_high_color;
    }
  }
}

// Task body (ETC path): each task handles a contiguous range of color
// clusters, fits a base color and intensity table to each one, and computes
// the packed selector/weight word for each member block.
void dxt_hc::determine_color_endpoint_codebook_task_etc(uint64 data, void*) {
  uint num_tasks = m_pTask_pool->get_num_threads() + 1;
  // Per intensity table: the small and the large offset applied to the base color.
  uint8 delta[8][2] = { {2, 8}, {5, 17}, {9, 29}, {13, 42}, {18, 60}, {24, 80}, {33, 106}, {47, 183} };
  int scan[] = {-1, 0, 1};
  int refine[] = {-3, -2, 2, 3};
  for (uint iCluster = m_color_clusters.size() * data / num_tasks, iEnd = m_color_clusters.size() * (data + 1) / num_tasks; iCluster < iEnd; iCluster++) {
    color_cluster& cluster = m_color_clusters[iCluster];
    if (cluster.pixels.size()) {
      etc1_optimizer optimizer;
      etc1_optimizer::params params;
      params.m_use_color4 = false;
      params.m_constrain_against_base_color5 = false;
      etc1_optimizer::results results;
      crnlib::vector selectors(cluster.pixels.size());
      params.m_pSrc_pixels = cluster.pixels.get_ptr();
      results.m_pSelectors = selectors.get_ptr();
      results.m_n = params.m_num_src_pixels = cluster.pixels.size();
      optimizer.init(params, results);
      params.m_pScan_deltas = scan;
      params.m_scan_delta_size = sizeof(scan) / sizeof(*scan);
      optimizer.compute();
      if (results.m_error > 375 *
params.m_num_src_pixels) {
        // Error above 375 per pixel: run a second pass with the wider deltas.
        params.m_pScan_deltas = refine;
        params.m_scan_delta_size = sizeof(refine) / sizeof(*refine);
        optimizer.compute();
      }
      // Expand each 5-bit base color channel to 8 bits and keep the
      // intensity table index in the fourth channel.
      color_quad_u8 endpoint;
      for (int c = 0; c < 3; c++)
        endpoint.c[c] = results.m_block_color_unscaled.c[c] << 3 | results.m_block_color_unscaled.c[c] >> 2;
      endpoint.c[3] = results.m_block_inten_table;
      cluster.first_endpoint = endpoint.m_u32;

      // The four colors are base -d1, -d0, +d0, +d1, saturated to 0..255.
      for (uint8 d0 = delta[endpoint.c[3]][0], d1 = delta[endpoint.c[3]][1], c = 0; c < 3; c++) {
        uint8 q = endpoint.c[c];
        cluster.color_values[0].c[c] = q <= d1 ? 0 : q - d1;
        cluster.color_values[1].c[c] = q <= d0 ? 0 : q - d0;
        cluster.color_values[2].c[c] = q >= 255 - d0 ? 255 : q + d0;
        cluster.color_values[3].c[c] = q >= 255 - d1 ? 255 : q + d1;
      }
      for (int t = 0; t < 4; t++)
        cluster.color_values[t].c[3] = 0xFF;

      // Weight from the luma spread of the outer colors, capped at 1 before the power.
      float endpoint_weight = powf(math::minimum((cluster.color_values[3].get_luma() - cluster.color_values[0].get_luma()) / 100.0f, 1.0f), 2.7f);

      crnlib::vector& blocks = cluster.blocks[cColor];
      // With sub-blocks, m_blocks is reinterpreted as rows of 8 pixels so that
      // index b addresses one sub-block.
      uint blockSize = m_has_subblocks ? 8 : 16;
      for (uint i = 0; i < blocks.size(); i++) {
        uint b = blocks[i];
        color_quad_u8* pixels = m_has_subblocks ? ((color_quad_u8(*)[8])m_blocks)[b] : m_blocks[b];
        uint weight = (uint)(math::clamp(0x8000 * endpoint_weight * m_block_weights[b] * (m_block_encodings[b] ? 0.972f : 1.0f), 1, 0xFFFF));
        uint32 selector = 0;
        for (uint p = 0; p < blockSize; p++) {
          // Nearest of the four colors; ties keep the lowest index.
          uint error_best = cUINT32_MAX;
          uint8 s_best = 0;
          for (uint8 s = 0; s < 4; s++) {
            uint error = color::color_distance(m_params.m_perceptual, pixels[p], cluster.color_values[s], false);
            if (error < error_best) {
              s_best = s;
              error_best = error;
            }
          }
          selector = selector << 2 | s_best;
        }
        // Selectors are shifted by 32, or by 48 for even sub-blocks; weight is in the low bits.
        m_block_selectors[cColor][b] = (uint64)selector << (!m_has_subblocks || (b & 1) ? 32 : 48) | weight;
      }
    }
  }
}

// Task body: assigns every non-empty tile in this task's range to the nearest
// codebook entry (squared distance over the 6 endpoint components).
// pData_ptr is the tree_clusterizer that owns the codebook.
void dxt_hc::determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr) {
  tree_clusterizer* vq = (tree_clusterizer*)pData_ptr;
  const crnlib::vector& codebook = vq->get_codebook();
  uint num_tasks = m_pTask_pool->get_num_threads() + 1;
  for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) {
    if (m_tiles[t].pixels.size()) {
      const vec6F& v = m_tiles[t].color_endpoint;
      // Distance to the entry reported by get_node_index(); used below as an
      // upper bound to abandon candidates early.
      float node_dist = codebook[vq->get_node_index(v)].squared_distance(v);
      float best_dist = math::cNearlyInfinite;
      uint best_index = 0;
      for (uint i = 0; i < codebook.size(); i++) {
        const vec6F& c = codebook[i];
        float dist = 0;
        float d0 = c[0] - v[0];
        dist += d0 * d0;
        float d1 = c[1] - v[1];
        dist += d1 * d1;
        if (dist > node_dist)
          continue;
        float d2 = c[2] - v[2];
        dist += d2 * d2;
        float d3 = c[3] - v[3];
        dist += d3 * d3;
        if (dist > node_dist)
          continue;
        float d4 = c[4] - v[4];
        dist += d4 * d4;
        float d5 = c[5] - v[5];
        dist += d5 * d5;
        if (dist < best_dist) {
          best_dist = dist;
          best_index = i;
          // An exact match cannot be beaten.
          if (best_dist == 0.0f)
            break;
        }
      }
      m_tiles[t].cluster_indices[cColor] = best_index;
    }
  }
}

// Builds the color endpoint codebook and assigns tiles and blocks to it:
// collect weighted tile endpoints, sort and merge duplicates, generate the
// codebook, map tiles to clusters, gather cluster pixels and block lists,
// then run the per-cluster codebook tasks.
// NOTE(review): `crnlib::vector > endpoints`, `std::pair *p`,
// `std::priority_queue queue` and similar look like they lost template
// arguments in this text - confirm against the repository.
void dxt_hc::determine_color_endpoints() {
  uint num_tasks = m_pTask_pool->get_num_threads() + 1;
  // One (endpoint, weight) pair per non-empty tile; weight = pixel count * tile weight.
  crnlib::vector > endpoints;
  for (uint t = 0; t < m_tiles.size(); t++) {
    if (m_tiles[t].pixels.size())
      endpoints.push_back(std::make_pair(m_tiles[t].color_endpoint, (uint)(m_tiles[t].pixels.size() * m_tiles[t].weight)));
  }

  // A [p, pEnd) slice of `endpoints`. operator< is inverted so that
  // std::priority_queue yields the slice with the smallest front element.
  struct Node {
    std::pair *p, *pEnd;
    Node (std::pair* begin, std::pair* end) : p(begin), pEnd(end) {}
    bool operator<(const Node& other) const { return *p > *other.p; }
    static void sort_task(uint64 data, void* ptr) { std::sort(((Node*)ptr)->p, ((Node*)ptr)->pEnd); }
  };

  // Cut `endpoints` into up to num_tasks non-empty slices and sort each on the pool.
  crnlib::vector nodes;
  Node node(0, endpoints.get_ptr());
  for (uint i = 0; i < num_tasks; i++) {
    node.p = node.pEnd;
    node.pEnd = endpoints.get_ptr() + endpoints.size() * (i + 1) / num_tasks;
    if (node.p != node.pEnd)
      nodes.push_back(node);
  }

  for (uint i = 0; i < nodes.size(); i++)
    m_pTask_pool->queue_task(&Node::sort_task, i, &nodes[i]);
  m_pTask_pool->join();

  // Merge the sorted slices, combining equal endpoints and saturating the
  // summed weight at UINT_MAX.
  std::priority_queue queue;
  for (uint i = 0; i < nodes.size(); i++)
    queue.push(nodes[i]);

  crnlib::vector vectors;
  crnlib::vector weights;
  vectors.reserve(endpoints.size());
  weights.reserve(endpoints.size());
  while (queue.size()) {
    Node node = queue.top();
    std::pair* endpoint = node.p++;
    queue.pop();
    if (node.p != node.pEnd)
      queue.push(node);
    if (!vectors.size() || endpoint->first != vectors.back()) {
      vectors.push_back(endpoint->first);
      weights.push_back(endpoint->second);
    } else if (weights.back() > UINT_MAX - endpoint->second) {
      weights.back() = UINT_MAX;
    } else {
      weights.back() += endpoint->second;
    }
  }

  // The requested codebook size is capped at the number of non-empty tiles.
  tree_clusterizer vq;
  vq.generate_codebook(vectors.get_ptr(), weights.get_ptr(), vectors.size(), math::minimum(m_num_tiles, m_params.m_color_endpoint_codebook_size), true, m_pTask_pool);
  m_color_clusters.resize(vq.get_codebook_size());

  for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
    m_pTask_pool->queue_object_task(this, &dxt_hc::determine_color_endpoint_clusters_task, i, &vq);
  m_pTask_pool->join();

  // Gather the pixels of every tile into its cluster.
  for (uint t = 0; t < m_num_blocks; t++) {
    if (m_tiles[t].pixels.size())
      m_color_clusters[m_tiles[t].cluster_indices[cColor]].pixels.append(m_tiles[t].pixels);
  }

  for (uint b = 0; b < m_num_blocks; b++) {
    uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cColor];
    m_endpoint_indices[b].component[cColor] = cluster_index;
    m_color_clusters[cluster_index].blocks[cColor].push_back(b);
    // A sub-block with a non-zero reference that landed in the same cluster as
    // its predecessor has its reference cleared. If the reference was >= 2 the
    // source block is transposed again in place.
    if (m_has_subblocks && m_endpoint_indices[b].reference && cluster_index == m_endpoint_indices[b - 1].component[cColor]) {
      if (m_endpoint_indices[b].reference >> 1) {
        color_quad_u8 mirror[16];
        for (uint p = 0; p < 16; p++)
          mirror[p] = m_blocks[b >> 1][(p << 2 & 12) | p >> 2];
        memcpy(m_blocks[b >> 1], mirror, 64);
      }
      m_endpoint_indices[b].reference = 0;
    }
  }

  for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
    m_pTask_pool->queue_object_task(this, m_has_etc_color_blocks ? &dxt_hc::determine_color_endpoint_codebook_task_etc : &dxt_hc::determine_color_endpoint_codebook_task, i, NULL);
  m_pTask_pool->join();
}

// Task body: for every alpha cluster assigned to this task (cluster indices
// data, data + num_tasks, ...), optimizes the endpoints, derives the 8 alpha
// values, computes a packed selector/weight word per member block, then
// optionally refines the endpoints.
void dxt_hc::determine_alpha_endpoint_codebook_task(uint64 data, void*) {
  const uint num_tasks = m_pTask_pool->get_num_threads() + 1;
  dxt5_endpoint_optimizer optimizer;
  dxt_endpoint_refiner refiner;
  crnlib::vector selectors;

  for (uint cluster_index = (uint)data; cluster_index < m_alpha_clusters.size(); cluster_index += num_tasks) {
    alpha_cluster& cluster = m_alpha_clusters[cluster_index];
    if (cluster.pixels.empty())
      continue;

    dxt5_endpoint_optimizer::params params;
    params.m_pPixels = cluster.pixels.get_ptr();
    params.m_num_pixels = cluster.pixels.size();
    // The optimizer reads component 0 of the cluster's pixels.
    params.m_comp_index = 0;
    params.m_quality = cCRNDXTQualityUber;
    params.m_use_both_block_types = false;

    dxt5_endpoint_optimizer::results results;
    selectors.resize(params.m_num_pixels);
    results.m_pSelectors = selectors.get_ptr();
    optimizer.compute(params, results);
    cluster.first_endpoint = results.m_first_endpoint;
    cluster.second_endpoint = results.m_second_endpoint;

    uint block_values[8], alpha_values[8];
    dxt5_block::get_block_values(block_values, cluster.first_endpoint, cluster.second_endpoint);
    for (uint i = 0; i < 8; i++)
      alpha_values[i] = cluster.alpha_values[i] = block_values[g_dxt5_from_linear[i]];

    // Weight grows with the squared endpoint distance (clamped to 1..2048),
    // scaled per encoding from 1.15 (encoding 0) down to 1.0 (encoding 7).
    int delta = cluster.first_endpoint - cluster.second_endpoint;
    uint encoding_weight[8];
    for (uint endpoint_weight = math::clamp(delta * delta >> 3, 1, 2048), i = 0; i < 8; i++)
      encoding_weight[i] = (uint)(endpoint_weight * math::lerp(1.15f, 1.0f, i / 7.0f));

    if (m_has_etc_color_blocks) {
      // ETC path: replace the alpha values by base_codeword + modifier * multiplier
      // and store base / (multiplier, table index) in place of the endpoints.
      static const int stripped_modifier_table[2][8] = {
        {-10, -7, -5, -2, 1, 4, 6, 9},
        {-10, -3, -2, -1, 0, 1, 2, 9}
      };
      int base_codeword = (results.m_first_endpoint + results.m_second_endpoint + 1) >> 1;
      int modifier_index = delta <= 6 ? 13 : 11;
      int multiplier = delta <= 6 ? 1 : math::clamp((delta + 12) / 18, 1, 15);
      const int* modifier = stripped_modifier_table[modifier_index == 11 ?
0 : 1];
      for (int i = 0; i < 8; i++)
        alpha_values[i] = cluster.alpha_values[i] = math::clamp(base_codeword + modifier[i] * multiplier, 0, 255);
      cluster.first_endpoint = base_codeword;
      cluster.second_endpoint = multiplier << 4 | modifier_index;
    }

    for (uint a = 0; a < m_num_alpha_blocks; a++) {
      uint component_index = m_params.m_alpha_component_indices[a];
      crnlib::vector& blocks = cluster.blocks[cAlpha0 + a];
      for (uint i = 0; i < blocks.size(); i++) {
        uint b = blocks[i];
        uint weight = encoding_weight[m_block_encodings[b]];
        uint64 selector = 0;
        for (uint p = 0; p < 16; p++) {
          // Nearest of the eight alpha values by absolute difference; ties keep
          // the first candidate tried.
          uint error_best = cUINT32_MAX;
          uint8 s_best = 0;
          for (uint8 t = 0; t < 8; t++) {
            uint8 s = m_has_etc_color_blocks ? t : results.m_reordered ? 7 - g_dxt5_to_linear[t] : g_dxt5_to_linear[t];
            // This `delta` shadows the endpoint delta declared earlier in the function.
            int delta = m_blocks[m_has_subblocks ? b >> 1 : b][p][component_index] - alpha_values[s];
            uint error = delta >= 0 ? delta : -delta;
            if (error < error_best) {
              s_best = s;
              error_best = error;
            }
          }
          selector = selector << 3 | s_best;
        }
        // Sixteen 3-bit selectors above bit 16; weight in the low 16 bits.
        m_block_selectors[cAlpha0 + a][b] = selector << 16 | weight;
      }
    }

    dxt_endpoint_refiner::params refinerParams;
    dxt_endpoint_refiner::results refinerResults;
    refinerParams.m_perceptual = m_params.m_perceptual;
    refinerParams.m_pSelectors = selectors.get_ptr();
    refinerParams.m_pPixels = cluster.pixels.get_ptr();
    refinerParams.m_num_pixels = cluster.pixels.size();
    refinerParams.m_dxt1_selectors = false;
    refinerParams.m_error_to_beat = results.m_error;
    refinerParams.m_block_index = cluster_index;
    // Refinement is skipped entirely on the ETC path (short-circuit).
    cluster.refined_alpha = !m_has_etc_color_blocks && refiner.refine(refinerParams, refinerResults);
    if (cluster.refined_alpha) {
      cluster.first_endpoint = refinerResults.m_low_color;
      cluster.second_endpoint = refinerResults.m_high_color;
      dxt5_block::get_block_values(block_values, cluster.first_endpoint, cluster.second_endpoint);
      for (uint i = 0; i < 8; i++)
        cluster.refined_alpha_values[i] = block_values[g_dxt5_from_linear[i]];
    } else {
      memcpy(cluster.refined_alpha_values, cluster.alpha_values, sizeof(cluster.refined_alpha_values));
    }
  }
}

// Task body: assigns each alpha component of every non-empty tile in this
// task's range to the nearest codebook entry (squared distance over the two
// endpoint values). pData_ptr is the tree_clusterizer that owns the codebook.
void dxt_hc::determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr) {
  tree_clusterizer* vq = (tree_clusterizer*)pData_ptr;
  const crnlib::vector& codebook = vq->get_codebook();
  uint num_tasks = m_pTask_pool->get_num_threads() + 1;
  for (uint t = m_tiles.size() * data / num_tasks, tEnd = m_tiles.size() * (data + 1) / num_tasks; t < tEnd; t++) {
    if (m_tiles[t].pixels.size()) {
      for (uint a = 0; a < m_num_alpha_blocks; a++) {
        const vec2F& v = m_tiles[t].alpha_endpoints[a];
        float best_dist = math::cNearlyInfinite;
        uint best_index = 0;
        for (uint i = 0; i < codebook.size(); i++) {
          float dist = (codebook[i][0] - v[0]) * (codebook[i][0] - v[0]) + (codebook[i][1] - v[1]) * (codebook[i][1] - v[1]);
          if (dist < best_dist) {
            best_dist = dist;
            best_index = i;
            // An exact match cannot be beaten.
            if (best_dist == 0.0f)
              break;
          }
        }
        m_tiles[t].cluster_indices[cAlpha0 + a] = best_index;
      }
    }
  }
}

// Builds the alpha endpoint codebook (shared by all alpha components) and
// assigns tiles and blocks to it: collect weighted tile endpoints, sort and
// merge duplicates, generate the codebook, map tiles to clusters, gather
// cluster pixels and block lists, then run the per-cluster codebook tasks.
// NOTE(review): `crnlib::vector > endpoints`, `std::pair *p`,
// `std::priority_queue queue` and similar look like they lost template
// arguments in this text - confirm against the repository.
void dxt_hc::determine_alpha_endpoints() {
  uint num_tasks = m_pTask_pool->get_num_threads() + 1;
  // One (endpoint, weight) pair per non-empty tile and alpha component; the
  // weight is the tile's pixel count (the tile weight is not applied here).
  crnlib::vector > endpoints;
  for (uint a = 0; a < m_num_alpha_blocks; a++) {
    for (uint t = 0; t < m_tiles.size(); t++) {
      if (m_tiles[t].pixels.size())
        endpoints.push_back(std::make_pair(m_tiles[t].alpha_endpoints[a], m_tiles[t].pixels.size()));
    }
  }

  // A [p, pEnd) slice of `endpoints`. operator< is inverted so that
  // std::priority_queue yields the slice with the smallest front element.
  struct Node {
    std::pair *p, *pEnd;
    Node (std::pair* begin, std::pair* end) : p(begin), pEnd(end) {}
    bool operator<(const Node& other) const { return *p > *other.p; }
    static void sort_task(uint64 data, void* ptr) { std::sort(((Node*)ptr)->p, ((Node*)ptr)->pEnd); }
  };

  // Cut `endpoints` into up to num_tasks non-empty slices and sort each on the pool.
  crnlib::vector nodes;
  Node node(0, endpoints.get_ptr());
  for (uint i = 0; i < num_tasks; i++) {
    node.p = node.pEnd;
    node.pEnd = endpoints.get_ptr() + endpoints.size() * (i + 1) / num_tasks;
    if (node.p != node.pEnd)
      nodes.push_back(node);
  }

  for (uint i = 0; i < nodes.size(); i++)
    m_pTask_pool->queue_task(&Node::sort_task, i, &nodes[i]);
  m_pTask_pool->join();

  // Merge the sorted slices, combining equal endpoints and saturating the
  // summed weight at UINT_MAX.
  std::priority_queue queue;
  for (uint i = 0; i < nodes.size(); i++)
    queue.push(nodes[i]);

  crnlib::vector vectors;
  crnlib::vector weights;
  vectors.reserve(endpoints.size());
  weights.reserve(endpoints.size());
  while (queue.size()) {
    Node node = queue.top();
    std::pair* endpoint = node.p++;
    queue.pop();
    if (node.p != node.pEnd)
      queue.push(node);
    if (!vectors.size() || endpoint->first != vectors.back()) {
      vectors.push_back(endpoint->first);
      weights.push_back(endpoint->second);
    } else if (weights.back() > UINT_MAX - endpoint->second) {
      weights.back() = UINT_MAX;
    } else {
      weights.back() += endpoint->second;
    }
  }

  // The requested codebook size is capped at the number of non-empty tiles.
  tree_clusterizer vq;
  vq.generate_codebook(vectors.get_ptr(), weights.get_ptr(), vectors.size(), math::minimum(m_num_tiles, m_params.m_alpha_endpoint_codebook_size), false, m_pTask_pool);
  m_alpha_clusters.resize(vq.get_codebook_size());

  for (uint i = 0; i < num_tasks; i++)
    m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_clusters_task, i, &vq);
  m_pTask_pool->join();

  // Gather each tile's alpha component into its cluster, wrapped in a
  // color_quad_u8 built from that single value.
  for (uint a = 0; a < m_num_alpha_blocks; a++) {
    uint component_index = m_params.m_alpha_component_indices[a];
    for (uint t = 0; t < m_num_blocks; t++) {
      crnlib::vector& source = m_tiles[t].pixels;
      if (source.size()) {
        crnlib::vector& destination = m_alpha_clusters[m_tiles[t].cluster_indices[cAlpha0 + a]].pixels;
        for (uint p = 0; p < source.size(); p++)
          destination.push_back(color_quad_u8(source[p][component_index]));
      }
    }
  }

  for (uint b = 0; b < m_num_blocks; b++) {
    for (uint a = 0; a < m_num_alpha_blocks; a++) {
      uint cluster_index = m_tiles[m_tile_indices[b]].cluster_indices[cAlpha0 + a];
      m_endpoint_indices[b].component[cAlpha0 + a] = cluster_index;
      // With sub-blocks, only even entries are added to the cluster's block list.
      if (!(m_has_subblocks && b & 1))
        m_alpha_clusters[cluster_index].blocks[cAlpha0 + a].push_back(b);
    }
  }

  for (uint i = 0; i < num_tasks; i++)
    m_pTask_pool->queue_object_task(this, &dxt_hc::determine_alpha_endpoint_codebook_task, i, NULL);
  m_pTask_pool->join();
}

// Per-selector scratch record; the constructor zero-fills the whole object.
struct color_selector_details {
  color_selector_details() { utils::zero_object(*this); }
  uint error[16][4];
  bool used;
};

void dxt_hc::create_color_selector_codebook_task(uint64 data, void*
pData_ptr) { crnlib::vector& selector_details = *static_cast*>(pData_ptr); uint num_tasks = m_pTask_pool->get_num_threads() + 1; uint E2[16][4]; uint E4[8][16]; uint E8[4][256]; for (uint n = m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks, b = n * data / num_tasks, bEnd = n * (data + 1) / num_tasks; b < bEnd; b++) { color_cluster& cluster = m_color_clusters[m_endpoint_indices[b].color]; color_quad_u8* endpoint_colors = cluster.color_values; for (uint p = 0; p < 16; p++) { for (uint s = 0; s < 4; s++) E2[p][s] = m_has_subblocks ? color::color_distance(m_params.m_perceptual, m_blocks[b][p], m_color_clusters[m_endpoint_indices[b << 1 | p >> 3].color].color_values[s], false) : color::color_distance(m_params.m_perceptual, m_blocks[b][p], endpoint_colors[s], false); } for (uint p = 0; p < 8; p++) { for (uint s = 0; s < 16; s++) E4[p][s] = E2[p << 1][s & 3] + E2[p << 1 | 1][s >> 2]; } for (uint p = 0; p < 4; p++) { for (uint s = 0; s < 256; s++) E8[p][s] = E4[p << 1][s & 15] + E4[p << 1 | 1][s >> 4]; } uint best_index = 0; for (uint best_error = cUINT32_MAX, s = 0; s < m_color_selectors.size(); s++) { uint32 selector = m_color_selectors[s]; uint error = E8[0][selector & 255] + E8[1][selector >> 8 & 255] + E8[2][selector >> 16 & 255] + E8[3][selector >> 24 & 255]; if (error < best_error) { best_error = error; best_index = s; } } uint (&total_errors)[16][4] = selector_details[best_index].error; for (uint p = 0; p < 16; p++) { for (uint s = 0; s < 4; s++) total_errors[p][s] += E2[p][s]; } selector_details[best_index].used = true; m_selector_indices[m_has_subblocks ? 
b << 1 : b].color = best_index; } } struct SelectorNode { uint64 *p, *pEnd; SelectorNode (uint64* begin, uint64* end) : p(begin), pEnd(end) {} bool operator<(const SelectorNode& other) const { return *p > *other.p; } static void sort_task(uint64 data, void* ptr) { std::sort(((SelectorNode*)ptr)->p, ((SelectorNode*)ptr)->pEnd); } }; void dxt_hc::create_color_selector_codebook() { uint num_tasks = m_pTask_pool->get_num_threads() + 1; crnlib::vector selectors(m_has_subblocks ? m_num_blocks >> 1 : m_num_blocks); for (uint i = 0, b = 0, step = m_has_subblocks ? 2 : 1; b < m_num_blocks; b += step) selectors[i++] = m_block_selectors[cColor][b] + (m_has_subblocks ? m_block_selectors[cColor][b + 1] : 0); crnlib::vector nodes; SelectorNode node(0, selectors.get_ptr()); for (uint i = 0; i < num_tasks; i++) { node.p = node.pEnd; node.pEnd = selectors.get_ptr() + selectors.size() * (i + 1) / num_tasks; if (node.p != node.pEnd) nodes.push_back(node); } for (uint i = 0; i < nodes.size(); i++) m_pTask_pool->queue_task(&SelectorNode::sort_task, i, &nodes[i]); m_pTask_pool->join(); std::priority_queue queue; for (uint i = 0; i < nodes.size(); i++) queue.push(nodes[i]); float v[4]; for (uint s = 0; s < 4; s++) v[s] = (s + 0.5f) * 0.25f; crnlib::vector vectors; crnlib::vector weights; vectors.reserve(selectors.size()); weights.reserve(selectors.size()); for (uint64 prev_selector = 0; queue.size();) { SelectorNode node = queue.top(); uint64 selector = *node.p++; queue.pop(); if (node.p != node.pEnd) queue.push(node); uint weight = (uint)selector; selector >>= 32; if (!vectors.size() || selector != prev_selector) { prev_selector = selector; vec16F vector; for (uint p = 0; p < 16; p++, selector >>= 2) vector[15 - p] = v[selector & 3]; vectors.push_back(vector); weights.push_back(weight); } else if (weights.back() > UINT_MAX - weight) { weights.back() = UINT_MAX; } else { weights.back() += weight; } } tree_clusterizer selector_vq; selector_vq.generate_codebook(vectors.get_ptr(), 
weights.get_ptr(), vectors.size(), m_params.m_color_selector_codebook_size, false, m_pTask_pool); m_color_selectors.resize(selector_vq.get_codebook_size()); m_color_selectors_used.resize(selector_vq.get_codebook_size()); for (uint i = 0; i < selector_vq.get_codebook_size(); i++) { const vec16F& v = selector_vq.get_codebook_entry(i); m_color_selectors[i] = 0; for (uint sh = 0, j = 0; j < 16; j++, sh += 2) m_color_selectors[i] |= (uint)(v[j] * 4.0f) << sh; } crnlib::vector > selector_details(num_tasks); for (uint t = 0; t < num_tasks; t++) { selector_details[t].resize(m_color_selectors.size()); m_pTask_pool->queue_object_task(this, &dxt_hc::create_color_selector_codebook_task, t, &selector_details[t]); } m_pTask_pool->join(); for (uint t = 1; t < num_tasks; t++) { for (uint i = 0; i < m_color_selectors.size(); i++) { for (uint8 p = 0; p < 16; p++) { for (uint8 s = 0; s < 4; s++) selector_details[0][i].error[p][s] += selector_details[t][i].error[p][s]; } selector_details[0][i].used = selector_details[0][i].used || selector_details[t][i].used; } } for (uint i = 0; i < m_color_selectors.size(); i++) { m_color_selectors_used[i] = selector_details[0][i].used; uint (&errors)[16][4] = selector_details[0][i].error; m_color_selectors[i] = 0; for (uint sh = 0, p = 0; p < 16; p++, sh += 2) { uint* e = errors[p]; uint8 s03 = e[3] < e[0] ? 3 : 0; uint8 s12 = e[2] < e[1] ? 2 : 1; m_color_selectors[i] |= (e[s12] < e[s03] ? s12 : s03) << sh; } } } struct alpha_selector_details { alpha_selector_details() { utils::zero_object(*this); } uint error[16][8]; bool used; }; void dxt_hc::create_alpha_selector_codebook_task(uint64 data, void* pData_ptr) { crnlib::vector& selector_details = *static_cast*>(pData_ptr); uint num_tasks = m_pTask_pool->get_num_threads() + 1; uint E3[16][8]; uint E6[8][64]; for (uint n = m_has_subblocks ? 
m_num_blocks >> 1 : m_num_blocks, b = n * data / num_tasks, bEnd = n * (data + 1) / num_tasks; b < bEnd; b++) { for (uint c = cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) { const uint alpha_pixel_comp = m_params.m_alpha_component_indices[c - cAlpha0]; alpha_cluster& cluster = m_alpha_clusters[m_endpoint_indices[m_has_subblocks ? b << 1 : b].component[c]]; uint* block_values = cluster.alpha_values; for (uint p = 0; p < 16; p++) { for (uint s = 0; s < 8; s++) { int delta = m_blocks[b][p][alpha_pixel_comp] - block_values[s]; E3[p][s] = delta * delta; } } for (uint p = 0; p < 8; p++) { for (uint s = 0; s < 64; s++) E6[p][s] = E3[p << 1][s & 7] + E3[p << 1 | 1][s >> 3]; } uint best_index = 0; for (uint best_error = cUINT32_MAX, s = 0; s < m_alpha_selectors.size(); s++) { uint64 selector = m_alpha_selectors[s]; uint error = E6[0][selector & 63]; error += E6[1][selector >> 6 & 63]; error += E6[2][selector >> 12 & 63]; error += E6[3][selector >> 18 & 63]; error += E6[4][selector >> 24 & 63]; error += E6[5][selector >> 30 & 63]; error += E6[6][selector >> 36 & 63]; error += E6[7][selector >> 42 & 63]; if (error < best_error) { best_error = error; best_index = s; } } if (cluster.refined_alpha) { block_values = cluster.refined_alpha_values; for (uint p = 0; p < 16; p++) { for (uint s = 0; s < 8; s++) { int delta = m_blocks[b][p][alpha_pixel_comp] - block_values[s]; E3[p][s] = delta * delta; } } } uint (&total_errors)[16][8] = selector_details[best_index].error; for (uint p = 0; p < 16; p++) { for (uint s = 0; s < 8; s++) total_errors[p][s] += E3[p][s]; } selector_details[best_index].used = true; m_selector_indices[m_has_subblocks ? b << 1 : b].component[c] = best_index; } } } void dxt_hc::create_alpha_selector_codebook() { uint num_tasks = m_pTask_pool->get_num_threads() + 1; crnlib::vector selectors(m_num_alpha_blocks * (m_has_subblocks ? 
m_num_blocks >> 1 : m_num_blocks)); for (uint i = 0, c = cAlpha0; c < cAlpha0 + m_num_alpha_blocks; c++) { for (uint b = 0, step = m_has_subblocks ? 2 : 1; b < m_num_blocks; b += step) selectors[i++] = m_block_selectors[c][b]; } crnlib::vector nodes; SelectorNode node(0, selectors.get_ptr()); for (uint i = 0; i < num_tasks; i++) { node.p = node.pEnd; node.pEnd = selectors.get_ptr() + selectors.size() * (i + 1) / num_tasks; if (node.p != node.pEnd) nodes.push_back(node); } for (uint i = 0; i < nodes.size(); i++) m_pTask_pool->queue_task(&SelectorNode::sort_task, i, &nodes[i]); m_pTask_pool->join(); std::priority_queue queue; for (uint i = 0; i < nodes.size(); i++) queue.push(nodes[i]); float v[8]; for (uint s = 0; s < 8; s++) v[s] = (s + 0.5f) * 0.125f; crnlib::vector vectors; crnlib::vector weights; vectors.reserve(selectors.size()); weights.reserve(selectors.size()); for (uint64 prev_selector = 0; queue.size();) { SelectorNode node = queue.top(); uint64 selector = *node.p++; queue.pop(); if (node.p != node.pEnd) queue.push(node); uint weight = (uint16)selector; selector >>= 16; if (!vectors.size() || selector != prev_selector) { prev_selector = selector; vec16F vector; for (uint p = 0; p < 16; p++, selector >>= 3) vector[15 - p] = v[selector & 7]; vectors.push_back(vector); weights.push_back(weight); } else if (weights.back() > UINT_MAX - weight) { weights.back() = UINT_MAX; } else { weights.back() += weight; } } tree_clusterizer selector_vq; selector_vq.generate_codebook(vectors.get_ptr(), weights.get_ptr(), vectors.size(), m_params.m_alpha_selector_codebook_size, false, m_pTask_pool); m_alpha_selectors.resize(selector_vq.get_codebook_size()); m_alpha_selectors_used.resize(selector_vq.get_codebook_size()); for (uint i = 0; i < selector_vq.get_codebook_size(); i++) { const vec16F& v = selector_vq.get_codebook_entry(i); m_alpha_selectors[i] = 0; for (uint sh = 0, j = 0; j < 16; j++, sh += 3) m_alpha_selectors[i] |= (uint64)(v[j] * 8.0f) << sh; } crnlib::vector > 
selector_details(num_tasks); for (uint t = 0; t < num_tasks; t++) { selector_details[t].resize(m_alpha_selectors.size()); m_pTask_pool->queue_object_task(this, &dxt_hc::create_alpha_selector_codebook_task, t, &selector_details[t]); } m_pTask_pool->join(); for (uint t = 1; t < num_tasks; t++) { for (uint i = 0; i < m_alpha_selectors.size(); i++) { for (uint8 p = 0; p < 16; p++) { for (uint8 s = 0; s < 8; s++) selector_details[0][i].error[p][s] += selector_details[t][i].error[p][s]; } selector_details[0][i].used = selector_details[0][i].used || selector_details[t][i].used; } } for (uint i = 0; i < m_alpha_selectors.size(); i++) { m_alpha_selectors_used[i] = selector_details[0][i].used; uint (&errors)[16][8] = selector_details[0][i].error; m_alpha_selectors[i] = 0; for (uint sh = 0, p = 0; p < 16; p++, sh += 3) { uint* e = errors[p]; uint8 s07 = e[7] < e[0] ? 7 : 0; uint8 s12 = e[2] < e[1] ? 2 : 1; uint8 s34 = e[4] < e[3] ? 4 : 3; uint8 s56 = e[6] < e[5] ? 6 : 5; uint8 s02 = e[s12] < e[s07] ? s12 : s07; uint8 s36 = e[s56] < e[s34] ? s56 : s34; m_alpha_selectors[i] |= (uint64)(e[s36] < e[s02] ? s36 : s02) << sh; } } } bool dxt_hc::update_progress(uint phase_index, uint subphase_index, uint subphase_total) { CRNLIB_ASSERT(crn_get_current_thread_id() == m_main_thread_id); if (!m_params.m_pProgress_func) return true; const int percentage_complete = (subphase_total > 1) ? 
((100 * subphase_index) / (subphase_total - 1)) : 100; if (((int)phase_index == m_prev_phase_index) && (m_prev_percentage_complete == percentage_complete)) return !m_canceled; m_prev_percentage_complete = percentage_complete; bool status = (*m_params.m_pProgress_func)(phase_index, cTotalCompressionPhases, subphase_index, subphase_total, m_params.m_pProgress_func_data) != 0; if (!status) { m_canceled = true; return false; } return true; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt_hc.h000066400000000000000000000143151503722002600215600ustar00rootroot00000000000000// File: crn_dxt_hc.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dxt1.h" #include "crn_dxt5a.h" #include "crn_dxt_endpoint_refiner.h" #include "crn_image.h" #include "crn_dxt.h" #include "crn_image.h" #include "crn_dxt_hc_common.h" #include "crn_tree_clusterizer.h" #include "crn_threading.h" #define CRN_NO_FUNCTION_DEFINITIONS #include "../inc/crnlib.h" namespace crnlib { const uint cTotalCompressionPhases = 25; class dxt_hc { public: dxt_hc(); ~dxt_hc(); struct endpoint_indices_details { union { struct { uint16 color; uint16 alpha0; uint16 alpha1; }; uint16 component[3]; }; uint8 reference; endpoint_indices_details() { utils::zero_object(*this); } }; struct selector_indices_details { union { struct { uint16 color; uint16 alpha0; uint16 alpha1; }; uint16 component[3]; }; selector_indices_details() { utils::zero_object(*this); } }; struct tile_details { crnlib::vector pixels; float weight; vec<6, float> color_endpoint; vec<2, float> alpha_endpoints[2]; uint16 cluster_indices[3]; }; crnlib::vector m_tiles; uint m_num_tiles; float m_color_derating[cCRNMaxLevels][8]; float m_alpha_derating[8]; float m_uint8_to_float[256]; color_quad_u8 (*m_blocks)[16]; uint m_num_blocks; crnlib::vector m_block_weights; crnlib::vector m_block_encodings; crnlib::vector m_block_selectors[3]; crnlib::vector m_color_selectors; crnlib::vector m_alpha_selectors; 
crnlib::vector m_color_selectors_used; crnlib::vector m_alpha_selectors_used; crnlib::vector m_tile_indices; crnlib::vector m_endpoint_indices; crnlib::vector m_selector_indices; struct params { params() : m_num_blocks(0), m_num_levels(0), m_num_faces(0), m_format(cDXT1), m_perceptual(true), m_hierarchical(true), m_color_endpoint_codebook_size(3072), m_color_selector_codebook_size(3072), m_alpha_endpoint_codebook_size(3072), m_alpha_selector_codebook_size(3072), m_adaptive_tile_color_psnr_derating(2.0f), m_adaptive_tile_alpha_psnr_derating(2.0f), m_adaptive_tile_color_alpha_weighting_ratio(3.0f), m_debugging(false), m_pProgress_func(0), m_pProgress_func_data(0) { m_alpha_component_indices[0] = 3; m_alpha_component_indices[1] = 0; for (uint i = 0; i < cCRNMaxLevels; i++) { m_levels[i].m_first_block = 0; m_levels[i].m_num_blocks = 0; m_levels[i].m_block_width = 0; } } uint m_num_blocks; uint m_num_levels; uint m_num_faces; struct { uint m_first_block; uint m_num_blocks; uint m_block_width; float m_weight; } m_levels[cCRNMaxLevels]; dxt_format m_format; bool m_perceptual; bool m_hierarchical; uint m_color_endpoint_codebook_size; uint m_color_selector_codebook_size; uint m_alpha_endpoint_codebook_size; uint m_alpha_selector_codebook_size; float m_adaptive_tile_color_psnr_derating; float m_adaptive_tile_alpha_psnr_derating; float m_adaptive_tile_color_alpha_weighting_ratio; uint m_alpha_component_indices[2]; task_pool* m_pTask_pool; bool m_debugging; crn_progress_callback_func m_pProgress_func; void* m_pProgress_func_data; }; void clear(); bool compress( color_quad_u8 (*blocks)[16], crnlib::vector& endpoint_indices, crnlib::vector& selector_indices, crnlib::vector& color_endpoints, crnlib::vector& alpha_endpoints, crnlib::vector& color_selectors, crnlib::vector& alpha_selectors, const params& p ); private: params m_params; uint m_num_alpha_blocks; bool m_has_color_blocks; bool m_has_etc_color_blocks; bool m_has_subblocks; enum { cColor = 0, cAlpha0 = 1, cAlpha1 = 2, 
cNumComps = 3 }; struct color_cluster { color_cluster() : first_endpoint(0), second_endpoint(0) {} crnlib::vector blocks[3]; crnlib::vector pixels; uint first_endpoint; uint second_endpoint; color_quad_u8 color_values[4]; }; crnlib::vector m_color_clusters; struct alpha_cluster { alpha_cluster() : first_endpoint(0), second_endpoint(0) {} crnlib::vector blocks[3]; crnlib::vector pixels; uint first_endpoint; uint second_endpoint; uint alpha_values[8]; bool refined_alpha; uint refined_alpha_values[8]; }; crnlib::vector m_alpha_clusters; crn_thread_id_t m_main_thread_id; bool m_canceled; task_pool* m_pTask_pool; int m_prev_phase_index; int m_prev_percentage_complete; vec<6, float> palettize_color(color_quad_u8* pixels, uint pixels_count); vec<2, float> palettize_alpha(color_quad_u8* pixels, uint pixels_count, uint comp_index); void determine_tiles_task(uint64 data, void* pData_ptr); void determine_tiles_task_etc(uint64 data, void* pData_ptr); void determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr); void determine_color_endpoint_codebook_task_etc(uint64 data, void* pData_ptr); void determine_color_endpoint_clusters_task(uint64 data, void* pData_ptr); void determine_color_endpoints(); void determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr); void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr); void determine_alpha_endpoints(); void create_color_selector_codebook_task(uint64 data, void* pData_ptr); void create_color_selector_codebook(); void create_alpha_selector_codebook_task(uint64 data, void* pData_ptr); void create_alpha_selector_codebook(); bool update_progress(uint phase_index, uint subphase_index, uint subphase_total); }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt_hc_common.cpp000066400000000000000000000021611503722002600234570ustar00rootroot00000000000000// File: crn_dxt_hc_common.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include 
"crn_dxt_hc_common.h" namespace crnlib { chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings] = { {1, {{0, 0, 8, 8, 0}}}, {2, {{0, 0, 8, 4, 1}, {0, 4, 8, 4, 2}}}, {2, {{0, 0, 4, 8, 3}, {4, 0, 4, 8, 4}}}, {3, {{0, 0, 8, 4, 1}, {0, 4, 4, 4, 7}, {4, 4, 4, 4, 8}}}, {3, {{0, 4, 8, 4, 2}, {0, 0, 4, 4, 5}, {4, 0, 4, 4, 6}}}, {3, {{0, 0, 4, 8, 3}, {4, 0, 4, 4, 6}, {4, 4, 4, 4, 8}}}, {3, {{4, 0, 4, 8, 4}, {0, 0, 4, 4, 5}, {0, 4, 4, 4, 7}}}, {4, {{0, 0, 4, 4, 5}, {4, 0, 4, 4, 6}, {0, 4, 4, 4, 7}, {4, 4, 4, 4, 8}}}}; chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts] = { // 2x2 {0, 0, 8, 8, 0}, // 2x1 {0, 0, 8, 4, 1}, {0, 4, 8, 4, 2}, // 1x2 {0, 0, 4, 8, 3}, {4, 0, 4, 8, 4}, // 1x1 {0, 0, 4, 4, 5}, {4, 0, 4, 4, 6}, {0, 4, 4, 4, 7}, {4, 4, 4, 4, 8}}; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt_hc_common.h000066400000000000000000000020231503722002600231210ustar00rootroot00000000000000// File: crn_dxt_hc_common.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { struct chunk_tile_desc { // These values are in pixels, and always a multiple of cBlockPixelWidth/cBlockPixelHeight. 
uint m_x_ofs; uint m_y_ofs; uint m_width; uint m_height; uint m_layout_index; }; struct chunk_encoding_desc { uint m_num_tiles; chunk_tile_desc m_tiles[4]; }; const uint cChunkPixelWidth = 8; const uint cChunkPixelHeight = 8; const uint cChunkBlockWidth = 2; const uint cChunkBlockHeight = 2; const uint cChunkMaxTiles = 4; const uint cBlockPixelWidthShift = 2; const uint cBlockPixelHeightShift = 2; const uint cBlockPixelWidth = 4; const uint cBlockPixelHeight = 4; const uint cNumChunkEncodings = 8; extern chunk_encoding_desc g_chunk_encodings[cNumChunkEncodings]; const uint cNumChunkTileLayouts = 9; const uint cFirst4x4ChunkTileLayout = 5; extern chunk_tile_desc g_chunk_tile_layouts[cNumChunkTileLayouts]; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt_image.cpp000066400000000000000000001525551503722002600226140ustar00rootroot00000000000000// File: crn_dxt_image.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_dxt_image.h" #if CRNLIB_SUPPORT_SQUISH #include "squish\squish.h" #endif #include "crn_ryg_dxt.hpp" #include "crn_dxt_fast.h" #include "crn_console.h" #include "crn_threading.h" #if CRNLIB_SUPPORT_ATI_COMPRESS #ifdef _DLL #pragma comment(lib, "ATI_Compress_MT_DLL_VC8.lib") #else #pragma comment(lib, "ATI_Compress_MT_VC8.lib") #endif #include "..\ext\ATI_Compress\ATI_Compress.h" #endif #include "crn_rg_etc1.h" #include "crn_etc.h" #define CRNLIB_USE_RG_ETC1 1 namespace crnlib { dxt_image::dxt_image() : m_pElements(NULL), m_width(0), m_height(0), m_blocks_x(0), m_blocks_y(0), m_total_blocks(0), m_total_elements(0), m_num_elements_per_block(0), m_bytes_per_block(0), m_format(cDXTInvalid) { utils::zero_object(m_element_type); utils::zero_object(m_element_component_index); } dxt_image::dxt_image(const dxt_image& other) : m_pElements(NULL) { *this = other; } dxt_image& dxt_image::operator=(const dxt_image& rhs) { if (this == &rhs) return *this; clear(); m_width = rhs.m_width; m_height = 
rhs.m_height; m_blocks_x = rhs.m_blocks_x; m_blocks_y = rhs.m_blocks_y; m_num_elements_per_block = rhs.m_num_elements_per_block; m_bytes_per_block = rhs.m_bytes_per_block; m_format = rhs.m_format; m_total_blocks = rhs.m_total_blocks; m_total_elements = rhs.m_total_elements; m_pElements = NULL; memcpy(m_element_type, rhs.m_element_type, sizeof(m_element_type)); memcpy(m_element_component_index, rhs.m_element_component_index, sizeof(m_element_component_index)); if (rhs.m_pElements) { m_elements.resize(m_total_elements); memcpy(&m_elements[0], rhs.m_pElements, sizeof(element) * m_total_elements); m_pElements = &m_elements[0]; } return *this; } void dxt_image::clear() { m_elements.clear(); m_width = 0; m_height = 0; m_blocks_x = 0; m_blocks_y = 0; m_num_elements_per_block = 0; m_bytes_per_block = 0; m_format = cDXTInvalid; utils::zero_object(m_element_type); utils::zero_object(m_element_component_index); m_total_blocks = 0; m_total_elements = 0; m_pElements = NULL; } bool dxt_image::init_internal(dxt_format fmt, uint width, uint height) { CRNLIB_ASSERT((fmt != cDXTInvalid) && (width > 0) && (height > 0)); clear(); m_width = width; m_height = height; m_blocks_x = (m_width + 3) >> cDXTBlockShift; m_blocks_y = (m_height + 3) >> cDXTBlockShift; m_num_elements_per_block = 2; if ((fmt == cDXT1) || (fmt == cDXT1A) || (fmt == cDXT5A) || (fmt == cETC1) || (fmt == cETC2) || (fmt == cETC1S)) m_num_elements_per_block = 1; m_total_blocks = m_blocks_x * m_blocks_y; m_total_elements = m_total_blocks * m_num_elements_per_block; CRNLIB_ASSUME((uint)cDXT1BytesPerBlock == (uint)cETC1BytesPerBlock); m_bytes_per_block = cDXT1BytesPerBlock * m_num_elements_per_block; m_format = fmt; switch (m_format) { case cDXT1: case cDXT1A: { m_element_type[0] = cColorDXT1; m_element_component_index[0] = -1; break; } case cDXT3: { m_element_type[0] = cAlphaDXT3; m_element_type[1] = cColorDXT1; m_element_component_index[0] = 3; m_element_component_index[1] = -1; break; } case cDXT5: { m_element_type[0] = 
cAlphaDXT5; m_element_type[1] = cColorDXT1; m_element_component_index[0] = 3; m_element_component_index[1] = -1; break; } case cDXT5A: { m_element_type[0] = cAlphaDXT5; m_element_component_index[0] = 3; break; } case cDXN_XY: { m_element_type[0] = cAlphaDXT5; m_element_type[1] = cAlphaDXT5; m_element_component_index[0] = 0; m_element_component_index[1] = 1; break; } case cDXN_YX: { m_element_type[0] = cAlphaDXT5; m_element_type[1] = cAlphaDXT5; m_element_component_index[0] = 1; m_element_component_index[1] = 0; break; } case cETC1: case cETC1S: { m_element_type[0] = cColorETC1; m_element_component_index[0] = -1; break; } case cETC2: { m_element_type[0] = cColorETC2; m_element_component_index[0] = -1; break; } case cETC2A: case cETC2AS: { m_element_type[0] = cAlphaETC2; m_element_type[1] = cColorETC2; m_element_component_index[0] = 3; m_element_component_index[1] = -1; break; } default: { CRNLIB_ASSERT(0); clear(); return false; } } return true; } bool dxt_image::init(dxt_format fmt, uint width, uint height, bool clear_elements) { if (!init_internal(fmt, width, height)) return false; m_elements.resize(m_total_elements); m_pElements = &m_elements[0]; if (clear_elements) memset(m_pElements, 0, sizeof(element) * m_total_elements); return true; } bool dxt_image::init(dxt_format fmt, uint width, uint height, uint num_elements, element* pElements, bool create_copy) { CRNLIB_ASSERT(num_elements && pElements); if (!init_internal(fmt, width, height)) return false; if (num_elements != m_total_elements) { clear(); return false; } if (create_copy) { m_elements.resize(m_total_elements); m_pElements = &m_elements[0]; memcpy(m_pElements, pElements, m_total_elements * sizeof(element)); } else m_pElements = pElements; return true; } struct init_task_params { dxt_format m_fmt; const image_u8* m_pImg; const dxt_image::pack_params* m_pParams; crn_thread_id_t m_main_thread; atomic32_t m_canceled; }; void dxt_image::init_task(uint64 data, void* pData_ptr) { const uint thread_index = 
static_cast(data); init_task_params* pInit_params = static_cast(pData_ptr); const image_u8& img = *pInit_params->m_pImg; const pack_params& p = *pInit_params->m_pParams; const bool is_main_thread = (crn_get_current_thread_id() == pInit_params->m_main_thread); uint block_index = 0; set_block_pixels_context optimizer_context; int prev_progress_percentage = -1; for (uint block_y = 0; block_y < m_blocks_y; block_y++) { const uint pixel_ofs_y = block_y * cDXTBlockSize; for (uint block_x = 0; block_x < m_blocks_x; block_x++, block_index++) { if (pInit_params->m_canceled) return; if (p.m_pProgress_callback && is_main_thread && ((block_index & 63) == 63)) { const uint progress_percentage = p.m_progress_start + ((block_index * p.m_progress_range + get_total_blocks() / 2) / get_total_blocks()); if ((int)progress_percentage != prev_progress_percentage) { prev_progress_percentage = progress_percentage; if (!(p.m_pProgress_callback)(progress_percentage, p.m_pProgress_callback_user_data_ptr)) { atomic_exchange32(&pInit_params->m_canceled, CRNLIB_TRUE); return; } } } if (p.m_num_helper_threads) { if ((block_index % (p.m_num_helper_threads + 1)) != thread_index) continue; } color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; const uint pixel_ofs_x = block_x * cDXTBlockSize; for (uint y = 0; y < cDXTBlockSize; y++) { const uint iy = math::minimum(pixel_ofs_y + y, img.get_height() - 1); for (uint x = 0; x < cDXTBlockSize; x++) { const uint ix = math::minimum(pixel_ofs_x + x, img.get_width() - 1); pixels[x + y * cDXTBlockSize] = img(ix, iy); } } set_block_pixels(block_x, block_y, pixels, p, optimizer_context); } } } #if CRNLIB_SUPPORT_ATI_COMPRESS bool dxt_image::init_ati_compress(dxt_format fmt, const image_u8& img, const pack_params& p) { image_u8 tmp_img(img); for (uint y = 0; y < img.get_height(); y++) { for (uint x = 0; x < img.get_width(); x++) { color_quad_u8 c(img(x, y)); std::swap(c.r, c.b); tmp_img(x, y) = c; } } ATI_TC_Texture src_tex; utils::zero_object(src_tex); 
src_tex.dwSize = sizeof(ATI_TC_Texture); src_tex.dwWidth = tmp_img.get_width(); src_tex.dwHeight = tmp_img.get_height(); src_tex.dwPitch = tmp_img.get_pitch_in_bytes(); src_tex.format = ATI_TC_FORMAT_ARGB_8888; src_tex.dwDataSize = src_tex.dwPitch * tmp_img.get_height(); src_tex.pData = (ATI_TC_BYTE*)tmp_img.get_ptr(); ATI_TC_Texture dst_tex; utils::zero_object(dst_tex); dst_tex.dwSize = sizeof(ATI_TC_Texture); dst_tex.dwWidth = tmp_img.get_width(); dst_tex.dwHeight = tmp_img.get_height(); dst_tex.dwDataSize = get_size_in_bytes(); dst_tex.pData = (ATI_TC_BYTE*)get_element_ptr(); switch (fmt) { case cDXT1: case cDXT1A: dst_tex.format = ATI_TC_FORMAT_DXT1; break; case cDXT3: dst_tex.format = ATI_TC_FORMAT_DXT3; break; case cDXT5: dst_tex.format = ATI_TC_FORMAT_DXT5; break; case cDXT5A: dst_tex.format = ATI_TC_FORMAT_ATI1N; break; case cDXN_XY: dst_tex.format = ATI_TC_FORMAT_ATI2N_XY; break; case cDXN_YX: dst_tex.format = ATI_TC_FORMAT_ATI2N; break; default: { CRNLIB_ASSERT(false); return false; } } ATI_TC_CompressOptions options; utils::zero_object(options); options.dwSize = sizeof(ATI_TC_CompressOptions); if (fmt == cDXT1A) { options.bDXT1UseAlpha = true; options.nAlphaThreshold = (ATI_TC_BYTE)p.m_dxt1a_alpha_threshold; } options.bDisableMultiThreading = (p.m_num_helper_threads == 0); switch (p.m_quality) { case cCRNDXTQualityFast: options.nCompressionSpeed = ATI_TC_Speed_Fast; break; case cCRNDXTQualitySuperFast: options.nCompressionSpeed = ATI_TC_Speed_SuperFast; break; default: options.nCompressionSpeed = ATI_TC_Speed_Normal; break; } if (p.m_perceptual) { options.bUseChannelWeighting = true; options.fWeightingRed = .212671f; options.fWeightingGreen = .715160f; options.fWeightingBlue = .072169f; } ATI_TC_ERROR err = ATI_TC_ConvertTexture(&src_tex, &dst_tex, &options, NULL, NULL, NULL); return err == ATI_TC_OK; } #endif bool dxt_image::init(dxt_format fmt, const image_u8& img, const pack_params& p) { if (!init(fmt, img.get_width(), img.get_height(), false)) return 
false; #if CRNLIB_SUPPORT_ATI_COMPRESS if (p.m_compressor == cCRNDXTCompressorATI) return init_ati_compress(fmt, img, p); #endif task_pool* pPool = p.m_pTask_pool; task_pool tmp_pool; if (!pPool) { if (!tmp_pool.init(p.m_num_helper_threads)) return false; pPool = &tmp_pool; } init_task_params init_params; init_params.m_fmt = fmt; init_params.m_pImg = &img; init_params.m_pParams = &p; init_params.m_main_thread = crn_get_current_thread_id(); init_params.m_canceled = false; for (uint i = 0; i <= p.m_num_helper_threads; i++) pPool->queue_object_task(this, &dxt_image::init_task, i, &init_params); pPool->join(); if (init_params.m_canceled) return false; return true; } bool dxt_image::unpack(image_u8& img) const { if (!m_total_elements) return false; img.resize(m_width, m_height); color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) pixels[i].set(0, 0, 0, 255); bool all_blocks_valid = true; for (uint block_y = 0; block_y < m_blocks_y; block_y++) { const uint pixel_ofs_y = block_y * cDXTBlockSize; const uint limit_y = math::minimum(cDXTBlockSize, img.get_height() - pixel_ofs_y); for (uint block_x = 0; block_x < m_blocks_x; block_x++) { if (!get_block_pixels(block_x, block_y, pixels)) all_blocks_valid = false; const uint pixel_ofs_x = block_x * cDXTBlockSize; const uint limit_x = math::minimum(cDXTBlockSize, img.get_width() - pixel_ofs_x); for (uint y = 0; y < limit_y; y++) { const uint iy = pixel_ofs_y + y; for (uint x = 0; x < limit_x; x++) { const uint ix = pixel_ofs_x + x; img(ix, iy) = pixels[x + (y << cDXTBlockShift)]; } } } } if (!all_blocks_valid) console::error("dxt_image::unpack: One or more invalid blocks encountered!"); img.reset_comp_flags(); img.set_component_valid(0, false); img.set_component_valid(1, false); img.set_component_valid(2, false); for (uint i = 0; i < m_num_elements_per_block; i++) { if (m_element_component_index[i] < 0) { img.set_component_valid(0, true); img.set_component_valid(1, true); 
img.set_component_valid(2, true); } else img.set_component_valid(m_element_component_index[i], true); } img.set_component_valid(3, get_dxt_format_has_alpha(m_format)); return true; } void dxt_image::endian_swap() { utils::endian_switch_words(reinterpret_cast(m_elements.get_ptr()), m_elements.size_in_bytes() / sizeof(uint16)); } const dxt_image::element& dxt_image::get_element(uint block_x, uint block_y, uint element_index) const { CRNLIB_ASSERT((block_x < m_blocks_x) && (block_y < m_blocks_y) && (element_index < m_num_elements_per_block)); return m_pElements[(block_x + block_y * m_blocks_x) * m_num_elements_per_block + element_index]; } dxt_image::element& dxt_image::get_element(uint block_x, uint block_y, uint element_index) { CRNLIB_ASSERT((block_x < m_blocks_x) && (block_y < m_blocks_y) && (element_index < m_num_elements_per_block)); return m_pElements[(block_x + block_y * m_blocks_x) * m_num_elements_per_block + element_index]; } bool dxt_image::has_alpha() const { switch (m_format) { case cDXT1: { for (uint i = 0; i < m_total_elements; i++) { const dxt1_block& blk = *(dxt1_block*)&m_pElements[i]; if (blk.get_low_color() <= blk.get_high_color()) { for (uint y = 0; y < cDXTBlockSize; y++) for (uint x = 0; x < cDXTBlockSize; x++) if (blk.get_selector(x, y) == 3) return true; } } break; } case cDXT1A: case cDXT3: case cDXT5: case cDXT5A: case cETC2A: case cETC2AS: return true; default: break; } return false; } color_quad_u8 dxt_image::get_pixel(uint x, uint y) const { CRNLIB_ASSERT((x < m_width) && (y < m_height)); const uint block_x = x >> cDXTBlockShift; const uint block_y = y >> cDXTBlockShift; const element* pElement = reinterpret_cast(&get_element(block_x, block_y, 0)); color_quad_u8 result(0, 0, 0, 255); for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { switch (m_element_type[element_index]) { case cColorETC1: { const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); 
const bool diff_flag = block.get_diff_bit(); const bool flip_flag = block.get_flip_bit(); const uint table_index0 = block.get_inten_table(0); const uint table_index1 = block.get_inten_table(1); color_quad_u8 subblock_colors0[4], subblock_colors1[4]; if (diff_flag) { const uint16 base_color5 = block.get_base5_color(); const uint16 delta_color3 = block.get_delta3_color(); etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1); } else { const uint16 base_color4_0 = block.get_base4_color(0); etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); const uint16 base_color4_1 = block.get_base4_color(1); etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); } const uint bx = x & 3; const uint by = y & 3; const uint selector_index = block.get_selector(bx, by); if (flip_flag) { if (by <= 2) result = subblock_colors0[selector_index]; else result = subblock_colors1[selector_index]; } else { if (bx <= 2) result = subblock_colors0[selector_index]; else result = subblock_colors1[selector_index]; } break; } case cColorDXT1: { const dxt1_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); const uint l = pBlock->get_low_color(); const uint h = pBlock->get_high_color(); color_quad_u8 c0(dxt1_block::unpack_color(static_cast(l), true)); color_quad_u8 c1(dxt1_block::unpack_color(static_cast(h), true)); const uint s = pBlock->get_selector(x & 3, y & 3); if (l > h) { switch (s) { case 0: result.set_noclamp_rgb(c0.r, c0.g, c0.b); break; case 1: result.set_noclamp_rgb(c1.r, c1.g, c1.b); break; case 2: result.set_noclamp_rgb((c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3); break; case 3: result.set_noclamp_rgb((c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3); break; } } else { switch (s) { case 0: result.set_noclamp_rgb(c0.r, c0.g, c0.b); 
break; case 1: result.set_noclamp_rgb(c1.r, c1.g, c1.b); break; case 2: result.set_noclamp_rgb((c0.r + c1.r) >> 1U, (c0.g + c1.g) >> 1U, (c0.b + c1.b) >> 1U); break; case 3: { if (m_format <= cDXT1A) result.set_noclamp_rgba(0, 0, 0, 0); else result.set_noclamp_rgb(0, 0, 0); break; } } } break; } case cAlphaDXT5: { const int comp_index = m_element_component_index[element_index]; const dxt5_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); const uint l = pBlock->get_low_alpha(); const uint h = pBlock->get_high_alpha(); const uint s = pBlock->get_selector(x & 3, y & 3); if (l > h) { switch (s) { case 0: result[comp_index] = static_cast(l); break; case 1: result[comp_index] = static_cast(h); break; case 2: result[comp_index] = static_cast((l * 6 + h) / 7); break; case 3: result[comp_index] = static_cast((l * 5 + h * 2) / 7); break; case 4: result[comp_index] = static_cast((l * 4 + h * 3) / 7); break; case 5: result[comp_index] = static_cast((l * 3 + h * 4) / 7); break; case 6: result[comp_index] = static_cast((l * 2 + h * 5) / 7); break; case 7: result[comp_index] = static_cast((l + h * 6) / 7); break; } } else { switch (s) { case 0: result[comp_index] = static_cast(l); break; case 1: result[comp_index] = static_cast(h); break; case 2: result[comp_index] = static_cast((l * 4 + h) / 5); break; case 3: result[comp_index] = static_cast((l * 3 + h * 2) / 5); break; case 4: result[comp_index] = static_cast((l * 2 + h * 3) / 5); break; case 5: result[comp_index] = static_cast((l + h * 4) / 5); break; case 6: result[comp_index] = 0; break; case 7: result[comp_index] = 255; break; } } break; } case cAlphaDXT3: { const int comp_index = m_element_component_index[element_index]; const dxt3_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); result[comp_index] = static_cast(pBlock->get_alpha(x & 3, y & 3, true)); break; } default: break; } } return result; } uint dxt_image::get_pixel_alpha(uint x, uint y, uint 
element_index) const { CRNLIB_ASSERT((x < m_width) && (y < m_height) && (element_index < m_num_elements_per_block)); const uint block_x = x >> cDXTBlockShift; const uint block_y = y >> cDXTBlockShift; switch (m_element_type[element_index]) { case cColorDXT1: { if (m_format <= cDXT1A) { const dxt1_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); const uint l = pBlock->get_low_color(); const uint h = pBlock->get_high_color(); if (l <= h) { uint s = pBlock->get_selector(x & 3, y & 3); return (s == 3) ? 0 : 255; } else { return 255; } } break; } case cAlphaDXT5: { const dxt5_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); const uint l = pBlock->get_low_alpha(); const uint h = pBlock->get_high_alpha(); const uint s = pBlock->get_selector(x & 3, y & 3); if (l > h) { switch (s) { case 0: return l; case 1: return h; case 2: return (l * 6 + h) / 7; case 3: return (l * 5 + h * 2) / 7; case 4: return (l * 4 + h * 3) / 7; case 5: return (l * 3 + h * 4) / 7; case 6: return (l * 2 + h * 5) / 7; case 7: return (l + h * 6) / 7; } } else { switch (s) { case 0: return l; case 1: return h; case 2: return (l * 4 + h) / 5; case 3: return (l * 3 + h * 2) / 5; case 4: return (l * 2 + h * 3) / 5; case 5: return (l + h * 4) / 5; case 6: return 0; case 7: return 255; } } } case cAlphaDXT3: { const dxt3_block* pBlock = reinterpret_cast(&get_element(block_x, block_y, element_index)); return pBlock->get_alpha(x & 3, y & 3, true); } default: break; } return 255; } void dxt_image::set_pixel(uint x, uint y, const color_quad_u8& c, bool perceptual) { CRNLIB_ASSERT((x < m_width) && (y < m_height)); const uint block_x = x >> cDXTBlockShift; const uint block_y = y >> cDXTBlockShift; element* pElement = &get_element(block_x, block_y, 0); for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { switch (m_element_type[element_index]) { case cColorETC1: { etc1_block& block = 
*reinterpret_cast(&get_element(block_x, block_y, element_index)); const bool diff_flag = block.get_diff_bit(); const bool flip_flag = block.get_flip_bit(); const uint table_index0 = block.get_inten_table(0); const uint table_index1 = block.get_inten_table(1); color_quad_u8 subblock_colors0[4], subblock_colors1[4]; if (diff_flag) { const uint16 base_color5 = block.get_base5_color(); const uint16 delta_color3 = block.get_delta3_color(); etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1); } else { const uint16 base_color4_0 = block.get_base4_color(0); etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); const uint16 base_color4_1 = block.get_base4_color(1); etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); } const uint bx = x & 3; const uint by = y & 3; color_quad_u8* pColors = subblock_colors1; if (flip_flag) { if (by <= 2) pColors = subblock_colors0; } else { if (bx <= 2) pColors = subblock_colors0; } uint best_error = UINT_MAX; uint best_selector = 0; for (uint i = 0; i < 4; i++) { uint error = color::color_distance(perceptual, pColors[i], c, false); if (error < best_error) { best_error = error; best_selector = i; } } block.set_selector(bx, by, best_selector); break; } case cColorDXT1: { dxt1_block* pDXT1_block = reinterpret_cast(pElement); color_quad_u8 colors[cDXT1SelectorValues]; const uint n = pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); if ((m_format == cDXT1A) && (c.a < 128)) pDXT1_block->set_selector(x & 3, y & 3, 3); else { uint best_error = UINT_MAX; uint best_selector = 0; for (uint i = 0; i < n; i++) { uint error = color::color_distance(perceptual, colors[i], c, false); if (error < best_error) { best_error = error; best_selector = i; } } pDXT1_block->set_selector(x & 3, y & 3, 
best_selector); } break; } case cAlphaDXT5: { dxt5_block* pDXT5_block = reinterpret_cast(pElement); uint values[cDXT5SelectorValues]; dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); const int comp_index = m_element_component_index[element_index]; uint best_error = UINT_MAX; uint best_selector = 0; for (uint i = 0; i < cDXT5SelectorValues; i++) { uint error = labs((int)values[i] - (int)c[comp_index]); // no need to square if (error < best_error) { best_error = error; best_selector = i; } } pDXT5_block->set_selector(x & 3, y & 3, best_selector); break; } case cAlphaDXT3: { const int comp_index = m_element_component_index[element_index]; dxt3_block* pDXT3_block = reinterpret_cast(pElement); pDXT3_block->set_alpha(x & 3, y & 3, c[comp_index], true); break; } default: break; } } // element_index } bool dxt_image::get_block_pixels(uint block_x, uint block_y, color_quad_u8* pPixels) const { bool success = true; const element* pElement = &get_element(block_x, block_y, 0); for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { switch (m_element_type[element_index]) { case cColorETC1: { const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); // Preserve alpha if the format is something weird (like ETC1 for color and DXT5A for alpha) - which isn't currently supported. 
#if CRNLIB_USE_RG_ETC1 if (!rg_etc1::unpack_etc1_block(&block, (uint32*)pPixels, m_format != cETC1)) success = false; #else if (!unpack_etc1(block, pPixels, m_format != cETC1)) success = false; #endif break; } case cColorETC2: { const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); if (!rg_etc1::unpack_etc2_color(&block, (uint32*)pPixels, m_format != cETC2)) success = false; break; } case cAlphaETC2: { const etc1_block& block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); if (!rg_etc1::unpack_etc2_alpha(&block, (uint32*)pPixels, m_element_component_index[element_index])) success = false; break; } case cColorDXT1: { const dxt1_block* pDXT1_block = reinterpret_cast(pElement); color_quad_u8 colors[cDXT1SelectorValues]; pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { uint s = pDXT1_block->get_selector(i & 3, i >> 2); pPixels[i].r = colors[s].r; pPixels[i].g = colors[s].g; pPixels[i].b = colors[s].b; if (m_format <= cDXT1A) pPixels[i].a = colors[s].a; } break; } case cAlphaDXT5: { const dxt5_block* pDXT5_block = reinterpret_cast(pElement); uint values[cDXT5SelectorValues]; dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); const int comp_index = m_element_component_index[element_index]; for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { uint s = pDXT5_block->get_selector(i & 3, i >> 2); pPixels[i][comp_index] = static_cast(values[s]); } break; } case cAlphaDXT3: { const dxt3_block* pDXT3_block = reinterpret_cast(pElement); const int comp_index = m_element_component_index[element_index]; for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { uint a = pDXT3_block->get_alpha(i & 3, i >> 2, true); pPixels[i][comp_index] = static_cast(a); } break; } default: break; } } // element_index return success; } void 
dxt_image::set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p) { set_block_pixels_context context; set_block_pixels(block_x, block_y, pPixels, p, context); } void dxt_image::set_block_pixels( uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p, set_block_pixels_context& context) { element* pElement = &get_element(block_x, block_y, 0); if (m_format == cETC1) { etc1_block& dst_block = *reinterpret_cast(pElement); #if CRNLIB_USE_RG_ETC1 rg_etc1::etc1_quality etc_quality = rg_etc1::cHighQuality; if (p.m_quality <= cCRNDXTQualityFast) etc_quality = rg_etc1::cLowQuality; else if (p.m_quality <= cCRNDXTQualityNormal) etc_quality = rg_etc1::cMediumQuality; rg_etc1::etc1_pack_params pack_params; pack_params.m_dithering = p.m_dithering; //pack_params.m_perceptual = p.m_perceptual; pack_params.m_quality = etc_quality; rg_etc1::pack_etc1_block(&dst_block, (uint32*)pPixels, pack_params); #else crn_etc_quality etc_quality = cCRNETCQualitySlow; if (p.m_quality <= cCRNDXTQualityFast) etc_quality = cCRNETCQualityFast; else if (p.m_quality <= cCRNDXTQualityNormal) etc_quality = cCRNETCQualityMedium; crn_etc1_pack_params pack_params; pack_params.m_perceptual = p.m_perceptual; pack_params.m_quality = etc_quality; pack_params.m_dithering = p.m_dithering; pack_etc1_block(dst_block, pPixels, pack_params, context.m_etc1_optimizer); #endif } else if (m_format == cETC2) { etc1_block& dst_block = *reinterpret_cast(pElement); rg_etc1::etc1_pack_params pack_params; pack_params.m_dithering = p.m_dithering; pack_params.m_quality = p.m_quality <= cCRNDXTQualityFast ? rg_etc1::cLowQuality : p.m_quality <= cCRNDXTQualityNormal ? rg_etc1::cMediumQuality : rg_etc1::cHighQuality; rg_etc1::pack_etc1_block(&dst_block, (uint32*)pPixels, pack_params); } else if (m_format == cETC2A) { rg_etc1::etc1_quality etc_quality = p.m_quality <= cCRNDXTQualityFast ? rg_etc1::cLowQuality : p.m_quality <= cCRNDXTQualityNormal ? 
rg_etc1::cMediumQuality : rg_etc1::cHighQuality; for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { if (m_element_type[element_index] == cAlphaETC2) { rg_etc1::etc2a_pack_params pack_params; pack_params.m_quality = etc_quality; pack_params.comp_index = m_element_component_index[element_index]; rg_etc1::pack_etc2_alpha(pElement, (uint32*)pPixels, pack_params); } else { rg_etc1::etc1_pack_params pack_params; pack_params.m_dithering = p.m_dithering; pack_params.m_quality = etc_quality; rg_etc1::pack_etc1_block(pElement, (uint32*)pPixels, pack_params); } } } else if (m_format == cETC1S) { crn_etc1_pack_params pack_params; pack_params.m_perceptual = p.m_perceptual; pack_params.m_dithering = p.m_dithering; pack_params.m_quality = p.m_quality <= cCRNDXTQualityFast ? cCRNETCQualityFast : p.m_quality <= cCRNDXTQualityNormal ? cCRNETCQualityMedium : cCRNETCQualitySlow; pack_etc1s_block(*(etc1_block*)pElement, pPixels, pack_params); } else if (m_format == cETC2AS) { for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { if (m_element_type[element_index] == cAlphaETC2) { rg_etc1::etc2a_pack_params pack_params; pack_params.m_quality = p.m_quality <= cCRNDXTQualityFast ? rg_etc1::cLowQuality : p.m_quality <= cCRNDXTQualityNormal ? rg_etc1::cMediumQuality : rg_etc1::cHighQuality; pack_params.comp_index = m_element_component_index[element_index]; rg_etc1::pack_etc2_alpha(pElement, (uint32*)pPixels, pack_params); } else { crn_etc1_pack_params pack_params; pack_params.m_perceptual = p.m_perceptual; pack_params.m_dithering = p.m_dithering; pack_params.m_quality = p.m_quality <= cCRNDXTQualityFast ? cCRNETCQualityFast : p.m_quality <= cCRNDXTQualityNormal ? 
cCRNETCQualityMedium : cCRNETCQualitySlow; pack_etc1s_block(*(etc1_block*)pElement, pPixels, pack_params); } } } else #if CRNLIB_SUPPORT_SQUISH if ((p.m_compressor == cCRNDXTCompressorSquish) && ((m_format == cDXT1) || (m_format == cDXT1A) || (m_format == cDXT3) || (m_format == cDXT5) || (m_format == cDXT5A))) { uint squish_flags = 0; if ((m_format == cDXT1) || (m_format == cDXT1A)) squish_flags = squish::kDxt1; else if (m_format == cDXT3) squish_flags = squish::kDxt3; else if (m_format == cDXT5A) squish_flags = squish::kDxt5A; else squish_flags = squish::kDxt5; if (p.m_perceptual) squish_flags |= squish::kColourMetricPerceptual; else squish_flags |= squish::kColourMetricUniform; if (p.m_quality >= cCRNDXTQualityBetter) squish_flags |= squish::kColourIterativeClusterFit; else if (p.m_quality == cCRNDXTQualitySuperFast) squish_flags |= squish::kColourRangeFit; color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; memcpy(pixels, pPixels, sizeof(color_quad_u8) * cDXTBlockSize * cDXTBlockSize); if (m_format == cDXT1) { for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) pixels[i].a = 255; } else if (m_format == cDXT1A) { for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) if (pixels[i].a < p.m_dxt1a_alpha_threshold) pixels[i].a = 0; else pixels[i].a = 255; } squish::Compress(reinterpret_cast(pixels), pElement, squish_flags); } else #endif // CRNLIB_SUPPORT_SQUISH // RYG doesn't support DXT1A if ((p.m_compressor == cCRNDXTCompressorRYG) && ((m_format == cDXT1) || (m_format == cDXT5) || (m_format == cDXT5A))) { color_quad_u8 pixels[cDXTBlockSize * cDXTBlockSize]; for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { pixels[i].r = pPixels[i].b; pixels[i].g = pPixels[i].g; pixels[i].b = pPixels[i].r; if (m_format == cDXT1) pixels[i].a = 255; else pixels[i].a = pPixels[i].a; } if (m_format == cDXT5A) ryg_dxt::sCompressDXT5ABlock((sU8*)pElement, (const sU32*)pixels); else ryg_dxt::sCompressDXTBlock((sU8*)pElement, (const sU32*)pixels, m_format == cDXT5, 0); } 
else if ((p.m_compressor == cCRNDXTCompressorCRNF) && (m_format != cDXT1A)) { for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { switch (m_element_type[element_index]) { case cColorDXT1: { dxt1_block* pDXT1_block = reinterpret_cast(pElement); dxt_fast::compress_color_block(pDXT1_block, pPixels, p.m_quality >= cCRNDXTQualityNormal); break; } case cAlphaDXT5: { dxt5_block* pDXT5_block = reinterpret_cast(pElement); dxt_fast::compress_alpha_block(pDXT5_block, pPixels, m_element_component_index[element_index]); break; } case cAlphaDXT3: { const int comp_index = m_element_component_index[element_index]; dxt3_block* pDXT3_block = reinterpret_cast(pElement); for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) pDXT3_block->set_alpha(i & 3, i >> 2, pPixels[i][comp_index], true); break; } default: break; } } } else { dxt1_endpoint_optimizer& dxt1_optimizer = context.m_dxt1_optimizer; dxt5_endpoint_optimizer& dxt5_optimizer = context.m_dxt5_optimizer; for (uint element_index = 0; element_index < m_num_elements_per_block; element_index++, pElement++) { switch (m_element_type[element_index]) { case cColorDXT1: { dxt1_block* pDXT1_block = reinterpret_cast(pElement); bool pixels_have_alpha = false; if (m_format == cDXT1A) { for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) if (pPixels[i].a < p.m_dxt1a_alpha_threshold) { pixels_have_alpha = true; break; } } dxt1_endpoint_optimizer::results results; uint8 selectors[cDXTBlockSize * cDXTBlockSize]; results.m_pSelectors = selectors; dxt1_endpoint_optimizer::params params; params.m_block_index = block_x + block_y * m_blocks_x; params.m_quality = p.m_quality; params.m_perceptual = p.m_perceptual; params.m_grayscale_sampling = p.m_grayscale_sampling; params.m_pixels_have_alpha = pixels_have_alpha; params.m_use_alpha_blocks = p.m_use_both_block_types; params.m_use_transparent_indices_for_black = p.m_use_transparent_indices_for_black; params.m_dxt1a_alpha_threshold = 
p.m_dxt1a_alpha_threshold; params.m_pPixels = pPixels; params.m_num_pixels = cDXTBlockSize * cDXTBlockSize; params.m_endpoint_caching = p.m_endpoint_caching; if ((m_format != cDXT1) && (m_format != cDXT1A)) params.m_use_alpha_blocks = false; if (!dxt1_optimizer.compute(params, results)) { CRNLIB_ASSERT(0); break; } pDXT1_block->set_low_color(results.m_low_color); pDXT1_block->set_high_color(results.m_high_color); for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) pDXT1_block->set_selector(i & 3, i >> 2, selectors[i]); break; } case cAlphaDXT5: { dxt5_block* pDXT5_block = reinterpret_cast(pElement); dxt5_endpoint_optimizer::results results; uint8 selectors[cDXTBlockSize * cDXTBlockSize]; results.m_pSelectors = selectors; dxt5_endpoint_optimizer::params params; params.m_block_index = block_x + block_y * m_blocks_x; params.m_pPixels = pPixels; params.m_num_pixels = cDXTBlockSize * cDXTBlockSize; params.m_comp_index = m_element_component_index[element_index]; params.m_quality = p.m_quality; params.m_use_both_block_types = p.m_use_both_block_types; if (!dxt5_optimizer.compute(params, results)) { CRNLIB_ASSERT(0); break; } pDXT5_block->set_low_alpha(results.m_first_endpoint); pDXT5_block->set_high_alpha(results.m_second_endpoint); for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) pDXT5_block->set_selector(i & 3, i >> 2, selectors[i]); break; } case cAlphaDXT3: { const int comp_index = m_element_component_index[element_index]; dxt3_block* pDXT3_block = reinterpret_cast(pElement); for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) pDXT3_block->set_alpha(i & 3, i >> 2, pPixels[i][comp_index], true); break; } default: break; } } } } void dxt_image::get_block_endpoints(uint block_x, uint block_y, uint element_index, uint& packed_low_endpoint, uint& packed_high_endpoint) const { const element& block = get_element(block_x, block_y, element_index); switch (m_element_type[element_index]) { case cColorETC1: { const etc1_block& src_block = 
*reinterpret_cast(&block); if (src_block.get_diff_bit()) { packed_low_endpoint = src_block.get_base5_color(); packed_high_endpoint = src_block.get_delta3_color(); } else { packed_low_endpoint = src_block.get_base4_color(0); packed_high_endpoint = src_block.get_base4_color(1); } break; } case cColorDXT1: { const dxt1_block& block1 = *reinterpret_cast(&block); packed_low_endpoint = block1.get_low_color(); packed_high_endpoint = block1.get_high_color(); break; } case cAlphaDXT5: { const dxt5_block& block5 = *reinterpret_cast(&block); packed_low_endpoint = block5.get_low_alpha(); packed_high_endpoint = block5.get_high_alpha(); break; } case cAlphaDXT3: { packed_low_endpoint = 0; packed_high_endpoint = 255; break; } default: break; } } int dxt_image::get_block_endpoints(uint block_x, uint block_y, uint element_index, color_quad_u8& low_endpoint, color_quad_u8& high_endpoint, bool scaled) const { uint l = 0, h = 0; get_block_endpoints(block_x, block_y, element_index, l, h); switch (m_element_type[element_index]) { case cColorETC1: { const etc1_block& src_block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); if (src_block.get_diff_bit()) { low_endpoint = etc1_block::unpack_color5(static_cast(l), scaled); etc1_block::unpack_color5(high_endpoint, static_cast(l), static_cast(h), scaled); } else { low_endpoint = etc1_block::unpack_color4(static_cast(l), scaled); high_endpoint = etc1_block::unpack_color4(static_cast(h), scaled); } return -1; } case cColorDXT1: { uint r, g, b; dxt1_block::unpack_color(r, g, b, static_cast(l), scaled); low_endpoint.r = static_cast(r); low_endpoint.g = static_cast(g); low_endpoint.b = static_cast(b); dxt1_block::unpack_color(r, g, b, static_cast(h), scaled); high_endpoint.r = static_cast(r); high_endpoint.g = static_cast(g); high_endpoint.b = static_cast(b); return -1; } case cAlphaDXT5: { const int component = m_element_component_index[element_index]; low_endpoint[component] = static_cast(l); high_endpoint[component] = 
static_cast(h); return component; } case cAlphaDXT3: { const int component = m_element_component_index[element_index]; low_endpoint[component] = static_cast(l); high_endpoint[component] = static_cast(h); return component; } default: break; } return 0; } uint dxt_image::get_block_colors(uint block_x, uint block_y, uint element_index, color_quad_u8* pColors, uint subblock_index) { const element& block = get_element(block_x, block_y, element_index); switch (m_element_type[element_index]) { case cColorETC1: { const etc1_block& src_block = *reinterpret_cast(&get_element(block_x, block_y, element_index)); const uint table_index0 = src_block.get_inten_table(0); const uint table_index1 = src_block.get_inten_table(1); if (src_block.get_diff_bit()) { const uint16 base_color5 = src_block.get_base5_color(); const uint16 delta_color3 = src_block.get_delta3_color(); if (subblock_index) etc1_block::get_diff_subblock_colors(pColors, base_color5, delta_color3, table_index1); else etc1_block::get_diff_subblock_colors(pColors, base_color5, table_index0); } else { if (subblock_index) { const uint16 base_color4_1 = src_block.get_base4_color(1); etc1_block::get_abs_subblock_colors(pColors, base_color4_1, table_index1); } else { const uint16 base_color4_0 = src_block.get_base4_color(0); etc1_block::get_abs_subblock_colors(pColors, base_color4_0, table_index0); } } break; } case cColorDXT1: { const dxt1_block& block1 = *reinterpret_cast(&block); return dxt1_block::get_block_colors(pColors, static_cast(block1.get_low_color()), static_cast(block1.get_high_color())); } case cAlphaDXT5: { const dxt5_block& block5 = *reinterpret_cast(&block); uint values[cDXT5SelectorValues]; const uint n = dxt5_block::get_block_values(values, block5.get_low_alpha(), block5.get_high_alpha()); const int comp_index = m_element_component_index[element_index]; for (uint i = 0; i < n; i++) pColors[i][comp_index] = static_cast(values[i]); return n; } case cAlphaDXT3: { const int comp_index = 
m_element_component_index[element_index]; for (uint i = 0; i < 16; i++) pColors[i][comp_index] = static_cast((i << 4) | i); return 16; } default: break; } return 0; } uint dxt_image::get_subblock_index(uint x, uint y, uint element_index) const { if (m_element_type[element_index] != cColorETC1) return 0; const uint block_x = x >> cDXTBlockShift; const uint block_y = y >> cDXTBlockShift; const element& block = get_element(block_x, block_y, element_index); const etc1_block& src_block = *reinterpret_cast(&block); if (src_block.get_flip_bit()) { return ((y & 3) >= 2) ? 1 : 0; } else { return ((x & 3) >= 2) ? 1 : 0; } } uint dxt_image::get_total_subblocks(uint element_index) const { return (m_element_type[element_index] == cColorETC1) ? 2 : 0; } uint dxt_image::get_selector(uint x, uint y, uint element_index) const { CRNLIB_ASSERT((x < m_width) && (y < m_height)); const uint block_x = x >> cDXTBlockShift; const uint block_y = y >> cDXTBlockShift; const element& block = get_element(block_x, block_y, element_index); switch (m_element_type[element_index]) { case cColorETC1: { const etc1_block& src_block = *reinterpret_cast(&block); return src_block.get_selector(x & 3, y & 3); } case cColorDXT1: { const dxt1_block& block1 = *reinterpret_cast(&block); return block1.get_selector(x & 3, y & 3); } case cAlphaDXT5: { const dxt5_block& block5 = *reinterpret_cast(&block); return block5.get_selector(x & 3, y & 3); } case cAlphaDXT3: { const dxt3_block& block3 = *reinterpret_cast(&block); return block3.get_alpha(x & 3, y & 3, false); } default: break; } return 0; } void dxt_image::change_dxt1_to_dxt1a() { if (m_format == cDXT1) m_format = cDXT1A; } void dxt_image::flip_col(uint x) { const uint other_x = (m_blocks_x - 1) - x; for (uint y = 0; y < m_blocks_y; y++) { for (uint e = 0; e < get_elements_per_block(); e++) { element tmp[2] = {get_element(x, y, e), get_element(other_x, y, e)}; for (uint i = 0; i < 2; i++) { switch (get_element_type(e)) { case cColorDXT1: 
reinterpret_cast(&tmp[i])->flip_x(); break; case cAlphaDXT3: reinterpret_cast(&tmp[i])->flip_x(); break; case cAlphaDXT5: reinterpret_cast(&tmp[i])->flip_x(); break; default: CRNLIB_ASSERT(0); break; } } get_element(x, y, e) = tmp[1]; get_element(other_x, y, e) = tmp[0]; } } } void dxt_image::flip_row(uint y) { const uint other_y = (m_blocks_y - 1) - y; for (uint x = 0; x < m_blocks_x; x++) { for (uint e = 0; e < get_elements_per_block(); e++) { element tmp[2] = {get_element(x, y, e), get_element(x, other_y, e)}; for (uint i = 0; i < 2; i++) { switch (get_element_type(e)) { case cColorDXT1: reinterpret_cast(&tmp[i])->flip_y(); break; case cAlphaDXT3: reinterpret_cast(&tmp[i])->flip_y(); break; case cAlphaDXT5: reinterpret_cast(&tmp[i])->flip_y(); break; default: CRNLIB_ASSERT(0); break; } } get_element(x, y, e) = tmp[1]; get_element(x, other_y, e) = tmp[0]; } } } bool dxt_image::can_flip(uint axis_index) { if (m_format == cETC1 || m_format == cETC2 || m_format == cETC2A || m_format == cETC1S || m_format == cETC2AS) { // Can't reliably flip ETCn textures (because of asymmetry in the 555/333 differential coding of subblock colors). return false; } uint d; if (axis_index) d = m_height; else d = m_width; if (d & 3) { if (d > 4) return false; } return true; } bool dxt_image::flip_x() { if (m_format == cETC1 || m_format == cETC2 || m_format == cETC2A || m_format == cETC1S || m_format == cETC2AS) { // Can't reliably flip ETCn textures (because of asymmetry in the 555/333 differential coding of subblock colors). 
return false; } if ((m_width & 3) && (m_width > 4)) return false; if (m_width == 1) return true; const uint mid_x = m_blocks_x / 2; for (uint x = 0; x < mid_x; x++) flip_col(x); if (m_blocks_x & 1) { const uint w = math::minimum(m_width, 4U); for (uint y = 0; y < m_blocks_y; y++) { for (uint e = 0; e < get_elements_per_block(); e++) { element tmp(get_element(mid_x, y, e)); switch (get_element_type(e)) { case cColorDXT1: reinterpret_cast(&tmp)->flip_x(w, 4); break; case cAlphaDXT3: reinterpret_cast(&tmp)->flip_x(w, 4); break; case cAlphaDXT5: reinterpret_cast(&tmp)->flip_x(w, 4); break; default: CRNLIB_ASSERT(0); break; } get_element(mid_x, y, e) = tmp; } } } return true; } bool dxt_image::flip_y() { if (m_format == cETC1 || m_format == cETC2 || m_format == cETC2A || m_format == cETC1S || m_format == cETC2AS) { // Can't reliably flip ETCn textures (because of asymmetry in the 555/333 differential coding of subblock colors). return false; } if ((m_height & 3) && (m_height > 4)) return false; if (m_height == 1) return true; const uint mid_y = m_blocks_y / 2; for (uint y = 0; y < mid_y; y++) flip_row(y); if (m_blocks_y & 1) { const uint h = math::minimum(m_height, 4U); for (uint x = 0; x < m_blocks_x; x++) { for (uint e = 0; e < get_elements_per_block(); e++) { element tmp(get_element(x, mid_y, e)); switch (get_element_type(e)) { case cColorDXT1: reinterpret_cast(&tmp)->flip_y(4, h); break; case cAlphaDXT3: reinterpret_cast(&tmp)->flip_y(4, h); break; case cAlphaDXT5: reinterpret_cast(&tmp)->flip_y(4, h); break; default: CRNLIB_ASSERT(0); break; } get_element(x, mid_y, e) = tmp; } } } return true; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dxt_image.h000066400000000000000000000210461503722002600222470ustar00rootroot00000000000000// File: crn_dxt_image.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dxt1.h" #include "crn_dxt5a.h" #include "crn_etc.h" #if CRNLIB_SUPPORT_ETC_A1 #include "crn_etc_a1.h" #endif 
#include "crn_image.h" #define CRNLIB_SUPPORT_ATI_COMPRESS 0 namespace crnlib { class task_pool; class dxt_image { public: dxt_image(); dxt_image(const dxt_image& other); dxt_image& operator=(const dxt_image& rhs); void clear(); inline bool is_valid() const { return m_blocks_x > 0; } uint get_width() const { return m_width; } uint get_height() const { return m_height; } uint get_blocks_x() const { return m_blocks_x; } uint get_blocks_y() const { return m_blocks_y; } uint get_total_blocks() const { return m_blocks_x * m_blocks_y; } uint get_elements_per_block() const { return m_num_elements_per_block; } uint get_bytes_per_block() const { return m_bytes_per_block; } dxt_format get_format() const { return m_format; } bool has_color() const { return (m_format == cDXT1) || (m_format == cDXT1A) || (m_format == cDXT3) || (m_format == cDXT5) || (m_format == cETC1) || (m_format == cETC2) || (m_format == cETC2A) || (m_format == cETC1S) || (m_format == cETC2AS); } // Will be pretty slow if the image is DXT1, as this method scans for alpha blocks/selectors. 
bool has_alpha() const; enum element_type { cUnused = 0, cColorDXT1, // DXT1 color block cAlphaDXT3, // DXT3 alpha block (only) cAlphaDXT5, // DXT5 alpha block (only) cColorETC1, // ETC1 color block cColorETC2, // ETC2 color block cAlphaETC2, // ETC2 alpha block (only) }; element_type get_element_type(uint element_index) const { CRNLIB_ASSERT(element_index < m_num_elements_per_block); return m_element_type[element_index]; } //Returns -1 for RGB, or [0,3] int8 get_element_component_index(uint element_index) const { CRNLIB_ASSERT(element_index < m_num_elements_per_block); return m_element_component_index[element_index]; } struct element { uint8 m_bytes[8]; uint get_le_word(uint index) const { CRNLIB_ASSERT(index < 4); return m_bytes[index * 2] | (m_bytes[index * 2 + 1] << 8); } uint get_be_word(uint index) const { CRNLIB_ASSERT(index < 4); return m_bytes[index * 2 + 1] | (m_bytes[index * 2] << 8); } void set_le_word(uint index, uint val) { CRNLIB_ASSERT((index < 4) && (val <= cUINT16_MAX)); m_bytes[index * 2] = static_cast(val & 0xFF); m_bytes[index * 2 + 1] = static_cast((val >> 8) & 0xFF); } void set_be_word(uint index, uint val) { CRNLIB_ASSERT((index < 4) && (val <= cUINT16_MAX)); m_bytes[index * 2 + 1] = static_cast(val & 0xFF); m_bytes[index * 2] = static_cast((val >> 8) & 0xFF); } void clear() { memset(this, 0, sizeof(*this)); } }; typedef crnlib::vector element_vec; bool init(dxt_format fmt, uint width, uint height, bool clear_elements); bool init(dxt_format fmt, uint width, uint height, uint num_elements, element* pElements, bool create_copy); struct pack_params { pack_params() { clear(); } void clear() { m_quality = cCRNDXTQualityUber; m_perceptual = true; m_dithering = false; m_grayscale_sampling = false; m_use_both_block_types = true; m_endpoint_caching = true; m_compressor = cCRNDXTCompressorCRN; m_pProgress_callback = NULL; m_pProgress_callback_user_data_ptr = NULL; m_dxt1a_alpha_threshold = 128; m_num_helper_threads = 0; m_progress_start = 0; 
m_progress_range = 100; m_use_transparent_indices_for_black = false; m_pTask_pool = NULL; } void init(const crn_comp_params& params) { m_perceptual = (params.m_flags & cCRNCompFlagPerceptual) != 0; m_num_helper_threads = params.m_num_helper_threads; m_use_both_block_types = (params.m_flags & cCRNCompFlagUseBothBlockTypes) != 0; m_use_transparent_indices_for_black = (params.m_flags & cCRNCompFlagUseTransparentIndicesForBlack) != 0; m_dxt1a_alpha_threshold = params.m_dxt1a_alpha_threshold; m_quality = params.m_dxt_quality; m_endpoint_caching = (params.m_flags & cCRNCompFlagDisableEndpointCaching) == 0; m_grayscale_sampling = (params.m_flags & cCRNCompFlagGrayscaleSampling) != 0; m_compressor = params.m_dxt_compressor_type; } uint m_dxt1a_alpha_threshold; uint m_num_helper_threads; crn_dxt_quality m_quality; crn_dxt_compressor_type m_compressor; bool m_perceptual; bool m_dithering; bool m_grayscale_sampling; bool m_use_both_block_types; bool m_endpoint_caching; bool m_use_transparent_indices_for_black; typedef bool (*progress_callback_func)(uint percentage_complete, void* pUser_data_ptr); progress_callback_func m_pProgress_callback; void* m_pProgress_callback_user_data_ptr; uint m_progress_start; uint m_progress_range; task_pool* m_pTask_pool; }; bool init(dxt_format fmt, const image_u8& img, const pack_params& p = dxt_image::pack_params()); bool unpack(image_u8& img) const; void endian_swap(); uint get_total_elements() const { return m_elements.size(); } const element_vec& get_element_vec() const { return m_elements; } element_vec& get_element_vec() { return m_elements; } const element& get_element(uint block_x, uint block_y, uint element_index) const; element& get_element(uint block_x, uint block_y, uint element_index); const element* get_element_ptr() const { return m_pElements; } element* get_element_ptr() { return m_pElements; } uint get_size_in_bytes() const { return m_elements.size() * sizeof(element); } uint get_row_pitch_in_bytes() const { return m_blocks_x * 
m_bytes_per_block; } color_quad_u8 get_pixel(uint x, uint y) const; uint get_pixel_alpha(uint x, uint y, uint element_index) const; void set_pixel(uint x, uint y, const color_quad_u8& c, bool perceptual = true); // get_block_pixels() only sets those components stored in the image! bool get_block_pixels(uint block_x, uint block_y, color_quad_u8* pPixels) const; struct set_block_pixels_context { dxt1_endpoint_optimizer m_dxt1_optimizer; dxt5_endpoint_optimizer m_dxt5_optimizer; pack_etc1_block_context m_etc1_optimizer; #if CRNLIB_SUPPORT_ETC_A1 etc_a1::pack_etc1_block_context m_etc1_a1_optimizer; #endif }; void set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p, set_block_pixels_context& context); void set_block_pixels(uint block_x, uint block_y, const color_quad_u8* pPixels, const pack_params& p); void get_block_endpoints(uint block_x, uint block_y, uint element_index, uint& packed_low_endpoint, uint& packed_high_endpoint) const; // Returns a value representing the component(s) that where actually set, where -1 = RGB. // This method does not always set every component! int get_block_endpoints(uint block_x, uint block_y, uint element_index, color_quad_u8& low_endpoint, color_quad_u8& high_endpoint, bool scaled = true) const; // pColors should point to a 16 entry array, to handle DXT3. // Returns the number of block colors: 3, 4, 6, 8, or 16. uint get_block_colors(uint block_x, uint block_y, uint element_index, color_quad_u8* pColors, uint subblock_index = 0); uint get_subblock_index(uint x, uint y, uint element_index) const; uint get_total_subblocks(uint element_index) const; uint get_selector(uint x, uint y, uint element_index) const; void change_dxt1_to_dxt1a(); bool can_flip(uint axis_index); // Returns true if the texture can actually be flipped. 
bool flip_x(); bool flip_y(); private: element_vec m_elements; element* m_pElements; uint m_width; uint m_height; uint m_blocks_x; uint m_blocks_y; uint m_total_blocks; uint m_total_elements; uint m_num_elements_per_block; // 1 or 2 uint m_bytes_per_block; // 8 or 16 int8 m_element_component_index[2]; element_type m_element_type[2]; dxt_format m_format; // DXT1, 1A, 3, 5, N/3DC, or 5A bool init_internal(dxt_format fmt, uint width, uint height); void init_task(uint64 data, void* pData_ptr); #if CRNLIB_SUPPORT_ATI_COMPRESS bool init_ati_compress(dxt_format fmt, const image_u8& img, const pack_params& p); #endif void flip_col(uint x); void flip_row(uint y); }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dynamic_stream.h000066400000000000000000000101471503722002600233050ustar00rootroot00000000000000// File: crn_dynamic_stream.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_data_stream.h" namespace crnlib { class dynamic_stream : public data_stream { public: dynamic_stream(uint initial_size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) : data_stream(pName, attribs), m_ofs(0) { open(initial_size, pName, attribs); } dynamic_stream(const void* pBuf, uint size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) : data_stream(pName, attribs), m_ofs(0) { open(pBuf, size, pName, attribs); } dynamic_stream() : data_stream(), m_ofs(0) { open(); } virtual ~dynamic_stream() { } bool open(uint initial_size = 0, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) { close(); m_opened = true; m_buf.clear(); m_buf.resize(initial_size); m_ofs = 0; m_name.set(pName ? 
pName : "dynamic_stream"); m_attribs = static_cast(attribs); return true; } bool reopen(const char* pName, uint attribs) { if (!m_opened) { return open(0, pName, attribs); } m_name.set(pName ? pName : "dynamic_stream"); m_attribs = static_cast(attribs); return true; } bool open(const void* pBuf, uint size, const char* pName = "dynamic_stream", uint attribs = cDataStreamSeekable | cDataStreamWritable | cDataStreamReadable) { if (!m_opened) { m_opened = true; m_buf.resize(size); if (size) { CRNLIB_ASSERT(pBuf); memcpy(&m_buf[0], pBuf, size); } m_ofs = 0; m_name.set(pName ? pName : "dynamic_stream"); m_attribs = static_cast(attribs); return true; } return false; } virtual bool close() { if (m_opened) { m_opened = false; m_buf.clear(); m_ofs = 0; return true; } return false; } const crnlib::vector& get_buf() const { return m_buf; } crnlib::vector& get_buf() { return m_buf; } void reserve(uint size) { if (m_opened) { m_buf.reserve(size); } } virtual const void* get_ptr() const { return m_buf.empty() ? 
NULL : &m_buf[0]; } virtual uint read(void* pBuf, uint len) { CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); if ((!m_opened) || (!is_readable()) || (!len)) return 0; CRNLIB_ASSERT(m_ofs <= m_buf.size()); uint bytes_left = m_buf.size() - m_ofs; len = math::minimum(len, bytes_left); if (len) memcpy(pBuf, &m_buf[m_ofs], len); m_ofs += len; return len; } virtual uint write(const void* pBuf, uint len) { CRNLIB_ASSERT(pBuf && (len <= 0x7FFFFFFF)); if ((!m_opened) || (!is_writable()) || (!len)) return 0; CRNLIB_ASSERT(m_ofs <= m_buf.size()); uint new_ofs = m_ofs + len; if (new_ofs > m_buf.size()) m_buf.resize(new_ofs); memcpy(&m_buf[m_ofs], pBuf, len); m_ofs = new_ofs; return len; } virtual bool flush() { if (!m_opened) return false; return true; } virtual uint64 get_size() { if (!m_opened) return 0; return m_buf.size(); } virtual uint64 get_remaining() { if (!m_opened) return 0; CRNLIB_ASSERT(m_ofs <= m_buf.size()); return m_buf.size() - m_ofs; } virtual uint64 get_ofs() { if (!m_opened) return 0; return m_ofs; } virtual bool seek(int64 ofs, bool relative) { if ((!m_opened) || (!is_seekable())) return false; int64 new_ofs = relative ? (m_ofs + ofs) : ofs; if (new_ofs < 0) return false; else if (new_ofs > m_buf.size()) return false; m_ofs = static_cast(new_ofs); post_seek(); return true; } private: crnlib::vector m_buf; uint m_ofs; }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_dynamic_string.cpp000066400000000000000000000314121503722002600236510ustar00rootroot00000000000000// File: crn_dynamic_string.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_strutils.h" namespace crnlib { dynamic_string g_empty_dynamic_string; dynamic_string::dynamic_string(eVarArg, const char* p, ...) 
: m_buf_size(0), m_len(0), m_pStr(NULL) { CRNLIB_ASSERT(p); va_list args; va_start(args, p); format_args(p, args); va_end(args); } dynamic_string::dynamic_string(const char* p) : m_buf_size(0), m_len(0), m_pStr(NULL) { CRNLIB_ASSERT(p); set(p); } dynamic_string::dynamic_string(const char* p, uint len) : m_buf_size(0), m_len(0), m_pStr(NULL) { CRNLIB_ASSERT(p); set_from_buf(p, len); } dynamic_string::dynamic_string(const dynamic_string& other) : m_buf_size(0), m_len(0), m_pStr(NULL) { set(other); } void dynamic_string::clear() { check(); if (m_pStr) { crnlib_delete_array(m_pStr); m_pStr = NULL; m_len = 0; m_buf_size = 0; } } void dynamic_string::empty() { truncate(0); } void dynamic_string::optimize() { if (!m_len) clear(); else { uint min_buf_size = math::next_pow2((uint)m_len + 1); if (m_buf_size > min_buf_size) { char* p = crnlib_new_array(min_buf_size); memcpy(p, m_pStr, m_len + 1); crnlib_delete_array(m_pStr); m_pStr = p; m_buf_size = static_cast(min_buf_size); check(); } } } int dynamic_string::compare(const char* p, bool case_sensitive) const { CRNLIB_ASSERT(p); const int result = (case_sensitive ? 
strcmp : crnlib_stricmp)(get_ptr_priv(), p); if (result < 0) return -1; else if (result > 0) return 1; return 0; } int dynamic_string::compare(const dynamic_string& rhs, bool case_sensitive) const { return compare(rhs.get_ptr_priv(), case_sensitive); } dynamic_string& dynamic_string::set(const char* p, uint max_len) { CRNLIB_ASSERT(p); const uint len = math::minimum(max_len, static_cast(strlen(p))); CRNLIB_ASSERT(len < cUINT16_MAX); if ((!len) || (len >= cUINT16_MAX)) clear(); else if ((m_pStr) && (p >= m_pStr) && (p < (m_pStr + m_buf_size))) { if (m_pStr != p) memmove(m_pStr, p, len); m_pStr[len] = '\0'; m_len = static_cast(len); } else if (ensure_buf(len, false)) { m_len = static_cast(len); memcpy(m_pStr, p, m_len + 1); } check(); return *this; } dynamic_string& dynamic_string::set(const dynamic_string& other, uint max_len) { if (this == &other) { if (max_len < m_len) { m_pStr[max_len] = '\0'; m_len = static_cast(max_len); } } else { const uint len = math::minimum(max_len, other.m_len); if (!len) clear(); else if (ensure_buf(len, false)) { m_len = static_cast(len); memcpy(m_pStr, other.get_ptr_priv(), m_len); m_pStr[len] = '\0'; } } check(); return *this; } bool dynamic_string::set_len(uint new_len, char fill_char) { if ((new_len >= cUINT16_MAX) || (!fill_char)) { CRNLIB_ASSERT(0); return false; } uint cur_len = m_len; if (ensure_buf(new_len, true)) { if (new_len > cur_len) memset(m_pStr + cur_len, fill_char, new_len - cur_len); m_pStr[new_len] = 0; m_len = static_cast(new_len); check(); } return true; } dynamic_string& dynamic_string::set_from_raw_buf_and_assume_ownership(char* pBuf, uint buf_size_in_chars, uint len_in_chars) { CRNLIB_ASSERT(buf_size_in_chars <= cUINT16_MAX); CRNLIB_ASSERT(math::is_power_of_2(buf_size_in_chars) || (buf_size_in_chars == cUINT16_MAX)); CRNLIB_ASSERT((len_in_chars + 1) <= buf_size_in_chars); clear(); m_pStr = pBuf; m_buf_size = static_cast(buf_size_in_chars); m_len = static_cast(len_in_chars); check(); return *this; } 
dynamic_string& dynamic_string::set_from_buf(const void* pBuf, uint buf_size) { CRNLIB_ASSERT(pBuf); if (buf_size >= cUINT16_MAX) { clear(); return *this; } #ifdef CRNLIB_BUILD_DEBUG if ((buf_size) && (memchr(pBuf, 0, buf_size) != NULL)) { CRNLIB_ASSERT(0); clear(); return *this; } #endif if (ensure_buf(buf_size, false)) { if (buf_size) memcpy(m_pStr, pBuf, buf_size); m_pStr[buf_size] = 0; m_len = static_cast(buf_size); check(); } return *this; } dynamic_string& dynamic_string::set_char(uint index, char c) { CRNLIB_ASSERT(index <= m_len); if (!c) truncate(index); else if (index < m_len) { m_pStr[index] = c; check(); } else if (index == m_len) append_char(c); return *this; } dynamic_string& dynamic_string::append_char(char c) { if (ensure_buf(m_len + 1)) { m_pStr[m_len] = c; m_pStr[m_len + 1] = '\0'; m_len++; check(); } return *this; } dynamic_string& dynamic_string::truncate(uint new_len) { if (new_len < m_len) { m_pStr[new_len] = '\0'; m_len = static_cast(new_len); check(); } return *this; } dynamic_string& dynamic_string::tolower() { if (m_len) { crnlib_strnlwr(get_ptr_priv(), m_buf_size); } return *this; } dynamic_string& dynamic_string::toupper() { if (m_len) { crnlib_strnupr(get_ptr_priv(), m_buf_size); } return *this; } dynamic_string& dynamic_string::append(const char* p) { CRNLIB_ASSERT(p); uint len = static_cast(strlen(p)); uint new_total_len = m_len + len; if ((new_total_len) && ensure_buf(new_total_len)) { memcpy(m_pStr + m_len, p, len + 1); m_len = static_cast(m_len + len); check(); } return *this; } dynamic_string& dynamic_string::append(const dynamic_string& other) { uint len = other.m_len; uint new_total_len = m_len + len; if ((new_total_len) && ensure_buf(new_total_len)) { memcpy(m_pStr + m_len, other.get_ptr_priv(), len + 1); m_len = static_cast(m_len + len); check(); } return *this; } dynamic_string operator+(const char* p, const dynamic_string& a) { return dynamic_string(p).append(a); } dynamic_string operator+(const dynamic_string& a, const 
char* p) { return dynamic_string(a).append(p); } dynamic_string operator+(const dynamic_string& a, const dynamic_string& b) { return dynamic_string(a).append(b); } dynamic_string& dynamic_string::format_args(const char* p, va_list args) { CRNLIB_ASSERT(p); const uint cBufSize = 4096; char buf[cBufSize]; #if defined(_WIN32) int l = vsnprintf_s(buf, cBufSize, _TRUNCATE, p, args); #else int l = crnlib_vsnprintf(buf, cBufSize, p, args); #endif if (l <= 0) clear(); else if (ensure_buf(l, false)) { memcpy(m_pStr, buf, l + 1); m_len = static_cast(l); check(); } return *this; } dynamic_string& dynamic_string::format(const char* p, ...) { CRNLIB_ASSERT(p); va_list args; va_start(args, p); format_args(p, args); va_end(args); return *this; } dynamic_string& dynamic_string::crop(uint start, uint len) { if (start >= m_len) { clear(); return *this; } len = math::minimum(len, m_len - start); if (start) memmove(get_ptr_priv(), get_ptr_priv() + start, len); m_pStr[len] = '\0'; m_len = static_cast(len); check(); return *this; } dynamic_string& dynamic_string::substring(uint start, uint end) { CRNLIB_ASSERT(start <= end); if (start > end) return *this; return crop(start, end - start); } dynamic_string& dynamic_string::left(uint len) { return substring(0, len); } dynamic_string& dynamic_string::mid(uint start, uint len) { return crop(start, len); } dynamic_string& dynamic_string::right(uint start) { return substring(start, get_len()); } dynamic_string& dynamic_string::tail(uint num) { return substring(math::maximum(static_cast(get_len()) - static_cast(num), 0), get_len()); } dynamic_string& dynamic_string::unquote() { if (m_len >= 2) { if (((*this)[0] == '\"') && ((*this)[m_len - 1] == '\"')) { return mid(1, m_len - 2); } } return *this; } int dynamic_string::find_left(const char* p, bool case_sensitive) const { CRNLIB_ASSERT(p); const int p_len = (int)strlen(p); for (int i = 0; i <= (m_len - p_len); i++) if ((case_sensitive ? 
strncmp : crnlib_strnicmp)(p, &m_pStr[i], p_len) == 0) return i; return -1; } bool dynamic_string::contains(const char* p, bool case_sensitive) const { return find_left(p, case_sensitive) >= 0; } uint dynamic_string::count_char(char c) const { uint count = 0; for (uint i = 0; i < m_len; i++) if (m_pStr[i] == c) count++; return count; } int dynamic_string::find_left(char c) const { for (uint i = 0; i < m_len; i++) if (m_pStr[i] == c) return i; return -1; } int dynamic_string::find_right(char c) const { for (int i = (int)m_len - 1; i >= 0; i--) if (m_pStr[i] == c) return i; return -1; } int dynamic_string::find_right(const char* p, bool case_sensitive) const { CRNLIB_ASSERT(p); const int p_len = (int)strlen(p); for (int i = m_len - p_len; i >= 0; i--) if ((case_sensitive ? strncmp : crnlib_strnicmp)(p, &m_pStr[i], p_len) == 0) return i; return -1; } dynamic_string& dynamic_string::trim() { int s, e; for (s = 0; s < (int)m_len; s++) if (!isspace(m_pStr[s])) break; for (e = m_len - 1; e > s; e--) if (!isspace(m_pStr[e])) break; return crop(s, e - s + 1); } dynamic_string& dynamic_string::trim_crlf() { int s = 0, e; for (e = m_len - 1; e > s; e--) if ((m_pStr[e] != 13) && (m_pStr[e] != 10)) break; return crop(s, e - s + 1); } dynamic_string& dynamic_string::remap(int from_char, int to_char) { for (uint i = 0; i < m_len; i++) if (m_pStr[i] == from_char) m_pStr[i] = (char)to_char; return *this; } #ifdef CRNLIB_BUILD_DEBUG void dynamic_string::check() const { if (!m_pStr) { CRNLIB_ASSERT(!m_buf_size && !m_len); } else { CRNLIB_ASSERT(m_buf_size); CRNLIB_ASSERT((m_buf_size == cUINT16_MAX) || math::is_power_of_2((uint32)m_buf_size)); CRNLIB_ASSERT(m_len < m_buf_size); CRNLIB_ASSERT(!m_pStr[m_len]); #if CRNLIB_SLOW_STRING_LEN_CHECKS CRNLIB_ASSERT(strlen(m_pStr) == m_len); #endif } } #endif bool dynamic_string::ensure_buf(uint len, bool preserve_contents) { uint buf_size_needed = len + 1; CRNLIB_ASSERT(buf_size_needed <= cUINT16_MAX); if (buf_size_needed <= cUINT16_MAX) { if 
(buf_size_needed > m_buf_size) expand_buf(buf_size_needed, preserve_contents); } return m_buf_size >= buf_size_needed; } bool dynamic_string::expand_buf(uint new_buf_size, bool preserve_contents) { new_buf_size = math::minimum(cUINT16_MAX, math::next_pow2(math::maximum(m_buf_size, new_buf_size))); if (new_buf_size != m_buf_size) { char* p = crnlib_new_array(new_buf_size); if (preserve_contents) memcpy(p, get_ptr_priv(), m_len + 1); crnlib_delete_array(m_pStr); m_pStr = p; m_buf_size = static_cast(new_buf_size); if (preserve_contents) check(); } return m_buf_size >= new_buf_size; } void dynamic_string::swap(dynamic_string& other) { utils::swap(other.m_buf_size, m_buf_size); utils::swap(other.m_len, m_len); utils::swap(other.m_pStr, m_pStr); } int dynamic_string::serialize(void* pBuf, uint buf_size, bool little_endian) const { uint buf_left = buf_size; //if (m_len > cUINT16_MAX) // return -1; CRNLIB_ASSUME(sizeof(m_len) == sizeof(uint16)); if (!utils::write_val((uint16)m_len, pBuf, buf_left, little_endian)) return -1; if (buf_left < m_len) return -1; memcpy(pBuf, get_ptr(), m_len); buf_left -= m_len; return buf_size - buf_left; } int dynamic_string::deserialize(const void* pBuf, uint buf_size, bool little_endian) { uint buf_left = buf_size; if (buf_left < sizeof(uint16)) return -1; uint16 l; if (!utils::read_obj(l, pBuf, buf_left, little_endian)) return -1; if (buf_left < l) return -1; set_from_buf(pBuf, l); buf_left -= l; return buf_size - buf_left; } void dynamic_string::translate_lf_to_crlf() { if (find_left(0x0A) < 0) return; dynamic_string tmp; tmp.ensure_buf(m_len + 2); // normal sequence is 0x0D 0x0A (CR LF, \r\n) int prev_char = -1; for (uint i = 0; i < get_len(); i++) { const int cur_char = (*this)[i]; if ((cur_char == 0x0A) && (prev_char != 0x0D)) tmp.append_char(0x0D); tmp.append_char(cur_char); prev_char = cur_char; } swap(tmp); } } // namespace crnlib 
DaemonEngine-crunch-ef4d32f/crnlib/crn_dynamic_string.h000066400000000000000000000146351503722002600233260ustar00rootroot00000000000000// File: crn_dynamic_string.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { enum { cMaxDynamicStringLen = cUINT16_MAX - 1 }; class dynamic_string { public: inline dynamic_string() : m_buf_size(0), m_len(0), m_pStr(NULL) {} dynamic_string(eVarArg dummy, const char* p, ...); dynamic_string(const char* p); dynamic_string(const char* p, uint len); dynamic_string(const dynamic_string& other); inline ~dynamic_string() { if (m_pStr) crnlib_delete_array(m_pStr); } // Truncates the string to 0 chars and frees the buffer. void clear(); void optimize(); // Truncates the string to 0 chars, but does not free the buffer. void empty(); inline const char* assume_ownership() { const char* p = m_pStr; m_pStr = NULL; m_len = 0; m_buf_size = 0; return p; } inline uint get_len() const { return m_len; } inline bool is_empty() const { return !m_len; } inline const char* get_ptr() const { return m_pStr ? m_pStr : ""; } inline const char* c_str() const { return get_ptr(); } inline const char* get_ptr_raw() const { return m_pStr; } inline char* get_ptr_raw() { return m_pStr; } inline char front() const { return m_len ? m_pStr[0] : '\0'; } inline char back() const { return m_len ? 
m_pStr[m_len - 1] : '\0'; } inline char operator[](uint i) const { CRNLIB_ASSERT(i <= m_len); return get_ptr()[i]; } inline operator size_t() const { return fast_hash(get_ptr(), m_len) ^ fast_hash(&m_len, sizeof(m_len)); } int compare(const char* p, bool case_sensitive = false) const; int compare(const dynamic_string& rhs, bool case_sensitive = false) const; inline bool operator==(const dynamic_string& rhs) const { return compare(rhs) == 0; } inline bool operator==(const char* p) const { return compare(p) == 0; } inline bool operator!=(const dynamic_string& rhs) const { return compare(rhs) != 0; } inline bool operator!=(const char* p) const { return compare(p) != 0; } inline bool operator<(const dynamic_string& rhs) const { return compare(rhs) < 0; } inline bool operator<(const char* p) const { return compare(p) < 0; } inline bool operator>(const dynamic_string& rhs) const { return compare(rhs) > 0; } inline bool operator>(const char* p) const { return compare(p) > 0; } inline bool operator<=(const dynamic_string& rhs) const { return compare(rhs) <= 0; } inline bool operator<=(const char* p) const { return compare(p) <= 0; } inline bool operator>=(const dynamic_string& rhs) const { return compare(rhs) >= 0; } inline bool operator>=(const char* p) const { return compare(p) >= 0; } friend inline bool operator==(const char* p, const dynamic_string& rhs) { return rhs.compare(p) == 0; } dynamic_string& set(const char* p, uint max_len = UINT_MAX); dynamic_string& set(const dynamic_string& other, uint max_len = UINT_MAX); bool set_len(uint new_len, char fill_char = ' '); // Set from non-zero terminated buffer. 
dynamic_string& set_from_buf(const void* pBuf, uint buf_size); dynamic_string& operator=(const dynamic_string& rhs) { return set(rhs); } dynamic_string& operator=(const char* p) { return set(p); } dynamic_string& set_char(uint index, char c); dynamic_string& append_char(char c); dynamic_string& append_char(int c) { CRNLIB_ASSERT((c >= 0) && (c <= 255)); return append_char(static_cast(c)); } dynamic_string& truncate(uint new_len); dynamic_string& tolower(); dynamic_string& toupper(); dynamic_string& append(const char* p); dynamic_string& append(const dynamic_string& other); dynamic_string& operator+=(const char* p) { return append(p); } dynamic_string& operator+=(const dynamic_string& other) { return append(other); } friend dynamic_string operator+(const char* p, const dynamic_string& a); friend dynamic_string operator+(const dynamic_string& a, const char* p); friend dynamic_string operator+(const dynamic_string& a, const dynamic_string& b); dynamic_string& format_args(const char* p, va_list args); dynamic_string& format(const char* p, ...); dynamic_string& crop(uint start, uint len); dynamic_string& substring(uint start, uint end); dynamic_string& left(uint len); dynamic_string& mid(uint start, uint len); dynamic_string& right(uint start); dynamic_string& tail(uint num); dynamic_string& unquote(); uint count_char(char c) const; int find_left(const char* p, bool case_sensitive = false) const; int find_left(char c) const; int find_right(char c) const; int find_right(const char* p, bool case_sensitive = false) const; bool contains(const char* p, bool case_sensitive = false) const; dynamic_string& trim(); dynamic_string& trim_crlf(); dynamic_string& remap(int from_char, int to_char); void swap(dynamic_string& other); // Returns -1 on failure, or the number of bytes written. int serialize(void* pBuf, uint buf_size, bool little_endian) const; // Returns -1 on failure, or the number of bytes read. 
int deserialize(const void* pBuf, uint buf_size, bool little_endian); void translate_lf_to_crlf(); static inline char* create_raw_buffer(uint& buf_size_in_chars); static inline void free_raw_buffer(char* p) { crnlib_delete_array(p); } dynamic_string& set_from_raw_buf_and_assume_ownership(char* pBuf, uint buf_size_in_chars, uint len_in_chars); private: uint16 m_buf_size; uint16 m_len; char* m_pStr; #ifdef CRNLIB_BUILD_DEBUG void check() const; #else inline void check() const {} #endif bool expand_buf(uint new_buf_size, bool preserve_contents); const char* get_ptr_priv() const { return m_pStr ? m_pStr : ""; } char* get_ptr_priv() { return (char*)(m_pStr ? m_pStr : ""); } bool ensure_buf(uint len, bool preserve_contents = true); }; typedef crnlib::vector dynamic_string_array; extern dynamic_string g_empty_dynamic_string; CRNLIB_DEFINE_BITWISE_MOVABLE(dynamic_string); inline void swap(dynamic_string& a, dynamic_string& b) { a.swap(b); } inline char* dynamic_string::create_raw_buffer(uint& buf_size_in_chars) { if (buf_size_in_chars > cUINT16_MAX) { CRNLIB_ASSERT(0); return NULL; } buf_size_in_chars = math::minimum(cUINT16_MAX, math::next_pow2(buf_size_in_chars)); return crnlib_new_array(buf_size_in_chars); } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_etc.cpp000066400000000000000000002013221503722002600214110ustar00rootroot00000000000000// File: crn_etc.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_etc.h" #include "crn_radix_sort.h" #include "crn_ryg_dxt.hpp" namespace crnlib { const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues] = { {-8, -2, 2, 8}, {-17, -5, 5, 17}, {-29, -9, 9, 29}, {-42, -13, 13, 42}, {-60, -18, 18, 60}, {-80, -24, 24, 80}, {-106, -33, 33, 106}, {-183, -47, 47, 183}}; const uint8 g_etc1_to_selector_index[cETC1SelectorValues] = {2, 3, 1, 0}; const uint8 g_selector_index_to_etc1[cETC1SelectorValues] = {3, 2, 0, 1}; // [flip][subblock][pixel_index] const 
etc1_coord2 g_etc1_pixel_coords[2][2][8] = { {{{0, 0}, {0, 1}, {0, 2}, {0, 3}, {1, 0}, {1, 1}, {1, 2}, {1, 3}}, {{2, 0}, {2, 1}, {2, 2}, {2, 3}, {3, 0}, {3, 1}, {3, 2}, {3, 3}}}, { {{0, 0}, {1, 0}, {2, 0}, {3, 0}, {0, 1}, {1, 1}, {2, 1}, {3, 1}}, {{0, 2}, {1, 2}, {2, 2}, {3, 2}, {0, 3}, {1, 3}, {2, 3}, {3, 3}}, }}; // Given an ETC1 diff/inten_table/selector, and an 8-bit desired color, this table encodes the best packed_color in the low byte, and the abs error in the high byte. static uint16 g_etc1_inverse_lookup[2 * 8 * 4][256]; // [diff/inten_table/selector][desired_color] // g_color8_to_etc_block_config[color][table_index] = Supplies for each 8-bit color value a list of packed ETC1 diff/intensity table/selectors/packed_colors that map to that color. // To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) static const uint16 g_color8_to_etc_block_config_0_255[2][33] = { {0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF}, {0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF}, }; // Really only [254][11]. 
static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = { {0x021C, 0x0D0D, 0xFFFF}, {0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF}, {0x0113, 0x0217, 0xFFFF}, {0x0116, 0x031E, 0x0B0E, 0x0405, 0xFFFF}, {0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF}, {0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF}, {0x0303, 0x0215, 0x0607, 0xFFFF}, {0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF}, {0x0100, 0x0024, 0x0306, 0x0025, 0x041B, 0x0E0D, 0xFFFF}, {0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF}, {0x0213, 0x0317, 0xFFFF}, {0x0112, 0x0505, 0xFFFF}, {0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF}, {0x0211, 0x0909, 0xFFFF}, {0x0110, 0x0315, 0x0707, 0x0419, 0x180F, 0xFFFF}, {0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF}, {0x0032, 0x0202, 0x0033, 0x0125, 0x051B, 0x0F0D, 0xFFFF}, {0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF}, {0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF}, {0x0605, 0x0417, 0xFFFF}, {0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF}, {0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF}, {0x0519, 0x190F, 0xFFFF}, {0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF}, {0x0130, 0x0214, 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF}, {0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF}, {0x031A, 0x0D0B, 0x091F, 0xFFFF}, {0x0413, 0x0705, 0x0517, 0xFFFF}, {0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF}, {0x0126, 0x080C, 0x0B09, 0xFFFF}, {0x0411, 0x0619, 0x1A0F, 0xFFFF}, {0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, 0xFFFF}, {0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF}, {0x0132, 0x0302, 0x0229, 0x110D, 0xFFFF}, {0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF}, {0x0220, 0x0513, 0x0617, 0xFFFF}, {0x0135, 0x0805, 0x0327, 0xFFFF}, {0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF}, {0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, 0xFFFF}, {0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF}, {0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF}, {0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF}, {0x0300, 0x0224, 0x0506, 0x0521, 0x0F0B, 
0x0B1F, 0xFFFF}, {0x041A, 0x0613, 0x0717, 0xFFFF}, {0x0235, 0x0905, 0xFFFF}, {0x0312, 0x0134, 0x0523, 0x0427, 0xFFFF}, {0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF}, {0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, 0xFFFF}, {0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF}, {0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, 0x130D, 0xFFFF}, {0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF}, {0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF}, {0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF}, {0x0623, 0x0527, 0xFFFF}, {0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, 0xFFFF}, {0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF}, {0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, 0xFFFF}, {0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF}, {0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, 0x140D, 0xFFFF}, {0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF}, {0x051A, 0x0813, 0x0B05, 0x0917, 0xFFFF}, {0x0723, 0x0435, 0x0627, 0xFFFF}, {0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF}, {0x0326, 0x0A0C, 0x012E, 0x0811, 0x0A19, 0x1E0F, 0xFFFF}, {0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF}, {0x0410, 0x0901, 0x0633, 0x0725, 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF}, {0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF}, {0x0332, 0x0502, 0x0821, 0x0139, 0x120B, 0x0E1F, 0xFFFF}, {0x0328, 0x061C, 0x0913, 0x0A17, 0xFFFF}, {0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF}, {0x0823, 0x032F, 0xFFFF}, {0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF}, {0x0422, 0x0604, 0x090A, 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF}, {0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF}, {0x032A, 0x0825, 0x0437, 0x0729, 0x0C1B, 0x160D, 0xFFFF}, {0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF}, {0x0500, 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF}, {0x061A, 0x0635, 0x0D05, 0xFFFF}, {0x0923, 0x0827, 0xFFFF}, {0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF}, {0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, 0x072B, 0xFFFF}, {0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF}, 
{0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, 0xFFFF}, {0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF}, {0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF}, {0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF}, {0x0520, 0x0A23, 0x0927, 0xFFFF}, {0x0B11, 0x1209, 0x013B, 0x052F, 0xFFFF}, {0x0616, 0x081E, 0x0D19, 0xFFFF}, {0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, 0x0F1D, 0xFFFF}, {0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF}, {0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF}, {0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF}, {0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, 0x0D17, 0xFFFF}, {0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF}, {0x1309, 0x023B, 0x062F, 0xFFFF}, {0x0612, 0x0434, 0x013A, 0x0C11, 0x0E19, 0xFFFF}, {0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF}, {0x0D01, 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF}, {0x0610, 0x0A29, 0x190D, 0xFFFF}, {0x0718, 0x042C, 0x0C21, 0x0539, 0x160B, 0x121F, 0xFFFF}, {0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF}, {0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, 0xFFFF}, {0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF}, {0x0D11, 0x0F19, 0x1409, 0xFFFF}, {0x0716, 0x003C, 0x091E, 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF}, {0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, 0xFFFF}, {0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF}, {0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF}, {0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF}, {0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF}, {0x081A, 0x0D23, 0x0C27, 0xFFFF}, {0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF}, {0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, 0x1019, 0x0B2B, 0x013D, 0xFFFF}, {0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF}, {0x0C33, 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF}, {0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF}, {0x0818, 0x052C, 0x0F13, 0x180B, 0x141F, 0xFFFF}, {0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF}, {0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF}, {0x0720, 0x0F11, 
0x1609, 0x053B, 0x092F, 0xFFFF}, {0x1119, 0x023D, 0xFFFF}, {0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF}, {0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, 0xFFFF}, {0x0F21, 0x0D29, 0x1C0D, 0xFFFF}, {0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF}, {0x0730, 0x0814, 0x0536, 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF}, {0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF}, {0x091A, 0x1709, 0x063B, 0x0A2F, 0xFFFF}, {0x1011, 0x1219, 0x033D, 0xFFFF}, {0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, 0x1507, 0x0D2B, 0xFFFF}, {0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF}, {0x0E29, 0x1D0D, 0xFFFF}, {0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF}, {0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF}, {0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF}, {0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF}, {0x0820, 0x1111, 0x1319, 0x1809, 0xFFFF}, {0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF}, {0x0916, 0x023C, 0x0B1E, 0x1031, 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF}, {0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF}, {0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF}, {0x072A, 0x1213, 0x1317, 0xFFFF}, {0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, 0x1505, 0xFFFF}, {0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 0x1027, 0xFFFF}, {0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, 0xFFFF}, {0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF}, {0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, 0x161D, 0xFFFF}, {0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF}, {0x1221, 0x0B39, 0x1029, 0xFFFF}, {0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF}, {0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF}, {0x0832, 0x0A02, 0x1223, 0x1127, 0xFFFF}, {0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF}, {0x0920, 0x1519, 0x063D, 0xFFFF}, {0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF}, {0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, 0x1225, 0x0E37, 0x161B, 0xFFFF}, {0x0922, 0x0B04, 0x0E0A, 
0x1321, 0x1129, 0xFFFF}, {0x0C39, 0x1D0B, 0x191F, 0xFFFF}, {0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF}, {0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF}, {0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF}, {0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF}, {0x1331, 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF}, {0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, 0x181D, 0xFFFF}, {0x0926, 0x072E, 0x1229, 0xFFFF}, {0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF}, {0x0A10, 0x1513, 0x1617, 0xFFFF}, {0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF}, {0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF}, {0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF}, {0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF}, {0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF}, {0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF}, {0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF}, {0x1613, 0x1717, 0xFFFF}, {0x092A, 0x1235, 0x1905, 0xFFFF}, {0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF}, {0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, 0x0C3B, 0x102F, 0xFFFF}, {0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF}, {0x1531, 0x1701, 0x1803, 0x122D, 0x1A1D, 0xFFFF}, {0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF}, {0x0A26, 0x003E, 0x082E, 0x1621, 0x0F39, 0x1429, 0x003F, 0xFFFF}, {0x1713, 0x1C1F, 0xFFFF}, {0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF}, {0x0C18, 0x092C, 0x1623, 0x1527, 0xFFFF}, {0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF}, {0x0A28, 0x0D1C, 0x1919, 0x0A3D, 0xFFFF}, {0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF}, {0x1801, 0x1533, 0x1625, 0x1237, 0x1A1B, 0xFFFF}, {0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF}, {0x0B22, 0x0D04, 0x1039, 0x1D1F, 0xFFFF}, {0x1813, 0x1B05, 0x1917, 0xFFFF}, {0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF}, {0x0B30, 0x0C14, 0x0936, 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF}, {0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF}, {0x0D1A, 
0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF}, {0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF}, {0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF}, {0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF}, {0x1913, 0x1A17, 0xFFFF}, {0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF}, {0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF}, {0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF}, {0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF}, {0x0C20, 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF}, {0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF}, {0x0D16, 0x063C, 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF}, {0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF}, {0x1635, 0x1D05, 0xFFFF}, {0x0B2A, 0x1923, 0x1827, 0xFFFF}, {0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF}, {0x0D00, 0x0C24, 0x0F06, 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF}, {0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF}, {0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF}, {0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF}, {0x0C26, 0x023E, 0x0A2E, 0x1B13, 0xFFFF}, {0x1735, 0x1E05, 0x1C17, 0xFFFF}, {0x0D10, 0x1A23, 0x1927, 0xFFFF}, {0x0E18, 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF}, {0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF}, {0x0C28, 0x0F1C, 0x1A31, 0x1D03, 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF}, {0x0D20, 0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF}, {0x1B21, 0x1929, 0x053F, 0xFFFF}, {0x0E16, 0x073C, 0x1439, 0xFFFF}, {0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF}, {0x1B23, 0x1835, 0x1A27, 0xFFFF}, {0x0C2A, 0x123B, 0x162F, 0xFFFF}, {0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF}, {0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF}, {0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, 0x182D, 0xFFFF}, {0x1A29, 0x063F, 0xFFFF}, {0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF}, {0x0D26, 0x033E, 0x0B2E, 0x1D13, 0x1E17, 0xFFFF}, {0x1935, 0x1B27, 0xFFFF}, {0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF}, {0x0F18, 0x0C2C, 0x1D11, 0x1F19, 0xFFFF}, {0x0D32, 0x0F02, 0x1F03, 
0x1E15, 0x1A2B, 0x103D, 0xFFFF}, {0x0D28, 0x1C31, 0x1E01, 0x1B33, 0x192D, 0xFFFF}, {0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF}, {0x1D21, 0x1639, 0xFFFF}, {0x0F16, 0x083C, 0x1E13, 0x1F17, 0xFFFF}, {0x0E22, 0x1A35, 0xFFFF}, {0x1D23, 0x1C27, 0xFFFF}, {0x0D2A, 0x1E11, 0x143B, 0x182F, 0xFFFF}, {0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF}, {0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, 0x1A2D, 0xFFFF}, {0x1C33, 0x1D25, 0x1937, 0xFFFF}, {0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF}, {0x0F12, 0x0D34, 0x0A3A, 0x1F13, 0xFFFF}, {0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF}, {0x1E23, 0x1D27, 0xFFFF}, {0x0F10, 0x1F11, 0x153B, 0x192F, 0xFFFF}, {0x0D2C, 0x123D, 0xFFFF}, }; uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias) { return pack_color5(color.r, color.g, color.b, scaled, bias); } uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias) { if (scaled) { r = (r * 31U + bias) / 255U; g = (g * 31U + bias) / 255U; b = (b * 31U + bias) / 255U; } r = math::minimum(r, 31U); g = math::minimum(g, 31U); b = math::minimum(b, 31U); return static_cast(b | (g << 5U) | (r << 10U)); } color_quad_u8 etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha) { uint b = packed_color5 & 31U; uint g = (packed_color5 >> 5U) & 31U; uint r = (packed_color5 >> 10U) & 31U; if (scaled) { b = (b << 3U) | (b >> 2U); g = (g << 3U) | (g >> 2U); r = (r << 3U) | (r >> 2U); } return color_quad_u8(cNoClamp, r, g, b, math::minimum(alpha, 255U)); } void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled) { color_quad_u8 c(unpack_color5(packed_color5, scaled, 0)); r = c.r; g = c.g; b = c.b; } bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) { color_quad_i16 dc(unpack_delta3(packed_delta3)); int b = (packed_color5 & 31U) + dc.b; int g = ((packed_color5 >> 5U) & 31U) + dc.g; int r = ((packed_color5 >> 10U) & 31U) + dc.r; bool success = 
true; if (static_cast(r | g | b) > 31U) { success = false; r = math::clamp(r, 0, 31); g = math::clamp(g, 0, 31); b = math::clamp(b, 0, 31); } if (scaled) { b = (b << 3U) | (b >> 2U); g = (g << 3U) | (g >> 2U); r = (r << 3U) | (r >> 2U); } result.set_noclamp_rgba(r, g, b, math::minimum(alpha, 255U)); return success; } bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) { color_quad_u8 result; const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); r = result.r; g = result.g; b = result.b; return success; } uint16 etc1_block::pack_delta3(const color_quad_i16& color) { return pack_delta3(color.r, color.g, color.b); } uint16 etc1_block::pack_delta3(int r, int g, int b) { CRNLIB_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); CRNLIB_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); CRNLIB_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); if (r < 0) r += 8; if (g < 0) g += 8; if (b < 0) b += 8; return static_cast(b | (g << 3) | (r << 6)); } color_quad_i16 etc1_block::unpack_delta3(uint16 packed_delta3) { int r = (packed_delta3 >> 6) & 7; int g = (packed_delta3 >> 3) & 7; int b = packed_delta3 & 7; if (r >= 4) r -= 8; if (g >= 4) g -= 8; if (b >= 4) b -= 8; return color_quad_i16(r, g, b, 0); } void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3) { r = (packed_delta3 >> 6) & 7; g = (packed_delta3 >> 3) & 7; b = packed_delta3 & 7; if (r >= 4) r -= 8; if (g >= 4) g -= 8; if (b >= 4) b -= 8; } uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias) { return pack_color4(color.r, color.g, color.b, scaled, bias); } uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias) { if (scaled) { r = (r * 15U + bias) / 255U; g = (g * 15U + bias) / 255U; b = (b * 15U + bias) / 255U; } r = math::minimum(r, 15U); g = math::minimum(g, 15U); b = math::minimum(b, 15U); 
return static_cast(b | (g << 4U) | (r << 8U)); } color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha) { uint b = packed_color4 & 15U; uint g = (packed_color4 >> 4U) & 15U; uint r = (packed_color4 >> 8U) & 15U; if (scaled) { b = (b << 4U) | b; g = (g << 4U) | g; r = (r << 4U) | r; } return color_quad_u8(cNoClamp, r, g, b, math::minimum(alpha, 255U)); } void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled) { color_quad_u8 c(unpack_color4(packed_color4, scaled, 0)); r = c.r; g = c.g; b = c.b; } void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx) { CRNLIB_ASSERT(table_idx < cETC1IntenModifierValues); const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; uint r, g, b; unpack_color5(r, g, b, packed_color5, true); const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); const int y0 = pInten_modifer_table[0]; pDst[0].set(ir + y0, ig + y0, ib + y0); const int y1 = pInten_modifer_table[1]; pDst[1].set(ir + y1, ig + y1, ib + y1); const int y2 = pInten_modifer_table[2]; pDst[2].set(ir + y2, ig + y2, ib + y2); const int y3 = pInten_modifer_table[3]; pDst[3].set(ir + y3, ig + y3, ib + y3); } bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx) { CRNLIB_ASSERT(table_idx < cETC1IntenModifierValues); const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; uint r, g, b; bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); const int y0 = pInten_modifer_table[0]; pDst[0].set(ir + y0, ig + y0, ib + y0); const int y1 = pInten_modifer_table[1]; pDst[1].set(ir + y1, ig + y1, ib + y1); const int y2 = pInten_modifer_table[2]; pDst[2].set(ir + y2, ig + y2, ib + y2); const int y3 = pInten_modifer_table[3]; pDst[3].set(ir + y3, ig + y3, ib + y3); return success; } 
void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx) { CRNLIB_ASSERT(table_idx < cETC1IntenModifierValues); const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; uint r, g, b; unpack_color4(r, g, b, packed_color4, true); const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); const int y0 = pInten_modifer_table[0]; pDst[0].set(ir + y0, ig + y0, ib + y0); const int y1 = pInten_modifer_table[1]; pDst[1].set(ir + y1, ig + y1, ib + y1); const int y2 = pInten_modifer_table[2]; pDst[2].set(ir + y2, ig + y2, ib + y2); const int y3 = pInten_modifer_table[3]; pDst[3].set(ir + y3, ig + y3, ib + y3); } bool unpack_etc1(const etc1_block& block, color_quad_u8* pDst, bool preserve_alpha) { const bool diff_flag = block.get_diff_bit(); const bool flip_flag = block.get_flip_bit(); const uint table_index0 = block.get_inten_table(0); const uint table_index1 = block.get_inten_table(1); color_quad_u8 subblock_colors0[4]; color_quad_u8 subblock_colors1[4]; bool success = true; if (diff_flag) { const uint16 base_color5 = block.get_base5_color(); const uint16 delta_color3 = block.get_delta3_color(); etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, delta_color3, table_index1)) success = false; } else { const uint16 base_color4_0 = block.get_base4_color(0); etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); const uint16 base_color4_1 = block.get_base4_color(1); etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); } if (preserve_alpha) { if (flip_flag) { for (uint y = 0; y < 2; y++) { pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); pDst += 4; } for 
(uint y = 2; y < 4; y++) { pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); pDst += 4; } } else { for (uint y = 0; y < 4; y++) { pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); pDst += 4; } } } else { if (flip_flag) { // 0000 // 0000 // 1111 // 1111 for (uint y = 0; y < 2; y++) { pDst[0] = subblock_colors0[block.get_selector(0, y)]; pDst[1] = subblock_colors0[block.get_selector(1, y)]; pDst[2] = subblock_colors0[block.get_selector(2, y)]; pDst[3] = subblock_colors0[block.get_selector(3, y)]; pDst += 4; } for (uint y = 2; y < 4; y++) { pDst[0] = subblock_colors1[block.get_selector(0, y)]; pDst[1] = subblock_colors1[block.get_selector(1, y)]; pDst[2] = subblock_colors1[block.get_selector(2, y)]; pDst[3] = subblock_colors1[block.get_selector(3, y)]; pDst += 4; } } else { // 0011 // 0011 // 0011 // 0011 for (uint y = 0; y < 4; y++) { pDst[0] = subblock_colors0[block.get_selector(0, y)]; pDst[1] = subblock_colors0[block.get_selector(1, y)]; pDst[2] = subblock_colors1[block.get_selector(2, y)]; pDst[3] = subblock_colors1[block.get_selector(3, y)]; pDst += 4; } } } return success; } bool etc1_optimizer::compute() { const uint n = m_pParams->m_num_src_pixels; const int scan_delta_size = m_pParams->m_scan_delta_size; // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. 
for (int zdi = 0; zdi < scan_delta_size; zdi++) { const int zd = m_pParams->m_pScan_deltas[zdi]; const int mbb = m_bb + zd; if (mbb < 0) continue; else if (mbb > m_limit) break; for (int ydi = 0; ydi < scan_delta_size; ydi++) { const int yd = m_pParams->m_pScan_deltas[ydi]; const int mbg = m_bg + yd; if (mbg < 0) continue; else if (mbg > m_limit) break; for (int xdi = 0; xdi < scan_delta_size; xdi++) { const int xd = m_pParams->m_pScan_deltas[xdi]; const int mbr = m_br + xd; if (mbr < 0) continue; else if (mbr > m_limit) break; etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); if (m_pParams->m_quality == cCRNETCQualitySlow) { if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) continue; } else { if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution)) continue; } // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. // Now, for each component, attempt to refine the current solution by solving a simple linear equation. 
For example, for 4 colors: // The goal is: // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 // Rearranging this: // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 // So what this means: // optimal_block_color = avg_input - avg_inten_delta // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. const uint max_refinement_trials = (m_pParams->m_quality == cCRNETCQualityFast) ? 2 : (((xd | yd | zd) == 0) ? 
4 : 2); for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) { const uint8* pSelectors = m_best_solution.m_selectors.get_ptr(); const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color()); for (uint r = 0; r < n; r++) { const uint s = *pSelectors++; const int yd = pInten_table[s]; // Compute actual delta being applied to each pixel, taking into account clamping. delta_sum_r += math::clamp(base_color.r + yd, 0, 255) - base_color.r; delta_sum_g += math::clamp(base_color.g + yd, 0, 255) - base_color.g; delta_sum_b += math::clamp(base_color.b + yd, 0, 255) - base_color.b; } if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) break; const float avg_delta_r_f = static_cast(delta_sum_r) / n; const float avg_delta_g_f = static_cast(delta_sum_g) / n; const float avg_delta_b_f = static_cast(delta_sum_b) / n; const int br1 = math::clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); const int bg1 = math::clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); const int bb1 = math::clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); bool skip = false; if ((mbr == br1) && (mbg == bg1) && (mbb == bb1)) skip = true; else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b)) skip = true; else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1)) skip = true; if (skip) break; etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4); if (m_pParams->m_quality == cCRNETCQualitySlow) { if (!evaluate_solution(coords1, m_trial_solution, &m_best_solution)) break; } else { if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution)) 
break; } } // refinement_trial } // xdi } // ydi } // zdi if (!m_best_solution.m_valid) { m_pResult->m_error = cUINT32_MAX; return false; } const uint8* pSelectors = m_best_solution.m_selectors.get_ptr(); #ifdef CRNLIB_BUILD_DEBUG { color_quad_u8 block_colors[4]; m_best_solution.m_coords.get_block_colors(block_colors); const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; uint64 actual_error = 0; for (uint i = 0; i < n; i++) actual_error += color::elucidian_distance(pSrc_pixels[i], block_colors[pSelectors[i]], false); CRNLIB_ASSERT(actual_error == m_best_solution.m_error); } #endif m_pResult->m_error = m_best_solution.m_error; m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; memcpy(m_pResult->m_pSelectors, pSelectors, n); m_pResult->m_n = n; return true; } void etc1_optimizer::init(const params& params, results& result) { m_pParams = ¶ms; m_pResult = &result; const uint n = m_pParams->m_num_src_pixels; m_selectors.resize(n); m_best_selectors.resize(n); m_temp_selectors.resize(n); m_trial_solution.m_selectors.resize(n); m_best_solution.m_selectors.resize(n); m_limit = m_pParams->m_use_color4 ? 
15 : 31; vec3F avg_color(0.0f); m_luma.resize(n); m_sorted_luma[0].resize(n); m_sorted_luma[1].resize(n); for (uint i = 0; i < n; i++) { const color_quad_u8& c = m_pParams->m_pSrc_pixels[i]; const vec3F fc(c.r, c.g, c.b); avg_color += fc; m_luma[i] = static_cast(c.r + c.g + c.b); m_sorted_luma[0][i] = i; } avg_color /= static_cast(n); m_avg_color = avg_color; m_br = math::clamp(static_cast(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); m_bg = math::clamp(static_cast(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); m_bb = math::clamp(static_cast(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); if (m_pParams->m_quality <= cCRNETCQualityMedium) { m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0].get_ptr(), m_sorted_luma[1].get_ptr(), m_luma.get_ptr(), 0, sizeof(m_luma[0]), false); m_pSorted_luma = m_sorted_luma[0].get_ptr(); if (m_pSorted_luma_indices == m_sorted_luma[0].get_ptr()) m_pSorted_luma = m_sorted_luma[1].get_ptr(); for (uint i = 0; i < n; i++) m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; } m_best_solution.m_coords.clear(); m_best_solution.m_valid = false; m_best_solution.m_error = cUINT64_MAX; } bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) { trial_solution.m_valid = false; if (m_pParams->m_constrain_against_base_color5) { const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; if ((math::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (math::maximum(dr, dg, db) > cETC1ColorDeltaMax)) return false; } const color_quad_u8 base_color(coords.get_scaled_color()); const uint n = m_pParams->m_num_src_pixels; CRNLIB_ASSERT(trial_solution.m_selectors.size() == n); trial_solution.m_error = cUINT64_MAX; for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; 
inten_table++) { const int* pInten_table = g_etc1_inten_tables[inten_table]; color_quad_u8 block_colors[4]; for (uint s = 0; s < 4; s++) { const int yd = pInten_table[s]; block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); } uint64 total_error = 0; const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; for (uint c = 0; c < n; c++) { const color_quad_u8& src_pixel = *pSrc_pixels++; uint best_selector_index = 0; uint best_error = math::square(src_pixel.r - block_colors[0].r) + math::square(src_pixel.g - block_colors[0].g) + math::square(src_pixel.b - block_colors[0].b); uint trial_error = math::square(src_pixel.r - block_colors[1].r) + math::square(src_pixel.g - block_colors[1].g) + math::square(src_pixel.b - block_colors[1].b); if (trial_error < best_error) { best_error = trial_error; best_selector_index = 1; } trial_error = math::square(src_pixel.r - block_colors[2].r) + math::square(src_pixel.g - block_colors[2].g) + math::square(src_pixel.b - block_colors[2].b); if (trial_error < best_error) { best_error = trial_error; best_selector_index = 2; } trial_error = math::square(src_pixel.r - block_colors[3].r) + math::square(src_pixel.g - block_colors[3].g) + math::square(src_pixel.b - block_colors[3].b); if (trial_error < best_error) { best_error = trial_error; best_selector_index = 3; } m_temp_selectors[c] = static_cast(best_selector_index); total_error += best_error; if (total_error >= trial_solution.m_error) break; } if (total_error < trial_solution.m_error) { trial_solution.m_error = total_error; trial_solution.m_coords.m_inten_table = inten_table; trial_solution.m_selectors.swap(m_temp_selectors); trial_solution.m_valid = true; } } trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; bool success = false; if (pBest_solution) { if (trial_solution.m_error < pBest_solution->m_error) { *pBest_solution = trial_solution; success = true; } } return success; } 
// Faster variant of evaluate_solution(): instead of testing all 4 block colors per pixel, it
// classifies pixels by their r+g+b sum using the presorted m_pSorted_luma / m_pSorted_luma_indices
// arrays.
// NOTE(review): those arrays must already be populated by the optimizer's setup code - confirm
// this function is only reached when they are.
//
// Parameters and return value match evaluate_solution(). On a base_color5 constraint violation
// trial_solution.m_valid is set to false and false is returned.
// NOTE(review): several casts in this span have no target type (e.g. `static_cast(...)`);
// the template arguments look to have been lost in extraction - confirm against the upstream file.
bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) {
  if (m_pParams->m_constrain_against_base_color5) {
    const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r;
    const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g;
    const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b;

    if ((math::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (math::maximum(dr, dg, db) > cETC1ColorDeltaMax)) {
      trial_solution.m_valid = false;
      return false;
    }
  }

  const color_quad_u8 base_color(coords.get_scaled_color());

  const uint n = m_pParams->m_num_src_pixels;
  CRNLIB_ASSERT(trial_solution.m_selectors.size() == n);

  trial_solution.m_error = cUINT64_MAX;

  // Tables are visited from the highest index down to 0.
  for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) {
    const int* pInten_table = g_etc1_inten_tables[inten_table];

    uint block_inten[4];
    color_quad_u8 block_colors[4];
    for (uint s = 0; s < 4; s++) {
      const int yd = pInten_table[s];
      color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0);
      block_colors[s] = block_color;
      block_inten[s] = block_color.r + block_color.g + block_color.b;
    }

    // evaluate_solution_fast() enforces/assumes a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors.
    // The input colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast.

    // 0   1   2   3
    //   01  12  23
    // Midpoints are stored doubled (sum of neighbours, not average), so pixel sums are doubled before comparing.
    const uint block_inten_midpoints[3] = {block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3]};

    uint64 total_error = 0;
    const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels;
    if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) {
      // Even the last (largest-sum) sorted pixel is below the 0/1 midpoint: every pixel gets selector 0.
      if (block_inten[0] > m_pSorted_luma[n - 1]) {
        // Cheap rejection: skip this table when the intensity gap alone is not below the best error so far.
        const uint min_error = block_inten[0] - m_pSorted_luma[n - 1];
        if (min_error >= trial_solution.m_error)
          continue;
      }

      // NOTE(review): memset with a count of n assumes one byte per selector - confirm m_temp_selectors' element type.
      memset(&m_temp_selectors[0], 0, n);

      for (uint c = 0; c < n; c++)
        total_error += color::elucidian_distance(block_colors[0], pSrc_pixels[c], false);
    } else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) {
      // Even the first (smallest-sum) sorted pixel is at/above the 2/3 midpoint: every pixel gets selector 3.
      if (m_pSorted_luma[0] > block_inten[3]) {
        const uint min_error = m_pSorted_luma[0] - block_inten[3];
        if (min_error >= trial_solution.m_error)
          continue;
      }

      memset(&m_temp_selectors[0], 3, n);

      for (uint c = 0; c < n; c++)
        total_error += color::elucidian_distance(block_colors[3], pSrc_pixels[c], false);
    } else {
      // General case: walk pixels in sorted order, advancing the selector whenever a midpoint is crossed.
      uint cur_selector = 0, c;
      for (c = 0; c < n; c++) {
        const uint y = m_pSorted_luma[c];
        while ((y * 2) >= block_inten_midpoints[cur_selector])
          if (++cur_selector > 2)
            goto done;
        const uint sorted_pixel_index = m_pSorted_luma_indices[c];
        m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector);
        total_error += color::elucidian_distance(block_colors[cur_selector], pSrc_pixels[sorted_pixel_index], false);
      }
    done:
      // Reached only with pixels left over after passing the last midpoint: all of them use selector 3.
      while (c < n) {
        const uint sorted_pixel_index = m_pSorted_luma_indices[c];
        m_temp_selectors[sorted_pixel_index] = 3;
        total_error += color::elucidian_distance(block_colors[3], pSrc_pixels[sorted_pixel_index], false);
        ++c;
      }
    }

    if (total_error < trial_solution.m_error) {
      trial_solution.m_error = total_error;
      trial_solution.m_coords.m_inten_table = inten_table;
      trial_solution.m_selectors.swap(m_temp_selectors);
      trial_solution.m_valid = true;
      // A zero-error table cannot be beaten; stop searching.
      if (!total_error)
        break;
    }
  }
  trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color;
  trial_solution.m_coords.m_color4 = m_pParams->m_use_color4;

  bool success = false;
  if (pBest_solution) {
    if (trial_solution.m_error < pBest_solution->m_error) {
      *pBest_solution = trial_solution;
      success = true;
    }
  }

  return success;
}

// Dither function from RYG's public domain real-time DXT1 compressor, modified for 555.
//
// Quantizes a 4x4 block through ryg_dxt::QuantRBTab with error diffusion, writing the result to `dest`.
// Only the first 3 bytes of each pixel are written; the 4th byte of each dest pixel is left untouched.
// NOTE(review): the byte strides used below (4 per pixel, 16 per row) assume color_quad_u8 is
// 4 bytes and the block is 16 contiguous pixels in row-major order - confirm.
static void DitherBlock(color_quad_u8* dest, const color_quad_u8* block) {
  // Two rows of 4 error terms: ep1 = row being written, ep2 = previous row.
  int err[8], *ep1 = err, *ep2 = err + 4;
  // NOTE(review): the +8 offset presumably leaves room for slightly negative indices once
  // diffused error is added - confirm against the table's definition.
  uint8* quant = ryg_dxt::QuantRBTab + 8;

  // process channels separately
  for (int ch = 0; ch < 3; ch++) {
    uint8* bp = (uint8*)block;
    uint8* dp = (uint8*)dest;

    bp += ch;
    dp += ch;

    memset(err, 0, sizeof(err));
    for (int y = 0; y < 4; y++) {
      // Diffusion weights (in sixteenths): 7 from the left neighbour; 3, 5 and 1 from the previous row.

      // pixel 0
      dp[0] = quant[bp[0] + ((3 * ep2[1] + 5 * ep2[0]) >> 4)];
      ep1[0] = bp[0] - dp[0];

      // pixel 1
      dp[4] = quant[bp[4] + ((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) >> 4)];
      ep1[1] = bp[4] - dp[4];

      // pixel 2
      dp[8] = quant[bp[8] + ((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) >> 4)];
      ep1[2] = bp[8] - dp[8];

      // pixel 3
      dp[12] = quant[bp[12] + ((7 * ep1[2] + 5 * ep2[3] + ep2[2]) >> 4)];
      ep1[3] = bp[12] - dp[12];

      // advance to next line
      std::swap(ep1, ep2);
      bp += 16;
      dp += 16;
    }
  }
}

// Decodes one color component to its final 8-bit value.
//
// diff      - nonzero: packed_c is a 5-bit value (< 32); zero: a 4-bit value (< 16).
// inten     - intensity table index (< 8).
// selector  - index into that table (< 4).
// packed_c  - packed component value.
//
// The packed value is expanded to 8 bits by bit replication, the table's modifier is added,
// and the result is clamped to 0..255.
static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c) {
  CRNLIB_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < (diff ? 32 : 16)));
  int c;
  if (diff)
    c = (packed_c >> 2) | (packed_c << 3);
  else
    c = packed_c | (packed_c << 4);
  c += g_etc1_inten_tables[inten][selector];
  c = math::clamp(c, 0, 255);
  return c;
}

// Fills g_etc1_inverse_lookup: for each (diff, inten, selector) configuration and each 8-bit
// target value, the packed component that decodes closest to the target, plus its absolute error.
void pack_etc1_block_init() {
  for (uint diff = 0; diff < 2; diff++) {
    // Number of packed component values: 32 with diff set, 16 otherwise.
    const uint limit = diff ?
// (Continuation of pack_etc1_block_init(), whose opening lines precede this span.)
// NOTE(review): several casts in this span have no target type (e.g. `static_cast(...)`,
// `reinterpret_cast(...)`); the template arguments look to have been lost in extraction -
// confirm against the upstream file.
                                32 : 16;

    for (uint inten = 0; inten < 8; inten++) {
      for (uint selector = 0; selector < 4; selector++) {
        // Table row layout: bit 0 = diff, bits 1-3 = inten, bits 4-5 = selector.
        const uint inverse_table_index = diff + (inten << 1) + (selector << 4);
        for (int color = 0; color < 256; color++) {
          uint best_error = cUINT32_MAX, best_packed_c = 0;
          for (uint packed_c = 0; packed_c < limit; packed_c++) {
            int v = etc1_decode_value(diff, inten, selector, packed_c);
            uint err = labs(v - color);
            if (err < best_error) {
              best_error = err;
              best_packed_c = packed_c;
              if (!best_error)
                break;
            }
          }
          CRNLIB_ASSERT(best_error <= 255);
          // Entry layout: low byte = best packed component, high byte = its absolute error.
          g_etc1_inverse_lookup[inverse_table_index][color] = static_cast(best_packed_c | (best_error << 8));
        }
      }
    }
  }
}

// Packs solid color blocks efficiently using a set of small precomputed tables.
// For random 888 inputs, MSE results are better than Ericsson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time.
//
// block   - destination; all 8 bytes of m_bytes are written.
// pColor  - the solid color's 3 components (indices 0..2 are read).
//
// Returns the summed squared error over the 3 components for one pixel.
static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor, crn_etc1_pack_params& /* pack_params */, pack_etc1_block_context& /* context */) {
  // NOTE(review): presumably a check that pack_etc1_block_init() has filled the table - confirm.
  CRNLIB_ASSERT(g_etc1_inverse_lookup[0][255]);

  // s_next_comp[i] and s_next_comp[i + 1] are the two components other than i.
  static uint s_next_comp[4] = {1, 2, 0, 1};

  uint best_error = cUINT32_MAX, best_i = 0;
  int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;

  // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
  for (uint i = 0; i < 3; i++) {
    const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];

    // Also try the values one below and one above component i.
    const int delta_range = 1;
    for (int delta = -delta_range; delta <= delta_range; delta++) {
      const int c_plus_delta = math::clamp(pColor[i] + delta, 0, 255);

      const uint16* pTable;
      if (!c_plus_delta)
        pTable = g_color8_to_etc_block_config_0_255[0];
      else if (c_plus_delta == 255)
        pTable = g_color8_to_etc_block_config_0_255[1];
      else
        pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];

      // Each list is terminated by 0xFFFF.
      do {
        // x layout: bit 0 = diff, bits 1-3 = inten, bits 4-5 = selector, bits 8-15 = packed component.
        const uint x = *pTable++;

#ifdef CRNLIB_BUILD_DEBUG
        const uint diff = x & 1;
        const uint inten = (x >> 1) & 7;
        const uint selector = (x >> 4) & 3;
        const uint p0 = (x >> 8) & 255;
        CRNLIB_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
#endif

        // Look up the best packed values (and their errors) for the other two components under this configuration.
        const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
        uint16 p1 = pInverse_table[c1];
        uint16 p2 = pInverse_table[c2];
        const uint trial_error = math::square(c_plus_delta - pColor[i]) + math::square(p1 >> 8) + math::square(p2 >> 8);
        if (trial_error < best_error) {
          best_error = trial_error;
          best_x = x;
          best_packed_c1 = p1 & 0xFF;
          best_packed_c2 = p2 & 0xFF;
          best_i = i;
          if (!best_error)
            goto found_perfect_match;
        }
      } while (*pTable != 0xFFFF);
    }
  }
found_perfect_match:

  const uint diff = best_x & 1;
  const uint inten = (best_x >> 1) & 7;

  // Byte 3: the same intensity table in both 3-bit fields, plus the diff bit; the flip bit stays 0.
  block.m_bytes[3] = static_cast(((inten | (inten << 3)) << 2) | (diff << 1));

  // Every pixel uses the same selector, so each 16-bit selector plane is all ones or all zeros.
  const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3];
  *reinterpret_cast(&block.m_bytes[4]) = (etc1_selector & 2) ? 0xFFFF : 0;
  *reinterpret_cast(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0;

  const uint best_packed_c0 = (best_x >> 8) & 255;
  if (diff) {
    // 5-bit components in the top bits; the low 3 bits are left as 0.
    block.m_bytes[best_i] = static_cast(best_packed_c0 << 3);
    block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 << 3);
    block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 << 3);
  } else {
    // The same 4-bit component is written into both nibbles.
    block.m_bytes[best_i] = static_cast(best_packed_c0 | (best_packed_c0 << 4));
    block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 | (best_packed_c1 << 4));
    block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 | (best_packed_c2 << 4));
  }

  return best_error;
}

// Variant of pack_etc1_block_solid_color() that fills an etc1_optimizer::results instead of a
// block, restricted to one encoding mode and optionally to a delta range around a base color.
//
// results               - output; m_pSelectors must point to at least num_colors bytes.
// num_colors            - number of (identical) pixels; the per-pixel error is multiplied by it.
// pColor                - the solid color's 3 components.
// use_diff              - only table entries whose diff bit equals this are considered.
// pBase_color5_unscaled - optional; when non-NULL and diff is set, each packed component must lie
//                         within [cETC1ColorDeltaMin, cETC1ColorDeltaMax] of this color's component.
//
// Returns the total error, or cUINT32_MAX (leaving `results` untouched) when no entry qualifies.
static uint pack_etc1_block_solid_color_constrained(
    etc1_optimizer::results& results,
    uint num_colors, const uint8* pColor,
    crn_etc1_pack_params& /* pack_params */,
    pack_etc1_block_context& /* context */,
    bool use_diff,
    const color_quad_u8* pBase_color5_unscaled) {
  CRNLIB_ASSERT(g_etc1_inverse_lookup[0][255]);

  static uint s_next_comp[4] = {1, 2, 0, 1};

  uint best_error = cUINT32_MAX, best_i = 0;
  int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0;

  // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error.
// (Continuation of pack_etc1_block_solid_color_constrained(), whose opening lines precede this span.)
// NOTE(review): several casts in this span have no target type (e.g. `static_cast(...)`);
// the template arguments look to have been lost in extraction - confirm against the upstream file.
  for (uint i = 0; i < 3; i++) {
    const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]];

    const int delta_range = 1;
    for (int delta = -delta_range; delta <= delta_range; delta++) {
      const int c_plus_delta = math::clamp(pColor[i] + delta, 0, 255);

      const uint16* pTable;
      if (!c_plus_delta)
        pTable = g_color8_to_etc_block_config_0_255[0];
      else if (c_plus_delta == 255)
        pTable = g_color8_to_etc_block_config_0_255[1];
      else
        pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1];

      // Each rejection below uses `continue`, which in a do/while jumps to the loop condition;
      // the explicit 0xFFFF test before it performs the same end-of-list check.
      do {
        const uint x = *pTable++;
        const uint diff = x & 1;
        // Skip entries for the other encoding mode.
        if (static_cast(use_diff) != diff) {
          if (*pTable == 0xFFFF)
            break;
          continue;
        }

        // Component i must be within the allowed delta range of the base color.
        if ((diff) && (pBase_color5_unscaled)) {
          const int p0 = (x >> 8) & 255;
          // Note: this `delta` shadows the loop variable of the enclosing for statement.
          int delta = p0 - static_cast(pBase_color5_unscaled->c[i]);
          if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax)) {
            if (*pTable == 0xFFFF)
              break;
            continue;
          }
        }

#ifdef CRNLIB_BUILD_DEBUG
        {
          const uint inten = (x >> 1) & 7;
          const uint selector = (x >> 4) & 3;
          const uint p0 = (x >> 8) & 255;
          CRNLIB_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta);
        }
#endif

        const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF];
        uint16 p1 = pInverse_table[c1];
        uint16 p2 = pInverse_table[c2];

        // The other two components must be within the allowed delta range as well.
        if ((diff) && (pBase_color5_unscaled)) {
          int delta1 = (p1 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i]]);
          int delta2 = (p2 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i + 1]]);
          if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax)) {
            if (*pTable == 0xFFFF)
              break;
            continue;
          }
        }

        const uint trial_error = math::square(c_plus_delta - pColor[i]) + math::square(p1 >> 8) + math::square(p2 >> 8);
        if (trial_error < best_error) {
          best_error = trial_error;
          best_x = x;
          best_packed_c1 = p1 & 0xFF;
          best_packed_c2 = p2 & 0xFF;
          best_i = i;
          if (!best_error)
            goto found_perfect_match;
        }
      } while (*pTable != 0xFFFF);
    }
  }
found_perfect_match:

  // No entry satisfied the constraints: report failure without touching `results`.
  if (best_error == cUINT32_MAX)
    return best_error;

  // Scale the per-pixel error to the whole group of identical pixels.
  best_error *= num_colors;

  results.m_n = num_colors;
  results.m_block_color4 = !(best_x & 1);
  results.m_block_inten_table = (best_x >> 1) & 7;
  memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors);

  const uint best_packed_c0 = (best_x >> 8) & 255;
  results.m_block_color_unscaled[best_i] = static_cast(best_packed_c0);
  results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast(best_packed_c1);
  results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast(best_packed_c2);
  results.m_error = best_error;

  return best_error;
}

// Packs 16 source pixels into one ETC1 block.
//
// dst_block    - receives the packed block (m_bytes[0..7]).
// pSrc_pixels  - 16 source pixels; only r, g and b are compared here.
// pack_params  - quality and dithering settings.
// context      - holds the etc1_optimizer instance used for the search.
//
// Blocks whose 16 pixels share the same RGB go through pack_etc1_block_solid_color(). Otherwise
// both flip settings and both color modes are tried, with each half of the block optimized
// separately, and the lowest-error combination is written out.
// Returns the error of the chosen encoding.
uint64 pack_etc1_block(etc1_block& dst_block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params, pack_etc1_block_context& context) {
  color_quad_u8 src_pixel0(pSrc_pixels[0]);

  // r ends at 0 only when pixels 1..15 all match pixel 0 in r, g and b.
  int r;
  for (r = 15; r >= 1; --r)
    if ((pSrc_pixels[r].r != src_pixel0.r) || (pSrc_pixels[r].g != src_pixel0.g) || (pSrc_pixels[r].b != src_pixel0.b))
      break;
  if (!r)
    return 16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r, pack_params, context);

  // Optional dithering (done after the solid-color test): the search then runs on the dithered copy.
  color_quad_u8 dithered_pixels[16];
  if (pack_params.m_dithering) {
    DitherBlock(dithered_pixels, pSrc_pixels);
    pSrc_pixels = dithered_pixels;
  }

  uint64 best_error = cUINT64_MAX;
  uint best_flip = false, best_use_color4 = false;

  uint8 best_selectors[2][8];
  etc1_optimizer::results best_results[2];
  for (uint i = 0; i < 2; i++) {
    best_results[i].m_n = 8;
    best_results[i].m_pSelectors = best_selectors[i];
  }

  // results[0] and results[1] are the two subblocks; results[2] is scratch for the solid-color shortcut.
  uint8 selectors[3][8];
  etc1_optimizer::results results[3];
  for (uint i = 0; i < 3; i++) {
    results[i].m_n = 8;
    results[i].m_pSelectors = selectors[i];
  }

  color_quad_u8 subblock_pixels[8];

  etc1_optimizer::params params(pack_params);
  params.m_num_src_pixels = 8;
  params.m_pSrc_pixels = subblock_pixels;

  for (uint flip = 0; flip < 2; flip++) {
    for (uint use_color4 = 0; use_color4 < 2; use_color4++) {
      uint64 trial_error = 0;

      uint subblock;
      for (subblock = 0; subblock < 2; subblock++) {
        // Gather this subblock's 8 pixels: flipped = 8 consecutive pixels (two rows);
        // non-flipped = two adjacent columns.
        if (flip)
          memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8);
        else {
          const
// (Continuation of pack_etc1_block(), inside the non-flipped branch of its subblock loop.)
// NOTE(review): several casts in this span have no target type (e.g. `static_cast(...)`);
// the template arguments look to have been lost in extraction - confirm against the upstream file.
              color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2;
          subblock_pixels[0] = pSrc_col[0];
          subblock_pixels[1] = pSrc_col[4];
          subblock_pixels[2] = pSrc_col[8];
          subblock_pixels[3] = pSrc_col[12];
          subblock_pixels[4] = pSrc_col[1];
          subblock_pixels[5] = pSrc_col[5];
          subblock_pixels[6] = pSrc_col[9];
          subblock_pixels[7] = pSrc_col[13];
        }

        // Solid-color shortcut: at medium quality or better, a subblock with 8 identical pixels is
        // also packed via the table-driven packer (into results[2]) and compared further below.
        results[2].m_error = cUINT64_MAX;
        if ((params.m_quality >= cCRNETCQualityMedium) && ((subblock) || (use_color4))) {
          color_quad_u8 subblock_pixel0(subblock_pixels[0]);
          for (r = 7; r >= 1; --r)
            if ((subblock_pixels[r].r != subblock_pixel0.r) || (subblock_pixels[r].g != subblock_pixel0.g) || (subblock_pixels[r].b != subblock_pixel0.b))
              break;
          if (!r) {
            pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixel0.r, pack_params, context, !use_color4, (subblock && !use_color4) ? &results[0].m_block_color_unscaled : NULL);
          }
        }

        params.m_use_color4 = (use_color4 != 0);
        params.m_constrain_against_base_color5 = false;

        // Without color4, the second subblock's color is constrained against the first subblock's result.
        if ((!use_color4) && (subblock)) {
          params.m_constrain_against_base_color5 = true;
          params.m_base_color5 = results[0].m_block_color_unscaled;
        }

        // Initial scan range depends on quality: +-4 (slow), +-1 (medium), 0 only (otherwise).
        if (params.m_quality == cCRNETCQualitySlow) {
          static const int s_scan_delta_0_to_4[] = {-4, -3, -2, -1, 0, 1, 2, 3, 4};
          params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_0_to_4);
          params.m_pScan_deltas = s_scan_delta_0_to_4;
        } else if (params.m_quality == cCRNETCQualityMedium) {
          static const int s_scan_delta_0_to_1[] = {-1, 0, 1};
          params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_0_to_1);
          params.m_pScan_deltas = s_scan_delta_0_to_1;
        } else {
          static const int s_scan_delta_0[] = {0};
          params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_0);
          params.m_pScan_deltas = s_scan_delta_0;
        }

        // A failed compute() abandons this flip/use_color4 combination (see the subblock < 2 check below).
        context.m_optimizer.init(params, results[subblock]);
        if (!context.m_optimizer.compute())
          break;

        // Fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions.
        const uint refinement_error_thresh0 = 3000;
        const uint refinement_error_thresh1 = 6000;
        if ((params.m_quality >= cCRNETCQualityMedium) && (results[subblock].m_error > refinement_error_thresh0)) {
          // Second pass over deltas the first pass did not cover.
          if (params.m_quality == cCRNETCQualityMedium) {
            static const int s_scan_delta_2_to_3[] = {-3, -2, 2, 3};
            params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_2_to_3);
            params.m_pScan_deltas = s_scan_delta_2_to_3;
          } else {
            static const int s_scan_delta_5_to_5[] = {-5, 5};
            static const int s_scan_delta_5_to_8[] = {-8, -7, -6, -5, 5, 6, 7, 8};
            if (results[subblock].m_error > refinement_error_thresh1) {
              params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_5_to_8);
              params.m_pScan_deltas = s_scan_delta_5_to_8;
            } else {
              params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(s_scan_delta_5_to_5);
              params.m_pScan_deltas = s_scan_delta_5_to_5;
            }
          }

          if (!context.m_optimizer.compute())
            break;
        }

        // Prefer the solid-color result when it has the lower error.
        if (results[2].m_error < results[subblock].m_error)
          results[subblock] = results[2];

        // Abandon this combination once it can no longer beat the best one.
        trial_error += results[subblock].m_error;
        if (trial_error >= best_error)
          break;
      }

      // subblock < 2 means the loop above exited early: this combination is rejected.
      if (subblock < 2)
        continue;

      best_error = trial_error;
      best_results[0] = results[0];
      best_results[1] = results[1];
      best_flip = flip;
      best_use_color4 = use_color4;

    }  // use_color4

  }  // flip

  int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r;
  int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g;
  int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b;
  if (!best_use_color4) {
    if ((math::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (math::maximum(dr, dg, db) > cETC1ColorDeltaMax)) {
      // Shouldn't ever happen
      CRNLIB_VERIFY(0);
    }
  }

  if (best_use_color4) {
    // color4 mode: subblock 0 in the high nibble, subblock 1 in the low nibble.
    dst_block.m_bytes[0] = static_cast(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4));
    dst_block.m_bytes[1] = static_cast(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4));
    dst_block.m_bytes[2] = static_cast(best_results[1].m_block_color_unscaled.b | (best_results[0].m_block_color_unscaled.b << 4));
  } else {
    // Otherwise: subblock 0's color in the top 5 bits and the delta to subblock 1 in the low 3 bits
    // (negative deltas are stored as delta + 8).
    if (dr < 0)
      dr += 8;
    if (dg < 0)
      dg += 8;
    if (db < 0)
      db += 8;
    dst_block.m_bytes[0] = static_cast((best_results[0].m_block_color_unscaled.r << 3) | dr);
    dst_block.m_bytes[1] = static_cast((best_results[0].m_block_color_unscaled.g << 3) | dg);
    dst_block.m_bytes[2] = static_cast((best_results[0].m_block_color_unscaled.b << 3) | db);
  }

  // Byte 3: intensity tables (subblock 0 at bit 5, subblock 1 at bit 2), diff bit (set when
  // color4 is not used) at bit 1, flip bit at bit 0.
  dst_block.m_bytes[3] = static_cast((best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip);

  // selector0 collects the low bit and selector1 the high bit of each pixel's ETC1 selector.
  uint selector0 = 0, selector1 = 0;
  if (best_flip) {
    // flipped:
    // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 },
    // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 }
    //
    // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 },
    // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }
    const uint8* pSelectors0 = best_results[0].m_pSelectors;
    const uint8* pSelectors1 = best_results[1].m_pSelectors;
    for (int x = 3; x >= 0; --x) {
      uint b;
      b = g_selector_index_to_etc1[pSelectors1[4 + x]];
      selector0 = (selector0 << 1) | (b & 1);
      selector1 = (selector1 << 1) | (b >> 1);

      b = g_selector_index_to_etc1[pSelectors1[x]];
      selector0 = (selector0 << 1) | (b & 1);
      selector1 = (selector1 << 1) | (b >> 1);

      b = g_selector_index_to_etc1[pSelectors0[4 + x]];
      selector0 = (selector0 << 1) | (b & 1);
      selector1 = (selector1 << 1) | (b >> 1);

      b = g_selector_index_to_etc1[pSelectors0[x]];
      selector0 = (selector0 << 1) | (b & 1);
      selector1 = (selector1 << 1) | (b >> 1);
    }
  } else {
    // non-flipped:
    // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 },
    // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 }
    //
    // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 },
    // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 }
    for (int subblock = 1; subblock >= 0; --subblock) {
      // Start at the subblock's second group of 4 selectors and walk backwards.
      const uint8* pSelectors = best_results[subblock].m_pSelectors + 4;
      for (uint i = 0; i < 2; i++) {
        uint b;
        b = g_selector_index_to_etc1[pSelectors[3]];
        selector0 = (selector0 << 1) | (b & 1);
        selector1 = (selector1 << 1) | (b >> 1);

        b = g_selector_index_to_etc1[pSelectors[2]];
        selector0 = (selector0 << 1) | (b & 1);
        selector1 = (selector1 << 1) | (b >> 1);

        b = g_selector_index_to_etc1[pSelectors[1]];
        selector0 = (selector0 << 1) | (b & 1);
        selector1 = (selector1 << 1) | (b >> 1);

        b = g_selector_index_to_etc1[pSelectors[0]];
        selector0 = (selector0 << 1) | (b & 1);
        selector1 = (selector1 << 1) | (b >> 1);

        pSelectors -= 4;
      }
    }
  }

  // High-bit plane goes to bytes 4-5, low-bit plane to bytes 6-7, each most significant byte first.
  dst_block.m_bytes[4] = static_cast(selector1 >> 8);
  dst_block.m_bytes[5] = static_cast(selector1 & 0xFF);
  dst_block.m_bytes[6] = static_cast(selector0 >> 8);
  dst_block.m_bytes[7] = static_cast(selector0 & 0xFF);

  return best_error;
}

// Packs 16 source pixels as a single optimization over the whole block (one base color and one
// intensity table for all 16 pixels) rather than per 8-pixel subblock.
//
// dst_block    - receives the packed block.
// pSrc_pixels  - 16 source pixels.
// pack_params  - only m_quality is read in the visible part of this function.
//
// NOTE(review): `params` is default-constructed and `params.m_quality` is never assigned from
// `pack_params` in this function, yet the refinement test below reads `params.m_quality` while
// the initial scan range reads `pack_params.m_quality`. Confirm this asymmetry is intended.
uint64 pack_etc1s_block(etc1_block& dst_block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params) {
  uint8 selectors[16];
  etc1_optimizer optimizer;
  etc1_optimizer::params params;
  params.m_pSrc_pixels = pSrc_pixels;
  params.m_num_src_pixels = 16;
  params.m_use_color4 = false;
  params.m_constrain_against_base_color5 = false;
  etc1_optimizer::results results;
  results.m_pSelectors = selectors;
  results.m_n = 16;
  optimizer.init(params, results);
  // Use the centered sub-range of `scan`: all 9 deltas (slow), the middle 3 (medium), or just 0.
  const int scan[] = {-4, -3, -2, -1, 0, 1, 2, 3, 4};
  params.m_scan_delta_size = pack_params.m_quality == cCRNETCQualitySlow ? CRNLIB_ARRAY_SIZE(scan) : pack_params.m_quality == cCRNETCQualityMedium ? 3 : 1;
  params.m_pScan_deltas = scan + ((CRNLIB_ARRAY_SIZE(scan) - params.m_scan_delta_size) >> 1);
  optimizer.compute();
  // Second pass over a wider delta range when the first pass left a large error.
  if (params.m_quality >= cCRNETCQualityMedium && results.m_error > 6000) {
    const int refine_medium[] = {-3, -2, 2, 3};
    const int refine_high[] = {-8, -7, -6, -5, 5, 6, 7, 8};
    if (params.m_quality == cCRNETCQualityMedium) {
      params.m_scan_delta_size = CRNLIB_ARRAY_SIZE(refine_medium);
      params.m_pScan_deltas = refine_medium;
    } else {
      params.m_scan_delta_size = results.m_error > 12000 ?
CRNLIB_ARRAY_SIZE(refine_high) : 2; params.m_pScan_deltas = refine_high + ((CRNLIB_ARRAY_SIZE(refine_high) - params.m_scan_delta_size) >> 1); } optimizer.compute(); } uint32 selector = 0; for (uint32 i = 0, t = 8, h = 0; h < 4; h++, t -= 15) { for (uint32 w = 0; w < 4; w++, t += 4, i++) { uint32 s = g_selector_index_to_etc1[selectors[i]]; selector |= (s >> 1 | (s & 1) << 16) << (t & 15); } } dst_block.m_uint64 = (uint64)selector << 32 | results.m_block_inten_table << 29 | results.m_block_inten_table << 26 | 1 << 25 | (results.m_block_color_unscaled.m_u32 & 0xFFFFFF) << 3; return results.m_error; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_etc.h000066400000000000000000000421251503722002600210620ustar00rootroot00000000000000// File: crn_etc.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "../inc/crnlib.h" #include "crn_dxt.h" namespace crnlib { enum etc_constants { cETC1BytesPerBlock = 8U, cETC1SelectorBits = 2U, cETC1SelectorValues = 1U << cETC1SelectorBits, cETC1SelectorMask = cETC1SelectorValues - 1U, cETC1BlockShift = 2U, cETC1BlockSize = 1U << cETC1BlockShift, cETC1LSBSelectorIndicesBitOffset = 0, cETC1MSBSelectorIndicesBitOffset = 16, cETC1FlipBitOffset = 32, cETC1DiffBitOffset = 33, cETC1IntenModifierNumBits = 3, cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, cETC1RightIntenModifierTableBitOffset = 34, cETC1LeftIntenModifierTableBitOffset = 37, // Base+Delta encoding (5 bit bases, 3 bit delta) cETC1BaseColorCompNumBits = 5, cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, cETC1DeltaColorCompNumBits = 3, cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, cETC1BaseColor5RBitOffset = 59, cETC1BaseColor5GBitOffset = 51, cETC1BaseColor5BBitOffset = 43, cETC1DeltaColor3RBitOffset = 56, cETC1DeltaColor3GBitOffset = 48, cETC1DeltaColor3BBitOffset = 40, // Absolute (non-delta) encoding (two 4-bit per component bases) 
  // (Continuation of enum etc_constants, whose opening lines precede this span.)
  // NOTE(review): several casts in this span have no target type (e.g. `static_cast(...)`);
  // the template arguments look to have been lost in extraction - confirm against the upstream file.
  cETC1AbsColorCompNumBits = 4,
  cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits,

  cETC1AbsColor4R1BitOffset = 60,
  cETC1AbsColor4G1BitOffset = 52,
  cETC1AbsColor4B1BitOffset = 44,

  cETC1AbsColor4R2BitOffset = 56,
  cETC1AbsColor4G2BitOffset = 48,
  cETC1AbsColor4B2BitOffset = 40,

  cETC1ColorDeltaMin = -4,
  cETC1ColorDeltaMax = 3,

  // Delta3:
  // 0   1   2   3   4   5   6   7
  // 000 001 010 011 100 101 110 111
  // 0   1   2   3   -4  -3  -2  -1
};

extern const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues];
extern const uint8 g_etc1_to_selector_index[cETC1SelectorValues];
extern const uint8 g_selector_index_to_etc1[cETC1SelectorValues];

struct etc1_coord2 {
  uint8 m_x, m_y;
};
extern const etc1_coord2 g_etc1_pixel_coords[2][2][8];  // [flipped][subblock][subblock_pixel]

// One packed ETC1 block with bit-level accessors for its fields.
struct etc1_block {
  // big endian uint64:
  // bit ofs:  56  48  40  32  24  16   8   0
  // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7
  union {
    uint64 m_uint64;
    uint8 m_bytes[8];
  };

  // NOTE(review): none of the accessors visible in this struct touch the three arrays below - confirm their purpose.
  uint8 m_low_color[2];
  uint8 m_high_color[2];

  enum { cNumSelectorBytes = 4 };
  uint8 m_selectors[cNumSelectorBytes];

  inline void clear() {
    utils::zero_this(this);
  }

  // Reads `num` bits (1..31) starting at bit offset `ofs` of the block viewed as a big endian uint64.
  inline uint get_general_bits(uint ofs, uint num) const {
    CRNLIB_ASSERT((ofs + num) <= 64U);
    CRNLIB_ASSERT(num && (num < 32U));
    return (utils::read_be64(&m_uint64) >> ofs) & ((1UL << num) - 1UL);
  }

  // Writes `num` bits (1..31) at bit offset `ofs` of the big endian uint64 view.
  // `bits` is not masked to `num` bits before being OR-ed in.
  inline void set_general_bits(uint ofs, uint num, uint bits) {
    CRNLIB_ASSERT((ofs + num) <= 64U);
    CRNLIB_ASSERT(num && (num < 32U));

    uint64 x = utils::read_be64(&m_uint64);
    uint64 msk = ((1ULL << static_cast(num)) - 1ULL) << static_cast(ofs);
    x &= ~msk;
    x |= (static_cast(bits) << static_cast(ofs));
    utils::write_be64(&m_uint64, x);
  }

  // Reads `num` bits (1..8) at bit offset `ofs`; the field must not cross a byte boundary.
  inline uint get_byte_bits(uint ofs, uint num) const {
    CRNLIB_ASSERT((ofs + num) <= 64U);
    CRNLIB_ASSERT(num && (num <= 8U));
    CRNLIB_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
    // Bit offset 0 lives in the last byte (m_bytes[7]).
    const uint byte_ofs = 7 - (ofs >> 3);
    const uint byte_bit_ofs = ofs & 7;
    return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1);
  }

  // Writes `num` bits at bit offset `ofs`; the field must not cross a byte boundary.
  // NOTE(review): this asserts num < 32 whereas get_byte_bits() asserts num <= 8, although both
  // operate on a single byte - confirm whether the looser bound is intended.
  inline void set_byte_bits(uint ofs, uint num, uint bits) {
    CRNLIB_ASSERT((ofs + num) <= 64U);
    CRNLIB_ASSERT(num && (num < 32U));
    CRNLIB_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3));
    CRNLIB_ASSERT(bits < (1U << num));
    const uint byte_ofs = 7 - (ofs >> 3);
    const uint byte_bit_ofs = ofs & 7;
    const uint mask = (1 << num) - 1;
    m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs);
    m_bytes[byte_ofs] |= (bits << byte_bit_ofs);
  }

  // false = left/right subblocks
  // true = upper/lower subblocks
  inline bool get_flip_bit() const {
    return (m_bytes[3] & 1) != 0;
  }

  inline void set_flip_bit(bool flip) {
    m_bytes[3] &= ~1;
    m_bytes[3] |= static_cast(flip);
  }

  // The diff bit is bit 1 of byte 3.
  inline bool get_diff_bit() const {
    return (m_bytes[3] & 2) != 0;
  }

  inline void set_diff_bit(bool diff) {
    m_bytes[3] &= ~2;
    m_bytes[3] |= (static_cast(diff) << 1);
  }

  // Returns intensity modifier table (0-7) used by subblock subblock_id.
  // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2)
  inline uint get_inten_table(uint subblock_id) const {
    CRNLIB_ASSERT(subblock_id < 2);
    const uint ofs = subblock_id ? 2 : 5;
    return (m_bytes[3] >> ofs) & 7;
  }

  // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1)
  inline void set_inten_table(uint subblock_id, uint t) {
    CRNLIB_ASSERT(subblock_id < 2);
    CRNLIB_ASSERT(t < 8);
    const uint ofs = subblock_id ? 2 : 5;
    m_bytes[3] &= ~(7 << ofs);
    m_bytes[3] |= (t << ofs);
  }

  // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables.
  inline uint get_selector(uint x, uint y) const {
    CRNLIB_ASSERT((x | y) < 4);

    const uint bit_index = x * 4 + y;
    const uint byte_bit_ofs = bit_index & 7;
    // The low selector bit lives in bytes 6-7; the high bit is two bytes earlier (bytes 4-5).
    const uint8* p = &m_bytes[7 - (bit_index >> 3)];
    const uint lsb = (p[0] >> byte_bit_ofs) & 1;
    const uint msb = (p[-2] >> byte_bit_ofs) & 1;
    const uint val = lsb | (msb << 1);

    return g_etc1_to_selector_index[val];
  }

  // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables.
  inline void set_selector(uint x, uint y, uint val) {
    CRNLIB_ASSERT((x | y | val) < 4);
    const uint bit_index = x * 4 + y;

    uint8* p = &m_bytes[7 - (bit_index >> 3)];

    const uint byte_bit_ofs = bit_index & 7;
    const uint mask = 1 << byte_bit_ofs;

    const uint etc1_val = g_selector_index_to_etc1[val];

    const uint lsb = etc1_val & 1;
    const uint msb = etc1_val >> 1;

    p[0] &= ~mask;
    p[0] |= (lsb << byte_bit_ofs);

    p[-2] &= ~mask;
    p[-2] |= (msb << byte_bit_ofs);
  }

  // Stores a packed 4:4:4 color (r in bits 8-11, g in bits 4-7, b in bits 0-3) as base color
  // 1 (idx == 0) or base color 2 (idx != 0).
  inline void set_base4_color(uint idx, uint16 c) {
    if (idx) {
      set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15);
      set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15);
      set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15);
    } else {
      set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15);
      set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15);
      set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15);
    }
  }

  // Returns base color 1 (idx == 0) or 2 (idx != 0) in the packed 4:4:4 layout used by set_base4_color().
  inline uint16 get_base4_color(uint idx) const {
    uint r, g, b;
    if (idx) {
      r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4);
      g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4);
      b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4);
    } else {
      r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4);
      g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4);
      b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4);
    }
    return static_cast(b | (g << 4U) | (r << 8U));
  }

  // Stores a packed 5:5:5 base color (r in bits 10-14, g in bits 5-9, b in bits 0-4).
  inline void set_base5_color(uint16 c) {
    set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31);
    set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31);
    set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31);
  }

  inline uint16 get_base5_color() const {
    const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5);
    const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5);
    const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5);
    return static_cast(b | (g << 5U) | (r << 10U));
  }

  // Stores a packed 3:3:3 delta (r in bits 6-8, g in bits 3-5, b in bits 0-2).
  void set_delta3_color(uint16 c) {
    set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7);
    set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7);
    set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7);
  }

  inline uint16 get_delta3_color() const {
    const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3);
    const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3);
    const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3);
    return static_cast(b | (g << 3U) | (r << 6U));
  }

  // Base color 5
  static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U);
  static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U);

  static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U);
  static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled);

  static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);
  static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U);

  // Delta color 3
  // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
  static uint16 pack_delta3(const color_quad_i16& color);
  static uint16 pack_delta3(int r, int g, int b);

  // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax)
  static color_quad_i16 unpack_delta3(uint16 packed_delta3);
  static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3);

  // Abs color 4
  static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U);
  static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U);

  static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U);
  static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled);

  // subblock colors
  static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx);
  static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx);
  static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx);

  // Expands an unscaled color to 8 bits per channel by bit replication: 4-bit components when
  // color4 is true, 5-bit components otherwise. Alpha is copied unchanged.
  static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4) {
    if (color4) {
      dst.r = src.r | (src.r << 4);
      dst.g = src.g | (src.g << 4);
      dst.b = src.b | (src.b << 4);
    } else {
      dst.r = (src.r >> 2) | (src.r << 3);
      dst.g = (src.g >> 2) | (src.g << 3);
      dst.b = (src.b >> 2) | (src.b << 3);
    }
    dst.a = src.a;
  }
};

CRNLIB_DEFINE_BITWISE_COPYABLE(etc1_block);

// Returns false if the block is invalid (it will still be unpacked with clamping).
bool unpack_etc1(const etc1_block& block, color_quad_u8* pDst, bool preserve_alpha = false);

// Packer quality levels; the code that reads these compares them with <= and >=, so the order matters.
enum crn_etc_quality {
  cCRNETCQualityFast,
  cCRNETCQualityMedium,
  cCRNETCQualitySlow,

  cCRNETCQualityTotal,

  cCRNETCQualityForceDWORD = 0xFFFFFFFF
};

// User-facing packer settings. Defaults (set by clear()): slow quality, perceptual on, dithering off.
struct crn_etc1_pack_params {
  crn_etc_quality m_quality;
  bool m_perceptual;
  bool m_dithering;

  inline crn_etc1_pack_params() {
    clear();
  }

  void clear() {
    m_quality = cCRNETCQualitySlow;
    m_perceptual = true;
    m_dithering = false;
  }
};

// (This definition continues past the end of this span; code reproduced verbatim.)
struct etc1_solution_coordinates {
  inline etc1_solution_coordinates()
      : m_unscaled_color(0, 0, 0, 0),
        m_inten_table(0),
        m_color4(false) {
  }

  inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4)
      : m_unscaled_color(r, g, b, 255),
        m_inten_table(inten_table),
        m_color4(color4) {
  }

  inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4)
      : m_unscaled_color(c),
        m_inten_table(inten_table),
        m_color4(color4) {
  }

  inline etc1_solution_coordinates(const etc1_solution_coordinates& other) {
    *this = other;
  }

  inline etc1_solution_coordinates& operator=(const etc1_solution_coordinates& rhs) {
    m_unscaled_color = rhs.m_unscaled_color;
    m_inten_table = rhs.m_inten_table;
    m_color4 = rhs.m_color4;
    return *this;
  }

  inline void clear() {
    m_unscaled_color.clear();
    m_inten_table = 0;
    m_color4 = false;
  }

  inline color_quad_u8 get_scaled_color() const {
    int br, bg, bb;
    if (m_color4) {
      br = m_unscaled_color.r | (m_unscaled_color.r << 4);
      bg = m_unscaled_color.g | (m_unscaled_color.g << 4);
      bb = m_unscaled_color.b |
(m_unscaled_color.b << 4); } else { br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); } return color_quad_u8(br, bg, bb); } inline void get_block_colors(color_quad_u8* pBlock_colors) { int br, bg, bb; if (m_color4) { br = m_unscaled_color.r | (m_unscaled_color.r << 4); bg = m_unscaled_color.g | (m_unscaled_color.g << 4); bb = m_unscaled_color.b | (m_unscaled_color.b << 4); } else { br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); } const int* pInten_table = g_etc1_inten_tables[m_inten_table]; pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]); pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]); pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]); pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]); } color_quad_u8 m_unscaled_color; uint m_inten_table; bool m_color4; }; class etc1_optimizer { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(etc1_optimizer); public: etc1_optimizer() { clear(); } void clear() { m_pParams = NULL; m_pResult = NULL; m_pSorted_luma = NULL; m_pSorted_luma_indices = NULL; } struct params : crn_etc1_pack_params { params() { clear(); } params(const crn_etc1_pack_params& base_params) : crn_etc1_pack_params(base_params) { clear_optimizer_params(); } void clear() { crn_etc1_pack_params::clear(); clear_optimizer_params(); } void clear_optimizer_params() { m_num_src_pixels = 0; m_pSrc_pixels = 0; m_use_color4 = false; static const int s_default_scan_delta[] = {0}; m_pScan_deltas = s_default_scan_delta; m_scan_delta_size = 1; m_base_color5.clear(); m_constrain_against_base_color5 = false; } uint m_num_src_pixels; const color_quad_u8* m_pSrc_pixels; bool m_use_color4; 
const int* m_pScan_deltas; uint m_scan_delta_size; color_quad_u8 m_base_color5; bool m_constrain_against_base_color5; }; struct results { uint64 m_error; color_quad_u8 m_block_color_unscaled; uint m_block_inten_table; uint m_n; uint8* m_pSelectors; bool m_block_color4; inline results& operator=(const results& rhs) { m_block_color_unscaled = rhs.m_block_color_unscaled; m_block_color4 = rhs.m_block_color4; m_block_inten_table = rhs.m_block_inten_table; m_error = rhs.m_error; CRNLIB_ASSERT(m_n == rhs.m_n); memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n); return *this; } }; void init(const params& params, results& result); bool compute(); private: struct potential_solution { potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false) { } etc1_solution_coordinates m_coords; crnlib::vector m_selectors; uint64 m_error; bool m_valid; void clear() { m_coords.clear(); m_selectors.resize(0); m_error = cUINT64_MAX; m_valid = false; } bool are_selectors_all_equal() const { if (m_selectors.empty()) return false; const uint s = m_selectors[0]; for (uint i = 1; i < m_selectors.size(); i++) if (m_selectors[i] != s) return false; return true; } }; const params* m_pParams; results* m_pResult; int m_limit; vec3F m_avg_color; int m_br, m_bg, m_bb; crnlib::vector m_luma; crnlib::vector m_sorted_luma[2]; const uint32* m_pSorted_luma_indices; uint32* m_pSorted_luma; crnlib::vector m_selectors; crnlib::vector m_best_selectors; potential_solution m_best_solution; potential_solution m_trial_solution; crnlib::vector m_temp_selectors; bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); }; struct pack_etc1_block_context { etc1_optimizer m_optimizer; }; void pack_etc1_block_init(); uint64 pack_etc1_block(etc1_block& block, const color_quad_u8* pSrc_pixels, 
crn_etc1_pack_params& pack_params, pack_etc1_block_context& context); uint64 pack_etc1s_block(etc1_block& block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params); } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_file_utils.cpp000066400000000000000000000306761503722002600230110ustar00rootroot00000000000000// File: crn_file_utils.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_file_utils.h" #include "crn_strutils.h" #if CRNLIB_USE_WIN32_API #include "crn_winhdr.h" #endif #ifdef WIN32 #include #endif #ifdef __GNUC__ #include #include #include #endif namespace crnlib { #if CRNLIB_USE_WIN32_API bool file_utils::is_read_only(const char* pFilename) { uint32 dst_file_attribs = GetFileAttributesA(pFilename); if (dst_file_attribs == INVALID_FILE_ATTRIBUTES) return false; if (dst_file_attribs & FILE_ATTRIBUTE_READONLY) return true; return false; } bool file_utils::disable_read_only(const char* pFilename) { uint32 dst_file_attribs = GetFileAttributesA(pFilename); if (dst_file_attribs == INVALID_FILE_ATTRIBUTES) return false; if (dst_file_attribs & FILE_ATTRIBUTE_READONLY) { dst_file_attribs &= ~FILE_ATTRIBUTE_READONLY; if (SetFileAttributesA(pFilename, dst_file_attribs)) return true; } return false; } bool file_utils::is_older_than(const char* pSrcFilename, const char* pDstFilename) { WIN32_FILE_ATTRIBUTE_DATA src_file_attribs; const BOOL src_file_exists = GetFileAttributesExA(pSrcFilename, GetFileExInfoStandard, &src_file_attribs); WIN32_FILE_ATTRIBUTE_DATA dst_file_attribs; const BOOL dest_file_exists = GetFileAttributesExA(pDstFilename, GetFileExInfoStandard, &dst_file_attribs); if ((dest_file_exists) && (src_file_exists)) { LONG timeComp = CompareFileTime(&src_file_attribs.ftLastWriteTime, &dst_file_attribs.ftLastWriteTime); if (timeComp < 0) return true; } return false; } bool file_utils::does_file_exist(const char* pFilename) { const DWORD fullAttributes = 
GetFileAttributesA(pFilename); if (fullAttributes == INVALID_FILE_ATTRIBUTES) return false; if (fullAttributes & FILE_ATTRIBUTE_DIRECTORY) return false; return true; } bool file_utils::does_dir_exist(const char* pDir) { //-- Get the file attributes. DWORD fullAttributes = GetFileAttributesA(pDir); if (fullAttributes == INVALID_FILE_ATTRIBUTES) return false; if (fullAttributes & FILE_ATTRIBUTE_DIRECTORY) return true; return false; } bool file_utils::get_file_size(const char* pFilename, uint64& file_size) { file_size = 0; WIN32_FILE_ATTRIBUTE_DATA attr; if (0 == GetFileAttributesExA(pFilename, GetFileExInfoStandard, &attr)) return false; if (attr.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) return false; file_size = static_cast(attr.nFileSizeLow) | (static_cast(attr.nFileSizeHigh) << 32U); return true; } #elif defined(__GNUC__) bool file_utils::is_read_only(const char* /* pFilename */) { // TODO return false; } bool file_utils::disable_read_only(const char* /* pFilename */) { // TODO return false; } bool file_utils::is_older_than(const char* /* pSrcFilename */, const char* /* pDstFilename */) { // TODO return false; } bool file_utils::does_file_exist(const char* pFilename) { struct stat stat_buf; int result = stat(pFilename, &stat_buf); if (result) return false; if (S_ISREG(stat_buf.st_mode)) return true; return false; } bool file_utils::does_dir_exist(const char* pDir) { struct stat stat_buf; int result = stat(pDir, &stat_buf); if (result) return false; if (S_ISDIR(stat_buf.st_mode) || S_ISLNK(stat_buf.st_mode)) return true; return false; } bool file_utils::get_file_size(const char* pFilename, uint64& file_size) { file_size = 0; struct stat stat_buf; int result = stat(pFilename, &stat_buf); if (result) return false; if (!S_ISREG(stat_buf.st_mode)) return false; file_size = stat_buf.st_size; return true; } #else bool file_utils::is_read_only(const char* pFilename) { return false; } bool file_utils::disable_read_only(const char* pFilename) { pFilename; // TODO return 
false; } bool file_utils::is_older_than(const char* pSrcFilename, const char* pDstFilename) { return false; } bool file_utils::does_file_exist(const char* pFilename) { FILE* pFile; crn_fopen(&pFile, pFilename, "rb"); if (!pFile) return false; fclose(pFile); return true; } bool file_utils::does_dir_exist(const char* pDir) { return false; } bool file_utils::get_file_size(const char* pFilename, uint64& file_size) { FILE* pFile; crn_fopen(&pFile, pFilename, "rb"); if (!pFile) return false; crn_fseek(pFile, 0, SEEK_END); file_size = crn_ftell(pFile); fclose(pFile); return true; } #endif bool file_utils::get_file_size(const char* pFilename, uint32& file_size) { uint64 file_size64; if (!get_file_size(pFilename, file_size64)) { file_size = 0; return false; } if (file_size64 > cUINT32_MAX) file_size64 = cUINT32_MAX; file_size = static_cast(file_size64); return true; } bool file_utils::is_path_separator(char c) { #ifdef WIN32 return (c == '/') || (c == '\\'); #else return (c == '/'); #endif } bool file_utils::is_path_or_drive_separator(char c) { #ifdef WIN32 return (c == '/') || (c == '\\') || (c == ':'); #else return (c == '/'); #endif } bool file_utils::is_drive_separator(char c) { #ifdef WIN32 return (c == ':'); #else (void)c; return false; #endif } bool file_utils::split_path(const char* p, dynamic_string* pDrive, dynamic_string* pDir, dynamic_string* pFilename, dynamic_string* pExt) { CRNLIB_ASSERT(p); #ifdef WIN32 char drive_buf[_MAX_DRIVE]; char dir_buf[_MAX_DIR]; char fname_buf[_MAX_FNAME]; char ext_buf[_MAX_EXT]; #ifdef _MSC_VER // Compiling with MSVC errno_t error = _splitpath_s(p, pDrive ? drive_buf : NULL, pDrive ? _MAX_DRIVE : 0, pDir ? dir_buf : NULL, pDir ? _MAX_DIR : 0, pFilename ? fname_buf : NULL, pFilename ? _MAX_FNAME : 0, pExt ? ext_buf : NULL, pExt ? _MAX_EXT : 0); if (error != 0) return false; #else // Compiling with MinGW _splitpath(p, pDrive ? drive_buf : NULL, pDir ? dir_buf : NULL, pFilename ? fname_buf : NULL, pExt ? 
ext_buf : NULL); #endif if (pDrive) *pDrive = drive_buf; if (pDir) *pDir = dir_buf; if (pFilename) *pFilename = fname_buf; if (pExt) *pExt = ext_buf; #else char dirtmp[1024]; char nametmp[1024]; strcpy_safe(dirtmp, sizeof(dirtmp), p); strcpy_safe(nametmp, sizeof(nametmp), p); if (pDrive) pDrive->clear(); const char* pDirName = dirname(dirtmp); if (!pDirName) return false; if (pDir) { pDir->set(pDirName); if ((!pDir->is_empty()) && (pDir->back() != '/')) pDir->append_char('/'); } const char* pBaseName = basename(nametmp); if (!pBaseName) return false; if (pFilename) { pFilename->set(pBaseName); remove_extension(*pFilename); } if (pExt) { pExt->set(pBaseName); get_extension(*pExt); *pExt = "." + *pExt; } #endif // #ifdef WIN32 return true; } bool file_utils::split_path(const char* p, dynamic_string& path, dynamic_string& filename) { dynamic_string temp_drive, temp_path, temp_ext; if (!split_path(p, &temp_drive, &temp_path, &filename, &temp_ext)) return false; filename += temp_ext; combine_path(path, temp_drive.get_ptr(), temp_path.get_ptr()); return true; } bool file_utils::get_pathname(const char* p, dynamic_string& path) { dynamic_string temp_drive, temp_path; if (!split_path(p, &temp_drive, &temp_path, NULL, NULL)) return false; combine_path(path, temp_drive.get_ptr(), temp_path.get_ptr()); return true; } bool file_utils::get_filename(const char* p, dynamic_string& filename) { dynamic_string temp_ext; if (!split_path(p, NULL, NULL, &filename, &temp_ext)) return false; filename += temp_ext; return true; } void file_utils::combine_path(dynamic_string& dst, const char* pA, const char* pB) { dynamic_string temp(pA); if ((!temp.is_empty()) && (!is_path_separator(pB[0]))) { char c = temp[temp.get_len() - 1]; if (!is_path_separator(c)) temp.append_char(CRNLIB_PATH_SEPERATOR_CHAR); } temp += pB; dst.swap(temp); } void file_utils::combine_path(dynamic_string& dst, const char* pA, const char* pB, const char* pC) { combine_path(dst, pA, pB); combine_path(dst, dst.get_ptr(), 
pC); } bool file_utils::full_path(dynamic_string& path) { #ifdef WIN32 char buf[1024]; char* p = _fullpath(buf, path.get_ptr(), sizeof(buf)); if (!p) return false; #else char buf[PATH_MAX]; char* p; dynamic_string pn, fn; split_path(path.get_ptr(), pn, fn); if ((fn == ".") || (fn == "..")) { p = realpath(path.get_ptr(), buf); if (!p) return false; path.set(buf); } else { if (pn.is_empty()) pn = "./"; p = realpath(pn.get_ptr(), buf); if (!p) return false; combine_path(path, buf, fn.get_ptr()); } #endif return true; } bool file_utils::get_extension(dynamic_string& filename) { int sep = -1; #ifdef WIN32 sep = filename.find_right('\\'); #endif if (sep < 0) sep = filename.find_right('/'); int dot = filename.find_right('.'); if (dot < sep) { filename.clear(); return false; } filename.right(dot + 1); return true; } bool file_utils::remove_extension(dynamic_string& filename) { int sep = -1; #ifdef WIN32 sep = filename.find_right('\\'); #endif if (sep < 0) sep = filename.find_right('/'); int dot = filename.find_right('.'); if (dot < sep) return false; filename.left(dot); return true; } bool file_utils::create_path(const dynamic_string& fullpath) { #ifdef WIN32 bool got_unc = false; #endif dynamic_string cur_path; const int l = fullpath.get_len(); int n = 0; while (n < l) { const char c = fullpath.get_ptr()[n]; const bool sep = is_path_separator(c); const bool back_sep = is_path_separator(cur_path.back()); const bool is_last_char = (n == (l - 1)); if (((sep) && (!back_sep)) || (is_last_char)) { if ((is_last_char) && (!sep)) cur_path.append_char(c); bool valid = !cur_path.is_empty(); #ifdef WIN32 // reject obvious stuff (drives, beginning of UNC paths): // c:\b\cool // \\machine\blah // \cool\blah if ((cur_path.get_len() == 2) && (cur_path[1] == ':')) valid = false; else if ((cur_path.get_len() >= 2) && (cur_path[0] == '\\') && (cur_path[1] == '\\')) { if (!got_unc) valid = false; got_unc = true; } else if (cur_path == "\\") valid = false; #endif if (cur_path == "/") valid = 
false; if ((valid) && (cur_path.get_len())) { #ifdef WIN32 _mkdir(cur_path.get_ptr()); #else mkdir(cur_path.get_ptr(), S_IRWXU | S_IRWXG | S_IRWXO); #endif } } cur_path.append_char(c); n++; } return true; } void file_utils::trim_trailing_seperator(dynamic_string& path) { if ((path.get_len()) && (is_path_separator(path.back()))) path.truncate(path.get_len() - 1); } // https://www.codeproject.com/Articles/1088/Wildcard-string-compare-globbing int file_utils::wildcmp(const char* pWild, const char* pString) { const char *cp = NULL, *mp = NULL; while ((*pString) && (*pWild != '*')) { if ((*pWild != *pString) && (*pWild != '?')) return 0; pWild++; pString++; } // Either *pString=='\0' or *pWild='*' here. while (*pString) { if (*pWild == '*') { if (!*++pWild) return 1; mp = pWild; cp = pString + 1; } else if ((*pWild == *pString) || (*pWild == '?')) { pWild++; pString++; } else { pWild = mp; pString = cp++; } } while (*pWild == '*') pWild++; return !*pWild; } bool file_utils::write_buf_to_file(const char* pPath, const void* pData, size_t data_size) { FILE* pFile = NULL; #ifdef _MSC_VER // Compiling with MSVC if (fopen_s(&pFile, pPath, "wb")) return false; #else pFile = fopen(pPath, "wb"); #endif if (!pFile) return false; bool success = fwrite(pData, 1, data_size, pFile) == data_size; fclose(pFile); return success; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_file_utils.h000066400000000000000000000034461503722002600224510ustar00rootroot00000000000000// File: crn_file_utils.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { struct file_utils { // Returns true if pSrcFilename is older than pDstFilename static bool is_read_only(const char* pFilename); static bool disable_read_only(const char* pFilename); static bool is_older_than(const char* pSrcFilename, const char* pDstFilename); static bool does_file_exist(const char* pFilename); static bool does_dir_exist(const char* pDir); static bool get_file_size(const 
char* pFilename, uint64& file_size); static bool get_file_size(const char* pFilename, uint32& file_size); static bool is_path_separator(char c); static bool is_path_or_drive_separator(char c); static bool is_drive_separator(char c); static bool split_path(const char* p, dynamic_string* pDrive, dynamic_string* pDir, dynamic_string* pFilename, dynamic_string* pExt); static bool split_path(const char* p, dynamic_string& path, dynamic_string& filename); static bool get_pathname(const char* p, dynamic_string& path); static bool get_filename(const char* p, dynamic_string& filename); static void combine_path(dynamic_string& dst, const char* pA, const char* pB); static void combine_path(dynamic_string& dst, const char* pA, const char* pB, const char* pC); static bool full_path(dynamic_string& path); static bool get_extension(dynamic_string& filename); static bool remove_extension(dynamic_string& filename); static bool create_path(const dynamic_string& path); static void trim_trailing_seperator(dynamic_string& path); static int wildcmp(const char* pWild, const char* pString); static bool write_buf_to_file(const char* pPath, const void* pData, size_t data_size); }; // struct file_utils } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_find_files.cpp000066400000000000000000000205371503722002600227470ustar00rootroot00000000000000// File: crn_win32_find_files.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_console.h" #include "crn_find_files.h" #include "crn_file_utils.h" #include "crn_strutils.h" #ifdef CRNLIB_USE_WIN32_API #include "crn_winhdr.h" #elif defined(__GNUC__) #include #include #include #endif namespace crnlib { #ifdef CRNLIB_USE_WIN32_API bool find_files::find(const char* pBasepath, const char* pFilespec, uint flags) { m_last_error = S_OK; m_files.resize(0); return find_internal(pBasepath, "", pFilespec, flags, 0); } bool find_files::find(const char* pSpec, uint flags) { dynamic_string 
find_name(pSpec); if (!file_utils::full_path(find_name)) return false; dynamic_string find_pathname, find_filename; if (!file_utils::split_path(find_name.get_ptr(), find_pathname, find_filename)) return false; return find(find_pathname.get_ptr(), find_filename.get_ptr(), flags); } bool find_files::find_internal(const char* pBasepath, const char* pRelpath, const char* pFilespec, uint flags, int level) { WIN32_FIND_DATAA find_data; dynamic_string filename; dynamic_string_array child_paths; if (flags & cFlagRecursive) { if (strlen(pRelpath)) file_utils::combine_path(filename, pBasepath, pRelpath, "*"); else file_utils::combine_path(filename, pBasepath, "*"); HANDLE handle = FindFirstFileA(filename.get_ptr(), &find_data); if (handle == INVALID_HANDLE_VALUE) { HRESULT hres = GetLastError(); if ((level == 0) && (hres != NO_ERROR) && (hres != ERROR_FILE_NOT_FOUND)) { m_last_error = hres; return false; } } else { do { const bool is_dir = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; bool skip = !is_dir; if (is_dir) skip = (strcmp(find_data.cFileName, ".") == 0) || (strcmp(find_data.cFileName, "..") == 0); if (find_data.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM | FILE_ATTRIBUTE_TEMPORARY)) skip = true; if (find_data.dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) { if ((flags & cFlagAllowHidden) == 0) skip = true; } if (!skip) { dynamic_string child_path(find_data.cFileName); if ((!child_path.count_char('?')) && (!child_path.count_char('*'))) child_paths.push_back(child_path); } } while (FindNextFileA(handle, &find_data) != 0); HRESULT hres = GetLastError(); FindClose(handle); handle = INVALID_HANDLE_VALUE; if (hres != ERROR_NO_MORE_FILES) { m_last_error = hres; return false; } } } if (strlen(pRelpath)) file_utils::combine_path(filename, pBasepath, pRelpath, pFilespec); else file_utils::combine_path(filename, pBasepath, pFilespec); HANDLE handle = FindFirstFileA(filename.get_ptr(), &find_data); if (handle == INVALID_HANDLE_VALUE) { HRESULT hres = GetLastError(); 
if ((level == 0) && (hres != NO_ERROR) && (hres != ERROR_FILE_NOT_FOUND)) { m_last_error = hres; return false; } } else { do { const bool is_dir = (find_data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) != 0; bool skip = false; if (is_dir) skip = (strcmp(find_data.cFileName, ".") == 0) || (strcmp(find_data.cFileName, "..") == 0); if (find_data.dwFileAttributes & (FILE_ATTRIBUTE_SYSTEM | FILE_ATTRIBUTE_TEMPORARY)) skip = true; if (find_data.dwFileAttributes & FILE_ATTRIBUTE_HIDDEN) { if ((flags & cFlagAllowHidden) == 0) skip = true; } if (!skip) { if (((is_dir) && (flags & cFlagAllowDirs)) || ((!is_dir) && (flags & cFlagAllowFiles))) { m_files.resize(m_files.size() + 1); file_desc& file = m_files.back(); file.m_is_dir = is_dir; file.m_base = pBasepath; file.m_name = find_data.cFileName; file.m_rel = pRelpath; if (strlen(pRelpath)) file_utils::combine_path(file.m_fullname, pBasepath, pRelpath, find_data.cFileName); else file_utils::combine_path(file.m_fullname, pBasepath, find_data.cFileName); } } } while (FindNextFileA(handle, &find_data) != 0); HRESULT hres = GetLastError(); FindClose(handle); if (hres != ERROR_NO_MORE_FILES) { m_last_error = hres; return false; } } for (uint i = 0; i < child_paths.size(); i++) { dynamic_string child_path; if (strlen(pRelpath)) file_utils::combine_path(child_path, pRelpath, child_paths[i].get_ptr()); else child_path = child_paths[i]; if (!find_internal(pBasepath, child_path.get_ptr(), pFilespec, flags, level + 1)) return false; } return true; } #elif defined(__GNUC__) bool find_files::find(const char* pBasepath, const char* pFilespec, uint flags) { m_files.resize(0); return find_internal(pBasepath, "", pFilespec, flags, 0); } bool find_files::find(const char* pSpec, uint flags) { dynamic_string find_name(pSpec); if (!file_utils::full_path(find_name)) return false; dynamic_string find_pathname, find_filename; if (!file_utils::split_path(find_name.get_ptr(), find_pathname, find_filename)) return false; return 
find(find_pathname.get_ptr(), find_filename.get_ptr(), flags); } bool find_files::find_internal(const char* pBasepath, const char* pRelpath, const char* pFilespec, uint flags, int level) { dynamic_string pathname; if (strlen(pRelpath)) file_utils::combine_path(pathname, pBasepath, pRelpath); else pathname = pBasepath; if (!pathname.is_empty()) { char c = pathname.back(); if (c != '/') pathname += "/"; } DIR* dp = opendir(pathname.get_ptr()); if (!dp) return level ? true : false; dynamic_string_array paths; for (;;) { struct dirent* ep = readdir(dp); if (!ep) break; if ((strcmp(ep->d_name, ".") == 0) || (strcmp(ep->d_name, "..") == 0)) continue; bool is_directory = false; bool is_file = false; bool known = false; /* This is the faster implementation as it doesn't require any extra IO as everything is already read. But the standard doesn't require filesystems to set d_type. */ if (ep->d_type != DT_UNKNOWN) { is_directory = (ep->d_type & DT_DIR) != 0; is_file = (ep->d_type & DT_REG) != 0; known = true; } /* Not all filesystems set d_type which is optional, especially network file systems and non-native ones. This is the standard and portable implementation. 
See https://github.com/DaemonEngine/crunch/issues/37 */ if (!known) { dynamic_string filepath = pathname + dynamic_string("/") + dynamic_string(ep->d_name); struct stat s; if (stat(filepath.get_ptr(), &s) == 0) { is_directory = S_ISDIR(s.st_mode); is_file = S_ISREG(s.st_mode); known = true; } } dynamic_string filename(ep->d_name); dynamic_string fullname = pathname + filename; if (!known || (!is_file && !is_directory)) { console::warning("Ignoring unsupported path: %s", fullname.get_ptr()); continue; } if (is_directory) { if (flags & cFlagRecursive) { paths.push_back(filename); } } if (((is_file) && (flags & cFlagAllowFiles)) || ((is_directory) && (flags & cFlagAllowDirs))) { if (0 == fnmatch(pFilespec, filename.get_ptr(), 0)) { m_files.resize(m_files.size() + 1); file_desc& file = m_files.back(); file.m_is_dir = is_directory; file.m_base = pBasepath; file.m_rel = pRelpath; file.m_name = filename; file.m_fullname = fullname; } } } closedir(dp); dp = NULL; if (flags & cFlagRecursive) { for (uint i = 0; i < paths.size(); i++) { dynamic_string childpath; if (strlen(pRelpath)) file_utils::combine_path(childpath, pRelpath, paths[i].get_ptr()); else childpath = paths[i]; if (!find_internal(pBasepath, childpath.get_ptr(), pFilespec, flags, level + 1)) return false; } } return true; } #else #error Unimplemented #endif } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_find_files.h000066400000000000000000000027561503722002600224170ustar00rootroot00000000000000// File: crn_win32_find_files.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { class find_files { public: struct file_desc { inline file_desc() : m_is_dir(false) {} dynamic_string m_fullname; dynamic_string m_base; dynamic_string m_rel; dynamic_string m_name; bool m_is_dir; inline bool operator==(const file_desc& other) const { return m_fullname == other.m_fullname; } inline bool operator<(const file_desc& other) const { return m_fullname < other.m_fullname; } 
inline operator size_t() const { return static_cast(m_fullname); } }; typedef crnlib::vector file_desc_vec; inline find_files() { m_last_error = 0; // S_OK; } enum flags { cFlagRecursive = 1, cFlagAllowDirs = 2, cFlagAllowFiles = 4, cFlagAllowHidden = 8 }; bool find(const char* pBasepath, const char* pFilespec, uint flags = cFlagAllowFiles); bool find(const char* pSpec, uint flags = cFlagAllowFiles); // An HRESULT under Win32. FIXME: Abstract this better? inline int64 get_last_error() const { return m_last_error; } const file_desc_vec& get_files() const { return m_files; } private: file_desc_vec m_files; // A HRESULT under Win32 int64 m_last_error; bool find_internal(const char* pBasepath, const char* pRelpath, const char* pFilespec, uint flags, int level); }; // class find_files } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_freeimage_image_utils.h000066400000000000000000000073661503722002600246250ustar00rootroot00000000000000// File: crn_freeimage_image_utils.h // See Copyright Notice and license at the end of inc/crnlib.h // Note: This header file requires FreeImage/FreeImagePlus. 
#include "crn_image_utils.h" #include "freeImagePlus.h" namespace crnlib { namespace freeimage_image_utils { inline bool load_from_file(image_u8& dest, const wchar_t* pFilename, int fi_flag) { fipImage src_image; if (!src_image.loadU(pFilename, fi_flag)) return false; const uint orig_bits_per_pixel = src_image.getBitsPerPixel(); const FREE_IMAGE_COLOR_TYPE orig_color_type = src_image.getColorType(); if (!src_image.convertTo32Bits()) return false; if (src_image.getBitsPerPixel() != 32) return false; uint width = src_image.getWidth(); uint height = src_image.getHeight(); dest.resize(src_image.getWidth(), src_image.getHeight(), src_image.getWidth()); color_quad_u8* pDst = dest.get_ptr(); bool grayscale = true; bool has_alpha = false; for (uint y = 0; y < height; y++) { const BYTE* pSrc = src_image.getScanLine((WORD)(height - 1 - y)); color_quad_u8* pD = pDst; for (uint x = width; x; x--) { color_quad_u8 c; c.r = pSrc[FI_RGBA_RED]; c.g = pSrc[FI_RGBA_GREEN]; c.b = pSrc[FI_RGBA_BLUE]; c.a = pSrc[FI_RGBA_ALPHA]; if (!c.is_grayscale()) grayscale = false; has_alpha |= (c.a < 255); pSrc += 4; *pD++ = c; } pDst += width; } dest.reset_comp_flags(); if (grayscale) dest.set_grayscale(true); dest.set_component_valid(3, has_alpha || (orig_color_type == FIC_RGBALPHA) || (orig_bits_per_pixel == 32)); return true; } const int cSaveLuma = -1; inline bool save_to_grayscale_file(const wchar_t* pFilename, const image_u8& src, int component, int fi_flag) { fipImage dst_image(FIT_BITMAP, (WORD)src.get_width(), (WORD)src.get_height(), 8); RGBQUAD* p = dst_image.getPalette(); for (uint i = 0; i < dst_image.getPaletteSize(); i++) { p[i].rgbRed = (BYTE)i; p[i].rgbGreen = (BYTE)i; p[i].rgbBlue = (BYTE)i; p[i].rgbReserved = 255; } for (uint y = 0; y < src.get_height(); y++) { const color_quad_u8* pSrc = src.get_scanline(y); for (uint x = 0; x < src.get_width(); x++) { BYTE v; if (component == cSaveLuma) v = (BYTE)(*pSrc).get_luma(); else v = (*pSrc)[component]; dst_image.setPixelIndex(x, 
src.get_height() - 1 - y, &v); pSrc++; } } if (!dst_image.saveU(pFilename, fi_flag)) return false; return true; } inline bool save_to_file(const wchar_t* pFilename, const image_u8& src, int fi_flag, bool ignore_alpha = false) { const bool save_alpha = src.is_component_valid(3); uint bpp = (save_alpha && !ignore_alpha) ? 32 : 24; if (bpp == 32) { dynamic_wstring ext(pFilename); get_extension(ext); if ((ext == L"jpg") || (ext == L"jpeg") || (ext == L"gif") || (ext == L"jp2")) bpp = 24; } if ((bpp == 24) && (src.is_grayscale())) return save_to_grayscale_file(pFilename, src, cSaveLuma, fi_flag); fipImage dst_image(FIT_BITMAP, (WORD)src.get_width(), (WORD)src.get_height(), (WORD)bpp); for (uint y = 0; y < src.get_height(); y++) { for (uint x = 0; x < src.get_width(); x++) { color_quad_u8 c(src(x, y)); RGBQUAD quad; quad.rgbRed = c.r; quad.rgbGreen = c.g; quad.rgbBlue = c.b; if (bpp == 32) quad.rgbReserved = c.a; else quad.rgbReserved = 255; dst_image.setPixelColor(x, src.get_height() - 1 - y, &quad); } } if (!dst_image.saveU(pFilename, fi_flag)) return false; return true; } } // namespace freeimage_image_utils } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_hash.cpp000066400000000000000000000032261503722002600215640ustar00rootroot00000000000000// File: crn_hash.cpp // See Paul Hsieh's page at: http://www.azillionmonkeys.com/qed/hash.html // Also see http://www.concentric.net/~Ttwang/tech/inthash.htm, // http://burtleburtle.net/bob/hash/integer.html #include "crn_core.h" #undef get16bits #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) || defined(_MSC_VER) || defined(__BORLANDC__) || defined(__TURBOC__) #define get16bits(d) (*((const uint16*)(d))) #endif #if !defined(get16bits) #define get16bits(d) ((((uint32)(((const uint8*)(d))[1])) << 8) + (uint32)(((const uint8*)(d))[0])) #endif namespace crnlib { uint32 fast_hash(const void* p, int len) { const char* data = static_cast(p); uint32 hash = len, tmp; int rem; if (len <= 0 || data == 
NULL) return 0; rem = len & 3; len >>= 2; /* Main loop */ for (; len > 0; len--) { hash += get16bits(data); tmp = (get16bits(data + 2) << 11) ^ hash; hash = (hash << 16) ^ tmp; data += 2 * sizeof(uint16); hash += hash >> 11; } /* Handle end cases */ switch (rem) { case 3: hash += get16bits(data); hash ^= hash << 16; hash ^= data[sizeof(uint16)] << 18; hash += hash >> 11; break; case 2: hash += get16bits(data); hash ^= hash << 11; hash += hash >> 17; break; case 1: hash += *data; hash ^= hash << 10; hash += hash >> 1; } /* Force "avalanching" of final 127 bits */ hash ^= hash << 3; hash += hash >> 5; hash ^= hash << 4; hash += hash >> 17; hash ^= hash << 25; hash += hash >> 6; return hash; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_hash.h000066400000000000000000000013451503722002600212310ustar00rootroot00000000000000// File: crn_hash.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { uint32 fast_hash(const void* p, int len); // 4-byte integer hash, full avalanche inline uint32 bitmix32c(uint32 a) { a = (a + 0x7ed55d16) + (a << 12); a = (a ^ 0xc761c23c) ^ (a >> 19); a = (a + 0x165667b1) + (a << 5); a = (a + 0xd3a2646c) ^ (a << 9); a = (a + 0xfd7046c5) + (a << 3); a = (a ^ 0xb55a4f09) ^ (a >> 16); return a; } // 4-byte integer hash, full avalanche, no constants inline uint32 bitmix32(uint32 a) { a -= (a << 6); a ^= (a >> 17); a -= (a << 9); a ^= (a << 4); a -= (a << 3); a ^= (a << 10); a ^= (a >> 15); return a; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_hash_map.cpp000066400000000000000000000074621503722002600224270ustar00rootroot00000000000000// File: crn_hash_map.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_hash_map.h" #include "crn_rand.h" namespace crnlib { #if 0 class counted_obj { public: counted_obj(uint v = 0) : m_val(v) { m_count++; } counted_obj(const counted_obj& obj) : m_val(obj.m_val) { m_count++; } ~counted_obj() 
{ CRNLIB_ASSERT(m_count > 0); m_count--; } static uint m_count; uint m_val; operator size_t() const { return m_val; } bool operator== (const counted_obj& rhs) const { return m_val == rhs.m_val; } bool operator== (const uint rhs) const { return m_val == rhs; } }; uint counted_obj::m_count; void hash_map_test() { random r0, r1; uint seed = 0; for ( ; ; ) { seed++; typedef crnlib::hash_map my_hash_map; my_hash_map m; const uint n = r0.irand(1, 100000); printf("%u\n", n); r1.seed(seed); crnlib::vector q; uint count = 0; for (uint i = 0; i < n; i++) { uint v = r1.urand32() & 0x7FFFFFFF; my_hash_map::insert_result res = m.insert(counted_obj(v), counted_obj(v ^ 0xdeadbeef)); if (res.second) { count++; q.push_back(v); } } CRNLIB_VERIFY(m.size() == count); r1.seed(seed); my_hash_map cm(m); m.clear(); m = cm; cm.reset(); for (uint i = 0; i < n; i++) { uint v = r1.urand32() & 0x7FFFFFFF; my_hash_map::const_iterator it = m.find(counted_obj(v)); CRNLIB_VERIFY(it != m.end()); CRNLIB_VERIFY(it->first == v); CRNLIB_VERIFY(it->second == (v ^ 0xdeadbeef)); } for (uint t = 0; t < 2; t++) { const uint nd = r0.irand(1, q.size() + 1); for (uint i = 0; i < nd; i++) { uint p = r0.irand(0, q.size()); int k = q[p]; if (k >= 0) { q[p] = -k - 1; bool s = m.erase(counted_obj(k)); CRNLIB_VERIFY(s); } } typedef crnlib::hash_map uint_hash_set; uint_hash_set s; for (uint i = 0; i < q.size(); i++) { int v = q[i]; if (v >= 0) { my_hash_map::const_iterator it = m.find(counted_obj(v)); CRNLIB_VERIFY(it != m.end()); CRNLIB_VERIFY(it->first == (uint)v); CRNLIB_VERIFY(it->second == ((uint)v ^ 0xdeadbeef)); s.insert(v); } else { my_hash_map::const_iterator it = m.find(counted_obj(-v - 1)); CRNLIB_VERIFY(it == m.end()); } } uint found_count = 0; for (my_hash_map::const_iterator it = m.begin(); it != m.end(); ++it) { CRNLIB_VERIFY(it->second == ((uint)it->first ^ 0xdeadbeef)); uint_hash_set::const_iterator fit(s.find((uint)it->first)); CRNLIB_VERIFY(fit != s.end()); CRNLIB_VERIFY(fit->first == it->first); 
found_count++; } CRNLIB_VERIFY(found_count == s.size()); } CRNLIB_VERIFY(counted_obj::m_count == m.size() * 2); } } #endif } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_hash_map.h000066400000000000000000000474711503722002600221000ustar00rootroot00000000000000// File: crn_hash_map.h // See Copyright Notice and license at the end of inc/crnlib.h // // Notes: // stl-like hash map/hash set, with predictable performance across platforms/compilers/C run times/etc. // Hash function ref: https://web.archive.org/web/20160420203106/http://brpreiss.com/books/opus4/html/page215.html // Compared for performance against VC9's std::hash_map. // Linear probing, auto resizes on ~50% load factor. // Uses Knuth's multiplicative method (Fibonacci hashing). #pragma once #include "crn_sparse_array.h" #include "crn_sparse_bit_array.h" #include "crn_hash.h" namespace crnlib { template struct hasher { inline size_t operator()(const T& key) const { return static_cast(key); } }; template struct bit_hasher { inline size_t operator()(const T& key) const { return static_cast(fast_hash(&key, sizeof(key))); } }; template struct equal_to { inline bool operator()(const T& a, const T& b) const { return a == b; } }; // Important: The Hasher and Equals objects must be bitwise movable! 
template , typename Equals = equal_to > class hash_map { friend class iterator; friend class const_iterator; enum state { cStateInvalid = 0, cStateValid = 1 }; enum { cMinHashSize = 4U }; public: typedef hash_map hash_map_type; typedef std::pair value_type; typedef Key key_type; typedef Value referent_type; typedef Hasher hasher_type; typedef Equals equals_type; hash_map() : m_hash_shift(32), m_num_valid(0), m_grow_threshold(0) { } hash_map(const hash_map& other) : m_values(other.m_values), m_hash_shift(other.m_hash_shift), m_hasher(other.m_hasher), m_equals(other.m_equals), m_num_valid(other.m_num_valid), m_grow_threshold(other.m_grow_threshold) { } hash_map& operator=(const hash_map& other) { if (this == &other) return *this; clear(); m_values = other.m_values; m_hash_shift = other.m_hash_shift; m_num_valid = other.m_num_valid; m_grow_threshold = other.m_grow_threshold; m_hasher = other.m_hasher; m_equals = other.m_equals; return *this; } inline ~hash_map() { clear(); } const Equals& get_equals() const { return m_equals; } Equals& get_equals() { return m_equals; } void set_equals(const Equals& equals) { m_equals = equals; } const Hasher& get_hasher() const { return m_hasher; } Hasher& get_hasher() { return m_hasher; } void set_hasher(const Hasher& hasher) { m_hasher = hasher; } inline void clear() { if (!m_values.empty()) { if (CRNLIB_HAS_DESTRUCTOR(Key) || CRNLIB_HAS_DESTRUCTOR(Value)) { node* p = &get_node(0); node* p_end = p + m_values.size(); uint num_remaining = m_num_valid; while (p != p_end) { if (p->state) { destruct_value_type(p); num_remaining--; if (!num_remaining) break; } p++; } } m_values.clear_no_destruction(); m_hash_shift = 32; m_num_valid = 0; m_grow_threshold = 0; } } inline void reset() { if (!m_num_valid) return; if (CRNLIB_HAS_DESTRUCTOR(Key) || CRNLIB_HAS_DESTRUCTOR(Value)) { node* p = &get_node(0); node* p_end = p + m_values.size(); uint num_remaining = m_num_valid; while (p != p_end) { if (p->state) { destruct_value_type(p); p->state = 
cStateInvalid; num_remaining--; if (!num_remaining) break; } p++; } } else if (sizeof(node) <= 32) { memset(&m_values[0], 0, m_values.size_in_bytes()); } else { node* p = &get_node(0); node* p_end = p + m_values.size(); uint num_remaining = m_num_valid; while (p != p_end) { if (p->state) { p->state = cStateInvalid; num_remaining--; if (!num_remaining) break; } p++; } } m_num_valid = 0; } inline uint size() { return m_num_valid; } inline uint get_table_size() { return m_values.size(); } inline bool empty() { return !m_num_valid; } inline void reserve(uint new_capacity) { uint new_hash_size = math::maximum(1U, new_capacity); new_hash_size = new_hash_size * 2U; if (!math::is_power_of_2(new_hash_size)) new_hash_size = math::next_pow2(new_hash_size); new_hash_size = math::maximum(cMinHashSize, new_hash_size); if (new_hash_size > m_values.size()) rehash(new_hash_size); } class const_iterator; class iterator { friend class hash_map; friend class hash_map::const_iterator; public: inline iterator() : m_pTable(NULL), m_index(0) {} inline iterator(hash_map_type& table, uint index) : m_pTable(&table), m_index(index) {} inline iterator(const iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) {} inline iterator& operator=(const iterator& other) { m_pTable = other.m_pTable; m_index = other.m_index; return *this; } // post-increment inline iterator operator++(int) { iterator result(*this); ++*this; return result; } // pre-increment inline iterator& operator++() { probe(); return *this; } inline value_type& operator*() const { return *get_cur(); } inline value_type* operator->() const { return get_cur(); } inline bool operator==(const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } inline bool operator!=(const iterator& b) const { return !(*this == b); } inline bool operator==(const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } inline bool operator!=(const const_iterator& b) const { return 
!(*this == b); } private: hash_map_type* m_pTable; uint m_index; inline value_type* get_cur() const { CRNLIB_ASSERT(m_pTable && (m_index < m_pTable->m_values.size())); CRNLIB_ASSERT(m_pTable->get_node_state(m_index) == cStateValid); return &m_pTable->get_node(m_index); } inline void probe() { CRNLIB_ASSERT(m_pTable); m_index = m_pTable->find_next(m_index); } }; class const_iterator { friend class hash_map; friend class hash_map::iterator; public: inline const_iterator() : m_pTable(NULL), m_index(0) {} inline const_iterator(const hash_map_type& table, uint index) : m_pTable(&table), m_index(index) {} inline const_iterator(const iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) {} inline const_iterator(const const_iterator& other) : m_pTable(other.m_pTable), m_index(other.m_index) {} inline const_iterator& operator=(const const_iterator& other) { m_pTable = other.m_pTable; m_index = other.m_index; return *this; } inline const_iterator& operator=(const iterator& other) { m_pTable = other.m_pTable; m_index = other.m_index; return *this; } // post-increment inline const_iterator operator++(int) { const_iterator result(*this); ++*this; return result; } // pre-increment inline const_iterator& operator++() { probe(); return *this; } inline const value_type& operator*() const { return *get_cur(); } inline const value_type* operator->() const { return get_cur(); } inline bool operator==(const const_iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } inline bool operator!=(const const_iterator& b) const { return !(*this == b); } inline bool operator==(const iterator& b) const { return (m_pTable == b.m_pTable) && (m_index == b.m_index); } inline bool operator!=(const iterator& b) const { return !(*this == b); } private: const hash_map_type* m_pTable; uint m_index; inline const value_type* get_cur() const { CRNLIB_ASSERT(m_pTable && (m_index < m_pTable->m_values.size())); CRNLIB_ASSERT(m_pTable->get_node_state(m_index) == 
cStateValid); return &m_pTable->get_node(m_index); } inline void probe() { CRNLIB_ASSERT(m_pTable); m_index = m_pTable->find_next(m_index); } }; inline const_iterator begin() const { if (!m_num_valid) return end(); return const_iterator(*this, find_next(-1)); } inline const_iterator end() const { return const_iterator(*this, m_values.size()); } inline iterator begin() { if (!m_num_valid) return end(); return iterator(*this, find_next(-1)); } inline iterator end() { return iterator(*this, m_values.size()); } // insert_result.first will always point to inserted key/value (or the already existing key/value). // insert_resutt.second will be true if a new key/value was inserted, or false if the key already existed (in which case first will point to the already existing value). typedef std::pair insert_result; inline insert_result insert(const Key& k, const Value& v = Value()) { insert_result result; if (!insert_no_grow(result, k, v)) { grow(); // This must succeed. if (!insert_no_grow(result, k, v)) { CRNLIB_FAIL("insert() failed"); } } return result; } inline insert_result insert(const value_type& v) { return insert(v.first, v.second); } inline const_iterator find(const Key& k) const { return const_iterator(*this, find_index(k)); } inline iterator find(const Key& k) { return iterator(*this, find_index(k)); } inline bool erase(const Key& k) { int i = find_index(k); if (i >= static_cast(m_values.size())) return false; node* pDst = &get_node(i); destruct_value_type(pDst); pDst->state = cStateInvalid; m_num_valid--; for (;;) { int r, j = i; node* pSrc = pDst; do { if (!i) { i = m_values.size() - 1; pSrc = &get_node(i); } else { i--; pSrc--; } if (!pSrc->state) return true; r = hash_key(pSrc->first); } while ((i <= r && r < j) || (r < j && j < i) || (j < i && i <= r)); move_node(pDst, pSrc); pDst = pSrc; } } inline void swap(hash_map_type& other) { m_values.swap(other.m_values); utils::swap(m_hash_shift, other.m_hash_shift); utils::swap(m_num_valid, other.m_num_valid); 
utils::swap(m_grow_threshold, other.m_grow_threshold); utils::swap(m_hasher, other.m_hasher); utils::swap(m_equals, other.m_equals); } private: struct node : public value_type { uint8 state; }; static inline void construct_value_type(value_type* pDst, const Key& k, const Value& v) { if (CRNLIB_IS_BITWISE_COPYABLE(Key)) memcpy(&pDst->first, &k, sizeof(Key)); else scalar_type::construct(&pDst->first, k); if (CRNLIB_IS_BITWISE_COPYABLE(Value)) memcpy(&pDst->second, &v, sizeof(Value)); else scalar_type::construct(&pDst->second, v); } static inline void construct_value_type(value_type* pDst, const value_type* pSrc) { if ((CRNLIB_IS_BITWISE_COPYABLE(Key)) && (CRNLIB_IS_BITWISE_COPYABLE(Value))) { memcpy(pDst, pSrc, sizeof(value_type)); } else { if (CRNLIB_IS_BITWISE_COPYABLE(Key)) memcpy(&pDst->first, &pSrc->first, sizeof(Key)); else scalar_type::construct(&pDst->first, pSrc->first); if (CRNLIB_IS_BITWISE_COPYABLE(Value)) memcpy(&pDst->second, &pSrc->second, sizeof(Value)); else scalar_type::construct(&pDst->second, pSrc->second); } } static inline void destruct_value_type(value_type* p) { scalar_type::destruct(&p->first); scalar_type::destruct(&p->second); } // Moves *pSrc to *pDst efficiently. // pDst should NOT be constructed on entry. 
static inline void move_node(node* pDst, node* pSrc) { CRNLIB_ASSERT(!pDst->state); if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Key) && CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) { memcpy(pDst, pSrc, sizeof(node)); } else { if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Key)) memcpy(&pDst->first, &pSrc->first, sizeof(Key)); else { scalar_type::construct(&pDst->first, pSrc->first); scalar_type::destruct(&pSrc->first); } if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(Value)) memcpy(&pDst->second, &pSrc->second, sizeof(Value)); else { scalar_type::construct(&pDst->second, pSrc->second); scalar_type::destruct(&pSrc->second); } pDst->state = cStateValid; } pSrc->state = cStateInvalid; } struct raw_node { inline raw_node() { node* p = reinterpret_cast(this); p->state = cStateInvalid; } inline ~raw_node() { node* p = reinterpret_cast(this); if (p->state) hash_map_type::destruct_value_type(p); } inline raw_node(const raw_node& other) { node* pDst = reinterpret_cast(this); const node* pSrc = reinterpret_cast(&other); if (pSrc->state) { hash_map_type::construct_value_type(pDst, pSrc); pDst->state = cStateValid; } else pDst->state = cStateInvalid; } inline raw_node& operator=(const raw_node& rhs) { if (this == &rhs) return *this; node* pDst = reinterpret_cast(this); const node* pSrc = reinterpret_cast(&rhs); if (pSrc->state) { if (pDst->state) { pDst->first = pSrc->first; pDst->second = pSrc->second; } else { hash_map_type::construct_value_type(pDst, pSrc); pDst->state = cStateValid; } } else if (pDst->state) { hash_map_type::destruct_value_type(pDst); pDst->state = cStateInvalid; } return *this; } uint8 m_bits[sizeof(node)]; }; typedef crnlib::vector node_vector; node_vector m_values; uint m_hash_shift; Hasher m_hasher; Equals m_equals; uint m_num_valid; uint m_grow_threshold; inline int hash_key(const Key& k) const { CRNLIB_ASSERT((1U << (32U - m_hash_shift)) == m_values.size()); uint hash = static_cast(m_hasher(k)); // Fibonacci hashing hash = (2654435769U * hash) >> m_hash_shift; 
CRNLIB_ASSERT(hash < m_values.size()); return hash; } inline const node& get_node(uint index) const { return *reinterpret_cast(&m_values[index]); } inline node& get_node(uint index) { return *reinterpret_cast(&m_values[index]); } inline state get_node_state(uint index) const { return static_cast(get_node(index).state); } inline void set_node_state(uint index, bool valid) { get_node(index).state = valid; } inline void grow() { rehash(math::maximum(cMinHashSize, m_values.size() * 2U)); } inline void rehash(uint new_hash_size) { CRNLIB_ASSERT(new_hash_size >= m_num_valid); CRNLIB_ASSERT(math::is_power_of_2(new_hash_size)); if ((new_hash_size < m_num_valid) || (new_hash_size == m_values.size())) return; hash_map new_map; new_map.m_values.resize(new_hash_size); new_map.m_hash_shift = 32U - math::floor_log2i(new_hash_size); CRNLIB_ASSERT(new_hash_size == (1U << (32U - new_map.m_hash_shift))); new_map.m_grow_threshold = UINT_MAX; node* pNode = reinterpret_cast(m_values.begin()); node* pNode_end = pNode + m_values.size(); while (pNode != pNode_end) { if (pNode->state) { new_map.move_into(pNode); if (new_map.m_num_valid == m_num_valid) break; } pNode++; } new_map.m_grow_threshold = (new_hash_size + 1U) >> 1U; m_values.clear_no_destruction(); m_hash_shift = 32; swap(new_map); } inline uint find_next(int index) const { index++; if (index >= static_cast(m_values.size())) return index; const node* pNode = &get_node(index); for (;;) { if (pNode->state) break; if (++index >= static_cast(m_values.size())) break; pNode++; } return index; } inline uint find_index(const Key& k) const { if (m_num_valid) { int index = hash_key(k); const node* pNode = &get_node(index); if (pNode->state) { if (m_equals(pNode->first, k)) return index; const int orig_index = index; for (;;) { if (!index) { index = m_values.size() - 1; pNode = &get_node(index); } else { index--; pNode--; } if (index == orig_index) break; if (!pNode->state) break; if (m_equals(pNode->first, k)) return index; } } } return 
m_values.size(); } inline bool insert_no_grow(insert_result& result, const Key& k, const Value& v = Value()) { if (!m_values.size()) return false; int index = hash_key(k); node* pNode = &get_node(index); if (pNode->state) { if (m_equals(pNode->first, k)) { result.first = iterator(*this, index); result.second = false; return true; } const int orig_index = index; for (;;) { if (!index) { index = m_values.size() - 1; pNode = &get_node(index); } else { index--; pNode--; } if (orig_index == index) return false; if (!pNode->state) break; if (m_equals(pNode->first, k)) { result.first = iterator(*this, index); result.second = false; return true; } } } if (m_num_valid >= m_grow_threshold) return false; construct_value_type(pNode, k, v); pNode->state = cStateValid; m_num_valid++; CRNLIB_ASSERT(m_num_valid <= m_values.size()); result.first = iterator(*this, index); result.second = true; return true; } inline void move_into(node* pNode) { int index = hash_key(pNode->first); node* pDst_node = &get_node(index); if (pDst_node->state) { const int orig_index = index; for (;;) { if (!index) { index = m_values.size() - 1; pDst_node = &get_node(index); } else { index--; pDst_node--; } if (index == orig_index) { CRNLIB_ASSERT(false); return; } if (!pDst_node->state) break; } } move_node(pDst_node, pNode); m_num_valid++; } }; template struct bitwise_movable > { enum { cFlag = true }; }; template inline void swap(hash_map& a, hash_map& b) { a.swap(b); } extern void hash_map_test(); } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_helpers.h000066400000000000000000000026141503722002600217500ustar00rootroot00000000000000// File: crn_helpers.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #define CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(c) \ c(const c&); \ c& operator=(const c&) namespace crnlib { namespace helpers { template struct rel_ops { friend bool operator!=(const T& x, const T& y) { return (!(x == y)); } friend bool operator>(const T& x, const T& y) { 
return (y < x); } friend bool operator<=(const T& x, const T& y) { return (!(y < x)); } friend bool operator>=(const T& x, const T& y) { return (!(x < y)); } }; template inline T* construct(T* p) { return new (static_cast(p)) T; } template inline T* construct(T* p, const U& init) { return new (static_cast(p)) T(init); } template inline void construct_array(T* p, uint n) { T* q = p + n; for (; p != q; ++p) new (static_cast(p)) T; } template inline void construct_array(T* p, uint n, const U& init) { T* q = p + n; for (; p != q; ++p) new (static_cast(p)) T(init); } template inline void destruct(T* p) { (void)p; p->~T(); } template inline void destruct_array(T* p, uint n) { T* q = p + n; for (; p != q; ++p) p->~T(); } } // namespace helpers } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_huffman_codes.cpp000066400000000000000000000222141503722002600234400ustar00rootroot00000000000000// File: crn_huffman_codes.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_huffman_codes.h" namespace crnlib { struct sym_freq { uint m_freq; uint16 m_left; uint16 m_right; inline bool operator<(const sym_freq& other) const { return m_freq > other.m_freq; } }; static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* syms0, sym_freq* syms1) { const uint cMaxPasses = 2; uint hist[256 * cMaxPasses]; memset(hist, 0, sizeof(hist[0]) * 256 * cMaxPasses); sym_freq* p = syms0; sym_freq* q = syms0 + (num_syms >> 1) * 2; for (; p != q; p += 2) { const uint freq0 = p[0].m_freq; const uint freq1 = p[1].m_freq; hist[freq0 & 0xFF]++; hist[256 + ((freq0 >> 8) & 0xFF)]++; hist[freq1 & 0xFF]++; hist[256 + ((freq1 >> 8) & 0xFF)]++; } if (num_syms & 1) { const uint freq = p->m_freq; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; } sym_freq* pCur_syms = syms0; sym_freq* pNew_syms = syms1; for (uint pass = 0; pass < cMaxPasses; pass++) { const uint* pHist = &hist[pass << 8]; uint offsets[256]; uint cur_ofs = 0; for (uint i = 
0; i < 256; i += 2) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; offsets[i + 1] = cur_ofs; cur_ofs += pHist[i + 1]; } const uint pass_shift = pass << 3; sym_freq* p = pCur_syms; sym_freq* q = pCur_syms + (num_syms >> 1) * 2; for (; p != q; p += 2) { uint c0 = p[0].m_freq; uint c1 = p[1].m_freq; if (pass) { c0 >>= 8; c1 >>= 8; } c0 &= 0xFF; c1 &= 0xFF; if (c0 == c1) { uint dst_offset0 = offsets[c0]; offsets[c0] = dst_offset0 + 2; pNew_syms[dst_offset0] = p[0]; pNew_syms[dst_offset0 + 1] = p[1]; } else { uint dst_offset0 = offsets[c0]++; uint dst_offset1 = offsets[c1]++; pNew_syms[dst_offset0] = p[0]; pNew_syms[dst_offset1] = p[1]; } } if (num_syms & 1) { uint c = ((p->m_freq) >> pass_shift) & 0xFF; uint dst_offset = offsets[c]; offsets[c] = dst_offset + 1; pNew_syms[dst_offset] = *p; } sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; } #ifdef CRNLIB_ASSERTS_ENABLED uint prev_freq = 0; for (uint i = 0; i < num_syms; i++) { CRNLIB_ASSERT(!(pCur_syms[i].m_freq < prev_freq)); prev_freq = pCur_syms[i].m_freq; } #endif return pCur_syms; } struct huffman_work_tables { enum { cMaxInternalNodes = cHuffmanMaxSupportedSyms }; sym_freq syms0[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes]; sym_freq syms1[cHuffmanMaxSupportedSyms + 1 + cMaxInternalNodes]; uint16 queue[cMaxInternalNodes]; }; void* create_generate_huffman_codes_tables() { return crnlib_new(); } void free_generate_huffman_codes_tables(void* p) { crnlib_delete(static_cast(p)); } #if USE_CALCULATE_MINIMUM_REDUNDANCY /* calculate_minimum_redundancy() written by Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk November 1996. 
*/ static void calculate_minimum_redundancy(int A[], int n) { int root; /* next root node to be used */ int leaf; /* next leaf to be used */ int next; /* next value to be assigned */ int avbl; /* number of available nodes */ int used; /* number of internal nodes */ int dpth; /* current depth of leaves */ /* check for pathological cases */ if (n == 0) { return; } if (n == 1) { A[0] = 0; return; } /* first pass, left to right, setting parent pointers */ A[0] += A[1]; root = 0; leaf = 2; for (next = 1; next < n - 1; next++) { /* select first item for a pairing */ if (leaf >= n || A[root] < A[leaf]) { A[next] = A[root]; A[root++] = next; } else A[next] = A[leaf++]; /* add on the second item */ if (leaf >= n || (root < next && A[root] < A[leaf])) { A[next] += A[root]; A[root++] = next; } else A[next] += A[leaf++]; } /* second pass, right to left, setting internal depths */ A[n - 2] = 0; for (next = n - 3; next >= 0; next--) A[next] = A[A[next]] + 1; /* third pass, right to left, setting leaf depths */ avbl = 1; used = dpth = 0; root = n - 2; next = n - 1; while (avbl > 0) { while (root >= 0 && A[root] == dpth) { used++; root--; } while (avbl > used) { A[next--] = dpth; avbl--; } avbl = 2 * used; dpth++; used = 0; } } #endif bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret) { if ((!num_syms) || (num_syms > cHuffmanMaxSupportedSyms)) return false; huffman_work_tables& state = *static_cast(pContext); ; uint max_freq = 0; uint total_freq = 0; uint num_used_syms = 0; for (uint i = 0; i < num_syms; i++) { uint freq = pFreq[i]; if (!freq) pCodesizes[i] = 0; else { total_freq += freq; max_freq = math::maximum(max_freq, freq); sym_freq& sf = state.syms0[num_used_syms]; sf.m_left = (uint16)i; sf.m_right = cUINT16_MAX; sf.m_freq = freq; num_used_syms++; } } total_freq_ret = total_freq; if (num_used_syms == 1) { pCodesizes[state.syms0[0].m_left] = 1; return true; } sym_freq* syms = 
radix_sort_syms(num_used_syms, state.syms0, state.syms1); #if USE_CALCULATE_MINIMUM_REDUNDANCY int x[cHuffmanMaxSupportedSyms]; for (uint i = 0; i < num_used_syms; i++) x[i] = state.syms0[i].m_freq; calculate_minimum_redundancy(x, num_used_syms); uint max_len = 0; for (uint i = 0; i < num_used_syms; i++) { uint len = x[i]; max_len = math::maximum(len, max_len); pCodesizes[state.syms0[i].m_left] = static_cast(len); } return true; #else // Dummy node sym_freq& sf = state.syms0[num_used_syms]; sf.m_left = cUINT16_MAX; sf.m_right = cUINT16_MAX; sf.m_freq = UINT_MAX; uint next_internal_node = num_used_syms + 1; uint queue_front = 0; uint queue_end = 0; uint next_lowest_sym = 0; uint num_nodes_remaining = num_used_syms; do { uint left_freq = syms[next_lowest_sym].m_freq; uint left_child = next_lowest_sym; if ((queue_end > queue_front) && (syms[state.queue[queue_front]].m_freq < left_freq)) { left_child = state.queue[queue_front]; left_freq = syms[left_child].m_freq; queue_front++; } else next_lowest_sym++; uint right_freq = syms[next_lowest_sym].m_freq; uint right_child = next_lowest_sym; if ((queue_end > queue_front) && (syms[state.queue[queue_front]].m_freq < right_freq)) { right_child = state.queue[queue_front]; right_freq = syms[right_child].m_freq; queue_front++; } else next_lowest_sym++; const uint internal_node_index = next_internal_node; next_internal_node++; CRNLIB_ASSERT(next_internal_node < CRNLIB_ARRAYSIZE(state.syms0)); syms[internal_node_index].m_freq = left_freq + right_freq; syms[internal_node_index].m_left = static_cast(left_child); syms[internal_node_index].m_right = static_cast(right_child); CRNLIB_ASSERT(queue_end < huffman_work_tables::cMaxInternalNodes); state.queue[queue_end] = static_cast(internal_node_index); queue_end++; num_nodes_remaining--; } while (num_nodes_remaining > 1); CRNLIB_ASSERT(next_lowest_sym == num_used_syms); CRNLIB_ASSERT((queue_end - queue_front) == 1); uint cur_node_index = state.queue[queue_front]; uint32* pStack = (syms == 
state.syms0) ? (uint32*)state.syms1 : (uint32*)state.syms0; uint32* pStack_top = pStack; uint max_level = 0; for (;;) { uint level = cur_node_index >> 16; uint node_index = cur_node_index & 0xFFFF; uint left_child = syms[node_index].m_left; uint right_child = syms[node_index].m_right; uint next_level = (cur_node_index + 0x10000) & 0xFFFF0000; if (left_child < num_used_syms) { max_level = math::maximum(max_level, level); pCodesizes[syms[left_child].m_left] = static_cast(level + 1); if (right_child < num_used_syms) { pCodesizes[syms[right_child].m_left] = static_cast(level + 1); if (pStack == pStack_top) break; cur_node_index = *--pStack; } else { cur_node_index = next_level | right_child; } } else { if (right_child < num_used_syms) { max_level = math::maximum(max_level, level); pCodesizes[syms[right_child].m_left] = static_cast(level + 1); cur_node_index = next_level | left_child; } else { *pStack++ = next_level | left_child; cur_node_index = next_level | right_child; } } } max_code_size = max_level + 1; #endif return true; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_huffman_codes.h000066400000000000000000000006771503722002600231160ustar00rootroot00000000000000// File: crn_huffman_codes.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { const uint cHuffmanMaxSupportedSyms = 8192; void* create_generate_huffman_codes_tables(); void free_generate_huffman_codes_tables(void* p); bool generate_huffman_codes(void* pContext, uint num_syms, const uint16* pFreq, uint8* pCodesizes, uint& max_code_size, uint& total_freq_ret); } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_image.h000066400000000000000000000452071503722002600213750ustar00rootroot00000000000000// File: crn_image.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_color.h" #include "crn_vec.h" #include "crn_pixel_format.h" #include "crn_rect.h" namespace crnlib { template class image { public: 
typedef color_type color_t; typedef crnlib::vector pixel_buf_t; image() : m_width(0), m_height(0), m_pitch(0), m_total(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_pPixels(NULL) { } // pitch is in PIXELS, not bytes. image(uint width, uint height, uint pitch = UINT_MAX, const color_type& background = color_type::make_black(), uint flags = pixel_format_helpers::cDefaultCompFlags) : m_comp_flags(flags) { CRNLIB_ASSERT((width > 0) && (height > 0)); if (pitch == UINT_MAX) pitch = width; m_pixel_buf.resize(pitch * height); m_width = width; m_height = height; m_pitch = pitch; m_total = m_pitch * m_height; m_pPixels = &m_pixel_buf.front(); set_all(background); } // pitch is in PIXELS, not bytes. image(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) { alias(pPixels, width, height, pitch, flags); } image& operator=(const image& other) { if (this == &other) return *this; if (other.m_pixel_buf.empty()) { // This doesn't look very safe - let's make a new instance. //m_pixel_buf.clear(); //m_pPixels = other.m_pPixels; const uint total_pixels = other.m_pitch * other.m_height; if ((total_pixels) && (other.m_pPixels)) { m_pixel_buf.resize(total_pixels); m_pixel_buf.insert(0, other.m_pPixels, m_pixel_buf.size()); m_pPixels = &m_pixel_buf.front(); } else { m_pixel_buf.clear(); m_pPixels = NULL; } } else { m_pixel_buf = other.m_pixel_buf; m_pPixels = &m_pixel_buf.front(); } m_width = other.m_width; m_height = other.m_height; m_pitch = other.m_pitch; m_total = other.m_total; m_comp_flags = other.m_comp_flags; return *this; } image(const image& other) : m_width(0), m_height(0), m_pitch(0), m_total(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_pPixels(NULL) { *this = other; } // pitch is in PIXELS, not bytes. 
void alias(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) { m_pixel_buf.clear(); m_pPixels = pPixels; m_width = width; m_height = height; m_pitch = (pitch == UINT_MAX) ? width : pitch; m_total = m_pitch * m_height; m_comp_flags = flags; } // pitch is in PIXELS, not bytes. bool grant_ownership(color_type* pPixels, uint width, uint height, uint pitch = UINT_MAX, uint flags = pixel_format_helpers::cDefaultCompFlags) { if (pitch == UINT_MAX) pitch = width; if ((!pPixels) || (!width) || (!height) || (pitch < width)) { CRNLIB_ASSERT(0); return false; } if (pPixels == get_ptr()) { CRNLIB_ASSERT(0); return false; } clear(); if (!m_pixel_buf.grant_ownership(pPixels, height * pitch, height * pitch)) return false; m_pPixels = pPixels; m_width = width; m_height = height; m_pitch = pitch; m_total = pitch * height; m_comp_flags = flags; return true; } void clear() { m_pPixels = NULL; m_pixel_buf.clear(); m_width = 0; m_height = 0; m_pitch = 0; m_total = 0; m_comp_flags = pixel_format_helpers::cDefaultCompFlags; } inline bool is_valid() const { return m_total > 0; } inline pixel_format_helpers::component_flags get_comp_flags() const { return static_cast(m_comp_flags); } inline void set_comp_flags(pixel_format_helpers::component_flags new_flags) { m_comp_flags = new_flags; } inline void reset_comp_flags() { m_comp_flags = pixel_format_helpers::cDefaultCompFlags; } inline bool is_component_valid(uint index) const { CRNLIB_ASSERT(index < 4U); return utils::is_flag_set(m_comp_flags, index); } inline void set_component_valid(uint index, bool state) { CRNLIB_ASSERT(index < 4U); utils::set_flag(m_comp_flags, index, state); } inline bool has_rgb() const { return is_component_valid(0) || is_component_valid(1) || is_component_valid(2); } inline bool has_alpha() const { return is_component_valid(3); } inline bool is_grayscale() const { return utils::is_bit_set(m_comp_flags, pixel_format_helpers::cCompFlagGrayscale); 
} inline void set_grayscale(bool state) { utils::set_bit(m_comp_flags, pixel_format_helpers::cCompFlagGrayscale, state); } void set_all(const color_type& c) { for (uint i = 0; i < m_total; i++) m_pPixels[i] = c; } void flip_x() { const uint half_width = m_width / 2; for (uint y = 0; y < m_height; y++) { for (uint x = 0; x < half_width; x++) { color_type c((*this)(x, y)); (*this)(x, y) = (*this)(m_width - 1 - x, y); (*this)(m_width - 1 - x, y) = c; } } } void flip_y() { const uint half_height = m_height / 2; for (uint y = 0; y < half_height; y++) { for (uint x = 0; x < m_width; x++) { color_type c((*this)(x, y)); (*this)(x, y) = (*this)(x, m_height - 1 - y); (*this)(x, m_height - 1 - y) = c; } } } void convert_to_grayscale() { for (uint y = 0; y < m_height; y++) for (uint x = 0; x < m_width; x++) { color_type c((*this)(x, y)); typename color_type::component_t l = static_cast(c.get_luma()); c.r = l; c.g = l; c.b = l; (*this)(x, y) = c; } set_grayscale(true); } void swizzle(uint r, uint g, uint b, uint a) { for (uint y = 0; y < m_height; y++) for (uint x = 0; x < m_width; x++) { const color_type& c = (*this)(x, y); (*this)(x, y) = color_type(c[r], c[g], c[b], c[a]); } } void set_alpha_to_luma() { for (uint y = 0; y < m_height; y++) for (uint x = 0; x < m_width; x++) { color_type c((*this)(x, y)); typename color_type::component_t l = static_cast(c.get_luma()); c.a = l; (*this)(x, y) = c; } set_component_valid(3, true); } bool extract_block(color_type* pDst, uint x, uint y, uint w, uint h, bool flip_xy = false) const { if ((x >= m_width) || (y >= m_height)) { CRNLIB_ASSERT(0); return false; } if (flip_xy) { for (uint y_ofs = 0; y_ofs < h; y_ofs++) for (uint x_ofs = 0; x_ofs < w; x_ofs++) pDst[x_ofs * h + y_ofs] = get_clamped(x_ofs + x, y_ofs + y); // 5/4/12 - this was incorrectly x_ofs * 4 } else if (((x + w) > m_width) || ((y + h) > m_height)) { for (uint y_ofs = 0; y_ofs < h; y_ofs++) for (uint x_ofs = 0; x_ofs < w; x_ofs++) *pDst++ = get_clamped(x_ofs + x, y_ofs + 
y); } else { const color_type* pSrc = get_scanline(y) + x; for (uint i = h; i; i--) { memcpy(pDst, pSrc, w * sizeof(color_type)); pDst += w; pSrc += m_pitch; } } return true; } // No clipping! void unclipped_fill_box(uint x, uint y, uint w, uint h, const color_type& c) { if (((x + w) > m_width) || ((y + h) > m_height)) { CRNLIB_ASSERT(0); return; } color_type* p = get_scanline(y) + x; for (uint i = h; i; i--) { color_type* q = p; for (uint j = w; j; j--) *q++ = c; p += m_pitch; } } void draw_rect(int x, int y, uint width, uint height, const color_type& c) { draw_line(x, y, x + width - 1, y, c); draw_line(x, y, x, y + height - 1, c); draw_line(x + width - 1, y, x + width - 1, y + height - 1, c); draw_line(x, y + height - 1, x + width - 1, y + height - 1, c); } // No clipping! bool unclipped_blit(uint src_x, uint src_y, uint src_w, uint src_h, uint dst_x, uint dst_y, const image& src) { if ((!is_valid()) || (!src.is_valid())) { CRNLIB_ASSERT(0); return false; } if (((src_x + src_w) > src.get_width()) || ((src_y + src_h) > src.get_height())) { CRNLIB_ASSERT(0); return false; } if (((dst_x + src_w) > get_width()) || ((dst_y + src_h) > get_height())) { CRNLIB_ASSERT(0); return false; } const color_type* pS = &src(src_x, src_y); color_type* pD = &(*this)(dst_x, dst_y); const uint bytes_to_copy = src_w * sizeof(color_type); for (uint i = src_h; i; i--) { memcpy(pD, pS, bytes_to_copy); pS += src.get_pitch(); pD += get_pitch(); } return true; } // With clipping. 
bool blit(int dst_x, int dst_y, const image& src) { if ((!is_valid()) || (!src.is_valid())) { CRNLIB_ASSERT(0); return false; } int src_x = 0; int src_y = 0; if (dst_x < 0) { src_x = -dst_x; if (src_x >= static_cast(src.get_width())) return false; dst_x = 0; } if (dst_y < 0) { src_y = -dst_y; if (src_y >= static_cast(src.get_height())) return false; dst_y = 0; } if ((dst_x >= (int)m_width) || (dst_y >= (int)m_height)) return false; uint width = math::minimum(m_width - dst_x, src.get_width() - src_x); uint height = math::minimum(m_height - dst_y, src.get_height() - src_y); bool success = unclipped_blit(src_x, src_y, width, height, dst_x, dst_y, src); (void)success; CRNLIB_ASSERT(success); return true; } // With clipping. bool blit(int src_x, int src_y, int src_w, int src_h, int dst_x, int dst_y, const image& src) { if ((!is_valid()) || (!src.is_valid())) { CRNLIB_ASSERT(0); return false; } rect src_rect(src_x, src_y, src_x + src_w, src_y + src_h); if (!src_rect.intersect(src.get_bounds())) return false; rect dst_rect(dst_x, dst_y, dst_x + src_rect.get_width(), dst_y + src_rect.get_height()); if (!dst_rect.intersect(get_bounds())) return false; bool success = unclipped_blit( src_rect.get_left(), src_rect.get_top(), math::minimum(src_rect.get_width(), dst_rect.get_width()), math::minimum(src_rect.get_height(), dst_rect.get_height()), dst_rect.get_left(), dst_rect.get_top(), src); (void)success; CRNLIB_ASSERT(success); return true; } // In-place resize of image dimensions (cropping). 
bool resize(uint new_width, uint new_height, uint new_pitch = UINT_MAX, const color_type background = color_type::make_black()) { if (new_pitch == UINT_MAX) new_pitch = new_width; if ((new_width == m_width) && (new_height == m_height) && (new_pitch == m_pitch)) return true; if ((!new_width) || (!new_height) || (!new_pitch)) { clear(); return false; } pixel_buf_t existing_pixels; existing_pixels.swap(m_pixel_buf); if (!m_pixel_buf.try_resize(new_height * new_pitch)) { clear(); return false; } for (uint y = 0; y < new_height; y++) { for (uint x = 0; x < new_width; x++) { if ((x < m_width) && (y < m_height)) m_pixel_buf[x + y * new_pitch] = existing_pixels[x + y * m_pitch]; else m_pixel_buf[x + y * new_pitch] = background; } } m_width = new_width; m_height = new_height; m_pitch = new_pitch; m_total = new_pitch * new_height; m_pPixels = &m_pixel_buf.front(); return true; } inline uint get_width() const { return m_width; } inline uint get_height() const { return m_height; } inline uint get_total_pixels() const { return m_width * m_height; } inline rect get_bounds() const { return rect(0, 0, m_width, m_height); } inline uint get_pitch() const { return m_pitch; } inline uint get_pitch_in_bytes() const { return m_pitch * sizeof(color_type); } // Returns pitch * height, NOT width * height! 
inline uint get_total() const { return m_total; } inline uint get_block_width(uint block_size) const { return (m_width + block_size - 1) / block_size; } inline uint get_block_height(uint block_size) const { return (m_height + block_size - 1) / block_size; } inline uint get_total_blocks(uint block_size) const { return get_block_width(block_size) * get_block_height(block_size); } inline uint get_size_in_bytes() const { return sizeof(color_type) * m_total; } inline const color_type* get_pixels() const { return m_pPixels; } inline color_type* get_pixels() { return m_pPixels; } inline const color_type& operator()(uint x, uint y) const { CRNLIB_ASSERT((x < m_width) && (y < m_height)); return m_pPixels[x + y * m_pitch]; } inline color_type& operator()(uint x, uint y) { CRNLIB_ASSERT((x < m_width) && (y < m_height)); return m_pPixels[x + y * m_pitch]; } inline const color_type& get_unclamped(uint x, uint y) const { CRNLIB_ASSERT((x < m_width) && (y < m_height)); return m_pPixels[x + y * m_pitch]; } inline const color_type& get_clamped(int x, int y) const { x = math::clamp(x, 0, m_width - 1); y = math::clamp(y, 0, m_height - 1); return m_pPixels[x + y * m_pitch]; } // Sample image with bilinear filtering. // (x,y) - Continuous coordinates, where pixel centers are at (.5,.5), valid image coords are [0,width] and [0,height]. 
void get_filtered(float x, float y, color_type& result) const { x -= .5f; y -= .5f; int ix = (int)floor(x); int iy = (int)floor(y); float wx = x - ix; float wy = y - iy; color_type a(get_clamped(ix, iy)); color_type b(get_clamped(ix + 1, iy)); color_type c(get_clamped(ix, iy + 1)); color_type d(get_clamped(ix + 1, iy + 1)); for (uint i = 0; i < 4; i++) { double top = math::lerp(a[i], b[i], wx); double bot = math::lerp(c[i], d[i], wx); double m = math::lerp(top, bot, wy); if (!color_type::component_traits::cFloat) m += .5f; result.set_component(i, static_cast(m)); } } void get_filtered(float x, float y, vec4F& result) const { x -= .5f; y -= .5f; int ix = (int)floor(x); int iy = (int)floor(y); float wx = x - ix; float wy = y - iy; color_type a(get_clamped(ix, iy)); color_type b(get_clamped(ix + 1, iy)); color_type c(get_clamped(ix, iy + 1)); color_type d(get_clamped(ix + 1, iy + 1)); for (uint i = 0; i < 4; i++) { float top = math::lerp(a[i], b[i], wx); float bot = math::lerp(c[i], d[i], wx); float m = math::lerp(top, bot, wy); result[i] = m; } } inline void set_pixel_unclipped(uint x, uint y, const color_type& c) { CRNLIB_ASSERT((x < m_width) && (y < m_height)); m_pPixels[x + y * m_pitch] = c; } inline void set_pixel_clipped(int x, int y, const color_type& c) { if ((static_cast(x) >= m_width) || (static_cast(y) >= m_height)) return; m_pPixels[x + y * m_pitch] = c; } inline const color_type* get_scanline(uint y) const { CRNLIB_ASSERT(y < m_height); return &m_pPixels[y * m_pitch]; } inline color_type* get_scanline(uint y) { CRNLIB_ASSERT(y < m_height); return &m_pPixels[y * m_pitch]; } inline const color_type* get_ptr() const { return m_pPixels; } inline color_type* get_ptr() { return m_pPixels; } inline void swap(image& other) { utils::swap(m_width, other.m_width); utils::swap(m_height, other.m_height); utils::swap(m_pitch, other.m_pitch); utils::swap(m_total, other.m_total); utils::swap(m_comp_flags, other.m_comp_flags); utils::swap(m_pPixels, other.m_pPixels); 
m_pixel_buf.swap(other.m_pixel_buf); } void draw_line(int xs, int ys, int xe, int ye, const color_type& color) { if (xs > xe) { utils::swap(xs, xe); utils::swap(ys, ye); } int dx = xe - xs, dy = ye - ys; if (!dx) { if (ys > ye) utils::swap(ys, ye); for (int i = ys; i <= ye; i++) set_pixel_clipped(xs, i, color); } else if (!dy) { for (int i = xs; i < xe; i++) set_pixel_clipped(i, ys, color); } else if (dy > 0) { if (dy <= dx) { int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); rasterize_line(xs, ys, xe, ye, 0, 1, e, e_inc, e_no_inc, color); } else { int e = 2 * dx - dy, e_no_inc = 2 * dx, e_inc = 2 * (dx - dy); rasterize_line(xs, ys, xe, ye, 1, 1, e, e_inc, e_no_inc, color); } } else { dy = -dy; if (dy <= dx) { int e = 2 * dy - dx, e_no_inc = 2 * dy, e_inc = 2 * (dy - dx); rasterize_line(xs, ys, xe, ye, 0, -1, e, e_inc, e_no_inc, color); } else { int e = 2 * dx - dy, e_no_inc = (2 * dx), e_inc = 2 * (dx - dy); rasterize_line(xe, ye, xs, ys, 1, -1, e, e_inc, e_no_inc, color); } } } const pixel_buf_t& get_pixel_buf() const { return m_pixel_buf; } pixel_buf_t& get_pixel_buf() { return m_pixel_buf; } private: uint m_width; uint m_height; uint m_pitch; uint m_total; uint m_comp_flags; color_type* m_pPixels; pixel_buf_t m_pixel_buf; void rasterize_line(int xs, int ys, int xe, int ye, int pred, int inc_dec, int e, int e_inc, int e_no_inc, const color_type& color) { int start, end, var; if (pred) { start = ys; end = ye; var = xs; for (int i = start; i <= end; i++) { set_pixel_clipped(var, i, color); if (e < 0) e += e_no_inc; else { var += inc_dec; e += e_inc; } } } else { start = xs; end = xe; var = ys; for (int i = start; i <= end; i++) { set_pixel_clipped(i, var, color); if (e < 0) e += e_no_inc; else { var += inc_dec; e += e_inc; } } } } }; typedef image image_u8; typedef image image_i16; typedef image image_u16; typedef image image_i32; typedef image image_u32; typedef image image_f; template inline void swap(image& a, image& b) { a.swap(b); } } // 
namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_image_utils.cpp000066400000000000000000001133471503722002600231510ustar00rootroot00000000000000// File: crn_image_utils.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_image_utils.h" #include "crn_console.h" #include "crn_resampler.h" #include "crn_threaded_resampler.h" #include "crn_strutils.h" #include "crn_file_utils.h" #include "crn_threading.h" #include "crn_miniz.h" #include "crn_jpge.h" #include "crn_cfile_stream.h" #include "crn_mipmapped_texture.h" #include "crn_buffer_stream.h" #include "stb_image.h" #include "stb_image_write.h" #include "crn_jpgd.h" #include "crn_pixel_format.h" namespace crnlib { const float cInfinitePSNR = 999999.0f; const uint CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION = 16384; namespace image_utils { bool read_from_stream_stb(data_stream_serializer& serializer, image_u8& img) { uint8_vec buf; if (!serializer.read_entire_file(buf)) return false; int x = 0, y = 0, n = 0; unsigned char* pData = stbi_load_from_memory(buf.get_ptr(), buf.size_in_bytes(), &x, &y, &n, 4); if (!pData) return false; if ((x > (int)CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION) || (y > (int)CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION)) { stbi_image_free(pData); return false; } const bool has_alpha = ((n == 2) || (n == 4)); img.resize(x, y); bool grayscale = true; for (int py = 0; py < y; py++) { const color_quad_u8* pSrc = reinterpret_cast(pData) + (py * x); color_quad_u8* pDst = img.get_scanline(py); color_quad_u8* pDst_end = pDst + x; while (pDst != pDst_end) { color_quad_u8 c(*pSrc++); if (!has_alpha) c.a = 255; if (!c.is_grayscale()) grayscale = false; *pDst++ = c; } } stbi_image_free(pData); img.reset_comp_flags(); img.set_grayscale(grayscale); img.set_component_valid(3, has_alpha); return true; } bool read_from_stream_jpgd(data_stream_serializer& serializer, image_u8& img) { uint8_vec buf; if (!serializer.read_entire_file(buf)) return false; int width = 0, 
height = 0, actual_comps = 0; unsigned char* pSrc_img = jpgd::decompress_jpeg_image_from_memory(buf.get_ptr(), buf.size_in_bytes(), &width, &height, &actual_comps, 4); if (!pSrc_img) return false; if (math::maximum(width, height) > (int)CRNLIB_LARGEST_SUPPORTED_IMAGE_DIMENSION) { crnlib_free(pSrc_img); return false; } if (!img.grant_ownership(reinterpret_cast(pSrc_img), width, height)) { crnlib_free(pSrc_img); return false; } img.reset_comp_flags(); img.set_grayscale(actual_comps == 1); img.set_component_valid(3, false); return true; } bool read_from_stream(image_u8& dest, data_stream_serializer& serializer, uint read_flags) { if (read_flags > cReadFlagsAllFlags) { CRNLIB_ASSERT(0); return false; } if (!serializer.get_stream()) { CRNLIB_ASSERT(0); return false; } dynamic_string ext(serializer.get_name()); file_utils::get_extension(ext); if ((ext == "jpg") || (ext == "jpeg")) { // Use my jpeg decoder by default because it supports progressive jpeg's. if ((read_flags & cReadFlagForceSTB) == 0) { return image_utils::read_from_stream_jpgd(serializer, dest); } } return image_utils::read_from_stream_stb(serializer, dest); } bool read_from_file(image_u8& dest, const char* pFilename, uint read_flags) { if (read_flags > cReadFlagsAllFlags) { CRNLIB_ASSERT(0); return false; } cfile_stream file_stream; if (!file_stream.open(pFilename)) return false; data_stream_serializer serializer(file_stream); return read_from_stream(dest, serializer, read_flags); } bool write_to_file(const char* pFilename, const image_u8& img, uint write_flags, int grayscale_comp_index) { if ((grayscale_comp_index < -1) || (grayscale_comp_index > 3)) { CRNLIB_ASSERT(0); return false; } if (!img.get_width()) { CRNLIB_ASSERT(0); return false; } dynamic_string ext(pFilename); bool is_jpeg = false; if (file_utils::get_extension(ext)) { is_jpeg = ((ext == "jpg") || (ext == "jpeg")); if ((ext != "png") && (ext != "bmp") && (ext != "tga") && (!is_jpeg)) { console::error("crnlib::image_utils::write_to_file: Can 
only write .BMP, .TGA, .PNG, or .JPG files!\n"); return false; } } crnlib::vector temp; uint num_src_chans = 0; const void* pSrc_img = NULL; if (is_jpeg) { write_flags |= cWriteFlagIgnoreAlpha; } if ((img.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) || (write_flags & image_utils::cWriteFlagGrayscale)) { CRNLIB_ASSERT(grayscale_comp_index < 4); if (grayscale_comp_index > 3) grayscale_comp_index = 3; temp.resize(img.get_total_pixels()); for (uint y = 0; y < img.get_height(); y++) { const color_quad_u8* pSrc = img.get_scanline(y); const color_quad_u8* pSrc_end = pSrc + img.get_width(); uint8* pDst = &temp[y * img.get_width()]; if (img.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) { while (pSrc != pSrc_end) *pDst++ = (*pSrc++)[1]; } else if (grayscale_comp_index < 0) { while (pSrc != pSrc_end) *pDst++ = static_cast((*pSrc++).get_luma()); } else { while (pSrc != pSrc_end) *pDst++ = (*pSrc++)[grayscale_comp_index]; } } pSrc_img = &temp[0]; num_src_chans = 1; } else if ((!img.is_component_valid(3)) || (write_flags & cWriteFlagIgnoreAlpha)) { temp.resize(img.get_total_pixels() * 3); for (uint y = 0; y < img.get_height(); y++) { const color_quad_u8* pSrc = img.get_scanline(y); const color_quad_u8* pSrc_end = pSrc + img.get_width(); uint8* pDst = &temp[y * img.get_width() * 3]; while (pSrc != pSrc_end) { const color_quad_u8 c(*pSrc++); pDst[0] = c.r; pDst[1] = c.g; pDst[2] = c.b; pDst += 3; } } num_src_chans = 3; pSrc_img = &temp[0]; } else { num_src_chans = 4; pSrc_img = img.get_ptr(); } bool success = false; if (ext == "png") { size_t png_image_size = 0; void* pPNG_image_data = tdefl_write_image_to_png_file_in_memory(pSrc_img, img.get_width(), img.get_height(), num_src_chans, &png_image_size); if (!pPNG_image_data) return false; success = file_utils::write_buf_to_file(pFilename, pPNG_image_data, png_image_size); mz_free(pPNG_image_data); } else if (is_jpeg) { jpge::params params; if (write_flags & cWriteFlagJPEGQualityLevelMask) 
params.m_quality = math::clamp((write_flags & cWriteFlagJPEGQualityLevelMask) >> cWriteFlagJPEGQualityLevelShift, 1U, 100U); params.m_two_pass_flag = (write_flags & cWriteFlagJPEGTwoPass) != 0; params.m_no_chroma_discrim_flag = (write_flags & cWriteFlagJPEGNoChromaDiscrim) != 0; if (write_flags & cWriteFlagJPEGH1V1) params.m_subsampling = jpge::H1V1; else if (write_flags & cWriteFlagJPEGH2V1) params.m_subsampling = jpge::H2V1; else if (write_flags & cWriteFlagJPEGH2V2) params.m_subsampling = jpge::H2V2; success = jpge::compress_image_to_jpeg_file(pFilename, img.get_width(), img.get_height(), num_src_chans, (const jpge::uint8*)pSrc_img, params); } else { success = ((ext == "bmp" ? stbi_write_bmp : stbi_write_tga)(pFilename, img.get_width(), img.get_height(), num_src_chans, pSrc_img) == CRNLIB_TRUE); } return success; } bool has_alpha(const image_u8& img) { for (uint y = 0; y < img.get_height(); y++) for (uint x = 0; x < img.get_width(); x++) if (img(x, y).a < 255) return true; return false; } void renorm_normal_map(image_u8& img) { for (uint y = 0; y < img.get_height(); y++) { for (uint x = 0; x < img.get_width(); x++) { color_quad_u8& c = img(x, y); if ((c.r == 128) && (c.g == 128) && (c.b == 128)) continue; vec3F v(c.r, c.g, c.b); v *= 1.0f / 255.0f; v *= 2.0f; v -= vec3F(1.0f); v.clamp(-1.0f, 1.0f); float length = v.length(); if (length < .077f) c.set(128, 128, 128, c.a); else if (fabs(length - 1.0f) > .077f) { if (length) v /= length; for (uint i = 0; i < 3; i++) c[i] = static_cast(math::clamp(floor((v[i] + 1.0f) * .5f * 255.0f + .5f), 0.0f, 255.0f)); if ((c.r == 128) && (c.g == 128)) { if (c.b < 128) c.b = 0; else c.b = 255; } } } } } bool is_normal_map(const image_u8& img, const char* pFilename) { float score = 0.0f; uint num_invalid_pixels = 0; // TODO: Derive better score from pixel mean, eigenvecs/vals //crnlib::vector pixels; for (uint y = 0; y < img.get_height(); y++) { for (uint x = 0; x < img.get_width(); x++) { const color_quad_u8& c = img(x, y); if 
(c.b < 123) { num_invalid_pixels++; continue; } else if ((c.r != 128) || (c.g != 128) || (c.b != 128)) { vec3F v(c.r, c.g, c.b); v -= vec3F(128.0f); v /= vec3F(127.0f); //pixels.push_back(v); v.clamp(-1.0f, 1.0f); float norm = v.norm(); if ((norm < 0.83f) || (norm > 1.29f)) num_invalid_pixels++; } } } score -= math::clamp(float(num_invalid_pixels) / (img.get_width() * img.get_height()) - .026f, 0.0f, 1.0f) * 5.0f; if (pFilename) { dynamic_string str(pFilename); str.tolower(); if (str.contains("normal") || str.contains("local") || str.contains("nmap")) score += 1.0f; if (str.contains("diffuse") || str.contains("spec") || str.contains("gloss")) score -= 1.0f; } return score >= 0.0f; } bool resample_single_thread(const image_u8& src, image_u8& dst, const resample_params& params) { const uint src_width = src.get_width(); const uint src_height = src.get_height(); if (math::maximum(src_width, src_height) > CRNLIB_RESAMPLER_MAX_DIMENSION) { printf("Image is too large!\n"); return EXIT_FAILURE; } const int cMaxComponents = 4; if (((int)params.m_num_comps < 1) || ((int)params.m_num_comps > (int)cMaxComponents)) return false; const uint dst_width = params.m_dst_width; const uint dst_height = params.m_dst_height; if ((math::minimum(dst_width, dst_height) < 1) || (math::maximum(dst_width, dst_height) > CRNLIB_RESAMPLER_MAX_DIMENSION)) { printf("Image is too large!\n"); return EXIT_FAILURE; } if ((src_width == dst_width) && (src_height == dst_height)) { dst = src; return true; } dst.clear(); dst.resize(params.m_dst_width, params.m_dst_height); // Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction. 
const float source_gamma = params.m_source_gamma; //1.75f; float srgb_to_linear[256]; if (params.m_srgb) { for (int i = 0; i < 256; ++i) srgb_to_linear[i] = (float)pow(i * 1.0f / 255.0f, source_gamma); } const int linear_to_srgb_table_size = 8192; unsigned char linear_to_srgb[linear_to_srgb_table_size]; const float inv_linear_to_srgb_table_size = 1.0f / linear_to_srgb_table_size; const float inv_source_gamma = 1.0f / source_gamma; if (params.m_srgb) { for (int i = 0; i < linear_to_srgb_table_size; ++i) { int k = (int)(255.0f * pow(i * inv_linear_to_srgb_table_size, inv_source_gamma) + .5f); if (k < 0) k = 0; else if (k > 255) k = 255; linear_to_srgb[i] = (unsigned char)k; } } Resampler* resamplers[cMaxComponents]; crnlib::vector samples[cMaxComponents]; resamplers[0] = crnlib_new(src_width, src_height, dst_width, dst_height, params.m_wrapping ? Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, params.m_pFilter, (Resampler::Contrib_List*)NULL, (Resampler::Contrib_List*)NULL, params.m_filter_scale, params.m_filter_scale); samples[0].resize(src_width); for (uint i = 1; i < params.m_num_comps; i++) { resamplers[i] = crnlib_new(src_width, src_height, dst_width, dst_height, params.m_wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP, 0.0f, 1.0f, params.m_pFilter, resamplers[0]->get_clist_x(), resamplers[0]->get_clist_y(), params.m_filter_scale, params.m_filter_scale); samples[i].resize(src_width); } uint dst_y = 0; for (uint src_y = 0; src_y < src_height; src_y++) { const color_quad_u8* pSrc = src.get_scanline(src_y); for (uint x = 0; x < src_width; x++) { for (uint c = 0; c < params.m_num_comps; c++) { const uint comp_index = params.m_first_comp + c; const uint8 v = (*pSrc)[comp_index]; if (!params.m_srgb || (comp_index == 3)) samples[c][x] = v * (1.0f / 255.0f); else samples[c][x] = srgb_to_linear[v]; } pSrc++; } for (uint c = 0; c < params.m_num_comps; c++) { if (!resamplers[c]->put_line(&samples[c][0])) { for (uint i = 0; i < params.m_num_comps; i++) crnlib_delete(resamplers[i]); return false; } } for (;;) { uint c; for (c = 0; c < params.m_num_comps; c++) { const uint comp_index = params.m_first_comp + c; const float* pOutput_samples = resamplers[c]->get_line(); if (!pOutput_samples) break; const bool linear = !params.m_srgb || (comp_index == 3); CRNLIB_ASSERT(dst_y < dst_height); color_quad_u8* pDst = dst.get_scanline(dst_y); for (uint x = 0; x < dst_width; x++) { if (linear) { int c = (int)(255.0f * pOutput_samples[x] + .5f); if (c < 0) c = 0; else if (c > 255) c = 255; (*pDst)[comp_index] = (unsigned char)c; } else { int j = (int)(linear_to_srgb_table_size * pOutput_samples[x] + .5f); if (j < 0) j = 0; else if (j >= linear_to_srgb_table_size) j = linear_to_srgb_table_size - 1; (*pDst)[comp_index] = linear_to_srgb[j]; } pDst++; } } if (c < params.m_num_comps) break; dst_y++; } } for (uint i = 0; i < params.m_num_comps; i++) crnlib_delete(resamplers[i]); return true; } bool resample_multithreaded(const image_u8& src, image_u8& dst, const resample_params& params) { const uint src_width = src.get_width(); const uint src_height = src.get_height(); if (math::maximum(src_width, src_height) > CRNLIB_RESAMPLER_MAX_DIMENSION) { printf("Image 
is too large!\n"); return EXIT_FAILURE; } const int cMaxComponents = 4; if (((int)params.m_num_comps < 1) || ((int)params.m_num_comps > (int)cMaxComponents)) return false; const uint dst_width = params.m_dst_width; const uint dst_height = params.m_dst_height; if ((math::minimum(dst_width, dst_height) < 1) || (math::maximum(dst_width, dst_height) > CRNLIB_RESAMPLER_MAX_DIMENSION)) { printf("Image is too large!\n"); return EXIT_FAILURE; } if ((src_width == dst_width) && (src_height == dst_height)) { dst = src; return true; } dst.clear(); // Partial gamma correction looks better on mips. Set to 1.0 to disable gamma correction. const float source_gamma = params.m_source_gamma; //1.75f; float srgb_to_linear[256]; if (params.m_srgb) { for (int i = 0; i < 256; ++i) srgb_to_linear[i] = (float)pow(i * 1.0f / 255.0f, source_gamma); } const int linear_to_srgb_table_size = 8192; unsigned char linear_to_srgb[linear_to_srgb_table_size]; const float inv_linear_to_srgb_table_size = 1.0f / linear_to_srgb_table_size; const float inv_source_gamma = 1.0f / source_gamma; if (params.m_srgb) { for (int i = 0; i < linear_to_srgb_table_size; ++i) { int k = (int)(255.0f * pow(i * inv_linear_to_srgb_table_size, inv_source_gamma) + .5f); if (k < 0) k = 0; else if (k > 255) k = 255; linear_to_srgb[i] = (unsigned char)k; } } task_pool tp; tp.init(g_number_of_processors - 1); threaded_resampler resampler(tp); threaded_resampler::params p; p.m_src_width = src_width; p.m_src_height = src_height; p.m_dst_width = dst_width; p.m_dst_height = dst_height; p.m_sample_low = 0.0f; p.m_sample_high = 1.0f; p.m_boundary_op = params.m_wrapping ? 
Resampler::BOUNDARY_WRAP : Resampler::BOUNDARY_CLAMP; p.m_Pfilter_name = params.m_pFilter; p.m_filter_x_scale = params.m_filter_scale; p.m_filter_y_scale = params.m_filter_scale; uint resampler_comps = 4; if (params.m_num_comps == 1) { p.m_fmt = threaded_resampler::cPF_Y_F32; resampler_comps = 1; } else if (params.m_num_comps <= 3) p.m_fmt = threaded_resampler::cPF_RGBX_F32; else p.m_fmt = threaded_resampler::cPF_RGBA_F32; crnlib::vector src_samples; crnlib::vector dst_samples; if (!src_samples.try_resize(src_width * src_height * resampler_comps)) return false; if (!dst_samples.try_resize(dst_width * dst_height * resampler_comps)) return false; p.m_pSrc_pixels = src_samples.get_ptr(); p.m_src_pitch = src_width * resampler_comps * sizeof(float); p.m_pDst_pixels = dst_samples.get_ptr(); p.m_dst_pitch = dst_width * resampler_comps * sizeof(float); for (uint src_y = 0; src_y < src_height; src_y++) { const color_quad_u8* pSrc = src.get_scanline(src_y); float* pDst = src_samples.get_ptr() + src_width * resampler_comps * src_y; for (uint x = 0; x < src_width; x++) { for (uint c = 0; c < params.m_num_comps; c++) { const uint comp_index = params.m_first_comp + c; const uint8 v = (*pSrc)[comp_index]; if (!params.m_srgb || (comp_index == 3)) pDst[c] = v * (1.0f / 255.0f); else pDst[c] = srgb_to_linear[v]; } pSrc++; pDst += resampler_comps; } } if (!resampler.resample(p)) return false; src_samples.clear(); if (!dst.resize(params.m_dst_width, params.m_dst_height)) return false; for (uint dst_y = 0; dst_y < dst_height; dst_y++) { const float* pSrc = dst_samples.get_ptr() + dst_width * resampler_comps * dst_y; color_quad_u8* pDst = dst.get_scanline(dst_y); for (uint x = 0; x < dst_width; x++) { color_quad_u8 dst(0, 0, 0, 255); for (uint c = 0; c < params.m_num_comps; c++) { const uint comp_index = params.m_first_comp + c; const float v = pSrc[c]; if ((!params.m_srgb) || (comp_index == 3)) { int c = static_cast(255.0f * v + .5f); if (c < 0) c = 0; else if (c > 255) c = 255; 
dst[comp_index] = (unsigned char)c; } else { int j = static_cast(linear_to_srgb_table_size * v + .5f); if (j < 0) j = 0; else if (j >= linear_to_srgb_table_size) j = linear_to_srgb_table_size - 1; dst[comp_index] = linear_to_srgb[j]; } } *pDst++ = dst; pSrc += resampler_comps; } } return true; } bool resample(const image_u8& src, image_u8& dst, const resample_params& params) { if ((params.m_multithreaded) && (g_number_of_processors > 1)) return resample_multithreaded(src, dst, params); else return resample_single_thread(src, dst, params); } bool compute_delta(image_u8& dest, image_u8& a, image_u8& b, uint scale) { if ((a.get_width() != b.get_width()) || (a.get_height() != b.get_height())) return false; dest.resize(a.get_width(), b.get_height()); for (uint y = 0; y < a.get_height(); y++) { for (uint x = 0; x < a.get_width(); x++) { const color_quad_u8& ca = a(x, y); const color_quad_u8& cb = b(x, y); color_quad_u8 cd; for (uint c = 0; c < 4; c++) { int d = (ca[c] - cb[c]) * scale + 128; d = math::clamp(d, 0, 255); cd[c] = static_cast(d); } dest(x, y) = cd; } } return true; } // FIXME: Totally hack-ass computation. // Perhaps port https://www.lomont.org/software/misc/ssim/SSIM.html ? 
double compute_block_ssim(uint t, const uint8* pX, const uint8* pY) { double ave_x = 0.0f; double ave_y = 0.0f; for (uint i = 0; i < t; i++) { ave_x += pX[i]; ave_y += pY[i]; } ave_x /= t; ave_y /= t; double var_x = 0.0f; double var_y = 0.0f; for (uint i = 0; i < t; i++) { var_x += math::square(pX[i] - ave_x); var_y += math::square(pY[i] - ave_y); } var_x = sqrt(var_x / (t - 1)); var_y = sqrt(var_y / (t - 1)); double covar_xy = 0.0f; for (uint i = 0; i < t; i++) covar_xy += (pX[i] - ave_x) * (pY[i] - ave_y); covar_xy /= (t - 1); const double c1 = 6.5025; //(255*.01)^2 const double c2 = 58.5225; //(255*.03)^2 double n = (2.0f * ave_x * ave_y + c1) * (2.0f * covar_xy + c2); double d = (ave_x * ave_x + ave_y * ave_y + c1) * (var_x * var_x + var_y * var_y + c2); return n / d; } double compute_ssim(const image_u8& a, const image_u8& b, int channel_index) { const uint N = 6; uint8 sx[N * N], sy[N * N]; double total_ssim = 0.0f; uint total_blocks = 0; //image_u8 yimg((a.get_width() + N - 1) / N, (a.get_height() + N - 1) / N); for (uint y = 0; y < a.get_height(); y += N) { for (uint x = 0; x < a.get_width(); x += N) { for (uint iy = 0; iy < N; iy++) { for (uint ix = 0; ix < N; ix++) { if (channel_index < 0) sx[ix + iy * N] = (uint8)a.get_clamped(x + ix, y + iy).get_luma(); else sx[ix + iy * N] = (uint8)a.get_clamped(x + ix, y + iy)[channel_index]; if (channel_index < 0) sy[ix + iy * N] = (uint8)b.get_clamped(x + ix, y + iy).get_luma(); else sy[ix + iy * N] = (uint8)b.get_clamped(x + ix, y + iy)[channel_index]; } } double ssim = compute_block_ssim(N * N, sx, sy); total_ssim += ssim; total_blocks++; //uint ssim_c = (uint)math::clamp(ssim * 127.0f + 128.0f, 0, 255); //yimg(x / N, y / N).set(ssim_c, ssim_c, ssim_c, 255); } } if (!total_blocks) return 0.0f; //save_to_file_stb_or_miniz("ssim.tga", yimg, cWriteFlagGrayscale); return total_ssim / total_blocks; } void print_ssim(const image_u8& src_img, const image_u8& dst_img) { (void)src_img; (void)dst_img; //double y_ssim = 
compute_ssim(src_img, dst_img, -1); //console::printf("Luma MSSIM: %f, Scaled: %f", y_ssim, (y_ssim - .8f) / .2f); //double r_ssim = compute_ssim(src_img, dst_img, 0); //console::printf(" R MSSIM: %f", r_ssim); //double g_ssim = compute_ssim(src_img, dst_img, 1); //console::printf(" G MSSIM: %f", g_ssim); //double b_ssim = compute_ssim(src_img, dst_img, 2); //console::printf(" B MSSIM: %f", b_ssim); } void error_metrics::print(const char* pName) const { if (mPeakSNR >= cInfinitePSNR) console::printf("%s Error: Max: %3u, Mean: %3.3f, MSE: %3.3f, RMSE: %3.3f, PSNR: Infinite", pName, mMax, mMean, mMeanSquared, mRootMeanSquared); else console::printf("%s Error: Max: %3u, Mean: %3.3f, MSE: %3.3f, RMSE: %3.3f, PSNR: %3.3f", pName, mMax, mMean, mMeanSquared, mRootMeanSquared, mPeakSNR); } bool error_metrics::compute(const image_u8& a, const image_u8& b, uint first_channel, uint num_channels, bool average_component_error) { //if ( (!a.get_width()) || (!b.get_height()) || (a.get_width() != b.get_width()) || (a.get_height() != b.get_height()) ) // return false; const uint width = math::minimum(a.get_width(), b.get_width()); const uint height = math::minimum(a.get_height(), b.get_height()); CRNLIB_ASSERT((first_channel < 4U) && (first_channel + num_channels <= 4U)); // Histogram approach due to Charles Bloom. 
double hist[256]; utils::zero_object(hist); for (uint y = 0; y < height; y++) { for (uint x = 0; x < width; x++) { const color_quad_u8& ca = a(x, y); const color_quad_u8& cb = b(x, y); if (!num_channels) hist[labs(ca.get_luma() - cb.get_luma())]++; else { for (uint c = 0; c < num_channels; c++) hist[labs(ca[first_channel + c] - cb[first_channel + c])]++; } } } mMax = 0; double sum = 0.0f, sum2 = 0.0f; for (uint i = 0; i < 256; i++) { if (!hist[i]) continue; mMax = math::maximum(mMax, i); double x = i * hist[i]; sum += x; sum2 += i * x; } // See http://bmrc.berkeley.edu/courseware/cs294/fall97/assignment/psnr.html double total_values = width * height; if (average_component_error) total_values *= math::clamp(num_channels, 1, 4); mMean = math::clamp(sum / total_values, 0.0f, 255.0f); mMeanSquared = math::clamp(sum2 / total_values, 0.0f, 255.0f * 255.0f); mRootMeanSquared = sqrt(mMeanSquared); if (!mRootMeanSquared) mPeakSNR = cInfinitePSNR; else mPeakSNR = math::clamp(log10(255.0f / mRootMeanSquared) * 20.0f, 0.0f, 500.0f); return true; } void print_image_metrics(const image_u8& src_img, const image_u8& dst_img) { if ((!src_img.get_width()) || (!dst_img.get_height()) || (src_img.get_width() != dst_img.get_width()) || (src_img.get_height() != dst_img.get_height())) console::printf("print_image_metrics: Image resolutions don't match exactly (%ux%u) vs. 
(%ux%u)", src_img.get_width(), src_img.get_height(), dst_img.get_width(), dst_img.get_height()); image_utils::error_metrics error_metrics; if (src_img.has_rgb() || dst_img.has_rgb()) { error_metrics.compute(src_img, dst_img, 0, 3, false); error_metrics.print("RGB Total "); error_metrics.compute(src_img, dst_img, 0, 3, true); error_metrics.print("RGB Average"); error_metrics.compute(src_img, dst_img, 0, 0); error_metrics.print("Luma "); error_metrics.compute(src_img, dst_img, 0, 1); error_metrics.print("Red "); error_metrics.compute(src_img, dst_img, 1, 1); error_metrics.print("Green "); error_metrics.compute(src_img, dst_img, 2, 1); error_metrics.print("Blue "); } if (src_img.has_alpha() || dst_img.has_alpha()) { error_metrics.compute(src_img, dst_img, 3, 1); error_metrics.print("Alpha "); } } static uint8 regen_z(uint x, uint y) { float vx = math::clamp((x - 128.0f) * 1.0f / 127.0f, -1.0f, 1.0f); float vy = math::clamp((y - 128.0f) * 1.0f / 127.0f, -1.0f, 1.0f); float vz = sqrt(math::clamp(1.0f - vx * vx - vy * vy, 0.0f, 1.0f)); vz = vz * 127.0f + 128.0f; if (vz < 128.0f) vz -= .5f; else vz += .5f; int ib = math::float_to_int(vz); return static_cast(math::clamp(ib, 0, 255)); } void convert_image(image_u8& img, image_utils::conversion_type conv_type) { switch (conv_type) { case image_utils::cConversion_To_CCxY: { img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagLumaChroma)); break; } case image_utils::cConversion_From_CCxY: { img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid)); break; } case image_utils::cConversion_To_xGxR: { img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); break; } case image_utils::cConversion_From_xGxR: { 
img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagNormalMap)); break; } case image_utils::cConversion_To_xGBR: { img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); break; } case image_utils::cConversion_To_AGBR: { img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); break; } case image_utils::cConversion_From_xGBR: { img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagNormalMap)); break; } case image_utils::cConversion_From_AGBR: { img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid | pixel_format_helpers::cCompFlagNormalMap)); break; } case image_utils::cConversion_XY_to_XYZ: { img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagNormalMap)); break; } case cConversion_Y_To_A: { img.set_comp_flags(static_cast(img.get_comp_flags() | pixel_format_helpers::cCompFlagAValid)); break; } case cConversion_A_To_RGBA: { img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagAValid)); break; } case cConversion_Y_To_RGB: { img.set_comp_flags(static_cast(pixel_format_helpers::cCompFlagRValid | 
pixel_format_helpers::cCompFlagGValid | pixel_format_helpers::cCompFlagBValid | pixel_format_helpers::cCompFlagGrayscale | (img.has_alpha() ? pixel_format_helpers::cCompFlagAValid : 0))); break; } case cConversion_To_Y: { img.set_comp_flags(static_cast(img.get_comp_flags() | pixel_format_helpers::cCompFlagGrayscale)); break; } default: { CRNLIB_ASSERT(false); return; } } for (uint y = 0; y < img.get_height(); y++) { for (uint x = 0; x < img.get_width(); x++) { color_quad_u8 src(img(x, y)); color_quad_u8 dst; switch (conv_type) { case image_utils::cConversion_To_CCxY: { color::RGB_to_YCC(dst, src); break; } case image_utils::cConversion_From_CCxY: { color::YCC_to_RGB(dst, src); break; } case image_utils::cConversion_To_xGxR: { dst.r = 0; dst.g = src.g; dst.b = 0; dst.a = src.r; break; } case image_utils::cConversion_From_xGxR: { dst.r = src.a; dst.g = src.g; // This is kinda iffy, we're assuming the image is a normal map here. dst.b = regen_z(src.a, src.g); dst.a = 255; break; } case image_utils::cConversion_To_xGBR: { dst.r = 0; dst.g = src.g; dst.b = src.b; dst.a = src.r; break; } case image_utils::cConversion_To_AGBR: { dst.r = src.a; dst.g = src.g; dst.b = src.b; dst.a = src.r; break; } case image_utils::cConversion_From_xGBR: { dst.r = src.a; dst.g = src.g; dst.b = src.b; dst.a = 255; break; } case image_utils::cConversion_From_AGBR: { dst.r = src.a; dst.g = src.g; dst.b = src.b; dst.a = src.r; break; } case image_utils::cConversion_XY_to_XYZ: { dst.r = src.r; dst.g = src.g; // This is kinda iffy, we're assuming the image is a normal map here. 
dst.b = regen_z(src.r, src.g); dst.a = 255; break; } case image_utils::cConversion_Y_To_A: { dst.r = src.r; dst.g = src.g; dst.b = src.b; dst.a = static_cast(src.get_luma()); break; } case image_utils::cConversion_Y_To_RGB: { uint8 y = static_cast(src.get_luma()); dst.r = y; dst.g = y; dst.b = y; dst.a = src.a; break; } case image_utils::cConversion_A_To_RGBA: { dst.r = src.a; dst.g = src.a; dst.b = src.a; dst.a = src.a; break; } case image_utils::cConversion_To_Y: { uint8 y = static_cast(src.get_luma()); dst.r = y; dst.g = y; dst.b = y; dst.a = src.a; break; } default: { CRNLIB_ASSERT(false); dst = src; break; } } img(x, y) = dst; } } } image_utils::conversion_type get_conversion_type(bool cooking, pixel_format fmt) { image_utils::conversion_type conv_type = image_utils::cConversion_Invalid; if (cooking) { switch (fmt) { case PIXEL_FMT_DXT5_CCxY: { conv_type = image_utils::cConversion_To_CCxY; break; } case PIXEL_FMT_DXT5_xGxR: { conv_type = image_utils::cConversion_To_xGxR; break; } case PIXEL_FMT_DXT5_xGBR: { conv_type = image_utils::cConversion_To_xGBR; break; } case PIXEL_FMT_DXT5_AGBR: { conv_type = image_utils::cConversion_To_AGBR; break; } default: break; } } else { switch (fmt) { case PIXEL_FMT_3DC: case PIXEL_FMT_DXN: { conv_type = image_utils::cConversion_XY_to_XYZ; break; } case PIXEL_FMT_DXT5_CCxY: { conv_type = image_utils::cConversion_From_CCxY; break; } case PIXEL_FMT_DXT5_xGxR: { conv_type = image_utils::cConversion_From_xGxR; break; } case PIXEL_FMT_DXT5_xGBR: { conv_type = image_utils::cConversion_From_xGBR; break; } case PIXEL_FMT_DXT5_AGBR: { conv_type = image_utils::cConversion_From_AGBR; break; } default: break; } } return conv_type; } image_utils::conversion_type get_image_conversion_type_from_crn_format(crn_format fmt) { switch (fmt) { case cCRNFmtDXT5_CCxY: return image_utils::cConversion_To_CCxY; case cCRNFmtDXT5_xGxR: return image_utils::cConversion_To_xGxR; case cCRNFmtDXT5_xGBR: return image_utils::cConversion_To_xGBR; case 
cCRNFmtDXT5_AGBR: return image_utils::cConversion_To_AGBR; default: break; } return image_utils::cConversion_Invalid; } double compute_std_dev(uint n, const color_quad_u8* pPixels, uint first_channel, uint num_channels) { if (!n) return 0.0f; double sum = 0.0f; double sum2 = 0.0f; for (uint i = 0; i < n; i++) { const color_quad_u8& cp = pPixels[i]; if (!num_channels) { uint l = cp.get_luma(); sum += l; sum2 += l * l; } else { for (uint c = 0; c < num_channels; c++) { uint l = cp[first_channel + c]; sum += l; sum2 += l * l; } } } double w = math::maximum(1U, num_channels) * n; sum /= w; sum2 /= w; double var = sum2 - sum * sum; var = math::maximum(var, 0.0f); return sqrt(var); } uint8* read_image_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename) { *pWidth = 0; *pHeight = 0; *pActualComps = 0; if ((req_comps < 1) || (req_comps > 4)) return NULL; mipmapped_texture tex; buffer_stream buf_stream(pImage, nSize); buf_stream.set_name(pFilename); data_stream_serializer serializer(buf_stream); if (!tex.read_from_stream(serializer)) return NULL; if (tex.is_packed()) { if (!tex.unpack_from_dxt(true)) return NULL; } image_u8 img; image_u8* pImg = tex.get_level_image(0, 0, img); if (!pImg) return NULL; *pWidth = tex.get_width(); *pHeight = tex.get_height(); if (pImg->has_alpha()) *pActualComps = 4; else if (pImg->is_grayscale()) *pActualComps = 1; else *pActualComps = 3; uint8* pDst = NULL; if (req_comps == 4) { pDst = (uint8*)malloc(tex.get_total_pixels() * sizeof(uint32)); uint8* pSrc = (uint8*)pImg->get_ptr(); memcpy(pDst, pSrc, tex.get_total_pixels() * sizeof(uint32)); } else { image_u8 luma_img; if (req_comps == 1) { luma_img = *pImg; luma_img.convert_to_grayscale(); pImg = &luma_img; } pixel_packer packer(req_comps, 8); uint32 n; pDst = image_utils::pack_image(*pImg, packer, n); } return pDst; } } // namespace image_utils } // namespace crnlib 
DaemonEngine-crunch-ef4d32f/crnlib/crn_image_utils.h000066400000000000000000000126471503722002600226170ustar00rootroot00000000000000// File: crn_image_utils.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_image.h" #include "crn_data_stream_serializer.h" namespace crnlib { namespace image_utils { enum read_flags_t { cReadFlagForceSTB = 1, cReadFlagsAllFlags = 1 }; bool read_from_stream_stb(data_stream_serializer& serializer, image_u8& img); bool read_from_stream_jpgd(data_stream_serializer& serializer, image_u8& img); bool read_from_stream(image_u8& dest, data_stream_serializer& serializer, uint read_flags = 0); bool read_from_file(image_u8& dest, const char* pFilename, uint read_flags = 0); // Reads texture from memory, results returned stb_image.c style. // *pActual_comps is set to 1, 3, or 4. req_comps must range from 1-4. uint8* read_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename); enum { cWriteFlagIgnoreAlpha = 0x00000001, cWriteFlagGrayscale = 0x00000002, cWriteFlagJPEGH1V1 = 0x00010000, cWriteFlagJPEGH2V1 = 0x00020000, cWriteFlagJPEGH2V2 = 0x00040000, cWriteFlagJPEGTwoPass = 0x00080000, cWriteFlagJPEGNoChromaDiscrim = 0x00100000, cWriteFlagJPEGQualityLevelMask = 0xFF000000, cWriteFlagJPEGQualityLevelShift = 24, }; const int cLumaComponentIndex = -1; inline uint create_jpeg_write_flags(uint base_flags, uint quality_level) { CRNLIB_ASSERT(quality_level <= 100); return base_flags | ((quality_level << cWriteFlagJPEGQualityLevelShift) & cWriteFlagJPEGQualityLevelMask); } bool write_to_file(const char* pFilename, const image_u8& img, uint write_flags = 0, int grayscale_comp_index = cLumaComponentIndex); bool has_alpha(const image_u8& img); bool is_normal_map(const image_u8& img, const char* pFilename = NULL); void renorm_normal_map(image_u8& img); struct resample_params { resample_params() : m_dst_width(0), m_dst_height(0), 
m_pFilter("lanczos4"), m_filter_scale(1.0f), m_srgb(true), m_wrapping(false), m_first_comp(0), m_num_comps(4), m_source_gamma(2.2f), // 1.75f m_multithreaded(true) { } uint m_dst_width; uint m_dst_height; const char* m_pFilter; float m_filter_scale; bool m_srgb; bool m_wrapping; uint m_first_comp; uint m_num_comps; float m_source_gamma; bool m_multithreaded; }; bool resample_single_thread(const image_u8& src, image_u8& dst, const resample_params& params); bool resample_multithreaded(const image_u8& src, image_u8& dst, const resample_params& params); bool resample(const image_u8& src, image_u8& dst, const resample_params& params); bool compute_delta(image_u8& dest, image_u8& a, image_u8& b, uint scale = 2); class error_metrics { public: error_metrics() { utils::zero_this(this); } void print(const char* pName) const; // If num_channels==0, luma error is computed. // If pHist != NULL, it must point to a 256 entry array. bool compute(const image_u8& a, const image_u8& b, uint first_channel, uint num_channels, bool average_component_error = true); uint mMax; double mMean; double mMeanSquared; double mRootMeanSquared; double mPeakSNR; inline bool operator==(const error_metrics& other) const { return mPeakSNR == other.mPeakSNR; } inline bool operator<(const error_metrics& other) const { return mPeakSNR < other.mPeakSNR; } inline bool operator>(const error_metrics& other) const { return mPeakSNR > other.mPeakSNR; } }; void print_image_metrics(const image_u8& src_img, const image_u8& dst_img); double compute_block_ssim(uint n, const uint8* pX, const uint8* pY); double compute_ssim(const image_u8& a, const image_u8& b, int channel_index); void print_ssim(const image_u8& src_img, const image_u8& dst_img); enum conversion_type { cConversion_Invalid = -1, cConversion_To_CCxY, cConversion_From_CCxY, cConversion_To_xGxR, cConversion_From_xGxR, cConversion_To_xGBR, cConversion_From_xGBR, cConversion_To_AGBR, cConversion_From_AGBR, cConversion_XY_to_XYZ, cConversion_Y_To_A, 
cConversion_A_To_RGBA, cConversion_Y_To_RGB, cConversion_To_Y, cConversionTotal }; void convert_image(image_u8& img, conversion_type conv_type); template inline uint8* pack_image(const image_type& img, const pixel_packer& packer, uint& n) { n = 0; if (!packer.is_valid()) return NULL; const uint width = img.get_width(), height = img.get_height(); uint dst_pixel_stride = packer.get_pixel_stride(); uint dst_pitch = width * dst_pixel_stride; n = dst_pitch * height; uint8* pImage = static_cast(crnlib_malloc(n)); uint8* pDst = pImage; for (uint y = 0; y < height; y++) { const typename image_type::color_t* pSrc = img.get_scanline(y); for (uint x = 0; x < width; x++) pDst = (uint8*)packer.pack(*pSrc++, pDst); } return pImage; } image_utils::conversion_type get_conversion_type(bool cooking, pixel_format fmt); image_utils::conversion_type get_image_conversion_type_from_crn_format(crn_format fmt); double compute_std_dev(uint n, const color_quad_u8* pPixels, uint first_channel, uint num_channels); uint8* read_image_from_memory(const uint8* pImage, int nSize, int* pWidth, int* pHeight, int* pActualComps, int req_comps, const char* pFilename); } // namespace image_utils } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_intersect.h000066400000000000000000000052651503722002600223130ustar00rootroot00000000000000// File: crn_intersect.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_ray.h" namespace crnlib { namespace intersection { enum result { cBackfacing = -1, cFailure = 0, cSuccess, cParallel, cInside, }; // Returns cInside, cSuccess, or cFailure. 
// Algorithm: Graphics Gems 1 template result ray_aabb(vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) { enum { cNumDim = vector_type::num_elements, cRight = 0, cLeft = 1, cMiddle = 2 }; bool inside = true; int quadrant[cNumDim]; scalar_type candidate_plane[cNumDim]; for (int i = 0; i < cNumDim; i++) { if (ray.get_origin()[i] < box[0][i]) { quadrant[i] = cLeft; candidate_plane[i] = box[0][i]; inside = false; } else if (ray.get_origin()[i] > box[1][i]) { quadrant[i] = cRight; candidate_plane[i] = box[1][i]; inside = false; } else { quadrant[i] = cMiddle; } } if (inside) { coord = ray.get_origin(); t = 0.0f; return cInside; } scalar_type max_t[cNumDim]; for (int i = 0; i < cNumDim; i++) { if ((quadrant[i] != cMiddle) && (ray.get_direction()[i] != 0.0f)) max_t[i] = (candidate_plane[i] - ray.get_origin()[i]) / ray.get_direction()[i]; else max_t[i] = -1.0f; } int which_plane = 0; for (int i = 1; i < cNumDim; i++) if (max_t[which_plane] < max_t[i]) which_plane = i; if (max_t[which_plane] < 0.0f) return cFailure; for (int i = 0; i < cNumDim; i++) { if (i != which_plane) { coord[i] = ray.get_origin()[i] + max_t[which_plane] * ray.get_direction()[i]; if ((coord[i] < box[0][i]) || (coord[i] > box[1][i])) return cFailure; } else { coord[i] = candidate_plane[i]; } CRNLIB_ASSERT(coord[i] >= box[0][i] && coord[i] <= box[1][i]); } t = max_t[which_plane]; return cSuccess; } template result ray_aabb(bool& started_within, vector_type& coord, scalar_type& t, const ray_type& ray, const aabb_type& box) { if (!box.contains(ray.get_origin())) { started_within = false; return ray_aabb(coord, t, ray, box); } started_within = true; float diag_dist = box.diagonal_length() * 1.5f; ray_type outside_ray(ray.eval(diag_dist), -ray.get_direction()); result res(ray_aabb(coord, t, outside_ray, box)); if (res != cSuccess) return res; t = math::maximum(0.0f, diag_dist - t); return cSuccess; } } } 
DaemonEngine-crunch-ef4d32f/crnlib/crn_jpgd.cpp000066400000000000000000002577501503722002600216020ustar00rootroot00000000000000// jpgd.cpp - C++ class for JPEG decompression. // Public domain, Rich Geldreich // Alex Evans: Linear memory allocator (taken from jpge.h). // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless) // // Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2. // // Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling. // Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain" // https://web.archive.org/web/20110316030157/http://vision.ai.uiuc.edu/~dugad/research/dct/index.html/ #include "crn_jpgd.h" #include #include #define JPGD_ASSERT(x) assert(x) #include "crn_core.h" #ifdef _MSC_VER #pragma warning(disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable #endif // Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling). // This is slower, but results in higher quality on images with highly saturated colors. #define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1 #define JPGD_TRUE (1) #define JPGD_FALSE (0) #define JPGD_MAX(a, b) (((a) > (b)) ? (a) : (b)) #define JPGD_MIN(a, b) (((a) < (b)) ? (a) : (b)) namespace jpgd { static inline void* jpgd_malloc(size_t nSize) { return crnlib::crnlib_malloc(nSize); } static inline void jpgd_free(void* p) { crnlib::crnlib_free(p); } // DCT coefficients are stored in this sequence. 
static int g_ZAG[64] = {0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63}; enum JPEG_MARKER { M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8, M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC, M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7, M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF, M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0 }; enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 }; #define CONST_BITS 13 #define PASS1_BITS 2 #define SCALEDONE ((int32)1) #define FIX_0_298631336 ((int32)2446) /* FIX(0.298631336) */ #define FIX_0_390180644 ((int32)3196) /* FIX(0.390180644) */ #define FIX_0_541196100 ((int32)4433) /* FIX(0.541196100) */ #define FIX_0_765366865 ((int32)6270) /* FIX(0.765366865) */ #define FIX_0_899976223 ((int32)7373) /* FIX(0.899976223) */ #define FIX_1_175875602 ((int32)9633) /* FIX(1.175875602) */ #define FIX_1_501321110 ((int32)12299) /* FIX(1.501321110) */ #define FIX_1_847759065 ((int32)15137) /* FIX(1.847759065) */ #define FIX_1_961570560 ((int32)16069) /* FIX(1.961570560) */ #define FIX_2_053119869 ((int32)16819) /* FIX(2.053119869) */ #define FIX_2_562915447 ((int32)20995) /* FIX(2.562915447) */ #define FIX_3_072711026 ((int32)25172) /* FIX(3.072711026) */ #define DESCALE(x, n) (((x) + (SCALEDONE << ((n)-1))) >> (n)) #define DESCALE_ZEROSHIFT(x, n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n)) #define MULTIPLY(var, cnst) ((var) * (cnst)) #define 
CLAMP(i) ((static_cast(i) > 255) ? (((~i) >> 31) & 0xFF) : (i)) // Compiler creates a fast path 1D IDCT for X non-zero columns template struct Row { static void idct(int* pTemp, const jpgd_block_t* pSrc) { // ACCESS_COL() will be optimized at compile time to either an array access, or 0. #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0) const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6); const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); const int tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); const int tmp0 = (ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS; const int tmp1 = (ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS; const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1); const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); const int az1 = MULTIPLY(bz1, -FIX_0_899976223); const int az2 = MULTIPLY(bz2, -FIX_2_562915447); const int az3 = MULTIPLY(bz3, -FIX_1_961570560) + bz5; const int az4 = MULTIPLY(bz4, -FIX_0_390180644) + bz5; const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS - PASS1_BITS); pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS - PASS1_BITS); pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS - PASS1_BITS); pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS - PASS1_BITS); pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS - PASS1_BITS); pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS - PASS1_BITS); pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS - PASS1_BITS); pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS - PASS1_BITS); } }; template <> 
struct Row<0> { static void idct(int* pTemp, const jpgd_block_t* pSrc) { (void)pTemp; (void)pSrc; } }; template <> struct Row<1> { static void idct(int* pTemp, const jpgd_block_t* pSrc) { const int dcval = (pSrc[0] << PASS1_BITS); pTemp[0] = dcval; pTemp[1] = dcval; pTemp[2] = dcval; pTemp[3] = dcval; pTemp[4] = dcval; pTemp[5] = dcval; pTemp[6] = dcval; pTemp[7] = dcval; } }; // Compiler creates a fast path 1D IDCT for X non-zero rows template struct Col { static void idct(uint8* pDst_ptr, const int* pTemp) { // ACCESS_ROW() will be optimized at compile time to either an array access, or 0. #define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0) const int z2 = ACCESS_ROW(2); const int z3 = ACCESS_ROW(6); const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); const int tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065); const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); const int tmp0 = (ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS; const int tmp1 = (ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS; const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1); const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); const int az1 = MULTIPLY(bz1, -FIX_0_899976223); const int az2 = MULTIPLY(bz2, -FIX_2_562915447); const int az3 = MULTIPLY(bz3, -FIX_1_961570560) + bz5; const int az4 = MULTIPLY(bz4, -FIX_0_390180644) + bz5; const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS + PASS1_BITS + 3); pDst_ptr[8 * 0] = (uint8)CLAMP(i); i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS + PASS1_BITS + 3); 
pDst_ptr[8 * 7] = (uint8)CLAMP(i); i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS + PASS1_BITS + 3); pDst_ptr[8 * 1] = (uint8)CLAMP(i); i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS + PASS1_BITS + 3); pDst_ptr[8 * 6] = (uint8)CLAMP(i); i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS + PASS1_BITS + 3); pDst_ptr[8 * 2] = (uint8)CLAMP(i); i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS + PASS1_BITS + 3); pDst_ptr[8 * 5] = (uint8)CLAMP(i); i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS + PASS1_BITS + 3); pDst_ptr[8 * 3] = (uint8)CLAMP(i); i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS + PASS1_BITS + 3); pDst_ptr[8 * 4] = (uint8)CLAMP(i); } }; template <> struct Col<1> { static void idct(uint8* pDst_ptr, const int* pTemp) { int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS + 3); const uint8 dcval_clamped = (uint8)CLAMP(dcval); pDst_ptr[0 * 8] = dcval_clamped; pDst_ptr[1 * 8] = dcval_clamped; pDst_ptr[2 * 8] = dcval_clamped; pDst_ptr[3 * 8] = dcval_clamped; pDst_ptr[4 * 8] = dcval_clamped; pDst_ptr[5 * 8] = dcval_clamped; pDst_ptr[6 * 8] = dcval_clamped; pDst_ptr[7 * 8] = dcval_clamped; } }; static const uint8 s_idct_row_table[] = { 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 0, 0, 0, 0, 2, 2, 1, 0, 0, 0, 0, 0, 3, 2, 1, 0, 0, 0, 0, 0, 4, 2, 1, 0, 0, 0, 0, 0, 4, 3, 1, 0, 0, 0, 0, 0, 4, 3, 2, 0, 0, 0, 0, 0, 4, 3, 2, 1, 0, 0, 0, 0, 4, 3, 2, 1, 1, 0, 0, 0, 4, 3, 2, 2, 1, 0, 0, 0, 4, 3, 3, 2, 1, 0, 0, 0, 4, 4, 3, 2, 1, 0, 0, 0, 5, 4, 3, 2, 1, 0, 0, 0, 6, 4, 3, 2, 1, 0, 0, 0, 6, 5, 3, 2, 1, 0, 0, 0, 6, 5, 4, 2, 1, 0, 0, 0, 6, 5, 4, 3, 1, 0, 0, 0, 6, 5, 4, 3, 2, 0, 0, 0, 6, 5, 4, 3, 2, 1, 0, 0, 6, 5, 4, 3, 2, 1, 1, 0, 6, 5, 4, 3, 2, 2, 1, 0, 6, 5, 4, 3, 3, 2, 1, 0, 6, 5, 4, 4, 3, 2, 1, 0, 6, 5, 5, 4, 3, 2, 1, 0, 6, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0, 8, 6, 5, 4, 3, 2, 1, 0, 8, 7, 5, 4, 3, 2, 1, 0, 8, 7, 6, 4, 3, 2, 1, 0, 8, 7, 6, 5, 3, 2, 1, 0, 8, 7, 6, 5, 4, 2, 1, 0, 8, 7, 6, 5, 4, 3, 1, 0, 8, 7, 6, 5, 4, 3, 2, 0, 8, 7, 6, 
5, 4, 3, 2, 1, 8, 7, 6, 5, 4, 3, 2, 2, 8, 7, 6, 5, 4, 3, 3, 2, 8, 7, 6, 5, 4, 4, 3, 2, 8, 7, 6, 5, 5, 4, 3, 2, 8, 7, 6, 6, 5, 4, 3, 2, 8, 7, 7, 6, 5, 4, 3, 2, 8, 8, 7, 6, 5, 4, 3, 2, 8, 8, 8, 6, 5, 4, 3, 2, 8, 8, 8, 7, 5, 4, 3, 2, 8, 8, 8, 7, 6, 4, 3, 2, 8, 8, 8, 7, 6, 5, 3, 2, 8, 8, 8, 7, 6, 5, 4, 2, 8, 8, 8, 7, 6, 5, 4, 3, 8, 8, 8, 7, 6, 5, 4, 4, 8, 8, 8, 7, 6, 5, 5, 4, 8, 8, 8, 7, 6, 6, 5, 4, 8, 8, 8, 7, 7, 6, 5, 4, 8, 8, 8, 8, 7, 6, 5, 4, 8, 8, 8, 8, 8, 6, 5, 4, 8, 8, 8, 8, 8, 7, 5, 4, 8, 8, 8, 8, 8, 7, 6, 4, 8, 8, 8, 8, 8, 7, 6, 5, 8, 8, 8, 8, 8, 7, 6, 6, 8, 8, 8, 8, 8, 7, 7, 6, 8, 8, 8, 8, 8, 8, 7, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, }; static const uint8 s_idct_col_table[] = {1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}; void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag) { JPGD_ASSERT(block_max_zag >= 1); JPGD_ASSERT(block_max_zag <= 64); if (block_max_zag <= 1) { int k = ((pSrc_ptr[0] + 4) >> 3) + 128; k = CLAMP(k); k = k | (k << 8); k = k | (k << 16); for (int i = 8; i > 0; i--) { *(int*)&pDst_ptr[0] = k; *(int*)&pDst_ptr[4] = k; pDst_ptr += 8; } return; } int temp[64]; const jpgd_block_t* pSrc = pSrc_ptr; int* pTemp = temp; const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8]; int i; for (i = 8; i > 0; i--, pRow_tab++) { switch (*pRow_tab) { case 0: Row<0>::idct(pTemp, pSrc); break; case 1: Row<1>::idct(pTemp, pSrc); break; case 2: Row<2>::idct(pTemp, pSrc); break; case 3: Row<3>::idct(pTemp, pSrc); break; case 4: Row<4>::idct(pTemp, pSrc); break; case 5: Row<5>::idct(pTemp, pSrc); break; case 6: Row<6>::idct(pTemp, pSrc); break; case 7: Row<7>::idct(pTemp, pSrc); break; case 8: Row<8>::idct(pTemp, pSrc); break; } pSrc += 8; pTemp += 8; } pTemp = temp; const int nonzero_rows = s_idct_col_table[block_max_zag - 1]; for (i = 8; i > 0; 
i--) { switch (nonzero_rows) { case 1: Col<1>::idct(pDst_ptr, pTemp); break; case 2: Col<2>::idct(pDst_ptr, pTemp); break; case 3: Col<3>::idct(pDst_ptr, pTemp); break; case 4: Col<4>::idct(pDst_ptr, pTemp); break; case 5: Col<5>::idct(pDst_ptr, pTemp); break; case 6: Col<6>::idct(pDst_ptr, pTemp); break; case 7: Col<7>::idct(pDst_ptr, pTemp); break; case 8: Col<8>::idct(pDst_ptr, pTemp); break; } pTemp++; pDst_ptr++; } } void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr) { int temp[64]; int* pTemp = temp; const jpgd_block_t* pSrc = pSrc_ptr; for (int i = 4; i > 0; i--) { Row<4>::idct(pTemp, pSrc); pSrc += 8; pTemp += 8; } pTemp = temp; for (int i = 8; i > 0; i--) { Col<4>::idct(pDst_ptr, pTemp); pTemp++; pDst_ptr++; } } // Retrieve one character from the input stream. inline uint jpeg_decoder::get_char() { // Any bytes remaining in buffer? if (!m_in_buf_left) { // Try to get more bytes. prep_in_buffer(); // Still nothing to get? if (!m_in_buf_left) { // Pad the end of the stream with 0xFF 0xD9 (EOI marker) int t = m_tem_flag; m_tem_flag ^= 1; if (t) return 0xD9; else return 0xFF; } } uint c = *m_pIn_buf_ofs++; m_in_buf_left--; return c; } // Same as previous method, except can indicate if the character is a pad character or not. inline uint jpeg_decoder::get_char(bool* pPadding_flag) { if (!m_in_buf_left) { prep_in_buffer(); if (!m_in_buf_left) { *pPadding_flag = true; int t = m_tem_flag; m_tem_flag ^= 1; if (t) return 0xD9; else return 0xFF; } } *pPadding_flag = false; uint c = *m_pIn_buf_ofs++; m_in_buf_left--; return c; } // Inserts a previously retrieved character back into the input buffer. inline void jpeg_decoder::stuff_char(uint8 q) { *(--m_pIn_buf_ofs) = q; m_in_buf_left++; } // Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered. 
// Retrieves one byte from the input stream without reading past a marker.
// A stuffed 0xFF 0x00 pair yields a single 0xFF. When 0xFF is followed by any
// other byte, both bytes are pushed back and 0xFF is returned, so repeated
// calls keep returning 0xFF once a marker is reached. End-of-stream padding
// is treated the same way.
inline uint8 jpeg_decoder::get_octet() {
  bool padding_flag;
  int c = get_char(&padding_flag);

  if (c != 0xFF)
    return static_cast<uint8>(c);

  // The 0xFF itself was padding: nothing to push back.
  if (padding_flag)
    return 0xFF;

  c = get_char(&padding_flag);
  if (padding_flag) {
    // Real 0xFF followed by padding: restore the 0xFF only.
    stuff_char(0xFF);
    return 0xFF;
  }

  // 0xFF 0x00 is a byte-stuffed literal 0xFF; consume both bytes.
  if (c == 0x00)
    return 0xFF;

  // Anything else is a marker: put both bytes back in their original order.
  stuff_char(static_cast<uint8>(c));
  stuff_char(0xFF);
  return 0xFF;
}
int ofs = 23; do { symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))]; ofs--; } while (symbol < 0); get_bits_no_markers(8 + (23 - ofs)); } else get_bits_no_markers(pH->code_size[symbol]); return symbol; } // Decodes a Huffman encoded symbol. inline int jpeg_decoder::huff_decode(huff_tables* pH, int& extra_bits) { int symbol; // Check first 8-bits: do we have a complete symbol? if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0) { // Use a tree traversal to find symbol. int ofs = 23; do { symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))]; ofs--; } while (symbol < 0); get_bits_no_markers(8 + (23 - ofs)); extra_bits = get_bits_no_markers(symbol & 0xF); } else { JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0)); if (symbol & 0x8000) { get_bits_no_markers((symbol >> 8) & 31); extra_bits = symbol >> 16; } else { int code_size = (symbol >> 8) & 31; int num_extra_bits = symbol & 0xF; int bits = code_size + num_extra_bits; if (bits <= (m_bits_left + 16)) extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1); else { get_bits_no_markers(code_size); extra_bits = get_bits_no_markers(num_extra_bits); } } symbol &= 0xFF; } return symbol; } // Tables and macro used to fully decode the DPCM differences. static const int s_extend_test[16] = {0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000}; static const int s_extend_offset[16] = {0, 1 - (1 << 1), 1 - (1 << 2), 1 - (1 << 3), 1 - (1 << 4), 1 - (1 << 5), 1 - (1 << 6), 1 - (1 << 7), 1 - (1 << 8), 1 - (1 << 9), 1 - (1 << 10), 1 - (1 << 11), 1 - (1 << 12), 1 - (1 << 13), 1 - (1 << 14), 1 - (1 << 15)}; // The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this) #define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x)) // Clamps a value between 0-255. 
inline uint8 jpeg_decoder::clamp(int i) { if (static_cast(i) > 255) i = (((~i) >> 31) & 0xFF); return static_cast(i); } namespace DCT_Upsample { struct Matrix44 { typedef int Element_Type; enum { NUM_ROWS = 4, NUM_COLS = 4 }; Element_Type v[NUM_ROWS][NUM_COLS]; inline int rows() const { return NUM_ROWS; } inline int cols() const { return NUM_COLS; } inline const Element_Type& at(int r, int c) const { return v[r][c]; } inline Element_Type& at(int r, int c) { return v[r][c]; } inline Matrix44() {} inline Matrix44& operator+=(const Matrix44& a) { for (int r = 0; r < NUM_ROWS; r++) { at(r, 0) += a.at(r, 0); at(r, 1) += a.at(r, 1); at(r, 2) += a.at(r, 2); at(r, 3) += a.at(r, 3); } return *this; } inline Matrix44& operator-=(const Matrix44& a) { for (int r = 0; r < NUM_ROWS; r++) { at(r, 0) -= a.at(r, 0); at(r, 1) -= a.at(r, 1); at(r, 2) -= a.at(r, 2); at(r, 3) -= a.at(r, 3); } return *this; } friend inline Matrix44 operator+(const Matrix44& a, const Matrix44& b) { Matrix44 ret; for (int r = 0; r < NUM_ROWS; r++) { ret.at(r, 0) = a.at(r, 0) + b.at(r, 0); ret.at(r, 1) = a.at(r, 1) + b.at(r, 1); ret.at(r, 2) = a.at(r, 2) + b.at(r, 2); ret.at(r, 3) = a.at(r, 3) + b.at(r, 3); } return ret; } friend inline Matrix44 operator-(const Matrix44& a, const Matrix44& b) { Matrix44 ret; for (int r = 0; r < NUM_ROWS; r++) { ret.at(r, 0) = a.at(r, 0) - b.at(r, 0); ret.at(r, 1) = a.at(r, 1) - b.at(r, 1); ret.at(r, 2) = a.at(r, 2) - b.at(r, 2); ret.at(r, 3) = a.at(r, 3) - b.at(r, 3); } return ret; } static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b) { for (int r = 0; r < 4; r++) { pDst[0 * 8 + r] = static_cast(a.at(r, 0) + b.at(r, 0)); pDst[1 * 8 + r] = static_cast(a.at(r, 1) + b.at(r, 1)); pDst[2 * 8 + r] = static_cast(a.at(r, 2) + b.at(r, 2)); pDst[3 * 8 + r] = static_cast(a.at(r, 3) + b.at(r, 3)); } } static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b) { for (int r = 0; r < 4; r++) { pDst[0 * 8 + r] = 
static_cast(a.at(r, 0) - b.at(r, 0)); pDst[1 * 8 + r] = static_cast(a.at(r, 1) - b.at(r, 1)); pDst[2 * 8 + r] = static_cast(a.at(r, 2) - b.at(r, 2)); pDst[3 * 8 + r] = static_cast(a.at(r, 3) - b.at(r, 3)); } } }; const int FRACT_BITS = 10; const int SCALE = 1 << FRACT_BITS; typedef int Temp_Type; #define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS) #define F(i) ((int)((i)*SCALE + .5f)) // Any decent C++ compiler will optimize this at compile time to a 0, or an array access. #define AT(c, r) ((((c) >= NUM_COLS) || ((r) >= NUM_ROWS)) ? 0 : pSrc[(c) + (r)*8]) // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix template struct P_Q { static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc) { // 4x8 = 4x8 times 8x8, matrix 0 is constant const Temp_Type X000 = AT(0, 0); const Temp_Type X001 = AT(0, 1); const Temp_Type X002 = AT(0, 2); const Temp_Type X003 = AT(0, 3); const Temp_Type X004 = AT(0, 4); const Temp_Type X005 = AT(0, 5); const Temp_Type X006 = AT(0, 6); const Temp_Type X007 = AT(0, 7); const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0)); const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1)); const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2)); const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3)); const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4)); const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5)); const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6)); const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) 
* AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7)); const Temp_Type X020 = AT(4, 0); const Temp_Type X021 = AT(4, 1); const Temp_Type X022 = AT(4, 2); const Temp_Type X023 = AT(4, 3); const Temp_Type X024 = AT(4, 4); const Temp_Type X025 = AT(4, 5); const Temp_Type X026 = AT(4, 6); const Temp_Type X027 = AT(4, 7); const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0)); const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1)); const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2)); const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3)); const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4)); const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5)); const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6)); const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7)); // 4x4 = 4x8 times 8x4, matrix 1 is constant P.at(0, 0) = X000; P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f)); P.at(0, 2) = X004; P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f)); P.at(1, 0) = X010; P.at(1, 1) = D(X011 * F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f)); P.at(1, 2) = X014; P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f)); P.at(2, 0) = X020; P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * 
F(-0.352443f) + X027 * F(0.277785f)); P.at(2, 2) = X024; P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f)); P.at(3, 0) = X030; P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f)); P.at(3, 2) = X034; P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f)); // 40 muls 24 adds // 4x4 = 4x8 times 8x4, matrix 1 is constant Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f)); Q.at(0, 1) = X002; Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f)); Q.at(0, 3) = X006; Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f)); Q.at(1, 1) = X012; Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f)); Q.at(1, 3) = X016; Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f)); Q.at(2, 1) = X022; Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f)); Q.at(2, 3) = X026; Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f)); Q.at(3, 1) = X032; Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f)); Q.at(3, 3) = X036; // 40 muls 24 adds } }; template struct R_S { static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc) { // 4x8 = 4x8 times 8x8, matrix 0 is constant const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0)); const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1)); const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + 
F(-0.180240f) * AT(7, 2)); const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3)); const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4)); const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5)); const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6)); const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7)); const Temp_Type X110 = AT(2, 0); const Temp_Type X111 = AT(2, 1); const Temp_Type X112 = AT(2, 2); const Temp_Type X113 = AT(2, 3); const Temp_Type X114 = AT(2, 4); const Temp_Type X115 = AT(2, 5); const Temp_Type X116 = AT(2, 6); const Temp_Type X117 = AT(2, 7); const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0)); const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1)); const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2)); const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3)); const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * AT(7, 4)); const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5)); const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6)); const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7)); const 
Temp_Type X130 = AT(6, 0); const Temp_Type X131 = AT(6, 1); const Temp_Type X132 = AT(6, 2); const Temp_Type X133 = AT(6, 3); const Temp_Type X134 = AT(6, 4); const Temp_Type X135 = AT(6, 5); const Temp_Type X136 = AT(6, 6); const Temp_Type X137 = AT(6, 7); // 80 muls 48 adds // 4x4 = 4x8 times 8x4, matrix 1 is constant R.at(0, 0) = X100; R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f)); R.at(0, 2) = X104; R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f)); R.at(1, 0) = X110; R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f)); R.at(1, 2) = X114; R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f)); R.at(2, 0) = X120; R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f)); R.at(2, 2) = X124; R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f)); R.at(3, 0) = X130; R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f)); R.at(3, 2) = X134; R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f)); // 40 muls 24 adds // 4x4 = 4x8 times 8x4, matrix 1 is constant S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f)); S.at(0, 1) = X102; S.at(0, 2) = D(X101 * F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f)); S.at(0, 3) = X106; S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f)); S.at(1, 1) = X112; S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f)); S.at(1, 3) = X116; S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f)); S.at(2, 1) = X122; S.at(2, 
2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f)); S.at(2, 3) = X126; S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f)); S.at(3, 1) = X132; S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f)); S.at(3, 3) = X136; // 40 muls 24 adds } }; } // end namespace DCT_Upsample // Unconditionally frees all allocated m_blocks. void jpeg_decoder::free_all_blocks() { m_pStream = NULL; for (mem_block* b = m_pMem_blocks; b;) { mem_block* n = b->m_pNext; jpgd_free(b); b = n; } m_pMem_blocks = NULL; } // This method handles all errors. It will never return. // It could easily be changed to use C++ exceptions. JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status) { m_error_code = status; free_all_blocks(); longjmp(m_jmp_state, status); } void* jpeg_decoder::alloc(size_t nSize, bool zero) { nSize = (JPGD_MAX(nSize, 1) + 3) & ~3; char* rv = NULL; for (mem_block* b = m_pMem_blocks; b; b = b->m_pNext) { if ((b->m_used_count + nSize) <= b->m_size) { rv = b->m_data + b->m_used_count; b->m_used_count += nSize; break; } } if (!rv) { int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047); mem_block* b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity); if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); } b->m_pNext = m_pMem_blocks; m_pMem_blocks = b; b->m_used_count = nSize; b->m_size = capacity; rv = b->m_data; } if (zero) memset(rv, 0, nSize); return rv; } void jpeg_decoder::word_clear(void* p, uint16 c, uint n) { uint8* pD = (uint8*)p; const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF; while (n) { pD[0] = l; pD[1] = h; pD += 2; n--; } } // Refill the input buffer. // This method will sit in a loop until (A) the buffer is full or (B) // the stream's read() method reports and end of file condition. 
void jpeg_decoder::prep_in_buffer() { m_in_buf_left = 0; m_pIn_buf_ofs = m_in_buf; if (m_eof_flag) return; do { int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag); if (bytes_read == -1) stop_decoding(JPGD_STREAM_READ); m_in_buf_left += bytes_read; } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag)); m_total_bytes_read += m_in_buf_left; // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid). // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.) word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64); } // Read a Huffman code table. void jpeg_decoder::read_dht_marker() { int i, index, count; uint8 huff_num[17]; uint8 huff_val[256]; uint num_left = get_bits(16); if (num_left < 2) stop_decoding(JPGD_BAD_DHT_MARKER); num_left -= 2; while (num_left) { index = get_bits(8); huff_num[0] = 0; count = 0; for (i = 1; i <= 16; i++) { huff_num[i] = static_cast(get_bits(8)); count += huff_num[i]; } if (count > 255) stop_decoding(JPGD_BAD_DHT_COUNTS); for (i = 0; i < count; i++) huff_val[i] = static_cast(get_bits(8)); i = 1 + 16 + count; if (num_left < (uint)i) stop_decoding(JPGD_BAD_DHT_MARKER); num_left -= i; if ((index & 0x10) > 0x10) stop_decoding(JPGD_BAD_DHT_INDEX); index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1); if (index >= JPGD_MAX_HUFF_TABLES) stop_decoding(JPGD_BAD_DHT_INDEX); if (!m_huff_num[index]) m_huff_num[index] = (uint8*)alloc(17); if (!m_huff_val[index]) m_huff_val[index] = (uint8*)alloc(256); m_huff_ac[index] = (index & 0x10) != 0; memcpy(m_huff_num[index], huff_num, 17); memcpy(m_huff_val[index], huff_val, 256); } } // Read a quantization table. 
void jpeg_decoder::read_dqt_marker() { int n, i, prec; uint num_left; uint temp; num_left = get_bits(16); if (num_left < 2) stop_decoding(JPGD_BAD_DQT_MARKER); num_left -= 2; while (num_left) { n = get_bits(8); prec = n >> 4; n &= 0x0F; if (n >= JPGD_MAX_QUANT_TABLES) stop_decoding(JPGD_BAD_DQT_TABLE); if (!m_quant[n]) m_quant[n] = (jpgd_quant_t*)alloc(64 * sizeof(jpgd_quant_t)); // read quantization entries, in zag order for (i = 0; i < 64; i++) { temp = get_bits(8); if (prec) temp = (temp << 8) + get_bits(8); m_quant[n][i] = static_cast(temp); } i = 64 + 1; if (prec) i += 64; if (num_left < (uint)i) stop_decoding(JPGD_BAD_DQT_LENGTH); num_left -= i; } } // Read the start of frame (SOF) marker. void jpeg_decoder::read_sof_marker() { int i; uint num_left; num_left = get_bits(16); if (get_bits(8) != 8) /* precision: sorry, only 8-bit precision is supported right now */ stop_decoding(JPGD_BAD_PRECISION); m_image_y_size = get_bits(16); if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT)) stop_decoding(JPGD_BAD_HEIGHT); m_image_x_size = get_bits(16); if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH)) stop_decoding(JPGD_BAD_WIDTH); m_comps_in_frame = get_bits(8); if (m_comps_in_frame > JPGD_MAX_COMPONENTS) stop_decoding(JPGD_TOO_MANY_COMPONENTS); if (num_left != (uint)(m_comps_in_frame * 3 + 8)) stop_decoding(JPGD_BAD_SOF_LENGTH); for (i = 0; i < m_comps_in_frame; i++) { m_comp_ident[i] = get_bits(8); m_comp_h_samp[i] = get_bits(4); m_comp_v_samp[i] = get_bits(4); m_comp_quant[i] = get_bits(8); } } // Used to skip unrecognized markers. void jpeg_decoder::skip_variable_marker() { uint num_left; num_left = get_bits(16); if (num_left < 2) stop_decoding(JPGD_BAD_VARIABLE_MARKER); num_left -= 2; while (num_left) { get_bits(8); num_left--; } } // Read a define restart interval (DRI) marker. 
void jpeg_decoder::read_dri_marker() { if (get_bits(16) != 4) stop_decoding(JPGD_BAD_DRI_LENGTH); m_restart_interval = get_bits(16); } // Read a start of scan (SOS) marker. void jpeg_decoder::read_sos_marker() { uint num_left; int i, ci, n, c, cc; num_left = get_bits(16); n = get_bits(8); m_comps_in_scan = n; num_left -= 3; if ((num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN)) stop_decoding(JPGD_BAD_SOS_LENGTH); for (i = 0; i < n; i++) { cc = get_bits(8); c = get_bits(8); num_left -= 2; for (ci = 0; ci < m_comps_in_frame; ci++) if (cc == m_comp_ident[ci]) break; if (ci >= m_comps_in_frame) stop_decoding(JPGD_BAD_SOS_COMP_ID); m_comp_list[i] = ci; m_comp_dc_tab[ci] = (c >> 4) & 15; m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1); } m_spectral_start = get_bits(8); m_spectral_end = get_bits(8); m_successive_high = get_bits(4); m_successive_low = get_bits(4); if (!m_progressive_flag) { m_spectral_start = 0; m_spectral_end = 63; } num_left -= 3; while (num_left) /* read past whatever is num_left */ { get_bits(8); num_left--; } } // Finds the next marker. int jpeg_decoder::next_marker() { uint c; do { do { c = get_bits(8); } while (c != 0xFF); do { c = get_bits(8); } while (c == 0xFF); } while (c == 0); // If bytes > 0 here, there where extra bytes before the marker (not good). return c; } // Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is // encountered. int jpeg_decoder::process_markers() { int c; for (;;) { c = next_marker(); switch (c) { case M_SOF0: case M_SOF1: case M_SOF2: case M_SOF3: case M_SOF5: case M_SOF6: case M_SOF7: // case M_JPG: case M_SOF9: case M_SOF10: case M_SOF11: case M_SOF13: case M_SOF14: case M_SOF15: case M_SOI: case M_EOI: case M_SOS: { return c; } case M_DHT: { read_dht_marker(); break; } // No arithmitic support - dumb patents! 
case M_DAC: { stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); break; } case M_DQT: { read_dqt_marker(); break; } case M_DRI: { read_dri_marker(); break; } //case M_APP0: /* no need to read the JFIF marker */ case M_JPG: case M_RST0: /* no parameters */ case M_RST1: case M_RST2: case M_RST3: case M_RST4: case M_RST5: case M_RST6: case M_RST7: case M_TEM: { stop_decoding(JPGD_UNEXPECTED_MARKER); break; } default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */ { skip_variable_marker(); break; } } } } // Finds the start of image (SOI) marker. // This code is rather defensive: it only checks the first 512 bytes to avoid // false positives. void jpeg_decoder::locate_soi_marker() { uint lastchar, thischar; uint bytesleft; lastchar = get_bits(8); thischar = get_bits(8); /* ok if it's a normal JPEG file without a special header */ if ((lastchar == 0xFF) && (thischar == M_SOI)) return; bytesleft = 4096; //512; for (;;) { if (--bytesleft == 0) stop_decoding(JPGD_NOT_JPEG); lastchar = thischar; thischar = get_bits(8); if (lastchar == 0xFF) { if (thischar == M_SOI) break; else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end stop_decoding(JPGD_NOT_JPEG); } } // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad. thischar = (m_bit_buf >> 24) & 0xFF; if (thischar != 0xFF) stop_decoding(JPGD_NOT_JPEG); } // Find a start of frame (SOF) marker. void jpeg_decoder::locate_sof_marker() { locate_soi_marker(); int c = process_markers(); switch (c) { case M_SOF2: m_progressive_flag = JPGD_TRUE; case M_SOF0: /* baseline DCT */ case M_SOF1: /* extended sequential DCT */ { read_sof_marker(); break; } case M_SOF9: /* Arithmitic coding */ { stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); break; } default: { stop_decoding(JPGD_UNSUPPORTED_MARKER); break; } } } // Find a start of scan (SOS) marker. 
int jpeg_decoder::locate_sos_marker() { int c; c = process_markers(); if (c == M_EOI) return JPGD_FALSE; else if (c != M_SOS) stop_decoding(JPGD_UNEXPECTED_MARKER); read_sos_marker(); return JPGD_TRUE; } // Reset everything to default/uninitialized state. void jpeg_decoder::init(jpeg_decoder_stream* pStream) { m_pMem_blocks = NULL; m_error_code = JPGD_SUCCESS; m_ready_flag = false; m_image_x_size = m_image_y_size = 0; m_pStream = pStream; m_progressive_flag = JPGD_FALSE; memset(m_huff_ac, 0, sizeof(m_huff_ac)); memset(m_huff_num, 0, sizeof(m_huff_num)); memset(m_huff_val, 0, sizeof(m_huff_val)); memset(m_quant, 0, sizeof(m_quant)); m_scan_type = 0; m_comps_in_frame = 0; memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp)); memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp)); memset(m_comp_quant, 0, sizeof(m_comp_quant)); memset(m_comp_ident, 0, sizeof(m_comp_ident)); memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks)); memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks)); m_comps_in_scan = 0; memset(m_comp_list, 0, sizeof(m_comp_list)); memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab)); memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab)); m_spectral_start = 0; m_spectral_end = 0; m_successive_low = 0; m_successive_high = 0; m_max_mcu_x_size = 0; m_max_mcu_y_size = 0; m_blocks_per_mcu = 0; m_max_blocks_per_row = 0; m_mcus_per_row = 0; m_mcus_per_col = 0; m_expanded_blocks_per_component = 0; m_expanded_blocks_per_mcu = 0; m_expanded_blocks_per_row = 0; m_freq_domain_chroma_upsample = false; memset(m_mcu_org, 0, sizeof(m_mcu_org)); m_total_lines_left = 0; m_mcu_lines_left = 0; m_real_dest_bytes_per_scan_line = 0; m_dest_bytes_per_scan_line = 0; m_dest_bytes_per_pixel = 0; memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs)); memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs)); memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs)); memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); m_eob_run = 0; memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); m_pIn_buf_ofs = m_in_buf; m_in_buf_left = 0; m_eof_flag = false; 
m_tem_flag = 0; memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start)); memset(m_in_buf, 0, sizeof(m_in_buf)); memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end)); m_restart_interval = 0; m_restarts_left = 0; m_next_restart_num = 0; m_max_mcus_per_row = 0; m_max_blocks_per_mcu = 0; m_max_mcus_per_col = 0; memset(m_last_dc_val, 0, sizeof(m_last_dc_val)); m_pMCU_coefficients = NULL; m_pSample_buf = NULL; m_total_bytes_read = 0; m_pScan_line_0 = NULL; m_pScan_line_1 = NULL; // Ready the input buffer. prep_in_buffer(); // Prime the bit buffer. m_bits_left = 16; m_bit_buf = 0; get_bits(16); get_bits(16); for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++) m_mcu_block_max_zag[i] = 64; } #define SCALEBITS 16 #define ONE_HALF ((int)1 << (SCALEBITS - 1)) #define FIX(x) ((int)((x) * (1L << SCALEBITS) + 0.5f)) // Create a few tables that allow us to quickly convert YCbCr to RGB. void jpeg_decoder::create_look_ups() { for (int i = 0; i <= 255; i++) { int k = i - 128; m_crr[i] = (FIX(1.40200f) * k + ONE_HALF) >> SCALEBITS; m_cbb[i] = (FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS; m_crg[i] = (-FIX(0.71414f)) * k; m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF; } } // This method throws back into the stream any bytes that where read // into the bit buffer during initial marker scanning. void jpeg_decoder::fix_in_buffer() { // In case any 0xFF's where pulled into the buffer during marker scanning. 
JPGD_ASSERT((m_bits_left & 7) == 0); if (m_bits_left == 16) stuff_char((uint8)(m_bit_buf & 0xFF)); if (m_bits_left >= 8) stuff_char((uint8)((m_bit_buf >> 8) & 0xFF)); stuff_char((uint8)((m_bit_buf >> 16) & 0xFF)); stuff_char((uint8)((m_bit_buf >> 24) & 0xFF)); m_bits_left = 16; get_bits_no_markers(16); get_bits_no_markers(16); } void jpeg_decoder::transform_mcu(int mcu_row) { jpgd_block_t* pSrc_ptr = m_pMCU_coefficients; uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64; for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) { idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]); pSrc_ptr += 64; pDst_ptr += 64; } } static const uint8 s_max_rc[64] = { 17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86, 102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136}; void jpeg_decoder::transform_mcu_expand(int mcu_row) { jpgd_block_t* pSrc_ptr = m_pMCU_coefficients; uint8* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64; // Y IDCT int mcu_block; for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++) { idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]); pSrc_ptr += 64; pDst_ptr += 64; } // Chroma IDCT, with upsampling jpgd_block_t temp_block[64]; for (int i = 0; i < 2; i++) { DCT_Upsample::Matrix44 P, Q, R, S; JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1); JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64); int max_zag = m_mcu_block_max_zag[mcu_block++] - 1; if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis switch (s_max_rc[max_zag]) { case 1 * 16 + 1: DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<1, 1>::calc(R, S, pSrc_ptr); break; case 1 * 16 + 2: DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr); 
break; case 2 * 16 + 2: DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr); break; case 3 * 16 + 2: DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr); break; case 3 * 16 + 3: DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr); break; case 3 * 16 + 4: DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr); break; case 4 * 16 + 4: DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr); break; case 5 * 16 + 4: DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr); break; case 5 * 16 + 5: DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr); break; case 5 * 16 + 6: DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr); break; case 6 * 16 + 6: DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr); break; case 7 * 16 + 6: DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr); break; case 7 * 16 + 7: DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr); break; case 7 * 16 + 8: DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr); break; case 8 * 16 + 8: DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr); DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr); break; default: JPGD_ASSERT(false); } DCT_Upsample::Matrix44 a(P + Q); P -= Q; DCT_Upsample::Matrix44& b = P; DCT_Upsample::Matrix44 c(R + S); R -= S; DCT_Upsample::Matrix44& d = R; DCT_Upsample::Matrix44::add_and_store(temp_block, a, c); idct_4x4(temp_block, pDst_ptr); pDst_ptr += 64; DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c); idct_4x4(temp_block, pDst_ptr); pDst_ptr += 64; DCT_Upsample::Matrix44::add_and_store(temp_block, b, d); 
idct_4x4(temp_block, pDst_ptr); pDst_ptr += 64; DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d); idct_4x4(temp_block, pDst_ptr); pDst_ptr += 64; pSrc_ptr += 64; } } // Loads and dequantizes the next row of (already decoded) coefficients. // Progressive images only. void jpeg_decoder::load_next_row() { int i; jpgd_block_t* p; jpgd_quant_t* q; int mcu_row, mcu_block = 0; int component_num, component_id; int block_x_mcu[JPGD_MAX_COMPONENTS]; memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int)); for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) { int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0; for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) { component_id = m_mcu_org[mcu_block]; q = m_quant[m_comp_quant[component_id]]; p = m_pMCU_coefficients + 64 * mcu_block; jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs); jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs); p[0] = pDC[0]; memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t)); for (i = 63; i > 0; i--) if (p[g_ZAG[i]]) break; m_mcu_block_max_zag[mcu_block] = i + 1; for (; i >= 0; i--) if (p[g_ZAG[i]]) p[g_ZAG[i]] = static_cast(p[g_ZAG[i]] * q[i]); if (m_comps_in_scan == 1) block_x_mcu[component_id]++; else { if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) { block_x_mcu_ofs = 0; if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) { block_y_mcu_ofs = 0; block_x_mcu[component_id] += m_comp_h_samp[component_id]; } } } } if (m_freq_domain_chroma_upsample) transform_mcu_expand(mcu_row); else transform_mcu(mcu_row); } if (m_comps_in_scan == 1) m_block_y_mcu[m_comp_list[0]]++; else { for (component_num = 0; component_num < m_comps_in_scan; component_num++) { component_id = m_comp_list[component_num]; m_block_y_mcu[component_id] += m_comp_v_samp[component_id]; } } } // Restart interval 
processing. void jpeg_decoder::process_restart() { int i; int c = 0; // Align to a byte boundry // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers! //get_bits_no_markers(m_bits_left & 7); // Let's scan a little bit to find the marker, but not _too_ far. // 1536 is a "fudge factor" that determines how much to scan. for (i = 1536; i > 0; i--) if (get_char() == 0xFF) break; if (i == 0) stop_decoding(JPGD_BAD_RESTART_MARKER); for (; i > 0; i--) if ((c = get_char()) != 0xFF) break; if (i == 0) stop_decoding(JPGD_BAD_RESTART_MARKER); // Is it the expected marker? If not, something bad happened. if (c != (m_next_restart_num + M_RST0)) stop_decoding(JPGD_BAD_RESTART_MARKER); // Reset each component's DC prediction values. memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); m_eob_run = 0; m_restarts_left = m_restart_interval; m_next_restart_num = (m_next_restart_num + 1) & 7; // Get the bit buffer going again... m_bits_left = 16; get_bits_no_markers(16); get_bits_no_markers(16); } static inline int dequantize_ac(int c, int q) { c *= q; return c; } // Decodes and dequantizes the next row of coefficients. 
void jpeg_decoder::decode_next_row() { for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) { if ((m_restart_interval) && (m_restarts_left == 0)) process_restart(); jpgd_block_t* p = m_pMCU_coefficients; for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64) { int component_id = m_mcu_org[mcu_block]; jpgd_quant_t* q = m_quant[m_comp_quant[component_id]]; int r, s; s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r); s = JPGD_HUFF_EXTEND(r, s); m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]); p[0] = static_cast(s * q[0]); int prev_num_set = m_mcu_block_max_zag[mcu_block]; huff_tables* pH = m_pHuff_tabs[m_comp_ac_tab[component_id]]; int k; for (k = 1; k < 64; k++) { int extra_bits; s = huff_decode(pH, extra_bits); r = s >> 4; s &= 15; if (s) { if (r) { if ((k + r) > 63) stop_decoding(JPGD_DECODE_ERROR); if (k < prev_num_set) { int n = JPGD_MIN(r, prev_num_set - k); int kt = k; while (n--) p[g_ZAG[kt++]] = 0; } k += r; } s = JPGD_HUFF_EXTEND(extra_bits, s); JPGD_ASSERT(k < 64); p[g_ZAG[k]] = static_cast(dequantize_ac(s, q[k])); //s * q[k]; } else { if (r == 15) { if ((k + 16) > 64) stop_decoding(JPGD_DECODE_ERROR); if (k < prev_num_set) { int n = JPGD_MIN(16, prev_num_set - k); int kt = k; while (n--) { JPGD_ASSERT(kt <= 63); p[g_ZAG[kt++]] = 0; } } k += 16 - 1; // - 1 because the loop counter is k JPGD_ASSERT(p[g_ZAG[k]] == 0); } else break; } } if (k < prev_num_set) { int kt = k; while (kt < prev_num_set) p[g_ZAG[kt++]] = 0; } m_mcu_block_max_zag[mcu_block] = k; } if (m_freq_domain_chroma_upsample) transform_mcu_expand(mcu_row); else transform_mcu(mcu_row); m_restarts_left--; } } // YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB void jpeg_decoder::H1V1Convert() { int row = m_max_mcu_y_size - m_mcu_lines_left; uint8* d = m_pScan_line_0; uint8* s = m_pSample_buf + row * 8; for (int i = m_max_mcus_per_row; i > 0; i--) { for (int j = 0; j < 8; j++) { int y = s[j]; int cb = s[64 + j]; int cr = s[128 + j]; d[0] = 
clamp(y + m_crr[cr]); d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); d[2] = clamp(y + m_cbb[cb]); d[3] = 255; d += 4; } s += 64 * 3; } } // YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB void jpeg_decoder::H2V1Convert() { int row = m_max_mcu_y_size - m_mcu_lines_left; uint8* d0 = m_pScan_line_0; uint8* y = m_pSample_buf + row * 8; uint8* c = m_pSample_buf + 2 * 64 + row * 8; for (int i = m_max_mcus_per_row; i > 0; i--) { for (int l = 0; l < 2; l++) { for (int j = 0; j < 4; j++) { int cb = c[0]; int cr = c[64]; int rc = m_crr[cr]; int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); int bc = m_cbb[cb]; int yy = y[j << 1]; d0[0] = clamp(yy + rc); d0[1] = clamp(yy + gc); d0[2] = clamp(yy + bc); d0[3] = 255; yy = y[(j << 1) + 1]; d0[4] = clamp(yy + rc); d0[5] = clamp(yy + gc); d0[6] = clamp(yy + bc); d0[7] = 255; d0 += 8; c++; } y += 64; } y += 64 * 4 - 64 * 2; c += 64 * 4 - 8; } } // YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB void jpeg_decoder::H1V2Convert() { int row = m_max_mcu_y_size - m_mcu_lines_left; uint8* d0 = m_pScan_line_0; uint8* d1 = m_pScan_line_1; uint8* y; uint8* c; if (row < 8) y = m_pSample_buf + row * 8; else y = m_pSample_buf + 64 * 1 + (row & 7) * 8; c = m_pSample_buf + 64 * 2 + (row >> 1) * 8; for (int i = m_max_mcus_per_row; i > 0; i--) { for (int j = 0; j < 8; j++) { int cb = c[0 + j]; int cr = c[64 + j]; int rc = m_crr[cr]; int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); int bc = m_cbb[cb]; int yy = y[j]; d0[0] = clamp(yy + rc); d0[1] = clamp(yy + gc); d0[2] = clamp(yy + bc); d0[3] = 255; yy = y[8 + j]; d1[0] = clamp(yy + rc); d1[1] = clamp(yy + gc); d1[2] = clamp(yy + bc); d1[3] = 255; d0 += 4; d1 += 4; } y += 64 * 4; c += 64 * 4; } } // YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB void jpeg_decoder::H2V2Convert() { int row = m_max_mcu_y_size - m_mcu_lines_left; uint8* d0 = m_pScan_line_0; uint8* d1 = m_pScan_line_1; uint8* y; uint8* c; if (row < 8) y = m_pSample_buf + row * 8; else y = m_pSample_buf + 64 * 2 + (row & 7) * 8; c = m_pSample_buf + 
64 * 4 + (row >> 1) * 8; for (int i = m_max_mcus_per_row; i > 0; i--) { for (int l = 0; l < 2; l++) { for (int j = 0; j < 8; j += 2) { int cb = c[0]; int cr = c[64]; int rc = m_crr[cr]; int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); int bc = m_cbb[cb]; int yy = y[j]; d0[0] = clamp(yy + rc); d0[1] = clamp(yy + gc); d0[2] = clamp(yy + bc); d0[3] = 255; yy = y[j + 1]; d0[4] = clamp(yy + rc); d0[5] = clamp(yy + gc); d0[6] = clamp(yy + bc); d0[7] = 255; yy = y[j + 8]; d1[0] = clamp(yy + rc); d1[1] = clamp(yy + gc); d1[2] = clamp(yy + bc); d1[3] = 255; yy = y[j + 8 + 1]; d1[4] = clamp(yy + rc); d1[5] = clamp(yy + gc); d1[6] = clamp(yy + bc); d1[7] = 255; d0 += 8; d1 += 8; c++; } y += 64; } y += 64 * 6 - 64 * 2; c += 64 * 6 - 8; } } // Y (1 block per MCU) to 8-bit grayscale void jpeg_decoder::gray_convert() { int row = m_max_mcu_y_size - m_mcu_lines_left; uint8* d = m_pScan_line_0; uint8* s = m_pSample_buf + row * 8; for (int i = m_max_mcus_per_row; i > 0; i--) { *(uint*)d = *(uint*)s; *(uint*)(&d[4]) = *(uint*)(&s[4]); s += 64; d += 8; } } void jpeg_decoder::expanded_convert() { int row = m_max_mcu_y_size - m_mcu_lines_left; uint8* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8; uint8* d = m_pScan_line_0; for (int i = m_max_mcus_per_row; i > 0; i--) { for (int k = 0; k < m_max_mcu_x_size; k += 8) { const int Y_ofs = k * 8; const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component; const int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2; for (int j = 0; j < 8; j++) { int y = Py[Y_ofs + j]; int cb = Py[Cb_ofs + j]; int cr = Py[Cr_ofs + j]; d[0] = clamp(y + m_crr[cr]); d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); d[2] = clamp(y + m_cbb[cb]); d[3] = 255; d += 4; } } Py += 64 * m_expanded_blocks_per_mcu; } } // Find end of image (EOI) marker, so we can return to the user the exact size of the input stream. void jpeg_decoder::find_eoi() { if (!m_progressive_flag) { // Attempt to read the EOI marker. 
//get_bits_no_markers(m_bits_left & 7); // Prime the bit buffer m_bits_left = 16; get_bits(16); get_bits(16); // The next marker _should_ be EOI process_markers(); } m_total_bytes_read -= m_in_buf_left; } int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len) { if ((m_error_code) || (!m_ready_flag)) return JPGD_FAILED; if (m_total_lines_left == 0) return JPGD_DONE; if (m_mcu_lines_left == 0) { if (setjmp(m_jmp_state)) return JPGD_FAILED; if (m_progressive_flag) load_next_row(); else decode_next_row(); // Find the EOI marker if that was the last row. if (m_total_lines_left <= m_max_mcu_y_size) find_eoi(); m_mcu_lines_left = m_max_mcu_y_size; } if (m_freq_domain_chroma_upsample) { expanded_convert(); *pScan_line = m_pScan_line_0; } else { switch (m_scan_type) { case JPGD_YH2V2: { if ((m_mcu_lines_left & 1) == 0) { H2V2Convert(); *pScan_line = m_pScan_line_0; } else *pScan_line = m_pScan_line_1; break; } case JPGD_YH2V1: { H2V1Convert(); *pScan_line = m_pScan_line_0; break; } case JPGD_YH1V2: { if ((m_mcu_lines_left & 1) == 0) { H1V2Convert(); *pScan_line = m_pScan_line_0; } else *pScan_line = m_pScan_line_1; break; } case JPGD_YH1V1: { H1V1Convert(); *pScan_line = m_pScan_line_0; break; } case JPGD_GRAYSCALE: { gray_convert(); *pScan_line = m_pScan_line_0; break; } } } *pScan_line_len = m_real_dest_bytes_per_scan_line; m_mcu_lines_left--; m_total_lines_left--; return JPGD_SUCCESS; } // Creates the tables needed for efficient Huffman decoding. 
void jpeg_decoder::make_huff_table(int index, huff_tables* pH) { int p, i, l, si; uint8 huffsize[257]; uint huffcode[257]; uint code; uint subtree; int code_size; int lastp; int nextfreeentry; int currententry; pH->ac_table = m_huff_ac[index] != 0; p = 0; for (l = 1; l <= 16; l++) { for (i = 1; i <= m_huff_num[index][l]; i++) huffsize[p++] = static_cast(l); } huffsize[p] = 0; lastp = p; code = 0; si = huffsize[0]; p = 0; while (huffsize[p]) { while (huffsize[p] == si) { huffcode[p++] = code; code++; } code <<= 1; si++; } memset(pH->look_up, 0, sizeof(pH->look_up)); memset(pH->look_up2, 0, sizeof(pH->look_up2)); memset(pH->tree, 0, sizeof(pH->tree)); memset(pH->code_size, 0, sizeof(pH->code_size)); nextfreeentry = -1; p = 0; while (p < lastp) { i = m_huff_val[index][p]; code = huffcode[p]; code_size = huffsize[p]; pH->code_size[i] = static_cast(code_size); if (code_size <= 8) { code <<= (8 - code_size); for (l = 1 << (8 - code_size); l > 0; l--) { JPGD_ASSERT(i < 256); pH->look_up[code] = i; bool has_extrabits = false; int extra_bits = 0; int num_extra_bits = i & 15; int bits_to_fetch = code_size; if (num_extra_bits) { int total_codesize = code_size + num_extra_bits; if (total_codesize <= 8) { has_extrabits = true; extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize)); JPGD_ASSERT(extra_bits <= 0x7FFF); bits_to_fetch += num_extra_bits; } } if (!has_extrabits) pH->look_up2[code] = i | (bits_to_fetch << 8); else pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8); code++; } } else { subtree = (code >> (code_size - 8)) & 0xFF; currententry = pH->look_up[subtree]; if (currententry == 0) { pH->look_up[subtree] = currententry = nextfreeentry; pH->look_up2[subtree] = currententry = nextfreeentry; nextfreeentry -= 2; } code <<= (16 - (code_size - 8)); for (l = code_size; l > 9; l--) { if ((code & 0x8000) == 0) currententry--; if (pH->tree[-currententry - 1] == 0) { pH->tree[-currententry - 1] = nextfreeentry; currententry = 
nextfreeentry; nextfreeentry -= 2; } else currententry = pH->tree[-currententry - 1]; code <<= 1; } if ((code & 0x8000) == 0) currententry--; pH->tree[-currententry - 1] = i; } p++; } } // Verifies the quantization tables needed for this scan are available. void jpeg_decoder::check_quant_tables() { for (int i = 0; i < m_comps_in_scan; i++) if (m_quant[m_comp_quant[m_comp_list[i]]] == NULL) stop_decoding(JPGD_UNDEFINED_QUANT_TABLE); } // Verifies that all the Huffman tables needed for this scan are available. void jpeg_decoder::check_huff_tables() { for (int i = 0; i < m_comps_in_scan; i++) { if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == NULL)) stop_decoding(JPGD_UNDEFINED_HUFF_TABLE); if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == NULL)) stop_decoding(JPGD_UNDEFINED_HUFF_TABLE); } for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++) if (m_huff_num[i]) { if (!m_pHuff_tabs[i]) m_pHuff_tabs[i] = (huff_tables*)alloc(sizeof(huff_tables)); make_huff_table(i, m_pHuff_tabs[i]); } } // Determines the component order inside each MCU. // Also calcs how many MCU's are on each row, etc. 
void jpeg_decoder::calc_mcu_block_order() { int component_num, component_id; int max_h_samp = 0, max_v_samp = 0; for (component_id = 0; component_id < m_comps_in_frame; component_id++) { if (m_comp_h_samp[component_id] > max_h_samp) max_h_samp = m_comp_h_samp[component_id]; if (m_comp_v_samp[component_id] > max_v_samp) max_v_samp = m_comp_v_samp[component_id]; } for (component_id = 0; component_id < m_comps_in_frame; component_id++) { m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8; m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8; } if (m_comps_in_scan == 1) { m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]]; m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]]; } else { m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp; m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp; } if (m_comps_in_scan == 1) { m_mcu_org[0] = m_comp_list[0]; m_blocks_per_mcu = 1; } else { m_blocks_per_mcu = 0; for (component_num = 0; component_num < m_comps_in_scan; component_num++) { int num_blocks; component_id = m_comp_list[component_num]; num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id]; while (num_blocks--) m_mcu_org[m_blocks_per_mcu++] = component_id; } } } // Starts a new scan. int jpeg_decoder::init_scan() { if (!locate_sos_marker()) return JPGD_FALSE; calc_mcu_block_order(); check_huff_tables(); check_quant_tables(); memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint)); m_eob_run = 0; if (m_restart_interval) { m_restarts_left = m_restart_interval; m_next_restart_num = 0; } fix_in_buffer(); return JPGD_TRUE; } // Starts a frame. Determines if the number of components or sampling factors // are supported. 
void jpeg_decoder::init_frame() { int i; if (m_comps_in_frame == 1) { if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1)) stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); m_scan_type = JPGD_GRAYSCALE; m_max_blocks_per_mcu = 1; m_max_mcu_x_size = 8; m_max_mcu_y_size = 8; } else if (m_comps_in_frame == 3) { if (((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) || ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1))) stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) { m_scan_type = JPGD_YH1V1; m_max_blocks_per_mcu = 3; m_max_mcu_x_size = 8; m_max_mcu_y_size = 8; } else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) { m_scan_type = JPGD_YH2V1; m_max_blocks_per_mcu = 4; m_max_mcu_x_size = 16; m_max_mcu_y_size = 8; } else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2)) { m_scan_type = JPGD_YH1V2; m_max_blocks_per_mcu = 4; m_max_mcu_x_size = 8; m_max_mcu_y_size = 16; } else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) { m_scan_type = JPGD_YH2V2; m_max_blocks_per_mcu = 6; m_max_mcu_x_size = 16; m_max_mcu_y_size = 16; } else stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); } else stop_decoding(JPGD_UNSUPPORTED_COLORSPACE); m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size; m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size; // These values are for the *destination* pixels: after conversion. if (m_scan_type == JPGD_GRAYSCALE) m_dest_bytes_per_pixel = 1; else m_dest_bytes_per_pixel = 4; m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel; m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel); // Initialize two scan line buffers. 
m_pScan_line_0 = (uint8*)alloc(m_dest_bytes_per_scan_line, true); if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2)) m_pScan_line_1 = (uint8*)alloc(m_dest_bytes_per_scan_line, true); m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu; // Should never happen if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW) stop_decoding(JPGD_ASSERTION_ERROR); // Allocate the coefficient buffer, enough for one MCU m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t)); for (i = 0; i < m_max_blocks_per_mcu; i++) m_mcu_block_max_zag[i] = 64; m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0]; m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame; m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu; // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen). m_freq_domain_chroma_upsample = false; #if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4 * 3); #endif if (m_freq_domain_chroma_upsample) m_pSample_buf = (uint8*)alloc(m_expanded_blocks_per_row * 64); else m_pSample_buf = (uint8*)alloc(m_max_blocks_per_row * 64); m_total_lines_left = m_image_y_size; m_mcu_lines_left = 0; create_look_ups(); } // The coeff_buf series of methods originally stored the coefficients // into a "virtual" file which was located in EMS, XMS, or a disk file. A cache // was used to make this process more efficient. Now, we can store the entire // thing in RAM. 
jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y) { coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf)); cb->block_num_x = block_num_x; cb->block_num_y = block_num_y; cb->block_len_x = block_len_x; cb->block_len_y = block_len_y; cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t); cb->pData = (uint8*)alloc(cb->block_size * block_num_x * block_num_y, true); return cb; } inline jpgd_block_t* jpeg_decoder::coeff_buf_getp(coeff_buf* cb, int block_x, int block_y) { JPGD_ASSERT((block_x < cb->block_num_x) && (block_y < cb->block_num_y)); return (jpgd_block_t*)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x)); } // The following methods decode the various types of m_blocks encountered // in progressively encoded images. void jpeg_decoder::decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y) { int s, r; jpgd_block_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y); if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0) { r = pD->get_bits_no_markers(s); s = JPGD_HUFF_EXTEND(r, s); } pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]); p[0] = static_cast(s << pD->m_successive_low); } void jpeg_decoder::decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y) { if (pD->get_bits_no_markers(1)) { jpgd_block_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y); p[0] |= (1 << pD->m_successive_low); } } void jpeg_decoder::decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y) { int k, s, r; if (pD->m_eob_run) { pD->m_eob_run--; return; } jpgd_block_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y); for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++) { s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]); r = s >> 4; s &= 15; if 
(s) { if ((k += r) > 63) pD->stop_decoding(JPGD_DECODE_ERROR); r = pD->get_bits_no_markers(s); s = JPGD_HUFF_EXTEND(r, s); p[g_ZAG[k]] = static_cast(s << pD->m_successive_low); } else { if (r == 15) { if ((k += 15) > 63) pD->stop_decoding(JPGD_DECODE_ERROR); } else { pD->m_eob_run = 1 << r; if (r) pD->m_eob_run += pD->get_bits_no_markers(r); pD->m_eob_run--; break; } } } } void jpeg_decoder::decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y) { int s, k, r; int p1 = 1 << pD->m_successive_low; int m1 = (-1) << pD->m_successive_low; jpgd_block_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y); JPGD_ASSERT(pD->m_spectral_end <= 63); k = pD->m_spectral_start; if (pD->m_eob_run == 0) { for (; k <= pD->m_spectral_end; k++) { s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]); r = s >> 4; s &= 15; if (s) { if (s != 1) pD->stop_decoding(JPGD_DECODE_ERROR); if (pD->get_bits_no_markers(1)) s = p1; else s = m1; } else { if (r != 15) { pD->m_eob_run = 1 << r; if (r) pD->m_eob_run += pD->get_bits_no_markers(r); break; } } do { jpgd_block_t* this_coef = p + g_ZAG[k & 63]; if (*this_coef != 0) { if (pD->get_bits_no_markers(1)) { if ((*this_coef & p1) == 0) { if (*this_coef >= 0) *this_coef = static_cast(*this_coef + p1); else *this_coef = static_cast(*this_coef + m1); } } } else { if (--r < 0) break; } k++; } while (k <= pD->m_spectral_end); if ((s) && (k < 64)) { p[g_ZAG[k]] = static_cast(s); } } } if (pD->m_eob_run > 0) { for (; k <= pD->m_spectral_end; k++) { jpgd_block_t* this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis if (*this_coef != 0) { if (pD->get_bits_no_markers(1)) { if ((*this_coef & p1) == 0) { if (*this_coef >= 0) *this_coef = static_cast(*this_coef + p1); else *this_coef = static_cast(*this_coef + m1); } } } } pD->m_eob_run--; } } // Decode a scan in a progressively encoded image. 
void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func) { int mcu_row, mcu_col, mcu_block; int block_x_mcu[JPGD_MAX_COMPONENTS], m_block_y_mcu[JPGD_MAX_COMPONENTS]; memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++) { int component_num, component_id; memset(block_x_mcu, 0, sizeof(block_x_mcu)); for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) { int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0; if ((m_restart_interval) && (m_restarts_left == 0)) process_restart(); for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) { component_id = m_mcu_org[mcu_block]; decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs); if (m_comps_in_scan == 1) block_x_mcu[component_id]++; else { if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) { block_x_mcu_ofs = 0; if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) { block_y_mcu_ofs = 0; block_x_mcu[component_id] += m_comp_h_samp[component_id]; } } } } m_restarts_left--; } if (m_comps_in_scan == 1) m_block_y_mcu[m_comp_list[0]]++; else { for (component_num = 0; component_num < m_comps_in_scan; component_num++) { component_id = m_comp_list[component_num]; m_block_y_mcu[component_id] += m_comp_v_samp[component_id]; } } } } // Decode a progressively encoded image. void jpeg_decoder::init_progressive() { int i; if (m_comps_in_frame == 4) stop_decoding(JPGD_UNSUPPORTED_COLORSPACE); // Allocate the coefficient buffers. 
for (i = 0; i < m_comps_in_frame; i++) { m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1); m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8); } for (;;) { int dc_only_scan, refinement_scan; pDecode_block_func decode_block_func; if (!init_scan()) break; dc_only_scan = (m_spectral_start == 0); refinement_scan = (m_successive_high != 0); if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63)) stop_decoding(JPGD_BAD_SOS_SPECTRAL); if (dc_only_scan) { if (m_spectral_end) stop_decoding(JPGD_BAD_SOS_SPECTRAL); } else if (m_comps_in_scan != 1) /* AC scans can only contain one component */ stop_decoding(JPGD_BAD_SOS_SPECTRAL); if ((refinement_scan) && (m_successive_low != m_successive_high - 1)) stop_decoding(JPGD_BAD_SOS_SUCCESSIVE); if (dc_only_scan) { if (refinement_scan) decode_block_func = decode_block_dc_refine; else decode_block_func = decode_block_dc_first; } else { if (refinement_scan) decode_block_func = decode_block_ac_refine; else decode_block_func = decode_block_ac_first; } decode_scan(decode_block_func); m_bits_left = 16; get_bits(16); get_bits(16); } m_comps_in_scan = m_comps_in_frame; for (i = 0; i < m_comps_in_frame; i++) m_comp_list[i] = i; calc_mcu_block_order(); } void jpeg_decoder::init_sequential() { if (!init_scan()) stop_decoding(JPGD_UNEXPECTED_MARKER); } void jpeg_decoder::decode_start() { init_frame(); if (m_progressive_flag) init_progressive(); else init_sequential(); } void jpeg_decoder::decode_init(jpeg_decoder_stream* pStream) { init(pStream); locate_sof_marker(); } jpeg_decoder::jpeg_decoder(jpeg_decoder_stream* pStream) { if (setjmp(m_jmp_state)) return; decode_init(pStream); } int jpeg_decoder::begin_decoding() { if (m_ready_flag) return JPGD_SUCCESS; if (m_error_code) return JPGD_FAILED; if (setjmp(m_jmp_state)) return JPGD_FAILED; decode_start(); m_ready_flag = true; return JPGD_SUCCESS; } 
jpeg_decoder::~jpeg_decoder() { free_all_blocks(); } jpeg_decoder_file_stream::jpeg_decoder_file_stream() { m_pFile = NULL; m_eof_flag = false; m_error_flag = false; } void jpeg_decoder_file_stream::close() { if (m_pFile) { fclose(m_pFile); m_pFile = NULL; } m_eof_flag = false; m_error_flag = false; } jpeg_decoder_file_stream::~jpeg_decoder_file_stream() { close(); } bool jpeg_decoder_file_stream::open(const char* Pfilename) { close(); m_eof_flag = false; m_error_flag = false; #if defined(_MSC_VER) m_pFile = NULL; fopen_s(&m_pFile, Pfilename, "rb"); #else m_pFile = fopen(Pfilename, "rb"); #endif return m_pFile != NULL; } int jpeg_decoder_file_stream::read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) { if (!m_pFile) return -1; if (m_eof_flag) { *pEOF_flag = true; return 0; } if (m_error_flag) return -1; int bytes_read = static_cast(fread(pBuf, 1, max_bytes_to_read, m_pFile)); if (bytes_read < max_bytes_to_read) { if (ferror(m_pFile)) { m_error_flag = true; return -1; } m_eof_flag = true; *pEOF_flag = true; } return bytes_read; } bool jpeg_decoder_mem_stream::open(const uint8* pSrc_data, uint size) { close(); m_pSrc_data = pSrc_data; m_ofs = 0; m_size = size; return true; } int jpeg_decoder_mem_stream::read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) { *pEOF_flag = false; if (!m_pSrc_data) return -1; uint bytes_remaining = m_size - m_ofs; if ((uint)max_bytes_to_read > bytes_remaining) { max_bytes_to_read = bytes_remaining; *pEOF_flag = true; } memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read); m_ofs += max_bytes_to_read; return max_bytes_to_read; } unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps) { if (!actual_comps) return NULL; *actual_comps = 0; if ((!pStream) || (!width) || (!height) || (!req_comps)) return NULL; if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4)) return NULL; jpeg_decoder decoder(pStream); if (decoder.get_error_code() != 
JPGD_SUCCESS) return NULL; const int image_width = decoder.get_width(), image_height = decoder.get_height(); *width = image_width; *height = image_height; *actual_comps = decoder.get_num_components(); if (decoder.begin_decoding() != JPGD_SUCCESS) return NULL; const int dst_bpl = image_width * req_comps; uint8* pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height); if (!pImage_data) return NULL; for (int y = 0; y < image_height; y++) { const uint8* pScan_line; uint scan_line_len; if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) { jpgd_free(pImage_data); return NULL; } uint8* pDst = pImage_data + y * dst_bpl; if (((req_comps == 1) && (decoder.get_num_components() == 1)) || ((req_comps == 4) && (decoder.get_num_components() == 3))) memcpy(pDst, pScan_line, dst_bpl); else if (decoder.get_num_components() == 1) { if (req_comps == 3) { for (int x = 0; x < image_width; x++) { uint8 luma = pScan_line[x]; pDst[0] = luma; pDst[1] = luma; pDst[2] = luma; pDst += 3; } } else { for (int x = 0; x < image_width; x++) { uint8 luma = pScan_line[x]; pDst[0] = luma; pDst[1] = luma; pDst[2] = luma; pDst[3] = 255; pDst += 4; } } } else if (decoder.get_num_components() == 3) { if (req_comps == 1) { const int YR = 19595, YG = 38470, YB = 7471; for (int x = 0; x < image_width; x++) { int r = pScan_line[x * 4 + 0]; int g = pScan_line[x * 4 + 1]; int b = pScan_line[x * 4 + 2]; *pDst++ = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); } } else { for (int x = 0; x < image_width; x++) { pDst[0] = pScan_line[x * 4 + 0]; pDst[1] = pScan_line[x * 4 + 1]; pDst[2] = pScan_line[x * 4 + 2]; pDst += 3; } } } } return pImage_data; } unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps) { jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size); return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps); } unsigned 
char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps) { jpgd::jpeg_decoder_file_stream file_stream; if (!file_stream.open(pSrc_filename)) return NULL; return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps); } } // namespace jpgd DaemonEngine-crunch-ef4d32f/crnlib/crn_jpgd.h000066400000000000000000000321761503722002600212400ustar00rootroot00000000000000// jpgd.h - C++ class for JPEG decompression. // Public domain, Rich Geldreich #ifndef JPEG_DECODER_H #define JPEG_DECODER_H #include #include #include #ifdef _MSC_VER #define JPGD_NORETURN __declspec(noreturn) #elif defined(__GNUC__) #define JPGD_NORETURN __attribute__((noreturn)) #else #define JPGD_NORETURN #endif namespace jpgd { typedef unsigned char uint8; typedef signed short int16; typedef unsigned short uint16; typedef unsigned int uint; typedef signed int int32; // Loads a JPEG image from a memory buffer or a file. // req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA). // On return, width/height will be set to the image's dimensions, and actual_comps will be set to the either 1 (grayscale) or 3 (RGB). // Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly. // Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp. unsigned char* decompress_jpeg_image_from_memory(const unsigned char* pSrc_data, int src_data_size, int* width, int* height, int* actual_comps, int req_comps); unsigned char* decompress_jpeg_image_from_file(const char* pSrc_filename, int* width, int* height, int* actual_comps, int req_comps); // Success/failure error codes. 
enum jpgd_status { JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1, JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE, JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS, JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH, JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER, JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS, JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE, JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR, JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM }; // Input stream interface. // Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available. // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set. // It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer. // Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding. class jpeg_decoder_stream { public: jpeg_decoder_stream() {} virtual ~jpeg_decoder_stream() {} // The read() method is called when the internal input buffer is empty. // Parameters: // pBuf - input buffer // max_bytes_to_read - maximum bytes that can be written to pBuf // pEOF_flag - set this to true if at end of stream (no more bytes remaining) // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0). // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full. 
virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag) = 0; }; // stdio FILE stream class. class jpeg_decoder_file_stream : public jpeg_decoder_stream { jpeg_decoder_file_stream(const jpeg_decoder_file_stream&); jpeg_decoder_file_stream& operator=(const jpeg_decoder_file_stream&); FILE* m_pFile; bool m_eof_flag, m_error_flag; public: jpeg_decoder_file_stream(); virtual ~jpeg_decoder_file_stream(); bool open(const char* Pfilename); void close(); virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag); }; // Memory stream class. class jpeg_decoder_mem_stream : public jpeg_decoder_stream { const uint8* m_pSrc_data; uint m_ofs, m_size; public: jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) {} jpeg_decoder_mem_stream(const uint8* pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) {} virtual ~jpeg_decoder_mem_stream() {} bool open(const uint8* pSrc_data, uint size); void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; } virtual int read(uint8* pBuf, int max_bytes_to_read, bool* pEOF_flag); }; // Loads JPEG file from a jpeg_decoder_stream. unsigned char* decompress_jpeg_image_from_stream(jpeg_decoder_stream* pStream, int* width, int* height, int* actual_comps, int req_comps); enum { JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4, JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384 }; typedef int16 jpgd_quant_t; typedef int16 jpgd_block_t; class jpeg_decoder { public: // Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc. // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline. 
jpeg_decoder(jpeg_decoder_stream* pStream); ~jpeg_decoder(); // Call this method after constructing the object to begin decompression. // If JPGD_SUCCESS is returned you may then call decode() on each scanline. int begin_decoding(); // Returns the next scan line. // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4). // Returns JPGD_SUCCESS if a scan line has been returned. // Returns JPGD_DONE if all scan lines have been returned. // Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info. int decode(const void** pScan_line, uint* pScan_line_len); inline jpgd_status get_error_code() const { return m_error_code; } inline int get_width() const { return m_image_x_size; } inline int get_height() const { return m_image_y_size; } inline int get_num_components() const { return m_comps_in_frame; } inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; } inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); } // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file). 
inline int get_total_bytes_read() const { return m_total_bytes_read; } private: jpeg_decoder(const jpeg_decoder&); jpeg_decoder& operator=(const jpeg_decoder&); typedef void (*pDecode_block_func)(jpeg_decoder*, int, int, int); struct huff_tables { bool ac_table; uint look_up[256]; uint look_up2[256]; uint8 code_size[256]; uint tree[512]; }; struct coeff_buf { uint8* pData; int block_num_x, block_num_y; int block_len_x, block_len_y; int block_size; }; struct mem_block { mem_block* m_pNext; size_t m_used_count; size_t m_size; char m_data[1]; }; jmp_buf m_jmp_state; mem_block* m_pMem_blocks; int m_image_x_size; int m_image_y_size; jpeg_decoder_stream* m_pStream; int m_progressive_flag; uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES]; uint8* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of Huffman codes per bit size uint8* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported) int m_comps_in_frame; // # of components in frame int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical sampling factor int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID int m_comp_h_blocks[JPGD_MAX_COMPONENTS]; int m_comp_v_blocks[JPGD_MAX_COMPONENTS]; int m_comps_in_scan; // # of components in scan int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector int m_spectral_start; // spectral selection start int m_spectral_end; // spectral selection end int m_successive_low; // successive approximation low int m_successive_high; // successive 
approximation high int m_max_mcu_x_size; // MCU's max. X size in pixels int m_max_mcu_y_size; // MCU's max. Y size in pixels int m_blocks_per_mcu; int m_max_blocks_per_row; int m_mcus_per_row, m_mcus_per_col; int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU]; int m_total_lines_left; // total # lines left in image int m_mcu_lines_left; // total # lines left in this MCU int m_real_dest_bytes_per_scan_line; int m_dest_bytes_per_scan_line; // rounded up int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y) huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES]; coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS]; coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS]; int m_eob_run; int m_block_y_mcu[JPGD_MAX_COMPONENTS]; uint8* m_pIn_buf_ofs; int m_in_buf_left; int m_tem_flag; bool m_eof_flag; uint8 m_in_buf_pad_start[128]; uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128]; uint8 m_in_buf_pad_end[128]; int m_bits_left; uint m_bit_buf; int m_restart_interval; int m_restarts_left; int m_next_restart_num; int m_max_mcus_per_row; int m_max_blocks_per_mcu; int m_expanded_blocks_per_mcu; int m_expanded_blocks_per_row; int m_expanded_blocks_per_component; bool m_freq_domain_chroma_upsample; int m_max_mcus_per_col; uint m_last_dc_val[JPGD_MAX_COMPONENTS]; jpgd_block_t* m_pMCU_coefficients; int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU]; uint8* m_pSample_buf; int m_crr[256]; int m_cbb[256]; int m_crg[256]; int m_cbg[256]; uint8* m_pScan_line_0; uint8* m_pScan_line_1; jpgd_status m_error_code; bool m_ready_flag; int m_total_bytes_read; void free_all_blocks(); JPGD_NORETURN void stop_decoding(jpgd_status status); void* alloc(size_t n, bool zero = false); void word_clear(void* p, uint16 c, uint n); void prep_in_buffer(); void read_dht_marker(); void read_dqt_marker(); void read_sof_marker(); void skip_variable_marker(); void read_dri_marker(); void read_sos_marker(); int next_marker(); int process_markers(); void locate_soi_marker(); void locate_sof_marker(); int locate_sos_marker(); void init(jpeg_decoder_stream* pStream); void 
create_look_ups(); void fix_in_buffer(); void transform_mcu(int mcu_row); void transform_mcu_expand(int mcu_row); coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y); inline jpgd_block_t* coeff_buf_getp(coeff_buf* cb, int block_x, int block_y); void load_next_row(); void decode_next_row(); void make_huff_table(int index, huff_tables* pH); void check_quant_tables(); void check_huff_tables(); void calc_mcu_block_order(); int init_scan(); void init_frame(); void process_restart(); void decode_scan(pDecode_block_func decode_block_func); void init_progressive(); void init_sequential(); void decode_start(); void decode_init(jpeg_decoder_stream* pStream); void H2V2Convert(); void H2V1Convert(); void H1V2Convert(); void H1V1Convert(); void gray_convert(); void expanded_convert(); void find_eoi(); inline uint get_char(); inline uint get_char(bool* pPadding_flag); inline void stuff_char(uint8 q); inline uint8 get_octet(); inline uint get_bits(int num_bits); inline uint get_bits_no_markers(int numbits); inline int huff_decode(huff_tables* pH); inline int huff_decode(huff_tables* pH, int& extrabits); static inline uint8 clamp(int i); static void decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y); static void decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y); static void decode_block_ac_first(jpeg_decoder* pD, int component_id, int block_x, int block_y); static void decode_block_ac_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y); }; } // namespace jpgd #endif // JPEG_DECODER_H DaemonEngine-crunch-ef4d32f/crnlib/crn_jpge.cpp000066400000000000000000001157141503722002600215740ustar00rootroot00000000000000// jpge.cpp - C++ class for JPEG compression. // Public domain, Rich Geldreich // v1.01, Dec. 18, 2010 - Initial release // v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). 
(The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.) // v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc. // Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03). // v1.04, May. 19, 2012: Forgot to set m_pFile ptr to NULL in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug. // Code tweaks to fix VS2008 static code analysis warnings (all looked harmless). // Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02. #include "crn_jpge.h" #include #include #if defined(__FreeBSD__) // has been replaced by #include // for malloc_usable_size #elif defined(__APPLE__) #include #else #include #endif #include "crn_core.h" #define JPGE_MAX(a, b) (((a) > (b)) ? (a) : (b)) #define JPGE_MIN(a, b) (((a) < (b)) ? (a) : (b)) namespace jpge { static inline void* jpge_malloc(size_t nSize) { return crnlib::crnlib_malloc(nSize); } static inline void jpge_free(void* p) { crnlib::crnlib_free(p); } // Various JPEG enums and tables. 
enum { M_SOF0 = 0xC0, M_DHT = 0xC4, M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_APP0 = 0xE0 }; enum { DC_LUM_CODES = 12, AC_LUM_CODES = 256, DC_CHROMA_CODES = 12, AC_CHROMA_CODES = 256, MAX_HUFF_SYMBOLS = 257, MAX_HUFF_CODESIZE = 32 }; static uint8 s_zag[64] = {0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63}; static int16 s_std_lum_quant[64] = {16, 11, 12, 14, 12, 10, 16, 14, 13, 14, 18, 17, 16, 19, 24, 40, 26, 24, 22, 22, 24, 49, 35, 37, 29, 40, 58, 51, 61, 60, 57, 51, 56, 55, 64, 72, 92, 78, 64, 68, 87, 69, 55, 56, 80, 109, 81, 87, 95, 98, 103, 104, 103, 62, 77, 113, 121, 112, 100, 120, 92, 101, 103, 99}; static int16 s_std_croma_quant[64] = {17, 18, 18, 24, 21, 24, 47, 26, 26, 47, 99, 66, 56, 66, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99}; static uint8 s_dc_lum_bits[17] = {0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0}; static uint8 s_dc_lum_val[DC_LUM_CODES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; static uint8 s_ac_lum_bits[17] = {0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d}; static uint8 s_ac_lum_val[AC_LUM_CODES] = { 0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08, 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 
0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa}; static uint8 s_dc_chroma_bits[17] = {0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0}; static uint8 s_dc_chroma_val[DC_CHROMA_CODES] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}; static uint8 s_ac_chroma_bits[17] = {0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77}; static uint8 s_ac_chroma_val[AC_CHROMA_CODES] = { 0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, 0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa}; // Low-level helper functions. 
template inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); } const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329; static inline uint8 clamp(int i) { if (static_cast(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return static_cast(i); } static void RGB_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels) { for (; num_pixels; pDst += 3, pSrc += 3, num_pixels--) { const int r = pSrc[0], g = pSrc[1], b = pSrc[2]; pDst[0] = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16)); pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16)); } } static void RGB_to_Y(uint8* pDst, const uint8* pSrc, int num_pixels) { for (; num_pixels; pDst++, pSrc += 3, num_pixels--) pDst[0] = static_cast((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16); } static void RGBA_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels) { for (; num_pixels; pDst += 3, pSrc += 4, num_pixels--) { const int r = pSrc[0], g = pSrc[1], b = pSrc[2]; pDst[0] = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16)); pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16)); } } static void RGBA_to_Y(uint8* pDst, const uint8* pSrc, int num_pixels) { for (; num_pixels; pDst++, pSrc += 4, num_pixels--) pDst[0] = static_cast((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16); } static void Y_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels) { for (; num_pixels; pDst += 3, pSrc++, num_pixels--) { pDst[0] = pSrc[0]; pDst[1] = 128; pDst[2] = 128; } } // Forward DCT - DCT derived from jfdctint. 
enum { CONST_BITS = 13, ROW_BITS = 2 }; #define DCT_DESCALE(x, n) (((x) + (((int32)1) << ((n)-1))) >> (n)) #define DCT_MUL(var, c) (static_cast(var) * static_cast(c)) #define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7) \ int32 t0 = s0 + s7, t7 = s0 - s7, t1 = s1 + s6, t6 = s1 - s6, t2 = s2 + s5, t5 = s2 - s5, t3 = s3 + s4, t4 = s3 - s4; \ int32 t10 = t0 + t3, t13 = t0 - t3, t11 = t1 + t2, t12 = t1 - t2; \ int32 u1 = DCT_MUL(t12 + t13, 4433); \ s2 = u1 + DCT_MUL(t13, 6270); \ s6 = u1 + DCT_MUL(t12, -15137); \ u1 = t4 + t7; \ int32 u2 = t5 + t6, u3 = t4 + t6, u4 = t5 + t7; \ int32 z5 = DCT_MUL(u3 + u4, 9633); \ t4 = DCT_MUL(t4, 2446); \ t5 = DCT_MUL(t5, 16819); \ t6 = DCT_MUL(t6, 25172); \ t7 = DCT_MUL(t7, 12299); \ u1 = DCT_MUL(u1, -7373); \ u2 = DCT_MUL(u2, -20995); \ u3 = DCT_MUL(u3, -16069); \ u4 = DCT_MUL(u4, -3196); \ u3 += z5; \ u4 += z5; \ s0 = t10 + t11; \ s1 = t7 + u1 + u4; \ s3 = t6 + u2 + u3; \ s4 = t10 - t11; \ s5 = t5 + u2 + u4; \ s7 = t4 + u1 + u3; static void DCT2D(int32* p) { int32 c, *q = p; for (c = 7; c >= 0; c--, q += 8) { int32 s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7]; DCT1D(s0, s1, s2, s3, s4, s5, s6, s7); q[0] = s0 << ROW_BITS; q[1] = DCT_DESCALE(s1, CONST_BITS - ROW_BITS); q[2] = DCT_DESCALE(s2, CONST_BITS - ROW_BITS); q[3] = DCT_DESCALE(s3, CONST_BITS - ROW_BITS); q[4] = s4 << ROW_BITS; q[5] = DCT_DESCALE(s5, CONST_BITS - ROW_BITS); q[6] = DCT_DESCALE(s6, CONST_BITS - ROW_BITS); q[7] = DCT_DESCALE(s7, CONST_BITS - ROW_BITS); } for (q = p, c = 7; c >= 0; c--, q++) { int32 s0 = q[0 * 8], s1 = q[1 * 8], s2 = q[2 * 8], s3 = q[3 * 8], s4 = q[4 * 8], s5 = q[5 * 8], s6 = q[6 * 8], s7 = q[7 * 8]; DCT1D(s0, s1, s2, s3, s4, s5, s6, s7); q[0 * 8] = DCT_DESCALE(s0, ROW_BITS + 3); q[1 * 8] = DCT_DESCALE(s1, CONST_BITS + ROW_BITS + 3); q[2 * 8] = DCT_DESCALE(s2, CONST_BITS + ROW_BITS + 3); q[3 * 8] = DCT_DESCALE(s3, CONST_BITS + ROW_BITS + 3); q[4 * 8] = DCT_DESCALE(s4, ROW_BITS + 3); q[5 * 8] = DCT_DESCALE(s5, 
CONST_BITS + ROW_BITS + 3); q[6 * 8] = DCT_DESCALE(s6, CONST_BITS + ROW_BITS + 3); q[7 * 8] = DCT_DESCALE(s7, CONST_BITS + ROW_BITS + 3); } } struct sym_freq { uint m_key, m_sym_index; }; // Radix sorts sym_freq[] array by 32-bit key m_key. Returns ptr to sorted values. static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1) { const uint cMaxPasses = 4; uint32 hist[256 * cMaxPasses]; clear_obj(hist); for (uint i = 0; i < num_syms; i++) { uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; hist[256 * 2 + ((freq >> 16) & 0xFF)]++; hist[256 * 3 + ((freq >> 24) & 0xFF)]++; } sym_freq *pCur_syms = pSyms0, *pNew_syms = pSyms1; uint total_passes = cMaxPasses; while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--; for (uint pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) { const uint32* pHist = &hist[pass << 8]; uint offsets[256], cur_ofs = 0; for (uint i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; } for (uint i = 0; i < num_syms; i++) pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; } return pCur_syms; } // calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. 
static void calculate_minimum_redundancy(sym_freq* A, int n) { int root, leaf, next, avbl, used, dpth; if (n == 0) return; else if (n == 1) { A[0].m_key = 1; return; } A[0].m_key += A[1].m_key; root = 0; leaf = 2; for (next = 1; next < n - 1; next++) { if (leaf >= n || A[root].m_key < A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = next; } else A[next].m_key = A[leaf++].m_key; if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) { A[next].m_key += A[root].m_key; A[root++].m_key = next; } else A[next].m_key += A[leaf++].m_key; } A[n - 2].m_key = 0; for (next = n - 3; next >= 0; next--) A[next].m_key = A[A[next].m_key].m_key + 1; avbl = 1; used = dpth = 0; root = n - 2; next = n - 1; while (avbl > 0) { while (root >= 0 && (int)A[root].m_key == dpth) { used++; root--; } while (avbl > used) { A[next--].m_key = dpth; avbl--; } avbl = 2 * used; dpth++; used = 0; } } // Limits canonical Huffman code table's max code size to max_code_size. static void huffman_enforce_max_code_size(int* pNum_codes, int code_list_len, int max_code_size) { if (code_list_len <= 1) return; for (int i = max_code_size + 1; i <= MAX_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i]; uint32 total = 0; for (int i = max_code_size; i > 0; i--) total += (((uint32)pNum_codes[i]) << (max_code_size - i)); while (total != (1UL << max_code_size)) { pNum_codes[max_code_size]--; for (int i = max_code_size - 1; i > 0; i--) { if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; } } total--; } } // Generates an optimized offman table. 
void jpeg_encoder::optimize_huffman_table(int table_num, int table_len) { sym_freq syms0[MAX_HUFF_SYMBOLS], syms1[MAX_HUFF_SYMBOLS]; syms0[0].m_key = 1; syms0[0].m_sym_index = 0; // dummy symbol, assures that no valid code contains all 1's int num_used_syms = 1; const uint32* pSym_count = &m_huff_count[table_num][0]; for (int i = 0; i < table_len; i++) if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i + 1; } sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0, syms1); calculate_minimum_redundancy(pSyms, num_used_syms); // Count the # of symbols of each code size. int num_codes[1 + MAX_HUFF_CODESIZE]; clear_obj(num_codes); for (int i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; const uint JPGE_CODE_SIZE_LIMIT = 16; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol) huffman_enforce_max_code_size(num_codes, num_used_syms, JPGE_CODE_SIZE_LIMIT); // Compute m_huff_bits array, which contains the # of symbols per code size. clear_obj(m_huff_bits[table_num]); for (int i = 1; i <= (int)JPGE_CODE_SIZE_LIMIT; i++) m_huff_bits[table_num][i] = static_cast(num_codes[i]); // Remove the dummy symbol added above, which must be in largest bucket. for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--) { if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; } } // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest). for (int i = num_used_syms - 1; i >= 1; i--) m_huff_val[table_num][num_used_syms - 1 - i] = static_cast(pSyms[i].m_sym_index - 1); } // JPEG marker generation. 
void jpeg_encoder::emit_byte(uint8 i) { m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_obj(i); } void jpeg_encoder::emit_word(uint i) { emit_byte(uint8(i >> 8)); emit_byte(uint8(i & 0xFF)); } void jpeg_encoder::emit_marker(int marker) { emit_byte(uint8(0xFF)); emit_byte(uint8(marker)); } // Emit JFIF marker void jpeg_encoder::emit_jfif_app0() { emit_marker(M_APP0); emit_word(2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); emit_byte(0x4A); emit_byte(0x46); emit_byte(0x49); emit_byte(0x46); /* Identifier: ASCII "JFIF" */ emit_byte(0); emit_byte(1); /* Major version */ emit_byte(1); /* Minor version */ emit_byte(0); /* Density unit */ emit_word(1); emit_word(1); emit_byte(0); /* No thumbnail image */ emit_byte(0); } // Emit quantization tables void jpeg_encoder::emit_dqt() { for (int i = 0; i < ((m_num_components == 3) ? 2 : 1); i++) { emit_marker(M_DQT); emit_word(64 + 1 + 2); emit_byte(static_cast(i)); for (int j = 0; j < 64; j++) emit_byte(static_cast(m_quantization_tables[i][j])); } } // Emit start of frame marker void jpeg_encoder::emit_sof() { emit_marker(M_SOF0); /* baseline */ emit_word(3 * m_num_components + 2 + 5 + 1); emit_byte(8); /* precision */ emit_word(m_image_y); emit_word(m_image_x); emit_byte(m_num_components); for (int i = 0; i < m_num_components; i++) { emit_byte(static_cast(i + 1)); /* component ID */ emit_byte((m_comp_h_samp[i] << 4) + m_comp_v_samp[i]); /* h and v sampling */ emit_byte(i > 0); /* quant. table num */ } } // Emit Huffman table. void jpeg_encoder::emit_dht(uint8* bits, uint8* val, int index, bool ac_flag) { emit_marker(M_DHT); int length = 0; for (int i = 1; i <= 16; i++) length += bits[i]; emit_word(length + 2 + 1 + 16); emit_byte(static_cast(index + (ac_flag << 4))); for (int i = 1; i <= 16; i++) emit_byte(bits[i]); for (int i = 0; i < length; i++) emit_byte(val[i]); } // Emit all Huffman tables. 
void jpeg_encoder::emit_dhts() { emit_dht(m_huff_bits[0 + 0], m_huff_val[0 + 0], 0, false); emit_dht(m_huff_bits[2 + 0], m_huff_val[2 + 0], 0, true); if (m_num_components == 3) { emit_dht(m_huff_bits[0 + 1], m_huff_val[0 + 1], 1, false); emit_dht(m_huff_bits[2 + 1], m_huff_val[2 + 1], 1, true); } } // emit start of scan void jpeg_encoder::emit_sos() { emit_marker(M_SOS); emit_word(2 * m_num_components + 2 + 1 + 3); emit_byte(m_num_components); for (int i = 0; i < m_num_components; i++) { emit_byte(static_cast(i + 1)); if (i == 0) emit_byte((0 << 4) + 0); else emit_byte((1 << 4) + 1); } emit_byte(0); /* spectral selection */ emit_byte(63); emit_byte(0); } // Emit all markers at beginning of image file. void jpeg_encoder::emit_markers() { emit_marker(M_SOI); emit_jfif_app0(); emit_dqt(); emit_sof(); emit_dhts(); emit_sos(); } // Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays. void jpeg_encoder::compute_huffman_table(uint* codes, uint8* code_sizes, uint8* bits, uint8* val) { int i, l, last_p, si; uint8 huff_size[257]; uint huff_code[257]; uint code; int p = 0; for (l = 1; l <= 16; l++) for (i = 1; i <= bits[l]; i++) huff_size[p++] = (char)l; huff_size[p] = 0; last_p = p; // write sentinel code = 0; si = huff_size[0]; p = 0; while (huff_size[p]) { while (huff_size[p] == si) huff_code[p++] = code++; code <<= 1; si++; } memset(codes, 0, sizeof(codes[0]) * 256); memset(code_sizes, 0, sizeof(code_sizes[0]) * 256); for (p = 0; p < last_p; p++) { codes[val[p]] = huff_code[p]; code_sizes[val[p]] = huff_size[p]; } } // Quantization table generation. void jpeg_encoder::compute_quant_table(int32* pDst, int16* pSrc) { int32 q; if (m_params.m_quality < 50) q = 5000 / m_params.m_quality; else q = 200 - m_params.m_quality * 2; for (int i = 0; i < 64; i++) { int32 j = *pSrc++; j = (j * q + 50L) / 100L; *pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255); } } // Higher-level methods. 
void jpeg_encoder::first_pass_init() { m_bit_buffer = 0; m_bits_in = 0; memset(m_last_dc_val, 0, 3 * sizeof(m_last_dc_val[0])); m_mcu_y_ofs = 0; m_pass_num = 1; } bool jpeg_encoder::second_pass_init() { compute_huffman_table(&m_huff_codes[0 + 0][0], &m_huff_code_sizes[0 + 0][0], m_huff_bits[0 + 0], m_huff_val[0 + 0]); compute_huffman_table(&m_huff_codes[2 + 0][0], &m_huff_code_sizes[2 + 0][0], m_huff_bits[2 + 0], m_huff_val[2 + 0]); if (m_num_components > 1) { compute_huffman_table(&m_huff_codes[0 + 1][0], &m_huff_code_sizes[0 + 1][0], m_huff_bits[0 + 1], m_huff_val[0 + 1]); compute_huffman_table(&m_huff_codes[2 + 1][0], &m_huff_code_sizes[2 + 1][0], m_huff_bits[2 + 1], m_huff_val[2 + 1]); } first_pass_init(); emit_markers(); m_pass_num = 2; return true; } bool jpeg_encoder::jpg_open(int p_x_res, int p_y_res, int src_channels) { m_num_components = 3; switch (m_params.m_subsampling) { case Y_ONLY: { m_num_components = 1; m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1; m_mcu_x = 8; m_mcu_y = 8; break; } case H1V1: { m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1; m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1; m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1; m_mcu_x = 8; m_mcu_y = 8; break; } case H2V1: { m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1; m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1; m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1; m_mcu_x = 16; m_mcu_y = 8; break; } case H2V2: { m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2; m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1; m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1; m_mcu_x = 16; m_mcu_y = 16; } } m_image_x = p_x_res; m_image_y = p_y_res; m_image_bpp = src_channels; m_image_bpl = m_image_x * src_channels; m_image_x_mcu = (m_image_x + m_mcu_x - 1) & (~(m_mcu_x - 1)); m_image_y_mcu = (m_image_y + m_mcu_y - 1) & (~(m_mcu_y - 1)); m_image_bpl_xlt = m_image_x * m_num_components; m_image_bpl_mcu = m_image_x_mcu * m_num_components; m_mcus_per_row = m_image_x_mcu / m_mcu_x; if ((m_mcu_lines[0] = static_cast(jpge_malloc(m_image_bpl_mcu * 
m_mcu_y))) == NULL) return false; for (int i = 1; i < m_mcu_y; i++) m_mcu_lines[i] = m_mcu_lines[i - 1] + m_image_bpl_mcu; compute_quant_table(m_quantization_tables[0], s_std_lum_quant); compute_quant_table(m_quantization_tables[1], m_params.m_no_chroma_discrim_flag ? s_std_lum_quant : s_std_croma_quant); m_out_buf_left = JPGE_OUT_BUF_SIZE; m_pOut_buf = m_out_buf; if (m_params.m_two_pass_flag) { clear_obj(m_huff_count); first_pass_init(); } else { memcpy(m_huff_bits[0 + 0], s_dc_lum_bits, 17); memcpy(m_huff_val[0 + 0], s_dc_lum_val, DC_LUM_CODES); memcpy(m_huff_bits[2 + 0], s_ac_lum_bits, 17); memcpy(m_huff_val[2 + 0], s_ac_lum_val, AC_LUM_CODES); memcpy(m_huff_bits[0 + 1], s_dc_chroma_bits, 17); memcpy(m_huff_val[0 + 1], s_dc_chroma_val, DC_CHROMA_CODES); memcpy(m_huff_bits[2 + 1], s_ac_chroma_bits, 17); memcpy(m_huff_val[2 + 1], s_ac_chroma_val, AC_CHROMA_CODES); if (!second_pass_init()) return false; // in effect, skip over the first pass } return m_all_stream_writes_succeeded; } void jpeg_encoder::load_block_8_8_grey(int x) { uint8* pSrc; sample_array_t* pDst = m_sample_array; x <<= 3; for (int i = 0; i < 8; i++, pDst += 8) { pSrc = m_mcu_lines[i] + x; pDst[0] = pSrc[0] - 128; pDst[1] = pSrc[1] - 128; pDst[2] = pSrc[2] - 128; pDst[3] = pSrc[3] - 128; pDst[4] = pSrc[4] - 128; pDst[5] = pSrc[5] - 128; pDst[6] = pSrc[6] - 128; pDst[7] = pSrc[7] - 128; } } void jpeg_encoder::load_block_8_8(int x, int y, int c) { uint8* pSrc; sample_array_t* pDst = m_sample_array; x = (x * (8 * 3)) + c; y <<= 3; for (int i = 0; i < 8; i++, pDst += 8) { pSrc = m_mcu_lines[y + i] + x; pDst[0] = pSrc[0 * 3] - 128; pDst[1] = pSrc[1 * 3] - 128; pDst[2] = pSrc[2 * 3] - 128; pDst[3] = pSrc[3 * 3] - 128; pDst[4] = pSrc[4 * 3] - 128; pDst[5] = pSrc[5 * 3] - 128; pDst[6] = pSrc[6 * 3] - 128; pDst[7] = pSrc[7 * 3] - 128; } } void jpeg_encoder::load_block_16_8(int x, int c) { uint8 *pSrc1, *pSrc2; sample_array_t* pDst = m_sample_array; x = (x * (16 * 3)) + c; int a = 0, b = 2; for (int i = 0; i 
< 16; i += 2, pDst += 8) { pSrc1 = m_mcu_lines[i + 0] + x; pSrc2 = m_mcu_lines[i + 1] + x; pDst[0] = ((pSrc1[0 * 3] + pSrc1[1 * 3] + pSrc2[0 * 3] + pSrc2[1 * 3] + a) >> 2) - 128; pDst[1] = ((pSrc1[2 * 3] + pSrc1[3 * 3] + pSrc2[2 * 3] + pSrc2[3 * 3] + b) >> 2) - 128; pDst[2] = ((pSrc1[4 * 3] + pSrc1[5 * 3] + pSrc2[4 * 3] + pSrc2[5 * 3] + a) >> 2) - 128; pDst[3] = ((pSrc1[6 * 3] + pSrc1[7 * 3] + pSrc2[6 * 3] + pSrc2[7 * 3] + b) >> 2) - 128; pDst[4] = ((pSrc1[8 * 3] + pSrc1[9 * 3] + pSrc2[8 * 3] + pSrc2[9 * 3] + a) >> 2) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3] + pSrc2[10 * 3] + pSrc2[11 * 3] + b) >> 2) - 128; pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3] + pSrc2[12 * 3] + pSrc2[13 * 3] + a) >> 2) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3] + pSrc2[14 * 3] + pSrc2[15 * 3] + b) >> 2) - 128; int temp = a; a = b; b = temp; } } void jpeg_encoder::load_block_16_8_8(int x, int c) { uint8* pSrc1; sample_array_t* pDst = m_sample_array; x = (x * (16 * 3)) + c; for (int i = 0; i < 8; i++, pDst += 8) { pSrc1 = m_mcu_lines[i + 0] + x; pDst[0] = ((pSrc1[0 * 3] + pSrc1[1 * 3]) >> 1) - 128; pDst[1] = ((pSrc1[2 * 3] + pSrc1[3 * 3]) >> 1) - 128; pDst[2] = ((pSrc1[4 * 3] + pSrc1[5 * 3]) >> 1) - 128; pDst[3] = ((pSrc1[6 * 3] + pSrc1[7 * 3]) >> 1) - 128; pDst[4] = ((pSrc1[8 * 3] + pSrc1[9 * 3]) >> 1) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3]) >> 1) - 128; pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3]) >> 1) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3]) >> 1) - 128; } } void jpeg_encoder::load_quantized_coefficients(int component_num) { int32* q = m_quantization_tables[component_num > 0]; int16* pDst = m_coefficient_array; for (int i = 0; i < 64; i++) { sample_array_t j = m_sample_array[s_zag[i]]; if (j < 0) { if ((j = -j + (*q >> 1)) < *q) *pDst++ = 0; else *pDst++ = static_cast(-(j / *q)); } else { if ((j = j + (*q >> 1)) < *q) *pDst++ = 0; else *pDst++ = static_cast((j / *q)); } q++; } } void jpeg_encoder::flush_output_buffer() { if (m_out_buf_left != JPGE_OUT_BUF_SIZE) 
m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_buf(m_out_buf, JPGE_OUT_BUF_SIZE - m_out_buf_left); m_pOut_buf = m_out_buf; m_out_buf_left = JPGE_OUT_BUF_SIZE; } void jpeg_encoder::put_bits(uint bits, uint len) { m_bit_buffer |= ((uint32)bits << (24 - (m_bits_in += len))); while (m_bits_in >= 8) { uint8 c; #define JPGE_PUT_BYTE(c) \ { \ *m_pOut_buf++ = (c); \ if (--m_out_buf_left == 0) \ flush_output_buffer(); \ } JPGE_PUT_BYTE(c = (uint8)((m_bit_buffer >> 16) & 0xFF)); if (c == 0xFF) JPGE_PUT_BYTE(0); m_bit_buffer <<= 8; m_bits_in -= 8; } } void jpeg_encoder::code_coefficients_pass_one(int component_num) { if (component_num >= 3) return; // just to shut up static analysis int i, run_len, nbits, temp1; int16* src = m_coefficient_array; uint32 *dc_count = component_num ? m_huff_count[0 + 1] : m_huff_count[0 + 0], *ac_count = component_num ? m_huff_count[2 + 1] : m_huff_count[2 + 0]; temp1 = src[0] - m_last_dc_val[component_num]; m_last_dc_val[component_num] = src[0]; if (temp1 < 0) temp1 = -temp1; nbits = 0; while (temp1) { nbits++; temp1 >>= 1; } dc_count[nbits]++; for (run_len = 0, i = 1; i < 64; i++) { if ((temp1 = m_coefficient_array[i]) == 0) run_len++; else { while (run_len >= 16) { ac_count[0xF0]++; run_len -= 16; } if (temp1 < 0) temp1 = -temp1; nbits = 1; while (temp1 >>= 1) nbits++; ac_count[(run_len << 4) + nbits]++; run_len = 0; } } if (run_len) ac_count[0]++; } void jpeg_encoder::code_coefficients_pass_two(int component_num) { int i, j, run_len, nbits, temp1, temp2; int16* pSrc = m_coefficient_array; uint* codes[2]; uint8* code_sizes[2]; if (component_num == 0) { codes[0] = m_huff_codes[0 + 0]; codes[1] = m_huff_codes[2 + 0]; code_sizes[0] = m_huff_code_sizes[0 + 0]; code_sizes[1] = m_huff_code_sizes[2 + 0]; } else { codes[0] = m_huff_codes[0 + 1]; codes[1] = m_huff_codes[2 + 1]; code_sizes[0] = m_huff_code_sizes[0 + 1]; code_sizes[1] = m_huff_code_sizes[2 + 1]; } temp1 = temp2 = pSrc[0] - m_last_dc_val[component_num]; 
m_last_dc_val[component_num] = pSrc[0]; if (temp1 < 0) { temp1 = -temp1; temp2--; } nbits = 0; while (temp1) { nbits++; temp1 >>= 1; } put_bits(codes[0][nbits], code_sizes[0][nbits]); if (nbits) put_bits(temp2 & ((1 << nbits) - 1), nbits); for (run_len = 0, i = 1; i < 64; i++) { if ((temp1 = m_coefficient_array[i]) == 0) run_len++; else { while (run_len >= 16) { put_bits(codes[1][0xF0], code_sizes[1][0xF0]); run_len -= 16; } if ((temp2 = temp1) < 0) { temp1 = -temp1; temp2--; } nbits = 1; while (temp1 >>= 1) nbits++; j = (run_len << 4) + nbits; put_bits(codes[1][j], code_sizes[1][j]); put_bits(temp2 & ((1 << nbits) - 1), nbits); run_len = 0; } } if (run_len) put_bits(codes[1][0], code_sizes[1][0]); } void jpeg_encoder::code_block(int component_num) { DCT2D(m_sample_array); load_quantized_coefficients(component_num); if (m_pass_num == 1) code_coefficients_pass_one(component_num); else code_coefficients_pass_two(component_num); } void jpeg_encoder::process_mcu_row() { if (m_num_components == 1) { for (int i = 0; i < m_mcus_per_row; i++) { load_block_8_8_grey(i); code_block(0); } } else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) { for (int i = 0; i < m_mcus_per_row; i++) { load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2); } } else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) { for (int i = 0; i < m_mcus_per_row; i++) { load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0); load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2); } } else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) { for (int i = 0; i < m_mcus_per_row; i++) { load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0); load_block_8_8(i * 2 + 0, 1, 0); code_block(0); load_block_8_8(i * 2 + 1, 1, 0); code_block(0); load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2); } } } bool 
jpeg_encoder::terminate_pass_one() { optimize_huffman_table(0 + 0, DC_LUM_CODES); optimize_huffman_table(2 + 0, AC_LUM_CODES); if (m_num_components > 1) { optimize_huffman_table(0 + 1, DC_CHROMA_CODES); optimize_huffman_table(2 + 1, AC_CHROMA_CODES); } return second_pass_init(); } bool jpeg_encoder::terminate_pass_two() { put_bits(0x7F, 7); flush_output_buffer(); emit_marker(M_EOI); m_pass_num++; // purposely bump up m_pass_num, for debugging return true; } bool jpeg_encoder::process_end_of_image() { if (m_mcu_y_ofs) { if (m_mcu_y_ofs < 16) // check here just to shut up static analysis { for (int i = m_mcu_y_ofs; i < m_mcu_y; i++) memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs - 1], m_image_bpl_mcu); } process_mcu_row(); } if (m_pass_num == 1) return terminate_pass_one(); else return terminate_pass_two(); } void jpeg_encoder::load_mcu(const void* pSrc) { const uint8* Psrc = reinterpret_cast(pSrc); uint8* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst if (m_num_components == 1) { if (m_image_bpp == 4) RGBA_to_Y(pDst, Psrc, m_image_x); else if (m_image_bpp == 3) RGB_to_Y(pDst, Psrc, m_image_x); else memcpy(pDst, Psrc, m_image_x); } else { if (m_image_bpp == 4) RGBA_to_YCC(pDst, Psrc, m_image_x); else if (m_image_bpp == 3) RGB_to_YCC(pDst, Psrc, m_image_x); else Y_to_YCC(pDst, Psrc, m_image_x); } // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16 if (m_num_components == 1) memset(m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt, pDst[m_image_bpl_xlt - 1], m_image_x_mcu - m_image_x); else { const uint8 y = pDst[m_image_bpl_xlt - 3 + 0], cb = pDst[m_image_bpl_xlt - 3 + 1], cr = pDst[m_image_bpl_xlt - 3 + 2]; uint8* q = m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt; for (int i = m_image_x; i < m_image_x_mcu; i++) { *q++ = y; *q++ = cb; *q++ = cr; } } if (++m_mcu_y_ofs == m_mcu_y) { process_mcu_row(); m_mcu_y_ofs = 0; } } void jpeg_encoder::clear() { m_mcu_lines[0] = NULL; m_pass_num = 0; m_all_stream_writes_succeeded 
= true; } jpeg_encoder::jpeg_encoder() { clear(); } jpeg_encoder::~jpeg_encoder() { deinit(); } bool jpeg_encoder::init(output_stream* pStream, int width, int height, int src_channels, const params& comp_params) { deinit(); if (((!pStream) || (width < 1) || (height < 1)) || ((src_channels != 1) && (src_channels != 3) && (src_channels != 4)) || (!comp_params.check())) return false; m_pStream = pStream; m_params = comp_params; return jpg_open(width, height, src_channels); } void jpeg_encoder::deinit() { jpge_free(m_mcu_lines[0]); clear(); } bool jpeg_encoder::process_scanline(const void* pScanline) { if ((m_pass_num < 1) || (m_pass_num > 2)) return false; if (m_all_stream_writes_succeeded) { if (!pScanline) { if (!process_end_of_image()) return false; } else { load_mcu(pScanline); } } return m_all_stream_writes_succeeded; } // Higher level wrappers/examples (optional). #include class cfile_stream : public output_stream { cfile_stream(const cfile_stream&); cfile_stream& operator=(const cfile_stream&); FILE* m_pFile; bool m_bStatus; public: cfile_stream() : m_pFile(NULL), m_bStatus(false) {} virtual ~cfile_stream() { close(); } bool open(const char* pFilename) { close(); #ifdef _MSC_VER fopen_s(&m_pFile, pFilename, "wb"); #else m_pFile = fopen(pFilename, "wb"); #endif m_bStatus = (m_pFile != NULL); return m_bStatus; } bool close() { if (m_pFile) { if (fclose(m_pFile) == EOF) { m_bStatus = false; } m_pFile = NULL; } return m_bStatus; } virtual bool put_buf(const void* pBuf, int len) { m_bStatus = m_bStatus && (fwrite(pBuf, len, 1, m_pFile) == 1); return m_bStatus; } uint get_size() const { return m_pFile ? ftell(m_pFile) : 0; } }; // Writes JPEG image to file. 
bool compress_image_to_jpeg_file(const char* pFilename, int width, int height, int num_channels, const uint8* pImage_data, const params& comp_params) { cfile_stream dst_stream; if (!dst_stream.open(pFilename)) return false; jpge::jpeg_encoder dst_image; if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params)) return false; for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++) { for (int i = 0; i < height; i++) { const uint8* pBuf = pImage_data + i * width * num_channels; if (!dst_image.process_scanline(pBuf)) return false; } if (!dst_image.process_scanline(NULL)) return false; } dst_image.deinit(); return dst_stream.close(); } class memory_stream : public output_stream { memory_stream(const memory_stream&); memory_stream& operator=(const memory_stream&); uint8* m_pBuf; uint m_buf_size, m_buf_ofs; public: memory_stream(void* pBuf, uint buf_size) : m_pBuf(static_cast(pBuf)), m_buf_size(buf_size), m_buf_ofs(0) {} virtual ~memory_stream() {} virtual bool put_buf(const void* pBuf, int len) { uint buf_remaining = m_buf_size - m_buf_ofs; if ((uint)len > buf_remaining) return false; memcpy(m_pBuf + m_buf_ofs, pBuf, len); m_buf_ofs += len; return true; } uint get_size() const { return m_buf_ofs; } }; bool compress_image_to_jpeg_file_in_memory(void* pDstBuf, int& buf_size, int width, int height, int num_channels, const uint8* pImage_data, const params& comp_params) { if ((!pDstBuf) || (!buf_size)) return false; memory_stream dst_stream(pDstBuf, buf_size); buf_size = 0; jpge::jpeg_encoder dst_image; if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params)) return false; for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++) { for (int i = 0; i < height; i++) { const uint8* pScanline = pImage_data + i * width * num_channels; if (!dst_image.process_scanline(pScanline)) return false; } if (!dst_image.process_scanline(NULL)) return false; } dst_image.deinit(); buf_size = 
dst_stream.get_size(); return true; } } // namespace jpge DaemonEngine-crunch-ef4d32f/crnlib/crn_jpge.h000066400000000000000000000143741503722002600212410ustar00rootroot00000000000000// jpge.h - C++ class for JPEG compression. // Public domain, Rich Geldreich // Alex Evans: Added RGBA support, linear memory allocator. #ifndef JPEG_ENCODER_H #define JPEG_ENCODER_H namespace jpge { typedef unsigned char uint8; typedef signed short int16; typedef signed int int32; typedef unsigned short uint16; typedef unsigned int uint32; typedef unsigned int uint; // JPEG chroma subsampling factors. Y_ONLY (grayscale images) and H2V2 (color images) are the most common. enum subsampling_t { Y_ONLY = 0, H1V1 = 1, H2V1 = 2, H2V2 = 3 }; // JPEG compression parameters structure. struct params { inline params() : m_quality(85), m_subsampling(H2V2), m_no_chroma_discrim_flag(false), m_two_pass_flag(false) {} inline bool check() const { if ((m_quality < 1) || (m_quality > 100)) return false; if ((uint)m_subsampling > (uint)H2V2) return false; return true; } // Quality: 1-100, higher is better. Typical values are around 50-95. int m_quality; // m_subsampling: // 0 = Y (grayscale) only // 1 = YCbCr, no subsampling (H1V1, YCbCr 1x1x1, 3 blocks per MCU) // 2 = YCbCr, H2V1 subsampling (YCbCr 2x1x1, 4 blocks per MCU) // 3 = YCbCr, H2V2 subsampling (YCbCr 4x1x1, 6 blocks per MCU-- very common) subsampling_t m_subsampling; // Disables CbCr discrimination - only intended for testing. // If true, the Y quantization table is also used for the CbCr channels. bool m_no_chroma_discrim_flag; bool m_two_pass_flag; }; // Writes JPEG image to a file. // num_channels must be 1 (Y) or 3 (RGB), image pitch must be width*num_channels. bool compress_image_to_jpeg_file(const char* pFilename, int width, int height, int num_channels, const uint8* pImage_data, const params& comp_params = params()); // Writes JPEG image to memory buffer. 
// On entry, buf_size is the size of the output buffer pointed at by pBuf, which should be at least ~1024 bytes. // If return value is true, buf_size will be set to the size of the compressed data. bool compress_image_to_jpeg_file_in_memory(void* pBuf, int& buf_size, int width, int height, int num_channels, const uint8* pImage_data, const params& comp_params = params()); // Output stream abstract class - used by the jpeg_encoder class to write to the output stream. // put_buf() is generally called with len==JPGE_OUT_BUF_SIZE bytes, but for headers it'll be called with smaller amounts. class output_stream { public: virtual ~output_stream(){}; virtual bool put_buf(const void* Pbuf, int len) = 0; template inline bool put_obj(const T& obj) { return put_buf(&obj, sizeof(T)); } }; // Lower level jpeg_encoder class - useful if more control is needed than the above helper functions. class jpeg_encoder { public: jpeg_encoder(); ~jpeg_encoder(); // Initializes the compressor. // pStream: The stream object to use for writing compressed data. // params - Compression parameters structure, defined above. // width, height - Image dimensions. // channels - May be 1, or 3. 1 indicates grayscale, 3 indicates RGB source data. // Returns false on out of memory or if a stream write fails. bool init(output_stream* pStream, int width, int height, int src_channels, const params& comp_params = params()); const params& get_params() const { return m_params; } // Deinitializes the compressor, freeing any allocated memory. May be called at any time. void deinit(); uint get_total_passes() const { return m_params.m_two_pass_flag ? 2 : 1; } inline uint get_cur_pass() { return m_pass_num; } // Call this method with each source scanline. // width * src_channels bytes per scanline is expected (RGB or Y format). // You must call with NULL after all scanlines are processed to finish compression. // Returns false on out of memory or if a stream write fails. 
bool process_scanline(const void* pScanline); private: jpeg_encoder(const jpeg_encoder&); jpeg_encoder& operator=(const jpeg_encoder&); typedef int32 sample_array_t; output_stream* m_pStream; params m_params; uint8 m_num_components; uint8 m_comp_h_samp[3], m_comp_v_samp[3]; int m_image_x, m_image_y, m_image_bpp, m_image_bpl; int m_image_x_mcu, m_image_y_mcu; int m_image_bpl_xlt, m_image_bpl_mcu; int m_mcus_per_row; int m_mcu_x, m_mcu_y; uint8* m_mcu_lines[16]; uint8 m_mcu_y_ofs; sample_array_t m_sample_array[64]; int16 m_coefficient_array[64]; int32 m_quantization_tables[2][64]; uint m_huff_codes[4][256]; uint8 m_huff_code_sizes[4][256]; uint8 m_huff_bits[4][17]; uint8 m_huff_val[4][256]; uint32 m_huff_count[4][256]; int m_last_dc_val[3]; enum { JPGE_OUT_BUF_SIZE = 2048 }; uint8 m_out_buf[JPGE_OUT_BUF_SIZE]; uint8* m_pOut_buf; uint m_out_buf_left; uint32 m_bit_buffer; uint m_bits_in; uint8 m_pass_num; bool m_all_stream_writes_succeeded; void optimize_huffman_table(int table_num, int table_len); void emit_byte(uint8 i); void emit_word(uint i); void emit_marker(int marker); void emit_jfif_app0(); void emit_dqt(); void emit_sof(); void emit_dht(uint8* bits, uint8* val, int index, bool ac_flag); void emit_dhts(); void emit_sos(); void emit_markers(); void compute_huffman_table(uint* codes, uint8* code_sizes, uint8* bits, uint8* val); void compute_quant_table(int32* dst, int16* src); void adjust_quant_table(int32* dst, int32* src); void first_pass_init(); bool second_pass_init(); bool jpg_open(int p_x_res, int p_y_res, int src_channels); void load_block_8_8_grey(int x); void load_block_8_8(int x, int y, int c); void load_block_16_8(int x, int c); void load_block_16_8_8(int x, int c); void load_quantized_coefficients(int component_num); void flush_output_buffer(); void put_bits(uint bits, uint len); void code_coefficients_pass_one(int component_num); void code_coefficients_pass_two(int component_num); void code_block(int component_num); void process_mcu_row(); bool 
terminate_pass_one(); bool terminate_pass_two(); bool process_end_of_image(); void load_mcu(const void* src); void clear(); void init(); }; } // namespace jpge #endif // JPEG_ENCODER DaemonEngine-crunch-ef4d32f/crnlib/crn_ktx_texture.cpp000066400000000000000000000661721503722002600232400ustar00rootroot00000000000000// File: crn_ktx_texture.cpp #include "crn_core.h" #include "crn_ktx_texture.h" #include "crn_console.h" // Set #if CRNLIB_KTX_PVRTEX_WORKAROUNDS to 1 to enable various workarounds for oddball KTX files written by PVRTexTool. #define CRNLIB_KTX_PVRTEX_WORKAROUNDS 1 namespace crnlib { const uint8 s_ktx_file_id[12] = {0xAB, 0x4B, 0x54, 0x58, 0x20, 0x31, 0x31, 0xBB, 0x0D, 0x0A, 0x1A, 0x0A}; bool is_packed_pixel_ogl_type(uint32 ogl_type) { switch (ogl_type) { case KTX_UNSIGNED_BYTE_3_3_2: case KTX_UNSIGNED_BYTE_2_3_3_REV: case KTX_UNSIGNED_SHORT_5_6_5: case KTX_UNSIGNED_SHORT_5_6_5_REV: case KTX_UNSIGNED_SHORT_4_4_4_4: case KTX_UNSIGNED_SHORT_4_4_4_4_REV: case KTX_UNSIGNED_SHORT_5_5_5_1: case KTX_UNSIGNED_SHORT_1_5_5_5_REV: case KTX_UNSIGNED_INT_8_8_8_8: case KTX_UNSIGNED_INT_8_8_8_8_REV: case KTX_UNSIGNED_INT_10_10_10_2: case KTX_UNSIGNED_INT_2_10_10_10_REV: case KTX_UNSIGNED_INT_24_8: case KTX_UNSIGNED_INT_10F_11F_11F_REV: case KTX_UNSIGNED_INT_5_9_9_9_REV: return true; } return false; } uint get_ogl_type_size(uint32 ogl_type) { switch (ogl_type) { case KTX_UNSIGNED_BYTE: case KTX_BYTE: return 1; case KTX_HALF_FLOAT: case KTX_UNSIGNED_SHORT: case KTX_SHORT: return 2; case KTX_FLOAT: case KTX_UNSIGNED_INT: case KTX_INT: return 4; case KTX_UNSIGNED_BYTE_3_3_2: case KTX_UNSIGNED_BYTE_2_3_3_REV: return 1; case KTX_UNSIGNED_SHORT_5_6_5: case KTX_UNSIGNED_SHORT_5_6_5_REV: case KTX_UNSIGNED_SHORT_4_4_4_4: case KTX_UNSIGNED_SHORT_4_4_4_4_REV: case KTX_UNSIGNED_SHORT_5_5_5_1: case KTX_UNSIGNED_SHORT_1_5_5_5_REV: return 2; case KTX_UNSIGNED_INT_8_8_8_8: case KTX_UNSIGNED_INT_8_8_8_8_REV: case KTX_UNSIGNED_INT_10_10_10_2: case KTX_UNSIGNED_INT_2_10_10_10_REV: case 
KTX_UNSIGNED_INT_24_8: case KTX_UNSIGNED_INT_10F_11F_11F_REV: case KTX_UNSIGNED_INT_5_9_9_9_REV: return 4; } return 0; } uint32 get_ogl_base_internal_fmt(uint32 ogl_fmt) { switch (ogl_fmt) { case KTX_ETC1_RGB8_OES: case KTX_COMPRESSED_RGB8_ETC2: case KTX_RGB_S3TC: case KTX_RGB4_S3TC: case KTX_COMPRESSED_RGB_S3TC_DXT1_EXT: case KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT: return KTX_RGB; case KTX_COMPRESSED_RGBA8_ETC2_EAC: case KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT: case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: case KTX_RGBA_S3TC: case KTX_RGBA4_S3TC: case KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT: case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: case KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT: case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: case KTX_RGBA_DXT5_S3TC: case KTX_RGBA4_DXT5_S3TC: return KTX_RGBA; case 1: case KTX_RED: case KTX_RED_INTEGER: case KTX_GREEN: case KTX_GREEN_INTEGER: case KTX_BLUE: case KTX_BLUE_INTEGER: case KTX_R8: case KTX_R8UI: case KTX_LUMINANCE8: case KTX_ALPHA: case KTX_LUMINANCE: case KTX_COMPRESSED_RED_RGTC1_EXT: case KTX_COMPRESSED_SIGNED_RED_RGTC1_EXT: case KTX_COMPRESSED_LUMINANCE_LATC1_EXT: case KTX_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: return KTX_RED; case 2: case KTX_RG: case KTX_RG8: case KTX_RG_INTEGER: case KTX_LUMINANCE_ALPHA: case KTX_COMPRESSED_RED_GREEN_RGTC2_EXT: case KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT: case KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: case KTX_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: return KTX_RG; case 3: case KTX_SRGB: case KTX_RGB: case KTX_RGB_INTEGER: case KTX_BGR: case KTX_BGR_INTEGER: case KTX_RGB8: case KTX_SRGB8: return KTX_RGB; case 4: case KTX_RGBA: case KTX_BGRA: case KTX_RGBA_INTEGER: case KTX_BGRA_INTEGER: case KTX_SRGB_ALPHA: case KTX_SRGB8_ALPHA8: case KTX_RGBA8: return KTX_RGBA; } return 0; } bool get_ogl_fmt_desc(uint32 ogl_fmt, uint32 ogl_type, uint& block_dim, uint& bytes_per_block) { uint ogl_type_size = get_ogl_type_size(ogl_type); block_dim = 1; bytes_per_block = 0; switch (ogl_fmt) { case 
KTX_COMPRESSED_RED_RGTC1_EXT: case KTX_COMPRESSED_SIGNED_RED_RGTC1_EXT: case KTX_COMPRESSED_LUMINANCE_LATC1_EXT: case KTX_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT: case KTX_ETC1_RGB8_OES: case KTX_COMPRESSED_RGB8_ETC2: case KTX_RGB_S3TC: case KTX_RGB4_S3TC: case KTX_COMPRESSED_RGB_S3TC_DXT1_EXT: case KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT: case KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT: case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: { block_dim = 4; bytes_per_block = 8; break; } case KTX_COMPRESSED_RGBA8_ETC2_EAC: case KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT: case KTX_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT: case KTX_COMPRESSED_RED_GREEN_RGTC2_EXT: case KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT: case KTX_RGBA_S3TC: case KTX_RGBA4_S3TC: case KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT: case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: case KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT: case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: case KTX_RGBA_DXT5_S3TC: case KTX_RGBA4_DXT5_S3TC: { block_dim = 4; bytes_per_block = 16; break; } case 1: case KTX_ALPHA: case KTX_RED: case KTX_GREEN: case KTX_BLUE: case KTX_RED_INTEGER: case KTX_GREEN_INTEGER: case KTX_BLUE_INTEGER: case KTX_LUMINANCE: { bytes_per_block = ogl_type_size; break; } case KTX_R8: case KTX_R8UI: case KTX_ALPHA8: case KTX_LUMINANCE8: { bytes_per_block = 1; break; } case 2: case KTX_RG: case KTX_RG_INTEGER: case KTX_LUMINANCE_ALPHA: { bytes_per_block = 2 * ogl_type_size; break; } case KTX_RG8: case KTX_LUMINANCE8_ALPHA8: { bytes_per_block = 2; break; } case 3: case KTX_SRGB: case KTX_RGB: case KTX_BGR: case KTX_RGB_INTEGER: case KTX_BGR_INTEGER: { bytes_per_block = is_packed_pixel_ogl_type(ogl_type) ? ogl_type_size : (3 * ogl_type_size); break; } case KTX_RGB8: case KTX_SRGB8: { bytes_per_block = 3; break; } case 4: case KTX_RGBA: case KTX_BGRA: case KTX_RGBA_INTEGER: case KTX_BGRA_INTEGER: case KTX_SRGB_ALPHA: { bytes_per_block = is_packed_pixel_ogl_type(ogl_type) ? 
ogl_type_size : (4 * ogl_type_size); break; } case KTX_SRGB8_ALPHA8: case KTX_RGBA8: { bytes_per_block = 4; break; } default: return false; } return true; } bool ktx_texture::compute_pixel_info() { if ((!m_header.m_glType) || (!m_header.m_glFormat)) { if ((m_header.m_glType) || (m_header.m_glFormat)) return false; // Must be a compressed format. if (!get_ogl_fmt_desc(m_header.m_glInternalFormat, m_header.m_glType, m_block_dim, m_bytes_per_block)) { #if CRNLIB_KTX_PVRTEX_WORKAROUNDS if ((!m_header.m_glInternalFormat) && (!m_header.m_glType) && (!m_header.m_glTypeSize) && (!m_header.m_glBaseInternalFormat)) { // PVRTexTool writes bogus headers when outputting ETC1. console::warning("ktx_texture::compute_pixel_info: Header doesn't specify any format, assuming ETC1 and hoping for the best"); m_header.m_glBaseInternalFormat = KTX_RGB; m_header.m_glInternalFormat = KTX_ETC1_RGB8_OES; m_header.m_glTypeSize = 1; m_block_dim = 4; m_bytes_per_block = 8; return true; } #endif return false; } if (m_block_dim == 1) return false; } else { // Must be an uncompressed format. 
if (!get_ogl_fmt_desc(m_header.m_glFormat, m_header.m_glType, m_block_dim, m_bytes_per_block)) return false; if (m_block_dim > 1) return false; } return true; } bool ktx_texture::read_from_stream(data_stream_serializer& serializer) { clear(); // Read header if (serializer.read(&m_header, 1, sizeof(m_header)) != sizeof(ktx_header)) return false; // Check header if (memcmp(s_ktx_file_id, m_header.m_identifier, sizeof(m_header.m_identifier))) return false; if ((m_header.m_endianness != KTX_OPPOSITE_ENDIAN) && (m_header.m_endianness != KTX_ENDIAN)) return false; m_opposite_endianness = (m_header.m_endianness == KTX_OPPOSITE_ENDIAN); if (m_opposite_endianness) { m_header.endian_swap(); if ((m_header.m_glTypeSize != sizeof(uint8)) && (m_header.m_glTypeSize != sizeof(uint16)) && (m_header.m_glTypeSize != sizeof(uint32))) return false; } if (!check_header()) return false; if (!compute_pixel_info()) return false; uint8 pad_bytes[3]; // Read the key value entries uint num_key_value_bytes_remaining = m_header.m_bytesOfKeyValueData; while (num_key_value_bytes_remaining) { if (num_key_value_bytes_remaining < sizeof(uint32)) return false; uint32 key_value_byte_size; if (serializer.read(&key_value_byte_size, 1, sizeof(uint32)) != sizeof(uint32)) return false; num_key_value_bytes_remaining -= sizeof(uint32); if (m_opposite_endianness) key_value_byte_size = utils::swap32(key_value_byte_size); if (key_value_byte_size > num_key_value_bytes_remaining) return false; uint8_vec key_value_data; if (key_value_byte_size) { key_value_data.resize(key_value_byte_size); if (serializer.read(&key_value_data[0], 1, key_value_byte_size) != key_value_byte_size) return false; } m_key_values.push_back(key_value_data); uint padding = 3 - ((key_value_byte_size + 3) % 4); if (padding) { if (serializer.read(pad_bytes, 1, padding) != padding) return false; } num_key_value_bytes_remaining -= key_value_byte_size; if (num_key_value_bytes_remaining < padding) return false; num_key_value_bytes_remaining -= 
padding; } // Now read the mip levels uint total_faces = get_num_mips() * get_array_size() * get_num_faces() * get_depth(); if ((!total_faces) || (total_faces > 65535)) return false; // See Section 2.8 of KTX file format: No rounding to block sizes should be applied for block compressed textures. // OK, I'm going to break that rule otherwise KTX can only store a subset of textures that DDS can handle for no good reason. #if 0 const uint mip0_row_blocks = m_header.m_pixelWidth / m_block_dim; const uint mip0_col_blocks = CRNLIB_MAX(1, m_header.m_pixelHeight) / m_block_dim; #else const uint mip0_row_blocks = (m_header.m_pixelWidth + m_block_dim - 1) / m_block_dim; const uint mip0_col_blocks = (CRNLIB_MAX(1, m_header.m_pixelHeight) + m_block_dim - 1) / m_block_dim; #endif if ((!mip0_row_blocks) || (!mip0_col_blocks)) return false; bool has_valid_image_size_fields = true; bool disable_mip_and_cubemap_padding = false; #if CRNLIB_KTX_PVRTEX_WORKAROUNDS { // PVRTexTool has a bogus KTX writer that doesn't write any imageSize fields. Nice. 
size_t expected_bytes_remaining = 0; for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) { uint mip_width, mip_height, mip_depth; get_mip_dim(mip_level, mip_width, mip_height, mip_depth); const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; if ((!mip_row_blocks) || (!mip_col_blocks)) return false; expected_bytes_remaining += sizeof(uint32); if ((!m_header.m_numberOfArrayElements) && (get_num_faces() == 6)) { for (uint face = 0; face < get_num_faces(); face++) { uint slice_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; expected_bytes_remaining += slice_size; uint num_cube_pad_bytes = 3 - ((slice_size + 3) % 4); expected_bytes_remaining += num_cube_pad_bytes; } } else { uint total_mip_size = 0; for (uint array_element = 0; array_element < get_array_size(); array_element++) { for (uint face = 0; face < get_num_faces(); face++) { for (uint zslice = 0; zslice < mip_depth; zslice++) { uint slice_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; total_mip_size += slice_size; } } } expected_bytes_remaining += total_mip_size; uint num_mip_pad_bytes = 3 - ((total_mip_size + 3) % 4); expected_bytes_remaining += num_mip_pad_bytes; } } if (serializer.get_stream()->get_remaining() < expected_bytes_remaining) { has_valid_image_size_fields = false; disable_mip_and_cubemap_padding = true; console::warning("ktx_texture::read_from_stream: KTX file size is smaller than expected - trying to read anyway without imageSize fields"); } } #endif for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) { uint mip_width, mip_height, mip_depth; get_mip_dim(mip_level, mip_width, mip_height, mip_depth); const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; if ((!mip_row_blocks) || (!mip_col_blocks)) return false; uint32 image_size = 0; if 
(!has_valid_image_size_fields) image_size = mip_depth * mip_row_blocks * mip_col_blocks * m_bytes_per_block * get_array_size() * get_num_faces(); else { if (serializer.read(&image_size, 1, sizeof(image_size)) != sizeof(image_size)) return false; if (m_opposite_endianness) image_size = utils::swap32(image_size); } if (!image_size) return false; uint total_mip_size = 0; if ((!m_header.m_numberOfArrayElements) && (get_num_faces() == 6)) { // plain non-array cubemap for (uint face = 0; face < get_num_faces(); face++) { CRNLIB_ASSERT(m_image_data.size() == get_image_index(mip_level, 0, face, 0)); m_image_data.push_back(uint8_vec()); uint8_vec& image_data = m_image_data.back(); image_data.resize(image_size); if (serializer.read(&image_data[0], 1, image_size) != image_size) return false; if (m_opposite_endianness) utils::endian_swap_mem(&image_data[0], image_size, m_header.m_glTypeSize); uint num_cube_pad_bytes = disable_mip_and_cubemap_padding ? 0 : (3 - ((image_size + 3) % 4)); if (serializer.read(pad_bytes, 1, num_cube_pad_bytes) != num_cube_pad_bytes) return false; total_mip_size += image_size + num_cube_pad_bytes; } } else { // 1D, 2D, 3D (normal or array texture), or array cubemap uint num_image_bytes_remaining = image_size; for (uint array_element = 0; array_element < get_array_size(); array_element++) { for (uint face = 0; face < get_num_faces(); face++) { for (uint zslice = 0; zslice < mip_depth; zslice++) { CRNLIB_ASSERT(m_image_data.size() == get_image_index(mip_level, array_element, face, zslice)); uint slice_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; if ((!slice_size) || (slice_size > num_image_bytes_remaining)) return false; m_image_data.push_back(uint8_vec()); uint8_vec& image_data = m_image_data.back(); image_data.resize(slice_size); if (serializer.read(&image_data[0], 1, slice_size) != slice_size) return false; if (m_opposite_endianness) utils::endian_swap_mem(&image_data[0], slice_size, m_header.m_glTypeSize); num_image_bytes_remaining 
-= slice_size; total_mip_size += slice_size; } } } if (num_image_bytes_remaining) return false; } uint num_mip_pad_bytes = disable_mip_and_cubemap_padding ? 0 : (3 - ((total_mip_size + 3) % 4)); if (serializer.read(pad_bytes, 1, num_mip_pad_bytes) != num_mip_pad_bytes) return false; } return true; } bool ktx_texture::write_to_stream(data_stream_serializer& serializer, bool no_keyvalue_data) { if (!consistency_check()) { CRNLIB_ASSERT(0); return false; } memcpy(m_header.m_identifier, s_ktx_file_id, sizeof(m_header.m_identifier)); m_header.m_endianness = m_opposite_endianness ? KTX_OPPOSITE_ENDIAN : KTX_ENDIAN; if (m_block_dim == 1) { m_header.m_glTypeSize = get_ogl_type_size(m_header.m_glType); m_header.m_glBaseInternalFormat = m_header.m_glFormat; } else { m_header.m_glBaseInternalFormat = get_ogl_base_internal_fmt(m_header.m_glInternalFormat); } m_header.m_bytesOfKeyValueData = 0; if (!no_keyvalue_data) { for (uint i = 0; i < m_key_values.size(); i++) m_header.m_bytesOfKeyValueData += sizeof(uint32) + ((m_key_values[i].size() + 3) & ~3); } if (m_opposite_endianness) m_header.endian_swap(); bool success = (serializer.write(&m_header, sizeof(m_header), 1) == 1); if (m_opposite_endianness) m_header.endian_swap(); if (!success) return success; uint total_key_value_bytes = 0; const uint8 padding[3] = {0, 0, 0}; if (!no_keyvalue_data) { for (uint i = 0; i < m_key_values.size(); i++) { uint32 key_value_size = m_key_values[i].size(); if (m_opposite_endianness) key_value_size = utils::swap32(key_value_size); success = (serializer.write(&key_value_size, sizeof(key_value_size), 1) == 1); total_key_value_bytes += sizeof(key_value_size); if (m_opposite_endianness) key_value_size = utils::swap32(key_value_size); if (!success) return false; if (key_value_size) { if (serializer.write(&m_key_values[i][0], key_value_size, 1) != 1) return false; total_key_value_bytes += key_value_size; uint num_padding = 3 - ((key_value_size + 3) % 4); if ((num_padding) && (serializer.write(padding, 
num_padding, 1) != 1)) return false; total_key_value_bytes += num_padding; } } (void)total_key_value_bytes; } CRNLIB_ASSERT(total_key_value_bytes == m_header.m_bytesOfKeyValueData); for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) { uint mip_width, mip_height, mip_depth; get_mip_dim(mip_level, mip_width, mip_height, mip_depth); const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; if ((!mip_row_blocks) || (!mip_col_blocks)) return false; uint32 image_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; if ((m_header.m_numberOfArrayElements) || (get_num_faces() == 1)) image_size *= (get_array_size() * get_num_faces() * get_depth()); if (!image_size) return false; if (m_opposite_endianness) image_size = utils::swap32(image_size); success = (serializer.write(&image_size, sizeof(image_size), 1) == 1); if (m_opposite_endianness) image_size = utils::swap32(image_size); if (!success) return false; uint total_mip_size = 0; if ((!m_header.m_numberOfArrayElements) && (get_num_faces() == 6)) { // plain non-array cubemap for (uint face = 0; face < get_num_faces(); face++) { const uint8_vec& image_data = get_image_data(get_image_index(mip_level, 0, face, 0)); if ((!image_data.size()) || (image_data.size() != image_size)) return false; if (m_opposite_endianness) { uint8_vec tmp_image_data(image_data); utils::endian_swap_mem(&tmp_image_data[0], tmp_image_data.size(), m_header.m_glTypeSize); if (serializer.write(&tmp_image_data[0], tmp_image_data.size(), 1) != 1) return false; } else if (serializer.write(&image_data[0], image_data.size(), 1) != 1) return false; uint num_cube_pad_bytes = 3 - ((image_data.size() + 3) % 4); if ((num_cube_pad_bytes) && (serializer.write(padding, num_cube_pad_bytes, 1) != 1)) return false; total_mip_size += image_size + num_cube_pad_bytes; } } else { // 1D, 2D, 3D (normal or array texture), or array cubemap for (uint array_element = 0; 
array_element < get_array_size(); array_element++) { for (uint face = 0; face < get_num_faces(); face++) { for (uint zslice = 0; zslice < mip_depth; zslice++) { const uint8_vec& image_data = get_image_data(get_image_index(mip_level, array_element, face, zslice)); if (!image_data.size()) return false; if (m_opposite_endianness) { uint8_vec tmp_image_data(image_data); utils::endian_swap_mem(&tmp_image_data[0], tmp_image_data.size(), m_header.m_glTypeSize); if (serializer.write(&tmp_image_data[0], tmp_image_data.size(), 1) != 1) return false; } else if (serializer.write(&image_data[0], image_data.size(), 1) != 1) return false; total_mip_size += image_data.size(); } } } uint num_mip_pad_bytes = 3 - ((total_mip_size + 3) % 4); if ((num_mip_pad_bytes) && (serializer.write(padding, num_mip_pad_bytes, 1) != 1)) return false; total_mip_size += num_mip_pad_bytes; } CRNLIB_ASSERT((total_mip_size & 3) == 0); } return true; } bool ktx_texture::init_2D(uint width, uint height, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) { clear(); m_header.m_pixelWidth = width; m_header.m_pixelHeight = height; m_header.m_numberOfMipmapLevels = num_mips; m_header.m_glInternalFormat = ogl_internal_fmt; m_header.m_glFormat = ogl_fmt; m_header.m_glType = ogl_type; m_header.m_numberOfFaces = 1; if (!compute_pixel_info()) return false; return true; } bool ktx_texture::init_2D_array(uint width, uint height, uint num_mips, uint array_size, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) { clear(); m_header.m_pixelWidth = width; m_header.m_pixelHeight = height; m_header.m_numberOfMipmapLevels = num_mips; m_header.m_numberOfArrayElements = array_size; m_header.m_glInternalFormat = ogl_internal_fmt; m_header.m_glFormat = ogl_fmt; m_header.m_glType = ogl_type; m_header.m_numberOfFaces = 1; if (!compute_pixel_info()) return false; return true; } bool ktx_texture::init_3D(uint width, uint height, uint depth, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 
ogl_type) { clear(); m_header.m_pixelWidth = width; m_header.m_pixelHeight = height; m_header.m_pixelDepth = depth; m_header.m_numberOfMipmapLevels = num_mips; m_header.m_glInternalFormat = ogl_internal_fmt; m_header.m_glFormat = ogl_fmt; m_header.m_glType = ogl_type; m_header.m_numberOfFaces = 1; if (!compute_pixel_info()) return false; return true; } bool ktx_texture::init_cubemap(uint dim, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type) { clear(); m_header.m_pixelWidth = dim; m_header.m_pixelHeight = dim; m_header.m_numberOfMipmapLevels = num_mips; m_header.m_glInternalFormat = ogl_internal_fmt; m_header.m_glFormat = ogl_fmt; m_header.m_glType = ogl_type; m_header.m_numberOfFaces = 6; if (!compute_pixel_info()) return false; return true; } bool ktx_texture::check_header() const { if (((get_num_faces() != 1) && (get_num_faces() != 6)) || (!m_header.m_pixelWidth)) return false; if ((!m_header.m_pixelHeight) && (m_header.m_pixelDepth)) return false; if ((get_num_faces() == 6) && ((m_header.m_pixelDepth) || (!m_header.m_pixelHeight))) return false; if (m_header.m_numberOfMipmapLevels) { const uint max_mipmap_dimension = 1U << (m_header.m_numberOfMipmapLevels - 1U); if (max_mipmap_dimension > (CRNLIB_MAX(CRNLIB_MAX(m_header.m_pixelWidth, m_header.m_pixelHeight), m_header.m_pixelDepth))) return false; } return true; } bool ktx_texture::consistency_check() const { if (!check_header()) return false; uint block_dim = 0, bytes_per_block = 0; if ((!m_header.m_glType) || (!m_header.m_glFormat)) { if ((m_header.m_glType) || (m_header.m_glFormat)) return false; if (!get_ogl_fmt_desc(m_header.m_glInternalFormat, m_header.m_glType, block_dim, bytes_per_block)) return false; if (block_dim == 1) return false; //if ((get_width() % block_dim) || (get_height() % block_dim)) // return false; } else { if (!get_ogl_fmt_desc(m_header.m_glFormat, m_header.m_glType, block_dim, bytes_per_block)) return false; if (block_dim > 1) return false; } if ((m_block_dim != 
block_dim) || (m_bytes_per_block != bytes_per_block)) return false; if (m_image_data.size() != get_total_images()) return false; for (uint mip_level = 0; mip_level < get_num_mips(); mip_level++) { uint mip_width, mip_height, mip_depth; get_mip_dim(mip_level, mip_width, mip_height, mip_depth); const uint mip_row_blocks = (mip_width + m_block_dim - 1) / m_block_dim; const uint mip_col_blocks = (mip_height + m_block_dim - 1) / m_block_dim; if ((!mip_row_blocks) || (!mip_col_blocks)) return false; for (uint array_element = 0; array_element < get_array_size(); array_element++) { for (uint face = 0; face < get_num_faces(); face++) { for (uint zslice = 0; zslice < mip_depth; zslice++) { const uint8_vec& image_data = get_image_data(get_image_index(mip_level, array_element, face, zslice)); uint expected_image_size = mip_row_blocks * mip_col_blocks * m_bytes_per_block; if (image_data.size() != expected_image_size) return false; } } } } return true; } const uint8_vec* ktx_texture::find_key(const char* pKey) const { const size_t n = strlen(pKey) + 1; for (uint i = 0; i < m_key_values.size(); i++) { const uint8_vec& v = m_key_values[i]; if ((v.size() >= n) && (!memcmp(&v[0], pKey, n))) return &v; } return NULL; } bool ktx_texture::get_key_value_as_string(const char* pKey, dynamic_string& str) const { const uint8_vec* p = find_key(pKey); if (!p) { str.clear(); return false; } const uint ofs = (static_cast(strlen(pKey)) + 1); const uint8* pValue = p->get_ptr() + ofs; const uint n = p->size() - ofs; uint i; for (i = 0; i < n; i++) if (!pValue[i]) break; str.set_from_buf(pValue, i); return true; } uint ktx_texture::add_key_value(const char* pKey, const void* pVal, uint val_size) { const uint idx = m_key_values.size(); m_key_values.resize(idx + 1); uint8_vec& v = m_key_values.back(); v.append(reinterpret_cast(pKey), static_cast(strlen(pKey)) + 1); v.append(static_cast(pVal), val_size); return idx; } } // namespace crnlib 
DaemonEngine-crunch-ef4d32f/crnlib/crn_ktx_texture.h000066400000000000000000000240761503722002600227020ustar00rootroot00000000000000// File: crn_ktx_texture.h #ifndef _KTX_TEXTURE_H_ #define _KTX_TEXTURE_H_ #ifdef _MSC_VER #pragma once #endif #include "crn_data_stream_serializer.h" #define KTX_ENDIAN 0x04030201 #define KTX_OPPOSITE_ENDIAN 0x01020304 namespace crnlib { extern const uint8 s_ktx_file_id[12]; struct ktx_header { uint8 m_identifier[12]; uint32 m_endianness; uint32 m_glType; uint32 m_glTypeSize; uint32 m_glFormat; uint32 m_glInternalFormat; uint32 m_glBaseInternalFormat; uint32 m_pixelWidth; uint32 m_pixelHeight; uint32 m_pixelDepth; uint32 m_numberOfArrayElements; uint32 m_numberOfFaces; uint32 m_numberOfMipmapLevels; uint32 m_bytesOfKeyValueData; void clear() { memset(this, 0, sizeof(*this)); } void endian_swap() { utils::endian_swap_mem32(&m_endianness, (sizeof(*this) - sizeof(m_identifier)) / sizeof(uint32)); } }; typedef crnlib::vector ktx_key_value_vec; typedef crnlib::vector ktx_image_data_vec; // Compressed pixel data formats: ETC1, DXT1, DXT3, DXT5 enum { KTX_ETC1_RGB8_OES = 0x8D64, KTX_COMPRESSED_RGB8_ETC2 = 0x9274, KTX_COMPRESSED_RGBA8_ETC2_EAC = 0x9278, KTX_RGB_S3TC = 0x83A0, KTX_RGB4_S3TC = 0x83A1, KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0, KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT = 0x83F1, KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT = 0x8C4C, KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT = 0x8C4D, KTX_RGBA_S3TC = 0x83A2, KTX_RGBA4_S3TC = 0x83A3, KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT = 0x83F2, KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT = 0x8C4E, KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3, KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT = 0x8C4F, KTX_RGBA_DXT5_S3TC = 0x83A4, KTX_RGBA4_DXT5_S3TC = 0x83A5, KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB, KTX_COMPRESSED_SIGNED_RED_RGTC1_EXT = 0x8DBC, KTX_COMPRESSED_RED_GREEN_RGTC2_EXT = 0x8DBD, KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT = 0x8DBE, KTX_COMPRESSED_LUMINANCE_LATC1_EXT = 0x8C70, KTX_COMPRESSED_SIGNED_LUMINANCE_LATC1_EXT = 0x8C71, 
KTX_COMPRESSED_LUMINANCE_ALPHA_LATC2_EXT = 0x8C72, KTX_COMPRESSED_SIGNED_LUMINANCE_ALPHA_LATC2_EXT = 0x8C73 }; // Pixel formats (various internal, base, and base internal formats) enum { KTX_R8 = 0x8229, KTX_R8UI = 0x8232, KTX_RGB8 = 0x8051, KTX_SRGB8 = 0x8C41, KTX_SRGB = 0x8C40, KTX_SRGB_ALPHA = 0x8C42, KTX_SRGB8_ALPHA8 = 0x8C43, KTX_RGBA8 = 0x8058, KTX_STENCIL_INDEX = 0x1901, KTX_DEPTH_COMPONENT = 0x1902, KTX_DEPTH_STENCIL = 0x84F9, KTX_RED = 0x1903, KTX_GREEN = 0x1904, KTX_BLUE = 0x1905, KTX_ALPHA = 0x1906, KTX_RG = 0x8227, KTX_RGB = 0x1907, KTX_RGBA = 0x1908, KTX_BGR = 0x80E0, KTX_BGRA = 0x80E1, KTX_RED_INTEGER = 0x8D94, KTX_GREEN_INTEGER = 0x8D95, KTX_BLUE_INTEGER = 0x8D96, KTX_ALPHA_INTEGER = 0x8D97, KTX_RGB_INTEGER = 0x8D98, KTX_RGBA_INTEGER = 0x8D99, KTX_BGR_INTEGER = 0x8D9A, KTX_BGRA_INTEGER = 0x8D9B, KTX_LUMINANCE = 0x1909, KTX_LUMINANCE_ALPHA = 0x190A, KTX_RG_INTEGER = 0x8228, KTX_RG8 = 0x822B, KTX_ALPHA8 = 0x803C, KTX_LUMINANCE8 = 0x8040, KTX_LUMINANCE8_ALPHA8 = 0x8045 }; // Pixel data types enum { KTX_UNSIGNED_BYTE = 0x1401, KTX_BYTE = 0x1400, KTX_UNSIGNED_SHORT = 0x1403, KTX_SHORT = 0x1402, KTX_UNSIGNED_INT = 0x1405, KTX_INT = 0x1404, KTX_HALF_FLOAT = 0x140B, KTX_FLOAT = 0x1406, KTX_UNSIGNED_BYTE_3_3_2 = 0x8032, KTX_UNSIGNED_BYTE_2_3_3_REV = 0x8362, KTX_UNSIGNED_SHORT_5_6_5 = 0x8363, KTX_UNSIGNED_SHORT_5_6_5_REV = 0x8364, KTX_UNSIGNED_SHORT_4_4_4_4 = 0x8033, KTX_UNSIGNED_SHORT_4_4_4_4_REV = 0x8365, KTX_UNSIGNED_SHORT_5_5_5_1 = 0x8034, KTX_UNSIGNED_SHORT_1_5_5_5_REV = 0x8366, KTX_UNSIGNED_INT_8_8_8_8 = 0x8035, KTX_UNSIGNED_INT_8_8_8_8_REV = 0x8367, KTX_UNSIGNED_INT_10_10_10_2 = 0x8036, KTX_UNSIGNED_INT_2_10_10_10_REV = 0x8368, KTX_UNSIGNED_INT_24_8 = 0x84FA, KTX_UNSIGNED_INT_10F_11F_11F_REV = 0x8C3B, KTX_UNSIGNED_INT_5_9_9_9_REV = 0x8C3E, KTX_FLOAT_32_UNSIGNED_INT_24_8_REV = 0x8DAD }; bool is_packed_pixel_ogl_type(uint32 ogl_type); uint get_ogl_type_size(uint32 ogl_type); bool get_ogl_fmt_desc(uint32 ogl_fmt, uint32 ogl_type, uint& block_dim, uint& 
bytes_per_block); uint get_ogl_type_size(uint32 ogl_type); uint32 get_ogl_base_internal_fmt(uint32 ogl_fmt); class ktx_texture { public: ktx_texture() { clear(); } ktx_texture(const ktx_texture& other) { *this = other; } ktx_texture& operator=(const ktx_texture& rhs) { if (this == &rhs) return *this; clear(); m_header = rhs.m_header; m_key_values = rhs.m_key_values; m_image_data = rhs.m_image_data; m_block_dim = rhs.m_block_dim; m_bytes_per_block = rhs.m_bytes_per_block; m_opposite_endianness = rhs.m_opposite_endianness; return *this; } void clear() { m_header.clear(); m_key_values.clear(); m_image_data.clear(); m_block_dim = 0; m_bytes_per_block = 0; m_opposite_endianness = false; } // High level methods bool read_from_stream(data_stream_serializer& serializer); bool write_to_stream(data_stream_serializer& serializer, bool no_keyvalue_data = false); bool init_2D(uint width, uint height, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); bool init_2D_array(uint width, uint height, uint num_mips, uint array_size, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); bool init_3D(uint width, uint height, uint depth, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); bool init_cubemap(uint dim, uint num_mips, uint32 ogl_internal_fmt, uint32 ogl_fmt, uint32 ogl_type); bool check_header() const; bool consistency_check() const; // General info bool is_valid() const { return (m_header.m_pixelWidth > 0) && (m_image_data.size() > 0); } uint get_width() const { return m_header.m_pixelWidth; } uint get_height() const { return CRNLIB_MAX(m_header.m_pixelHeight, 1); } uint get_depth() const { return CRNLIB_MAX(m_header.m_pixelDepth, 1); } uint get_num_mips() const { return CRNLIB_MAX(m_header.m_numberOfMipmapLevels, 1); } uint get_array_size() const { return CRNLIB_MAX(m_header.m_numberOfArrayElements, 1); } uint get_num_faces() const { return m_header.m_numberOfFaces; } uint32 get_ogl_type() const { return m_header.m_glType; } 
uint32 get_ogl_fmt() const { return m_header.m_glFormat; } uint32 get_ogl_base_fmt() const { return m_header.m_glBaseInternalFormat; } uint32 get_ogl_internal_fmt() const { return m_header.m_glInternalFormat; } uint get_total_images() const { return get_num_mips() * (get_depth() * get_num_faces() * get_array_size()); } bool is_compressed() const { return m_block_dim > 1; } bool is_uncompressed() const { return !is_compressed(); } bool get_opposite_endianness() const { return m_opposite_endianness; } void set_opposite_endianness(bool flag) { m_opposite_endianness = flag; } uint32 get_block_dim() const { return m_block_dim; } uint32 get_bytes_per_block() const { return m_bytes_per_block; } const ktx_header& get_header() const { return m_header; } // Key values const ktx_key_value_vec& get_key_value_vec() const { return m_key_values; } ktx_key_value_vec& get_key_value_vec() { return m_key_values; } const uint8_vec* find_key(const char* pKey) const; bool get_key_value_as_string(const char* pKey, dynamic_string& str) const; uint add_key_value(const char* pKey, const void* pVal, uint val_size); uint add_key_value(const char* pKey, const char* pVal) { return add_key_value(pKey, pVal, static_cast(strlen(pVal)) + 1); } // Image data uint get_num_images() const { return m_image_data.size(); } const uint8_vec& get_image_data(uint image_index) const { return m_image_data[image_index]; } uint8_vec& get_image_data(uint image_index) { return m_image_data[image_index]; } const uint8_vec& get_image_data(uint mip_index, uint array_index, uint face_index, uint zslice_index) const { return get_image_data(get_image_index(mip_index, array_index, face_index, zslice_index)); } uint8_vec& get_image_data(uint mip_index, uint array_index, uint face_index, uint zslice_index) { return get_image_data(get_image_index(mip_index, array_index, face_index, zslice_index)); } const ktx_image_data_vec& get_image_data_vec() const { return m_image_data; } ktx_image_data_vec& get_image_data_vec() { return 
m_image_data; } void add_image(uint face_index, uint mip_index, const void* pImage, uint image_size) { const uint image_index = get_image_index(mip_index, 0, face_index, 0); if (image_index >= m_image_data.size()) m_image_data.resize(image_index + 1); if (image_size) { uint8_vec& v = m_image_data[image_index]; v.resize(image_size); memcpy(&v[0], pImage, image_size); } } uint get_image_index(uint mip_index, uint array_index, uint face_index, uint zslice_index) const { CRNLIB_ASSERT((mip_index < get_num_mips()) && (array_index < get_array_size()) && (face_index < get_num_faces()) && (zslice_index < get_depth())); return zslice_index + (face_index * get_depth()) + (array_index * (get_depth() * get_num_faces())) + (mip_index * (get_depth() * get_num_faces() * get_array_size())); } void get_mip_dim(uint mip_index, uint& mip_width, uint& mip_height) const { CRNLIB_ASSERT(mip_index < get_num_mips()); mip_width = CRNLIB_MAX(get_width() >> mip_index, 1); mip_height = CRNLIB_MAX(get_height() >> mip_index, 1); } void get_mip_dim(uint mip_index, uint& mip_width, uint& mip_height, uint& mip_depth) const { CRNLIB_ASSERT(mip_index < get_num_mips()); mip_width = CRNLIB_MAX(get_width() >> mip_index, 1); mip_height = CRNLIB_MAX(get_height() >> mip_index, 1); mip_depth = CRNLIB_MAX(get_depth() >> mip_index, 1); } private: ktx_header m_header; ktx_key_value_vec m_key_values; ktx_image_data_vec m_image_data; uint32 m_block_dim; uint32 m_bytes_per_block; bool m_opposite_endianness; bool compute_pixel_info(); }; } // namespace crnlib #endif // #ifndef _KTX_TEXTURE_H_ DaemonEngine-crunch-ef4d32f/crnlib/crn_lzma_codec.cpp000066400000000000000000000072121503722002600227400ustar00rootroot00000000000000// File: crn_lzma_codec.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_lzma_codec.h" #include "crn_strutils.h" #include "crn_checksum.h" #include "lzma_LzmaLib.h" #include "crn_threading.h" namespace crnlib { lzma_codec::lzma_codec() : 
m_pCompress(LzmaCompress), m_pUncompress(LzmaUncompress) { CRNLIB_ASSUME(cLZMAPropsSize == LZMA_PROPS_SIZE); } lzma_codec::~lzma_codec() { } bool lzma_codec::pack(const void* p, uint n, crnlib::vector& buf) { if (n > 1024U * 1024U * 1024U) return false; uint max_comp_size = n + math::maximum(128, n >> 8); buf.resize(sizeof(header) + max_comp_size); header* pHDR = reinterpret_cast(&buf[0]); uint8* pComp_data = &buf[sizeof(header)]; utils::zero_object(*pHDR); pHDR->m_uncomp_size = n; pHDR->m_adler32 = adler32(p, n); if (n) { size_t destLen = 0; size_t outPropsSize = 0; int status = SZ_ERROR_INPUT_EOF; for (uint trial = 0; trial < 3; trial++) { destLen = max_comp_size; outPropsSize = cLZMAPropsSize; status = (*m_pCompress)(pComp_data, &destLen, reinterpret_cast(p), n, pHDR->m_lzma_props, &outPropsSize, -1, /* 0 <= level <= 9, default = 5 */ 0, /* default = (1 << 24) */ -1, /* 0 <= lc <= 8, default = 3 */ -1, /* 0 <= lp <= 4, default = 0 */ -1, /* 0 <= pb <= 4, default = 2 */ -1, /* 5 <= fb <= 273, default = 32 */ (g_number_of_processors > 1) ? 
2 : 1 ); if (status != SZ_ERROR_OUTPUT_EOF) break; max_comp_size += ((n + 1) / 2); buf.resize(sizeof(header) + max_comp_size); pHDR = reinterpret_cast(&buf[0]); pComp_data = &buf[sizeof(header)]; } if (status != SZ_OK) { buf.clear(); return false; } pHDR->m_comp_size = static_cast(destLen); buf.resize(CRNLIB_SIZEOF_U32(header) + static_cast(destLen)); } pHDR->m_sig = header::cSig; pHDR->m_checksum = static_cast(adler32((uint8*)pHDR + header::cChecksumSkipBytes, sizeof(header) - header::cChecksumSkipBytes)); return true; } bool lzma_codec::unpack(const void* p, uint n, crnlib::vector& buf) { buf.resize(0); if (n < sizeof(header)) return false; const header& hdr = *static_cast(p); if (hdr.m_sig != header::cSig) return false; if (static_cast(adler32((const uint8*)&hdr + header::cChecksumSkipBytes, sizeof(hdr) - header::cChecksumSkipBytes)) != hdr.m_checksum) return false; if (!hdr.m_uncomp_size) return true; if (!hdr.m_comp_size) return false; if (hdr.m_uncomp_size > 1024U * 1024U * 1024U) return false; if (!buf.try_resize(hdr.m_uncomp_size)) return false; const uint8* pComp_data = static_cast(p) + sizeof(header); size_t srcLen = n - sizeof(header); if (srcLen < hdr.m_comp_size) return false; size_t destLen = hdr.m_uncomp_size; int status = (*m_pUncompress)(&buf[0], &destLen, pComp_data, &srcLen, hdr.m_lzma_props, cLZMAPropsSize); if ((status != SZ_OK) || (destLen != hdr.m_uncomp_size)) { buf.clear(); return false; } if (adler32(&buf[0], buf.size()) != hdr.m_adler32) { buf.clear(); return false; } return true; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_lzma_codec.h000066400000000000000000000046721503722002600224140ustar00rootroot00000000000000// File: crn_lzma_codec.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_packed_uint.h" namespace crnlib { class lzma_codec { public: lzma_codec(); ~lzma_codec(); // Always available, because we're statically linking in lzmalib now vs. dynamically loading the DLL. 
bool is_initialized() const { return true; } bool pack(const void* p, uint n, crnlib::vector& buf); bool unpack(const void* p, uint n, crnlib::vector& buf); private: typedef int(CRNLIB_STDCALL* LzmaCompressFuncPtr)(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t srcLen, unsigned char* outProps, size_t* outPropsSize, /* *outPropsSize must be = 5 */ int level, /* 0 <= level <= 9, default = 5 */ unsigned dictSize, /* default = (1 << 24) */ int lc, /* 0 <= lc <= 8, default = 3 */ int lp, /* 0 <= lp <= 4, default = 0 */ int pb, /* 0 <= pb <= 4, default = 2 */ int fb, /* 5 <= fb <= 273, default = 32 */ int numThreads /* 1 or 2, default = 2 */ ); typedef int(CRNLIB_STDCALL* LzmaUncompressFuncPtr)(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t* srcLen, const unsigned char* props, size_t propsSize); LzmaCompressFuncPtr m_pCompress; LzmaUncompressFuncPtr m_pUncompress; enum { cLZMAPropsSize = 5 }; #pragma pack(push) #pragma pack(1) struct header { enum { cSig = 'L' | ('0' << 8), cChecksumSkipBytes = 3 }; packed_uint<2> m_sig; uint8 m_checksum; uint8 m_lzma_props[cLZMAPropsSize]; packed_uint<4> m_comp_size; packed_uint<4> m_uncomp_size; packed_uint<4> m_adler32; }; #pragma pack(pop) }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_math.cpp000066400000000000000000000031611503722002600215700ustar00rootroot00000000000000// File: crn_math.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" namespace crnlib { namespace math { uint g_bitmasks[32] = { 1U << 0U, 1U << 1U, 1U << 2U, 1U << 3U, 1U << 4U, 1U << 5U, 1U << 6U, 1U << 7U, 1U << 8U, 1U << 9U, 1U << 10U, 1U << 11U, 1U << 12U, 1U << 13U, 1U << 14U, 1U << 15U, 1U << 16U, 1U << 17U, 1U << 18U, 1U << 19U, 1U << 20U, 1U << 21U, 1U << 22U, 1U << 23U, 1U << 24U, 1U << 25U, 1U << 26U, 1U << 27U, 1U << 28U, 1U << 29U, 1U << 30U, 1U << 31U}; double compute_entropy(const uint8* p, uint n) { uint hist[256]; utils::zero_object(hist); for 
(uint i = 0; i < n; i++) hist[*p++]++; double entropy = 0.0f; const double invln2 = 1.0f / log(2.0f); for (uint i = 0; i < 256; i++) { if (!hist[i]) continue; double prob = static_cast(hist[i]) / n; entropy += (-log(prob) * invln2) * hist[i]; } return entropy; } void compute_lower_pow2_dim(int& width, int& height) { const int tex_width = width; const int tex_height = height; width = 1; for (;;) { if ((width * 2) > tex_width) break; width *= 2; } height = 1; for (;;) { if ((height * 2) > tex_height) break; height *= 2; } } void compute_upper_pow2_dim(int& width, int& height) { if (!math::is_power_of_2((uint32)width)) width = math::next_pow2((uint32)width); if (!math::is_power_of_2((uint32)height)) height = math::next_pow2((uint32)height); } } // namespace math } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_math.h000066400000000000000000000134701503722002600212410ustar00rootroot00000000000000// File: crn_math.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #if defined(_M_IX86) && defined(_MSC_VER) #include #pragma intrinsic(__emulu) unsigned __int64 __emulu(unsigned int a, unsigned int b); #endif namespace crnlib { namespace math { const float cNearlyInfinite = 1.0e+37f; const float cDegToRad = 0.01745329252f; const float cRadToDeg = 57.29577951f; extern uint g_bitmasks[32]; template inline bool within_closed_range(T a, T b, T c) { return (a >= b) && (a <= c); } template inline bool within_open_range(T a, T b, T c) { return (a >= b) && (a < c); } // Yes I know these should probably be pass by ref, not val: // http://www.stepanovpapers.com/notes.pdf // Just don't use them on non-simple (non built-in) types! template inline T minimum(T a, T b) { return (a < b) ? a : b; } template inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); } template inline T maximum(T a, T b) { return (a > b) ? 
a : b; } template inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); } template inline T lerp(T a, T b, U c) { return a + (b - a) * c; } template inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? high : value); } template inline T saturate(T value) { return (value < 0.0f) ? 0.0f : ((value > 1.0f) ? 1.0f : value); } inline int float_to_int(float f) { return static_cast(f); } inline uint float_to_uint(float f) { return static_cast(f); } inline int float_to_int(double f) { return static_cast(f); } inline uint float_to_uint(double f) { return static_cast(f); } inline int float_to_int_round(float f) { return static_cast((f < 0.0f) ? -floor(-f + .5f) : floor(f + .5f)); } inline uint float_to_uint_round(float f) { return static_cast((f < 0.0f) ? 0.0f : floor(f + .5f)); } template inline int sign(T value) { return (value < 0) ? -1 : ((value > 0) ? 1 : 0); } template inline T square(T value) { return value * value; } inline bool is_power_of_2(uint32 x) { return x && ((x & (x - 1U)) == 0U); } inline bool is_power_of_2(uint64 x) { return x && ((x & (x - 1U)) == 0U); } template inline T align_up_value(T x, uint alignment) { CRNLIB_ASSERT(is_power_of_2(alignment)); uint q = static_cast(x); q = (q + alignment - 1) & (~(alignment - 1)); return static_cast(q); } template inline T align_down_value(T x, uint alignment) { CRNLIB_ASSERT(is_power_of_2(alignment)); uint q = static_cast(x); q = q & (~(alignment - 1)); return static_cast(q); } template inline T get_align_up_value_delta(T x, uint alignment) { return align_up_value(x, alignment) - x; } // From "Hackers Delight" inline uint32 next_pow2(uint32 val) { val--; val |= val >> 16; val |= val >> 8; val |= val >> 4; val |= val >> 2; val |= val >> 1; return val + 1; } inline uint64 next_pow2(uint64 val) { val--; val |= val >> 32; val |= val >> 16; val |= val >> 8; val |= val >> 4; val |= val >> 2; val |= val >> 1; return val + 1; } inline uint floor_log2i(uint v) { uint l = 0; 
while (v > 1U) { v >>= 1; l++; } return l; } inline uint ceil_log2i(uint v) { uint l = floor_log2i(v); if ((l != cIntBits) && (v > (1U << l))) l++; return l; } // Returns the total number of bits needed to encode v. inline uint total_bits(uint v) { uint l = 0; while (v > 0U) { v >>= 1; l++; } return l; } // Actually counts the number of set bits, but hey inline uint bitmask_size(uint mask) { uint size = 0; while (mask) { mask &= (mask - 1U); size++; } return size; } inline uint bitmask_ofs(uint mask) { if (!mask) return 0; uint ofs = 0; while ((mask & 1U) == 0) { mask >>= 1U; ofs++; } return ofs; } // See Bit Twiddling Hacks (public domain) // http://www-graphics.stanford.edu/~seander/bithacks.html inline uint count_trailing_zero_bits(uint v) { uint c = 32; // c will be the number of zero bits on the right static const unsigned int B[] = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF, 0x0000FFFF}; static const unsigned int S[] = {1, 2, 4, 8, 16}; // Our Magic Binary Numbers for (int i = 4; i >= 0; --i) // unroll for more speed { if (v & B[i]) { v <<= S[i]; c -= S[i]; } } if (v) { c--; } return c; } inline uint count_leading_zero_bits(uint v) { uint temp; uint result = 32U; temp = (v >> 16U); if (temp) { result -= 16U; v = temp; } temp = (v >> 8U); if (temp) { result -= 8U; v = temp; } temp = (v >> 4U); if (temp) { result -= 4U; v = temp; } temp = (v >> 2U); if (temp) { result -= 2U; v = temp; } temp = (v >> 1U); if (temp) { result -= 1U; v = temp; } if (v & 1U) result--; return result; } inline uint64 emulu(uint32 a, uint32 b) { #if defined(_M_IX86) && defined(_MSC_VER) return __emulu(a, b); #else return static_cast(a) * static_cast(b); #endif } double compute_entropy(const uint8* p, uint n); void compute_lower_pow2_dim(int& width, int& height); void compute_upper_pow2_dim(int& width, int& height); inline bool equal_tol(float a, float b, float t) { return fabs(a - b) < ((maximum(fabs(a), fabs(b)) + 1.0f) * t); } inline bool equal_tol(double a, double b, double t) 
{ return fabs(a - b) < ((maximum(fabs(a), fabs(b)) + 1.0f) * t); } } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_matrix.h000066400000000000000000000323321503722002600216120ustar00rootroot00000000000000// File: crn_matrix.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_vec.h" namespace crnlib { template Z& matrix_mul_helper(Z& result, const X& lhs, const Y& rhs) { CRNLIB_ASSUME(Z::num_rows == X::num_rows); CRNLIB_ASSUME(Z::num_cols == Y::num_cols); CRNLIB_ASSUME(X::num_cols == Y::num_rows); CRNLIB_ASSERT((&result != &lhs) && (&result != &rhs)); for (int r = 0; r < X::num_rows; r++) for (int c = 0; c < Y::num_cols; c++) { typename Z::scalar_type s = lhs(r, 0) * rhs(0, c); for (uint i = 1; i < X::num_cols; i++) s += lhs(r, i) * rhs(i, c); result(r, c) = s; } return result; } template Z& matrix_mul_helper_transpose_lhs(Z& result, const X& lhs, const Y& rhs) { CRNLIB_ASSUME(Z::num_rows == X::num_cols); CRNLIB_ASSUME(Z::num_cols == Y::num_cols); CRNLIB_ASSUME(X::num_rows == Y::num_rows); for (int r = 0; r < X::num_cols; r++) for (int c = 0; c < Y::num_cols; c++) { typename Z::scalar_type s = lhs(0, r) * rhs(0, c); for (uint i = 1; i < X::num_rows; i++) s += lhs(i, r) * rhs(i, c); result(r, c) = s; } return result; } template Z& matrix_mul_helper_transpose_rhs(Z& result, const X& lhs, const Y& rhs) { CRNLIB_ASSUME(Z::num_rows == X::num_rows); CRNLIB_ASSUME(Z::num_cols == Y::num_rows); CRNLIB_ASSUME(X::num_cols == Y::num_cols); for (int r = 0; r < X::num_rows; r++) for (int c = 0; c < Y::num_rows; c++) { typename Z::scalar_type s = lhs(r, 0) * rhs(c, 0); for (uint i = 1; i < X::num_cols; i++) s += lhs(r, i) * rhs(c, i); result(r, c) = s; } return result; } template class matrix { public: typedef T scalar_type; enum { num_rows = R, num_cols = C }; typedef vec col_vec; typedef vec<(R > 1) ? (R - 1) : 0, T> subcol_vec; typedef vec row_vec; typedef vec<(C > 1) ? 
(C - 1) : 0, T> subrow_vec; inline matrix() {} inline matrix(eClear) { clear(); } inline matrix(const T* p) { set(p); } inline matrix(const matrix& other) { for (uint i = 0; i < R; i++) m_rows[i] = other.m_rows[i]; } inline matrix& operator=(const matrix& rhs) { if (this != &rhs) for (uint i = 0; i < R; i++) m_rows[i] = rhs.m_rows[i]; return *this; } inline matrix(T val00, T val01, T val10, T val11) { set(val00, val01, val10, val11); } inline matrix(T val00, T val01, T val02, T val10, T val11, T val12, T val20, T val21, T val22) { set(val00, val01, val02, val10, val11, val12, val20, val21, val22); } inline matrix(T val00, T val01, T val02, T val03, T val10, T val11, T val12, T val13, T val20, T val21, T val22, T val23, T val30, T val31, T val32, T val33) { set(val00, val01, val02, val03, val10, val11, val12, val13, val20, val21, val22, val23, val30, val31, val32, val33); } inline void set(const float* p) { for (uint i = 0; i < R; i++) { m_rows[i].set(p); p += C; } } inline void set(T val00, T val01, T val10, T val11) { m_rows[0].set(val00, val01); if (R >= 2) { m_rows[1].set(val10, val11); for (uint i = 2; i < R; i++) m_rows[i].clear(); } } inline void set(T val00, T val01, T val02, T val10, T val11, T val12, T val20, T val21, T val22) { m_rows[0].set(val00, val01, val02); if (R >= 2) { m_rows[1].set(val10, val11, val12); if (R >= 3) { m_rows[2].set(val20, val21, val22); for (uint i = 3; i < R; i++) m_rows[i].clear(); } } } inline void set(T val00, T val01, T val02, T val03, T val10, T val11, T val12, T val13, T val20, T val21, T val22, T val23, T val30, T val31, T val32, T val33) { m_rows[0].set(val00, val01, val02, val03); if (R >= 2) { m_rows[1].set(val10, val11, val12, val13); if (R >= 3) { m_rows[2].set(val20, val21, val22, val23); if (R >= 4) { m_rows[3].set(val30, val31, val32, val33); for (uint i = 4; i < R; i++) m_rows[i].clear(); } } } } inline T operator()(uint r, uint c) const { CRNLIB_ASSERT((r < R) && (c < C)); return m_rows[r][c]; } inline T& 
operator()(uint r, uint c) { CRNLIB_ASSERT((r < R) && (c < C)); return m_rows[r][c]; } inline const row_vec& operator[](uint r) const { CRNLIB_ASSERT(r < R); return m_rows[r]; } inline row_vec& operator[](uint r) { CRNLIB_ASSERT(r < R); return m_rows[r]; } inline const row_vec& get_row(uint r) const { return (*this)[r]; } inline row_vec& get_row(uint r) { return (*this)[r]; } inline col_vec get_col(uint c) const { CRNLIB_ASSERT(c < C); col_vec result; for (uint i = 0; i < R; i++) result[i] = m_rows[i][c]; return result; } inline void set_col(uint c, const col_vec& col) { CRNLIB_ASSERT(c < C); for (uint i = 0; i < R; i++) m_rows[i][c] = col[i]; } inline void set_col(uint c, const subcol_vec& col) { CRNLIB_ASSERT(c < C); for (uint i = 0; i < (R - 1); i++) m_rows[i][c] = col[i]; m_rows[R - 1][c] = 0.0f; } inline const row_vec& get_translate() const { return m_rows[R - 1]; } inline matrix& set_translate(const row_vec& r) { m_rows[R - 1] = r; return *this; } inline matrix& set_translate(const subrow_vec& r) { m_rows[R - 1] = row_vec(r).as_point(); return *this; } inline const T* get_ptr() const { return reinterpret_cast(&m_rows[0]); } inline T* get_ptr() { return reinterpret_cast(&m_rows[0]); } inline matrix& operator+=(const matrix& other) { for (uint i = 0; i < R; i++) m_rows[i] += other.m_rows[i]; return *this; } inline matrix& operator-=(const matrix& other) { for (uint i = 0; i < R; i++) m_rows[i] -= other.m_rows[i]; return *this; } inline matrix& operator*=(T val) { for (uint i = 0; i < R; i++) m_rows[i] *= val; return *this; } inline matrix& operator/=(T val) { for (uint i = 0; i < R; i++) m_rows[i] /= val; return *this; } inline matrix& operator*=(const matrix& other) { matrix result; matrix_mul_helper(result, *this, other); *this = result; return *this; } friend inline matrix operator+(const matrix& lhs, const matrix& rhs) { matrix result; for (uint i = 0; i < R; i++) result[i] = lhs.m_rows[i] + rhs.m_rows[i]; return result; } friend inline matrix 
operator-(const matrix& lhs, const matrix& rhs) { matrix result; for (uint i = 0; i < R; i++) result[i] = lhs.m_rows[i] - rhs.m_rows[i]; return result; } friend inline matrix operator*(const matrix& lhs, T val) { matrix result; for (uint i = 0; i < R; i++) result[i] = lhs.m_rows[i] * val; return result; } friend inline matrix operator/(const matrix& lhs, T val) { matrix result; for (uint i = 0; i < R; i++) result[i] = lhs.m_rows[i] / val; return result; } friend inline matrix operator*(T val, const matrix& rhs) { matrix result; for (uint i = 0; i < R; i++) result[i] = val * rhs.m_rows[i]; return result; } friend inline matrix operator*(const matrix& lhs, const matrix& rhs) { matrix result; return matrix_mul_helper(result, lhs, rhs); } friend inline row_vec operator*(const col_vec& a, const matrix& b) { return transform(a, b); } inline matrix operator+() const { return *this; } inline matrix operator-() const { matrix result; for (uint i = 0; i < R; i++) result[i] = -m_rows[i]; return result; } inline void clear(void) { for (uint i = 0; i < R; i++) m_rows[i].clear(); } inline void set_zero_matrix() { clear(); } inline void set_identity_matrix() { for (uint i = 0; i < R; i++) { m_rows[i].clear(); m_rows[i][i] = 1.0f; } } inline matrix& set_scale_matrix(float s) { clear(); for (int i = 0; i < (R - 1); i++) m_rows[i][i] = s; m_rows[R - 1][C - 1] = 1.0f; return *this; } inline matrix& set_scale_matrix(const row_vec& s) { clear(); for (uint i = 0; i < R; i++) m_rows[i][i] = s[i]; return *this; } inline matrix& set_translate_matrix(const row_vec& s) { set_identity_matrix(); set_translate(s); return *this; } inline matrix& set_translate_matrix(float x, float y) { set_identity_matrix(); set_translate(row_vec(x, y).as_point()); return *this; } inline matrix& set_translate_matrix(float x, float y, float z) { set_identity_matrix(); set_translate(row_vec(x, y, z).as_point()); return *this; } inline matrix get_transposed(void) const { matrix result; for (uint i = 0; i < R; i++) 
for (uint j = 0; j < C; j++) result.m_rows[i][j] = m_rows[j][i]; return result; } inline matrix& transpose_in_place(void) { matrix result; for (uint i = 0; i < R; i++) for (uint j = 0; j < C; j++) result.m_rows[i][j] = m_rows[j][i]; *this = result; return *this; } // This method transforms a column vec by a matrix (D3D-style). static inline row_vec transform(const col_vec& a, const matrix& b) { row_vec result(b[0] * a[0]); for (uint r = 1; r < R; r++) result += b[r] * a[r]; return result; } // This method transforms a column vec by a matrix. Last component of vec is assumed to be 1. static inline row_vec transform_point(const col_vec& a, const matrix& b) { row_vec result(0); for (int r = 0; r < (R - 1); r++) result += b[r] * a[r]; result += b[R - 1]; return result; } // This method transforms a column vec by a matrix. Last component of vec is assumed to be 0. static inline row_vec transform_vector(const col_vec& a, const matrix& b) { row_vec result(0); for (int r = 0; r < (R - 1); r++) result += b[r] * a[r]; return result; } static inline subcol_vec transform_point(const subcol_vec& a, const matrix& b) { subcol_vec result(0); for (int r = 0; r < R; r++) { const T s = (r < subcol_vec::num_elements) ? a[r] : 1.0f; for (int c = 0; c < (C - 1); c++) result[c] += b[r][c] * s; } return result; } static inline subcol_vec transform_vector(const subcol_vec& a, const matrix& b) { subcol_vec result(0); for (int r = 0; r < (R - 1); r++) { const T s = a[r]; for (int c = 0; c < (C - 1); c++) result[c] += b[r][c] * s; } return result; } // This method transforms a column vec by the transpose of a matrix. static inline col_vec transform_transposed(const matrix& b, const col_vec& a) { CRNLIB_ASSUME(R == C); col_vec result; for (uint r = 0; r < R; r++) result[r] = b[r] * a; return result; } // This method transforms a column vec by the transpose of a matrix. Last component of vec is assumed to be 0. 
static inline col_vec transform_vector_transposed(const matrix& b, const col_vec& a) { CRNLIB_ASSUME(R == C); col_vec result; for (uint r = 0; r < R; r++) { T s = 0; for (uint c = 0; c < (C - 1); c++) s += b[r][c] * a[c]; result[r] = s; } return result; } // This method transforms a matrix by a row vector (OGL style). static inline col_vec transform(const matrix& b, const row_vec& a) { col_vec result; for (int r = 0; r < R; r++) result[r] = b[r] * a; return result; } static inline matrix& multiply(matrix& result, const matrix& lhs, const matrix& rhs) { return matrix_mul_helper(result, lhs, rhs); } static inline matrix make_scale_matrix(float s) { return matrix().set_scale_matrix(s); } static inline matrix make_scale_matrix(const row_vec& s) { return matrix().set_scale_matrix(s); } static inline matrix make_scale_matrix(float x, float y) { CRNLIB_ASSUME(R >= 3 && C >= 3); matrix result; result.clear(); result.m_rows[0][0] = x; result.m_rows[1][1] = y; result.m_rows[2][2] = 1.0f; return result; } static inline matrix make_scale_matrix(float x, float y, float z) { CRNLIB_ASSUME(R >= 4 && C >= 4); matrix result; result.clear(); result.m_rows[0][0] = x; result.m_rows[1][1] = y; result.m_rows[2][2] = z; result.m_rows[3][3] = 1.0f; return result; } private: row_vec m_rows[R]; }; typedef matrix<2, 2, float> matrix22F; typedef matrix<2, 2, double> matrix22D; typedef matrix<3, 3, float> matrix33F; typedef matrix<3, 3, double> matrix33D; typedef matrix<4, 4, float> matrix44F; typedef matrix<4, 4, double> matrix44D; typedef matrix<8, 8, float> matrix88F; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_mem.cpp000066400000000000000000000176311503722002600214240ustar00rootroot00000000000000// File: crn_mem.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_console.h" #include "../inc/crnlib.h" #if defined(__FreeBSD__) // has been replaced by #include // for malloc_usable_size #elif defined(__APPLE__) #include #else 
#include #endif #if CRNLIB_USE_WIN32_API #include "crn_winhdr.h" #endif #define CRNLIB_MEM_STATS 0 #if defined(__APPLE__) #define _msize malloc_size #elif !CRNLIB_USE_WIN32_API #define _msize malloc_usable_size #endif namespace crnlib { #if CRNLIB_MEM_STATS #if CRNLIB_64BIT_POINTERS typedef LONGLONG mem_stat_t; #define CRNLIB_MEM_COMPARE_EXCHANGE InterlockedCompareExchange64 #else typedef LONG mem_stat_t; #define CRNLIB_MEM_COMPARE_EXCHANGE InterlockedCompareExchange #endif static volatile mem_stat_t g_total_blocks; static volatile mem_stat_t g_total_allocated; static volatile mem_stat_t g_max_allocated; static mem_stat_t update_total_allocated(int block_delta, mem_stat_t byte_delta) { mem_stat_t cur_total_blocks; for (;;) { cur_total_blocks = (mem_stat_t)g_total_blocks; mem_stat_t new_total_blocks = static_cast(cur_total_blocks + block_delta); CRNLIB_ASSERT(new_total_blocks >= 0); if (CRNLIB_MEM_COMPARE_EXCHANGE(&g_total_blocks, new_total_blocks, cur_total_blocks) == cur_total_blocks) break; } mem_stat_t cur_total_allocated, new_total_allocated; for (;;) { cur_total_allocated = g_total_allocated; new_total_allocated = static_cast(cur_total_allocated + byte_delta); CRNLIB_ASSERT(new_total_allocated >= 0); if (CRNLIB_MEM_COMPARE_EXCHANGE(&g_total_allocated, new_total_allocated, cur_total_allocated) == cur_total_allocated) break; } for (;;) { mem_stat_t cur_max_allocated = g_max_allocated; mem_stat_t new_max_allocated = CRNLIB_MAX(new_total_allocated, cur_max_allocated); if (CRNLIB_MEM_COMPARE_EXCHANGE(&g_max_allocated, new_max_allocated, cur_max_allocated) == cur_max_allocated) break; } return new_total_allocated; } #endif // CRNLIB_MEM_STATS static void* crnlib_default_realloc(void* p, size_t size, size_t* pActual_size, bool movable, void*) { void* p_new; if (!p) { #if defined(__FreeBSD__) // See https://github.com/DaemonEngine/crunch/pull/36 p_new = ::aligned_alloc(CRNLIB_MIN_ALLOC_ALIGNMENT, size); #else p_new = ::malloc(size); #endif 
CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); if (!p_new) { printf("WARNING: ::malloc() of size %u failed!\n", (uint)size); } if (pActual_size) *pActual_size = p_new ? ::_msize(p_new) : 0; } else if (!size) { ::free(p); p_new = NULL; if (pActual_size) *pActual_size = 0; } else { void* p_final_block = p; #ifdef WIN32 p_new = ::_expand(p, size); #else p_new = NULL; #endif if (p_new) { CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); p_final_block = p_new; } else if (movable) { p_new = ::realloc(p, size); if (p_new) { CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); p_final_block = p_new; } else { printf("WARNING: ::realloc() of size %u failed!\n", (uint)size); } } if (pActual_size) *pActual_size = ::_msize(p_final_block); } return p_new; } static size_t crnlib_default_msize(void* p, void*) { return p ? _msize(p) : 0; } static crn_realloc_func g_pRealloc = crnlib_default_realloc; static crn_msize_func g_pMSize = crnlib_default_msize; static void* g_pUser_data; void crnlib_mem_error(const char* p_msg) { crnlib_assert(p_msg, __FILE__, __LINE__); } void* crnlib_malloc(size_t size) { return crnlib_malloc(size, NULL); } void* crnlib_malloc(size_t size, size_t* pActual_size) { size = (size + sizeof(uint32) - 1U) & ~(sizeof(uint32) - 1U); if (!size) size = sizeof(uint32); if (size > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) { crnlib_mem_error("crnlib_malloc: size too big"); return NULL; } size_t actual_size = size; uint8* p_new = static_cast((*g_pRealloc)(NULL, size, &actual_size, true, g_pUser_data)); if (pActual_size) *pActual_size = actual_size; if ((!p_new) || (actual_size < size)) { crnlib_mem_error("crnlib_malloc: out of memory"); return NULL; } CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); #if CRNLIB_MEM_STATS CRNLIB_ASSERT((*g_pMSize)(p_new, g_pUser_data) == actual_size); update_total_allocated(1, static_cast(actual_size)); #endif return 
p_new; } void* crnlib_realloc(void* p, size_t size, size_t* pActual_size, bool movable) { if ((ptr_bits_t)p & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) { crnlib_mem_error("crnlib_realloc: bad ptr"); return NULL; } if (size > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) { crnlib_mem_error("crnlib_malloc: size too big"); return NULL; } #if CRNLIB_MEM_STATS size_t cur_size = p ? (*g_pMSize)(p, g_pUser_data) : 0; CRNLIB_ASSERT(!p || (cur_size >= sizeof(uint32))); #endif if ((size) && (size < sizeof(uint32))) size = sizeof(uint32); size_t actual_size = size; void* p_new = (*g_pRealloc)(p, size, &actual_size, movable, g_pUser_data); if (pActual_size) *pActual_size = actual_size; CRNLIB_ASSERT((reinterpret_cast(p_new) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) == 0); #if CRNLIB_MEM_STATS CRNLIB_ASSERT(!p_new || ((*g_pMSize)(p_new, g_pUser_data) == actual_size)); int num_new_blocks = 0; if (p) { if (!p_new) num_new_blocks = -1; } else if (p_new) { num_new_blocks = 1; } update_total_allocated(num_new_blocks, static_cast(actual_size) - static_cast(cur_size)); #endif return p_new; } void* crnlib_calloc(size_t count, size_t size, size_t* pActual_size) { size_t total = count * size; void* p = crnlib_malloc(total, pActual_size); if (p) memset(p, 0, total); return p; } void crnlib_free(void* p) { if (!p) return; if (reinterpret_cast(p) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) { crnlib_mem_error("crnlib_free: bad ptr"); return; } #if CRNLIB_MEM_STATS size_t cur_size = (*g_pMSize)(p, g_pUser_data); CRNLIB_ASSERT(cur_size >= sizeof(uint32)); update_total_allocated(-1, -static_cast(cur_size)); #endif (*g_pRealloc)(p, 0, NULL, true, g_pUser_data); } size_t crnlib_msize(void* p) { if (!p) return 0; if (reinterpret_cast(p) & (CRNLIB_MIN_ALLOC_ALIGNMENT - 1)) { crnlib_mem_error("crnlib_msize: bad ptr"); return 0; } return (*g_pMSize)(p, g_pUser_data); } void crnlib_print_mem_stats() { #if CRNLIB_MEM_STATS if (console::is_initialized()) { console::debug("crnlib_print_mem_stats:"); console::debug("Current blocks: %u, 
allocated: " CRNLIB_INT64_FORMAT_SPECIFIER ", max ever allocated: " CRNLIB_INT64_FORMAT_SPECIFIER, g_total_blocks, (int64)g_total_allocated, (int64)g_max_allocated); } else { printf("crnlib_print_mem_stats:\n"); printf("Current blocks: %u, allocated: " CRNLIB_INT64_FORMAT_SPECIFIER ", max ever allocated: " CRNLIB_INT64_FORMAT_SPECIFIER "\n", g_total_blocks, (int64)g_total_allocated, (int64)g_max_allocated); } #endif } } // namespace crnlib void crn_set_memory_callbacks(crn_realloc_func pRealloc, crn_msize_func pMSize, void* pUser_data) { if ((!pRealloc) || (!pMSize)) { crnlib::g_pRealloc = crnlib::crnlib_default_realloc; crnlib::g_pMSize = crnlib::crnlib_default_msize; crnlib::g_pUser_data = NULL; } else { crnlib::g_pRealloc = pRealloc; crnlib::g_pMSize = pMSize; crnlib::g_pUser_data = pUser_data; } } DaemonEngine-crunch-ef4d32f/crnlib/crn_mem.h000066400000000000000000000177621503722002600210760ustar00rootroot00000000000000// File: crn_mem.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #ifndef CRNLIB_MIN_ALLOC_ALIGNMENT #define CRNLIB_MIN_ALLOC_ALIGNMENT sizeof(size_t) * 2 #endif namespace crnlib { #if CRNLIB_64BIT_POINTERS const uint64 CRNLIB_MAX_POSSIBLE_BLOCK_SIZE = 0x400000000ULL; #else const uint32 CRNLIB_MAX_POSSIBLE_BLOCK_SIZE = 0x7FFF0000U; #endif void* crnlib_malloc(size_t size); void* crnlib_malloc(size_t size, size_t* pActual_size); void* crnlib_realloc(void* p, size_t size, size_t* pActual_size = NULL, bool movable = true); void* crnlib_calloc(size_t count, size_t size, size_t* pActual_size = NULL); void crnlib_free(void* p); size_t crnlib_msize(void* p); void crnlib_print_mem_stats(); void crnlib_mem_error(const char* p_msg); // omfg - there must be a better way template inline T* crnlib_new() { T* p = static_cast(crnlib_malloc(sizeof(T))); if (CRNLIB_IS_SCALAR_TYPE(T)) return p; return helpers::construct(p); } template inline T* crnlib_new(const A& init0) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new 
(static_cast(p)) T(init0); } template inline T* crnlib_new(A& init0) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0); } template inline T* crnlib_new(const A& init0, const B& init1) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1); } template inline T* crnlib_new(const A& init0, const B& init1, const C& init2) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2); } template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3); } template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4); } template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5); } template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6); } template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7); } template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, 
init1, init2, init3, init4, init5, init6, init7, init8); } template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9); } template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9, const K& init10) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9, init10); } template inline T* crnlib_new(const A& init0, const B& init1, const C& init2, const D& init3, const E& init4, const F& init5, const G& init6, const H& init7, const I& init8, const J& init9, const K& init10, const L& init11) { T* p = static_cast(crnlib_malloc(sizeof(T))); return new (static_cast(p)) T(init0, init1, init2, init3, init4, init5, init6, init7, init8, init9, init10, init11); } template inline T* crnlib_new_array(uint32 num) { if (!num) num = 1; uint64 total = CRNLIB_MIN_ALLOC_ALIGNMENT + sizeof(T) * num; if (total > CRNLIB_MAX_POSSIBLE_BLOCK_SIZE) { crnlib_mem_error("crnlib_new_array: Array too large!"); return NULL; } uint8* q = static_cast(crnlib_malloc(static_cast(total))); T* p = reinterpret_cast(q + CRNLIB_MIN_ALLOC_ALIGNMENT); reinterpret_cast(p)[-1] = num; reinterpret_cast(p)[-2] = ~num; if (!CRNLIB_IS_SCALAR_TYPE(T)) { helpers::construct_array(p, num); } return p; } template inline void crnlib_delete(T* p) { if (p) { if (!CRNLIB_IS_SCALAR_TYPE(T)) { helpers::destruct(p); } crnlib_free(p); } } template inline void crnlib_delete_array(T* p) { if (p) { const uint32 num = reinterpret_cast(p)[-1]; const uint32 num_check = reinterpret_cast(p)[-2]; CRNLIB_ASSERT(num && (num == ~num_check)); if (num == 
~num_check) { if (!CRNLIB_IS_SCALAR_TYPE(T)) { helpers::destruct_array(p, num); } crnlib_free(reinterpret_cast(p) - CRNLIB_MIN_ALLOC_ALIGNMENT); } } } } // namespace crnlib #define CRNLIB_DEFINE_NEW_DELETE \ void* operator new(size_t size) { \ void* p = crnlib::crnlib_malloc(size); \ if (!p) \ crnlib_fail("new: Out of memory!", __FILE__, __LINE__); \ return p; \ } \ void* operator new[](size_t size) { \ void* p = crnlib::crnlib_malloc(size); \ if (!p) \ crnlib_fail("new[]: Out of memory!", __FILE__, __LINE__); \ return p; \ } \ void operator delete(void* p_block) { \ crnlib::crnlib_free(p_block); \ } \ void operator delete[](void* p_block) { \ crnlib::crnlib_free(p_block); \ } DaemonEngine-crunch-ef4d32f/crnlib/crn_miniz.cpp000066400000000000000000005546041503722002600220020ustar00rootroot00000000000000// File: crn_miniz.cpp #include "crn_core.h" #include "crn_miniz.h" // ------------------- End of Header: Implementation follows. (If you only want the header, define MINIZ_HEADER_FILE_ONLY.) #ifndef MINIZ_HEADER_FILE_ONLY typedef unsigned char mz_validate_uint16[sizeof(mz_uint16) == 2 ? 1 : -1]; typedef unsigned char mz_validate_uint32[sizeof(mz_uint32) == 4 ? 1 : -1]; typedef unsigned char mz_validate_uint64[sizeof(mz_uint64) == 8 ? 1 : -1]; #include #include #include "crn_core.h" #define MZ_ASSERT(x) assert(x) #ifdef MINIZ_NO_MALLOC #define MZ_MALLOC(x) NULL #define MZ_FREE(x) (void)x, ((void)0) #define MZ_REALLOC(p, x) NULL #else #define MZ_MALLOC(x) crnlib::crnlib_malloc(x) #define MZ_FREE(x) crnlib::crnlib_free(x) #define MZ_REALLOC(p, x) crnlib::crnlib_realloc(p, x) #endif #define MZ_MAX(a, b) (((a) > (b)) ? (a) : (b)) #define MZ_MIN(a, b) (((a) < (b)) ? 
(a) : (b)) #define MZ_CLEAR_OBJ(obj) memset(&(obj), 0, sizeof(obj)) #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN #define MZ_READ_LE16(p) *((const mz_uint16*)(p)) #define MZ_READ_LE32(p) *((const mz_uint32*)(p)) #else #define MZ_READ_LE16(p) ((mz_uint32)(((const mz_uint8*)(p))[0]) | ((mz_uint32)(((const mz_uint8*)(p))[1]) << 8U)) #define MZ_READ_LE32(p) ((mz_uint32)(((const mz_uint8*)(p))[0]) | ((mz_uint32)(((const mz_uint8*)(p))[1]) << 8U) | ((mz_uint32)(((const mz_uint8*)(p))[2]) << 16U) | ((mz_uint32)(((const mz_uint8*)(p))[3]) << 24U)) #endif #ifdef _MSC_VER #define MZ_FORCEINLINE __forceinline #elif defined(__GNUC__) #define MZ_FORCEINLINE __attribute__((__always_inline__)) inline #else #define MZ_FORCEINLINE #endif #ifdef __cplusplus extern "C" { #endif // ------------------- zlib-style API's mz_ulong mz_adler32(mz_ulong adler, const unsigned char* ptr, size_t buf_len) { mz_uint32 i, s1 = (mz_uint32)(adler & 0xffff), s2 = (mz_uint32)(adler >> 16); size_t block_len = buf_len % 5552; if (!ptr) return MZ_ADLER32_INIT; while (buf_len) { for (i = 0; i + 7 < block_len; i += 8, ptr += 8) { s1 += ptr[0], s2 += s1; s1 += ptr[1], s2 += s1; s1 += ptr[2], s2 += s1; s1 += ptr[3], s2 += s1; s1 += ptr[4], s2 += s1; s1 += ptr[5], s2 += s1; s1 += ptr[6], s2 += s1; s1 += ptr[7], s2 += s1; } for (; i < block_len; ++i) s1 += *ptr++, s2 += s1; s1 %= 65521U, s2 %= 65521U; buf_len -= block_len; block_len = 5552; } return (s2 << 16) + s1; } // Karl Malbrain's compact CRC-32. 
See "A compact CCITT crc16 and crc32 C implementation that balances processor cache usage against speed": https://www.oocities.com/malbrain/ mz_ulong mz_crc32(mz_ulong crc, const mz_uint8* ptr, size_t buf_len) { static const mz_uint32 s_crc32[16] = {0, 0x1db71064, 0x3b6e20c8, 0x26d930ac, 0x76dc4190, 0x6b6b51f4, 0x4db26158, 0x5005713c, 0xedb88320, 0xf00f9344, 0xd6d6a3e8, 0xcb61b38c, 0x9b64c2b0, 0x86d3d2d4, 0xa00ae278, 0xbdbdf21c}; mz_uint32 crcu32 = (mz_uint32)crc; if (!ptr) return MZ_CRC32_INIT; crcu32 = ~crcu32; while (buf_len--) { mz_uint8 b = *ptr++; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b & 0xF)]; crcu32 = (crcu32 >> 4) ^ s_crc32[(crcu32 & 0xF) ^ (b >> 4)]; } return ~crcu32; } void mz_free(void* p) { MZ_FREE(p); } #ifndef MINIZ_NO_ZLIB_APIS static void* def_alloc_func(void* opaque, size_t items, size_t size) { (void)opaque, (void)items, (void)size; return MZ_MALLOC(items * size); } static void def_free_func(void* opaque, void* address) { (void)opaque, (void)address; MZ_FREE(address); } static void* def_realloc_func(void* opaque, void* address, size_t items, size_t size) { (void)opaque, (void)address, (void)items, (void)size; return MZ_REALLOC(address, items * size); } const char* mz_version(void) { return MZ_VERSION; } int mz_deflateInit(mz_streamp pStream, int level) { return mz_deflateInit2(pStream, level, MZ_DEFLATED, MZ_DEFAULT_WINDOW_BITS, 9, MZ_DEFAULT_STRATEGY); } int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy) { tdefl_compressor* pComp; mz_uint comp_flags = TDEFL_COMPUTE_ADLER32 | tdefl_create_comp_flags_from_zip_params(level, window_bits, strategy); if (!pStream) return MZ_STREAM_ERROR; if ((method != MZ_DEFLATED) || ((mem_level < 1) || (mem_level > 9)) || ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS))) return MZ_PARAM_ERROR; pStream->data_type = 0; pStream->adler = MZ_ADLER32_INIT; pStream->msg = NULL; pStream->reserved = 0; 
pStream->total_in = 0; pStream->total_out = 0; if (!pStream->zalloc) pStream->zalloc = def_alloc_func; if (!pStream->zfree) pStream->zfree = def_free_func; pComp = (tdefl_compressor*)pStream->zalloc(pStream->opaque, 1, sizeof(tdefl_compressor)); if (!pComp) return MZ_MEM_ERROR; pStream->state = (struct mz_internal_state*)pComp; if (tdefl_init(pComp, NULL, NULL, comp_flags) != TDEFL_STATUS_OKAY) { mz_deflateEnd(pStream); return MZ_PARAM_ERROR; } return MZ_OK; } int mz_deflateReset(mz_streamp pStream) { if ((!pStream) || (!pStream->state) || (!pStream->zalloc) || (!pStream->zfree)) return MZ_STREAM_ERROR; pStream->total_in = pStream->total_out = 0; tdefl_init((tdefl_compressor*)pStream->state, NULL, NULL, ((tdefl_compressor*)pStream->state)->m_flags); return MZ_OK; } int mz_deflate(mz_streamp pStream, int flush) { size_t in_bytes, out_bytes; mz_ulong orig_total_in, orig_total_out; int mz_status = MZ_OK; if ((!pStream) || (!pStream->state) || (flush < 0) || (flush > MZ_FINISH) || (!pStream->next_out)) return MZ_STREAM_ERROR; if (!pStream->avail_out) return MZ_BUF_ERROR; if (flush == MZ_PARTIAL_FLUSH) flush = MZ_SYNC_FLUSH; if (((tdefl_compressor*)pStream->state)->m_prev_return_status == TDEFL_STATUS_DONE) return (flush == MZ_FINISH) ? 
MZ_STREAM_END : MZ_BUF_ERROR; orig_total_in = pStream->total_in; orig_total_out = pStream->total_out; for (;;) { tdefl_status defl_status; in_bytes = pStream->avail_in; out_bytes = pStream->avail_out; defl_status = tdefl_compress((tdefl_compressor*)pStream->state, pStream->next_in, &in_bytes, pStream->next_out, &out_bytes, (tdefl_flush)flush); pStream->next_in += (mz_uint)in_bytes; pStream->avail_in -= (mz_uint)in_bytes; pStream->total_in += (mz_uint)in_bytes; pStream->adler = tdefl_get_adler32((tdefl_compressor*)pStream->state); pStream->next_out += (mz_uint)out_bytes; pStream->avail_out -= (mz_uint)out_bytes; pStream->total_out += (mz_uint)out_bytes; if (defl_status < 0) { mz_status = MZ_STREAM_ERROR; break; } else if (defl_status == TDEFL_STATUS_DONE) { mz_status = MZ_STREAM_END; break; } else if (!pStream->avail_out) break; else if ((!pStream->avail_in) && (flush != MZ_FINISH)) { if ((flush) || (pStream->total_in != orig_total_in) || (pStream->total_out != orig_total_out)) break; return MZ_BUF_ERROR; // Can't make forward progress without some input. } } return mz_status; } int mz_deflateEnd(mz_streamp pStream) { if (!pStream) return MZ_STREAM_ERROR; if (pStream->state) { pStream->zfree(pStream->opaque, pStream->state); pStream->state = NULL; } return MZ_OK; } mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len) { (void)pStream; // This is really over conservative. (And lame, but it's actually pretty tricky to compute a true upper bound given the way tdefl's blocking works.) return MZ_MAX(128 + (source_len * 110) / 100, 128 + source_len + ((source_len / (31 * 1024)) + 1) * 5); } int mz_compress2(unsigned char* pDest, mz_ulong* pDest_len, const unsigned char* pSource, mz_ulong source_len, int level) { int status; mz_stream stream; memset(&stream, 0, sizeof(stream)); // In case mz_ulong is 64-bits (argh I hate longs). 
if ((source_len | *pDest_len) > 0xFFFFFFFFU) return MZ_PARAM_ERROR; stream.next_in = pSource; stream.avail_in = (mz_uint32)source_len; stream.next_out = pDest; stream.avail_out = (mz_uint32)*pDest_len; status = mz_deflateInit(&stream, level); if (status != MZ_OK) return status; status = mz_deflate(&stream, MZ_FINISH); if (status != MZ_STREAM_END) { mz_deflateEnd(&stream); return (status == MZ_OK) ? MZ_BUF_ERROR : status; } *pDest_len = stream.total_out; return mz_deflateEnd(&stream); } int mz_compress(unsigned char* pDest, mz_ulong* pDest_len, const unsigned char* pSource, mz_ulong source_len) { return mz_compress2(pDest, pDest_len, pSource, source_len, MZ_DEFAULT_COMPRESSION); } mz_ulong mz_compressBound(mz_ulong source_len) { return mz_deflateBound(NULL, source_len); } typedef struct { tinfl_decompressor m_decomp; mz_uint m_dict_ofs, m_dict_avail, m_first_call, m_has_flushed; int m_window_bits; mz_uint8 m_dict[TINFL_LZ_DICT_SIZE]; tinfl_status m_last_status; } inflate_state; int mz_inflateInit2(mz_streamp pStream, int window_bits) { inflate_state* pDecomp; if (!pStream) return MZ_STREAM_ERROR; if ((window_bits != MZ_DEFAULT_WINDOW_BITS) && (-window_bits != MZ_DEFAULT_WINDOW_BITS)) return MZ_PARAM_ERROR; pStream->data_type = 0; pStream->adler = 0; pStream->msg = NULL; pStream->total_in = 0; pStream->total_out = 0; pStream->reserved = 0; if (!pStream->zalloc) pStream->zalloc = def_alloc_func; if (!pStream->zfree) pStream->zfree = def_free_func; pDecomp = (inflate_state*)pStream->zalloc(pStream->opaque, 1, sizeof(inflate_state)); if (!pDecomp) return MZ_MEM_ERROR; pStream->state = (struct mz_internal_state*)pDecomp; tinfl_init(&pDecomp->m_decomp); pDecomp->m_dict_ofs = 0; pDecomp->m_dict_avail = 0; pDecomp->m_last_status = TINFL_STATUS_NEEDS_MORE_INPUT; pDecomp->m_first_call = 1; pDecomp->m_has_flushed = 0; pDecomp->m_window_bits = window_bits; return MZ_OK; } int mz_inflateInit(mz_streamp pStream) { return mz_inflateInit2(pStream, MZ_DEFAULT_WINDOW_BITS); } int 
mz_inflate(mz_streamp pStream, int flush) {
  // Decompresses from next_in/avail_in into next_out/avail_out.
  // Returns MZ_STREAM_ERROR for a missing stream/state, an unsupported flush value, or a non-MZ_FINISH call
  // after MZ_FINISH was requested; MZ_DATA_ERROR when the decompressor has failed; MZ_BUF_ERROR when no
  // progress is possible with the supplied buffers; MZ_STREAM_END when all output has been delivered; else MZ_OK.
  inflate_state* pState;
  mz_uint n, first_call, decomp_flags = TINFL_FLAG_COMPUTE_ADLER32;
  size_t in_bytes, out_bytes, orig_avail_in;
  tinfl_status status;

  if ((!pStream) || (!pStream->state))
    return MZ_STREAM_ERROR;
  // MZ_PARTIAL_FLUSH is handled as MZ_SYNC_FLUSH.
  if (flush == MZ_PARTIAL_FLUSH)
    flush = MZ_SYNC_FLUSH;
  if ((flush) && (flush != MZ_SYNC_FLUSH) && (flush != MZ_FINISH))
    return MZ_STREAM_ERROR;

  pState = (inflate_state*)pStream->state;
  // A positive window_bits (as stored by mz_inflateInit2) selects zlib-header parsing.
  if (pState->m_window_bits > 0)
    decomp_flags |= TINFL_FLAG_PARSE_ZLIB_HEADER;
  orig_avail_in = pStream->avail_in;

  first_call = pState->m_first_call;
  pState->m_first_call = 0;
  // A previous call already failed; keep reporting the error.
  if (pState->m_last_status < 0)
    return MZ_DATA_ERROR;

  // Once MZ_FINISH has been requested, every later call must request it too.
  if (pState->m_has_flushed && (flush != MZ_FINISH))
    return MZ_STREAM_ERROR;
  pState->m_has_flushed |= (flush == MZ_FINISH);

  if ((flush == MZ_FINISH) && (first_call)) {
    // MZ_FINISH on the first call implies that the input and output buffers are large enough to hold the entire compressed/decompressed file.
    decomp_flags |= TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
    in_bytes = pStream->avail_in;
    out_bytes = pStream->avail_out;
    // Decompress straight into the caller's buffer, bypassing the internal dictionary.
    status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pStream->next_out, pStream->next_out, &out_bytes, decomp_flags);
    pState->m_last_status = status;
    pStream->next_in += (mz_uint)in_bytes;
    pStream->avail_in -= (mz_uint)in_bytes;
    pStream->total_in += (mz_uint)in_bytes;
    pStream->adler = tinfl_get_adler32(&pState->m_decomp);
    pStream->next_out += (mz_uint)out_bytes;
    pStream->avail_out -= (mz_uint)out_bytes;
    pStream->total_out += (mz_uint)out_bytes;

    if (status < 0)
      return MZ_DATA_ERROR;
    else if (status != TINFL_STATUS_DONE) {
      // The single-shot attempt did not finish; mark the stream failed so later calls return MZ_DATA_ERROR.
      pState->m_last_status = TINFL_STATUS_FAILED;
      return MZ_BUF_ERROR;
    }
    return MZ_STREAM_END;
  }
  // flush != MZ_FINISH then we must assume there's more input.
  if (flush != MZ_FINISH)
    decomp_flags |= TINFL_FLAG_HAS_MORE_INPUT;

  // Data decompressed by an earlier call is still waiting in the dictionary: hand it out before decoding more.
  if (pState->m_dict_avail) {
    n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
    memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
    pStream->next_out += n;
    pStream->avail_out -= n;
    pStream->total_out += n;
    pState->m_dict_avail -= n;
    pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);
    return ((pState->m_last_status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK;
  }

  for (;;) {
    in_bytes = pStream->avail_in;
    // Room left in the dictionary from the current offset to its end.
    out_bytes = TINFL_LZ_DICT_SIZE - pState->m_dict_ofs;

    status = tinfl_decompress(&pState->m_decomp, pStream->next_in, &in_bytes, pState->m_dict, pState->m_dict + pState->m_dict_ofs, &out_bytes, decomp_flags);
    pState->m_last_status = status;

    pStream->next_in += (mz_uint)in_bytes;
    pStream->avail_in -= (mz_uint)in_bytes;
    pStream->total_in += (mz_uint)in_bytes;
    pStream->adler = tinfl_get_adler32(&pState->m_decomp);

    pState->m_dict_avail = (mz_uint)out_bytes;

    // Copy as much of the freshly decoded data as fits into the caller's output buffer.
    n = MZ_MIN(pState->m_dict_avail, pStream->avail_out);
    memcpy(pStream->next_out, pState->m_dict + pState->m_dict_ofs, n);
    pStream->next_out += n;
    pStream->avail_out -= n;
    pStream->total_out += n;
    pState->m_dict_avail -= n;
    pState->m_dict_ofs = (pState->m_dict_ofs + n) & (TINFL_LZ_DICT_SIZE - 1);

    if (status < 0)
      return MZ_DATA_ERROR;  // Stream is corrupted (there could be some uncompressed data left in the output dictionary - oh well).
    else if ((status == TINFL_STATUS_NEEDS_MORE_INPUT) && (!orig_avail_in))
      return MZ_BUF_ERROR;  // Signal caller that we can't make forward progress without supplying more input or by setting flush to MZ_FINISH.
    else if (flush == MZ_FINISH) {
      // The output buffer MUST be large to hold the remaining uncompressed data when flush==MZ_FINISH.
      if (status == TINFL_STATUS_DONE)
        return pState->m_dict_avail ? MZ_BUF_ERROR : MZ_STREAM_END;
      // status here must be TINFL_STATUS_HAS_MORE_OUTPUT, which means there's at least 1 more byte on the way. If there's no more room left in the output buffer then something is wrong.
      else if (!pStream->avail_out)
        return MZ_BUF_ERROR;
    } else if ((status == TINFL_STATUS_DONE) || (!pStream->avail_in) || (!pStream->avail_out) || (pState->m_dict_avail))
      break;
  }

  return ((status == TINFL_STATUS_DONE) && (!pState->m_dict_avail)) ? MZ_STREAM_END : MZ_OK;
}

// Frees the inflate state through the stream's zfree callback. Safe to call when no state is attached.
int mz_inflateEnd(mz_streamp pStream) {
  if (!pStream)
    return MZ_STREAM_ERROR;
  if (pStream->state) {
    pStream->zfree(pStream->opaque, pStream->state);
    pStream->state = NULL;
  }
  return MZ_OK;
}

// One-shot decompression of pSource into pDest.
// On entry *pDest_len is the capacity of pDest; on success it receives the decompressed size.
// Returns MZ_PARAM_ERROR when either length exceeds 32 bits; otherwise the inflate status, with
// MZ_BUF_ERROR reported as MZ_DATA_ERROR when all input was consumed.
int mz_uncompress(unsigned char* pDest, mz_ulong* pDest_len, const unsigned char* pSource, mz_ulong source_len) {
  mz_stream stream;
  int status;
  memset(&stream, 0, sizeof(stream));

  // In case mz_ulong is 64-bits (argh I hate longs).
  if ((source_len | *pDest_len) > 0xFFFFFFFFU)
    return MZ_PARAM_ERROR;

  stream.next_in = pSource;
  stream.avail_in = (mz_uint32)source_len;
  stream.next_out = pDest;
  stream.avail_out = (mz_uint32)*pDest_len;

  status = mz_inflateInit(&stream);
  if (status != MZ_OK)
    return status;

  status = mz_inflate(&stream, MZ_FINISH);
  if (status != MZ_STREAM_END) {
    mz_inflateEnd(&stream);
    return ((status == MZ_BUF_ERROR) && (!stream.avail_in)) ? MZ_DATA_ERROR : status;
  }
  *pDest_len = stream.total_out;

  return mz_inflateEnd(&stream);
}

// Maps an MZ_* status code to a short description; returns NULL for codes not in the table.
const char* mz_error(int err) {
  static struct
  {
    int m_err;
    const char* m_pDesc;
  } s_error_descs[] =
      {
          {MZ_OK, ""}, {MZ_STREAM_END, "stream end"}, {MZ_NEED_DICT, "need dictionary"}, {MZ_ERRNO, "file error"}, {MZ_STREAM_ERROR, "stream error"}, {MZ_DATA_ERROR, "data error"}, {MZ_MEM_ERROR, "out of memory"}, {MZ_BUF_ERROR, "buf error"}, {MZ_VERSION_ERROR, "version error"}, {MZ_PARAM_ERROR, "parameter error"}};
  mz_uint i;
  for (i = 0; i < sizeof(s_error_descs) / sizeof(s_error_descs[0]); ++i)
    if (s_error_descs[i].m_err == err)
      return s_error_descs[i].m_pDesc;
  return NULL;
}

#endif  //MINIZ_NO_ZLIB_APIS

// ------------------- Low-level Decompression (completely independent from all compression API's)

#define TINFL_MEMCPY(d, s, l) memcpy(d, s, l)
#define TINFL_MEMSET(p, c, l) memset(p, c, l)

// Coroutine helpers used inside tinfl_decompress().
// TINFL_CR_BEGIN opens a switch on r->m_state.
// TINFL_CR_RETURN stores `result` in `status`, records state_index in r->m_state and jumps to common_exit;
// the `case state_index:` label it plants is where the switch re-enters on the next call.
// TINFL_CR_RETURN_FOREVER repeats that return on every subsequent call.
// TINFL_CR_FINISH closes the switch.
#define TINFL_CR_BEGIN  \
  switch (r->m_state) { \
    case 0:
#define TINFL_CR_RETURN(state_index, result) \
  do {                                       \
    status = result;                         \
    r->m_state = state_index;                \
    goto common_exit;                        \
    case state_index:;                       \
  }                                          \
  MZ_MACRO_END
#define TINFL_CR_RETURN_FOREVER(state_index, result) \
  do {                                               \
    for (;;) {                                       \
      TINFL_CR_RETURN(state_index, result);          \
    }                                                \
  }                                                  \
  MZ_MACRO_END
#define TINFL_CR_FINISH }

// TODO: If the caller has indicated that there's no more input, and we attempt to read beyond the input buf, then something is wrong with the input because the inflator never
// reads ahead more than it needs to. Currently TINFL_GET_BYTE() pads the end of the stream with 0's in this scenario.
// The macros below expand inside tinfl_decompress() and use its locals
// (pIn_buf_cur, pIn_buf_end, decomp_flags, bit_buf, num_bits) directly.

// Reads the next input byte into c. When the input is exhausted and TINFL_FLAG_HAS_MORE_INPUT is set, it
// suspends with TINFL_STATUS_NEEDS_MORE_INPUT and re-checks the input on resume; without that flag it yields 0.
#define TINFL_GET_BYTE(state_index, c)                                 \
  do {                                                                 \
    if (pIn_buf_cur >= pIn_buf_end) {                                  \
      for (;;) {                                                       \
        if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) {                \
          TINFL_CR_RETURN(state_index, TINFL_STATUS_NEEDS_MORE_INPUT); \
          if (pIn_buf_cur < pIn_buf_end) {                             \
            c = *pIn_buf_cur++;                                        \
            break;                                                     \
          }                                                            \
        } else {                                                       \
          c = 0;                                                       \
          break;                                                       \
        }                                                              \
      }                                                                \
    } else                                                             \
      c = *pIn_buf_cur++;                                              \
  }                                                                    \
  MZ_MACRO_END

// Appends whole input bytes to the top of bit_buf until it holds at least n bits.
#define TINFL_NEED_BITS(state_index, n)            \
  do {                                             \
    mz_uint c;                                     \
    TINFL_GET_BYTE(state_index, c);                \
    bit_buf |= (((tinfl_bit_buf_t)c) << num_bits); \
    num_bits += 8;                                 \
  } while (num_bits < (mz_uint)(n))

// Discards the low n bits of bit_buf, refilling first if fewer than n bits are buffered.
#define TINFL_SKIP_BITS(state_index, n) \
  do {                                  \
    if (num_bits < (mz_uint)(n)) {      \
      TINFL_NEED_BITS(state_index, n);  \
    }                                   \
    bit_buf >>= (n);                    \
    num_bits -= (n);                    \
  }                                     \
  MZ_MACRO_END

// Stores the low n bits of bit_buf in b and consumes them, refilling first if fewer than n bits are buffered.
#define TINFL_GET_BITS(state_index, b, n) \
  do {                                    \
    if (num_bits < (mz_uint)(n)) {        \
      TINFL_NEED_BITS(state_index, n);    \
    }                                     \
    b = bit_buf & ((1 << (n)) - 1);       \
    bit_buf >>= (n);                      \
    num_bits -= (n);                      \
  }                                       \
  MZ_MACRO_END

// TINFL_HUFF_BITBUF_FILL() is only used rarely, when the number of bytes remaining in the input buffer falls below 2.
// It reads just enough bytes from the input stream that are needed to decode the next Huffman code (and absolutely no more). It works by trying to fully decode a
// Huffman code by using whatever bits are currently present in the bit buffer. If this fails, it reads another byte, and tries again until it succeeds or until the
// bit buffer contains >=15 bits (deflate's max. Huffman code size).
// Expands inside TINFL_HUFF_DECODE(), which declares the temp, code_len and c locals used here.
// Each pass probes the fast lookup table (and, for longer codes, walks m_tree) with the bits currently buffered;
// the loop exits early once a complete code is available, otherwise one more input byte is appended.
// Note that the definition ends with its own semicolon.
#define TINFL_HUFF_BITBUF_FILL(state_index, pHuff)                     \
  do {                                                                 \
    temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]; \
    if (temp >= 0) {                                                   \
      code_len = temp >> 9;                                            \
      if ((code_len) && (num_bits >= code_len))                        \
        break;                                                         \
    } else if (num_bits > TINFL_FAST_LOOKUP_BITS) {                    \
      code_len = TINFL_FAST_LOOKUP_BITS;                               \
      do {                                                             \
        temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)]; \
      } while ((temp < 0) && (num_bits >= (code_len + 1)));            \
      if (temp >= 0)                                                   \
        break;                                                         \
    }                                                                  \
    TINFL_GET_BYTE(state_index, c);                                    \
    bit_buf |= (((tinfl_bit_buf_t)c) << num_bits);                     \
    num_bits += 8;                                                     \
  } while (num_bits < 15);

// TINFL_HUFF_DECODE() decodes the next Huffman coded symbol. It's more complex than you would initially expect because the zlib API expects the decompressor to never read
// beyond the final byte of the deflate stream. (In other words, when this macro wants to read another byte from the input, it REALLY needs another byte in order to fully
// decode the next Huffman code.) Handling this properly is particularly important on raw deflate (non-zlib) streams, which aren't followed by a byte aligned adler-32.
// The slow path is only executed at the very end of the input buffer.
// Decodes one symbol from pHuff into sym. With fewer than 15 buffered bits it either takes the careful
// byte-at-a-time path (fewer than 2 input bytes left) or appends two input bytes at once.
// Lookup entries >= 0 pack the code length above bit 9 and the symbol in the low 9 bits; negative entries
// lead into m_tree, which is walked one bit at a time.
#define TINFL_HUFF_DECODE(state_index, sym, pHuff)                                                                        \
  do {                                                                                                                    \
    int temp;                                                                                                             \
    mz_uint code_len, c;                                                                                                  \
    if (num_bits < 15) {                                                                                                  \
      if ((pIn_buf_end - pIn_buf_cur) < 2) {                                                                              \
        TINFL_HUFF_BITBUF_FILL(state_index, pHuff);                                                                       \
      } else {                                                                                                            \
        bit_buf |= (((tinfl_bit_buf_t)pIn_buf_cur[0]) << num_bits) | (((tinfl_bit_buf_t)pIn_buf_cur[1]) << (num_bits + 8)); \
        pIn_buf_cur += 2;                                                                                                 \
        num_bits += 16;                                                                                                   \
      }                                                                                                                   \
    }                                                                                                                     \
    if ((temp = (pHuff)->m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)                                         \
      code_len = temp >> 9, temp &= 511;                                                                                  \
    else {                                                                                                                \
      code_len = TINFL_FAST_LOOKUP_BITS;                                                                                  \
      do {                                                                                                                \
        temp = (pHuff)->m_tree[~temp + ((bit_buf >> code_len++) & 1)];                                                    \
      } while (temp < 0);                                                                                                 \
    }                                                                                                                     \
    sym = temp;                                                                                                           \
    bit_buf >>= code_len;                                                                                                 \
    num_bits -= code_len;                                                                                                 \
  }                                                                                                                       \
  MZ_MACRO_END

// Resumable inflate core. Consumes up to *pIn_buf_size bytes from pIn_buf_next and writes up to *pOut_buf_size
// bytes at pOut_buf_next (which lies inside the buffer starting at pOut_buf_start). On return both size
// arguments hold the number of bytes actually consumed / produced.
// The function is written as a coroutine: every TINFL_CR_RETURN records a state index in r->m_state and the
// next call re-enters at that point, so the working variables are saved to / restored from *r around each call.
// Returns TINFL_STATUS_BAD_PARAM when the output buffer arguments are inconsistent; otherwise one of the
// TINFL_STATUS_* values passed to TINFL_CR_RETURN below, or TINFL_STATUS_ADLER32_MISMATCH from the exit path.
tinfl_status tinfl_decompress(tinfl_decompressor* r, const mz_uint8* pIn_buf_next, size_t* pIn_buf_size, mz_uint8* pOut_buf_start, mz_uint8* pOut_buf_next, size_t* pOut_buf_size, const mz_uint32 decomp_flags) {
  // Base values and extra-bit counts indexed by (length symbol - 257) and by distance symbol.
  static const int s_length_base[31] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
  static const int s_length_extra[31] = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0};
  static const int s_dist_base[32] = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0, 0};
  static const int s_dist_extra[32] = {0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13};
  // Order in which the code-length code sizes are stored in a dynamic block header.
  static const mz_uint8 s_length_dezigzag[19] = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
  // Added to the three table-size fields read from a dynamic block header.
  static const int s_min_table_sizes[3] = {257, 1, 4};

  tinfl_status status = TINFL_STATUS_FAILED;
  mz_uint32 num_bits, dist, counter, num_extra;
  tinfl_bit_buf_t bit_buf;
  const mz_uint8 *pIn_buf_cur = pIn_buf_next, *const pIn_buf_end = pIn_buf_next + *pIn_buf_size;
  mz_uint8 *pOut_buf_cur = pOut_buf_next, *const pOut_buf_end = pOut_buf_next + *pOut_buf_size;
  // All-ones mask for a non-wrapping buffer; otherwise (total buffer size - 1), used to wrap match source offsets.
  size_t out_buf_size_mask = (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF) ? (size_t)-1 : ((pOut_buf_next - pOut_buf_start) + *pOut_buf_size) - 1, dist_from_out_buf_start;

  // Ensure the output buffer's size is a power of 2, unless the output buffer is large enough to hold the entire output file (in which case it doesn't matter).
  if (((out_buf_size_mask + 1) & out_buf_size_mask) || (pOut_buf_next < pOut_buf_start)) {
    *pIn_buf_size = *pOut_buf_size = 0;
    return TINFL_STATUS_BAD_PARAM;
  }

  // Restore the working state saved by the previous call.
  num_bits = r->m_num_bits;
  bit_buf = r->m_bit_buf;
  dist = r->m_dist;
  counter = r->m_counter;
  num_extra = r->m_num_extra;
  dist_from_out_buf_start = r->m_dist_from_out_buf_start;
  TINFL_CR_BEGIN

  // State 0: fresh stream.
  bit_buf = num_bits = dist = counter = num_extra = r->m_zhdr0 = r->m_zhdr1 = 0;
  r->m_z_adler32 = r->m_check_adler32 = 1;
  if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) {
    TINFL_GET_BYTE(1, r->m_zhdr0);
    TINFL_GET_BYTE(2, r->m_zhdr1);
    // counter is used as a "header is invalid" flag here.
    counter = (((r->m_zhdr0 * 256 + r->m_zhdr1) % 31 != 0) || (r->m_zhdr1 & 32) || ((r->m_zhdr0 & 15) != 8));
    // With a wrapping buffer, also reject headers whose window is larger than 32768 or than the output buffer.
    if (!(decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))
      counter |= (((1U << (8U + (r->m_zhdr0 >> 4))) > 32768U) || ((out_buf_size_mask + 1) < (size_t)(1U << (8U + (r->m_zhdr0 >> 4)))));
    if (counter) {
      TINFL_CR_RETURN_FOREVER(36, TINFL_STATUS_FAILED);
    }
  }

  // One iteration per block, until a block whose final bit is set has been decoded.
  do {
    // Low bit of m_final is the "last block" flag, the upper two bits are the block type.
    TINFL_GET_BITS(3, r->m_final, 3);
    r->m_type = r->m_final >> 1;
    if (r->m_type == 0) {
      // Stored block: drop the bits left in the current byte, then read the 4 header bytes.
      TINFL_SKIP_BITS(5, num_bits & 7);
      for (counter = 0; counter < 4; ++counter) {
        if (num_bits)
          TINFL_GET_BITS(6, r->m_raw_header[counter], 8);
        else
          TINFL_GET_BYTE(7, r->m_raw_header[counter]);
      }
      // The 16-bit length must equal the one's complement of the following 16-bit field.
      if ((counter = (r->m_raw_header[0] | (r->m_raw_header[1] << 8))) != (mz_uint)(0xFFFF ^ (r->m_raw_header[2] | (r->m_raw_header[3] << 8)))) {
        TINFL_CR_RETURN_FOREVER(39, TINFL_STATUS_FAILED);
      }
      // First drain any bytes still held in the bit buffer...
      while ((counter) && (num_bits)) {
        TINFL_GET_BITS(51, dist, 8);
        while (pOut_buf_cur >= pOut_buf_end) {
          TINFL_CR_RETURN(52, TINFL_STATUS_HAS_MORE_OUTPUT);
        }
        *pOut_buf_cur++ = (mz_uint8)dist;
        counter--;
      }
      // ...then copy the rest of the block straight from input to output.
      while (counter) {
        size_t n;
        while (pOut_buf_cur >= pOut_buf_end) {
          TINFL_CR_RETURN(9, TINFL_STATUS_HAS_MORE_OUTPUT);
        }
        while (pIn_buf_cur >= pIn_buf_end) {
          if (decomp_flags & TINFL_FLAG_HAS_MORE_INPUT) {
            TINFL_CR_RETURN(38, TINFL_STATUS_NEEDS_MORE_INPUT);
          } else {
            TINFL_CR_RETURN_FOREVER(40, TINFL_STATUS_FAILED);
          }
        }
        n = MZ_MIN(MZ_MIN((size_t)(pOut_buf_end - pOut_buf_cur), (size_t)(pIn_buf_end - pIn_buf_cur)), counter);
        TINFL_MEMCPY(pOut_buf_cur, pIn_buf_cur, n);
        pIn_buf_cur += n;
        pOut_buf_cur += n;
        counter -= (mz_uint)n;
      }
    } else if (r->m_type == 3) {
      // Block type 3 is rejected.
      TINFL_CR_RETURN_FOREVER(10, TINFL_STATUS_FAILED);
    } else {
      if (r->m_type == 1) {
        // Fixed Huffman block: fill in the predefined code sizes.
        mz_uint8* p = r->m_tables[0].m_code_size;
        mz_uint i;
        r->m_table_sizes[0] = 288;
        r->m_table_sizes[1] = 32;
        TINFL_MEMSET(r->m_tables[1].m_code_size, 5, 32);
        for (i = 0; i <= 143; ++i)
          *p++ = 8;
        for (; i <= 255; ++i)
          *p++ = 9;
        for (; i <= 279; ++i)
          *p++ = 7;
        for (; i <= 287; ++i)
          *p++ = 8;
      } else {
        // Dynamic Huffman block: read the three table sizes (5, 5 and 4 bits), then the code-length code sizes.
        for (counter = 0; counter < 3; counter++) {
          TINFL_GET_BITS(11, r->m_table_sizes[counter], "\05\05\04"[counter]);
          r->m_table_sizes[counter] += s_min_table_sizes[counter];
        }
        MZ_CLEAR_OBJ(r->m_tables[2].m_code_size);
        for (counter = 0; counter < r->m_table_sizes[2]; counter++) {
          mz_uint s;
          TINFL_GET_BITS(14, s, 3);
          r->m_tables[2].m_code_size[s_length_dezigzag[counter]] = (mz_uint8)s;
        }
        r->m_table_sizes[2] = 19;
      }
      // Build decode tables from m_type down to 0. For a dynamic block (m_type starts at 2) the code-length
      // table is built first and then used to read the code sizes of tables 0 and 1.
      for (; (int)r->m_type >= 0; r->m_type--) {
        int tree_next, tree_cur;
        tinfl_huff_table* pTable;
        mz_uint i, j, used_syms, total, sym_index, next_code[17], total_syms[16];
        pTable = &r->m_tables[r->m_type];
        MZ_CLEAR_OBJ(total_syms);
        MZ_CLEAR_OBJ(pTable->m_look_up);
        MZ_CLEAR_OBJ(pTable->m_tree);
        // Histogram of code sizes.
        for (i = 0; i < r->m_table_sizes[r->m_type]; ++i)
          total_syms[pTable->m_code_size[i]]++;
        used_syms = 0, total = 0;
        next_code[0] = next_code[1] = 0;
        // First code value for each code size.
        for (i = 1; i <= 15; ++i) {
          used_syms += total_syms[i];
          next_code[i + 1] = (total = ((total + total_syms[i]) << 1));
        }
        // Reject code sets whose total is not exactly 65536, unless at most one symbol is used.
        if ((65536 != total) && (used_syms > 1)) {
          TINFL_CR_RETURN_FOREVER(35, TINFL_STATUS_FAILED);
        }
        for (tree_next = -1, sym_index = 0; sym_index < r->m_table_sizes[r->m_type]; ++sym_index) {
          mz_uint rev_code = 0, l, cur_code, code_size = pTable->m_code_size[sym_index];
          if (!code_size)
            continue;
          cur_code = next_code[code_size]++;
          // Bit-reverse the code so it can be matched against the low bits of bit_buf.
          for (l = code_size; l > 0; l--, cur_code >>= 1)
            rev_code = (rev_code << 1) | (cur_code & 1);
          if (code_size <= TINFL_FAST_LOOKUP_BITS) {
            // Short code: replicate (code_size << 9 | symbol) into every lookup slot whose low bits match.
            mz_int16 k = (mz_int16)((code_size << 9) | sym_index);
            while (rev_code < TINFL_FAST_LOOKUP_SIZE) {
              pTable->m_look_up[rev_code] = k;
              rev_code += (1 << code_size);
            }
            continue;
          }
          // Long code: the lookup slot holds a negative index into m_tree, where node pairs are allocated
          // by decrementing tree_next by 2.
          if (0 == (tree_cur = pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)])) {
            pTable->m_look_up[rev_code & (TINFL_FAST_LOOKUP_SIZE - 1)] = (mz_int16)tree_next;
            tree_cur = tree_next;
            tree_next -= 2;
          }
          rev_code >>= (TINFL_FAST_LOOKUP_BITS - 1);
          for (j = code_size; j > (TINFL_FAST_LOOKUP_BITS + 1); j--) {
            tree_cur -= ((rev_code >>= 1) & 1);
            if (!pTable->m_tree[-tree_cur - 1]) {
              pTable->m_tree[-tree_cur - 1] = (mz_int16)tree_next;
              tree_cur = tree_next;
              tree_next -= 2;
            } else
              tree_cur = pTable->m_tree[-tree_cur - 1];
          }
          tree_cur -= ((rev_code >>= 1) & 1);
          pTable->m_tree[-tree_cur - 1] = (mz_int16)sym_index;
        }
        if (r->m_type == 2) {
          // Decode the literal/length and distance code sizes with the code-length table just built.
          for (counter = 0; counter < (r->m_table_sizes[0] + r->m_table_sizes[1]);) {
            mz_uint s;
            TINFL_HUFF_DECODE(16, dist, &r->m_tables[2]);
            if (dist < 16) {
              r->m_len_codes[counter++] = (mz_uint8)dist;
              continue;
            }
            // Symbol 16 repeats the previous size, so it cannot come first.
            if ((dist == 16) && (!counter)) {
              TINFL_CR_RETURN_FOREVER(17, TINFL_STATUS_FAILED);
            }
            // Symbols 16/17/18: 2/3/7 extra bits, added to a base run length of 3/3/11.
            num_extra = "\02\03\07"[dist - 16];
            TINFL_GET_BITS(18, s, num_extra);
            s += "\03\03\013"[dist - 16];
            TINFL_MEMSET(r->m_len_codes + counter, (dist == 16) ? r->m_len_codes[counter - 1] : 0, s);
            counter += s;
          }
          // A run must not extend past the combined table size.
          if ((r->m_table_sizes[0] + r->m_table_sizes[1]) != counter) {
            TINFL_CR_RETURN_FOREVER(21, TINFL_STATUS_FAILED);
          }
          TINFL_MEMCPY(r->m_tables[0].m_code_size, r->m_len_codes, r->m_table_sizes[0]);
          TINFL_MEMCPY(r->m_tables[1].m_code_size, r->m_len_codes + r->m_table_sizes[0], r->m_table_sizes[1]);
        }
      }
      // Main decode loop for a Huffman-coded block.
      for (;;) {
        mz_uint8* pSrc;
        // Emit literals until a symbol >= 256 (end-of-block or a match length) is decoded.
        for (;;) {
          if (((pIn_buf_end - pIn_buf_cur) < 4) || ((pOut_buf_end - pOut_buf_cur) < 2)) {
            // Near the end of either buffer: decode one symbol with full bounds handling.
            TINFL_HUFF_DECODE(23, counter, &r->m_tables[0]);
            if (counter >= 256)
              break;
            while (pOut_buf_cur >= pOut_buf_end) {
              TINFL_CR_RETURN(24, TINFL_STATUS_HAS_MORE_OUTPUT);
            }
            *pOut_buf_cur++ = (mz_uint8)counter;
          } else {
            // Fast path: at least 4 input bytes and 2 output bytes are available, so up to two symbols are
            // decoded without per-byte checks.
            int sym2;
            mz_uint code_len;
#if TINFL_USE_64BIT_BITBUF
            if (num_bits < 30) {
              bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE32(pIn_buf_cur)) << num_bits);
              pIn_buf_cur += 4;
              num_bits += 32;
            }
#else
            if (num_bits < 15) {
              bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits);
              pIn_buf_cur += 2;
              num_bits += 16;
            }
#endif
            if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)
              code_len = sym2 >> 9;
            else {
              code_len = TINFL_FAST_LOOKUP_BITS;
              do {
                sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)];
              } while (sym2 < 0);
            }
            counter = sym2;
            bit_buf >>= code_len;
            num_bits -= code_len;
            // Bit 8 set: not a literal (the code-length bits above bit 8 are masked off after the loop).
            if (counter & 256)
              break;

#if !TINFL_USE_64BIT_BITBUF
            if (num_bits < 15) {
              bit_buf |= (((tinfl_bit_buf_t)MZ_READ_LE16(pIn_buf_cur)) << num_bits);
              pIn_buf_cur += 2;
              num_bits += 16;
            }
#endif
            if ((sym2 = r->m_tables[0].m_look_up[bit_buf & (TINFL_FAST_LOOKUP_SIZE - 1)]) >= 0)
              code_len = sym2 >> 9;
            else {
              code_len = TINFL_FAST_LOOKUP_BITS;
              do {
                sym2 = r->m_tables[0].m_tree[~sym2 + ((bit_buf >> code_len++) & 1)];
              } while (sym2 < 0);
            }
            bit_buf >>= code_len;
            num_bits -= code_len;

            pOut_buf_cur[0] = (mz_uint8)counter;
            if (sym2 & 256) {
              pOut_buf_cur++;
              counter = sym2;
              break;
            }
            pOut_buf_cur[1] = (mz_uint8)sym2;
            pOut_buf_cur += 2;
          }
        }
        // Symbol 256 ends the block.
        if ((counter &= 511) == 256)
          break;

        // Match length: base value plus extra bits.
        num_extra = s_length_extra[counter - 257];
        counter = s_length_base[counter - 257];
        if (num_extra) {
          mz_uint extra_bits;
          TINFL_GET_BITS(25, extra_bits, num_extra);
          counter += extra_bits;
        }

        // Match distance: base value plus extra bits.
        TINFL_HUFF_DECODE(26, dist, &r->m_tables[1]);
        num_extra = s_dist_extra[dist];
        dist = s_dist_base[dist];
        if (num_extra) {
          mz_uint extra_bits;
          TINFL_GET_BITS(27, extra_bits, num_extra);
          dist += extra_bits;
        }

        dist_from_out_buf_start = pOut_buf_cur - pOut_buf_start;
        // With a non-wrapping buffer, a distance reaching before the start of the output is an error.
        // No equivalent check is made for a wrapping buffer.
        if ((dist > dist_from_out_buf_start) && (decomp_flags & TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF)) {
          TINFL_CR_RETURN_FOREVER(37, TINFL_STATUS_FAILED);
        }

        pSrc = pOut_buf_start + ((dist_from_out_buf_start - dist) & out_buf_size_mask);

        if ((MZ_MAX(pOut_buf_cur, pSrc) + counter) > pOut_buf_end) {
          // The copy would run past the end of the buffer on the source or destination side: copy byte by
          // byte, wrapping the source index and suspending whenever the output is full.
          while (counter--) {
            while (pOut_buf_cur >= pOut_buf_end) {
              TINFL_CR_RETURN(53, TINFL_STATUS_HAS_MORE_OUTPUT);
            }
            *pOut_buf_cur++ = pOut_buf_start[(dist_from_out_buf_start++ - dist) & out_buf_size_mask];
          }
          continue;
        }
#if MINIZ_USE_UNALIGNED_LOADS_AND_STORES
        else if ((counter >= 9) && (counter <= dist)) {
          // Source and destination do not overlap within the match: copy 8 bytes per iteration.
          const mz_uint8* pSrc_end = pSrc + (counter & ~7);
          do {
            ((mz_uint32*)pOut_buf_cur)[0] = ((const mz_uint32*)pSrc)[0];
            ((mz_uint32*)pOut_buf_cur)[1] = ((const mz_uint32*)pSrc)[1];
            pOut_buf_cur += 8;
          } while ((pSrc += 8) < pSrc_end);
          if ((counter &= 7) < 3) {
            if (counter) {
              pOut_buf_cur[0] = pSrc[0];
              if (counter > 1)
                pOut_buf_cur[1] = pSrc[1];
              pOut_buf_cur += counter;
            }
            continue;
          }
        }
#endif
        // General in-bounds copy, 3 bytes per iteration, followed by the 0-2 leftover bytes.
        do {
          pOut_buf_cur[0] = pSrc[0];
          pOut_buf_cur[1] = pSrc[1];
          pOut_buf_cur[2] = pSrc[2];
          pOut_buf_cur += 3;
          pSrc += 3;
        } while ((int)(counter -= 3) > 2);
        if ((int)counter > 0) {
          pOut_buf_cur[0] = pSrc[0];
          if ((int)counter > 1)
            pOut_buf_cur[1] = pSrc[1];
          pOut_buf_cur += counter;
        }
      }
    }
  } while (!(r->m_final & 1));
  if (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) {
    // Skip to the next byte boundary and read the 4-byte stored checksum, most significant byte first.
    TINFL_SKIP_BITS(32, num_bits & 7);
    for (counter = 0; counter < 4; ++counter) {
      mz_uint s;
      if (num_bits)
        TINFL_GET_BITS(41, s, 8);
      else
        TINFL_GET_BYTE(42, s);
      r->m_z_adler32 = (r->m_z_adler32 << 8) | s;
    }
  }
  TINFL_CR_RETURN_FOREVER(34, TINFL_STATUS_DONE);
  TINFL_CR_FINISH

common_exit:
  // Save the working state for the next call and report how much input/output was used by this one.
  r->m_num_bits = num_bits;
  r->m_bit_buf = bit_buf;
  r->m_dist = dist;
  r->m_counter = counter;
  r->m_num_extra = num_extra;
  r->m_dist_from_out_buf_start = dist_from_out_buf_start;
  *pIn_buf_size = pIn_buf_cur - pIn_buf_next;
  *pOut_buf_size = pOut_buf_cur - pOut_buf_next;
  if ((decomp_flags & (TINFL_FLAG_PARSE_ZLIB_HEADER | TINFL_FLAG_COMPUTE_ADLER32)) && (status >= 0)) {
    // Fold the bytes produced by this call into the running Adler-32 (same loop shape as mz_adler32()).
    const mz_uint8* ptr = pOut_buf_next;
    size_t buf_len = *pOut_buf_size;
    mz_uint32 i, s1 = r->m_check_adler32 & 0xffff, s2 = r->m_check_adler32 >> 16;
    size_t block_len = buf_len % 5552;
    while (buf_len) {
      for (i = 0; i + 7 < block_len; i += 8, ptr += 8) {
        s1 += ptr[0], s2 += s1;
        s1 += ptr[1], s2 += s1;
        s1 += ptr[2], s2 += s1;
        s1 += ptr[3], s2 += s1;
        s1 += ptr[4], s2 += s1;
        s1 += ptr[5], s2 += s1;
        s1 += ptr[6], s2 += s1;
        s1 += ptr[7], s2 += s1;
      }
      for (; i < block_len; ++i)
        s1 += *ptr++, s2 += s1;
      s1 %= 65521U, s2 %= 65521U;
      buf_len -= block_len;
      block_len = 5552;
    }
    r->m_check_adler32 = (s2 << 16) + s1;
    // Only a finished zlib stream is checked against the stored checksum.
    if ((status == TINFL_STATUS_DONE) && (decomp_flags & TINFL_FLAG_PARSE_ZLIB_HEADER) && (r->m_check_adler32 != r->m_z_adler32))
      status = TINFL_STATUS_ADLER32_MISMATCH;
  }
  return status;
}

// Higher level helper functions.

// Decompresses the whole of pSrc_buf into a buffer grown with MZ_REALLOC and returns it, storing the
// decompressed size in *pOut_len. Returns NULL with *pOut_len == 0 on a decode error, truncated input or
// allocation failure. The buffer starts at 128 bytes and doubles each time the decompressor needs more room.
void* tinfl_decompress_mem_to_heap(const void* pSrc_buf, size_t src_buf_len, size_t* pOut_len, int flags) {
  tinfl_decompressor decomp;
  void *pBuf = NULL, *pNew_buf;
  size_t src_buf_ofs = 0, out_buf_capacity = 0;
  *pOut_len = 0;
  tinfl_init(&decomp);
  for (;;) {
    size_t src_buf_size = src_buf_len - src_buf_ofs, dst_buf_size = out_buf_capacity - *pOut_len, new_out_buf_capacity;
    // The first pass runs with a NULL, zero-sized output buffer.
    tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf + src_buf_ofs, &src_buf_size, (mz_uint8*)pBuf, pBuf ? (mz_uint8*)pBuf + *pOut_len : NULL, &dst_buf_size,
                                           (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
    if ((status < 0) || (status == TINFL_STATUS_NEEDS_MORE_INPUT)) {
      MZ_FREE(pBuf);
      *pOut_len = 0;
      return NULL;
    }
    src_buf_ofs += src_buf_size;
    *pOut_len += dst_buf_size;
    if (status == TINFL_STATUS_DONE)
      break;
    new_out_buf_capacity = out_buf_capacity * 2;
    if (new_out_buf_capacity < 128)
      new_out_buf_capacity = 128;
    pNew_buf = MZ_REALLOC(pBuf, new_out_buf_capacity);
    if (!pNew_buf) {
      MZ_FREE(pBuf);
      *pOut_len = 0;
      return NULL;
    }
    pBuf = pNew_buf;
    out_buf_capacity = new_out_buf_capacity;
  }
  return pBuf;
}

// Decompresses pSrc_buf into the caller-supplied pOut_buf in a single tinfl_decompress() call.
// Returns the number of bytes written, or TINFL_DECOMPRESS_MEM_TO_MEM_FAILED unless the call finishes the stream.
size_t tinfl_decompress_mem_to_mem(void* pOut_buf, size_t out_buf_len, const void* pSrc_buf, size_t src_buf_len, int flags) {
  tinfl_decompressor decomp;
  tinfl_status status;
  tinfl_init(&decomp);
  status = tinfl_decompress(&decomp, (const mz_uint8*)pSrc_buf, &src_buf_len, (mz_uint8*)pOut_buf, (mz_uint8*)pOut_buf, &out_buf_len, (flags & ~TINFL_FLAG_HAS_MORE_INPUT) | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF);
  return (status != TINFL_STATUS_DONE) ?
TINFL_DECOMPRESS_MEM_TO_MEM_FAILED : out_buf_len; } int tinfl_decompress_mem_to_callback(const void* pIn_buf, size_t* pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void* pPut_buf_user, int flags) { int result = 0; tinfl_decompressor decomp; mz_uint8* pDict = (mz_uint8*)MZ_MALLOC(TINFL_LZ_DICT_SIZE); size_t in_buf_ofs = 0, dict_ofs = 0; if (!pDict) return TINFL_STATUS_FAILED; tinfl_init(&decomp); for (;;) { size_t in_buf_size = *pIn_buf_size - in_buf_ofs, dst_buf_size = TINFL_LZ_DICT_SIZE - dict_ofs; tinfl_status status = tinfl_decompress(&decomp, (const mz_uint8*)pIn_buf + in_buf_ofs, &in_buf_size, pDict, pDict + dict_ofs, &dst_buf_size, (flags & ~(TINFL_FLAG_HAS_MORE_INPUT | TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF))); in_buf_ofs += in_buf_size; if ((dst_buf_size) && (!(*pPut_buf_func)(pDict + dict_ofs, (int)dst_buf_size, pPut_buf_user))) break; if (status != TINFL_STATUS_HAS_MORE_OUTPUT) { result = (status == TINFL_STATUS_DONE); break; } dict_ofs = (dict_ofs + dst_buf_size) & (TINFL_LZ_DICT_SIZE - 1); } MZ_FREE(pDict); *pIn_buf_size = in_buf_ofs; return result; } // ------------------- Low-level Compression (independent from all decompression API's) // Purposely making these tables static for faster init and thread safety. 
// Length symbol table. Indexed by the length byte stored in the LZ code buffer, which is
// (match_len - TDEFL_MIN_MATCH_LEN); yields the symbol (257..285) used to index Huffman table 0
// (the literal/length table) when the match is emitted.
static const mz_uint16 s_tdefl_len_sym[256] = {
    257, 258, 259, 260, 261, 262, 263, 264, 265, 265, 266, 266, 267, 267, 268, 268,
    269, 269, 269, 269, 270, 270, 270, 270, 271, 271, 271, 271, 272, 272, 272, 272,
    273, 273, 273, 273, 273, 273, 273, 273, 274, 274, 274, 274, 274, 274, 274, 274,
    275, 275, 275, 275, 275, 275, 275, 275, 276, 276, 276, 276, 276, 276, 276, 276,
    277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277, 277,
    278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278, 278,
    279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279, 279,
    280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280, 280,
    281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
    281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281, 281,
    282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
    282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282, 282,
    283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
    283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283, 283,
    284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284,
    284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 284, 285};

// Number of extra bits that follow the length symbol. Indexed exactly like s_tdefl_len_sym;
// the emitter masks the stored length byte with mz_bitmasks[] of this count to form the extra bits.
static const mz_uint8 s_tdefl_len_extra[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 0};

// Distance symbol table for short matches. Indexed directly by the stored distance
// (match_dist - 1) when that value is below 512; yields the symbol used to index Huffman table 1.
static const mz_uint8 s_tdefl_small_dist_sym[512] = {
    0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
    8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17,
    17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17};

// Number of extra bits that follow a short-match distance symbol. Indexed like
// s_tdefl_small_dist_sym (stored distance below 512).
static const mz_uint8 s_tdefl_small_dist_extra[512] = {
    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7};

// Distance symbol table for long matches. Indexed by (stored distance >> 8) when the stored
// distance is 512 or more; the first two entries are therefore never selected by the emitters.
static const mz_uint8 s_tdefl_large_dist_sym[128] = {
    0, 0, 18, 19, 20, 20, 21, 21, 22, 22, 22, 22, 23, 23, 23, 23,
    24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25,
    26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26,
    27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27,
    28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28,
    29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
    29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29};

// Number of extra bits that follow a long-match distance symbol. Indexed like
// s_tdefl_large_dist_sym (stored distance >> 8).
static const mz_uint8 s_tdefl_large_dist_extra[128] = {
    0, 0, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10,
    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13};

// Radix sorts tdefl_sym_freq[] array by 16-bit key m_key. Returns ptr to sorted values.
// One Huffman symbol paired with its sort key.
typedef struct {
  mz_uint16 m_key, m_sym_index;
} tdefl_sym_freq;

// LSD byte-wise radix sort of num_syms entries by their 16-bit m_key, ascending and stable.
// pSyms0 holds the input; pSyms1 is scratch space of the same length. The two buffers are
// ping-ponged once per pass, and the one holding the final ordering is returned (it may be either).
static tdefl_sym_freq* tdefl_radix_sort_syms(mz_uint num_syms, tdefl_sym_freq* pSyms0, tdefl_sym_freq* pSyms1) {
  mz_uint32 hist[256 * 2];
  mz_uint32 num_passes = 2, shift = 0, pass_index, n;
  tdefl_sym_freq* pSrc = pSyms0;
  tdefl_sym_freq* pDst = pSyms1;

  MZ_CLEAR_OBJ(hist);

  // Gather the low-byte and high-byte histograms in a single sweep over the input.
  for (n = 0; n < num_syms; n++) {
    const mz_uint key = pSyms0[n].m_key;
    hist[key & 0xFF]++;
    hist[256 + ((key >> 8) & 0xFF)]++;
  }

  // When every key has a zero high byte, the high-byte pass cannot reorder anything: drop it.
  if (num_syms == hist[256])
    num_passes = 1;

  for (pass_index = 0; pass_index < num_passes; pass_index++) {
    const mz_uint32* pCounts = hist + (pass_index * 256);
    mz_uint bucket_start[256];
    mz_uint running = 0;
    tdefl_sym_freq* pTmp;

    // Exclusive prefix sum: first output slot of each bucket.
    for (n = 0; n < 256; n++) {
      bucket_start[n] = running;
      running += pCounts[n];
    }

    // Scatter in input order so that equal digits keep their relative order.
    for (n = 0; n < num_syms; n++) {
      const mz_uint bucket = (pSrc[n].m_key >> shift) & 0xFF;
      pDst[bucket_start[bucket]++] = pSrc[n];
    }

    pTmp = pSrc;
    pSrc = pDst;
    pDst = pTmp;
    shift += 8;
  }

  return pSrc;
}

// tdefl_calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
// Computes Huffman code lengths in place.
//   A: n entries whose m_key holds the symbol weight on entry; the algorithm expects them in
//      ascending m_key order (the caller in this file passes the output of tdefl_radix_sort_syms).
//      On return each m_key holds that entry's code length instead of its weight.
//   n: number of entries. n == 0 is a no-op; n == 1 assigns length 1 to the only entry.
// m_sym_index is never touched. The m_key slots are reused three times (weight -> parent index ->
// depth), so the statement order below is significant.
static void tdefl_calculate_minimum_redundancy(tdefl_sym_freq* A, int n) {
  int root, leaf, next, avbl, used, dpth;
  if (n == 0)
    return;
  else if (n == 1) {
    A[0].m_key = 1;
    return;
  }

  // Phase 1: build the tree. Internal node `next` is formed from the two smallest remaining
  // weights, drawn either from not-yet-merged internal nodes (starting at `root`) or from
  // not-yet-merged leaves (starting at `leaf`). Its combined weight is stored in A[next].m_key,
  // and each consumed internal node's m_key is overwritten with the index of its parent.
  A[0].m_key += A[1].m_key;
  root = 0;
  leaf = 2;
  for (next = 1; next < n - 1; next++) {
    // First child.
    if (leaf >= n || A[root].m_key < A[leaf].m_key) {
      A[next].m_key = A[root].m_key;
      A[root++].m_key = (mz_uint16)next;
    } else
      A[next].m_key = A[leaf++].m_key;
    // Second child; `root < next` keeps the node being built from being chosen as its own child.
    if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) {
      A[next].m_key = (mz_uint16)(A[next].m_key + A[root].m_key);
      A[root++].m_key = (mz_uint16)next;
    } else
      A[next].m_key = (mz_uint16)(A[next].m_key + A[leaf++].m_key);
  }

  // Phase 2: turn parent indices into depths. A[n - 2] is the tree root (depth 0); walking
  // downwards, every other internal node is one level deeper than its parent.
  A[n - 2].m_key = 0;
  for (next = n - 3; next >= 0; next--)
    A[next].m_key = A[A[next].m_key].m_key + 1;

  // Phase 3: derive leaf depths level by level. `avbl` is the number of node slots at depth
  // `dpth`, `used` counts the internal nodes found at that depth; the remaining slots are leaves,
  // written from the end of the array backwards, so the last entries receive the shortest lengths.
  avbl = 1;
  used = dpth = 0;
  root = n - 2;
  next = n - 1;
  while (avbl > 0) {
    while (root >= 0 && (int)A[root].m_key == dpth) {
      used++;
      root--;
    }
    while (avbl > used) {
      A[next--].m_key = (mz_uint16)(dpth);
      avbl--;
    }
    // Each internal node at this depth contributes two slots to the next depth.
    avbl = 2 * used;
    dpth++;
    used = 0;
  }
}

// Limits canonical Huffman code table's max code size.
enum { TDEFL_MAX_SUPPORTED_HUFF_CODESIZE = 32 }; static void tdefl_huffman_enforce_max_code_size(int* pNum_codes, int code_list_len, int max_code_size) { int i; mz_uint32 total = 0; if (code_list_len <= 1) return; for (i = max_code_size + 1; i <= TDEFL_MAX_SUPPORTED_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i]; for (i = max_code_size; i > 0; i--) total += (((mz_uint32)pNum_codes[i]) << (max_code_size - i)); while (total != (1UL << max_code_size)) { pNum_codes[max_code_size]--; for (i = max_code_size - 1; i > 0; i--) if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; } total--; } } static void tdefl_optimize_huffman_table(tdefl_compressor* d, int table_num, int table_len, int code_size_limit, int static_table) { int i, j, l, num_codes[1 + TDEFL_MAX_SUPPORTED_HUFF_CODESIZE]; mz_uint next_code[TDEFL_MAX_SUPPORTED_HUFF_CODESIZE + 1]; MZ_CLEAR_OBJ(num_codes); if (static_table) { for (i = 0; i < table_len; i++) num_codes[d->m_huff_code_sizes[table_num][i]]++; } else { tdefl_sym_freq syms0[TDEFL_MAX_HUFF_SYMBOLS], syms1[TDEFL_MAX_HUFF_SYMBOLS], *pSyms; int num_used_syms = 0; const mz_uint16* pSym_count = &d->m_huff_count[table_num][0]; for (i = 0; i < table_len; i++) if (pSym_count[i]) { syms0[num_used_syms].m_key = (mz_uint16)pSym_count[i]; syms0[num_used_syms++].m_sym_index = (mz_uint16)i; } pSyms = tdefl_radix_sort_syms(num_used_syms, syms0, syms1); tdefl_calculate_minimum_redundancy(pSyms, num_used_syms); for (i = 0; i < num_used_syms; i++) num_codes[pSyms[i].m_key]++; tdefl_huffman_enforce_max_code_size(num_codes, num_used_syms, code_size_limit); MZ_CLEAR_OBJ(d->m_huff_code_sizes[table_num]); MZ_CLEAR_OBJ(d->m_huff_codes[table_num]); for (i = 1, j = num_used_syms; i <= code_size_limit; i++) for (l = num_codes[i]; l > 0; l--) d->m_huff_code_sizes[table_num][pSyms[--j].m_sym_index] = (mz_uint8)(i); } next_code[1] = 0; for (j = 0, i = 2; i <= code_size_limit; i++) next_code[i] = j = ((j + num_codes[i - 1]) << 1); for (i = 0; i < 
table_len; i++) { mz_uint rev_code = 0, code, code_size; if ((code_size = d->m_huff_code_sizes[table_num][i]) == 0) continue; code = next_code[code_size]++; for (l = code_size; l > 0; l--, code >>= 1) rev_code = (rev_code << 1) | (code & 1); d->m_huff_codes[table_num][i] = (mz_uint16)rev_code; } } #define TDEFL_PUT_BITS(b, l) \ do { \ mz_uint bits = b; \ mz_uint len = l; \ MZ_ASSERT(bits <= ((1U << len) - 1U)); \ d->m_bit_buffer |= (bits << d->m_bits_in); \ d->m_bits_in += len; \ while (d->m_bits_in >= 8) { \ if (d->m_pOutput_buf < d->m_pOutput_buf_end) \ *d->m_pOutput_buf++ = (mz_uint8)(d->m_bit_buffer); \ d->m_bit_buffer >>= 8; \ d->m_bits_in -= 8; \ } \ \ } \ MZ_MACRO_END #define TDEFL_RLE_PREV_CODE_SIZE() \ { \ if (rle_repeat_count) { \ if (rle_repeat_count < 3) { \ d->m_huff_count[2][prev_code_size] = (mz_uint16)(d->m_huff_count[2][prev_code_size] + rle_repeat_count); \ while (rle_repeat_count--) \ packed_code_sizes[num_packed_code_sizes++] = prev_code_size; \ } else { \ d->m_huff_count[2][16] = (mz_uint16)(d->m_huff_count[2][16] + 1); \ packed_code_sizes[num_packed_code_sizes++] = 16; \ packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_repeat_count - 3); \ \ } \ rle_repeat_count = 0; \ } \ } #define TDEFL_RLE_ZERO_CODE_SIZE() \ { \ if (rle_z_count) { \ if (rle_z_count < 3) { \ d->m_huff_count[2][0] = (mz_uint16)(d->m_huff_count[2][0] + rle_z_count); \ while (rle_z_count--) \ packed_code_sizes[num_packed_code_sizes++] = 0; \ } else if (rle_z_count <= 10) { \ d->m_huff_count[2][17] = (mz_uint16)(d->m_huff_count[2][17] + 1); \ packed_code_sizes[num_packed_code_sizes++] = 17; \ packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 3); \ } else { \ d->m_huff_count[2][18] = (mz_uint16)(d->m_huff_count[2][18] + 1); \ packed_code_sizes[num_packed_code_sizes++] = 18; \ packed_code_sizes[num_packed_code_sizes++] = (mz_uint8)(rle_z_count - 11); \ \ } \ rle_z_count = 0; \ } \ } static mz_uint8 s_tdefl_packed_code_size_syms_swizzle[] = {16, 17, 
18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; static void tdefl_start_dynamic_block(tdefl_compressor* d) { int num_lit_codes, num_dist_codes, num_bit_lengths; mz_uint i, total_code_sizes_to_pack, num_packed_code_sizes, rle_z_count, rle_repeat_count, packed_code_sizes_index; mz_uint8 code_sizes_to_pack[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], packed_code_sizes[TDEFL_MAX_HUFF_SYMBOLS_0 + TDEFL_MAX_HUFF_SYMBOLS_1], prev_code_size = 0xFF; d->m_huff_count[0][256] = 1; tdefl_optimize_huffman_table(d, 0, TDEFL_MAX_HUFF_SYMBOLS_0, 15, MZ_FALSE); tdefl_optimize_huffman_table(d, 1, TDEFL_MAX_HUFF_SYMBOLS_1, 15, MZ_FALSE); for (num_lit_codes = 286; num_lit_codes > 257; num_lit_codes--) if (d->m_huff_code_sizes[0][num_lit_codes - 1]) break; for (num_dist_codes = 30; num_dist_codes > 1; num_dist_codes--) if (d->m_huff_code_sizes[1][num_dist_codes - 1]) break; memcpy(code_sizes_to_pack, &d->m_huff_code_sizes[0][0], num_lit_codes); memcpy(code_sizes_to_pack + num_lit_codes, &d->m_huff_code_sizes[1][0], num_dist_codes); total_code_sizes_to_pack = num_lit_codes + num_dist_codes; num_packed_code_sizes = 0; rle_z_count = 0; rle_repeat_count = 0; memset(&d->m_huff_count[2][0], 0, sizeof(d->m_huff_count[2][0]) * TDEFL_MAX_HUFF_SYMBOLS_2); for (i = 0; i < total_code_sizes_to_pack; i++) { mz_uint8 code_size = code_sizes_to_pack[i]; if (!code_size) { TDEFL_RLE_PREV_CODE_SIZE(); if (++rle_z_count == 138) { TDEFL_RLE_ZERO_CODE_SIZE(); } } else { TDEFL_RLE_ZERO_CODE_SIZE(); if (code_size != prev_code_size) { TDEFL_RLE_PREV_CODE_SIZE(); d->m_huff_count[2][code_size] = (mz_uint16)(d->m_huff_count[2][code_size] + 1); packed_code_sizes[num_packed_code_sizes++] = code_size; } else if (++rle_repeat_count == 6) { TDEFL_RLE_PREV_CODE_SIZE(); } } prev_code_size = code_size; } if (rle_repeat_count) { TDEFL_RLE_PREV_CODE_SIZE(); } else { TDEFL_RLE_ZERO_CODE_SIZE(); } tdefl_optimize_huffman_table(d, 2, TDEFL_MAX_HUFF_SYMBOLS_2, 7, MZ_FALSE); TDEFL_PUT_BITS(2, 2); 
TDEFL_PUT_BITS(num_lit_codes - 257, 5); TDEFL_PUT_BITS(num_dist_codes - 1, 5); for (num_bit_lengths = 18; num_bit_lengths >= 0; num_bit_lengths--) if (d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[num_bit_lengths]]) break; num_bit_lengths = MZ_MAX(4, (num_bit_lengths + 1)); TDEFL_PUT_BITS(num_bit_lengths - 4, 4); for (i = 0; (int)i < num_bit_lengths; i++) TDEFL_PUT_BITS(d->m_huff_code_sizes[2][s_tdefl_packed_code_size_syms_swizzle[i]], 3); for (packed_code_sizes_index = 0; packed_code_sizes_index < num_packed_code_sizes;) { mz_uint code = packed_code_sizes[packed_code_sizes_index++]; MZ_ASSERT(code < TDEFL_MAX_HUFF_SYMBOLS_2); TDEFL_PUT_BITS(d->m_huff_codes[2][code], d->m_huff_code_sizes[2][code]); if (code >= 16) TDEFL_PUT_BITS(packed_code_sizes[packed_code_sizes_index++], "\02\03\07"[code - 16]); } } static void tdefl_start_static_block(tdefl_compressor* d) { mz_uint i; mz_uint8* p = &d->m_huff_code_sizes[0][0]; for (i = 0; i <= 143; ++i) *p++ = 8; for (; i <= 255; ++i) *p++ = 9; for (; i <= 279; ++i) *p++ = 7; for (; i <= 287; ++i) *p++ = 8; memset(d->m_huff_code_sizes[1], 5, 32); tdefl_optimize_huffman_table(d, 0, 288, 15, MZ_TRUE); tdefl_optimize_huffman_table(d, 1, 32, 15, MZ_TRUE); TDEFL_PUT_BITS(1, 2); } static const mz_uint mz_bitmasks[17] = {0x0000, 0x0001, 0x0003, 0x0007, 0x000F, 0x001F, 0x003F, 0x007F, 0x00FF, 0x01FF, 0x03FF, 0x07FF, 0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF}; #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS static mz_bool tdefl_compress_lz_codes(tdefl_compressor* d) { mz_uint flags; mz_uint8* pLZ_codes; mz_uint8* pOutput_buf = d->m_pOutput_buf; mz_uint8* pLZ_code_buf_end = d->m_pLZ_code_buf; mz_uint64 bit_buffer = d->m_bit_buffer; mz_uint bits_in = d->m_bits_in; #define TDEFL_PUT_BITS_FAST(b, l) \ { \ bit_buffer |= (((mz_uint64)(b)) << bits_in); \ bits_in += (l); \ } flags = 1; for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < pLZ_code_buf_end; flags >>= 1) { if (flags == 1) flags = 
*pLZ_codes++ | 0x100; if (flags & 1) { mz_uint s0, s1, n0, n1, sym, num_extra_bits; mz_uint match_len = pLZ_codes[0], match_dist = *(const mz_uint16*)(pLZ_codes + 1); pLZ_codes += 3; MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); TDEFL_PUT_BITS_FAST(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); // This sequence coaxes MSVC into using cmov's vs. jmp's. s0 = s_tdefl_small_dist_sym[match_dist & 511]; n0 = s_tdefl_small_dist_extra[match_dist & 511]; s1 = s_tdefl_large_dist_sym[match_dist >> 8]; n1 = s_tdefl_large_dist_extra[match_dist >> 8]; sym = (match_dist < 512) ? s0 : s1; num_extra_bits = (match_dist < 512) ? n0 : n1; MZ_ASSERT(d->m_huff_code_sizes[1][sym]); TDEFL_PUT_BITS_FAST(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); TDEFL_PUT_BITS_FAST(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); } else { mz_uint lit = *pLZ_codes++; MZ_ASSERT(d->m_huff_code_sizes[0][lit]); TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { flags >>= 1; lit = *pLZ_codes++; MZ_ASSERT(d->m_huff_code_sizes[0][lit]); TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); if (((flags & 2) == 0) && (pLZ_codes < pLZ_code_buf_end)) { flags >>= 1; lit = *pLZ_codes++; MZ_ASSERT(d->m_huff_code_sizes[0][lit]); TDEFL_PUT_BITS_FAST(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); } } } if (pOutput_buf >= d->m_pOutput_buf_end) return MZ_FALSE; *(mz_uint64*)pOutput_buf = bit_buffer; pOutput_buf += (bits_in >> 3); bit_buffer >>= (bits_in & ~7); bits_in &= 7; } #undef TDEFL_PUT_BITS_FAST d->m_pOutput_buf = pOutput_buf; d->m_bits_in = 0; d->m_bit_buffer = 0; while (bits_in) { mz_uint32 n = MZ_MIN(bits_in, 16); TDEFL_PUT_BITS((mz_uint)bit_buffer & mz_bitmasks[n], n); bit_buffer >>= n; bits_in -= n; } 
TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); return (d->m_pOutput_buf < d->m_pOutput_buf_end); } #else static mz_bool tdefl_compress_lz_codes(tdefl_compressor* d) { mz_uint flags; mz_uint8* pLZ_codes; flags = 1; for (pLZ_codes = d->m_lz_code_buf; pLZ_codes < d->m_pLZ_code_buf; flags >>= 1) { if (flags == 1) flags = *pLZ_codes++ | 0x100; if (flags & 1) { mz_uint sym, num_extra_bits; mz_uint match_len = pLZ_codes[0], match_dist = (pLZ_codes[1] | (pLZ_codes[2] << 8)); pLZ_codes += 3; MZ_ASSERT(d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); TDEFL_PUT_BITS(d->m_huff_codes[0][s_tdefl_len_sym[match_len]], d->m_huff_code_sizes[0][s_tdefl_len_sym[match_len]]); TDEFL_PUT_BITS(match_len & mz_bitmasks[s_tdefl_len_extra[match_len]], s_tdefl_len_extra[match_len]); if (match_dist < 512) { sym = s_tdefl_small_dist_sym[match_dist]; num_extra_bits = s_tdefl_small_dist_extra[match_dist]; } else { sym = s_tdefl_large_dist_sym[match_dist >> 8]; num_extra_bits = s_tdefl_large_dist_extra[match_dist >> 8]; } MZ_ASSERT(d->m_huff_code_sizes[1][sym]); TDEFL_PUT_BITS(d->m_huff_codes[1][sym], d->m_huff_code_sizes[1][sym]); TDEFL_PUT_BITS(match_dist & mz_bitmasks[num_extra_bits], num_extra_bits); } else { mz_uint lit = *pLZ_codes++; MZ_ASSERT(d->m_huff_code_sizes[0][lit]); TDEFL_PUT_BITS(d->m_huff_codes[0][lit], d->m_huff_code_sizes[0][lit]); } } TDEFL_PUT_BITS(d->m_huff_codes[0][256], d->m_huff_code_sizes[0][256]); return (d->m_pOutput_buf < d->m_pOutput_buf_end); } #endif // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN && MINIZ_HAS_64BIT_REGISTERS static mz_bool tdefl_compress_block(tdefl_compressor* d, mz_bool static_block) { if (static_block) tdefl_start_static_block(d); else tdefl_start_dynamic_block(d); return tdefl_compress_lz_codes(d); } static int tdefl_flush_block(tdefl_compressor* d, int flush) { mz_uint saved_bit_buf, saved_bits_in; mz_uint8* pSaved_output_buf; mz_bool comp_block_succeeded = MZ_FALSE; int n, use_raw_block = 
((d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS) != 0) && (d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size; mz_uint8* pOutput_buf_start = ((d->m_pPut_buf_func == NULL) && ((*d->m_pOut_buf_size - d->m_out_buf_ofs) >= TDEFL_OUT_BUF_SIZE)) ? ((mz_uint8*)d->m_pOut_buf + d->m_out_buf_ofs) : d->m_output_buf; d->m_pOutput_buf = pOutput_buf_start; d->m_pOutput_buf_end = d->m_pOutput_buf + TDEFL_OUT_BUF_SIZE - 16; MZ_ASSERT(!d->m_output_flush_remaining); d->m_output_flush_ofs = 0; d->m_output_flush_remaining = 0; *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> d->m_num_flags_left); d->m_pLZ_code_buf -= (d->m_num_flags_left == 8); if ((d->m_flags & TDEFL_WRITE_ZLIB_HEADER) && (!d->m_block_index)) { TDEFL_PUT_BITS(0x78, 8); TDEFL_PUT_BITS(0x01, 8); } TDEFL_PUT_BITS(flush == TDEFL_FINISH, 1); pSaved_output_buf = d->m_pOutput_buf; saved_bit_buf = d->m_bit_buffer; saved_bits_in = d->m_bits_in; if (!use_raw_block) comp_block_succeeded = tdefl_compress_block(d, (d->m_flags & TDEFL_FORCE_ALL_STATIC_BLOCKS) || (d->m_total_lz_bytes < 48)); // If the block gets expanded, forget the current contents of the output buffer and send a raw block instead. if (((use_raw_block) || ((d->m_total_lz_bytes) && ((d->m_pOutput_buf - pSaved_output_buf + 1U) >= d->m_total_lz_bytes))) && ((d->m_lookahead_pos - d->m_lz_code_buf_dict_pos) <= d->m_dict_size)) { mz_uint i; d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; TDEFL_PUT_BITS(0, 2); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, d->m_total_lz_bytes ^= 0xFFFF) { TDEFL_PUT_BITS(d->m_total_lz_bytes & 0xFFFF, 16); } for (i = 0; i < d->m_total_lz_bytes; ++i) { TDEFL_PUT_BITS(d->m_dict[(d->m_lz_code_buf_dict_pos + i) & TDEFL_LZ_DICT_SIZE_MASK], 8); } } // Check for the extremely unlikely (if not impossible) case of the compressed block not fitting into the output buffer when using dynamic codes. 
else if (!comp_block_succeeded) { d->m_pOutput_buf = pSaved_output_buf; d->m_bit_buffer = saved_bit_buf, d->m_bits_in = saved_bits_in; tdefl_compress_block(d, MZ_TRUE); } if (flush) { if (flush == TDEFL_FINISH) { if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } if (d->m_flags & TDEFL_WRITE_ZLIB_HEADER) { mz_uint i, a = d->m_adler32; for (i = 0; i < 4; i++) { TDEFL_PUT_BITS((a >> 24) & 0xFF, 8); a <<= 8; } } } else { mz_uint i, z = 0; TDEFL_PUT_BITS(0, 3); if (d->m_bits_in) { TDEFL_PUT_BITS(0, 8 - d->m_bits_in); } for (i = 2; i; --i, z ^= 0xFFFF) { TDEFL_PUT_BITS(z & 0xFFFF, 16); } } } MZ_ASSERT(d->m_pOutput_buf < d->m_pOutput_buf_end); memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_lz_code_buf_dict_pos += d->m_total_lz_bytes; d->m_total_lz_bytes = 0; d->m_block_index++; if ((n = (int)(d->m_pOutput_buf - pOutput_buf_start)) != 0) { if (d->m_pPut_buf_func) { *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8*)d->m_pIn_buf; if (!(*d->m_pPut_buf_func)(d->m_output_buf, n, d->m_pPut_buf_user)) return (d->m_prev_return_status = TDEFL_STATUS_PUT_BUF_FAILED); } else if (pOutput_buf_start == d->m_output_buf) { int bytes_to_copy = (int)MZ_MIN((size_t)n, (size_t)(*d->m_pOut_buf_size - d->m_out_buf_ofs)); memcpy((mz_uint8*)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf, bytes_to_copy); d->m_out_buf_ofs += bytes_to_copy; if ((n -= bytes_to_copy) != 0) { d->m_output_flush_ofs = bytes_to_copy; d->m_output_flush_remaining = n; } } else { d->m_out_buf_ofs += n; } } return d->m_output_flush_remaining; } #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES #define TDEFL_READ_UNALIGNED_WORD(p) *(const mz_uint16*)(p) static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor* d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, 
mz_uint* pMatch_dist, mz_uint* pMatch_len) { mz_uint dist, pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; const mz_uint16 *s = (const mz_uint16*)(d->m_dict + pos), *p, *q; mz_uint16 c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]), s01 = TDEFL_READ_UNALIGNED_WORD(s); MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; for (;;) { for (;;) { if (--num_probes_left == 0) return; #define TDEFL_PROBE \ next_probe_pos = d->m_next[probe_pos]; \ if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ return; \ probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ if (TDEFL_READ_UNALIGNED_WORD(&d->m_dict[probe_pos + match_len - 1]) == c01) \ break; TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; } if (!dist) break; q = (const mz_uint16*)(d->m_dict + probe_pos); if (TDEFL_READ_UNALIGNED_WORD(q) != s01) continue; p = s; probe_len = 32; do { } while ((TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0)); if (!probe_len) { *pMatch_dist = dist; *pMatch_len = MZ_MIN(max_match_len, TDEFL_MAX_MATCH_LEN); break; } else if ((probe_len = ((mz_uint)(p - s) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q)) > match_len) { *pMatch_dist = dist; if ((*pMatch_len = match_len = MZ_MIN(max_match_len, probe_len)) == max_match_len) break; c01 = TDEFL_READ_UNALIGNED_WORD(&d->m_dict[pos + match_len - 1]); } } } #else static MZ_FORCEINLINE void tdefl_find_match(tdefl_compressor* d, mz_uint lookahead_pos, mz_uint max_dist, mz_uint max_match_len, mz_uint* pMatch_dist, mz_uint* pMatch_len) { mz_uint dist, pos = lookahead_pos & 
TDEFL_LZ_DICT_SIZE_MASK, match_len = *pMatch_len, probe_pos = pos, next_probe_pos, probe_len; mz_uint num_probes_left = d->m_max_probes[match_len >= 32]; const mz_uint8 *s = d->m_dict + pos, *p, *q; mz_uint8 c0 = d->m_dict[pos + match_len], c1 = d->m_dict[pos + match_len - 1]; MZ_ASSERT(max_match_len <= TDEFL_MAX_MATCH_LEN); if (max_match_len <= match_len) return; for (;;) { for (;;) { if (--num_probes_left == 0) return; #define TDEFL_PROBE \ next_probe_pos = d->m_next[probe_pos]; \ if ((!next_probe_pos) || ((dist = (mz_uint16)(lookahead_pos - next_probe_pos)) > max_dist)) \ return; \ probe_pos = next_probe_pos & TDEFL_LZ_DICT_SIZE_MASK; \ if ((d->m_dict[probe_pos + match_len] == c0) && (d->m_dict[probe_pos + match_len - 1] == c1)) \ break; TDEFL_PROBE; TDEFL_PROBE; TDEFL_PROBE; } if (!dist) break; p = s; q = d->m_dict + probe_pos; for (probe_len = 0; probe_len < max_match_len; probe_len++) if (*p++ != *q++) break; if (probe_len > match_len) { *pMatch_dist = dist; if ((*pMatch_len = match_len = probe_len) == max_match_len) return; c0 = d->m_dict[pos + match_len]; c1 = d->m_dict[pos + match_len - 1]; } } } #endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN static mz_bool tdefl_compress_fast(tdefl_compressor* d) { // Faster, minimally featured LZRW1-style match+parse loop with better register utilization. Intended for applications where raw throughput is valued more highly than ratio. 
mz_uint lookahead_pos = d->m_lookahead_pos, lookahead_size = d->m_lookahead_size, dict_size = d->m_dict_size, total_lz_bytes = d->m_total_lz_bytes, num_flags_left = d->m_num_flags_left; mz_uint8 *pLZ_code_buf = d->m_pLZ_code_buf, *pLZ_flags = d->m_pLZ_flags; mz_uint cur_pos = lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK; while ((d->m_src_buf_left) || ((d->m_flush) && (lookahead_size))) { const mz_uint TDEFL_COMP_FAST_LOOKAHEAD_SIZE = 4096; mz_uint dst_pos = (lookahead_pos + lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK; mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(d->m_src_buf_left, TDEFL_COMP_FAST_LOOKAHEAD_SIZE - lookahead_size); d->m_src_buf_left -= num_bytes_to_process; lookahead_size += num_bytes_to_process; while (num_bytes_to_process) { mz_uint32 n = MZ_MIN(TDEFL_LZ_DICT_SIZE - dst_pos, num_bytes_to_process); memcpy(d->m_dict + dst_pos, d->m_pSrc, n); if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1)) memcpy(d->m_dict + TDEFL_LZ_DICT_SIZE + dst_pos, d->m_pSrc, MZ_MIN(n, (TDEFL_MAX_MATCH_LEN - 1) - dst_pos)); d->m_pSrc += n; dst_pos = (dst_pos + n) & TDEFL_LZ_DICT_SIZE_MASK; num_bytes_to_process -= n; } dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - lookahead_size, dict_size); if ((!d->m_flush) && (lookahead_size < TDEFL_COMP_FAST_LOOKAHEAD_SIZE)) break; while (lookahead_size >= 4) { mz_uint cur_match_dist, cur_match_len = 1; mz_uint8* pCur_dict = d->m_dict + cur_pos; mz_uint first_trigram = (*(const mz_uint32*)pCur_dict) & 0xFFFFFF; mz_uint hash = (first_trigram ^ (first_trigram >> (24 - (TDEFL_LZ_HASH_BITS - 8)))) & TDEFL_LEVEL1_HASH_SIZE_MASK; mz_uint probe_pos = d->m_hash[hash]; d->m_hash[hash] = (mz_uint16)lookahead_pos; if (((cur_match_dist = (mz_uint16)(lookahead_pos - probe_pos)) <= dict_size) && ((*(const mz_uint32*)(d->m_dict + (probe_pos &= TDEFL_LZ_DICT_SIZE_MASK)) & 0xFFFFFF) == first_trigram)) { const mz_uint16* p = (const mz_uint16*)pCur_dict; const mz_uint16* q = (const mz_uint16*)(d->m_dict + probe_pos); mz_uint32 probe_len = 32; do { } while 
((TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (TDEFL_READ_UNALIGNED_WORD(++p) == TDEFL_READ_UNALIGNED_WORD(++q)) && (--probe_len > 0)); cur_match_len = ((mz_uint)(p - (const mz_uint16*)pCur_dict) * 2) + (mz_uint)(*(const mz_uint8*)p == *(const mz_uint8*)q); if (!probe_len) cur_match_len = cur_match_dist ? TDEFL_MAX_MATCH_LEN : 0; if ((cur_match_len < TDEFL_MIN_MATCH_LEN) || ((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U))) { cur_match_len = 1; *pLZ_code_buf++ = (mz_uint8)first_trigram; *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); d->m_huff_count[0][(mz_uint8)first_trigram]++; } else { mz_uint32 s0, s1; cur_match_len = MZ_MIN(cur_match_len, lookahead_size); MZ_ASSERT((cur_match_len >= TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 1) && (cur_match_dist <= TDEFL_LZ_DICT_SIZE)); cur_match_dist--; pLZ_code_buf[0] = (mz_uint8)(cur_match_len - TDEFL_MIN_MATCH_LEN); *(mz_uint16*)(&pLZ_code_buf[1]) = (mz_uint16)cur_match_dist; pLZ_code_buf += 3; *pLZ_flags = (mz_uint8)((*pLZ_flags >> 1) | 0x80); s0 = s_tdefl_small_dist_sym[cur_match_dist & 511]; s1 = s_tdefl_large_dist_sym[cur_match_dist >> 8]; d->m_huff_count[1][(cur_match_dist < 512) ? 
s0 : s1]++; d->m_huff_count[0][s_tdefl_len_sym[cur_match_len - TDEFL_MIN_MATCH_LEN]]++; } } else { *pLZ_code_buf++ = (mz_uint8)first_trigram; *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); d->m_huff_count[0][(mz_uint8)first_trigram]++; } if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } total_lz_bytes += cur_match_len; lookahead_pos += cur_match_len; dict_size = MZ_MIN(dict_size + cur_match_len, TDEFL_LZ_DICT_SIZE); cur_pos = (cur_pos + cur_match_len) & TDEFL_LZ_DICT_SIZE_MASK; MZ_ASSERT(lookahead_size >= cur_match_len); lookahead_size -= cur_match_len; if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { int n; d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; if ((n = tdefl_flush_block(d, 0)) != 0) return (n < 0) ? MZ_FALSE : MZ_TRUE; total_lz_bytes = d->m_total_lz_bytes; pLZ_code_buf = d->m_pLZ_code_buf; pLZ_flags = d->m_pLZ_flags; num_flags_left = d->m_num_flags_left; } } while (lookahead_size) { mz_uint8 lit = d->m_dict[cur_pos]; total_lz_bytes++; *pLZ_code_buf++ = lit; *pLZ_flags = (mz_uint8)(*pLZ_flags >> 1); if (--num_flags_left == 0) { num_flags_left = 8; pLZ_flags = pLZ_code_buf++; } d->m_huff_count[0][lit]++; lookahead_pos++; dict_size = MZ_MIN(dict_size + 1, TDEFL_LZ_DICT_SIZE); cur_pos = (cur_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK; lookahead_size--; if (pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) { int n; d->m_lookahead_pos = lookahead_pos; d->m_lookahead_size = lookahead_size; d->m_dict_size = dict_size; d->m_total_lz_bytes = total_lz_bytes; d->m_pLZ_code_buf = pLZ_code_buf; d->m_pLZ_flags = pLZ_flags; d->m_num_flags_left = num_flags_left; if ((n = tdefl_flush_block(d, 0)) != 0) return (n < 0) ? 
MZ_FALSE : MZ_TRUE;
        // tdefl_flush_block() reset the LZ code buffer; reload the cached copies from the compressor state.
        total_lz_bytes = d->m_total_lz_bytes;
        pLZ_code_buf = d->m_pLZ_code_buf;
        pLZ_flags = d->m_pLZ_flags;
        num_flags_left = d->m_num_flags_left;
      }
    }
  }

  // Write the locally cached state back into the compressor.
  d->m_lookahead_pos = lookahead_pos;
  d->m_lookahead_size = lookahead_size;
  d->m_dict_size = dict_size;
  d->m_total_lz_bytes = total_lz_bytes;
  d->m_pLZ_code_buf = pLZ_code_buf;
  d->m_pLZ_flags = pLZ_flags;
  d->m_num_flags_left = num_flags_left;
  return MZ_TRUE;
}
#endif  // MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN

// Records a single literal byte in the LZ code buffer.
// The current flag byte is shifted right by one (top bit left clear for a literal); after every
// 8 recorded codes a fresh flag byte is reserved in the code buffer. Also bumps the
// literal/length histogram entry for `lit`.
static MZ_FORCEINLINE void tdefl_record_literal(tdefl_compressor* d, mz_uint8 lit) {
  d->m_total_lz_bytes++;
  *d->m_pLZ_code_buf++ = lit;
  *d->m_pLZ_flags = (mz_uint8)(*d->m_pLZ_flags >> 1);
  if (--d->m_num_flags_left == 0) {
    d->m_num_flags_left = 8;
    d->m_pLZ_flags = d->m_pLZ_code_buf++;
  }
  d->m_huff_count[0][lit]++;
}

// Records a match in the LZ code buffer as three bytes: (match_len - TDEFL_MIN_MATCH_LEN),
// then (match_dist - 1) as low byte / high byte. The flag byte gets its top bit set to mark
// the code as a match, and the distance and length histograms are updated.
static MZ_FORCEINLINE void tdefl_record_match(tdefl_compressor* d, mz_uint match_len, mz_uint match_dist) {
  mz_uint32 s0, s1;

  MZ_ASSERT((match_len >= TDEFL_MIN_MATCH_LEN) && (match_dist >= 1) && (match_dist <= TDEFL_LZ_DICT_SIZE));

  d->m_total_lz_bytes += match_len;

  d->m_pLZ_code_buf[0] = (mz_uint8)(match_len - TDEFL_MIN_MATCH_LEN);

  // Distances are stored biased by one.
  match_dist -= 1;
  d->m_pLZ_code_buf[1] = (mz_uint8)(match_dist & 0xFF);
  d->m_pLZ_code_buf[2] = (mz_uint8)(match_dist >> 8);
  d->m_pLZ_code_buf += 3;

  *d->m_pLZ_flags = (mz_uint8)((*d->m_pLZ_flags >> 1) | 0x80);
  if (--d->m_num_flags_left == 0) {
    d->m_num_flags_left = 8;
    d->m_pLZ_flags = d->m_pLZ_code_buf++;
  }

  // Biased distances below 512 use the small symbol table, everything else the large one.
  s0 = s_tdefl_small_dist_sym[match_dist & 511];
  s1 = s_tdefl_large_dist_sym[(match_dist >> 8) & 127];
  d->m_huff_count[1][(match_dist < 512) ? s0 : s1]++;

  if (match_len >= TDEFL_MIN_MATCH_LEN)
    d->m_huff_count[0][s_tdefl_len_sym[match_len - TDEFL_MIN_MATCH_LEN]]++;
}

// General-purpose compression loop: feeds source bytes into the dictionary and hash chains,
// then emits literals/matches using either greedy or lazy (one-code-deferred) parsing.
// Returns MZ_TRUE on normal completion; when tdefl_flush_block() returns non-zero the loop
// stops early and returns MZ_FALSE for a negative result, MZ_TRUE otherwise.
static mz_bool tdefl_compress_normal(tdefl_compressor* d) {
  const mz_uint8* pSrc = d->m_pSrc;
  size_t src_buf_left = d->m_src_buf_left;
  tdefl_flush flush = d->m_flush;

  while ((src_buf_left) || ((flush) && (d->m_lookahead_size))) {
    mz_uint len_to_move, cur_match_dist, cur_match_len, cur_pos;
    // Update dictionary and hash chains. Keeps the lookahead size equal to TDEFL_MAX_MATCH_LEN.
    if ((d->m_lookahead_size + d->m_dict_size) >= (TDEFL_MIN_MATCH_LEN - 1)) {
      // Enough history exists to keep a rolling hash going across the whole chunk.
      mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK, ins_pos = d->m_lookahead_pos + d->m_lookahead_size - 2;
      mz_uint hash = (d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK];
      mz_uint num_bytes_to_process = (mz_uint)MZ_MIN(src_buf_left, TDEFL_MAX_MATCH_LEN - d->m_lookahead_size);
      const mz_uint8* pSrc_end = pSrc + num_bytes_to_process;
      src_buf_left -= num_bytes_to_process;
      d->m_lookahead_size += num_bytes_to_process;
      while (pSrc != pSrc_end) {
        mz_uint8 c = *pSrc++;
        d->m_dict[dst_pos] = c;
        // The first TDEFL_MAX_MATCH_LEN - 1 bytes are mirrored past the end of the dictionary.
        if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
          d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
        hash = ((hash << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
        d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash];
        d->m_hash[hash] = (mz_uint16)(ins_pos);
        dst_pos = (dst_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK;
        ins_pos++;
      }
    } else {
      // Not enough bytes yet for a rolling hash: add bytes one at a time and compute the
      // hash from scratch once TDEFL_MIN_MATCH_LEN bytes are available.
      while ((src_buf_left) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN)) {
        mz_uint8 c = *pSrc++;
        mz_uint dst_pos = (d->m_lookahead_pos + d->m_lookahead_size) & TDEFL_LZ_DICT_SIZE_MASK;
        src_buf_left--;
        d->m_dict[dst_pos] = c;
        if (dst_pos < (TDEFL_MAX_MATCH_LEN - 1))
          d->m_dict[TDEFL_LZ_DICT_SIZE + dst_pos] = c;
        if ((++d->m_lookahead_size + d->m_dict_size) >= TDEFL_MIN_MATCH_LEN) {
          mz_uint ins_pos = d->m_lookahead_pos + (d->m_lookahead_size - 1) - 2;
          mz_uint hash = ((d->m_dict[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] << (TDEFL_LZ_HASH_SHIFT * 2)) ^ (d->m_dict[(ins_pos + 1) & TDEFL_LZ_DICT_SIZE_MASK] << TDEFL_LZ_HASH_SHIFT) ^ c) & (TDEFL_LZ_HASH_SIZE - 1);
          d->m_next[ins_pos & TDEFL_LZ_DICT_SIZE_MASK] = d->m_hash[hash];
          d->m_hash[hash] = (mz_uint16)(ins_pos);
        }
      }
    }
    d->m_dict_size = MZ_MIN(TDEFL_LZ_DICT_SIZE - d->m_lookahead_size, d->m_dict_size);
    // Without a flush request, wait for more input until the lookahead is full.
    if ((!flush) && (d->m_lookahead_size < TDEFL_MAX_MATCH_LEN))
      break;

    // Simple lazy/greedy parsing state machine.
    len_to_move = 1;
    cur_match_dist = 0;
    cur_match_len = d->m_saved_match_len ? d->m_saved_match_len : (TDEFL_MIN_MATCH_LEN - 1);
    cur_pos = d->m_lookahead_pos & TDEFL_LZ_DICT_SIZE_MASK;
    if (d->m_flags & (TDEFL_RLE_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS)) {
      if ((d->m_dict_size) && (!(d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS))) {
        // RLE mode: only consider a run of the previous byte (distance 1).
        mz_uint8 c = d->m_dict[(cur_pos - 1) & TDEFL_LZ_DICT_SIZE_MASK];
        cur_match_len = 0;
        while (cur_match_len < d->m_lookahead_size) {
          if (d->m_dict[cur_pos + cur_match_len] != c)
            break;
          cur_match_len++;
        }
        if (cur_match_len < TDEFL_MIN_MATCH_LEN)
          cur_match_len = 0;
        else
          cur_match_dist = 1;
      }
    } else {
      tdefl_find_match(d, d->m_lookahead_pos, d->m_dict_size, d->m_lookahead_size, &cur_match_dist, &cur_match_len);
    }
    // Discard minimum-length matches at distance >= 8K, matches whose distance equals cur_pos,
    // and (with TDEFL_FILTER_MATCHES) matches of length 5 or less.
    if (((cur_match_len == TDEFL_MIN_MATCH_LEN) && (cur_match_dist >= 8U * 1024U)) || (cur_pos == cur_match_dist) || ((d->m_flags & TDEFL_FILTER_MATCHES) && (cur_match_len <= 5))) {
      cur_match_dist = cur_match_len = 0;
    }
    if (d->m_saved_match_len) {
      // A match was deferred on the previous iteration.
      if (cur_match_len > d->m_saved_match_len) {
        // The new match is longer: the deferred position becomes a literal.
        tdefl_record_literal(d, (mz_uint8)d->m_saved_lit);
        if (cur_match_len >= 128) {
          tdefl_record_match(d, cur_match_len, cur_match_dist);
          d->m_saved_match_len = 0;
          len_to_move = cur_match_len;
        } else {
          d->m_saved_lit = d->m_dict[cur_pos];
          d->m_saved_match_dist = cur_match_dist;
          d->m_saved_match_len = cur_match_len;
        }
      } else {
        // The deferred match wins; its first byte was already stepped over.
        tdefl_record_match(d, d->m_saved_match_len, d->m_saved_match_dist);
        len_to_move = d->m_saved_match_len - 1;
        d->m_saved_match_len = 0;
      }
    } else if (!cur_match_dist)
      tdefl_record_literal(d, d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)]);
    else if ((d->m_greedy_parsing) || (d->m_flags & TDEFL_RLE_MATCHES) || (cur_match_len >= 128)) {
      tdefl_record_match(d, cur_match_len, cur_match_dist);
      len_to_move = cur_match_len;
    } else {
      // Lazy parsing: defer this match by one position to see whether a longer one follows.
      d->m_saved_lit = d->m_dict[MZ_MIN(cur_pos, sizeof(d->m_dict) - 1)];
      d->m_saved_match_dist = cur_match_dist;
      d->m_saved_match_len = cur_match_len;
    }
    // Move the lookahead forward by len_to_move bytes.
    d->m_lookahead_pos += len_to_move;
    MZ_ASSERT(d->m_lookahead_size >= len_to_move);
    d->m_lookahead_size -= len_to_move;
    d->m_dict_size = MZ_MIN(d->m_dict_size + len_to_move, TDEFL_LZ_DICT_SIZE);
    // Check if it's time to flush the current LZ codes to the internal output buffer.
    if ((d->m_pLZ_code_buf > &d->m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE - 8]) ||
        ((d->m_total_lz_bytes > 31 * 1024) && (((((mz_uint)(d->m_pLZ_code_buf - d->m_lz_code_buf) * 115) >> 7) >= d->m_total_lz_bytes) || (d->m_flags & TDEFL_FORCE_ALL_RAW_BLOCKS)))) {
      int n;
      // Publish the source cursor before flushing so the state is consistent on early return.
      d->m_pSrc = pSrc;
      d->m_src_buf_left = src_buf_left;
      if ((n = tdefl_flush_block(d, 0)) != 0)
        return (n < 0) ? MZ_FALSE : MZ_TRUE;
    }
  }

  d->m_pSrc = pSrc;
  d->m_src_buf_left = src_buf_left;
  return MZ_TRUE;
}

// Reports how many input bytes were consumed and copies as much pending output as fits into
// the caller's output buffer (when one was supplied). Returns TDEFL_STATUS_DONE only when the
// stream is finished and no pending output remains.
static tdefl_status tdefl_flush_output_buffer(tdefl_compressor* d) {
  if (d->m_pIn_buf_size) {
    *d->m_pIn_buf_size = d->m_pSrc - (const mz_uint8*)d->m_pIn_buf;
  }

  if (d->m_pOut_buf_size) {
    size_t n = MZ_MIN(*d->m_pOut_buf_size - d->m_out_buf_ofs, d->m_output_flush_remaining);
    memcpy((mz_uint8*)d->m_pOut_buf + d->m_out_buf_ofs, d->m_output_buf + d->m_output_flush_ofs, n);
    d->m_output_flush_ofs += (mz_uint)n;
    d->m_output_flush_remaining -= (mz_uint)n;
    d->m_out_buf_ofs += n;

    *d->m_pOut_buf_size = d->m_out_buf_ofs;
  }

  return (d->m_finished && !d->m_output_flush_remaining) ?
TDEFL_STATUS_DONE : TDEFL_STATUS_OKAY; } tdefl_status tdefl_compress(tdefl_compressor* d, const void* pIn_buf, size_t* pIn_buf_size, void* pOut_buf, size_t* pOut_buf_size, tdefl_flush flush) { if (!d) { if (pIn_buf_size) *pIn_buf_size = 0; if (pOut_buf_size) *pOut_buf_size = 0; return TDEFL_STATUS_BAD_PARAM; } d->m_pIn_buf = pIn_buf; d->m_pIn_buf_size = pIn_buf_size; d->m_pOut_buf = pOut_buf; d->m_pOut_buf_size = pOut_buf_size; d->m_pSrc = (const mz_uint8*)(pIn_buf); d->m_src_buf_left = pIn_buf_size ? *pIn_buf_size : 0; d->m_out_buf_ofs = 0; d->m_flush = flush; if (((d->m_pPut_buf_func != NULL) == ((pOut_buf != NULL) || (pOut_buf_size != NULL))) || (d->m_prev_return_status != TDEFL_STATUS_OKAY) || (d->m_wants_to_finish && (flush != TDEFL_FINISH)) || (pIn_buf_size && *pIn_buf_size && !pIn_buf) || (pOut_buf_size && *pOut_buf_size && !pOut_buf)) { if (pIn_buf_size) *pIn_buf_size = 0; if (pOut_buf_size) *pOut_buf_size = 0; return (d->m_prev_return_status = TDEFL_STATUS_BAD_PARAM); } d->m_wants_to_finish |= (flush == TDEFL_FINISH); if ((d->m_output_flush_remaining) || (d->m_finished)) return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN if (((d->m_flags & TDEFL_MAX_PROBES_MASK) == 1) && ((d->m_flags & TDEFL_GREEDY_PARSING_FLAG) != 0) && ((d->m_flags & (TDEFL_FILTER_MATCHES | TDEFL_FORCE_ALL_RAW_BLOCKS | TDEFL_RLE_MATCHES)) == 0)) { if (!tdefl_compress_fast(d)) return d->m_prev_return_status; } else #endif // #if MINIZ_USE_UNALIGNED_LOADS_AND_STORES && MINIZ_LITTLE_ENDIAN { if (!tdefl_compress_normal(d)) return d->m_prev_return_status; } if ((d->m_flags & (TDEFL_WRITE_ZLIB_HEADER | TDEFL_COMPUTE_ADLER32)) && (pIn_buf)) d->m_adler32 = (mz_uint32)mz_adler32(d->m_adler32, (const mz_uint8*)pIn_buf, d->m_pSrc - (const mz_uint8*)pIn_buf); if ((flush) && (!d->m_lookahead_size) && (!d->m_src_buf_left) && (!d->m_output_flush_remaining)) { if (tdefl_flush_block(d, flush) < 0) return 
d->m_prev_return_status; d->m_finished = (flush == TDEFL_FINISH); if (flush == TDEFL_FULL_FLUSH) { MZ_CLEAR_OBJ(d->m_hash); MZ_CLEAR_OBJ(d->m_next); d->m_dict_size = 0; } } return (d->m_prev_return_status = tdefl_flush_output_buffer(d)); } tdefl_status tdefl_compress_buffer(tdefl_compressor* d, const void* pIn_buf, size_t in_buf_size, tdefl_flush flush) { MZ_ASSERT(d->m_pPut_buf_func); return tdefl_compress(d, pIn_buf, &in_buf_size, NULL, NULL, flush); } tdefl_status tdefl_init(tdefl_compressor* d, tdefl_put_buf_func_ptr pPut_buf_func, void* pPut_buf_user, int flags) { d->m_pPut_buf_func = pPut_buf_func; d->m_pPut_buf_user = pPut_buf_user; d->m_flags = (mz_uint)(flags); d->m_max_probes[0] = 1 + ((flags & 0xFFF) + 2) / 3; d->m_greedy_parsing = (flags & TDEFL_GREEDY_PARSING_FLAG) != 0; d->m_max_probes[1] = 1 + (((flags & 0xFFF) >> 2) + 2) / 3; if (!(flags & TDEFL_NONDETERMINISTIC_PARSING_FLAG)) MZ_CLEAR_OBJ(d->m_hash); d->m_lookahead_pos = d->m_lookahead_size = d->m_dict_size = d->m_total_lz_bytes = d->m_lz_code_buf_dict_pos = d->m_bits_in = 0; d->m_output_flush_ofs = d->m_output_flush_remaining = d->m_finished = d->m_block_index = d->m_bit_buffer = d->m_wants_to_finish = 0; d->m_pLZ_code_buf = d->m_lz_code_buf + 1; d->m_pLZ_flags = d->m_lz_code_buf; d->m_num_flags_left = 8; d->m_pOutput_buf = d->m_output_buf; d->m_pOutput_buf_end = d->m_output_buf; d->m_prev_return_status = TDEFL_STATUS_OKAY; d->m_saved_match_dist = d->m_saved_match_len = d->m_saved_lit = 0; d->m_adler32 = 1; d->m_pIn_buf = NULL; d->m_pOut_buf = NULL; d->m_pIn_buf_size = NULL; d->m_pOut_buf_size = NULL; d->m_flush = TDEFL_NO_FLUSH; d->m_pSrc = NULL; d->m_src_buf_left = 0; d->m_out_buf_ofs = 0; memset(&d->m_huff_count[0][0], 0, sizeof(d->m_huff_count[0][0]) * TDEFL_MAX_HUFF_SYMBOLS_0); memset(&d->m_huff_count[1][0], 0, sizeof(d->m_huff_count[1][0]) * TDEFL_MAX_HUFF_SYMBOLS_1); return TDEFL_STATUS_OKAY; } tdefl_status tdefl_get_prev_return_status(tdefl_compressor* d) { return d->m_prev_return_status; 
} mz_uint32 tdefl_get_adler32(tdefl_compressor* d) { return d->m_adler32; } mz_bool tdefl_compress_mem_to_output(const void* pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void* pPut_buf_user, int flags) { tdefl_compressor* pComp; mz_bool succeeded; if (((buf_len) && (!pBuf)) || (!pPut_buf_func)) return MZ_FALSE; pComp = (tdefl_compressor*)MZ_MALLOC(sizeof(tdefl_compressor)); if (!pComp) return MZ_FALSE; succeeded = (tdefl_init(pComp, pPut_buf_func, pPut_buf_user, flags) == TDEFL_STATUS_OKAY); succeeded = succeeded && (tdefl_compress_buffer(pComp, pBuf, buf_len, TDEFL_FINISH) == TDEFL_STATUS_DONE); MZ_FREE(pComp); return succeeded; } typedef struct { size_t m_size, m_capacity; mz_uint8* m_pBuf; mz_bool m_expandable; } tdefl_output_buffer; static mz_bool tdefl_output_buffer_putter(const void* pBuf, int len, void* pUser) { tdefl_output_buffer* p = (tdefl_output_buffer*)pUser; size_t new_size = p->m_size + len; if (new_size > p->m_capacity) { size_t new_capacity = p->m_capacity; mz_uint8* pNew_buf; if (!p->m_expandable) return MZ_FALSE; do { new_capacity = MZ_MAX(128U, new_capacity << 1U); } while (new_size > new_capacity); pNew_buf = (mz_uint8*)MZ_REALLOC(p->m_pBuf, new_capacity); if (!pNew_buf) return MZ_FALSE; p->m_pBuf = pNew_buf; p->m_capacity = new_capacity; } memcpy((mz_uint8*)p->m_pBuf + p->m_size, pBuf, len); p->m_size = new_size; return MZ_TRUE; } void* tdefl_compress_mem_to_heap(const void* pSrc_buf, size_t src_buf_len, size_t* pOut_len, int flags) { tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); if (!pOut_len) return MZ_FALSE; else *pOut_len = 0; out_buf.m_expandable = MZ_TRUE; if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return NULL; *pOut_len = out_buf.m_size; return out_buf.m_pBuf; } size_t tdefl_compress_mem_to_mem(void* pOut_buf, size_t out_buf_len, const void* pSrc_buf, size_t src_buf_len, int flags) { tdefl_output_buffer out_buf; MZ_CLEAR_OBJ(out_buf); if (!pOut_buf) return 
0; out_buf.m_pBuf = (mz_uint8*)pOut_buf; out_buf.m_capacity = out_buf_len; if (!tdefl_compress_mem_to_output(pSrc_buf, src_buf_len, tdefl_output_buffer_putter, &out_buf, flags)) return 0; return out_buf.m_size; } #ifndef MINIZ_NO_ZLIB_APIS static const mz_uint s_tdefl_num_probes[11] = {0, 1, 6, 32, 16, 32, 128, 256, 512, 768, 1500}; // level may actually range from [0,10] (10 is a "hidden" max level, where we want a bit more compression and it's fine if throughput to fall off a cliff on some files). mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy) { mz_uint comp_flags = s_tdefl_num_probes[(level >= 0) ? MZ_MIN(10, level) : MZ_DEFAULT_LEVEL] | ((level <= 3) ? TDEFL_GREEDY_PARSING_FLAG : 0); if (window_bits > 0) comp_flags |= TDEFL_WRITE_ZLIB_HEADER; if (!level) comp_flags |= TDEFL_FORCE_ALL_RAW_BLOCKS; else if (strategy == MZ_FILTERED) comp_flags |= TDEFL_FILTER_MATCHES; else if (strategy == MZ_HUFFMAN_ONLY) comp_flags &= ~TDEFL_MAX_PROBES_MASK; else if (strategy == MZ_FIXED) comp_flags |= TDEFL_FORCE_ALL_STATIC_BLOCKS; else if (strategy == MZ_RLE) comp_flags |= TDEFL_RLE_MATCHES; return comp_flags; } #endif //MINIZ_NO_ZLIB_APIS #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 4204) // nonstandard extension used : non-constant aggregate initializer (also supported by GNU C and C99, so no big deal) #endif // Simple PNG writer function by Alex Evans, 2011. Released into the public domain: https://gist.github.com/908299, more context at // http://altdevblogaday.org/2011/04/06/a-smaller-jpg-encoder/. 
void* tdefl_write_image_to_png_file_in_memory(const void* pImage, int w, int h, int num_chans, size_t* pLen_out) { tdefl_compressor* pComp = (tdefl_compressor*)MZ_MALLOC(sizeof(tdefl_compressor)); tdefl_output_buffer out_buf; int i, bpl = w * num_chans, y, z; mz_uint32 c; *pLen_out = 0; if (!pComp) return NULL; MZ_CLEAR_OBJ(out_buf); out_buf.m_expandable = MZ_TRUE; out_buf.m_capacity = 57 + MZ_MAX(64, (1 + bpl) * h); if (NULL == (out_buf.m_pBuf = (mz_uint8*)MZ_MALLOC(out_buf.m_capacity))) { MZ_FREE(pComp); return NULL; } // write dummy header for (z = 41; z; --z) tdefl_output_buffer_putter(&z, 1, &out_buf); // compress image data tdefl_init(pComp, tdefl_output_buffer_putter, &out_buf, TDEFL_DEFAULT_MAX_PROBES | TDEFL_WRITE_ZLIB_HEADER); for (y = 0; y < h; ++y) { tdefl_compress_buffer(pComp, &z, 1, TDEFL_NO_FLUSH); tdefl_compress_buffer(pComp, (mz_uint8*)pImage + y * bpl, bpl, TDEFL_NO_FLUSH); } if (tdefl_compress_buffer(pComp, NULL, 0, TDEFL_FINISH) != TDEFL_STATUS_DONE) { MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } // write real header *pLen_out = out_buf.m_size - 41; { mz_uint8 pnghdr[41] = {0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, 0x00, 0x00, 0x00, 0x0d, 0x49, 0x48, 0x44, 0x52, 0, 0, (mz_uint8)(w >> 8), (mz_uint8)w, 0, 0, (mz_uint8)(h >> 8), (mz_uint8)h, 8, (mz_uint8)"\0\0\04\02\06"[num_chans], 0, 0, 0, 0, 0, 0, 0, (mz_uint8)(*pLen_out >> 24), (mz_uint8)(*pLen_out >> 16), (mz_uint8)(*pLen_out >> 8), (mz_uint8)*pLen_out, 0x49, 0x44, 0x41, 0x54}; c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, pnghdr + 12, 17); for (i = 0; i < 4; ++i, c <<= 8) ((mz_uint8*)(pnghdr + 29))[i] = (mz_uint8)(c >> 24); memcpy(out_buf.m_pBuf, pnghdr, 41); } // write footer (IDAT CRC-32, followed by IEND chunk) if (!tdefl_output_buffer_putter("\0\0\0\0\0\0\0\0\x49\x45\x4e\x44\xae\x42\x60\x82", 16, &out_buf)) { *pLen_out = 0; MZ_FREE(pComp); MZ_FREE(out_buf.m_pBuf); return NULL; } c = (mz_uint32)mz_crc32(MZ_CRC32_INIT, out_buf.m_pBuf + 41 - 4, *pLen_out + 4); for (i = 0; i < 4; 
++i, c <<= 8) (out_buf.m_pBuf + out_buf.m_size - 16)[i] = (mz_uint8)(c >> 24); // compute final size of file, grab compressed data buffer and return *pLen_out += 57; MZ_FREE(pComp); return out_buf.m_pBuf; } #ifdef _MSC_VER #pragma warning(pop) #endif // ------------------- .ZIP archive reading #ifndef MINIZ_NO_ARCHIVE_APIS #ifdef MINIZ_NO_STDIO #define MZ_FILE void* #else #include #include #if defined(_MSC_VER) static FILE* mz_fopen(const char* pFilename, const char* pMode) { FILE* pFile = NULL; fopen_s(&pFile, pFilename, pMode); return pFile; } static FILE* mz_freopen(const char* pPath, const char* pMode, FILE* pStream) { FILE* pFile = NULL; if (freopen_s(&pFile, pPath, pMode, pStream)) return NULL; return pFile; } #else static FILE* mz_fopen(const char* pFilename, const char* pMode) { return fopen(pFilename, pMode); } static FILE* mz_freopen(const char* pPath, const char* pMode, FILE* pStream) { return freopen(pPath, pMode, pStream); } #endif // #if defined(_MSC_VER) #if defined(_MSC_VER) || defined(__MINGW64__) #ifndef MINIZ_NO_TIME #include #endif #define MZ_FILE FILE #define MZ_FOPEN mz_fopen #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 _ftelli64 #define MZ_FSEEK64 _fseeki64 #define MZ_FILE_STAT_STRUCT _stat #define MZ_FILE_STAT _stat #define MZ_FFLUSH fflush #define MZ_FREOPEN mz_freopen #define MZ_DELETE_FILE remove #elif defined(__MINGW32__) #ifndef MINIZ_NO_TIME #include #endif #define MZ_FILE FILE #define MZ_FOPEN mz_fopen #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 ftello64 #define MZ_FSEEK64 fseeko64 #define MZ_FILE_STAT_STRUCT _stat #define MZ_FILE_STAT _stat #define MZ_FFLUSH fflush #define MZ_FREOPEN mz_freopen #define MZ_DELETE_FILE remove #elif defined(__TINYC__) #ifndef MINIZ_NO_TIME #include #endif #define MZ_FILE FILE #define MZ_FOPEN mz_fopen #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 ftell #define 
MZ_FSEEK64 fseek #define MZ_FILE_STAT_STRUCT stat #define MZ_FILE_STAT stat #define MZ_FFLUSH fflush #define MZ_FREOPEN mz_freopen #define MZ_DELETE_FILE remove #else #ifndef MINIZ_NO_TIME #include #endif #define MZ_FILE FILE #define MZ_FOPEN mz_fopen #define MZ_FCLOSE fclose #define MZ_FREAD fread #define MZ_FWRITE fwrite #define MZ_FTELL64 ftello #define MZ_FSEEK64 fseeko #define MZ_FILE_STAT_STRUCT stat #define MZ_FILE_STAT stat #define MZ_FFLUSH fflush #define MZ_FREOPEN mz_freopen #define MZ_DELETE_FILE remove #endif // #ifdef _MSC_VER #endif // #ifdef MINIZ_NO_STDIO #define MZ_TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) - 'A' + 'a') : (c)) // Various ZIP archive enums. To completely avoid cross platform compiler alignment and platform endian issues, miniz.c doesn't use structs for any of this stuff. enum { // ZIP archive identifiers and record sizes MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG = 0x06054b50, MZ_ZIP_CENTRAL_DIR_HEADER_SIG = 0x02014b50, MZ_ZIP_LOCAL_DIR_HEADER_SIG = 0x04034b50, MZ_ZIP_LOCAL_DIR_HEADER_SIZE = 30, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE = 46, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE = 22, // Central directory header record offsets MZ_ZIP_CDH_SIG_OFS = 0, MZ_ZIP_CDH_VERSION_MADE_BY_OFS = 4, MZ_ZIP_CDH_VERSION_NEEDED_OFS = 6, MZ_ZIP_CDH_BIT_FLAG_OFS = 8, MZ_ZIP_CDH_METHOD_OFS = 10, MZ_ZIP_CDH_FILE_TIME_OFS = 12, MZ_ZIP_CDH_FILE_DATE_OFS = 14, MZ_ZIP_CDH_CRC32_OFS = 16, MZ_ZIP_CDH_COMPRESSED_SIZE_OFS = 20, MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS = 24, MZ_ZIP_CDH_FILENAME_LEN_OFS = 28, MZ_ZIP_CDH_EXTRA_LEN_OFS = 30, MZ_ZIP_CDH_COMMENT_LEN_OFS = 32, MZ_ZIP_CDH_DISK_START_OFS = 34, MZ_ZIP_CDH_INTERNAL_ATTR_OFS = 36, MZ_ZIP_CDH_EXTERNAL_ATTR_OFS = 38, MZ_ZIP_CDH_LOCAL_HEADER_OFS = 42, // Local directory header offsets MZ_ZIP_LDH_SIG_OFS = 0, MZ_ZIP_LDH_VERSION_NEEDED_OFS = 4, MZ_ZIP_LDH_BIT_FLAG_OFS = 6, MZ_ZIP_LDH_METHOD_OFS = 8, MZ_ZIP_LDH_FILE_TIME_OFS = 10, MZ_ZIP_LDH_FILE_DATE_OFS = 12, MZ_ZIP_LDH_CRC32_OFS = 14, MZ_ZIP_LDH_COMPRESSED_SIZE_OFS = 18, 
MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS = 22, MZ_ZIP_LDH_FILENAME_LEN_OFS = 26, MZ_ZIP_LDH_EXTRA_LEN_OFS = 28, // End of central directory offsets MZ_ZIP_ECDH_SIG_OFS = 0, MZ_ZIP_ECDH_NUM_THIS_DISK_OFS = 4, MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS = 6, MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS = 8, MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS = 10, MZ_ZIP_ECDH_CDIR_SIZE_OFS = 12, MZ_ZIP_ECDH_CDIR_OFS_OFS = 16, MZ_ZIP_ECDH_COMMENT_SIZE_OFS = 20, }; typedef struct { void* m_p; size_t m_size, m_capacity; mz_uint m_element_size; } mz_zip_array; struct mz_zip_internal_state_tag { mz_zip_array m_central_dir; mz_zip_array m_central_dir_offsets; mz_zip_array m_sorted_central_dir_offsets; MZ_FILE* m_pFile; void* m_pMem; size_t m_mem_size; size_t m_mem_capacity; }; #define MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(array_ptr, element_size) (array_ptr)->m_element_size = element_size #define MZ_ZIP_ARRAY_ELEMENT(array_ptr, element_type, index) ((element_type*)((array_ptr)->m_p))[index] static MZ_FORCEINLINE void mz_zip_array_clear(mz_zip_archive* pZip, mz_zip_array* pArray) { pZip->m_pFree(pZip->m_pAlloc_opaque, pArray->m_p); memset(pArray, 0, sizeof(mz_zip_array)); } static mz_bool mz_zip_array_ensure_capacity(mz_zip_archive* pZip, mz_zip_array* pArray, size_t min_new_capacity, mz_uint growing) { void* pNew_p; size_t new_capacity = min_new_capacity; MZ_ASSERT(pArray->m_element_size); if (pArray->m_capacity >= min_new_capacity) return MZ_TRUE; if (growing) { new_capacity = MZ_MAX(1, pArray->m_capacity); while (new_capacity < min_new_capacity) new_capacity *= 2; } if (NULL == (pNew_p = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pArray->m_p, pArray->m_element_size, new_capacity))) return MZ_FALSE; pArray->m_p = pNew_p; pArray->m_capacity = new_capacity; return MZ_TRUE; } static MZ_FORCEINLINE mz_bool mz_zip_array_reserve(mz_zip_archive* pZip, mz_zip_array* pArray, size_t new_capacity, mz_uint growing) { if (new_capacity > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_capacity, growing)) return 
MZ_FALSE; } return MZ_TRUE; } static MZ_FORCEINLINE mz_bool mz_zip_array_resize(mz_zip_archive* pZip, mz_zip_array* pArray, size_t new_size, mz_uint growing) { if (new_size > pArray->m_capacity) { if (!mz_zip_array_ensure_capacity(pZip, pArray, new_size, growing)) return MZ_FALSE; } pArray->m_size = new_size; return MZ_TRUE; } static MZ_FORCEINLINE mz_bool mz_zip_array_ensure_room(mz_zip_archive* pZip, mz_zip_array* pArray, size_t n) { return mz_zip_array_reserve(pZip, pArray, pArray->m_size + n, MZ_TRUE); } static MZ_FORCEINLINE mz_bool mz_zip_array_push_back(mz_zip_archive* pZip, mz_zip_array* pArray, const void* pElements, size_t n) { size_t orig_size = pArray->m_size; if (!mz_zip_array_resize(pZip, pArray, orig_size + n, MZ_TRUE)) return MZ_FALSE; memcpy((mz_uint8*)pArray->m_p + orig_size * pArray->m_element_size, pElements, n * pArray->m_element_size); return MZ_TRUE; } #ifndef MINIZ_NO_TIME static time_t mz_zip_dos_to_time_t(int dos_time, int dos_date) { struct tm tm; memset(&tm, 0, sizeof(tm)); tm.tm_isdst = -1; tm.tm_year = ((dos_date >> 9) & 127) + 1980 - 1900; tm.tm_mon = ((dos_date >> 5) & 15) - 1; tm.tm_mday = dos_date & 31; tm.tm_hour = (dos_time >> 11) & 31; tm.tm_min = (dos_time >> 5) & 63; tm.tm_sec = (dos_time << 1) & 62; return mktime(&tm); } static void mz_zip_time_to_dos_time(time_t time, mz_uint16* pDOS_time, mz_uint16* pDOS_date) { #ifdef _MSC_VER struct tm tm_struct; struct tm* tm = &tm_struct; errno_t err = localtime_s(tm, &time); if (err) { *pDOS_date = 0; *pDOS_time = 0; return; } #else struct tm* tm = localtime(&time); #endif *pDOS_time = (mz_uint16)(((tm->tm_hour) << 11) + ((tm->tm_min) << 5) + ((tm->tm_sec) >> 1)); *pDOS_date = (mz_uint16)(((tm->tm_year + 1900 - 1980) << 9) + ((tm->tm_mon + 1) << 5) + tm->tm_mday); } #endif #ifndef MINIZ_NO_STDIO static mz_bool mz_zip_get_file_modified_time(const char* pFilename, mz_uint16* pDOS_time, mz_uint16* pDOS_date) { #ifdef MINIZ_NO_TIME (void)pFilename; *pDOS_date = *pDOS_time = 0; #else struct 
MZ_FILE_STAT_STRUCT file_stat; if (MZ_FILE_STAT(pFilename, &file_stat) != 0) return MZ_FALSE; mz_zip_time_to_dos_time(file_stat.st_mtime, pDOS_time, pDOS_date); #endif // #ifdef MINIZ_NO_TIME return MZ_TRUE; } static mz_bool mz_zip_set_file_times(const char* pFilename, time_t access_time, time_t modified_time) { #ifndef MINIZ_NO_TIME struct utimbuf t; t.actime = access_time; t.modtime = modified_time; return !utime(pFilename, &t); #else (void)pFilename, (void)access_time, (void)modified_time; return MZ_TRUE; #endif // #ifndef MINIZ_NO_TIME } #endif static mz_bool mz_zip_reader_init_internal(mz_zip_archive* pZip, mz_uint32 flags) { (void)flags; if ((!pZip) || (pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) return MZ_FALSE; if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; if (!pZip->m_pFree) pZip->m_pFree = def_free_func; if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; pZip->m_zip_mode = MZ_ZIP_MODE_READING; pZip->m_archive_size = 0; pZip->m_central_directory_file_ofs = 0; pZip->m_total_files = 0; if (NULL == (pZip->m_pState = (mz_zip_internal_state*)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) return MZ_FALSE; memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); return MZ_TRUE; } static MZ_FORCEINLINE mz_bool mz_zip_reader_filename_less(const mz_zip_array* pCentral_dir_array, const mz_zip_array* pCentral_dir_offsets, mz_uint l_index, mz_uint r_index) { const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; const mz_uint8* pR = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, r_index)); mz_uint 
l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS), r_len = MZ_READ_LE16(pR + MZ_ZIP_CDH_FILENAME_LEN_OFS); mz_uint8 l = 0, r = 0; pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; pR += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; pE = pL + MZ_MIN(l_len, r_len); while (pL < pE) { if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break; pL++; pR++; } return (pL == pE) ? (l_len < r_len) : (l < r); } #define MZ_SWAP_UINT32(a, b) \ do { \ mz_uint32 t = a; \ a = b; \ b = t; \ } \ MZ_MACRO_END // Heap sort of lowercased filenames, used to help accelerate plain central directory searches by mz_zip_reader_locate_file(). (Could also use qsort(), but it could allocate memory.) static void mz_zip_reader_sort_central_dir_offsets_by_filename(mz_zip_archive* pZip) { mz_zip_internal_state* pState = pZip->m_pState; const mz_zip_array* pCentral_dir_offsets = &pState->m_central_dir_offsets; const mz_zip_array* pCentral_dir = &pState->m_central_dir; mz_uint32* pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); const int size = pZip->m_total_files; int start = (size - 2) >> 1, end; while (start >= 0) { int child, root = start; for (;;) { if ((child = (root << 1) + 1) >= size) break; child += (((child + 1) < size) && (mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1]))); if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child])) break; MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child; } start--; } end = size - 1; while (end > 0) { int child, root = 0; MZ_SWAP_UINT32(pIndices[end], pIndices[0]); for (;;) { if ((child = (root << 1) + 1) >= end) break; child += (((child + 1) < end) && mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[child], pIndices[child + 1])); if (!mz_zip_reader_filename_less(pCentral_dir, pCentral_dir_offsets, pIndices[root], pIndices[child])) break; MZ_SWAP_UINT32(pIndices[root], pIndices[child]); root = child; } end--; 
} } static mz_bool mz_zip_reader_read_central_dir(mz_zip_archive* pZip, mz_uint32 flags) { mz_uint cdir_size, num_this_disk, cdir_disk_index; mz_uint64 cdir_ofs; mz_int64 cur_file_ofs; const mz_uint8* p; mz_uint32 buf_u32[4096 / sizeof(mz_uint32)]; mz_uint8* pBuf = (mz_uint8*)buf_u32; // Basic sanity checks - reject files which are too small, and check the first 4 bytes of the file to make sure a local header is there. if (pZip->m_archive_size < MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) return MZ_FALSE; // Find the end of central directory record by scanning the file from the end towards the beginning. cur_file_ofs = MZ_MAX((mz_int64)pZip->m_archive_size - (mz_int64)sizeof(buf_u32), 0); for (;;) { int i, n = (int)MZ_MIN(sizeof(buf_u32), pZip->m_archive_size - cur_file_ofs); if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, n) != (mz_uint)n) return MZ_FALSE; for (i = n - 4; i >= 0; --i) if (MZ_READ_LE32(pBuf + i) == MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) break; if (i >= 0) { cur_file_ofs += i; break; } if ((!cur_file_ofs) || ((pZip->m_archive_size - cur_file_ofs) >= (0xFFFF + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE))) return MZ_FALSE; cur_file_ofs = MZ_MAX(cur_file_ofs - (sizeof(buf_u32) - 3), 0); } // Read and verify the end of central directory record. 
if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) return MZ_FALSE; if ((MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_SIG_OFS) != MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG) || ((pZip->m_total_files = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS)) != MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS))) return MZ_FALSE; num_this_disk = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_THIS_DISK_OFS); cdir_disk_index = MZ_READ_LE16(pBuf + MZ_ZIP_ECDH_NUM_DISK_CDIR_OFS); if (((num_this_disk | cdir_disk_index) != 0) && ((num_this_disk != 1) || (cdir_disk_index != 1))) return MZ_FALSE; if ((cdir_size = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_SIZE_OFS)) < pZip->m_total_files * MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) return MZ_FALSE; cdir_ofs = MZ_READ_LE32(pBuf + MZ_ZIP_ECDH_CDIR_OFS_OFS); if ((cdir_ofs + (mz_uint64)cdir_size) > pZip->m_archive_size) return MZ_FALSE; pZip->m_central_directory_file_ofs = cdir_ofs; if (pZip->m_total_files) { mz_uint i, n; // Read the entire central directory into a heap block, and allocate another heap block to hold the unsorted central dir file record offsets, and another to hold the sorted indices. if ((!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir, cdir_size, MZ_FALSE)) || (!mz_zip_array_resize(pZip, &pZip->m_pState->m_central_dir_offsets, pZip->m_total_files, MZ_FALSE)) || (!mz_zip_array_resize(pZip, &pZip->m_pState->m_sorted_central_dir_offsets, pZip->m_total_files, MZ_FALSE))) return MZ_FALSE; if (pZip->m_pRead(pZip->m_pIO_opaque, cdir_ofs, pZip->m_pState->m_central_dir.m_p, cdir_size) != cdir_size) return MZ_FALSE; // Now create an index into the central directory file records, do some basic sanity checking on each record, and check for zip64 entries (which are not yet supported). 
p = (const mz_uint8*)pZip->m_pState->m_central_dir.m_p; for (n = cdir_size, i = 0; i < pZip->m_total_files; ++i) { mz_uint total_header_size, comp_size, decomp_size, disk_index; if ((n < MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) || (MZ_READ_LE32(p) != MZ_ZIP_CENTRAL_DIR_HEADER_SIG)) return MZ_FALSE; MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, i) = (mz_uint32)(p - (const mz_uint8*)pZip->m_pState->m_central_dir.m_p); MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_sorted_central_dir_offsets, mz_uint32, i) = i; comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); decomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); if (((!MZ_READ_LE32(p + MZ_ZIP_CDH_METHOD_OFS)) && (decomp_size != comp_size)) || (decomp_size && !comp_size) || (decomp_size == 0xFFFFFFFF) || (comp_size == 0xFFFFFFFF)) return MZ_FALSE; disk_index = MZ_READ_LE16(p + MZ_ZIP_CDH_DISK_START_OFS); if ((disk_index != num_this_disk) && (disk_index != 1)) return MZ_FALSE; if (((mz_uint64)MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS) + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + comp_size) > pZip->m_archive_size) return MZ_FALSE; if ((total_header_size = MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS)) > n) return MZ_FALSE; n -= total_header_size; p += total_header_size; } } if ((flags & MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY) == 0) mz_zip_reader_sort_central_dir_offsets_by_filename(pZip); return MZ_TRUE; } mz_bool mz_zip_reader_init(mz_zip_archive* pZip, mz_uint64 size, mz_uint32 flags) { if ((!pZip) || (!pZip->m_pRead)) return MZ_FALSE; if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; pZip->m_archive_size = size; if (!mz_zip_reader_read_central_dir(pZip, flags)) { mz_zip_reader_end(pZip); return MZ_FALSE; } return MZ_TRUE; } static size_t mz_zip_mem_read_func(void* pOpaque, mz_uint64 file_ofs, void* pBuf, size_t n) { mz_zip_archive* pZip = 
(mz_zip_archive*)pOpaque; size_t s = (file_ofs >= pZip->m_archive_size) ? 0 : (size_t)MZ_MIN(pZip->m_archive_size - file_ofs, n); memcpy(pBuf, (const mz_uint8*)pZip->m_pState->m_pMem + file_ofs, s); return s; } mz_bool mz_zip_reader_init_mem(mz_zip_archive* pZip, const void* pMem, size_t size, mz_uint32 flags) { if (!mz_zip_reader_init_internal(pZip, flags)) return MZ_FALSE; pZip->m_archive_size = size; pZip->m_pRead = mz_zip_mem_read_func; pZip->m_pIO_opaque = pZip; pZip->m_pState->m_pMem = (void*)pMem; pZip->m_pState->m_mem_size = size; if (!mz_zip_reader_read_central_dir(pZip, flags)) { mz_zip_reader_end(pZip); return MZ_FALSE; } return MZ_TRUE; } #ifndef MINIZ_NO_STDIO static size_t mz_zip_file_read_func(void* pOpaque, mz_uint64 file_ofs, void* pBuf, size_t n) { mz_zip_archive* pZip = (mz_zip_archive*)pOpaque; mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) return 0; return MZ_FREAD(pBuf, 1, n, pZip->m_pState->m_pFile); } mz_bool mz_zip_reader_init_file(mz_zip_archive* pZip, const char* pFilename, mz_uint32 flags) { mz_uint64 file_size; MZ_FILE* pFile = MZ_FOPEN(pFilename, "rb"); if (!pFile) return MZ_FALSE; if (MZ_FSEEK64(pFile, 0, SEEK_END)) return MZ_FALSE; file_size = MZ_FTELL64(pFile); if (!mz_zip_reader_init_internal(pZip, flags)) { MZ_FCLOSE(pFile); return MZ_FALSE; } pZip->m_pRead = mz_zip_file_read_func; pZip->m_pIO_opaque = pZip; pZip->m_pState->m_pFile = pFile; pZip->m_archive_size = file_size; if (!mz_zip_reader_read_central_dir(pZip, flags)) { mz_zip_reader_end(pZip); return MZ_FALSE; } return MZ_TRUE; } #endif // #ifndef MINIZ_NO_STDIO mz_uint mz_zip_reader_get_num_files(mz_zip_archive* pZip) { return pZip ? 
pZip->m_total_files : 0; } static MZ_FORCEINLINE const mz_uint8* mz_zip_reader_get_cdh(mz_zip_archive* pZip, mz_uint file_index) { if ((!pZip) || (!pZip->m_pState) || (file_index >= pZip->m_total_files) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) return NULL; return &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); } mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive* pZip, mz_uint file_index) { mz_uint m_bit_flag; const mz_uint8* p = mz_zip_reader_get_cdh(pZip, file_index); if (!p) return MZ_FALSE; m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); return (m_bit_flag & 1); } mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive* pZip, mz_uint file_index) { mz_uint filename_len, internal_attr, external_attr; const mz_uint8* p = mz_zip_reader_get_cdh(pZip, file_index); if (!p) return MZ_FALSE; internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); if ((!internal_attr) && ((external_attr & 0x10) != 0)) return MZ_TRUE; filename_len = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); if (filename_len) { if (*(p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_len - 1) == '/') return MZ_TRUE; } return MZ_FALSE; } mz_bool mz_zip_reader_file_stat(mz_zip_archive* pZip, mz_uint file_index, mz_zip_archive_file_stat* pStat) { mz_uint n; const mz_uint8* p = mz_zip_reader_get_cdh(pZip, file_index); if ((!p) || (!pStat)) return MZ_FALSE; // Unpack the central directory record. 
pStat->m_file_index = file_index; pStat->m_central_dir_ofs = MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index); pStat->m_version_made_by = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_MADE_BY_OFS); pStat->m_version_needed = MZ_READ_LE16(p + MZ_ZIP_CDH_VERSION_NEEDED_OFS); pStat->m_bit_flag = MZ_READ_LE16(p + MZ_ZIP_CDH_BIT_FLAG_OFS); pStat->m_method = MZ_READ_LE16(p + MZ_ZIP_CDH_METHOD_OFS); #ifndef MINIZ_NO_TIME pStat->m_time = mz_zip_dos_to_time_t(MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_TIME_OFS), MZ_READ_LE16(p + MZ_ZIP_CDH_FILE_DATE_OFS)); #endif pStat->m_crc32 = MZ_READ_LE32(p + MZ_ZIP_CDH_CRC32_OFS); pStat->m_comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); pStat->m_uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); pStat->m_internal_attr = MZ_READ_LE16(p + MZ_ZIP_CDH_INTERNAL_ATTR_OFS); pStat->m_external_attr = MZ_READ_LE32(p + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS); pStat->m_local_header_ofs = MZ_READ_LE32(p + MZ_ZIP_CDH_LOCAL_HEADER_OFS); // Copy as much of the filename and comment as possible. 
n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE - 1); memcpy(pStat->m_filename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); pStat->m_filename[n] = '\0'; n = MZ_READ_LE16(p + MZ_ZIP_CDH_COMMENT_LEN_OFS); n = MZ_MIN(n, MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE - 1); pStat->m_comment_size = n; memcpy(pStat->m_comment, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(p + MZ_ZIP_CDH_EXTRA_LEN_OFS), n); pStat->m_comment[n] = '\0'; return MZ_TRUE; } mz_uint mz_zip_reader_get_filename(mz_zip_archive* pZip, mz_uint file_index, char* pFilename, mz_uint filename_buf_size) { mz_uint n; const mz_uint8* p = mz_zip_reader_get_cdh(pZip, file_index); if (!p) { if (filename_buf_size) pFilename[0] = '\0'; return 0; } n = MZ_READ_LE16(p + MZ_ZIP_CDH_FILENAME_LEN_OFS); if (filename_buf_size) { n = MZ_MIN(n, filename_buf_size - 1); memcpy(pFilename, p + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n); pFilename[n] = '\0'; } return n + 1; } static MZ_FORCEINLINE mz_bool mz_zip_reader_string_equal(const char* pA, const char* pB, mz_uint len, mz_uint flags) { mz_uint i; if (flags & MZ_ZIP_FLAG_CASE_SENSITIVE) return 0 == memcmp(pA, pB, len); for (i = 0; i < len; ++i) if (MZ_TOLOWER(pA[i]) != MZ_TOLOWER(pB[i])) return MZ_FALSE; return MZ_TRUE; } static MZ_FORCEINLINE int mz_zip_reader_filename_compare(const mz_zip_array* pCentral_dir_array, const mz_zip_array* pCentral_dir_offsets, mz_uint l_index, const char* pR, mz_uint r_len) { const mz_uint8 *pL = &MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_array, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(pCentral_dir_offsets, mz_uint32, l_index)), *pE; mz_uint l_len = MZ_READ_LE16(pL + MZ_ZIP_CDH_FILENAME_LEN_OFS); mz_uint8 l = 0, r = 0; pL += MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; pE = pL + MZ_MIN(l_len, r_len); while (pL < pE) { if ((l = MZ_TOLOWER(*pL)) != (r = MZ_TOLOWER(*pR))) break; pL++; pR++; } return (pL == pE) ? 
(int)(l_len - r_len) : (l - r); } static int mz_zip_reader_locate_file_binary_search(mz_zip_archive* pZip, const char* pFilename) { mz_zip_internal_state* pState = pZip->m_pState; const mz_zip_array* pCentral_dir_offsets = &pState->m_central_dir_offsets; const mz_zip_array* pCentral_dir = &pState->m_central_dir; mz_uint32* pIndices = &MZ_ZIP_ARRAY_ELEMENT(&pState->m_sorted_central_dir_offsets, mz_uint32, 0); const int size = pZip->m_total_files; const mz_uint filename_len = (mz_uint)strlen(pFilename); int l = 0, h = size - 1; while (l <= h) { int m = (l + h) >> 1, file_index = pIndices[m], comp = mz_zip_reader_filename_compare(pCentral_dir, pCentral_dir_offsets, file_index, pFilename, filename_len); if (!comp) return file_index; else if (comp < 0) l = m + 1; else h = m - 1; } return -1; } int mz_zip_reader_locate_file(mz_zip_archive* pZip, const char* pName, const char* pComment, mz_uint flags) { mz_uint file_index; size_t name_len, comment_len; if ((!pZip) || (!pZip->m_pState) || (!pName) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) return -1; if (((flags & (MZ_ZIP_FLAG_IGNORE_PATH | MZ_ZIP_FLAG_CASE_SENSITIVE)) == 0) && (!pComment) && (pZip->m_pState->m_sorted_central_dir_offsets.m_p)) return mz_zip_reader_locate_file_binary_search(pZip, pName); name_len = strlen(pName); if (name_len > 0xFFFF) return -1; comment_len = pComment ? 
strlen(pComment) : 0; if (comment_len > 0xFFFF) return -1; for (file_index = 0; file_index < pZip->m_total_files; file_index++) { const mz_uint8* pHeader = &MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir, mz_uint8, MZ_ZIP_ARRAY_ELEMENT(&pZip->m_pState->m_central_dir_offsets, mz_uint32, file_index)); mz_uint filename_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_FILENAME_LEN_OFS); const char* pFilename = (const char*)pHeader + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE; if (filename_len < name_len) continue; if (comment_len) { mz_uint file_extra_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_EXTRA_LEN_OFS), file_comment_len = MZ_READ_LE16(pHeader + MZ_ZIP_CDH_COMMENT_LEN_OFS); const char* pFile_comment = pFilename + filename_len + file_extra_len; if ((file_comment_len != comment_len) || (!mz_zip_reader_string_equal(pComment, pFile_comment, file_comment_len, flags))) continue; } if ((flags & MZ_ZIP_FLAG_IGNORE_PATH) && (filename_len)) { int ofs = filename_len - 1; do { if ((pFilename[ofs] == '/') || (pFilename[ofs] == '\\') || (pFilename[ofs] == ':')) break; } while (--ofs >= 0); ofs++; pFilename += ofs; filename_len -= ofs; } if ((filename_len == name_len) && (mz_zip_reader_string_equal(pName, pFilename, filename_len, flags))) return file_index; } return -1; } mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive* pZip, mz_uint file_index, void* pBuf, size_t buf_size, mz_uint flags, void* pUser_read_buf, size_t user_read_buf_size) { int status = TINFL_STATUS_DONE; mz_uint64 needed_size, cur_file_ofs, comp_remaining, out_buf_ofs = 0, read_buf_size, read_buf_ofs = 0, read_buf_avail; mz_zip_archive_file_stat file_stat; void* pRead_buf; mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8* pLocal_header = (mz_uint8*)local_header_u32; tinfl_decompressor inflator; if ((buf_size) && (!pBuf)) return MZ_FALSE; if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; if (!file_stat.m_comp_size) return MZ_TRUE; 
// Encryption and patch files are not supported. if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE; // This function only supports stored and deflate. if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) return MZ_FALSE; // Ensure supplied output buffer is large enough. needed_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? file_stat.m_comp_size : file_stat.m_uncomp_size; if (buf_size < needed_size) return MZ_FALSE; // Read and parse the local directory entry. cur_file_ofs = file_stat.m_local_header_ofs; if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) return MZ_FALSE; if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) return MZ_FALSE; cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) return MZ_FALSE; if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { // The file is stored or the caller has requested the compressed data. if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pBuf, (size_t)needed_size) != needed_size) return MZ_FALSE; return ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) != 0) || (mz_crc32(MZ_CRC32_INIT, (const mz_uint8*)pBuf, (size_t)file_stat.m_uncomp_size) == file_stat.m_crc32); } // Decompress the file either directly from memory or from a file input buffer. tinfl_init(&inflator); if (pZip->m_pState->m_pMem) { // Read directly from the archive in memory. pRead_buf = (mz_uint8*)pZip->m_pState->m_pMem + cur_file_ofs; read_buf_size = read_buf_avail = file_stat.m_comp_size; comp_remaining = 0; } else if (pUser_read_buf) { // Use a user provided read buffer. 
if (!user_read_buf_size) return MZ_FALSE; pRead_buf = (mz_uint8*)pUser_read_buf; read_buf_size = user_read_buf_size; read_buf_avail = 0; comp_remaining = file_stat.m_uncomp_size; } else { // Temporarily allocate a read buffer. read_buf_size = MZ_MIN(file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE); #ifdef _MSC_VER if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) #else if (((sizeof(size_t) == sizeof(mz_uint32))) && (read_buf_size > 0x7FFFFFFF)) #endif return MZ_FALSE; if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size))) return MZ_FALSE; read_buf_avail = 0; comp_remaining = file_stat.m_comp_size; } do { size_t in_buf_size, out_buf_size = (size_t)(file_stat.m_uncomp_size - out_buf_ofs); if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) { status = TINFL_STATUS_FAILED; break; } cur_file_ofs += read_buf_avail; comp_remaining -= read_buf_avail; read_buf_ofs = 0; } in_buf_size = (size_t)read_buf_avail; status = tinfl_decompress(&inflator, (mz_uint8*)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8*)pBuf, (mz_uint8*)pBuf + out_buf_ofs, &out_buf_size, TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF | (comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0)); read_buf_avail -= in_buf_size; read_buf_ofs += in_buf_size; out_buf_ofs += out_buf_size; } while (status == TINFL_STATUS_NEEDS_MORE_INPUT); if (status == TINFL_STATUS_DONE) { // Make sure the entire file was decompressed, and check its CRC. 
if ((out_buf_ofs != file_stat.m_uncomp_size) || (mz_crc32(MZ_CRC32_INIT, (const mz_uint8*)pBuf, (size_t)file_stat.m_uncomp_size) != file_stat.m_crc32)) status = TINFL_STATUS_FAILED; } if ((!pZip->m_pState->m_pMem) && (!pUser_read_buf)) pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); return status == TINFL_STATUS_DONE; } mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive* pZip, const char* pFilename, void* pBuf, size_t buf_size, mz_uint flags, void* pUser_read_buf, size_t user_read_buf_size) { int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); if (file_index < 0) return MZ_FALSE; return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, pUser_read_buf, user_read_buf_size); } mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive* pZip, mz_uint file_index, void* pBuf, size_t buf_size, mz_uint flags) { return mz_zip_reader_extract_to_mem_no_alloc(pZip, file_index, pBuf, buf_size, flags, NULL, 0); } mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive* pZip, const char* pFilename, void* pBuf, size_t buf_size, mz_uint flags) { return mz_zip_reader_extract_file_to_mem_no_alloc(pZip, pFilename, pBuf, buf_size, flags, NULL, 0); } void* mz_zip_reader_extract_to_heap(mz_zip_archive* pZip, mz_uint file_index, size_t* pSize, mz_uint flags) { mz_uint64 comp_size, uncomp_size, alloc_size; const mz_uint8* p = mz_zip_reader_get_cdh(pZip, file_index); void* pBuf; if (pSize) *pSize = 0; if (!p) return NULL; comp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); uncomp_size = MZ_READ_LE32(p + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS); alloc_size = (flags & MZ_ZIP_FLAG_COMPRESSED_DATA) ? 
comp_size : uncomp_size; #ifdef _MSC_VER if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) #else if (((sizeof(size_t) == sizeof(mz_uint32))) && (alloc_size > 0x7FFFFFFF)) #endif return NULL; if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)alloc_size))) return NULL; if (!mz_zip_reader_extract_to_mem(pZip, file_index, pBuf, (size_t)alloc_size, flags)) { pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); return NULL; } if (pSize) *pSize = (size_t)alloc_size; return pBuf; } void* mz_zip_reader_extract_file_to_heap(mz_zip_archive* pZip, const char* pFilename, size_t* pSize, mz_uint flags) { int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); if (file_index < 0) { if (pSize) *pSize = 0; return MZ_FALSE; } return mz_zip_reader_extract_to_heap(pZip, file_index, pSize, flags); } mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive* pZip, mz_uint file_index, mz_file_write_func pCallback, void* pOpaque, mz_uint flags) { int status = TINFL_STATUS_DONE; mz_uint file_crc32 = MZ_CRC32_INIT; mz_uint64 read_buf_size, read_buf_ofs = 0, read_buf_avail, comp_remaining, out_buf_ofs = 0, cur_file_ofs; mz_zip_archive_file_stat file_stat; void* pRead_buf = NULL; void* pWrite_buf = NULL; mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8* pLocal_header = (mz_uint8*)local_header_u32; if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; if (!file_stat.m_comp_size) return MZ_TRUE; // Encryption and patch files are not supported. if (file_stat.m_bit_flag & (1 | 32)) return MZ_FALSE; // This function only supports stored and deflate. if ((!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (file_stat.m_method != 0) && (file_stat.m_method != MZ_DEFLATED)) return MZ_FALSE; // Read and parse the local directory entry. 
cur_file_ofs = file_stat.m_local_header_ofs; if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) return MZ_FALSE; if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) return MZ_FALSE; cur_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); if ((cur_file_ofs + file_stat.m_comp_size) > pZip->m_archive_size) return MZ_FALSE; // Decompress the file either directly from memory or from a file input buffer. if (pZip->m_pState->m_pMem) { pRead_buf = (mz_uint8*)pZip->m_pState->m_pMem + cur_file_ofs; read_buf_size = read_buf_avail = file_stat.m_comp_size; comp_remaining = 0; } else { read_buf_size = MZ_MIN(file_stat.m_comp_size, MZ_ZIP_MAX_IO_BUF_SIZE); if (NULL == (pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)read_buf_size))) return MZ_FALSE; read_buf_avail = 0; comp_remaining = file_stat.m_comp_size; } if ((flags & MZ_ZIP_FLAG_COMPRESSED_DATA) || (!file_stat.m_method)) { // The file is stored or the caller has requested the compressed data. 
if (pZip->m_pState->m_pMem) { #ifdef _MSC_VER if (((0, sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > 0xFFFFFFFF)) #else if (((sizeof(size_t) == sizeof(mz_uint32))) && (file_stat.m_comp_size > 0xFFFFFFFF)) #endif return MZ_FALSE; if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)file_stat.m_comp_size) != file_stat.m_comp_size) status = TINFL_STATUS_FAILED; else if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8*)pRead_buf, (size_t)file_stat.m_comp_size); cur_file_ofs += file_stat.m_comp_size; out_buf_ofs += file_stat.m_comp_size; comp_remaining = 0; } else { while (comp_remaining) { read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) { status = TINFL_STATUS_FAILED; break; } if (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) file_crc32 = (mz_uint32)mz_crc32(file_crc32, (const mz_uint8*)pRead_buf, (size_t)read_buf_avail); if (pCallback(pOpaque, out_buf_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) { status = TINFL_STATUS_FAILED; break; } cur_file_ofs += read_buf_avail; out_buf_ofs += read_buf_avail; comp_remaining -= read_buf_avail; } } } else { tinfl_decompressor inflator; tinfl_init(&inflator); if (NULL == (pWrite_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, TINFL_LZ_DICT_SIZE))) status = TINFL_STATUS_FAILED; else { do { mz_uint8* pWrite_buf_cur = (mz_uint8*)pWrite_buf + (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); size_t in_buf_size, out_buf_size = TINFL_LZ_DICT_SIZE - (out_buf_ofs & (TINFL_LZ_DICT_SIZE - 1)); if ((!read_buf_avail) && (!pZip->m_pState->m_pMem)) { read_buf_avail = MZ_MIN(read_buf_size, comp_remaining); if (pZip->m_pRead(pZip->m_pIO_opaque, cur_file_ofs, pRead_buf, (size_t)read_buf_avail) != read_buf_avail) { status = TINFL_STATUS_FAILED; break; } cur_file_ofs += read_buf_avail; comp_remaining -= read_buf_avail; read_buf_ofs = 0; } in_buf_size = 
(size_t)read_buf_avail; status = tinfl_decompress(&inflator, (const mz_uint8*)pRead_buf + read_buf_ofs, &in_buf_size, (mz_uint8*)pWrite_buf, pWrite_buf_cur, &out_buf_size, comp_remaining ? TINFL_FLAG_HAS_MORE_INPUT : 0); read_buf_avail -= in_buf_size; read_buf_ofs += in_buf_size; if (out_buf_size) { if (pCallback(pOpaque, out_buf_ofs, pWrite_buf_cur, out_buf_size) != out_buf_size) { status = TINFL_STATUS_FAILED; break; } file_crc32 = (mz_uint32)mz_crc32(file_crc32, pWrite_buf_cur, out_buf_size); if ((out_buf_ofs += out_buf_size) > file_stat.m_uncomp_size) { status = TINFL_STATUS_FAILED; break; } } } while ((status == TINFL_STATUS_NEEDS_MORE_INPUT) || (status == TINFL_STATUS_HAS_MORE_OUTPUT)); } } if ((status == TINFL_STATUS_DONE) && (!(flags & MZ_ZIP_FLAG_COMPRESSED_DATA))) { // Make sure the entire file was decompressed, and check its CRC. if ((out_buf_ofs != file_stat.m_uncomp_size) || (file_crc32 != file_stat.m_crc32)) status = TINFL_STATUS_FAILED; } if (!pZip->m_pState->m_pMem) pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); if (pWrite_buf) pZip->m_pFree(pZip->m_pAlloc_opaque, pWrite_buf); return status == TINFL_STATUS_DONE; } mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive* pZip, const char* pFilename, mz_file_write_func pCallback, void* pOpaque, mz_uint flags) { int file_index = mz_zip_reader_locate_file(pZip, pFilename, NULL, flags); if (file_index < 0) return MZ_FALSE; return mz_zip_reader_extract_to_callback(pZip, file_index, pCallback, pOpaque, flags); } #ifndef MINIZ_NO_STDIO static size_t mz_zip_file_write_callback(void* pOpaque, mz_uint64 ofs, const void* pBuf, size_t n) { (void)ofs; return MZ_FWRITE(pBuf, 1, n, (MZ_FILE*)pOpaque); } mz_bool mz_zip_reader_extract_to_file(mz_zip_archive* pZip, mz_uint file_index, const char* pDst_filename, mz_uint flags) { mz_bool status; mz_zip_archive_file_stat file_stat; MZ_FILE* pFile; if (!mz_zip_reader_file_stat(pZip, file_index, &file_stat)) return MZ_FALSE; pFile = MZ_FOPEN(pDst_filename, "wb"); 
if (!pFile) return MZ_FALSE; status = mz_zip_reader_extract_to_callback(pZip, file_index, mz_zip_file_write_callback, pFile, flags); if (MZ_FCLOSE(pFile) == EOF) return MZ_FALSE; #ifndef MINIZ_NO_TIME if (status) mz_zip_set_file_times(pDst_filename, file_stat.m_time, file_stat.m_time); #endif return status; } #endif // #ifndef MINIZ_NO_STDIO mz_bool mz_zip_reader_end(mz_zip_archive* pZip) { if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) return MZ_FALSE; if (pZip->m_pState) { mz_zip_internal_state* pState = pZip->m_pState; pZip->m_pState = NULL; mz_zip_array_clear(pZip, &pState->m_central_dir); mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); #ifndef MINIZ_NO_STDIO if (pState->m_pFile) { MZ_FCLOSE(pState->m_pFile); pState->m_pFile = NULL; } #endif // #ifndef MINIZ_NO_STDIO pZip->m_pFree(pZip->m_pAlloc_opaque, pState); } pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; return MZ_TRUE; } #ifndef MINIZ_NO_STDIO mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive* pZip, const char* pArchive_filename, const char* pDst_filename, mz_uint flags) { int file_index = mz_zip_reader_locate_file(pZip, pArchive_filename, NULL, flags); if (file_index < 0) return MZ_FALSE; return mz_zip_reader_extract_to_file(pZip, file_index, pDst_filename, flags); } #endif // ------------------- .ZIP archive writing #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS static void mz_write_le16(mz_uint8* p, mz_uint16 v) { p[0] = (mz_uint8)v; p[1] = (mz_uint8)(v >> 8); } static void mz_write_le32(mz_uint8* p, mz_uint32 v) { p[0] = (mz_uint8)v; p[1] = (mz_uint8)(v >> 8); p[2] = (mz_uint8)(v >> 16); p[3] = (mz_uint8)(v >> 24); } #define MZ_WRITE_LE16(p, v) mz_write_le16((mz_uint8*)(p), (mz_uint16)(v)) #define MZ_WRITE_LE32(p, v) mz_write_le32((mz_uint8*)(p), (mz_uint32)(v)) mz_bool mz_zip_writer_init(mz_zip_archive* pZip, mz_uint64 existing_size) { if ((!pZip) || 
(pZip->m_pState) || (!pZip->m_pWrite) || (pZip->m_zip_mode != MZ_ZIP_MODE_INVALID)) return MZ_FALSE; if (pZip->m_file_offset_alignment) { // Ensure user specified file offset alignment is a power of 2. if (pZip->m_file_offset_alignment & (pZip->m_file_offset_alignment - 1)) return MZ_FALSE; } if (!pZip->m_pAlloc) pZip->m_pAlloc = def_alloc_func; if (!pZip->m_pFree) pZip->m_pFree = def_free_func; if (!pZip->m_pRealloc) pZip->m_pRealloc = def_realloc_func; pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; pZip->m_archive_size = existing_size; pZip->m_central_directory_file_ofs = 0; pZip->m_total_files = 0; if (NULL == (pZip->m_pState = (mz_zip_internal_state*)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(mz_zip_internal_state)))) return MZ_FALSE; memset(pZip->m_pState, 0, sizeof(mz_zip_internal_state)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir, sizeof(mz_uint8)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_central_dir_offsets, sizeof(mz_uint32)); MZ_ZIP_ARRAY_SET_ELEMENT_SIZE(&pZip->m_pState->m_sorted_central_dir_offsets, sizeof(mz_uint32)); return MZ_TRUE; } static size_t mz_zip_heap_write_func(void* pOpaque, mz_uint64 file_ofs, const void* pBuf, size_t n) { mz_zip_archive* pZip = (mz_zip_archive*)pOpaque; mz_zip_internal_state* pState = pZip->m_pState; mz_uint64 new_size = MZ_MAX(file_ofs + n, pState->m_mem_size); #ifdef _MSC_VER if ((!n) || ((0, sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) #else if ((!n) || ((sizeof(size_t) == sizeof(mz_uint32)) && (new_size > 0x7FFFFFFF))) #endif return 0; if (new_size > pState->m_mem_capacity) { void* pNew_block; size_t new_capacity = MZ_MAX(64, pState->m_mem_capacity); while (new_capacity < new_size) new_capacity *= 2; if (NULL == (pNew_block = pZip->m_pRealloc(pZip->m_pAlloc_opaque, pState->m_pMem, 1, new_capacity))) return 0; pState->m_pMem = pNew_block; pState->m_mem_capacity = new_capacity; } memcpy((mz_uint8*)pState->m_pMem + file_ofs, pBuf, n); pState->m_mem_size = (size_t)new_size; 
return n; } mz_bool mz_zip_writer_init_heap(mz_zip_archive* pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size) { pZip->m_pWrite = mz_zip_heap_write_func; pZip->m_pIO_opaque = pZip; if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE; if (0 != (initial_allocation_size = MZ_MAX(initial_allocation_size, size_to_reserve_at_beginning))) { if (NULL == (pZip->m_pState->m_pMem = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, initial_allocation_size))) { mz_zip_writer_end(pZip); return MZ_FALSE; } pZip->m_pState->m_mem_capacity = initial_allocation_size; } return MZ_TRUE; } #ifndef MINIZ_NO_STDIO static size_t mz_zip_file_write_func(void* pOpaque, mz_uint64 file_ofs, const void* pBuf, size_t n) { mz_zip_archive* pZip = (mz_zip_archive*)pOpaque; mz_int64 cur_ofs = MZ_FTELL64(pZip->m_pState->m_pFile); if (((mz_int64)file_ofs < 0) || (((cur_ofs != (mz_int64)file_ofs)) && (MZ_FSEEK64(pZip->m_pState->m_pFile, (mz_int64)file_ofs, SEEK_SET)))) return 0; return MZ_FWRITE(pBuf, 1, n, pZip->m_pState->m_pFile); } mz_bool mz_zip_writer_init_file(mz_zip_archive* pZip, const char* pFilename, mz_uint64 size_to_reserve_at_beginning) { MZ_FILE* pFile; pZip->m_pWrite = mz_zip_file_write_func; pZip->m_pIO_opaque = pZip; if (!mz_zip_writer_init(pZip, size_to_reserve_at_beginning)) return MZ_FALSE; if (NULL == (pFile = MZ_FOPEN(pFilename, "wb"))) { mz_zip_writer_end(pZip); return MZ_FALSE; } pZip->m_pState->m_pFile = pFile; if (size_to_reserve_at_beginning) { mz_uint64 cur_ofs = 0; char buf[4096]; MZ_CLEAR_OBJ(buf); do { size_t n = (size_t)MZ_MIN(sizeof(buf), size_to_reserve_at_beginning); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_ofs, buf, n) != n) { mz_zip_writer_end(pZip); return MZ_FALSE; } cur_ofs += n; size_to_reserve_at_beginning -= n; } while (size_to_reserve_at_beginning); } return MZ_TRUE; } #endif // #ifndef MINIZ_NO_STDIO mz_bool mz_zip_writer_init_from_reader(mz_zip_archive* pZip, const char* pFilename) { mz_zip_internal_state* pState; if 
((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_READING)) return MZ_FALSE; // No sense in trying to write to an archive that's already at the support max size if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + MZ_ZIP_LOCAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) return MZ_FALSE; pState = pZip->m_pState; if (pState->m_pFile) { #ifdef MINIZ_NO_STDIO pFilename; return MZ_FALSE; #else // Archive is being read from stdio - try to reopen as writable. if (pZip->m_pIO_opaque != pZip) return MZ_FALSE; if (!pFilename) return MZ_FALSE; pZip->m_pWrite = mz_zip_file_write_func; if (NULL == (pState->m_pFile = MZ_FREOPEN(pFilename, "r+b", pState->m_pFile))) { // The mz_zip_archive is now in a bogus state because pState->m_pFile is NULL, so just close it. mz_zip_reader_end(pZip); return MZ_FALSE; } #endif // #ifdef MINIZ_NO_STDIO } else if (pState->m_pMem) { // Archive lives in a memory block. Assume it's from the heap that we can resize using the realloc callback. if (pZip->m_pIO_opaque != pZip) return MZ_FALSE; pState->m_mem_capacity = pState->m_mem_size; pZip->m_pWrite = mz_zip_heap_write_func; } // Archive is being read via a user provided read function - make sure the user has specified a write function too. else if (!pZip->m_pWrite) return MZ_FALSE; // Start writing new files at the archive's current central directory location. 
pZip->m_archive_size = pZip->m_central_directory_file_ofs; pZip->m_zip_mode = MZ_ZIP_MODE_WRITING; pZip->m_central_directory_file_ofs = 0; return MZ_TRUE; } mz_bool mz_zip_writer_add_mem(mz_zip_archive* pZip, const char* pArchive_name, const void* pBuf, size_t buf_size, mz_uint level_and_flags) { return mz_zip_writer_add_mem_ex(pZip, pArchive_name, pBuf, buf_size, NULL, 0, level_and_flags, 0, 0); } typedef struct { mz_zip_archive* m_pZip; mz_uint64 m_cur_archive_file_ofs; mz_uint64 m_comp_size; } mz_zip_writer_add_state; static mz_bool mz_zip_writer_add_put_buf_callback(const void* pBuf, int len, void* pUser) { mz_zip_writer_add_state* pState = (mz_zip_writer_add_state*)pUser; if ((int)pState->m_pZip->m_pWrite(pState->m_pZip->m_pIO_opaque, pState->m_cur_archive_file_ofs, pBuf, len) != len) return MZ_FALSE; pState->m_cur_archive_file_ofs += len; pState->m_comp_size += len; return MZ_TRUE; } static mz_bool mz_zip_writer_create_local_dir_header(mz_zip_archive* pZip, mz_uint8* pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date) { (void)pZip; memset(pDst, 0, MZ_ZIP_LOCAL_DIR_HEADER_SIZE); MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_SIG_OFS, MZ_ZIP_LOCAL_DIR_HEADER_SIG); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_VERSION_NEEDED_OFS, method ? 
20 : 0); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_BIT_FLAG_OFS, bit_flags); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_METHOD_OFS, method); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_TIME_OFS, dos_time); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILE_DATE_OFS, dos_date); MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_CRC32_OFS, uncomp_crc32); MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_COMPRESSED_SIZE_OFS, comp_size); MZ_WRITE_LE32(pDst + MZ_ZIP_LDH_DECOMPRESSED_SIZE_OFS, uncomp_size); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_FILENAME_LEN_OFS, filename_size); MZ_WRITE_LE16(pDst + MZ_ZIP_LDH_EXTRA_LEN_OFS, extra_size); return MZ_TRUE; } static mz_bool mz_zip_writer_create_central_dir_header(mz_zip_archive* pZip, mz_uint8* pDst, mz_uint16 filename_size, mz_uint16 extra_size, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) { (void)pZip; memset(pDst, 0, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_SIG_OFS, MZ_ZIP_CENTRAL_DIR_HEADER_SIG); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_VERSION_NEEDED_OFS, method ? 
20 : 0); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_BIT_FLAG_OFS, bit_flags); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_METHOD_OFS, method); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_TIME_OFS, dos_time); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILE_DATE_OFS, dos_date); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_CRC32_OFS, uncomp_crc32); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS, comp_size); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_DECOMPRESSED_SIZE_OFS, uncomp_size); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_FILENAME_LEN_OFS, filename_size); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_EXTRA_LEN_OFS, extra_size); MZ_WRITE_LE16(pDst + MZ_ZIP_CDH_COMMENT_LEN_OFS, comment_size); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_EXTERNAL_ATTR_OFS, ext_attributes); MZ_WRITE_LE32(pDst + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_header_ofs); return MZ_TRUE; } static mz_bool mz_zip_writer_add_to_central_dir(mz_zip_archive* pZip, const char* pFilename, mz_uint16 filename_size, const void* pExtra, mz_uint16 extra_size, const void* pComment, mz_uint16 comment_size, mz_uint64 uncomp_size, mz_uint64 comp_size, mz_uint32 uncomp_crc32, mz_uint16 method, mz_uint16 bit_flags, mz_uint16 dos_time, mz_uint16 dos_date, mz_uint64 local_header_ofs, mz_uint32 ext_attributes) { mz_zip_internal_state* pState = pZip->m_pState; mz_uint32 central_dir_ofs = (mz_uint32)pState->m_central_dir.m_size; size_t orig_central_dir_size = pState->m_central_dir.m_size; mz_uint8 central_dir_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; // No zip64 support yet if ((local_header_ofs > 0xFFFFFFFF) || (((mz_uint64)pState->m_central_dir.m_size + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + filename_size + extra_size + comment_size) > 0xFFFFFFFF)) return MZ_FALSE; if (!mz_zip_writer_create_central_dir_header(pZip, central_dir_header, filename_size, extra_size, comment_size, uncomp_size, comp_size, uncomp_crc32, method, bit_flags, dos_time, dos_date, local_header_ofs, ext_attributes)) return MZ_FALSE; if ((!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_dir_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) || 
(!mz_zip_array_push_back(pZip, &pState->m_central_dir, pFilename, filename_size)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pExtra, extra_size)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pComment, comment_size)) || (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, ¢ral_dir_ofs, 1))) { // Try to push the central directory array back into its original state. mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); return MZ_FALSE; } return MZ_TRUE; } static mz_bool mz_zip_writer_validate_archive_name(const char* pArchive_name) { // Basic ZIP archive filename validity checks: Valid filenames cannot start with a forward slash, cannot contain a drive letter, and cannot use DOS-style backward slashes. if (*pArchive_name == '/') return MZ_FALSE; while (*pArchive_name) { if ((*pArchive_name == '\\') || (*pArchive_name == ':')) return MZ_FALSE; pArchive_name++; } return MZ_TRUE; } static mz_uint mz_zip_writer_compute_padding_needed_for_file_alignment(mz_zip_archive* pZip) { mz_uint32 n; if (!pZip->m_file_offset_alignment) return 0; n = (mz_uint32)(pZip->m_archive_size & (pZip->m_file_offset_alignment - 1)); return (pZip->m_file_offset_alignment - n) & (pZip->m_file_offset_alignment - 1); } static mz_bool mz_zip_writer_write_zeros(mz_zip_archive* pZip, mz_uint64 cur_file_ofs, mz_uint32 n) { char buf[4096]; memset(buf, 0, MZ_MIN(sizeof(buf), n)); while (n) { mz_uint32 s = MZ_MIN(sizeof(buf), n); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_file_ofs, buf, s) != s) return MZ_FALSE; cur_file_ofs += s; n -= s; } return MZ_TRUE; } mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive* pZip, const char* pArchive_name, const void* pBuf, size_t buf_size, const void* pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32) { mz_uint16 method = 0, dos_time = 0, dos_date = 0; mz_uint level, ext_attributes = 0, num_alignment_padding_bytes; mz_uint64 local_dir_header_ofs = 
pZip->m_archive_size, cur_archive_file_ofs = pZip->m_archive_size, comp_size = 0; size_t archive_name_size; mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; tdefl_compressor* pComp = NULL; mz_bool store_data_uncompressed; mz_zip_internal_state* pState; if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; level = level_and_flags & 0xF; store_data_uncompressed = ((!level) || (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)); if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || ((buf_size) && (!pBuf)) || (!pArchive_name) || ((comment_size) && (!pComment)) || (pZip->m_total_files == 0xFFFF) || (level > MZ_UBER_COMPRESSION)) return MZ_FALSE; pState = pZip->m_pState; if ((!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) && (uncomp_size)) return MZ_FALSE; // No zip64 support yet if ((buf_size > 0xFFFFFFFF) || (uncomp_size > 0xFFFFFFFF)) return MZ_FALSE; if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; #ifndef MINIZ_NO_TIME { time_t cur_time; time(&cur_time); mz_zip_time_to_dos_time(cur_time, &dos_time, &dos_date); } #endif // #ifndef MINIZ_NO_TIME archive_name_size = strlen(pArchive_name); if (archive_name_size > 0xFFFF) return MZ_FALSE; num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); // no zip64 support yet if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + comment_size + archive_name_size) > 0xFFFFFFFF)) return MZ_FALSE; if ((archive_name_size) && (pArchive_name[archive_name_size - 1] == '/')) { // Set DOS Subdirectory attribute bit. ext_attributes |= 0x10; // Subdirectories cannot contain data. if ((buf_size) || (uncomp_size)) return MZ_FALSE; } // Try to do any allocations before writing to the archive, so if an allocation fails the file remains unmodified. (A good idea if we're doing an in-place modification.) 
if ((!mz_zip_array_ensure_room(pZip, &pState->m_central_dir, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + archive_name_size + comment_size)) || (!mz_zip_array_ensure_room(pZip, &pState->m_central_dir_offsets, 1))) return MZ_FALSE; if ((!store_data_uncompressed) && (buf_size)) { if (NULL == (pComp = (tdefl_compressor*)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)))) return MZ_FALSE; } if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes + sizeof(local_dir_header))) { pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); return MZ_FALSE; } local_dir_header_ofs += num_alignment_padding_bytes; if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } cur_archive_file_ofs += num_alignment_padding_bytes + sizeof(local_dir_header); MZ_CLEAR_OBJ(local_dir_header); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) { pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); return MZ_FALSE; } cur_archive_file_ofs += archive_name_size; if (!(level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA)) { uncomp_crc32 = (mz_uint32)mz_crc32(MZ_CRC32_INIT, (const mz_uint8*)pBuf, buf_size); uncomp_size = buf_size; if (uncomp_size <= 3) { level = 0; store_data_uncompressed = MZ_TRUE; } } if (store_data_uncompressed) { if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pBuf, buf_size) != buf_size) { pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); return MZ_FALSE; } cur_archive_file_ofs += buf_size; comp_size = buf_size; if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) method = MZ_DEFLATED; } else if (buf_size) { mz_zip_writer_add_state state; state.m_pZip = pZip; state.m_cur_archive_file_ofs = cur_archive_file_ofs; state.m_comp_size = 0; if ((tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) || (tdefl_compress_buffer(pComp, pBuf, 
buf_size, TDEFL_FINISH) != TDEFL_STATUS_DONE)) { pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); return MZ_FALSE; } comp_size = state.m_comp_size; cur_archive_file_ofs = state.m_cur_archive_file_ofs; method = MZ_DEFLATED; } pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); pComp = NULL; // no zip64 support yet if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) return MZ_FALSE; if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) return MZ_FALSE; if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) return MZ_FALSE; if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date, local_dir_header_ofs, ext_attributes)) return MZ_FALSE; pZip->m_total_files++; pZip->m_archive_size = cur_archive_file_ofs; return MZ_TRUE; } #ifndef MINIZ_NO_STDIO mz_bool mz_zip_writer_add_file(mz_zip_archive* pZip, const char* pArchive_name, const char* pSrc_filename, const void* pComment, mz_uint16 comment_size, mz_uint level_and_flags) { mz_uint uncomp_crc32 = MZ_CRC32_INIT, level, num_alignment_padding_bytes; mz_uint16 method = 0, dos_time = 0, dos_date = 0, ext_attributes = 0; mz_uint64 local_dir_header_ofs = pZip->m_archive_size, cur_archive_file_ofs = pZip->m_archive_size, uncomp_size = 0, comp_size = 0; size_t archive_name_size; mz_uint8 local_dir_header[MZ_ZIP_LOCAL_DIR_HEADER_SIZE]; MZ_FILE* pSrc_file = NULL; if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; level = level_and_flags & 0xF; if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) || (!pArchive_name) || ((comment_size) && (!pComment)) || (level > MZ_UBER_COMPRESSION)) return MZ_FALSE; if (level_and_flags & MZ_ZIP_FLAG_COMPRESSED_DATA) return MZ_FALSE; 
if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; archive_name_size = strlen(pArchive_name); if (archive_name_size > 0xFFFF) return MZ_FALSE; num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); // no zip64 support yet if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE + comment_size + archive_name_size) > 0xFFFFFFFF)) return MZ_FALSE; if (!mz_zip_get_file_modified_time(pSrc_filename, &dos_time, &dos_date)) return MZ_FALSE; pSrc_file = MZ_FOPEN(pSrc_filename, "rb"); if (!pSrc_file) return MZ_FALSE; MZ_FSEEK64(pSrc_file, 0, SEEK_END); uncomp_size = MZ_FTELL64(pSrc_file); MZ_FSEEK64(pSrc_file, 0, SEEK_SET); if (uncomp_size > 0xFFFFFFFF) { // No zip64 support yet MZ_FCLOSE(pSrc_file); return MZ_FALSE; } if (uncomp_size <= 3) level = 0; if (!mz_zip_writer_write_zeros(pZip, cur_archive_file_ofs, num_alignment_padding_bytes + sizeof(local_dir_header))) return MZ_FALSE; local_dir_header_ofs += num_alignment_padding_bytes; if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } cur_archive_file_ofs += num_alignment_padding_bytes + sizeof(local_dir_header); MZ_CLEAR_OBJ(local_dir_header); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pArchive_name, archive_name_size) != archive_name_size) { MZ_FCLOSE(pSrc_file); return MZ_FALSE; } cur_archive_file_ofs += archive_name_size; if (uncomp_size) { mz_uint64 uncomp_remaining = uncomp_size; void* pRead_buf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, MZ_ZIP_MAX_IO_BUF_SIZE); if (!pRead_buf) { MZ_FCLOSE(pSrc_file); return MZ_FALSE; } if (!level) { while (uncomp_remaining) { mz_uint n = (mz_uint)MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, uncomp_remaining); if ((MZ_FREAD(pRead_buf, 1, n, pSrc_file) != n) || (pZip->m_pWrite(pZip->m_pIO_opaque, cur_archive_file_ofs, pRead_buf, n) != n)) { 
pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); MZ_FCLOSE(pSrc_file); return MZ_FALSE; } uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8*)pRead_buf, n); uncomp_remaining -= n; cur_archive_file_ofs += n; } comp_size = uncomp_size; } else { mz_bool result = MZ_FALSE; mz_zip_writer_add_state state; tdefl_compressor* pComp = (tdefl_compressor*)pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, sizeof(tdefl_compressor)); if (!pComp) { pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); MZ_FCLOSE(pSrc_file); return MZ_FALSE; } state.m_pZip = pZip; state.m_cur_archive_file_ofs = cur_archive_file_ofs; state.m_comp_size = 0; if (tdefl_init(pComp, mz_zip_writer_add_put_buf_callback, &state, tdefl_create_comp_flags_from_zip_params(level, -15, MZ_DEFAULT_STRATEGY)) != TDEFL_STATUS_OKAY) { pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); MZ_FCLOSE(pSrc_file); return MZ_FALSE; } for (;;) { size_t in_buf_size = (mz_uint32)MZ_MIN(uncomp_remaining, MZ_ZIP_MAX_IO_BUF_SIZE); tdefl_status status; if (MZ_FREAD(pRead_buf, 1, in_buf_size, pSrc_file) != in_buf_size) break; uncomp_crc32 = (mz_uint32)mz_crc32(uncomp_crc32, (const mz_uint8*)pRead_buf, in_buf_size); uncomp_remaining -= in_buf_size; status = tdefl_compress_buffer(pComp, pRead_buf, in_buf_size, uncomp_remaining ? 
TDEFL_NO_FLUSH : TDEFL_FINISH); if (status == TDEFL_STATUS_DONE) { result = MZ_TRUE; break; } else if (status != TDEFL_STATUS_OKAY) break; } pZip->m_pFree(pZip->m_pAlloc_opaque, pComp); if (!result) { pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); MZ_FCLOSE(pSrc_file); return MZ_FALSE; } comp_size = state.m_comp_size; cur_archive_file_ofs = state.m_cur_archive_file_ofs; method = MZ_DEFLATED; } pZip->m_pFree(pZip->m_pAlloc_opaque, pRead_buf); } MZ_FCLOSE(pSrc_file); pSrc_file = NULL; // no zip64 support yet if ((comp_size > 0xFFFFFFFF) || (cur_archive_file_ofs > 0xFFFFFFFF)) return MZ_FALSE; if (!mz_zip_writer_create_local_dir_header(pZip, local_dir_header, (mz_uint16)archive_name_size, 0, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date)) return MZ_FALSE; if (pZip->m_pWrite(pZip->m_pIO_opaque, local_dir_header_ofs, local_dir_header, sizeof(local_dir_header)) != sizeof(local_dir_header)) return MZ_FALSE; if (!mz_zip_writer_add_to_central_dir(pZip, pArchive_name, (mz_uint16)archive_name_size, NULL, 0, pComment, comment_size, uncomp_size, comp_size, uncomp_crc32, method, 0, dos_time, dos_date, local_dir_header_ofs, ext_attributes)) return MZ_FALSE; pZip->m_total_files++; pZip->m_archive_size = cur_archive_file_ofs; return MZ_TRUE; } #endif // #ifndef MINIZ_NO_STDIO mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive* pZip, mz_zip_archive* pSource_zip, mz_uint file_index) { mz_uint n, bit_flags, num_alignment_padding_bytes; mz_uint64 comp_bytes_remaining, local_dir_header_ofs; mz_uint64 cur_src_file_ofs, cur_dst_file_ofs; mz_uint32 local_header_u32[(MZ_ZIP_LOCAL_DIR_HEADER_SIZE + sizeof(mz_uint32) - 1) / sizeof(mz_uint32)]; mz_uint8* pLocal_header = (mz_uint8*)local_header_u32; mz_uint8 central_header[MZ_ZIP_CENTRAL_DIR_HEADER_SIZE]; size_t orig_central_dir_size; mz_zip_internal_state* pState; void* pBuf; const mz_uint8* pSrc_central_header; if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) return MZ_FALSE; if 
(NULL == (pSrc_central_header = mz_zip_reader_get_cdh(pSource_zip, file_index))) return MZ_FALSE; pState = pZip->m_pState; num_alignment_padding_bytes = mz_zip_writer_compute_padding_needed_for_file_alignment(pZip); // no zip64 support yet if ((pZip->m_total_files == 0xFFFF) || ((pZip->m_archive_size + num_alignment_padding_bytes + MZ_ZIP_LOCAL_DIR_HEADER_SIZE + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) return MZ_FALSE; cur_src_file_ofs = MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS); cur_dst_file_ofs = pZip->m_archive_size; if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) return MZ_FALSE; if (MZ_READ_LE32(pLocal_header) != MZ_ZIP_LOCAL_DIR_HEADER_SIG) return MZ_FALSE; cur_src_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; if (!mz_zip_writer_write_zeros(pZip, cur_dst_file_ofs, num_alignment_padding_bytes)) return MZ_FALSE; cur_dst_file_ofs += num_alignment_padding_bytes; local_dir_header_ofs = cur_dst_file_ofs; if (pZip->m_file_offset_alignment) { MZ_ASSERT((local_dir_header_ofs & (pZip->m_file_offset_alignment - 1)) == 0); } if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pLocal_header, MZ_ZIP_LOCAL_DIR_HEADER_SIZE) != MZ_ZIP_LOCAL_DIR_HEADER_SIZE) return MZ_FALSE; cur_dst_file_ofs += MZ_ZIP_LOCAL_DIR_HEADER_SIZE; n = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_EXTRA_LEN_OFS); comp_bytes_remaining = n + MZ_READ_LE32(pSrc_central_header + MZ_ZIP_CDH_COMPRESSED_SIZE_OFS); if (NULL == (pBuf = pZip->m_pAlloc(pZip->m_pAlloc_opaque, 1, (size_t)MZ_MAX(sizeof(mz_uint32) * 4, MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining))))) return MZ_FALSE; while (comp_bytes_remaining) { n = (mz_uint)MZ_MIN(MZ_ZIP_MAX_IO_BUF_SIZE, comp_bytes_remaining); if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, n) != n) { pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); return MZ_FALSE; } 
cur_src_file_ofs += n; if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); return MZ_FALSE; } cur_dst_file_ofs += n; comp_bytes_remaining -= n; } bit_flags = MZ_READ_LE16(pLocal_header + MZ_ZIP_LDH_BIT_FLAG_OFS); if (bit_flags & 8) { // Copy data descriptor if (pSource_zip->m_pRead(pSource_zip->m_pIO_opaque, cur_src_file_ofs, pBuf, sizeof(mz_uint32) * 4) != sizeof(mz_uint32) * 4) { pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); return MZ_FALSE; } n = sizeof(mz_uint32) * ((MZ_READ_LE32(pBuf) == 0x08074b50) ? 4 : 3); if (pZip->m_pWrite(pZip->m_pIO_opaque, cur_dst_file_ofs, pBuf, n) != n) { pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); return MZ_FALSE; } cur_src_file_ofs += n; cur_dst_file_ofs += n; } pZip->m_pFree(pZip->m_pAlloc_opaque, pBuf); // no zip64 support yet if (cur_dst_file_ofs > 0xFFFFFFFF) return MZ_FALSE; orig_central_dir_size = pState->m_central_dir.m_size; memcpy(central_header, pSrc_central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE); MZ_WRITE_LE32(central_header + MZ_ZIP_CDH_LOCAL_HEADER_OFS, local_dir_header_ofs); if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, central_header, MZ_ZIP_CENTRAL_DIR_HEADER_SIZE)) return MZ_FALSE; n = MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_FILENAME_LEN_OFS) + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_EXTRA_LEN_OFS) + MZ_READ_LE16(pSrc_central_header + MZ_ZIP_CDH_COMMENT_LEN_OFS); if (!mz_zip_array_push_back(pZip, &pState->m_central_dir, pSrc_central_header + MZ_ZIP_CENTRAL_DIR_HEADER_SIZE, n)) { mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); return MZ_FALSE; } if (pState->m_central_dir.m_size > 0xFFFFFFFF) return MZ_FALSE; n = (mz_uint32)pState->m_central_dir.m_size; if (!mz_zip_array_push_back(pZip, &pState->m_central_dir_offsets, &n, 1)) { mz_zip_array_resize(pZip, &pState->m_central_dir, orig_central_dir_size, MZ_FALSE); return MZ_FALSE; } pZip->m_total_files++; pZip->m_archive_size = cur_dst_file_ofs; 
return MZ_TRUE; } mz_bool mz_zip_writer_finalize_archive(mz_zip_archive* pZip) { mz_zip_internal_state* pState; mz_uint64 central_dir_ofs, central_dir_size; mz_uint8 hdr[MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE]; if ((!pZip) || (!pZip->m_pState) || (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING)) return MZ_FALSE; pState = pZip->m_pState; // no zip64 support yet if ((pZip->m_total_files > 0xFFFF) || ((pZip->m_archive_size + pState->m_central_dir.m_size + MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIZE) > 0xFFFFFFFF)) return MZ_FALSE; central_dir_ofs = 0; central_dir_size = 0; if (pZip->m_total_files) { // Write central directory central_dir_ofs = pZip->m_archive_size; central_dir_size = pState->m_central_dir.m_size; pZip->m_central_directory_file_ofs = central_dir_ofs; if (pZip->m_pWrite(pZip->m_pIO_opaque, central_dir_ofs, pState->m_central_dir.m_p, (size_t)central_dir_size) != central_dir_size) return MZ_FALSE; pZip->m_archive_size += central_dir_size; } // Write end of central directory record MZ_CLEAR_OBJ(hdr); MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_SIG_OFS, MZ_ZIP_END_OF_CENTRAL_DIR_HEADER_SIG); MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_NUM_ENTRIES_ON_DISK_OFS, pZip->m_total_files); MZ_WRITE_LE16(hdr + MZ_ZIP_ECDH_CDIR_TOTAL_ENTRIES_OFS, pZip->m_total_files); MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_SIZE_OFS, central_dir_size); MZ_WRITE_LE32(hdr + MZ_ZIP_ECDH_CDIR_OFS_OFS, central_dir_ofs); if (pZip->m_pWrite(pZip->m_pIO_opaque, pZip->m_archive_size, hdr, sizeof(hdr)) != sizeof(hdr)) return MZ_FALSE; #ifndef MINIZ_NO_STDIO if ((pState->m_pFile) && (MZ_FFLUSH(pState->m_pFile) == EOF)) return MZ_FALSE; #endif // #ifndef MINIZ_NO_STDIO pZip->m_archive_size += sizeof(hdr); pZip->m_zip_mode = MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED; return MZ_TRUE; } mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive* pZip, void** pBuf, size_t* pSize) { if ((!pZip) || (!pZip->m_pState) || (!pBuf) || (!pSize)) return MZ_FALSE; if (pZip->m_pWrite != mz_zip_heap_write_func) return MZ_FALSE; if 
(!mz_zip_writer_finalize_archive(pZip)) return MZ_FALSE; *pBuf = pZip->m_pState->m_pMem; *pSize = pZip->m_pState->m_mem_size; pZip->m_pState->m_pMem = NULL; pZip->m_pState->m_mem_size = pZip->m_pState->m_mem_capacity = 0; return MZ_TRUE; } mz_bool mz_zip_writer_end(mz_zip_archive* pZip) { mz_zip_internal_state* pState; mz_bool status = MZ_TRUE; if ((!pZip) || (!pZip->m_pState) || (!pZip->m_pAlloc) || (!pZip->m_pFree) || ((pZip->m_zip_mode != MZ_ZIP_MODE_WRITING) && (pZip->m_zip_mode != MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED))) return MZ_FALSE; pState = pZip->m_pState; pZip->m_pState = NULL; mz_zip_array_clear(pZip, &pState->m_central_dir); mz_zip_array_clear(pZip, &pState->m_central_dir_offsets); mz_zip_array_clear(pZip, &pState->m_sorted_central_dir_offsets); #ifndef MINIZ_NO_STDIO if (pState->m_pFile) { MZ_FCLOSE(pState->m_pFile); pState->m_pFile = NULL; } #endif // #ifndef MINIZ_NO_STDIO if ((pZip->m_pWrite == mz_zip_heap_write_func) && (pState->m_pMem)) { pZip->m_pFree(pZip->m_pAlloc_opaque, pState->m_pMem); pState->m_pMem = NULL; } pZip->m_pFree(pZip->m_pAlloc_opaque, pState); pZip->m_zip_mode = MZ_ZIP_MODE_INVALID; return status; } #ifndef MINIZ_NO_STDIO mz_bool mz_zip_add_mem_to_archive_file_in_place(const char* pZip_filename, const char* pArchive_name, const void* pBuf, size_t buf_size, const void* pComment, mz_uint16 comment_size, mz_uint level_and_flags) { mz_bool status, created_new_archive = MZ_FALSE; mz_zip_archive zip_archive; struct MZ_FILE_STAT_STRUCT file_stat; MZ_CLEAR_OBJ(zip_archive); if ((int)level_and_flags < 0) level_and_flags = MZ_DEFAULT_LEVEL; if ((!pZip_filename) || (!pArchive_name) || ((buf_size) && (!pBuf)) || ((comment_size) && (!pComment)) || ((level_and_flags & 0xF) > MZ_UBER_COMPRESSION)) return MZ_FALSE; if (!mz_zip_writer_validate_archive_name(pArchive_name)) return MZ_FALSE; if (MZ_FILE_STAT(pZip_filename, &file_stat) != 0) { // Create a new archive. 
if (!mz_zip_writer_init_file(&zip_archive, pZip_filename, 0)) return MZ_FALSE; created_new_archive = MZ_TRUE; } else { // Append to an existing archive. if (!mz_zip_reader_init_file(&zip_archive, pZip_filename, level_and_flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) return MZ_FALSE; if (!mz_zip_writer_init_from_reader(&zip_archive, pZip_filename)) { mz_zip_reader_end(&zip_archive); return MZ_FALSE; } } status = mz_zip_writer_add_mem_ex(&zip_archive, pArchive_name, pBuf, buf_size, pComment, comment_size, level_and_flags, 0, 0); // Always finalize, even if adding failed for some reason, so we have a valid central directory. (This may not always succeed, but we can try.) if (!mz_zip_writer_finalize_archive(&zip_archive)) status = MZ_FALSE; if (!mz_zip_writer_end(&zip_archive)) status = MZ_FALSE; if ((!status) && (created_new_archive)) { // It's a new archive and something went wrong, so just delete it. int ignoredStatus = MZ_DELETE_FILE(pZip_filename); (void)ignoredStatus; } return status; } void* mz_zip_extract_archive_file_to_heap(const char* pZip_filename, const char* pArchive_name, const char* pComment, size_t* pSize, mz_uint flags) { int file_index; mz_zip_archive zip_archive; void* p = NULL; if (pSize) *pSize = 0; if ((!pZip_filename) || (!pArchive_name)) return NULL; MZ_CLEAR_OBJ(zip_archive); if (!mz_zip_reader_init_file(&zip_archive, pZip_filename, flags | MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY)) return NULL; if ((file_index = mz_zip_reader_locate_file(&zip_archive, pArchive_name, pComment, flags)) >= 0) p = mz_zip_reader_extract_to_heap(&zip_archive, file_index, pSize, flags); mz_zip_reader_end(&zip_archive); return p; } #endif // #ifndef MINIZ_NO_STDIO #endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS #endif // #ifndef MINIZ_NO_ARCHIVE_APIS #ifdef __cplusplus } #endif #endif // MINIZ_HEADER_FILE_ONLY /* This is free and unencumbered software released into the public domain. 
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. For more information, please refer to */ DaemonEngine-crunch-ef4d32f/crnlib/crn_miniz.h000066400000000000000000001406641503722002600214440ustar00rootroot00000000000000/* miniz.c v1.14 - public domain deflate/inflate, zlib-subset, ZIP reading/writing/appending, PNG writing See "unlicense" statement at the end of this file. Rich Geldreich , last updated May 20, 2012 Implements RFC 1950: http://www.ietf.org/rfc/rfc1950.txt and RFC 1951: http://www.ietf.org/rfc/rfc1951.txt Most API's defined in miniz.c are optional. For example, to disable the archive related functions just define MINIZ_NO_ARCHIVE_APIS, or to get rid of all stdio usage define MINIZ_NO_STDIO (see the list below for more macros). * Change History 5/20/12 v1.14 - MinGW32/64 GCC 4.6.1 compiler fixes: added MZ_FORCEINLINE, #include (thanks fermtect). 
5/19/12 v1.13 - From jason@cornsyrup.org and kelwert@mtu.edu - Fix mz_crc32() so it doesn't compute the wrong CRC-32's when mz_ulong is 64-bit. Temporarily/locally slammed in "typedef unsigned long mz_ulong" and re-ran a randomized regression test on ~500k files. Eliminated a bunch of warnings when compiling with GCC 32-bit/64. Ran all examples, miniz.c, and tinfl.c through MSVC 2008's /analyze (static analysis) option and fixed all warnings (except for the silly "Use of the comma-operator in a tested expression.." analysis warning, which I purposely use to work around a MSVC compiler warning). Created 32-bit and 64-bit Codeblocks projects/workspace. Built and tested Linux executables. The codeblocks workspace is compatible with Linux+Win32/x64. Added miniz_tester solution/project, which is a useful little app derived from LZHAM's tester app that I use as part of the regression test. Ran miniz.c and tinfl.c through another series of regression testing on ~500,000 files and archives. Modified example5.c so it purposely disables a bunch of high-level functionality (MINIZ_NO_STDIO, etc.). (Thanks to corysama for the MINIZ_NO_STDIO bug report.) Fix ftell() usage in examples so they exit with an error on files which are too large (a limitation of the examples, not miniz itself). 4/12/12 v1.12 - More comments, added low-level example5.c, fixed a couple minor level_and_flags issues in the archive API's. level_and_flags can now be set to MZ_DEFAULT_COMPRESSION. Thanks to Bruce Dawson for the feedback/bug report. 5/28/11 v1.11 - Added statement from unlicense.org 5/27/11 v1.10 - Substantial compressor optimizations: Level 1 is now ~4x faster than before. The L1 compressor's throughput now varies between 70-110MB/sec. on a Core i7 (actual throughput varies depending on the type of data, and x64 vs. x86). Improved baseline L2-L9 compression perf. Also, greatly improved compression perf. issues on some file types. 
Refactored the compression code for better readability and maintainability. Added level 10 compression level (L10 has slightly better ratio than level 9, but could have a potentially large drop in throughput on some files). 5/15/11 v1.09 - Initial stable release. * Low-level Deflate/Inflate implementation notes: Compression: Use the "tdefl" API's. The compressor supports raw, static, and dynamic blocks, lazy or greedy parsing, match length filtering, RLE-only, and Huffman-only streams. It performs and compresses approximately as well as zlib. Decompression: Use the "tinfl" API's. The entire decompressor is implemented as a single function coroutine: see tinfl_decompress(). It supports decompression into a 32KB (or larger power of 2) wrapping buffer, or into a memory block large enough to hold the entire file. The low-level tdefl/tinfl API's do not make any use of dynamic memory allocation. * zlib-style API notes: miniz.c implements a fairly large subset of zlib. There's enough functionality present for it to be a drop-in zlib replacement in many apps: The z_stream struct, optional memory allocation callbacks deflateInit/deflateInit2/deflate/deflateReset/deflateEnd/deflateBound inflateInit/inflateInit2/inflate/inflateEnd compress, compress2, compressBound, uncompress CRC-32, Adler-32 - Using modern, minimal code size, CPU cache friendly routines. Supports raw deflate streams or standard zlib streams with adler-32 checking. Limitations: The callback API's are not implemented yet. No support for gzip headers or zlib static dictionaries. I've tried to closely emulate zlib's various flavors of stream flushing and return status codes, but there are no guarantees that miniz.c pulls this off perfectly. * PNG writing: See the tdefl_write_image_to_png_file_in_memory() function, originally written by Alex Evans. Supports 1-4 bytes/pixel images. 
   * ZIP archive API notes:

     The ZIP archive API's were designed with simplicity and efficiency in mind, with just enough abstraction to
     get the job done with minimal fuss. There are simple API's to retrieve file information, read files from
     existing archives, create new archives, append new files to existing archives, or clone archive data from
     one archive to another. It supports archives located in memory or the heap, on disk (using stdio.h),
     or you can specify custom file read/write callbacks.

     - Archive reading: Just call this function to read a single file from a disk archive:

      void *mz_zip_extract_archive_file_to_heap(const char *pZip_filename, const char *pArchive_name,
        const char *pComment, size_t *pSize, mz_uint flags);

     For more complex cases, use the "mz_zip_reader" functions. Upon opening an archive, the entire central
     directory is located and read as-is into memory, and subsequent file access only occurs when reading individual files.

     - Archive file scanning: The simple way is to use this function to scan a loaded archive for a specific file:

     int mz_zip_reader_locate_file(mz_zip_archive *pZip, const char *pName, const char *pComment, mz_uint flags);

     The locate operation can optionally check file comments too, which (as one example) can be used to identify
     multiple versions of the same file in an archive. This function uses a simple linear search through the central
     directory, so it's not very fast.

     Alternately, you can iterate through all the files in an archive (using mz_zip_reader_get_num_files()) and
     retrieve detailed info on each file by calling mz_zip_reader_file_stat().

     - Archive creation: Use the "mz_zip_writer" functions. The ZIP writer immediately writes compressed file data
     to disk and builds an exact image of the central directory in memory. The central directory image is written
     all at once at the end of the archive file when the archive is finalized.
     The archive writer can optionally align each file's local header and file data to any power of 2 alignment,
     which can be useful when the archive will be read from optical media. Also, the writer supports placing
     arbitrary data blobs at the very beginning of ZIP archives. Archives written using either feature are still
     readable by any ZIP tool.

     - Archive appending: The simple way to add a single file to an archive is to call this function:

      mz_bool mz_zip_add_mem_to_archive_file_in_place(const char *pZip_filename, const char *pArchive_name,
        const void *pBuf, size_t buf_size, const void *pComment, mz_uint16 comment_size, mz_uint level_and_flags);

     The archive will be created if it doesn't already exist, otherwise it'll be appended to.
     Note the appending is done in-place and is not an atomic operation, so if something goes wrong
     during the operation it's possible the archive could be left without a central directory (although the local
     file headers and file data will be fine, so the archive will be recoverable).

     For more complex archive modification scenarios:
     1. The safest way is to use a mz_zip_reader to read the existing archive, cloning only those bits you want to
     preserve into a new archive using the mz_zip_writer_add_from_zip_reader() function (which copies the
     compressed file data as-is). When you're done, delete the old archive and rename the newly written archive, and
     you're done. This is safe but requires a bunch of temporary disk space or heap memory.

     2. Or, you can convert an mz_zip_reader in-place to an mz_zip_writer using mz_zip_writer_init_from_reader(),
     append new files as needed, then finalize the archive which will write an updated central directory to the
     original archive. (This is basically what mz_zip_add_mem_to_archive_file_in_place() does.) There's a
     possibility that the archive's central directory could be lost with this method if anything goes wrong, though.

     - ZIP archive support limitations:
     No zip64 or spanning support.
Extraction functions can only handle unencrypted, stored or deflated files. Requires streams capable of seeking. * This is a header file library, like stb_image.c. To get only a header file, either cut and paste the below header, or create miniz.h, #define MINIZ_HEADER_FILE_ONLY, and then include miniz.c from it. * Important: For best perf. be sure to customize the below macros for your target platform: #define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1 #define MINIZ_LITTLE_ENDIAN 1 #define MINIZ_HAS_64BIT_REGISTERS 1 */ #pragma once #ifndef MINIZ_HEADER_INCLUDED #define MINIZ_HEADER_INCLUDED #include #if !defined(MINIZ_NO_TIME) && !defined(MINIZ_NO_ARCHIVE_APIS) #include #endif // Defines to completely disable specific portions of miniz.c: // If all macros here are defined the only functionality remaining will be CRC-32, adler-32, tinfl, and tdefl. // Define MINIZ_NO_STDIO to disable all usage and any functions which rely on stdio for file I/O. //#define MINIZ_NO_STDIO // If MINIZ_NO_TIME is specified then the ZIP archive functions will not be able to get the current time, or // get/set file times. //#define MINIZ_NO_TIME // Define MINIZ_NO_ARCHIVE_APIS to disable all ZIP archive API's. //#define MINIZ_NO_ARCHIVE_APIS // Define MINIZ_NO_ARCHIVE_APIS to disable all writing related ZIP archive API's. //#define MINIZ_NO_ARCHIVE_WRITING_APIS // Define MINIZ_NO_ZLIB_APIS to remove all ZLIB-style compression/decompression API's. //#define MINIZ_NO_ZLIB_APIS // Define MINIZ_NO_ZLIB_COMPATIBLE_NAME to disable zlib names, to prevent conflicts against stock zlib. //#define MINIZ_NO_ZLIB_COMPATIBLE_NAMES // Define MINIZ_NO_MALLOC to disable all calls to malloc, free, and realloc. 
// Note if MINIZ_NO_MALLOC is defined then the user must always provide custom user alloc/free/realloc
// callbacks to the zlib and archive API's, and a few stand-alone helper API's which don't provide custom user
// functions (such as tdefl_compress_mem_to_heap() and tinfl_decompress_mem_to_heap()) won't work.
//#define MINIZ_NO_MALLOC

#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__i386) || defined(__i486__) || defined(__i486) || defined(i386) || defined(__ia64__) || defined(__x86_64__)
// MINIZ_X86_OR_X64_CPU is only used to help set the below macros.
#define MINIZ_X86_OR_X64_CPU 1
#endif

// The byte-order macros are only compared when the compiler actually provides them.
// Inside #if an undefined identifier evaluates to 0, so an unguarded
// "__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__" would be "0 == 0" (true) on every compiler
// lacking these macros, wrongly reporting such targets as little endian.
#if (defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)) || MINIZ_X86_OR_X64_CPU
// Set MINIZ_LITTLE_ENDIAN to 1 if the processor is little endian.
#define MINIZ_LITTLE_ENDIAN 1
#endif

#if MINIZ_X86_OR_X64_CPU
// Set MINIZ_USE_UNALIGNED_LOADS_AND_STORES to 1 on CPU's that permit efficient integer loads and stores from unaligned addresses.
#define MINIZ_USE_UNALIGNED_LOADS_AND_STORES 1
#endif

#if defined(_M_X64) || defined(_WIN64) || defined(__MINGW64__) || defined(_LP64) || defined(__LP64__) || defined(__ia64__) || defined(__x86_64__)
// Set MINIZ_HAS_64BIT_REGISTERS to 1 if operations on 64-bit integers are reasonably fast (and don't involve compiler generated calls to helper functions).
#define MINIZ_HAS_64BIT_REGISTERS 1
#endif

#ifdef __cplusplus
extern "C" {
#endif

// ------------------- zlib-style API Definitions.

// For more compatibility with zlib, miniz.c uses unsigned long for some parameters/struct members. Beware: mz_ulong can be either 32 or 64-bits!
typedef unsigned long mz_ulong;

// mz_free() internally uses the MZ_FREE() macro (which by default calls free() unless you've modified the MZ_MALLOC macro) to release a block allocated from the heap.
void mz_free(void* p);

#define MZ_ADLER32_INIT (1)
// mz_adler32() returns the initial adler-32 value to use when called with ptr==NULL.
mz_ulong mz_adler32(mz_ulong adler, const unsigned char* ptr, size_t buf_len); #define MZ_CRC32_INIT (0) // mz_crc32() returns the initial CRC-32 value to use when called with ptr==NULL. mz_ulong mz_crc32(mz_ulong crc, const unsigned char* ptr, size_t buf_len); // Compression strategies. enum { MZ_DEFAULT_STRATEGY = 0, MZ_FILTERED = 1, MZ_HUFFMAN_ONLY = 2, MZ_RLE = 3, MZ_FIXED = 4 }; // Method #define MZ_DEFLATED 8 #ifndef MINIZ_NO_ZLIB_APIS // Heap allocation callbacks. // Note that mz_alloc_func parameter types purpsosely differ from zlib's: items/size is size_t, not unsigned long. typedef void* (*mz_alloc_func)(void* opaque, size_t items, size_t size); typedef void (*mz_free_func)(void* opaque, void* address); typedef void* (*mz_realloc_func)(void* opaque, void* address, size_t items, size_t size); #define MZ_VERSION "9.1.14" #define MZ_VERNUM 0x91E0 #define MZ_VER_MAJOR 9 #define MZ_VER_MINOR 1 #define MZ_VER_REVISION 14 #define MZ_VER_SUBREVISION 0 // Flush values. For typical usage you only need MZ_NO_FLUSH and MZ_FINISH. The other values are for advanced use (refer to the zlib docs). enum { MZ_NO_FLUSH = 0, MZ_PARTIAL_FLUSH = 1, MZ_SYNC_FLUSH = 2, MZ_FULL_FLUSH = 3, MZ_FINISH = 4, MZ_BLOCK = 5 }; // Return status codes. MZ_PARAM_ERROR is non-standard. enum { MZ_OK = 0, MZ_STREAM_END = 1, MZ_NEED_DICT = 2, MZ_ERRNO = -1, MZ_STREAM_ERROR = -2, MZ_DATA_ERROR = -3, MZ_MEM_ERROR = -4, MZ_BUF_ERROR = -5, MZ_VERSION_ERROR = -6, MZ_PARAM_ERROR = -10000 }; // Compression levels: 0-9 are the standard zlib-style levels, 10 is best possible compression (not zlib compatible, and may be very slow), MZ_DEFAULT_COMPRESSION=MZ_DEFAULT_LEVEL. enum { MZ_NO_COMPRESSION = 0, MZ_BEST_SPEED = 1, MZ_BEST_COMPRESSION = 9, MZ_UBER_COMPRESSION = 10, MZ_DEFAULT_LEVEL = 6, MZ_DEFAULT_COMPRESSION = -1 }; // Window bits #define MZ_DEFAULT_WINDOW_BITS 15 struct mz_internal_state; // Compression/decompression stream struct. 
typedef struct mz_stream_s {
  const unsigned char* next_in; // pointer to next byte to read
  unsigned int avail_in; // number of bytes available at next_in
  mz_ulong total_in; // total number of bytes consumed so far

  unsigned char* next_out; // pointer to next byte to write
  unsigned int avail_out; // number of bytes that can be written to next_out
  mz_ulong total_out; // total number of bytes produced so far

  char* msg; // error msg (unused)
  struct mz_internal_state* state; // internal state, allocated by zalloc/zfree

  mz_alloc_func zalloc; // optional heap allocation function (defaults to malloc)
  mz_free_func zfree; // optional heap free function (defaults to free)
  void* opaque; // heap alloc function user pointer

  int data_type; // data_type (unused)
  mz_ulong adler; // adler32 of the source or uncompressed data
  mz_ulong reserved; // not used
} mz_stream;

// Pointer alias used by every zlib-style entry point below.
typedef mz_stream* mz_streamp;

// Returns the version string of miniz.c.
const char* mz_version(void);

// mz_deflateInit() initializes a compressor with default options:
// Parameters:
//  pStream must point to an initialized mz_stream struct.
//  level must be between [MZ_NO_COMPRESSION, MZ_BEST_COMPRESSION].
//  level 1 enables a specially optimized compression function that's been optimized purely for performance, not ratio.
//  (This special func. is currently only enabled when MINIZ_USE_UNALIGNED_LOADS_AND_STORES and MINIZ_LITTLE_ENDIAN are defined.)
//  NOTE(review): MZ_UBER_COMPRESSION (10) is also declared as a level; confirm against the implementation whether it is accepted here.
// Return values:
//  MZ_OK on success.
//  MZ_STREAM_ERROR if the stream is bogus.
//  MZ_PARAM_ERROR if the input parameters are bogus.
//  MZ_MEM_ERROR on out of memory.
int mz_deflateInit(mz_streamp pStream, int level);

// mz_deflateInit2() is like mz_deflateInit(), except with more control:
// Additional parameters:
//   method must be MZ_DEFLATED
//   window_bits must be MZ_DEFAULT_WINDOW_BITS (to wrap the deflate stream with zlib header/adler-32 footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate/no header or footer)
//   mem_level must be between [1, 9] (it's checked but ignored by miniz.c)
int mz_deflateInit2(mz_streamp pStream, int level, int method, int window_bits, int mem_level, int strategy);

// Quickly resets a compressor without having to reallocate anything. Same as calling mz_deflateEnd() followed by mz_deflateInit()/mz_deflateInit2().
int mz_deflateReset(mz_streamp pStream);

// mz_deflate() compresses the input to output, consuming as much of the input and producing as much output as possible.
// Parameters:
//   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
//   flush may be MZ_NO_FLUSH, MZ_PARTIAL_FLUSH/MZ_SYNC_FLUSH, MZ_FULL_FLUSH, or MZ_FINISH.
// Return values:
//   MZ_OK on success (when flushing, or if more input is needed but not available, and/or there's more output to be written but the output buffer is full).
//   MZ_STREAM_END if all input has been consumed and all output bytes have been written. Don't call mz_deflate() on the stream anymore.
//   MZ_STREAM_ERROR if the stream is bogus.
//   MZ_PARAM_ERROR if one of the parameters is invalid.
//   MZ_BUF_ERROR if no forward progress is possible because the input and/or output buffers are empty. (Fill up the input buffer or free up some output space and try again.)
int mz_deflate(mz_streamp pStream, int flush);

// mz_deflateEnd() deinitializes a compressor:
// Return values:
//  MZ_OK on success.
//  MZ_STREAM_ERROR if the stream is bogus.
int mz_deflateEnd(mz_streamp pStream);

// mz_deflateBound() returns a (very) conservative upper bound on the amount of data that could be generated by deflate(), assuming flush is set to only MZ_NO_FLUSH or MZ_FINISH.
mz_ulong mz_deflateBound(mz_streamp pStream, mz_ulong source_len);

// Single-call compression functions mz_compress() and mz_compress2():
// Returns MZ_OK on success, or one of the error codes from mz_deflate() on failure.
// mz_compress2() additionally takes the compression level to use.
int mz_compress(unsigned char* pDest, mz_ulong* pDest_len, const unsigned char* pSource, mz_ulong source_len);
int mz_compress2(unsigned char* pDest, mz_ulong* pDest_len, const unsigned char* pSource, mz_ulong source_len, int level);

// mz_compressBound() returns a (very) conservative upper bound on the amount of data that could be generated by calling mz_compress().
mz_ulong mz_compressBound(mz_ulong source_len);

// Initializes a decompressor.
int mz_inflateInit(mz_streamp pStream);

// mz_inflateInit2() is like mz_inflateInit() with an additional option that controls the window size and whether or not the stream has been wrapped with a zlib header/footer:
// window_bits must be MZ_DEFAULT_WINDOW_BITS (to parse zlib header/footer) or -MZ_DEFAULT_WINDOW_BITS (raw deflate).
int mz_inflateInit2(mz_streamp pStream, int window_bits);

// Decompresses the input stream to the output, consuming only as much of the input as needed, and writing as much to the output as possible.
// Parameters:
//   pStream is the stream to read from and write to. You must initialize/update the next_in, avail_in, next_out, and avail_out members.
//   flush may be MZ_NO_FLUSH, MZ_SYNC_FLUSH, or MZ_FINISH.
//   On the first call, if flush is MZ_FINISH it's assumed the input and output buffers are both sized large enough to decompress the entire stream in a single call (this is slightly faster).
// MZ_FINISH implies that there are no more source bytes available beside what's already in the input buffer, and that the output buffer is large enough to hold the rest of the decompressed data. // Return values: // MZ_OK on success. Either more input is needed but not available, and/or there's more output to be written but the output buffer is full. // MZ_STREAM_END if all needed input has been consumed and all output bytes have been written. For zlib streams, the adler-32 of the decompressed data has also been verified. // MZ_STREAM_ERROR if the stream is bogus. // MZ_DATA_ERROR if the deflate stream is invalid. // MZ_PARAM_ERROR if one of the parameters is invalid. // MZ_BUF_ERROR if no forward progress is possible because the input buffer is empty but the inflater needs more input to continue, or if the output buffer is not large enough. Call mz_inflate() again // with more input data, or with more room in the output buffer (except when using single call decompression, described above). int mz_inflate(mz_streamp pStream, int flush); // Deinitializes a decompressor. int mz_inflateEnd(mz_streamp pStream); // Single-call decompression. // Returns MZ_OK on success, or one of the error codes from mz_inflate() on failure. int mz_uncompress(unsigned char* pDest, mz_ulong* pDest_len, const unsigned char* pSource, mz_ulong source_len); // Returns a string description of the specified error code, or NULL if the error code is invalid. const char* mz_error(int err); // Redefine zlib-compatible names to miniz equivalents, so miniz.c can be used as a drop-in replacement for the subset of zlib that miniz.c supports. // Define MINIZ_NO_ZLIB_COMPATIBLE_NAMES to disable zlib-compatibility if you use zlib in the same project. 
#ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES
// zlib-style scalar and pointer type aliases.
typedef unsigned char Byte;
typedef unsigned int uInt;
typedef mz_ulong uLong;
typedef Byte Bytef;
typedef uInt uIntf;
typedef char charf;
typedef int intf;
typedef void* voidpf;
typedef uLong uLongf;
typedef void* voidp;
// NOTE(review): this declares a const pointer to mutable data; stock zlib's voidpc is
// a pointer to const data (const void *). Confirm which is intended before relying on it.
typedef void* const voidpc;
#define Z_NULL 0
// Flush values.
#define Z_NO_FLUSH MZ_NO_FLUSH
#define Z_PARTIAL_FLUSH MZ_PARTIAL_FLUSH
#define Z_SYNC_FLUSH MZ_SYNC_FLUSH
#define Z_FULL_FLUSH MZ_FULL_FLUSH
#define Z_FINISH MZ_FINISH
#define Z_BLOCK MZ_BLOCK
// Return status codes.
#define Z_OK MZ_OK
#define Z_STREAM_END MZ_STREAM_END
#define Z_NEED_DICT MZ_NEED_DICT
#define Z_ERRNO MZ_ERRNO
#define Z_STREAM_ERROR MZ_STREAM_ERROR
#define Z_DATA_ERROR MZ_DATA_ERROR
#define Z_MEM_ERROR MZ_MEM_ERROR
#define Z_BUF_ERROR MZ_BUF_ERROR
#define Z_VERSION_ERROR MZ_VERSION_ERROR
#define Z_PARAM_ERROR MZ_PARAM_ERROR
// Compression levels.
#define Z_NO_COMPRESSION MZ_NO_COMPRESSION
#define Z_BEST_SPEED MZ_BEST_SPEED
#define Z_BEST_COMPRESSION MZ_BEST_COMPRESSION
#define Z_DEFAULT_COMPRESSION MZ_DEFAULT_COMPRESSION
// Compression strategies.
#define Z_DEFAULT_STRATEGY MZ_DEFAULT_STRATEGY
#define Z_FILTERED MZ_FILTERED
#define Z_HUFFMAN_ONLY MZ_HUFFMAN_ONLY
#define Z_RLE MZ_RLE
#define Z_FIXED MZ_FIXED
#define Z_DEFLATED MZ_DEFLATED
#define Z_DEFAULT_WINDOW_BITS MZ_DEFAULT_WINDOW_BITS
// Types and functions.
#define alloc_func mz_alloc_func
#define free_func mz_free_func
#define internal_state mz_internal_state
#define z_stream mz_stream
#define deflateInit mz_deflateInit
#define deflateInit2 mz_deflateInit2
#define deflateReset mz_deflateReset
#define deflate mz_deflate
#define deflateEnd mz_deflateEnd
#define deflateBound mz_deflateBound
#define compress mz_compress
#define compress2 mz_compress2
#define compressBound mz_compressBound
#define inflateInit mz_inflateInit
#define inflateInit2 mz_inflateInit2
#define inflate mz_inflate
#define inflateEnd mz_inflateEnd
#define uncompress mz_uncompress
#define crc32 mz_crc32
#define adler32 mz_adler32
#define MAX_WBITS 15
#define MAX_MEM_LEVEL 9
#define zError mz_error
// Version identification.
#define ZLIB_VERSION MZ_VERSION
#define ZLIB_VERNUM MZ_VERNUM
#define ZLIB_VER_MAJOR MZ_VER_MAJOR
#define ZLIB_VER_MINOR MZ_VER_MINOR
#define ZLIB_VER_REVISION MZ_VER_REVISION
#define ZLIB_VER_SUBREVISION MZ_VER_SUBREVISION
#define zlibVersion mz_version
#define zlib_version mz_version()
#endif // #ifndef MINIZ_NO_ZLIB_COMPATIBLE_NAMES

#endif // MINIZ_NO_ZLIB_APIS

// ------------------- Types and macros

typedef unsigned char mz_uint8;
typedef signed short mz_int16;
typedef unsigned short mz_uint16;
typedef unsigned int mz_uint32;
typedef unsigned int mz_uint;
typedef long long mz_int64;
typedef unsigned long long mz_uint64;
typedef int mz_bool;

#define MZ_FALSE (0)
#define MZ_TRUE (1)

// Works around MSVC's spammy "warning C4127: conditional expression is constant" message.
#ifdef _MSC_VER
#define MZ_MACRO_END while (0, 0)
#else
#define MZ_MACRO_END while (0)
#endif

// ------------------- ZIP archive reading/writing

#ifndef MINIZ_NO_ARCHIVE_APIS

// Size limits; the last two size the m_filename and m_comment arrays of mz_zip_archive_file_stat.
enum {
  MZ_ZIP_MAX_IO_BUF_SIZE = 64 * 1024,
  MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE = 260,
  MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE = 256
};

// Per-entry information filled in by mz_zip_reader_file_stat().
typedef struct {
  mz_uint32 m_file_index;
  mz_uint32 m_central_dir_ofs;
  mz_uint16 m_version_made_by;
  mz_uint16 m_version_needed;
  mz_uint16 m_bit_flag;
  mz_uint16 m_method;
#ifndef MINIZ_NO_TIME
  time_t m_time;
#endif
  mz_uint32 m_crc32;
  mz_uint64 m_comp_size;
  mz_uint64 m_uncomp_size;
  mz_uint16 m_internal_attr;
  mz_uint32 m_external_attr;
  mz_uint64 m_local_header_ofs;
  mz_uint32 m_comment_size;
  char m_filename[MZ_ZIP_MAX_ARCHIVE_FILENAME_SIZE];
  char m_comment[MZ_ZIP_MAX_ARCHIVE_FILE_COMMENT_SIZE];
} mz_zip_archive_file_stat;

// User I/O callbacks: both take an opaque pointer, an offset, a buffer and a byte count n, and return a size_t.
typedef size_t (*mz_file_read_func)(void* pOpaque, mz_uint64 file_ofs, void* pBuf, size_t n);
typedef size_t (*mz_file_write_func)(void* pOpaque, mz_uint64 file_ofs, const void* pBuf, size_t n);

// Opaque to users of this header; only ever referenced through a pointer.
struct mz_zip_internal_state_tag;
typedef struct mz_zip_internal_state_tag mz_zip_internal_state;

typedef enum {
  MZ_ZIP_MODE_INVALID = 0,
  MZ_ZIP_MODE_READING = 1,
  MZ_ZIP_MODE_WRITING = 2,
  MZ_ZIP_MODE_WRITING_HAS_BEEN_FINALIZED = 3
} mz_zip_mode;

// Archive object passed to every mz_zip_reader_*/mz_zip_writer_* function.
typedef struct {
  mz_uint64 m_archive_size;
  mz_uint64 m_central_directory_file_ofs;
  mz_uint m_total_files;
  mz_zip_mode m_zip_mode;

  mz_uint m_file_offset_alignment;

  mz_alloc_func m_pAlloc;
  mz_free_func m_pFree;
  mz_realloc_func m_pRealloc;
  void* m_pAlloc_opaque;

  mz_file_read_func m_pRead;
  mz_file_write_func m_pWrite;
  void* m_pIO_opaque;

  mz_zip_internal_state* m_pState;

} mz_zip_archive;

typedef enum {
  MZ_ZIP_FLAG_CASE_SENSITIVE = 0x0100,
  MZ_ZIP_FLAG_IGNORE_PATH = 0x0200,
  MZ_ZIP_FLAG_COMPRESSED_DATA = 0x0400,
  MZ_ZIP_FLAG_DO_NOT_SORT_CENTRAL_DIRECTORY = 0x0800
} mz_zip_flags;

// ZIP archive reading

// Inits a ZIP archive reader.
// These functions read and validate the archive's central directory.
mz_bool mz_zip_reader_init(mz_zip_archive* pZip, mz_uint64 size, mz_uint32 flags);
mz_bool mz_zip_reader_init_mem(mz_zip_archive* pZip, const void* pMem, size_t size, mz_uint32 flags);

#ifndef MINIZ_NO_STDIO
mz_bool mz_zip_reader_init_file(mz_zip_archive* pZip, const char* pFilename, mz_uint32 flags);
#endif

// Returns the total number of files in the archive.
mz_uint mz_zip_reader_get_num_files(mz_zip_archive* pZip);

// Returns detailed information about an archive file entry.
mz_bool mz_zip_reader_file_stat(mz_zip_archive* pZip, mz_uint file_index, mz_zip_archive_file_stat* pStat);

// Determines if an archive file entry is a directory entry.
mz_bool mz_zip_reader_is_file_a_directory(mz_zip_archive* pZip, mz_uint file_index);
mz_bool mz_zip_reader_is_file_encrypted(mz_zip_archive* pZip, mz_uint file_index);

// Retrieves the filename of an archive file entry.
// Returns the number of bytes written to pFilename, or if filename_buf_size is 0 this function returns the number of bytes needed to fully store the filename.
mz_uint mz_zip_reader_get_filename(mz_zip_archive* pZip, mz_uint file_index, char* pFilename, mz_uint filename_buf_size);

// Attempts to locate a file in the archive's central directory.
// Valid flags: MZ_ZIP_FLAG_CASE_SENSITIVE, MZ_ZIP_FLAG_IGNORE_PATH // Returns -1 if the file cannot be found. int mz_zip_reader_locate_file(mz_zip_archive* pZip, const char* pName, const char* pComment, mz_uint flags); // Extracts a archive file to a memory buffer using no memory allocation. mz_bool mz_zip_reader_extract_to_mem_no_alloc(mz_zip_archive* pZip, mz_uint file_index, void* pBuf, size_t buf_size, mz_uint flags, void* pUser_read_buf, size_t user_read_buf_size); mz_bool mz_zip_reader_extract_file_to_mem_no_alloc(mz_zip_archive* pZip, const char* pFilename, void* pBuf, size_t buf_size, mz_uint flags, void* pUser_read_buf, size_t user_read_buf_size); // Extracts a archive file to a memory buffer. mz_bool mz_zip_reader_extract_to_mem(mz_zip_archive* pZip, mz_uint file_index, void* pBuf, size_t buf_size, mz_uint flags); mz_bool mz_zip_reader_extract_file_to_mem(mz_zip_archive* pZip, const char* pFilename, void* pBuf, size_t buf_size, mz_uint flags); // Extracts a archive file to a dynamically allocated heap buffer. void* mz_zip_reader_extract_to_heap(mz_zip_archive* pZip, mz_uint file_index, size_t* pSize, mz_uint flags); void* mz_zip_reader_extract_file_to_heap(mz_zip_archive* pZip, const char* pFilename, size_t* pSize, mz_uint flags); // Extracts a archive file using a callback function to output the file's data. mz_bool mz_zip_reader_extract_to_callback(mz_zip_archive* pZip, mz_uint file_index, mz_file_write_func pCallback, void* pOpaque, mz_uint flags); mz_bool mz_zip_reader_extract_file_to_callback(mz_zip_archive* pZip, const char* pFilename, mz_file_write_func pCallback, void* pOpaque, mz_uint flags); #ifndef MINIZ_NO_STDIO // Extracts a archive file to a disk file and sets its last accessed and modified times. // This function only extracts files, not archive directory records. 
mz_bool mz_zip_reader_extract_to_file(mz_zip_archive* pZip, mz_uint file_index, const char* pDst_filename, mz_uint flags);
mz_bool mz_zip_reader_extract_file_to_file(mz_zip_archive* pZip, const char* pArchive_filename, const char* pDst_filename, mz_uint flags);
#endif

// Ends archive reading, freeing all allocations, and closing the input archive file if mz_zip_reader_init_file() was used.
mz_bool mz_zip_reader_end(mz_zip_archive* pZip);

// ZIP archive writing

#ifndef MINIZ_NO_ARCHIVE_WRITING_APIS

// Inits a ZIP archive writer.
mz_bool mz_zip_writer_init(mz_zip_archive* pZip, mz_uint64 existing_size);
mz_bool mz_zip_writer_init_heap(mz_zip_archive* pZip, size_t size_to_reserve_at_beginning, size_t initial_allocation_size);

#ifndef MINIZ_NO_STDIO
mz_bool mz_zip_writer_init_file(mz_zip_archive* pZip, const char* pFilename, mz_uint64 size_to_reserve_at_beginning);
#endif

// Converts a ZIP archive reader object into a writer object, to allow efficient in-place file appends to occur on an existing archive.
// For archives opened using mz_zip_reader_init_file, pFilename must be the archive's filename so it can be reopened for writing. If the file can't be reopened, mz_zip_reader_end() will be called.
// For archives opened using mz_zip_reader_init_mem, the memory block must be growable using the realloc callback (which defaults to realloc unless you've overridden it).
// Finally, for archives opened using mz_zip_reader_init, the mz_zip_archive's user provided m_pWrite function cannot be NULL.
// Note: In-place archive modification is not recommended unless you know what you're doing, because if execution stops or something goes wrong before
// the archive is finalized the file's central directory will be hosed.
mz_bool mz_zip_writer_init_from_reader(mz_zip_archive* pZip, const char* pFilename);

// Adds the contents of a memory buffer to an archive. These functions record the current local time into the archive.
// To add a directory entry, call this method with an archive name ending in a forwardslash with empty buffer. // level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. mz_bool mz_zip_writer_add_mem(mz_zip_archive* pZip, const char* pArchive_name, const void* pBuf, size_t buf_size, mz_uint level_and_flags); mz_bool mz_zip_writer_add_mem_ex(mz_zip_archive* pZip, const char* pArchive_name, const void* pBuf, size_t buf_size, const void* pComment, mz_uint16 comment_size, mz_uint level_and_flags, mz_uint64 uncomp_size, mz_uint32 uncomp_crc32); #ifndef MINIZ_NO_STDIO // Adds the contents of a disk file to an archive. This function also records the disk file's modified time into the archive. // level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION. mz_bool mz_zip_writer_add_file(mz_zip_archive* pZip, const char* pArchive_name, const char* pSrc_filename, const void* pComment, mz_uint16 comment_size, mz_uint level_and_flags); #endif // Adds a file to an archive by fully cloning the data from another archive. // This function fully clones the source file's compressed data (no recompression), along with its full filename, extra data, and comment fields. mz_bool mz_zip_writer_add_from_zip_reader(mz_zip_archive* pZip, mz_zip_archive* pSource_zip, mz_uint file_index); // Finalizes the archive by writing the central directory records followed by the end of central directory record. // After an archive is finalized, the only valid call on the mz_zip_archive struct is mz_zip_writer_end(). // An archive must be manually finalized by calling this function for it to be valid. 
mz_bool mz_zip_writer_finalize_archive(mz_zip_archive* pZip);
mz_bool mz_zip_writer_finalize_heap_archive(mz_zip_archive* pZip, void** pBuf, size_t* pSize);

// Ends archive writing, freeing all allocations, and closing the output file if mz_zip_writer_init_file() was used.
// Note for the archive to be valid, it must have been finalized before ending.
mz_bool mz_zip_writer_end(mz_zip_archive* pZip);

// Misc. high-level helper functions:

// mz_zip_add_mem_to_archive_file_in_place() efficiently (but not atomically) appends a memory blob to a ZIP archive.
// level_and_flags - compression level (0-10, see MZ_BEST_SPEED, MZ_BEST_COMPRESSION, etc.) logically OR'd with zero or more mz_zip_flags, or just set to MZ_DEFAULT_COMPRESSION.
mz_bool mz_zip_add_mem_to_archive_file_in_place(const char* pZip_filename, const char* pArchive_name, const void* pBuf, size_t buf_size, const void* pComment, mz_uint16 comment_size, mz_uint level_and_flags);

// Reads a single file from an archive into a heap block.
// If pComment is not NULL, only the file with the specified comment will be extracted.
// Returns NULL on failure.
// NOTE(review): this read-only helper is declared inside the MINIZ_NO_ARCHIVE_WRITING_APIS guard, so it
// disappears when writing support is compiled out; confirm that this matches the implementation file.
void* mz_zip_extract_archive_file_to_heap(const char* pZip_filename, const char* pArchive_name, const char* pComment, size_t* pSize, mz_uint flags);

#endif // #ifndef MINIZ_NO_ARCHIVE_WRITING_APIS

#endif // #ifndef MINIZ_NO_ARCHIVE_APIS

// ------------------- Low-level Decompression API Definitions

// Decompression flags used by tinfl_decompress().
// TINFL_FLAG_PARSE_ZLIB_HEADER: If set, the input has a valid zlib header and ends with an adler32 checksum (it's a valid zlib stream). Otherwise, the input is a raw deflate stream.
// TINFL_FLAG_HAS_MORE_INPUT: If set, there are more input bytes available beyond the end of the supplied input buffer. If clear, the input buffer contains all remaining input.
// TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF: If set, the output buffer is large enough to hold the entire decompressed stream. If clear, the output buffer is at least the size of the dictionary (typically 32KB).
// TINFL_FLAG_COMPUTE_ADLER32: Force adler-32 checksum computation of the decompressed bytes.
enum {
  TINFL_FLAG_PARSE_ZLIB_HEADER = 1,
  TINFL_FLAG_HAS_MORE_INPUT = 2,
  TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF = 4,
  TINFL_FLAG_COMPUTE_ADLER32 = 8
};

// High level decompression functions:
// tinfl_decompress_mem_to_heap() decompresses a block in memory to a heap block allocated via malloc().
// On entry:
//  pSrc_buf, src_buf_len: Pointer and size of the Deflate or zlib source data to decompress.
// On return:
//  Function returns a pointer to the decompressed data, or NULL on failure.
//  *pOut_len will be set to the decompressed data's size, which could be larger than src_buf_len on uncompressible data.
//  The caller must call mz_free() on the returned block when it's no longer needed.
void* tinfl_decompress_mem_to_heap(const void* pSrc_buf, size_t src_buf_len, size_t* pOut_len, int flags);

// tinfl_decompress_mem_to_mem() decompresses a block in memory to another block in memory.
// Returns TINFL_DECOMPRESS_MEM_TO_MEM_FAILED on failure, or the number of bytes written on success.
#define TINFL_DECOMPRESS_MEM_TO_MEM_FAILED ((size_t)(-1))
size_t tinfl_decompress_mem_to_mem(void* pOut_buf, size_t out_buf_len, const void* pSrc_buf, size_t src_buf_len, int flags);

// tinfl_decompress_mem_to_callback() decompresses a block in memory to an internal 32KB buffer, and a user provided callback function will be called to flush the buffer.
// Returns 1 on success or 0 on failure.
typedef int (*tinfl_put_buf_func_ptr)(const void* pBuf, int len, void* pUser);
int tinfl_decompress_mem_to_callback(const void* pIn_buf, size_t* pIn_buf_size, tinfl_put_buf_func_ptr pPut_buf_func, void* pPut_buf_user, int flags);

struct tinfl_decompressor_tag;
typedef struct tinfl_decompressor_tag tinfl_decompressor;

// Max size of LZ dictionary.
#define TINFL_LZ_DICT_SIZE 32768

// Return status.
typedef enum { TINFL_STATUS_BAD_PARAM = -3, TINFL_STATUS_ADLER32_MISMATCH = -2, TINFL_STATUS_FAILED = -1, TINFL_STATUS_DONE = 0, TINFL_STATUS_NEEDS_MORE_INPUT = 1, TINFL_STATUS_HAS_MORE_OUTPUT = 2 } tinfl_status; // Initializes the decompressor to its initial state. #define tinfl_init(r) \ do { \ (r)->m_state = 0; \ } \ MZ_MACRO_END #define tinfl_get_adler32(r) (r)->m_check_adler32 // Main low-level decompressor coroutine function. This is the only function actually needed for decompression. All the other functions are just high-level helpers for improved usability. // This is a universal API, i.e. it can be used as a building block to build any desired higher level decompression API. In the limit case, it can be called once per every byte input or output. tinfl_status tinfl_decompress(tinfl_decompressor* r, const mz_uint8* pIn_buf_next, size_t* pIn_buf_size, mz_uint8* pOut_buf_start, mz_uint8* pOut_buf_next, size_t* pOut_buf_size, const mz_uint32 decomp_flags); // Internal/private bits follow. 
enum { TINFL_MAX_HUFF_TABLES = 3, TINFL_MAX_HUFF_SYMBOLS_0 = 288, TINFL_MAX_HUFF_SYMBOLS_1 = 32, TINFL_MAX_HUFF_SYMBOLS_2 = 19, TINFL_FAST_LOOKUP_BITS = 10, TINFL_FAST_LOOKUP_SIZE = 1 << TINFL_FAST_LOOKUP_BITS }; typedef struct { mz_uint8 m_code_size[TINFL_MAX_HUFF_SYMBOLS_0]; mz_int16 m_look_up[TINFL_FAST_LOOKUP_SIZE], m_tree[TINFL_MAX_HUFF_SYMBOLS_0 * 2]; } tinfl_huff_table; #if MINIZ_HAS_64BIT_REGISTERS #define TINFL_USE_64BIT_BITBUF 1 #endif #if TINFL_USE_64BIT_BITBUF typedef mz_uint64 tinfl_bit_buf_t; #define TINFL_BITBUF_SIZE (64) #else typedef mz_uint32 tinfl_bit_buf_t; #define TINFL_BITBUF_SIZE (32) #endif struct tinfl_decompressor_tag { mz_uint32 m_state, m_num_bits, m_zhdr0, m_zhdr1, m_z_adler32, m_final, m_type, m_check_adler32, m_dist, m_counter, m_num_extra, m_table_sizes[TINFL_MAX_HUFF_TABLES]; tinfl_bit_buf_t m_bit_buf; size_t m_dist_from_out_buf_start; tinfl_huff_table m_tables[TINFL_MAX_HUFF_TABLES]; mz_uint8 m_raw_header[4], m_len_codes[TINFL_MAX_HUFF_SYMBOLS_0 + TINFL_MAX_HUFF_SYMBOLS_1 + 137]; }; // ------------------- Low-level Compression API Definitions // Set TDEFL_LESS_MEMORY to 1 to use less memory (compression will be slightly slower, and raw/dynamic blocks will be output more frequently). #define TDEFL_LESS_MEMORY 0 // tdefl_init() compression flags logically OR'd together (low 12 bits contain the max. number of probes per dictionary search): // TDEFL_DEFAULT_MAX_PROBES: The compressor defaults to 128 dictionary probes per dictionary search. 0=Huffman only, 1=Huffman+LZ (fastest/crap compression), 4095=Huffman+LZ (slowest/best compression). enum { TDEFL_HUFFMAN_ONLY = 0, TDEFL_DEFAULT_MAX_PROBES = 128, TDEFL_MAX_PROBES_MASK = 0xFFF }; // TDEFL_WRITE_ZLIB_HEADER: If set, the compressor outputs a zlib header before the deflate data, and the Adler-32 of the source data at the end. Otherwise, you'll get raw deflate data. // TDEFL_COMPUTE_ADLER32: Always compute the adler-32 of the input data (even when not writing zlib headers). 
// TDEFL_GREEDY_PARSING_FLAG: Set to use faster greedy parsing, instead of more efficient lazy parsing. // TDEFL_NONDETERMINISTIC_PARSING_FLAG: Enable to decrease the compressor's initialization time to the minimum, but the output may vary from run to run given the same input (depending on the contents of memory). // TDEFL_RLE_MATCHES: Only look for RLE matches (matches with a distance of 1) // TDEFL_FILTER_MATCHES: Discards matches <= 5 chars if enabled. // TDEFL_FORCE_ALL_STATIC_BLOCKS: Disable usage of optimized Huffman tables. // TDEFL_FORCE_ALL_RAW_BLOCKS: Only use raw (uncompressed) deflate blocks. enum { TDEFL_WRITE_ZLIB_HEADER = 0x01000, TDEFL_COMPUTE_ADLER32 = 0x02000, TDEFL_GREEDY_PARSING_FLAG = 0x04000, TDEFL_NONDETERMINISTIC_PARSING_FLAG = 0x08000, TDEFL_RLE_MATCHES = 0x10000, TDEFL_FILTER_MATCHES = 0x20000, TDEFL_FORCE_ALL_STATIC_BLOCKS = 0x40000, TDEFL_FORCE_ALL_RAW_BLOCKS = 0x80000 }; // High level compression functions: // tdefl_compress_mem_to_heap() compresses a block in memory to a heap block allocated via malloc(). // On entry: // pSrc_buf, src_buf_len: Pointer and size of source block to compress. // flags: The max match finder probes (default is 128) logically OR'd against the above flags. Higher probes are slower but improve compression. // On return: // Function returns a pointer to the compressed data, or NULL on failure. // *pOut_len will be set to the compressed data's size, which could be larger than src_buf_len on uncompressible data. // The caller must free() the returned block when it's no longer needed. void* tdefl_compress_mem_to_heap(const void* pSrc_buf, size_t src_buf_len, size_t* pOut_len, int flags); // tdefl_compress_mem_to_mem() compresses a block in memory to another block in memory. // Returns 0 on failure. size_t tdefl_compress_mem_to_mem(void* pOut_buf, size_t out_buf_len, const void* pSrc_buf, size_t src_buf_len, int flags); // Compresses an image to a compressed PNG file in memory. 
// On entry: // pImage, w, h, and num_chans describe the image to compress. num_chans may be 1, 2, 3, or 4. // The image pitch in bytes per scanline will be w*num_chans. The leftmost pixel on the top scanline is stored first in memory. // On return: // Function returns a pointer to the compressed data, or NULL on failure. // *pLen_out will be set to the size of the PNG image file. // The caller must mz_free() the returned heap block (which will typically be larger than *pLen_out) when it's no longer needed. void* tdefl_write_image_to_png_file_in_memory(const void* pImage, int w, int h, int num_chans, size_t* pLen_out); // Output stream interface. The compressor uses this interface to write compressed data. It'll typically be called TDEFL_OUT_BUF_SIZE at a time. typedef mz_bool (*tdefl_put_buf_func_ptr)(const void* pBuf, int len, void* pUser); // tdefl_compress_mem_to_output() compresses a block to an output stream. The above helpers use this function internally. mz_bool tdefl_compress_mem_to_output(const void* pBuf, size_t buf_len, tdefl_put_buf_func_ptr pPut_buf_func, void* pPut_buf_user, int flags); enum { TDEFL_MAX_HUFF_TABLES = 3, TDEFL_MAX_HUFF_SYMBOLS_0 = 288, TDEFL_MAX_HUFF_SYMBOLS_1 = 32, TDEFL_MAX_HUFF_SYMBOLS_2 = 19, TDEFL_LZ_DICT_SIZE = 32768, TDEFL_LZ_DICT_SIZE_MASK = TDEFL_LZ_DICT_SIZE - 1, TDEFL_MIN_MATCH_LEN = 3, TDEFL_MAX_MATCH_LEN = 258 }; // TDEFL_OUT_BUF_SIZE MUST be large enough to hold a single entire compressed output block (using static/fixed Huffman codes). 
#if TDEFL_LESS_MEMORY
// Reduced-memory configuration: smaller LZ code buffer and a 12-bit hash.
enum {
  TDEFL_LZ_CODE_BUF_SIZE = 24 * 1024,
  TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10,
  TDEFL_MAX_HUFF_SYMBOLS = 288,
  TDEFL_LZ_HASH_BITS = 12,
  TDEFL_LEVEL1_HASH_SIZE_MASK = 4095,
  TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3,
  TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS
};
#else
// Default configuration: 64 KiB LZ code buffer and a 15-bit hash.
enum {
  TDEFL_LZ_CODE_BUF_SIZE = 64 * 1024,
  TDEFL_OUT_BUF_SIZE = (TDEFL_LZ_CODE_BUF_SIZE * 13) / 10,
  TDEFL_MAX_HUFF_SYMBOLS = 288,
  TDEFL_LZ_HASH_BITS = 15,
  TDEFL_LEVEL1_HASH_SIZE_MASK = 4095,
  TDEFL_LZ_HASH_SHIFT = (TDEFL_LZ_HASH_BITS + 2) / 3,
  TDEFL_LZ_HASH_SIZE = 1 << TDEFL_LZ_HASH_BITS
};
#endif

// The low-level tdefl functions below may be used directly if the above helper functions aren't flexible enough. The low-level functions don't make any heap allocations, unlike the above helper functions.
// Negative values are failures; OKAY and DONE are non-negative.
typedef enum {
  TDEFL_STATUS_BAD_PARAM = -2,
  TDEFL_STATUS_PUT_BUF_FAILED = -1,
  TDEFL_STATUS_OKAY = 0,
  TDEFL_STATUS_DONE = 1
} tdefl_status;

// Must map to MZ_NO_FLUSH, MZ_SYNC_FLUSH, etc. enums
// (the value 1 is intentionally unused here).
typedef enum {
  TDEFL_NO_FLUSH = 0,
  TDEFL_SYNC_FLUSH = 2,
  TDEFL_FULL_FLUSH = 3,
  TDEFL_FINISH = 4
} tdefl_flush;

// tdefl's compression state structure.
typedef struct { tdefl_put_buf_func_ptr m_pPut_buf_func; void* m_pPut_buf_user; mz_uint m_flags, m_max_probes[2]; int m_greedy_parsing; mz_uint m_adler32, m_lookahead_pos, m_lookahead_size, m_dict_size; mz_uint8 *m_pLZ_code_buf, *m_pLZ_flags, *m_pOutput_buf, *m_pOutput_buf_end; mz_uint m_num_flags_left, m_total_lz_bytes, m_lz_code_buf_dict_pos, m_bits_in, m_bit_buffer; mz_uint m_saved_match_dist, m_saved_match_len, m_saved_lit, m_output_flush_ofs, m_output_flush_remaining, m_finished, m_block_index, m_wants_to_finish; tdefl_status m_prev_return_status; const void* m_pIn_buf; void* m_pOut_buf; size_t *m_pIn_buf_size, *m_pOut_buf_size; tdefl_flush m_flush; const mz_uint8* m_pSrc; size_t m_src_buf_left, m_out_buf_ofs; mz_uint8 m_dict[TDEFL_LZ_DICT_SIZE + TDEFL_MAX_MATCH_LEN - 1]; mz_uint16 m_huff_count[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; mz_uint16 m_huff_codes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; mz_uint8 m_huff_code_sizes[TDEFL_MAX_HUFF_TABLES][TDEFL_MAX_HUFF_SYMBOLS]; mz_uint8 m_lz_code_buf[TDEFL_LZ_CODE_BUF_SIZE]; mz_uint16 m_next[TDEFL_LZ_DICT_SIZE]; mz_uint16 m_hash[TDEFL_LZ_HASH_SIZE]; mz_uint8 m_output_buf[TDEFL_OUT_BUF_SIZE]; } tdefl_compressor; // Initializes the compressor. // There is no corresponding deinit() function because the tdefl API's do not dynamically allocate memory. // pBut_buf_func: If NULL, output data will be supplied to the specified callback. In this case, the user should call the tdefl_compress_buffer() API for compression. // If pBut_buf_func is NULL the user should always call the tdefl_compress() API. // flags: See the above enums (TDEFL_HUFFMAN_ONLY, TDEFL_WRITE_ZLIB_HEADER, etc.) tdefl_status tdefl_init(tdefl_compressor* d, tdefl_put_buf_func_ptr pPut_buf_func, void* pPut_buf_user, int flags); // Compresses a block of data, consuming as much of the specified input buffer as possible, and writing as much compressed data to the specified output buffer as possible. 
tdefl_status tdefl_compress(tdefl_compressor* d, const void* pIn_buf, size_t* pIn_buf_size, void* pOut_buf, size_t* pOut_buf_size, tdefl_flush flush); // tdefl_compress_buffer() is only usable when the tdefl_init() is called with a non-NULL tdefl_put_buf_func_ptr. // tdefl_compress_buffer() always consumes the entire input buffer. tdefl_status tdefl_compress_buffer(tdefl_compressor* d, const void* pIn_buf, size_t in_buf_size, tdefl_flush flush); tdefl_status tdefl_get_prev_return_status(tdefl_compressor* d); mz_uint32 tdefl_get_adler32(tdefl_compressor* d); // Can't use tdefl_create_comp_flags_from_zip_params if MINIZ_NO_ZLIB_APIS isn't defined, because it uses some of its macros. #ifndef MINIZ_NO_ZLIB_APIS // Create tdefl_compress() flags given zlib-style compression parameters. // level may range from [0,10] (where 10 is absolute max compression, but may be much slower on some files) // window_bits may be -15 (raw deflate) or 15 (zlib) // strategy may be either MZ_DEFAULT_STRATEGY, MZ_FILTERED, MZ_HUFFMAN_ONLY, MZ_RLE, or MZ_FIXED mz_uint tdefl_create_comp_flags_from_zip_params(int level, int window_bits, int strategy); #endif // #ifndef MINIZ_NO_ZLIB_APIS #ifdef __cplusplus } #endif #endif // MINIZ_HEADER_INCLUDED DaemonEngine-crunch-ef4d32f/crnlib/crn_mipmapped_texture.cpp000066400000000000000000002727461503722002600244140ustar00rootroot00000000000000// File: crn_dds_texture.cpp - Actually supports both .DDS and .KTX. Probably will rename this eventually. 
// See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_mipmapped_texture.h" #include "crn_cfile_stream.h" #include "crn_image_utils.h" #include "crn_console.h" #include "crn_texture_comp.h" #include "crn_ktx_texture.h" #include "../inc/crn_defs.h" namespace crnlib { const vec2I g_vertical_cross_image_offsets[6] = {vec2I(2, 1), vec2I(0, 1), vec2I(1, 0), vec2I(1, 2), vec2I(1, 1), vec2I(1, 3)}; mip_level::mip_level() : m_width(0), m_height(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_format(PIXEL_FMT_INVALID), m_pImage(NULL), m_pDXTImage(NULL), m_orient_flags(cDefaultOrientationFlags) { } mip_level::mip_level(const mip_level& other) : m_width(0), m_height(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_format(PIXEL_FMT_INVALID), m_pImage(NULL), m_pDXTImage(NULL), m_orient_flags(cDefaultOrientationFlags) { *this = other; } mip_level& mip_level::operator=(const mip_level& rhs) { clear(); m_width = rhs.m_width; m_height = rhs.m_height; m_comp_flags = rhs.m_comp_flags; m_format = rhs.m_format; m_orient_flags = rhs.m_orient_flags; if (rhs.m_pImage) m_pImage = crnlib_new(*rhs.m_pImage); if (rhs.m_pDXTImage) m_pDXTImage = crnlib_new(*rhs.m_pDXTImage); return *this; } mip_level::~mip_level() { crnlib_delete(m_pImage); crnlib_delete(m_pDXTImage); } void mip_level::clear() { m_width = 0; m_height = 0; m_comp_flags = pixel_format_helpers::cDefaultCompFlags; m_format = PIXEL_FMT_INVALID; m_orient_flags = cDefaultOrientationFlags; if (m_pImage) { crnlib_delete(m_pImage); m_pImage = NULL; } if (m_pDXTImage) { crnlib_delete(m_pDXTImage); m_pDXTImage = NULL; } } void mip_level::assign(image_u8* p, pixel_format fmt, orientation_flags_t orient_flags) { CRNLIB_ASSERT(p); clear(); m_pImage = p; m_width = p->get_width(); m_height = p->get_height(); m_orient_flags = orient_flags; if (fmt != PIXEL_FMT_INVALID) m_format = fmt; else { if (p->is_grayscale()) m_format = p->is_component_valid(3) ? 
PIXEL_FMT_A8L8 : PIXEL_FMT_L8; else m_format = p->is_component_valid(3) ? PIXEL_FMT_A8R8G8B8 : PIXEL_FMT_R8G8B8; } m_comp_flags = p->get_comp_flags(); //pixel_format_helpers::get_component_flags(m_format); } void mip_level::assign(dxt_image* p, pixel_format fmt, orientation_flags_t orient_flags) { CRNLIB_ASSERT(p); clear(); m_pDXTImage = p; m_width = p->get_width(); m_height = p->get_height(); m_orient_flags = orient_flags; if (fmt != PIXEL_FMT_INVALID) m_format = fmt; else m_format = pixel_format_helpers::from_dxt_format(p->get_format()); m_comp_flags = pixel_format_helpers::get_component_flags(m_format); } bool mip_level::pack_to_dxt(const image_u8& img, pixel_format fmt, bool cook, const dxt_image::pack_params& orig_params, orientation_flags_t orient_flags) { CRNLIB_ASSERT(pixel_format_helpers::is_dxt(fmt)); if (!pixel_format_helpers::is_dxt(fmt)) return false; dxt_image::pack_params p(orig_params); if (pixel_format_helpers::is_pixel_format_non_srgb(fmt) || (img.get_comp_flags() & pixel_format_helpers::cCompFlagNormalMap) || (img.get_comp_flags() & pixel_format_helpers::cCompFlagLumaChroma)) { // Disable perceptual colorspace metrics when packing to swizzled or non-RGB pixel formats. 
p.m_perceptual = false; } image_u8 tmp_img(img); clear(); m_format = fmt; if (cook) cook_image(tmp_img); if ((pixel_format_helpers::is_alpha_only(fmt)) && (!tmp_img.has_alpha())) tmp_img.set_alpha_to_luma(); dxt_format dxt_fmt = pixel_format_helpers::get_dxt_format(fmt); dxt_image* pDXT_image = crnlib_new(); if (!pDXT_image->init(dxt_fmt, tmp_img, p)) { clear(); return false; } assign(pDXT_image, fmt, orient_flags); return true; } bool mip_level::pack_to_dxt(pixel_format fmt, bool cook, const dxt_image::pack_params& p) { CRNLIB_ASSERT(pixel_format_helpers::is_dxt(fmt)); if (!pixel_format_helpers::is_dxt(fmt)) return false; image_u8 tmp_img; image_u8* pImage = get_unpacked_image(tmp_img, cUnpackFlagUncook); return pack_to_dxt(*pImage, fmt, cook, p, m_orient_flags); } bool mip_level::unpack_from_dxt(bool uncook) { if (!m_pDXTImage) return false; image_u8* pNew_img = crnlib_new(); CRNLIB_ASSERT(get_unpacked_image(*pNew_img, uncook ? cUnpackFlagUncook : 0) == pNew_img); assign(pNew_img, PIXEL_FMT_INVALID, m_orient_flags); return true; } bool mip_level::is_flipped() const { return ((m_orient_flags & (cOrientationFlagXFlipped | cOrientationFlagYFlipped)) != 0); } bool mip_level::is_x_flipped() const { return ((m_orient_flags & cOrientationFlagXFlipped) != 0); } bool mip_level::is_y_flipped() const { return ((m_orient_flags & cOrientationFlagYFlipped) != 0); } bool mip_level::can_unflip_without_unpacking() const { if (!is_valid()) return false; if (!is_packed()) return true; bool can_unflip = true; if (m_orient_flags & cOrientationFlagXFlipped) { if (!m_pDXTImage->can_flip(0)) can_unflip = false; } if (m_orient_flags & cOrientationFlagYFlipped) { if (!m_pDXTImage->can_flip(1)) can_unflip = false; } return can_unflip; } bool mip_level::unflip(bool allow_unpacking_to_flip, bool uncook_if_necessary_to_unpack) { if (!is_valid()) return false; if (!is_flipped()) return false; if (is_packed()) { if (can_unflip_without_unpacking()) { if (m_orient_flags & 
cOrientationFlagXFlipped) { m_pDXTImage->flip_x(); m_orient_flags = static_cast(m_orient_flags & ~cOrientationFlagXFlipped); } if (m_orient_flags & cOrientationFlagYFlipped) { m_pDXTImage->flip_y(); m_orient_flags = static_cast(m_orient_flags & ~cOrientationFlagYFlipped); } return true; } if (!allow_unpacking_to_flip) return false; } unpack_from_dxt(uncook_if_necessary_to_unpack); if (m_orient_flags & cOrientationFlagXFlipped) { m_pImage->flip_x(); m_orient_flags = static_cast(m_orient_flags & ~cOrientationFlagXFlipped); } if (m_orient_flags & cOrientationFlagYFlipped) { m_pImage->flip_y(); m_orient_flags = static_cast(m_orient_flags & ~cOrientationFlagYFlipped); } return true; } bool mip_level::set_alpha_to_luma() { if (m_pDXTImage) unpack_from_dxt(true); m_pImage->set_alpha_to_luma(); m_comp_flags = m_pImage->get_comp_flags(); if (m_pImage->is_grayscale()) m_format = PIXEL_FMT_A8L8; else m_format = PIXEL_FMT_A8R8G8B8; return true; } bool mip_level::convert(image_utils::conversion_type conv_type) { if (m_pDXTImage) unpack_from_dxt(true); image_utils::convert_image(*m_pImage, conv_type); m_comp_flags = m_pImage->get_comp_flags(); if (m_pImage->is_grayscale()) m_format = m_pImage->has_alpha() ? PIXEL_FMT_A8L8 : PIXEL_FMT_L8; else m_format = m_pImage->has_alpha() ? 
PIXEL_FMT_A8R8G8B8 : PIXEL_FMT_R8G8B8; return true; } bool mip_level::convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p) { if (pixel_format_helpers::is_dxt(fmt)) return pack_to_dxt(fmt, cook, p); image_u8 tmp_img; image_u8* pImg = get_unpacked_image(tmp_img, cUnpackFlagUncook); image_u8* pImage = crnlib_new(); pImage->set_comp_flags(pixel_format_helpers::get_component_flags(fmt)); if (!pImage->resize(pImg->get_width(), pImg->get_height())) return false; for (uint y = 0; y < pImg->get_height(); y++) { for (uint x = 0; x < pImg->get_width(); x++) { color_quad_u8 c((*pImg)(x, y)); if ((pixel_format_helpers::is_alpha_only(fmt)) && (!pImg->has_alpha())) { c.a = static_cast(c.get_luma()); } else { if (pImage->is_grayscale()) { uint8 g = static_cast(c.get_luma()); c.r = g; c.g = g; c.b = g; } if (!pImage->is_component_valid(3)) c.a = 255; } (*pImage)(x, y) = c; } } assign(pImage, fmt, m_orient_flags); return true; } void mip_level::cook_image(image_u8& img) const { image_utils::conversion_type conv_type = image_utils::get_conversion_type(true, m_format); if (conv_type != image_utils::cConversion_Invalid) image_utils::convert_image(img, conv_type); } void mip_level::uncook_image(image_u8& img) const { image_utils::conversion_type conv_type = image_utils::get_conversion_type(false, m_format); if (conv_type != image_utils::cConversion_Invalid) image_utils::convert_image(img, conv_type); } image_u8* mip_level::get_unpacked_image(image_u8& tmp, uint unpack_flags) const { if (!is_valid()) return NULL; if (m_pDXTImage) { m_pDXTImage->unpack(tmp); tmp.set_comp_flags(m_comp_flags); if (unpack_flags & cUnpackFlagUncook) uncook_image(tmp); } else if ((unpack_flags & cUnpackFlagUnflip) && (m_orient_flags & (cOrientationFlagXFlipped | cOrientationFlagYFlipped))) tmp = *m_pImage; else return m_pImage; if (unpack_flags & cUnpackFlagUnflip) { if (m_orient_flags & cOrientationFlagXFlipped) tmp.flip_x(); if (m_orient_flags & cOrientationFlagYFlipped) tmp.flip_y(); } 
return &tmp; } bool mip_level::flip_x() { if (!is_valid()) return false; if (m_pDXTImage) return m_pDXTImage->flip_x(); else if (m_pImage) { m_pImage->flip_x(); return true; } return false; } bool mip_level::flip_y() { if (!is_valid()) return false; if (m_pDXTImage) return m_pDXTImage->flip_y(); else if (m_pImage) { m_pImage->flip_y(); return true; } return false; } // ------------------------------------------------------------------------- mipmapped_texture::mipmapped_texture() : m_width(0), m_height(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_format(PIXEL_FMT_INVALID), m_source_file_type(texture_file_types::cFormatInvalid) { } mipmapped_texture::~mipmapped_texture() { free_all_mips(); } void mipmapped_texture::clear() { free_all_mips(); m_name.clear(); m_width = 0; m_height = 0; m_comp_flags = pixel_format_helpers::cDefaultCompFlags; m_format = PIXEL_FMT_INVALID; m_source_file_type = texture_file_types::cFormatInvalid; m_last_error.clear(); } void mipmapped_texture::free_all_mips() { for (uint i = 0; i < m_faces.size(); i++) for (uint j = 0; j < m_faces[i].size(); j++) crnlib_delete(m_faces[i][j]); m_faces.clear(); } mipmapped_texture::mipmapped_texture(const mipmapped_texture& other) : m_width(0), m_height(0), m_comp_flags(pixel_format_helpers::cDefaultCompFlags), m_format(PIXEL_FMT_INVALID) { *this = other; } mipmapped_texture& mipmapped_texture::operator=(const mipmapped_texture& rhs) { if (this == &rhs) return *this; clear(); m_name = rhs.m_name; m_width = rhs.m_width; m_height = rhs.m_height; m_comp_flags = rhs.m_comp_flags; m_format = rhs.m_format; m_faces.resize(rhs.m_faces.size()); for (uint i = 0; i < m_faces.size(); i++) { m_faces[i].resize(rhs.m_faces[i].size()); for (uint j = 0; j < rhs.m_faces[i].size(); j++) m_faces[i][j] = crnlib_new(*rhs.m_faces[i][j]); } CRNLIB_ASSERT((!is_valid()) || check()); return *this; } bool mipmapped_texture::read_dds(data_stream_serializer& serializer) { if (!read_dds_internal(serializer)) { clear(); 
return false; } return true; } bool mipmapped_texture::read_dds_internal(data_stream_serializer& serializer) { CRNLIB_ASSERT(serializer.get_little_endian()); clear(); set_last_error("Not a DDS file"); uint8 hdr[4]; if (!serializer.read(hdr, sizeof(hdr))) return false; if (memcmp(hdr, "DDS ", 4) != 0) return false; DDSURFACEDESC2 desc; if (!serializer.read(&desc, sizeof(desc))) return false; if (!c_crnlib_little_endian_platform) utils::endian_switch_dwords(reinterpret_cast(&desc), sizeof(desc) / sizeof(uint32)); if (desc.dwSize != sizeof(desc)) return false; if ((!desc.dwHeight) || (!desc.dwWidth) || (desc.dwHeight > cDDSMaxImageDimensions) || (desc.dwWidth > cDDSMaxImageDimensions)) return false; m_width = desc.dwWidth; m_height = desc.dwHeight; uint num_mip_levels = 1; if ((desc.dwFlags & DDSD_MIPMAPCOUNT) && (desc.ddsCaps.dwCaps & DDSCAPS_MIPMAP) && (desc.dwMipMapCount)) { num_mip_levels = desc.dwMipMapCount; if (num_mip_levels > utils::compute_max_mips(desc.dwWidth, desc.dwHeight)) return false; } uint num_faces = 1; if (desc.ddsCaps.dwCaps & DDSCAPS_COMPLEX) { if (desc.ddsCaps.dwCaps2 & DDSCAPS2_CUBEMAP) { const uint all_faces_mask = DDSCAPS2_CUBEMAP_POSITIVEX | DDSCAPS2_CUBEMAP_NEGATIVEX | DDSCAPS2_CUBEMAP_POSITIVEY | DDSCAPS2_CUBEMAP_NEGATIVEY | DDSCAPS2_CUBEMAP_POSITIVEZ | DDSCAPS2_CUBEMAP_NEGATIVEZ; if ((desc.ddsCaps.dwCaps2 & all_faces_mask) != all_faces_mask) { set_last_error("Incomplete cubemaps unsupported"); return false; } num_faces = 6; } else if (desc.ddsCaps.dwCaps2 & DDSCAPS2_VOLUME) { set_last_error("Volume textures unsupported"); return false; } } if (desc.ddpfPixelFormat.dwFlags & DDPF_PALETTEINDEXED8) { // It's difficult to even make P8 textures with existing tools: // nvdxt just hangs // dxtex.exe just makes all-white textures // So screw it. 
set_last_error("Palettized textures unsupported"); return false; } dxt_format dxt_fmt = cDXTInvalid; if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) { // http://code.google.com/p/nvidia-texture-tools/issues/detail?id=41 // ATI2 YX: 0 (0x00000000) // ATI2 XY: 1498952257 (0x59583241) (BC5) // ATI Compressonator obeys this stuff, nvidia's tools (like readdxt) don't - oh great switch (desc.ddpfPixelFormat.dwFourCC) { case PIXEL_FMT_DXT1: { m_format = PIXEL_FMT_DXT1; dxt_fmt = cDXT1; break; } case PIXEL_FMT_DXT2: case PIXEL_FMT_DXT3: { m_format = PIXEL_FMT_DXT3; dxt_fmt = cDXT3; break; } case PIXEL_FMT_DXT4: case PIXEL_FMT_DXT5: { switch (desc.ddpfPixelFormat.dwRGBBitCount) { case PIXEL_FMT_DXT5_CCxY: m_format = PIXEL_FMT_DXT5_CCxY; break; case PIXEL_FMT_DXT5_xGxR: m_format = PIXEL_FMT_DXT5_xGxR; break; case PIXEL_FMT_DXT5_xGBR: m_format = PIXEL_FMT_DXT5_xGBR; break; case PIXEL_FMT_DXT5_AGBR: m_format = PIXEL_FMT_DXT5_AGBR; break; default: m_format = PIXEL_FMT_DXT5; break; } dxt_fmt = cDXT5; break; } case PIXEL_FMT_3DC: { if (desc.ddpfPixelFormat.dwRGBBitCount == CRNLIB_PIXEL_FMT_FOURCC('A', '2', 'X', 'Y')) { dxt_fmt = cDXN_XY; m_format = PIXEL_FMT_DXN; } else { dxt_fmt = cDXN_YX; // aka ATI2 m_format = PIXEL_FMT_3DC; } break; } case PIXEL_FMT_DXT5A: { m_format = PIXEL_FMT_DXT5A; dxt_fmt = cDXT5A; break; } case PIXEL_FMT_ETC1: { m_format = PIXEL_FMT_ETC1; dxt_fmt = cETC1; break; } case PIXEL_FMT_ETC2: { m_format = PIXEL_FMT_ETC2; dxt_fmt = cETC2; break; } case PIXEL_FMT_ETC2A: { m_format = PIXEL_FMT_ETC2A; dxt_fmt = cETC2A; break; } case PIXEL_FMT_ETC1S: { m_format = PIXEL_FMT_ETC1S; dxt_fmt = cETC1S; break; } case PIXEL_FMT_ETC2AS: { m_format = PIXEL_FMT_ETC2AS; dxt_fmt = cETC2AS; break; } default: { dynamic_string err_msg(cVarArg, "Unsupported DDS FOURCC format: 0x%08X", desc.ddpfPixelFormat.dwFourCC); set_last_error(err_msg.get_ptr()); return false; } } } else if ((desc.ddpfPixelFormat.dwRGBBitCount < 8) || (desc.ddpfPixelFormat.dwRGBBitCount > 32) || 
(desc.ddpfPixelFormat.dwRGBBitCount & 7)) { set_last_error("Unsupported bit count"); return false; } else if (desc.ddpfPixelFormat.dwFlags & DDPF_RGB) { if (desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) { if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHAPIXELS) m_format = PIXEL_FMT_A8L8; else m_format = PIXEL_FMT_L8; } else if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHAPIXELS) m_format = PIXEL_FMT_A8R8G8B8; else m_format = PIXEL_FMT_R8G8B8; } else if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHAPIXELS) { if (desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) m_format = PIXEL_FMT_A8L8; else m_format = PIXEL_FMT_A8; } else if (desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) { m_format = PIXEL_FMT_L8; } else if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHA) { m_format = PIXEL_FMT_A8; } else { set_last_error("Unsupported format"); return false; } m_comp_flags = pixel_format_helpers::get_component_flags(m_format); uint bits_per_pixel = desc.ddpfPixelFormat.dwRGBBitCount; if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) bits_per_pixel = pixel_format_helpers::get_bpp(m_format); set_last_error("Load failed"); uint default_pitch; if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) default_pitch = (((desc.dwWidth + 3) & ~3) * ((desc.dwHeight + 3) & ~3) * bits_per_pixel) >> 3; else default_pitch = (desc.dwWidth * bits_per_pixel) >> 3; uint pitch = 0; if ((desc.dwFlags & DDSD_PITCH) && (!(desc.dwFlags & DDSD_LINEARSIZE))) { pitch = desc.lPitch; } if (!pitch) pitch = default_pitch; #if 0 else if (pitch & 3) { // MS's DDS docs say the pitch must be DWORD aligned - but this isn't always the case. // ATI Compressonator writes images with non-DWORD aligned pitches, and the DDSWithoutD3DX sample from MS doesn't compute the proper DWORD aligned pitch when reading DDS // files, so the docs must be wrong/outdated. console::warning("DDS file's pitch is not divisible by 4 - trying to load anyway."); } #endif // Check for obviously wacky source pitches (probably a corrupted/invalid file). 
else if (pitch > default_pitch * 8) { set_last_error("Invalid pitch"); return false; } crnlib::vector load_buf; uint mask_size[4]; mask_size[0] = math::bitmask_size(desc.ddpfPixelFormat.dwRBitMask); mask_size[1] = math::bitmask_size(desc.ddpfPixelFormat.dwGBitMask); mask_size[2] = math::bitmask_size(desc.ddpfPixelFormat.dwBBitMask); mask_size[3] = math::bitmask_size(desc.ddpfPixelFormat.dwRGBAlphaBitMask); uint mask_ofs[4]; mask_ofs[0] = math::bitmask_ofs(desc.ddpfPixelFormat.dwRBitMask); mask_ofs[1] = math::bitmask_ofs(desc.ddpfPixelFormat.dwGBitMask); mask_ofs[2] = math::bitmask_ofs(desc.ddpfPixelFormat.dwBBitMask); mask_ofs[3] = math::bitmask_ofs(desc.ddpfPixelFormat.dwRGBAlphaBitMask); if ((desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) && (!mask_size[0])) { mask_size[0] = desc.ddpfPixelFormat.dwRGBBitCount >> 3; if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHAPIXELS) mask_size[0] /= 2; } m_faces.resize(num_faces); bool dxt1_alpha = false; for (uint face_index = 0; face_index < num_faces; face_index++) { m_faces[face_index].resize(num_mip_levels); for (uint level_index = 0; level_index < num_mip_levels; level_index++) { const uint width = math::maximum(desc.dwWidth >> level_index, 1U); const uint height = math::maximum(desc.dwHeight >> level_index, 1U); mip_level* pMip = crnlib_new(); m_faces[face_index][level_index] = pMip; if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) { const uint bytes_per_block = pixel_format_helpers::get_dxt_bytes_per_block(m_format); const uint num_blocks_x = (width + 3) >> 2; const uint num_blocks_y = (height + 3) >> 2; const uint actual_level_pitch = num_blocks_x * num_blocks_y * bytes_per_block; const uint level_pitch = level_index ? 
actual_level_pitch : pitch; dxt_image* pDXTImage = crnlib_new(); if (!pDXTImage->init(dxt_fmt, width, height, false)) { crnlib_delete(pDXTImage); CRNLIB_ASSERT(0); return false; } CRNLIB_ASSERT(pDXTImage->get_element_vec().size() * sizeof(dxt_image::element) == actual_level_pitch); if (!serializer.read(&pDXTImage->get_element_vec()[0], actual_level_pitch)) { crnlib_delete(pDXTImage); return false; } // DDS image in memory are always assumed to be little endian - the same as DDS itself. //if (c_crnlib_big_endian_platform) // utils::endian_switch_words(reinterpret_cast(&pDXTImage->get_element_vec()[0]), actual_level_pitch / sizeof(uint16)); if (level_pitch > actual_level_pitch) { if (!serializer.skip(level_pitch - actual_level_pitch)) { crnlib_delete(pDXTImage); return false; } } if ((m_format == PIXEL_FMT_DXT1) && (!dxt1_alpha)) dxt1_alpha = pDXTImage->has_alpha(); pMip->assign(pDXTImage, m_format); } else { image_u8* pImage = crnlib_new(width, height); pImage->set_comp_flags(m_comp_flags); const uint bytes_per_pixel = desc.ddpfPixelFormat.dwRGBBitCount >> 3; const uint actual_line_pitch = width * bytes_per_pixel; const uint line_pitch = level_index ? actual_line_pitch : pitch; if (load_buf.size() < line_pitch) load_buf.resize(line_pitch); color_quad_u8 q(0, 0, 0, 255); for (uint y = 0; y < height; y++) { if (!serializer.read(&load_buf[0], line_pitch)) { crnlib_delete(pImage); return false; } color_quad_u8* pDst = pImage->get_scanline(y); for (uint x = 0; x < width; x++) { const uint8* pPixel = &load_buf[x * bytes_per_pixel]; uint c = 0; // Assumes DDS is always little endian. 
for (uint l = 0; l < bytes_per_pixel; l++) c |= (pPixel[l] << (l * 8U)); for (uint i = 0; i < 4; i++) { if (!mask_size[i]) continue; uint mask = (1U << mask_size[i]) - 1U; uint bits = (c >> mask_ofs[i]) & mask; uint v = (bits * 255 + (mask >> 1)) / mask; q.set_component(i, v); } if (desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) { q.g = q.r; q.b = q.r; } *pDst++ = q; } } pMip->assign(pImage, m_format); CRNLIB_ASSERT(pMip->get_comp_flags() == m_comp_flags); } } } clear_last_error(); if (dxt1_alpha) change_dxt1_to_dxt1a(); return true; } void mipmapped_texture::change_dxt1_to_dxt1a() { if (m_format != PIXEL_FMT_DXT1) return; m_format = PIXEL_FMT_DXT1A; m_comp_flags = pixel_format_helpers::get_component_flags(m_format); for (uint f = 0; f < m_faces.size(); f++) { for (uint l = 0; l < m_faces[f].size(); l++) { if (m_faces[f][l]->get_dxt_image()) { m_faces[f][l]->set_format(m_format); m_faces[f][l]->set_comp_flags(m_comp_flags); m_faces[f][l]->get_dxt_image()->change_dxt1_to_dxt1a(); } } } } bool mipmapped_texture::check() const { uint levels = 0; orientation_flags_t orient_flags = cDefaultOrientationFlags; for (uint f = 0; f < m_faces.size(); f++) { if (!f) { levels = m_faces[f].size(); if ((levels) && (m_faces[f][0])) orient_flags = m_faces[f][0]->get_orientation_flags(); } else if (m_faces[f].size() != levels) return false; for (uint l = 0; l < m_faces[f].size(); l++) { mip_level* p = m_faces[f][l]; if (!p) return false; if (!p->is_valid()) return false; if (p->get_orientation_flags() != orient_flags) return false; if (!l) { if (m_width != p->get_width()) return false; if (m_height != p->get_height()) return false; } if (p->get_comp_flags() != m_comp_flags) return false; if (p->get_format() != m_format) return false; if (p->get_image()) { if (pixel_format_helpers::is_dxt(p->get_format())) return false; if (p->get_image()->get_width() != p->get_width()) return false; if (p->get_image()->get_height() != p->get_height()) return false; if 
(p->get_image()->get_comp_flags() != m_comp_flags) return false; } else if (!pixel_format_helpers::is_dxt(p->get_format())) return false; } } return true; } bool mipmapped_texture::write_dds(data_stream_serializer& serializer) const { if (!m_width) { set_last_error("Nothing to write"); return false; } set_last_error("write_dds() failed"); if (!serializer.write("DDS ", sizeof(uint32))) return false; DDSURFACEDESC2 desc; utils::zero_object(desc); desc.dwSize = sizeof(desc); desc.dwFlags = DDSD_WIDTH | DDSD_HEIGHT | DDSD_PIXELFORMAT | DDSD_CAPS; desc.dwWidth = m_width; desc.dwHeight = m_height; desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; desc.ddpfPixelFormat.dwSize = sizeof(desc.ddpfPixelFormat); if (get_num_levels() > 1) { desc.dwMipMapCount = get_num_levels(); desc.dwFlags |= DDSD_MIPMAPCOUNT; desc.ddsCaps.dwCaps |= (DDSCAPS_MIPMAP | DDSCAPS_COMPLEX); } if (get_num_faces() > 1) { desc.ddsCaps.dwCaps |= DDSCAPS_COMPLEX; desc.ddsCaps.dwCaps2 |= DDSCAPS2_CUBEMAP; desc.ddsCaps.dwCaps2 |= DDSCAPS2_CUBEMAP_POSITIVEX | DDSCAPS2_CUBEMAP_NEGATIVEX | DDSCAPS2_CUBEMAP_POSITIVEY | DDSCAPS2_CUBEMAP_NEGATIVEY | DDSCAPS2_CUBEMAP_POSITIVEZ | DDSCAPS2_CUBEMAP_NEGATIVEZ; } bool dxt_format = false; if (pixel_format_helpers::is_dxt(m_format)) { dxt_format = true; desc.ddpfPixelFormat.dwFlags |= DDPF_FOURCC; switch (m_format) { case PIXEL_FMT_ETC1: { desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_ETC1; desc.ddpfPixelFormat.dwRGBBitCount = 0; break; } case PIXEL_FMT_ETC2: { desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_ETC2; desc.ddpfPixelFormat.dwRGBBitCount = 0; break; } case PIXEL_FMT_ETC2A: { desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_ETC2A; desc.ddpfPixelFormat.dwRGBBitCount = 0; break; } case PIXEL_FMT_ETC1S: { desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_ETC1S; desc.ddpfPixelFormat.dwRGBBitCount = 0; break; } case PIXEL_FMT_ETC2AS: { desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_ETC2AS; desc.ddpfPixelFormat.dwRGBBitCount = 0; break; } case PIXEL_FMT_DXN: { 
desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_3DC; desc.ddpfPixelFormat.dwRGBBitCount = PIXEL_FMT_DXN; break; } case PIXEL_FMT_DXT1A: { desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_DXT1; desc.ddpfPixelFormat.dwRGBBitCount = 0; break; } case PIXEL_FMT_DXT5_CCxY: { desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_DXT5; desc.ddpfPixelFormat.dwRGBBitCount = (uint32)PIXEL_FMT_DXT5_CCxY; break; } case PIXEL_FMT_DXT5_xGxR: { desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_DXT5; desc.ddpfPixelFormat.dwRGBBitCount = (uint32)PIXEL_FMT_DXT5_xGxR; break; } case PIXEL_FMT_DXT5_xGBR: { desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_DXT5; desc.ddpfPixelFormat.dwRGBBitCount = (uint32)PIXEL_FMT_DXT5_xGBR; break; } case PIXEL_FMT_DXT5_AGBR: { desc.ddpfPixelFormat.dwFourCC = (uint32)PIXEL_FMT_DXT5; desc.ddpfPixelFormat.dwRGBBitCount = (uint32)PIXEL_FMT_DXT5_AGBR; break; } default: { desc.ddpfPixelFormat.dwFourCC = (uint32)m_format; desc.ddpfPixelFormat.dwRGBBitCount = 0; break; } } uint bits_per_pixel = pixel_format_helpers::get_bpp(m_format); desc.lPitch = (((desc.dwWidth + 3) & ~3) * ((desc.dwHeight + 3) & ~3) * bits_per_pixel) >> 3; desc.dwFlags |= DDSD_LINEARSIZE; } else { switch (m_format) { case PIXEL_FMT_A8R8G8B8: { desc.ddpfPixelFormat.dwFlags |= (DDPF_RGB | DDPF_ALPHAPIXELS); desc.ddpfPixelFormat.dwRGBBitCount = 32; desc.ddpfPixelFormat.dwRBitMask = 0xFF0000; desc.ddpfPixelFormat.dwGBitMask = 0x00FF00; desc.ddpfPixelFormat.dwBBitMask = 0x0000FF; desc.ddpfPixelFormat.dwRGBAlphaBitMask = 0xFF000000; break; } case PIXEL_FMT_R8G8B8: { desc.ddpfPixelFormat.dwFlags |= DDPF_RGB; desc.ddpfPixelFormat.dwRGBBitCount = 24; desc.ddpfPixelFormat.dwRBitMask = 0xFF0000; desc.ddpfPixelFormat.dwGBitMask = 0x00FF00; desc.ddpfPixelFormat.dwBBitMask = 0x0000FF; break; } case PIXEL_FMT_A8: { desc.ddpfPixelFormat.dwFlags |= DDPF_ALPHA; desc.ddpfPixelFormat.dwRGBBitCount = 8; desc.ddpfPixelFormat.dwRGBAlphaBitMask = 0xFF; break; } case PIXEL_FMT_L8: { desc.ddpfPixelFormat.dwFlags |= 
DDPF_LUMINANCE; desc.ddpfPixelFormat.dwRGBBitCount = 8; desc.ddpfPixelFormat.dwRBitMask = 0xFF; break; } case PIXEL_FMT_A8L8: { desc.ddpfPixelFormat.dwFlags |= DDPF_ALPHAPIXELS | DDPF_LUMINANCE; desc.ddpfPixelFormat.dwRGBBitCount = 16; desc.ddpfPixelFormat.dwRBitMask = 0xFF; desc.ddpfPixelFormat.dwRGBAlphaBitMask = 0xFF00; break; } default: { CRNLIB_ASSERT(false); return false; } } uint bits_per_pixel = desc.ddpfPixelFormat.dwRGBBitCount; desc.lPitch = (desc.dwWidth * bits_per_pixel) >> 3; desc.dwFlags |= DDSD_LINEARSIZE; } if (!c_crnlib_little_endian_platform) utils::endian_switch_dwords(reinterpret_cast(&desc), sizeof(desc) / sizeof(uint32)); if (!serializer.write(&desc, sizeof(desc))) return false; if (!c_crnlib_little_endian_platform) utils::endian_switch_dwords(reinterpret_cast(&desc), sizeof(desc) / sizeof(uint32)); crnlib::vector write_buf; const bool can_unflip_packed_texture = can_unflip_without_unpacking(); if ((is_packed()) && (is_flipped()) && (!can_unflip_without_unpacking())) { console::warning("mipmapped_texture::write_dds: One or more faces/miplevels cannot be unflipped without unpacking. 
Writing flipped .DDS texture."); } for (uint face = 0; face < get_num_faces(); face++) { for (uint level = 0; level < get_num_levels(); level++) { const mip_level* pLevel = get_level(face, level); if (dxt_format) { #if !defined(NDEBUG) const uint width = pLevel->get_width(); const uint height = pLevel->get_height(); #endif CRNLIB_ASSERT(width == math::maximum(1, m_width >> level)); CRNLIB_ASSERT(height == math::maximum(1, m_height >> level)); const dxt_image* p = pLevel->get_dxt_image(); dxt_image tmp; if ((can_unflip_packed_texture) && (pLevel->get_orientation_flags() & (cOrientationFlagXFlipped | cOrientationFlagYFlipped))) { tmp = *p; if (pLevel->get_orientation_flags() & cOrientationFlagXFlipped) { if (!tmp.flip_x()) console::warning("mipmapped_texture::write_dds: Unable to unflip compressed texture on X axis"); } if (pLevel->get_orientation_flags() & cOrientationFlagYFlipped) { if (!tmp.flip_y()) console::warning("mipmapped_texture::write_dds: Unable to unflip compressed texture on Y axis"); } p = &tmp; } #if !defined(NDEBUG) const uint num_blocks_x = (width + 3) >> 2; const uint num_blocks_y = (height + 3) >> 2; #endif CRNLIB_ASSERT(num_blocks_x * num_blocks_y * p->get_elements_per_block() == p->get_total_elements()); const uint size_in_bytes = p->get_total_elements() * sizeof(dxt_image::element); if (size_in_bytes > write_buf.size()) write_buf.resize(size_in_bytes); memcpy(&write_buf[0], p->get_element_ptr(), size_in_bytes); // DXT data is always little endian in memory, just like the DDS format. // (Except for ETC1, which contains big endian 64-bit QWORD's). 
//if (!c_crnlib_little_endian_platform) // utils::endian_switch_words(reinterpret_cast(&write_buf[0]), size_in_bytes / sizeof(WORD)); if (!serializer.write(&write_buf[0], size_in_bytes)) return false; } else { const uint width = pLevel->get_width(); const uint height = pLevel->get_height(); const image_u8* p = pLevel->get_image(); image_u8 tmp; if (pLevel->get_orientation_flags() & (cOrientationFlagXFlipped | cOrientationFlagYFlipped)) { p = pLevel->get_unpacked_image(tmp, cUnpackFlagUnflip); } const uint bits_per_pixel = desc.ddpfPixelFormat.dwRGBBitCount; const uint bytes_per_pixel = bits_per_pixel >> 3; const uint pitch = width * bytes_per_pixel; if (pitch > write_buf.size()) write_buf.resize(pitch); for (uint y = 0; y < height; y++) { const color_quad_u8* pSrc = p->get_scanline(y); const color_quad_u8* pEnd = pSrc + width; uint8* pDst = &write_buf[0]; do { const color_quad_u8& c = *pSrc; uint x = 0; switch (m_format) { case PIXEL_FMT_A8R8G8B8: { x = (c.a << 24) | (c.r << 16) | (c.g << 8) | c.b; break; } case PIXEL_FMT_R8G8B8: { x = (c.r << 16) | (c.g << 8) | c.b; break; } case PIXEL_FMT_A8: { x = c.a; break; } case PIXEL_FMT_A8L8: { x = (c.a << 8) | c.get_luma(); break; } case PIXEL_FMT_L8: { x = c.get_luma(); break; } default: break; } pDst[0] = static_cast(x); if (bytes_per_pixel > 1) { pDst[1] = static_cast(x >> 8); if (bytes_per_pixel > 2) { pDst[2] = static_cast(x >> 16); if (bytes_per_pixel > 3) pDst[3] = static_cast(x >> 24); } } pSrc++; pDst += bytes_per_pixel; } while (pSrc != pEnd); if (!serializer.write(&write_buf[0], pitch)) return false; } } } } clear_last_error(); return true; } bool mipmapped_texture::read_ktx(data_stream_serializer& serializer) { clear(); set_last_error("Unable to read KTX file"); ktx_texture kt; if (!kt.read_from_stream(serializer)) return false; if ((kt.get_depth() > 1) || (kt.get_array_size() > 1)) { set_last_error("read_ktx: Depth and array textures are not supported"); return false; } // Must be 1D, 2D, or a cubemap, with 
or without mipmaps. m_width = kt.get_width(); m_height = kt.get_height(); uint num_mip_levels = kt.get_num_mips(); uint num_faces = kt.get_num_faces(); uint32 crnlib_fourcc = 0; dynamic_string crnlib_fourcc_str; if (kt.get_key_value_as_string("CRNLIB_FOURCC", crnlib_fourcc_str)) { if (crnlib_fourcc_str.get_len() == 4) { for (int i = 3; i >= 0; i--) crnlib_fourcc = (crnlib_fourcc << 8) | crnlib_fourcc_str[i]; } } const bool is_compressed_texture = kt.is_compressed(); dxt_format dxt_fmt = cDXTInvalid; pixel_packer unpacker; if (is_compressed_texture) { switch (kt.get_ogl_internal_fmt()) { case KTX_ETC1_RGB8_OES: dxt_fmt = cETC1; break; case KTX_COMPRESSED_RGB8_ETC2: dxt_fmt = cETC2; break; case KTX_COMPRESSED_RGBA8_ETC2_EAC: dxt_fmt = cETC2A; break; case KTX_RGB_S3TC: case KTX_RGB4_S3TC: case KTX_COMPRESSED_RGB_S3TC_DXT1_EXT: case KTX_COMPRESSED_SRGB_S3TC_DXT1_EXT: dxt_fmt = cDXT1; break; case KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT: case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: dxt_fmt = cDXT1A; break; case KTX_RGBA_S3TC: case KTX_RGBA4_S3TC: case KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT: case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: dxt_fmt = cDXT3; break; case KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT: case KTX_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: case KTX_RGBA_DXT5_S3TC: case KTX_RGBA4_DXT5_S3TC: dxt_fmt = cDXT5; break; case KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT: dxt_fmt = cDXN_YX; if (crnlib_fourcc == PIXEL_FMT_DXN) { dxt_fmt = cDXN_XY; } break; case KTX_COMPRESSED_LUMINANCE_LATC1_EXT: dxt_fmt = cDXT5A; break; default: set_last_error("Unsupported KTX internal format"); return false; } m_format = pixel_format_helpers::from_dxt_format(dxt_fmt); if (m_format == PIXEL_FMT_INVALID) { set_last_error("Unsupported KTX internal compressed format"); return false; } if (crnlib_fourcc != 0) { switch (crnlib_fourcc) { case PIXEL_FMT_DXT5_CCxY: case PIXEL_FMT_DXT5_xGxR: case PIXEL_FMT_DXT5_xGBR: case PIXEL_FMT_DXT5_AGBR: { if (dxt_fmt == cDXT5) { m_format = static_cast(crnlib_fourcc); } 
break; } } } } else { m_format = PIXEL_FMT_A8R8G8B8; const uint type_size = get_ogl_type_size(kt.get_ogl_type()); const uint type_bits = type_size * 8; // Normal component order: 1,2,3,4 (*last* component packed into LSB of output type) // Reversed component order: 4,3,2,1 (*first* component packed into LSB of output type) if (is_packed_pixel_ogl_type(kt.get_ogl_type())) { switch (kt.get_ogl_type()) { // 24bpp packed formats case KTX_UNSIGNED_BYTE_3_3_2: unpacker.init("B2G3R3"); m_format = PIXEL_FMT_R8G8B8; break; case KTX_UNSIGNED_BYTE_2_3_3_REV: unpacker.init("R3G3B2"); m_format = PIXEL_FMT_R8G8B8; break; case KTX_UNSIGNED_SHORT_5_6_5: unpacker.init("B5G6R5"); m_format = PIXEL_FMT_R8G8B8; break; case KTX_UNSIGNED_SHORT_5_6_5_REV: unpacker.init("R5G6B5"); m_format = PIXEL_FMT_R8G8B8; break; // 32bpp packed formats case KTX_UNSIGNED_SHORT_4_4_4_4: unpacker.init("A4B4G4R4"); break; case KTX_UNSIGNED_SHORT_4_4_4_4_REV: unpacker.init("R4G4B4A4"); break; case KTX_UNSIGNED_SHORT_5_5_5_1: unpacker.init("A1B5G5R5"); break; case KTX_UNSIGNED_SHORT_1_5_5_5_REV: unpacker.init("R5G5B5A1"); break; case KTX_UNSIGNED_INT_8_8_8_8: unpacker.init("A8B8G8R8"); break; case KTX_UNSIGNED_INT_8_8_8_8_REV: unpacker.init("R8G8B8A8"); break; case KTX_UNSIGNED_INT_10_10_10_2: unpacker.init("A2B10G10R10"); break; case KTX_UNSIGNED_INT_2_10_10_10_REV: unpacker.init("R10G10B10A2"); break; case KTX_UNSIGNED_INT_5_9_9_9_REV: unpacker.init("R9G9B9A5"); break; default: set_last_error("Unsupported KTX packed pixel type"); return false; } unpacker.set_pixel_stride(get_ogl_type_size(kt.get_ogl_type())); } else { switch (kt.get_ogl_fmt()) { case 1: case KTX_RED: case KTX_RED_INTEGER: case KTX_R8: case KTX_R8UI: { unpacker.init("R", -1, type_bits); m_format = PIXEL_FMT_R8G8B8; break; } case KTX_GREEN: case KTX_GREEN_INTEGER: { unpacker.init("G", -1, type_bits); m_format = PIXEL_FMT_R8G8B8; break; } case KTX_BLUE: case KTX_BLUE_INTEGER: { unpacker.init("B", -1, type_bits); m_format = PIXEL_FMT_R8G8B8; 
break; } case KTX_ALPHA: { unpacker.init("A", -1, type_bits); m_format = PIXEL_FMT_A8; break; } case KTX_LUMINANCE: { unpacker.init("Y", -1, type_bits); m_format = PIXEL_FMT_L8; break; } case 2: case KTX_RG: case KTX_RG8: case KTX_RG_INTEGER: { unpacker.init("RG", -1, type_bits); m_format = PIXEL_FMT_A8L8; break; } case KTX_LUMINANCE_ALPHA: { unpacker.init("YA", -1, type_bits); m_format = PIXEL_FMT_A8L8; break; } case 3: case KTX_SRGB: case KTX_RGB: case KTX_RGB_INTEGER: case KTX_RGB8: case KTX_SRGB8: { unpacker.init("RGB", -1, type_bits); m_format = PIXEL_FMT_R8G8B8; break; } case KTX_BGR: case KTX_BGR_INTEGER: { unpacker.init("BGR", -1, type_bits); m_format = PIXEL_FMT_R8G8B8; break; } case 4: case KTX_RGBA_INTEGER: case KTX_RGBA: case KTX_SRGB_ALPHA: case KTX_SRGB8_ALPHA8: case KTX_RGBA8: { unpacker.init("RGBA", -1, type_bits); break; } case KTX_BGRA: case KTX_BGRA_INTEGER: { unpacker.init("BGRA", -1, type_bits); break; } default: set_last_error("Unsupported KTX pixel format"); return false; } unpacker.set_pixel_stride(unpacker.get_num_comps() * get_ogl_type_size(kt.get_ogl_type())); } CRNLIB_ASSERT(unpacker.is_valid()); } m_comp_flags = pixel_format_helpers::get_component_flags(m_format); m_faces.resize(num_faces); bool x_flipped = false; bool y_flipped = true; dynamic_string orient; if ((kt.get_key_value_as_string("KTXorientation", orient)) && (orient.get_len() >= 7)) { // 0123456 // "S=r,T=d" if ((orient[0] == 'S') && (orient[1] == '=') && (orient[3] == ',') && (orient[4] == 'T') && (orient[5] == '=')) { if (tolower(orient[2]) == 'l') x_flipped = true; else if (tolower(orient[2]) == 'r') x_flipped = false; if (tolower(orient[6]) == 'u') y_flipped = true; else if (tolower(orient[6]) == 'd') y_flipped = false; } } orientation_flags_t orient_flags = cDefaultOrientationFlags; if (x_flipped) orient_flags = static_cast(orient_flags | cOrientationFlagXFlipped); if (y_flipped) orient_flags = static_cast(orient_flags | cOrientationFlagYFlipped); bool dxt1_alpha = 
false; for (uint face_index = 0; face_index < num_faces; face_index++) { m_faces[face_index].resize(num_mip_levels); for (uint level_index = 0; level_index < num_mip_levels; level_index++) { const uint width = math::maximum(m_width >> level_index, 1U); const uint height = math::maximum(m_height >> level_index, 1U); mip_level* pMip = crnlib_new(); m_faces[face_index][level_index] = pMip; const crnlib::vector& image_data = kt.get_image_data(level_index, 0, face_index, 0); if (is_compressed_texture) { const uint bytes_per_block = pixel_format_helpers::get_dxt_bytes_per_block(m_format); const uint num_blocks_x = (width + 3) >> 2; const uint num_blocks_y = (height + 3) >> 2; const uint level_pitch = num_blocks_x * num_blocks_y * bytes_per_block; if (image_data.size() != level_pitch) return false; dxt_image* pDXTImage = crnlib_new(); if (!pDXTImage->init(dxt_fmt, width, height, false)) { crnlib_delete(pDXTImage); CRNLIB_ASSERT(0); return false; } CRNLIB_ASSERT(pDXTImage->get_element_vec().size() * sizeof(dxt_image::element) == level_pitch); memcpy(&pDXTImage->get_element_vec()[0], image_data.get_ptr(), image_data.size()); if ((m_format == PIXEL_FMT_DXT1) && (!dxt1_alpha)) dxt1_alpha = pDXTImage->has_alpha(); pMip->assign(pDXTImage, m_format, orient_flags); } else { if (image_data.size() != (width * height * unpacker.get_pixel_stride())) return false; image_u8* pImage = crnlib_new(width, height); pImage->set_comp_flags(m_comp_flags); const uint8* pSrc = image_data.get_ptr(); color_quad_u8 q(0, 0, 0, 255); for (uint y = 0; y < height; y++) { for (uint x = 0; x < width; x++) { color_quad_u8 c; pSrc = static_cast(unpacker.unpack(pSrc, c)); pImage->set_pixel_unclipped(x, y, c); } } pMip->assign(pImage, m_format, orient_flags); CRNLIB_ASSERT(pMip->get_comp_flags() == m_comp_flags); } } } clear_last_error(); if (dxt1_alpha) change_dxt1_to_dxt1a(); return true; } bool mipmapped_texture::write_ktx(data_stream_serializer& serializer) const { if (!m_width) { set_last_error("Nothing 
to write"); return false; } set_last_error("write_ktx() failed"); uint32 ogl_internal_fmt = 0, ogl_fmt = 0, ogl_type = 0; pixel_packer packer; if (is_packed()) { switch (get_format()) { case PIXEL_FMT_DXT1: { ogl_internal_fmt = KTX_COMPRESSED_RGB_S3TC_DXT1_EXT; break; } case PIXEL_FMT_DXT1A: { ogl_internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT1_EXT; break; } case PIXEL_FMT_DXT2: case PIXEL_FMT_DXT3: { ogl_internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT3_EXT; break; } case PIXEL_FMT_DXT4: case PIXEL_FMT_DXT5: case PIXEL_FMT_DXT5_CCxY: case PIXEL_FMT_DXT5_xGxR: case PIXEL_FMT_DXT5_xGBR: case PIXEL_FMT_DXT5_AGBR: { ogl_internal_fmt = KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT; break; } case PIXEL_FMT_3DC: case PIXEL_FMT_DXN: { ogl_internal_fmt = KTX_COMPRESSED_SIGNED_RED_GREEN_RGTC2_EXT; break; } case PIXEL_FMT_DXT5A: { ogl_internal_fmt = KTX_COMPRESSED_LUMINANCE_LATC1_EXT; break; } case PIXEL_FMT_ETC1: case PIXEL_FMT_ETC1S: { ogl_internal_fmt = KTX_ETC1_RGB8_OES; break; } case PIXEL_FMT_ETC2: { ogl_internal_fmt = KTX_COMPRESSED_RGB8_ETC2; break; } case PIXEL_FMT_ETC2A: case PIXEL_FMT_ETC2AS: { ogl_internal_fmt = KTX_COMPRESSED_RGBA8_ETC2_EAC; break; } default: { CRNLIB_ASSERT(0); return false; } } } else { ogl_type = KTX_UNSIGNED_BYTE; switch (get_format()) { case PIXEL_FMT_R8G8B8: ogl_internal_fmt = KTX_RGB8; ogl_fmt = KTX_RGB; packer.init("R8G8B8"); break; case PIXEL_FMT_L8: ogl_internal_fmt = KTX_LUMINANCE8; ogl_fmt = KTX_LUMINANCE; packer.init("G8"); break; case PIXEL_FMT_A8: ogl_internal_fmt = KTX_ALPHA8; ogl_fmt = KTX_ALPHA; packer.init("A8"); break; case PIXEL_FMT_A8L8: ogl_internal_fmt = KTX_LUMINANCE8_ALPHA8; ogl_fmt = KTX_LUMINANCE_ALPHA; packer.init("Y8A8"); break; case PIXEL_FMT_A8R8G8B8: ogl_internal_fmt = KTX_RGBA8; ogl_fmt = KTX_RGBA; packer.init("R8G8B8A8"); break; default: { CRNLIB_ASSERT(0); return false; } } } ktx_texture kt; bool success; if (determine_texture_type(false) == cTextureTypeCubemap) success = kt.init_cubemap(get_width(), get_num_levels(), 
ogl_internal_fmt, ogl_fmt, ogl_type); else success = kt.init_2D(get_width(), get_height(), get_num_levels(), ogl_internal_fmt, ogl_fmt, ogl_type); if (!success) return false; dynamic_string fourcc_str(cVarArg, "%c%c%c%c", m_format & 0xFF, (m_format >> 8) & 0xFF, (m_format >> 16) & 0xFF, (m_format >> 24) & 0xFF); kt.add_key_value("CRNLIB_FOURCC", fourcc_str.get_ptr()); const mip_level* pLevel0 = get_level(0, 0); dynamic_string ktx_orient_str(cVarArg, "S=%c,T=%c", (pLevel0->get_orientation_flags() & cOrientationFlagXFlipped) ? 'l' : 'r', (pLevel0->get_orientation_flags() & cOrientationFlagYFlipped) ? 'u' : 'd'); kt.add_key_value("KTXorientation", ktx_orient_str.get_ptr()); for (uint face_index = 0; face_index < get_num_faces(); face_index++) { for (uint level_index = 0; level_index < get_num_levels(); level_index++) { const mip_level* pLevel = get_level(face_index, level_index); const uint mip_width = pLevel->get_width(); const uint mip_height = pLevel->get_height(); if (is_packed()) { const dxt_image* p = pLevel->get_dxt_image(); kt.add_image(face_index, level_index, p->get_element_ptr(), p->get_size_in_bytes()); } else { const image_u8* p = pLevel->get_image(); crnlib::vector tmp(mip_width * mip_height * packer.get_pixel_stride()); uint8* pDst = tmp.get_ptr(); for (uint y = 0; y < mip_height; y++) for (uint x = 0; x < mip_width; x++) pDst = (uint8*)packer.pack(p->get_unclamped(x, y), pDst); kt.add_image(face_index, level_index, tmp.get_ptr(), tmp.size_in_bytes()); } } } if (!kt.write_to_stream(serializer)) return false; clear_last_error(); return true; } void mipmapped_texture::assign(face_vec& faces) { CRNLIB_ASSERT(!faces.empty()); if (faces.empty()) return; free_all_mips(); #ifdef CRNLIB_BUILD_DEBUG for (uint i = 1; i < faces.size(); i++) CRNLIB_ASSERT(faces[i].size() == faces[0].size()); #endif mip_level* p = faces[0][0]; m_width = p->get_width(); m_height = p->get_height(); m_comp_flags = p->get_comp_flags(); m_format = p->get_format(); m_faces.swap(faces); 
CRNLIB_ASSERT(check()); } void mipmapped_texture::assign(mip_level* pLevel) { face_vec faces(1, mip_ptr_vec(1, pLevel)); assign(faces); } void mipmapped_texture::assign(image_u8* p, pixel_format fmt, orientation_flags_t orient_flags) { mip_level* pLevel = crnlib_new(); pLevel->assign(p, fmt, orient_flags); assign(pLevel); } void mipmapped_texture::assign(dxt_image* p, pixel_format fmt, orientation_flags_t orient_flags) { mip_level* pLevel = crnlib_new(); pLevel->assign(p, fmt, orient_flags); assign(pLevel); } void mipmapped_texture::set(texture_file_types::format source_file_type, const mipmapped_texture& mipmapped_texture) { clear(); *this = mipmapped_texture; m_source_file_type = source_file_type; } image_u8* mipmapped_texture::get_level_image(uint face, uint level, image_u8& img, uint unpack_flags) const { if (!is_valid()) return NULL; const mip_level* pLevel = get_level(face, level); return pLevel->get_unpacked_image(img, unpack_flags); } void mipmapped_texture::swap(mipmapped_texture& img) { utils::swap(m_width, img.m_width); utils::swap(m_height, img.m_height); utils::swap(m_comp_flags, img.m_comp_flags); utils::swap(m_format, img.m_format); m_faces.swap(img.m_faces); m_last_error.swap(img.m_last_error); utils::swap(m_source_file_type, img.m_source_file_type); CRNLIB_ASSERT(check()); } texture_type mipmapped_texture::determine_texture_type(bool no_normal_detection) const { if (!is_valid()) return cTextureTypeUnknown; if (get_num_faces() == 6) return cTextureTypeCubemap; else if (is_vertical_cross()) return cTextureTypeVerticalCrossCubemap; else if (!no_normal_detection && is_normal_map()) return cTextureTypeNormalMap; return cTextureTypeRegularMap; } void mipmapped_texture::discard_mips() { for (uint f = 0; f < m_faces.size(); f++) { if (m_faces[f].size() > 1) { for (uint l = 1; l < m_faces[f].size(); l++) crnlib_delete(m_faces[f][l]); m_faces[f].resize(1); } } CRNLIB_ASSERT(check()); } void mipmapped_texture::init(uint width, uint height, uint levels, uint 
faces, pixel_format fmt, const char* pName, orientation_flags_t orient_flags) { clear(); CRNLIB_ASSERT((width > 0) && (height > 0) && (levels > 0)); CRNLIB_ASSERT((faces == 1) || (faces == 6)); m_width = width; m_height = height; m_comp_flags = pixel_format_helpers::get_component_flags(fmt); m_format = fmt; if (pName) m_name.set(pName); m_faces.resize(faces); for (uint f = 0; f < faces; f++) { m_faces[f].resize(levels); for (uint l = 0; l < levels; l++) { m_faces[f][l] = crnlib_new(); const uint mip_width = math::maximum(1U, width >> l); const uint mip_height = math::maximum(1U, height >> l); if (pixel_format_helpers::is_dxt(fmt)) { dxt_image* p = crnlib_new(); p->init(pixel_format_helpers::get_dxt_format(fmt), mip_width, mip_height, true); m_faces[f][l]->assign(p, m_format, orient_flags); } else { image_u8* p = crnlib_new(mip_width, mip_height); p->set_comp_flags(m_comp_flags); m_faces[f][l]->assign(p, m_format, orient_flags); } } } CRNLIB_ASSERT(check()); } void mipmapped_texture::discard_mipmaps() { if (!is_valid()) return; discard_mips(); } bool mipmapped_texture::convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p) { if (!is_valid()) return false; if (fmt == get_format()) return true; uint total_pixels = 0; for (uint f = 0; f < m_faces.size(); f++) for (uint l = 0; l < m_faces[f].size(); l++) total_pixels += m_faces[f][l]->get_total_pixels(); uint progress_start = p.m_progress_start; for (uint f = 0; f < m_faces.size(); f++) { for (uint l = 0; l < m_faces[f].size(); l++) { const uint num_pixels = m_faces[f][l]->get_total_pixels(); uint progress_range = (num_pixels * p.m_progress_range) / total_pixels; dxt_image::pack_params tmp_params(p); tmp_params.m_progress_start = math::clamp(progress_start, 0, p.m_progress_range); tmp_params.m_progress_range = math::clamp(progress_range, 0, p.m_progress_range - tmp_params.m_progress_start); progress_start += tmp_params.m_progress_range; if (!m_faces[f][l]->convert(fmt, cook, tmp_params)) { clear(); return 
false; } } } m_format = get_level(0, 0)->get_format(); m_comp_flags = get_level(0, 0)->get_comp_flags(); CRNLIB_ASSERT(check()); if (p.m_pProgress_callback) { if (!p.m_pProgress_callback(p.m_progress_start + p.m_progress_range, p.m_pProgress_callback_user_data_ptr)) return false; } return true; } bool mipmapped_texture::convert(pixel_format fmt, const dxt_image::pack_params& p) { return convert(fmt, true, p); } bool mipmapped_texture::convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p, int qdxt_quality, bool hierarchical) { if ((!pixel_format_helpers::is_dxt(fmt)) || (fmt == PIXEL_FMT_DXT3) || (fmt == PIXEL_FMT_ETC1) || (fmt == PIXEL_FMT_ETC2) || (fmt == PIXEL_FMT_ETC2A) || (fmt == PIXEL_FMT_ETC1S) || (fmt == PIXEL_FMT_ETC2AS)) { // QDXT doesn't support DXT3 or ETCn yet. return convert(fmt, cook, p); } mipmapped_texture src_tex(*this); if (src_tex.is_packed()) src_tex.unpack_from_dxt(true); if (cook) { mipmapped_texture cooked_tex(src_tex); for (uint f = 0; f < m_faces.size(); f++) for (uint l = 0; l < m_faces[f].size(); l++) src_tex.m_faces[f][l]->cook_image(*cooked_tex.m_faces[f][l]->get_image()); src_tex.swap(cooked_tex); } qdxt1_params q1_params; q1_params.init(p, qdxt_quality, hierarchical); qdxt5_params q5_params; q5_params.init(p, qdxt_quality, hierarchical); if (pixel_format_helpers::is_pixel_format_non_srgb(fmt) || (m_comp_flags & pixel_format_helpers::cCompFlagNormalMap) || (m_comp_flags & pixel_format_helpers::cCompFlagLumaChroma)) { // Disable perceptual colorspace metrics when packing to swizzled or non-RGB pixel formats. 
q1_params.m_perceptual = false; } task_pool tp; if (!tp.init(p.m_num_helper_threads)) return false; mipmapped_texture packed_tex; qdxt_state state(tp); if (!src_tex.qdxt_pack_init(state, packed_tex, q1_params, q5_params, fmt, false)) return false; if (!src_tex.qdxt_pack(state, packed_tex, q1_params, q5_params)) return false; swap(packed_tex); return true; } bool mipmapped_texture::is_packed() const { CRNLIB_ASSERT(is_valid()); if (!is_valid()) return false; return get_level(0, 0)->is_packed(); } bool mipmapped_texture::set_alpha_to_luma() { CRNLIB_ASSERT(is_valid()); if (!is_valid()) return false; if (is_packed()) unpack_from_dxt(true); for (uint f = 0; f < m_faces.size(); f++) for (uint l = 0; l < get_num_levels(); l++) get_level(f, l)->set_alpha_to_luma(); m_format = get_level(0, 0)->get_format(); m_comp_flags = get_level(0, 0)->get_comp_flags(); CRNLIB_ASSERT(check()); return true; } bool mipmapped_texture::convert(image_utils::conversion_type conv_type) { CRNLIB_ASSERT(is_valid()); if (!is_valid()) return false; if (is_packed()) unpack_from_dxt(true); for (uint f = 0; f < m_faces.size(); f++) for (uint l = 0; l < get_num_levels(); l++) get_level(f, l)->convert(conv_type); m_format = get_level(0, 0)->get_format(); m_comp_flags = get_level(0, 0)->get_comp_flags(); CRNLIB_ASSERT(check()); return true; } bool mipmapped_texture::unpack_from_dxt(bool uncook) { CRNLIB_ASSERT(is_valid()); if (!is_valid()) return false; CRNLIB_ASSERT(pixel_format_helpers::is_dxt(m_format)); if (!pixel_format_helpers::is_dxt(m_format)) return false; for (uint f = 0; f < m_faces.size(); f++) for (uint l = 0; l < get_num_levels(); l++) if (!get_level(f, l)->unpack_from_dxt(uncook)) return false; m_format = get_level(0, 0)->get_format(); m_comp_flags = get_level(0, 0)->get_comp_flags(); CRNLIB_ASSERT(check()); return true; } bool mipmapped_texture::has_alpha() const { CRNLIB_ASSERT(is_valid()); if (!is_valid()) return false; if (pixel_format_helpers::has_alpha(m_format)) return true; if 
((m_format == PIXEL_FMT_DXT1) && (get_level(0, 0)->get_dxt_image())) { // Try scanning DXT1 mip levels to find blocks with transparent pixels. for (uint f = 0; f < get_num_faces(); f++) if (get_level(f, 0)->get_dxt_image()->has_alpha()) return true; } return false; } bool mipmapped_texture::is_normal_map() const { CRNLIB_ASSERT(is_valid()); if (!is_valid()) return false; if (pixel_format_helpers::is_normal_map(get_format())) return true; const mip_level* pLevel = get_level(0, 0); if (pLevel->get_image()) return image_utils::is_normal_map(*pLevel->get_image(), m_name.get_ptr()); image_u8 tmp; pLevel->get_dxt_image()->unpack(tmp); return image_utils::is_normal_map(tmp, m_name.get_ptr()); } bool mipmapped_texture::is_vertical_cross() const { CRNLIB_ASSERT(is_valid()); if (!is_valid()) return false; if (get_num_faces() > 1) return false; if (!((math::is_power_of_2(m_height)) && (!math::is_power_of_2(m_width)) && (m_height / 4U == m_width / 3U))) return false; return true; } bool mipmapped_texture::resize(uint new_width, uint new_height, const resample_params& params) { CRNLIB_ASSERT(is_valid()); if (!is_valid()) return false; CRNLIB_ASSERT((new_width >= 1) && (new_height >= 1)); face_vec faces(get_num_faces()); for (uint f = 0; f < faces.size(); f++) { faces[f].resize(1); faces[f][0] = crnlib_new(); } for (uint f = 0; f < faces.size(); f++) { image_u8 tmp; image_u8* pImg = get_level(f, 0)->get_unpacked_image(tmp, cUnpackFlagUncook); image_u8* pMip = crnlib_new(); image_utils::resample_params rparams; rparams.m_dst_width = new_width; rparams.m_dst_height = new_height; rparams.m_filter_scale = params.m_filter_scale; rparams.m_first_comp = 0; rparams.m_num_comps = pImg->is_component_valid(3) ? 
4 : 3; rparams.m_srgb = params.m_srgb; rparams.m_wrapping = params.m_wrapping; rparams.m_pFilter = params.m_pFilter; rparams.m_multithreaded = params.m_multithreaded; if (!image_utils::resample(*pImg, *pMip, rparams)) { crnlib_delete(pMip); for (uint f = 0; f < faces.size(); f++) for (uint l = 0; l < faces[f].size(); l++) crnlib_delete(faces[f][l]); return false; } if (params.m_renormalize) image_utils::renorm_normal_map(*pMip); pMip->set_comp_flags(pImg->get_comp_flags()); faces[f][0]->assign(pMip, PIXEL_FMT_INVALID, get_level(f, 0)->get_orientation_flags()); } assign(faces); CRNLIB_ASSERT(check()); return true; } bool mipmapped_texture::generate_mipmaps(const generate_mipmap_params& params, bool force) { CRNLIB_ASSERT(is_valid()); if (!is_valid()) return false; uint num_levels = 1; { uint width = get_width(); uint height = get_height(); while ((width > params.m_min_mip_size) || (height > params.m_min_mip_size)) { width >>= 1U; height >>= 1U; num_levels++; } } if ((params.m_max_mips > 0) && (num_levels > params.m_max_mips)) num_levels = params.m_max_mips; if ((force) && (get_num_levels() > 1)) discard_mipmaps(); if (num_levels == get_num_levels()) return true; face_vec faces(get_num_faces()); for (uint f = 0; f < faces.size(); f++) { faces[f].resize(num_levels); for (uint l = 0; l < num_levels; l++) faces[f][l] = crnlib_new(); } for (uint f = 0; f < faces.size(); f++) { image_u8 tmp; image_u8* pImg = get_level(f, 0)->get_unpacked_image(tmp, cUnpackFlagUncook); for (uint l = 0; l < num_levels; l++) { const uint mip_width = math::maximum(1U, get_width() >> l); const uint mip_height = math::maximum(1U, get_height() >> l); image_u8* pMip = crnlib_new(); if (!l) *pMip = *pImg; else { image_utils::resample_params rparams; rparams.m_dst_width = mip_width; rparams.m_dst_height = mip_height; rparams.m_filter_scale = params.m_filter_scale; rparams.m_first_comp = 0; rparams.m_num_comps = pImg->is_component_valid(3) ? 
4 : 3; rparams.m_srgb = params.m_srgb; rparams.m_wrapping = params.m_wrapping; rparams.m_pFilter = params.m_pFilter; rparams.m_multithreaded = params.m_multithreaded; if (!image_utils::resample(*pImg, *pMip, rparams)) { crnlib_delete(pMip); for (uint f = 0; f < faces.size(); f++) for (uint l = 0; l < faces[f].size(); l++) crnlib_delete(faces[f][l]); return false; } if (params.m_renormalize) image_utils::renorm_normal_map(*pMip); pMip->set_comp_flags(pImg->get_comp_flags()); } faces[f][l]->assign(pMip, PIXEL_FMT_INVALID, get_level(f, 0)->get_orientation_flags()); } } assign(faces); CRNLIB_ASSERT(check()); return true; } bool mipmapped_texture::crop(uint x, uint y, uint width, uint height) { CRNLIB_ASSERT(is_valid()); if (!is_valid()) return false; if (get_num_faces() > 1) return false; if ((width < 1) || (height < 1)) return false; image_u8 tmp; image_u8* pImg = get_level(0, 0)->get_unpacked_image(tmp, cUnpackFlagUncook | cUnpackFlagUnflip); image_u8* pMip = crnlib_new(width, height); if (!pImg->extract_block(pMip->get_ptr(), x, y, width, height)) return false; face_vec faces(1); faces[0].resize(1); faces[0][0] = crnlib_new(); pMip->set_comp_flags(pImg->get_comp_flags()); faces[0][0]->assign(pMip); assign(faces); CRNLIB_ASSERT(check()); return true; } bool mipmapped_texture::vertical_cross_to_cubemap() { if (!is_vertical_cross()) return false; const uint face_width = get_height() / 4; bool alpha_is_valid = has_alpha(); mipmapped_texture cubemap; pixel_format fmt = alpha_is_valid ? 
PIXEL_FMT_A8R8G8B8 : PIXEL_FMT_R8G8B8; cubemap.init(face_width, face_width, 1, 6, fmt, m_name.get_ptr(), cDefaultOrientationFlags); // +x -x +y -y +z -z // 0 1 2 // 0 +y // 1 -x +z +x // 2 -y // 3 -z for (uint face_index = 0; face_index < 6; face_index++) { const mip_level* pSrc = get_level(0, 0); image_u8 tmp_img; image_u8* pSrc_image = pSrc->get_unpacked_image(tmp_img, cUnpackFlagUncook | cUnpackFlagUnflip); const mip_level* pDst = get_level(face_index, 0); image_u8* pDst_image = pDst->get_image(); CRNLIB_ASSERT(pDst_image); const bool flipped = (face_index == 5); const uint x_ofs = g_vertical_cross_image_offsets[face_index][0] * face_width; const uint y_ofs = g_vertical_cross_image_offsets[face_index][1] * face_width; for (uint y = 0; y < face_width; y++) { for (uint x = 0; x < face_width; x++) { const color_quad_u8& c = (*pSrc_image)(x_ofs + x, y_ofs + y); if (!flipped) (*pDst_image)(x, y) = c; else (*pDst_image)(face_width - 1 - x, face_width - 1 - y) = c; } } } swap(cubemap); CRNLIB_ASSERT(check()); return true; } bool mipmapped_texture::qdxt_pack_init(qdxt_state& state, mipmapped_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params, pixel_format fmt, bool cook) { if (!is_valid()) return false; state.m_qdxt1_params = dxt1_params; state.m_qdxt5_params[0] = dxt5_params; state.m_qdxt5_params[1] = dxt5_params; utils::zero_object(state.m_has_blocks); switch (fmt) { case PIXEL_FMT_DXT1: { state.m_has_blocks[0] = true; break; } case PIXEL_FMT_DXT1A: { state.m_has_blocks[0] = true; state.m_qdxt1_params.m_use_alpha_blocks = true; break; } case PIXEL_FMT_DXT4: case PIXEL_FMT_DXT5: { state.m_has_blocks[0] = true; state.m_has_blocks[1] = true; state.m_qdxt1_params.m_use_alpha_blocks = false; state.m_qdxt5_params[0].m_comp_index = 3; break; } case PIXEL_FMT_DXT5_CCxY: case PIXEL_FMT_DXT5_xGxR: case PIXEL_FMT_DXT5_xGBR: case PIXEL_FMT_DXT5_AGBR: { state.m_has_blocks[0] = true; state.m_has_blocks[1] = true; 
state.m_qdxt1_params.m_use_alpha_blocks = false; state.m_qdxt1_params.m_perceptual = false; state.m_qdxt5_params[0].m_comp_index = 3; break; } case PIXEL_FMT_3DC: { state.m_has_blocks[1] = true; state.m_has_blocks[2] = true; state.m_qdxt5_params[0].m_comp_index = 1; state.m_qdxt5_params[1].m_comp_index = 0; break; } case PIXEL_FMT_DXN: { state.m_has_blocks[1] = true; state.m_has_blocks[2] = true; state.m_qdxt5_params[0].m_comp_index = 0; state.m_qdxt5_params[1].m_comp_index = 1; break; } case PIXEL_FMT_DXT5A: { state.m_has_blocks[1] = true; state.m_qdxt5_params[0].m_comp_index = 3; break; } case PIXEL_FMT_ETC1: case PIXEL_FMT_ETC2: case PIXEL_FMT_ETC2A: case PIXEL_FMT_ETC1S: case PIXEL_FMT_ETC2AS: { console::warning("mipmapped_texture::qdxt_pack_init: This method does not support ETCn"); return false; } default: { CRNLIB_ASSERT(0); return false; } } const uint num_elements = state.m_has_blocks[0] + state.m_has_blocks[1] + state.m_has_blocks[2]; uint cur_progress_start = dxt1_params.m_progress_start; if (state.m_has_blocks[0]) { state.m_qdxt1_params.m_progress_start = cur_progress_start; state.m_qdxt1_params.m_progress_range = dxt1_params.m_progress_range / num_elements; cur_progress_start += state.m_qdxt1_params.m_progress_range; } if (state.m_has_blocks[1]) { state.m_qdxt5_params[0].m_progress_start = cur_progress_start; state.m_qdxt5_params[0].m_progress_range = dxt1_params.m_progress_range / num_elements; cur_progress_start += state.m_qdxt5_params[0].m_progress_range; } if (state.m_has_blocks[2]) { state.m_qdxt5_params[1].m_progress_start = cur_progress_start; state.m_qdxt5_params[1].m_progress_range = dxt1_params.m_progress_range - cur_progress_start; } state.m_fmt = fmt; dst_tex.init(get_width(), get_height(), get_num_levels(), get_num_faces(), fmt, get_name().get_ptr(), cDefaultOrientationFlags); state.m_pixel_blocks.resize(0); image_utils::conversion_type cook_conv_type = image_utils::cConversion_Invalid; if (cook) { cook_conv_type = 
image_utils::get_conversion_type(true, fmt); if (pixel_format_helpers::is_alpha_only(fmt) && !pixel_format_helpers::has_alpha(m_format)) cook_conv_type = image_utils::cConversion_Y_To_A; } state.m_qdxt1_params.m_num_mips = 0; state.m_qdxt5_params[0].m_num_mips = 0; state.m_qdxt5_params[1].m_num_mips = 0; for (uint f = 0; f < get_num_faces(); f++) { for (uint l = 0; l < get_num_levels(); l++) { mip_level* pLevel = get_level(f, l); dst_tex.get_level(f, l)->set_orientation_flags(pLevel->get_orientation_flags()); image_u8 tmp_img; image_u8 img(*pLevel->get_unpacked_image(tmp_img, cUnpackFlagUncook)); if (cook_conv_type != image_utils::cConversion_Invalid) image_utils::convert_image(img, cook_conv_type); const uint num_blocks_x = (img.get_width() + 3) / 4; const uint num_blocks_y = (img.get_height() + 3) / 4; const uint total_blocks = num_blocks_x * num_blocks_y; const uint cur_size = state.m_pixel_blocks.size(); state.m_pixel_blocks.resize(cur_size + total_blocks); dxt_pixel_block* pDst_blocks = &state.m_pixel_blocks[cur_size]; { CRNLIB_ASSERT(state.m_qdxt1_params.m_num_mips < qdxt1_params::cMaxMips); qdxt1_params::mip_desc& mip_desc = state.m_qdxt1_params.m_mip_desc[state.m_qdxt1_params.m_num_mips]; mip_desc.m_first_block = cur_size; mip_desc.m_block_width = num_blocks_x; mip_desc.m_block_height = num_blocks_y; state.m_qdxt1_params.m_num_mips++; } for (uint i = 0; i < 2; i++) { CRNLIB_ASSERT(state.m_qdxt5_params[i].m_num_mips < qdxt5_params::cMaxMips); qdxt5_params::mip_desc& mip_desc = state.m_qdxt5_params[i].m_mip_desc[state.m_qdxt5_params[i].m_num_mips]; mip_desc.m_first_block = cur_size; mip_desc.m_block_width = num_blocks_x; mip_desc.m_block_height = num_blocks_y; state.m_qdxt5_params[i].m_num_mips++; } for (uint block_y = 0; block_y < num_blocks_y; block_y++) { const uint img_y = block_y << 2; for (uint block_x = 0; block_x < num_blocks_x; block_x++) { const uint img_x = block_x << 2; color_quad_u8* pDst_pixel = &pDst_blocks->m_pixels[0][0]; pDst_blocks++; for 
(uint by = 0; by < 4; by++) for (uint bx = 0; bx < 4; bx++) *pDst_pixel++ = img.get_clamped(img_x + bx, img_y + by); } // block_x } // block_y } // l } // f if (state.m_has_blocks[0]) { if (!state.m_qdxt1.init(state.m_pixel_blocks.size(), &state.m_pixel_blocks[0], state.m_qdxt1_params)) return false; } if (state.m_has_blocks[1]) { if (!state.m_qdxt5a.init(state.m_pixel_blocks.size(), &state.m_pixel_blocks[0], state.m_qdxt5_params[0])) return false; } if (state.m_has_blocks[2]) { if (!state.m_qdxt5b.init(state.m_pixel_blocks.size(), &state.m_pixel_blocks[0], state.m_qdxt5_params[1])) return false; } return true; } bool mipmapped_texture::qdxt_pack(qdxt_state& state, mipmapped_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params) { if (!is_valid()) return false; CRNLIB_ASSERT(dxt1_params.m_quality_level <= qdxt1_params::cMaxQuality); CRNLIB_ASSERT(dxt5_params.m_quality_level <= qdxt5_params::cMaxQuality); state.m_qdxt1_params.m_quality_level = dxt1_params.m_quality_level; state.m_qdxt1_params.m_pProgress_func = dxt1_params.m_pProgress_func; state.m_qdxt1_params.m_pProgress_data = dxt1_params.m_pProgress_data; state.m_qdxt5_params[0].m_quality_level = dxt5_params.m_quality_level; state.m_qdxt5_params[0].m_pProgress_func = dxt5_params.m_pProgress_func; state.m_qdxt5_params[0].m_pProgress_data = dxt5_params.m_pProgress_data; state.m_qdxt5_params[1].m_quality_level = dxt5_params.m_quality_level; state.m_qdxt5_params[1].m_pProgress_func = dxt5_params.m_pProgress_func; state.m_qdxt5_params[1].m_pProgress_data = dxt5_params.m_pProgress_data; const uint num_elements = state.m_has_blocks[0] + state.m_has_blocks[1] + state.m_has_blocks[2]; uint cur_progress_start = dxt1_params.m_progress_start; if (state.m_has_blocks[0]) { state.m_qdxt1_params.m_progress_start = cur_progress_start; state.m_qdxt1_params.m_progress_range = dxt1_params.m_progress_range / num_elements; cur_progress_start += state.m_qdxt1_params.m_progress_range; } if 
(state.m_has_blocks[1]) { state.m_qdxt5_params[0].m_progress_start = cur_progress_start; state.m_qdxt5_params[0].m_progress_range = dxt1_params.m_progress_range / num_elements; cur_progress_start += state.m_qdxt5_params[0].m_progress_range; } if (state.m_has_blocks[2]) { state.m_qdxt5_params[1].m_progress_start = cur_progress_start; state.m_qdxt5_params[1].m_progress_range = dxt1_params.m_progress_range - cur_progress_start; } crnlib::vector dxt1_blocks; if (state.m_has_blocks[0]) { dxt1_blocks.resize(state.m_pixel_blocks.size()); float pow_mul = 1.0f; if (state.m_fmt == PIXEL_FMT_DXT5_CCxY) { // use a "deeper" codebook size curves when compressing chroma into DXT1, because it's not as important pow_mul = 1.5f; } else if (state.m_fmt == PIXEL_FMT_DXT5) { // favor color more than alpha pow_mul = .75f; } if (!state.m_qdxt1.pack(&dxt1_blocks[0], 1, state.m_qdxt1_params, pow_mul)) return false; } crnlib::vector dxt5_blocks[2]; for (uint i = 0; i < 2; i++) { if (state.m_has_blocks[i + 1]) { dxt5_blocks[i].resize(state.m_pixel_blocks.size()); if (!(i ? 
state.m_qdxt5b : state.m_qdxt5a).pack(&dxt5_blocks[i][0], 1, state.m_qdxt5_params[i])) return false; } } uint cur_block_ofs = 0; for (uint f = 0; f < dst_tex.get_num_faces(); f++) { for (uint l = 0; l < dst_tex.get_num_levels(); l++) { mip_level* pDst_level = dst_tex.get_level(f, l); const uint num_blocks_x = (pDst_level->get_width() + 3) / 4; const uint num_blocks_y = (pDst_level->get_height() + 3) / 4; const uint total_blocks = num_blocks_x * num_blocks_y; dxt_image* pDst_dxt_image = pDst_level->get_dxt_image(); dxt_image::element* pDst = pDst_dxt_image->get_element_ptr(); for (uint block_index = 0; block_index < total_blocks; block_index++) { if (state.m_has_blocks[1]) memcpy(pDst, &dxt5_blocks[0][cur_block_ofs + block_index], 8); if (state.m_has_blocks[2]) memcpy(pDst + 1, &dxt5_blocks[1][cur_block_ofs + block_index], 8); if (state.m_has_blocks[0]) memcpy(pDst + state.m_has_blocks[1], &dxt1_blocks[cur_block_ofs + block_index], 8); pDst += pDst_dxt_image->get_elements_per_block(); } cur_block_ofs += total_blocks; } } if (dxt1_params.m_pProgress_func) { if (!dxt1_params.m_pProgress_func(dxt1_params.m_progress_start + dxt1_params.m_progress_range, dxt1_params.m_pProgress_data)) return false; } CRNLIB_ASSERT(dst_tex.check()); return true; } bool mipmapped_texture::read_from_file(const char* pFilename, texture_file_types::format file_format) { clear(); set_last_error("Can't open file"); bool success = false; cfile_stream in_stream; if (in_stream.open(pFilename)) { data_stream_serializer serializer(in_stream); success = read_from_stream(serializer, file_format); } return success; } bool mipmapped_texture::read_from_stream(data_stream_serializer& serializer, texture_file_types::format file_format) { clear(); if (!serializer.get_stream()) { set_last_error("Invalid stream"); return false; } if (file_format == texture_file_types::cFormatInvalid) file_format = texture_file_types::determine_file_format(serializer.get_name().get_ptr()); if (file_format == 
texture_file_types::cFormatInvalid) { set_last_error("Unsupported file format"); return false; } set_last_error("Image file load failed"); bool success = false; if (!texture_file_types::supports_mipmaps(file_format)) { success = read_regular_image(serializer); } else { switch (file_format) { case texture_file_types::cFormatDDS: { success = read_dds(serializer); break; } case texture_file_types::cFormatCRN: { success = read_crn(serializer); break; } case texture_file_types::cFormatKTX: { success = read_ktx(serializer); break; } default: { CRNLIB_ASSERT(0); break; } } } if (success) { CRNLIB_ASSERT(check()); m_source_file_type = file_format; set_name(serializer.get_name()); clear_last_error(); } return success; } bool mipmapped_texture::read_regular_image(data_stream_serializer& serializer) { image_u8* pImg = crnlib_new(); bool status = image_utils::read_from_stream(*pImg, serializer, 0); if (!status) { crnlib_delete(pImg); set_last_error("Failed loading image file"); return false; } mip_level* pLevel = crnlib_new(); pLevel->assign(pImg); assign(pLevel); set_name(serializer.get_name()); return true; } bool mipmapped_texture::read_crn_from_memory(const void* pData, uint data_size, const char* pFilename) { clear(); set_last_error("Image file load failed"); if ((!pData) || (data_size < 1)) return false; crnd::crn_texture_info tex_info; tex_info.m_struct_size = sizeof(crnd::crn_texture_info); if (!crnd_get_texture_info(pData, data_size, &tex_info)) { set_last_error("crnd_get_texture_info() failed"); return false; } const pixel_format dds_fmt = (pixel_format)crnd::crnd_crn_format_to_fourcc(tex_info.m_format); if (dds_fmt == PIXEL_FMT_INVALID) { set_last_error("Unsupported DXT format"); return false; } const dxt_format dxt_fmt = pixel_format_helpers::get_dxt_format(dds_fmt); face_vec faces(tex_info.m_faces); for (uint f = 0; f < tex_info.m_faces; f++) { faces[f].resize(tex_info.m_levels); for (uint l = 0; l < tex_info.m_levels; l++) faces[f][l] = crnlib_new(); } const uint 
tex_num_blocks_x = (tex_info.m_width + 3) >> 2; const uint tex_num_blocks_y = (tex_info.m_height + 3) >> 2; vector dxt_data; // Create temp buffer big enough to hold the largest mip level, and all faces if it's a cubemap. dxt_data.resize(tex_info.m_bytes_per_block * tex_num_blocks_x * tex_num_blocks_y * tex_info.m_faces); set_last_error("CRN unpack failed"); #if 0 timer t; double total_time = 0.0f; t.start(); #endif crnd::crnd_unpack_context pContext = crnd::crnd_unpack_begin(pData, data_size); #if 0 total_time += t.get_elapsed_secs(); #endif if (!pContext) { for (uint f = 0; f < faces.size(); f++) for (uint l = 0; l < faces[f].size(); l++) crnlib_delete(faces[f][l]); return false; } void* pFaces[cCRNMaxFaces]; for (uint f = tex_info.m_faces; f < cCRNMaxFaces; f++) pFaces[f] = NULL; for (uint l = 0; l < tex_info.m_levels; l++) { const uint level_width = math::maximum(1U, tex_info.m_width >> l); const uint level_height = math::maximum(1U, tex_info.m_height >> l); const uint num_blocks_x = (level_width + 3U) >> 2U; const uint num_blocks_y = (level_height + 3U) >> 2U; const uint row_pitch = num_blocks_x * tex_info.m_bytes_per_block; const uint size_of_face = num_blocks_y * row_pitch; #if 0 t.start(); #endif for (uint f = 0; f < tex_info.m_faces; f++) pFaces[f] = &dxt_data[f * size_of_face]; if (!crnd::crnd_unpack_level(pContext, pFaces, dxt_data.size(), row_pitch, l)) { crnd::crnd_unpack_end(pContext); for (uint f = 0; f < faces.size(); f++) for (uint l = 0; l < faces[f].size(); l++) crnlib_delete(faces[f][l]); return false; } #if 0 total_time += t.get_elapsed_secs(); #endif for (uint f = 0; f < tex_info.m_faces; f++) { dxt_image* pDXT_image = crnlib_new(); if (!pDXT_image->init( dxt_fmt, level_width, level_height, num_blocks_x * num_blocks_y * (tex_info.m_bytes_per_block / sizeof(dxt_image::element)), reinterpret_cast(pFaces[f]), true)) { crnlib_delete(pDXT_image); crnd::crnd_unpack_end(pContext); for (uint f = 0; f < faces.size(); f++) for (uint l = 0; l < 
faces[f].size(); l++) crnlib_delete(faces[f][l]); return false; } faces[f][l]->assign(pDXT_image, dds_fmt); } } #if 0 if (total_pixels) { console::info("read_crn_from_memory: Total pixels: %u, ms: %3.3fms, megapixels/sec: %3.3f", total_pixels, total_time * 1000.0f, total_pixels / total_time); } #endif crnd::crnd_unpack_end(pContext); assign(faces); set_name(pFilename); m_source_file_type = texture_file_types::cFormatCRN; clear_last_error(); return true; } bool mipmapped_texture::read_crn(data_stream_serializer& serializer) { crnlib::vector crn_data; if (!serializer.read_entire_file(crn_data)) { set_last_error("Failed reading CRN file"); return false; } return read_crn_from_memory(crn_data.get_ptr(), crn_data.size(), serializer.get_name().get_ptr()); } bool mipmapped_texture::write_to_file( const char* pFilename, texture_file_types::format file_format, crn_comp_params* pComp_params, uint32* pActual_quality_level, float* pActual_bitrate, uint32 image_write_flags) { if (pActual_quality_level) *pActual_quality_level = 0; if (pActual_bitrate) *pActual_bitrate = 0.0f; if (!is_valid()) { set_last_error("Unable to write empty texture"); return false; } if (file_format == texture_file_types::cFormatInvalid) file_format = texture_file_types::determine_file_format(pFilename); if (file_format == texture_file_types::cFormatInvalid) { set_last_error("Unknown file format"); return false; } bool success = false; if (((pComp_params) && (file_format == texture_file_types::cFormatDDS)) || (file_format == texture_file_types::cFormatCRN)) { if (!pComp_params) return false; success = write_comp_texture(pFilename, *pComp_params, pActual_quality_level, pActual_bitrate); } else if (!texture_file_types::supports_mipmaps(file_format)) { success = write_regular_image(pFilename, image_write_flags); } else { if (pComp_params) { console::warning("mipmapped_texture::write_to_file: Ignoring CRN compression parameters (currently unsupported for this file type)."); } cfile_stream write_stream; if 
(!write_stream.open(pFilename, cDataStreamWritable | cDataStreamSeekable)) { set_last_error(dynamic_string(cVarArg, "Failed creating output file \"%s\"", pFilename).get_ptr()); return false; } data_stream_serializer serializer(write_stream); switch (file_format) { case texture_file_types::cFormatDDS: { success = write_dds(serializer); break; } case texture_file_types::cFormatKTX: { success = write_ktx(serializer); break; } default: { break; } } } return success; } bool mipmapped_texture::write_regular_image(const char* pFilename, uint32 image_write_flags) { image_u8 tmp; image_u8* pLevel_image = get_level_image(0, 0, tmp); if (!image_utils::write_to_file(pFilename, *pLevel_image, image_write_flags)) { set_last_error("File write failed"); return false; } return true; } void mipmapped_texture::print_crn_comp_params(const crn_comp_params& p) { console::debug("CRN compression params:"); console::debug(" File Type: %s", crn_get_file_type_ext(p.m_file_type)); console::debug(" Quality level: %u", p.m_quality_level); console::debug(" Target Bitrate: %f", p.m_target_bitrate); console::debug(" Faces: %u", p.m_faces); console::debug(" Width: %u", p.m_width); console::debug(" Height: %u", p.m_height); console::debug(" Levels: %u", p.m_levels); console::debug(" Pixel Format: %s", crn_get_format_string(p.m_format)); console::debug("Use manual CRN palette sizes: %u", p.get_flag(cCRNCompFlagManualPaletteSizes)); console::debug("Color endpoints: %u", p.m_crn_color_endpoint_palette_size); console::debug("Color selectors: %u", p.m_crn_color_selector_palette_size); console::debug("Alpha endpoints: %u", p.m_crn_alpha_endpoint_palette_size); console::debug("Alpha selectors: %u", p.m_crn_alpha_selector_palette_size); console::debug("Flags:"); console::debug(" Perceptual: %u", p.get_flag(cCRNCompFlagPerceptual)); console::debug(" Hierarchical: %u", p.get_flag(cCRNCompFlagHierarchical)); console::debug(" UseBothBlockTypes: %u", p.get_flag(cCRNCompFlagUseBothBlockTypes)); console::debug(" 
UseTransparentIndicesForBlack: %u", p.get_flag(cCRNCompFlagUseTransparentIndicesForBlack)); console::debug(" DisableEndpointCaching: %u", p.get_flag(cCRNCompFlagDisableEndpointCaching)); console::debug("GrayscaleSampling: %u", p.get_flag(cCRNCompFlagGrayscaleSampling)); console::debug(" UseDXT1ATransparency: %u", p.get_flag(cCRNCompFlagDXT1AForTransparency)); console::debug("AdaptiveTileColorPSNRDerating: %2.2fdB", p.m_crn_adaptive_tile_color_psnr_derating); console::debug("AdaptiveTileAlphaPSNRDerating: %2.2fdB", p.m_crn_adaptive_tile_alpha_psnr_derating); console::debug("NumHelperThreads: %u", p.m_num_helper_threads); } bool mipmapped_texture::write_comp_texture(const char* pFilename, const crn_comp_params& orig_comp_params, uint32* pActual_quality_level, float* pActual_bitrate) { crn_comp_params comp_params(orig_comp_params); if (pActual_quality_level) *pActual_quality_level = 0; if (pActual_bitrate) *pActual_bitrate = 0.0f; if (math::maximum(get_height(), get_width()) > cCRNMaxLevelResolution) { set_last_error("Texture resolution is too big!"); return false; } comp_params.m_faces = get_num_faces(); comp_params.m_levels = get_num_levels(); comp_params.m_width = get_width(); comp_params.m_height = get_height(); image_u8 temp_images[cCRNMaxFaces][cCRNMaxLevels]; for (uint f = 0; f < get_num_faces(); f++) { for (uint l = 0; l < get_num_levels(); l++) { image_u8* p = get_level_image(f, l, temp_images[f][l]); comp_params.m_pImages[f][l] = (crn_uint32*)p->get_ptr(); } } if (comp_params.get_flag(cCRNCompFlagDebugging)) print_crn_comp_params(comp_params); timer t; t.start(); crnlib::vector comp_data; if (!create_compressed_texture(comp_params, comp_data, pActual_quality_level, pActual_bitrate)) { set_last_error("CRN compression failed"); return false; } double total_time = t.get_elapsed_secs(); if (comp_params.get_flag(cCRNCompFlagDebugging)) { console::debug("\nTotal compression time: %3.3fs", total_time); } cfile_stream out_stream; if (!out_stream.open(pFilename, 
cDataStreamWritable | cDataStreamSeekable)) { set_last_error("Failed opening file"); return false; } if (out_stream.write(comp_data.get_ptr(), comp_data.size()) != comp_data.size()) { set_last_error("Failed writing to file"); return false; } if (!out_stream.close()) { set_last_error("Failed writing to file"); return false; } return true; } uint mipmapped_texture::get_total_pixels_in_all_faces_and_mips() const { uint total_pixels = 0; for (uint l = 0; l < m_faces.size(); l++) for (uint m = 0; m < m_faces[l].size(); m++) total_pixels += m_faces[l][m]->get_total_pixels(); return total_pixels; } void mipmapped_texture::set_orientation_flags(orientation_flags_t flags) { for (uint l = 0; l < m_faces.size(); l++) for (uint m = 0; m < m_faces[l].size(); m++) m_faces[l][m]->set_orientation_flags(flags); } bool mipmapped_texture::is_flipped() const { for (uint l = 0; l < m_faces.size(); l++) for (uint m = 0; m < m_faces[l].size(); m++) if (m_faces[l][m]->is_flipped()) return true; return false; } bool mipmapped_texture::is_x_flipped() const { for (uint l = 0; l < m_faces.size(); l++) for (uint m = 0; m < m_faces[l].size(); m++) if (m_faces[l][m]->is_x_flipped()) return true; return false; } bool mipmapped_texture::is_y_flipped() const { for (uint l = 0; l < m_faces.size(); l++) for (uint m = 0; m < m_faces[l].size(); m++) if (m_faces[l][m]->is_y_flipped()) return true; return false; } bool mipmapped_texture::can_unflip_without_unpacking() const { if (!is_valid()) return false; if (!is_packed()) return true; for (uint l = 0; l < m_faces.size(); l++) for (uint m = 0; m < m_faces[l].size(); m++) if (!m_faces[l][m]->can_unflip_without_unpacking()) return false; return true; } bool mipmapped_texture::unflip(bool allow_unpacking_to_flip, bool uncook_if_necessary_to_unpack) { if (!is_valid()) return false; if (is_packed()) { // The texture is packed - make sure all faces/miplevels can be consistently unflipped. 
bool can_do_packed_unflip = can_unflip_without_unpacking(); if ((!can_do_packed_unflip) && (!allow_unpacking_to_flip)) return false; // If any face/miplevel can't unflip the packed bits, then just unpack the whole texture. if (!can_do_packed_unflip) unpack_from_dxt(uncook_if_necessary_to_unpack); } for (uint l = 0; l < m_faces.size(); l++) for (uint m = 0; m < m_faces[l].size(); m++) if (!m_faces[l][m]->unflip(true, false)) return false; CRNLIB_VERIFY(check()); return true; } #if 0 bool mipmapped_texture::flip_x() { for (uint l = 0; l < m_faces.size(); l++) for (uint m = 0; m < m_faces[l].size(); m++) if (!m_faces[l][m]->flip_x()) return false; return true; } #endif bool mipmapped_texture::flip_y_helper() { for (uint l = 0; l < m_faces.size(); l++) for (uint m = 0; m < m_faces[l].size(); m++) if (!m_faces[l][m]->flip_y()) return false; return true; } bool mipmapped_texture::flip_y(bool update_orientation_flags) { mipmapped_texture temp_tex(*this); if (!temp_tex.flip_y_helper()) { temp_tex = *this; temp_tex.unpack_from_dxt(true); if (!temp_tex.flip_y_helper()) return false; } swap(temp_tex); if (update_orientation_flags) { for (uint f = 0; f < get_num_faces(); f++) { for (uint m = 0; m < get_face(f).size(); m++) { uint orient_flags = get_face(f)[m]->get_orientation_flags(); orient_flags ^= cOrientationFlagYFlipped; get_face(f)[m]->set_orientation_flags(static_cast(orient_flags)); } } } CRNLIB_ASSERT(check()); return true; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_mipmapped_texture.h000066400000000000000000000264631503722002600240520ustar00rootroot00000000000000// File: crn_mipmapped_texture.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dxt_image.h" #include "../inc/dds_defs.h" #include "crn_pixel_format.h" #include "crn_image.h" #include "crn_resampler.h" #include "crn_data_stream_serializer.h" #include "crn_qdxt1.h" #include "crn_qdxt5.h" #include "crn_texture_file_types.h" #include 
"crn_image_utils.h" namespace crnlib { extern const vec2I g_vertical_cross_image_offsets[6]; enum orientation_flags_t { cOrientationFlagXFlipped = 1, cOrientationFlagYFlipped = 2, cDefaultOrientationFlags = 0 }; enum unpack_flags_t { cUnpackFlagUncook = 1, cUnpackFlagUnflip = 2 }; class mip_level { friend class mipmapped_texture; public: mip_level(); ~mip_level(); mip_level(const mip_level& other); mip_level& operator=(const mip_level& rhs); // Assumes ownership. void assign(image_u8* p, pixel_format fmt = PIXEL_FMT_INVALID, orientation_flags_t orient_flags = cDefaultOrientationFlags); void assign(dxt_image* p, pixel_format fmt = PIXEL_FMT_INVALID, orientation_flags_t orient_flags = cDefaultOrientationFlags); void clear(); inline uint get_width() const { return m_width; } inline uint get_height() const { return m_height; } inline uint get_total_pixels() const { return m_width * m_height; } orientation_flags_t get_orientation_flags() const { return m_orient_flags; } void set_orientation_flags(orientation_flags_t flags) { m_orient_flags = flags; } inline image_u8* get_image() const { return m_pImage; } inline dxt_image* get_dxt_image() const { return m_pDXTImage; } image_u8* get_unpacked_image(image_u8& tmp, uint unpack_flags) const; inline bool is_packed() const { return m_pDXTImage != NULL; } inline bool is_valid() const { return (m_pImage != NULL) || (m_pDXTImage != NULL); } inline pixel_format_helpers::component_flags get_comp_flags() const { return m_comp_flags; } inline void set_comp_flags(pixel_format_helpers::component_flags comp_flags) { m_comp_flags = comp_flags; } inline pixel_format get_format() const { return m_format; } inline void set_format(pixel_format fmt) { m_format = fmt; } bool convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p); bool pack_to_dxt(const image_u8& img, pixel_format fmt, bool cook, const dxt_image::pack_params& p, orientation_flags_t orient_flags = cDefaultOrientationFlags); bool pack_to_dxt(pixel_format fmt, bool 
cook, const dxt_image::pack_params& p); bool unpack_from_dxt(bool uncook = true); // Returns true if flipped on either axis. bool is_flipped() const; bool is_x_flipped() const; bool is_y_flipped() const; bool can_unflip_without_unpacking() const; // Returns true if unflipped on either axis. // Will try to flip packed (DXT/ETC) data in-place, if this isn't possible it'll unpack/uncook the mip level then unflip. bool unflip(bool allow_unpacking_to_flip, bool uncook_during_unpack); bool set_alpha_to_luma(); bool convert(image_utils::conversion_type conv_type); bool flip_x(); bool flip_y(); private: uint m_width; uint m_height; pixel_format_helpers::component_flags m_comp_flags; pixel_format m_format; image_u8* m_pImage; dxt_image* m_pDXTImage; orientation_flags_t m_orient_flags; void cook_image(image_u8& img) const; void uncook_image(image_u8& img) const; }; // A face is an array of mip_level ptr's. typedef crnlib::vector mip_ptr_vec; // And an array of one, six, or N faces make up a texture. typedef crnlib::vector face_vec; class mipmapped_texture { public: // Construction/destruction mipmapped_texture(); ~mipmapped_texture(); mipmapped_texture(const mipmapped_texture& other); mipmapped_texture& operator=(const mipmapped_texture& rhs); void clear(); void init(uint width, uint height, uint levels, uint faces, pixel_format fmt, const char* pName, orientation_flags_t orient_flags); // Assumes ownership. 
void assign(face_vec& faces); void assign(mip_level* pLevel); void assign(image_u8* p, pixel_format fmt = PIXEL_FMT_INVALID, orientation_flags_t orient_flags = cDefaultOrientationFlags); void assign(dxt_image* p, pixel_format fmt = PIXEL_FMT_INVALID, orientation_flags_t orient_flags = cDefaultOrientationFlags); void set(texture_file_types::format source_file_type, const mipmapped_texture& mipmapped_texture); // Accessors image_u8* get_level_image(uint face, uint level, image_u8& img, uint unpack_flags = cUnpackFlagUncook | cUnpackFlagUnflip) const; inline bool is_valid() const { return m_faces.size() > 0; } const dynamic_string& get_name() const { return m_name; } void set_name(const dynamic_string& name) { m_name = name; } const dynamic_string& get_source_filename() const { return get_name(); } texture_file_types::format get_source_file_type() const { return m_source_file_type; } inline uint get_width() const { return m_width; } inline uint get_height() const { return m_height; } inline uint get_total_pixels() const { return m_width * m_height; } uint get_total_pixels_in_all_faces_and_mips() const; inline uint get_num_faces() const { return m_faces.size(); } inline uint get_num_levels() const { if (m_faces.empty()) return 0; else return m_faces[0].size(); } inline pixel_format_helpers::component_flags get_comp_flags() const { return m_comp_flags; } inline pixel_format get_format() const { return m_format; } inline bool is_unpacked() const { if (get_num_faces()) { return get_level(0, 0)->get_image() != NULL; } return false; } inline const mip_ptr_vec& get_face(uint face) const { return m_faces[face]; } inline mip_ptr_vec& get_face(uint face) { return m_faces[face]; } inline const mip_level* get_level(uint face, uint mip) const { return m_faces[face][mip]; } inline mip_level* get_level(uint face, uint mip) { return m_faces[face][mip]; } bool has_alpha() const; bool is_normal_map() const; bool is_vertical_cross() const; bool is_packed() const; texture_type 
determine_texture_type(bool no_normal_detection) const; const dynamic_string& get_last_error() const { return m_last_error; } void clear_last_error() { m_last_error.clear(); } // Reading/writing bool read_dds(data_stream_serializer& serializer); bool write_dds(data_stream_serializer& serializer) const; bool read_ktx(data_stream_serializer& serializer); bool write_ktx(data_stream_serializer& serializer) const; bool read_crn(data_stream_serializer& serializer); bool read_crn_from_memory(const void* pData, uint data_size, const char* pFilename); // If file_format is texture_file_types::cFormatInvalid, the format will be determined from the filename's extension. bool read_from_file(const char* pFilename, texture_file_types::format file_format = texture_file_types::cFormatInvalid); bool read_from_stream(data_stream_serializer& serializer, texture_file_types::format file_format = texture_file_types::cFormatInvalid); bool write_to_file( const char* pFilename, texture_file_types::format file_format = texture_file_types::cFormatInvalid, crn_comp_params* pComp_params = NULL, uint32* pActual_quality_level = NULL, float* pActual_bitrate = NULL, uint32 image_write_flags = 0); // Conversion bool convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p); bool convert(pixel_format fmt, const dxt_image::pack_params& p); bool convert(pixel_format fmt, bool cook, const dxt_image::pack_params& p, int qdxt_quality, bool hierarchical = true); bool convert(image_utils::conversion_type conv_type); bool unpack_from_dxt(bool uncook = true); bool set_alpha_to_luma(); void discard_mipmaps(); void discard_mips(); struct resample_params { resample_params() : m_pFilter("kaiser"), m_wrapping(false), m_srgb(false), m_renormalize(false), m_rtopmip(false), m_filter_scale(.9f), m_gamma(1.75f), // or 2.2f m_multithreaded(true) { } const char* m_pFilter; bool m_wrapping; bool m_srgb; bool m_renormalize; bool m_rtopmip; float m_filter_scale; float m_gamma; bool m_multithreaded; }; bool 
resize(uint new_width, uint new_height, const resample_params& params); struct generate_mipmap_params : public resample_params { generate_mipmap_params() : resample_params(), m_min_mip_size(1), m_max_mips(0) { } uint m_min_mip_size; uint m_max_mips; // actually the max # of total levels }; bool generate_mipmaps(const generate_mipmap_params& params, bool force); bool crop(uint x, uint y, uint width, uint height); bool vertical_cross_to_cubemap(); // Low-level clustered DXT (QDXT) compression struct qdxt_state { qdxt_state(task_pool& tp) : m_fmt(PIXEL_FMT_INVALID), m_qdxt1(tp), m_qdxt5a(tp), m_qdxt5b(tp) { } pixel_format m_fmt; qdxt1 m_qdxt1; qdxt5 m_qdxt5a; qdxt5 m_qdxt5b; crnlib::vector m_pixel_blocks; qdxt1_params m_qdxt1_params; qdxt5_params m_qdxt5_params[2]; bool m_has_blocks[3]; void clear() { m_fmt = PIXEL_FMT_INVALID; m_qdxt1.clear(); m_qdxt5a.clear(); m_qdxt5b.clear(); m_pixel_blocks.clear(); m_qdxt1_params.clear(); m_qdxt5_params[0].clear(); m_qdxt5_params[1].clear(); utils::zero_object(m_has_blocks); } }; bool qdxt_pack_init(qdxt_state& state, mipmapped_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params, pixel_format fmt, bool cook); bool qdxt_pack(qdxt_state& state, mipmapped_texture& dst_tex, const qdxt1_params& dxt1_params, const qdxt5_params& dxt5_params); void swap(mipmapped_texture& img); bool check() const; void set_orientation_flags(orientation_flags_t flags); // Returns true if any face/miplevel is flipped. 
// An unsigned integer stored in exactly N bytes (1 <= N <= 4), most
// significant byte first, independent of host endianness.
template <unsigned int N>
struct packed_uint {
  inline packed_uint() {}  // contents intentionally left uninitialised

  inline packed_uint(unsigned int val) { *this = val; }

  inline packed_uint(const packed_uint& other) { *this = other; }

  inline packed_uint& operator=(const packed_uint& rhs) {
    if (this != &rhs)
      memcpy(m_buf, rhs.m_buf, sizeof(m_buf));
    return *this;
  }

  // Stores the low N bytes of val; debug builds assert that val fits.
  inline packed_uint& operator=(unsigned int val) {
#ifdef CRNLIB_BUILD_DEBUG
    if (N == 1) {
      CRNLIB_ASSERT(val <= 0xFFU);
    } else if (N == 2) {
      CRNLIB_ASSERT(val <= 0xFFFFU);
    } else if (N == 3) {
      CRNLIB_ASSERT(val <= 0xFFFFFFU);
    }
#endif

    // Left-align the N significant bytes, then peel them off from the top.
    val <<= (8U * (4U - N));

    for (unsigned int i = 0; i < N; i++) {
      m_buf[i] = static_cast<unsigned char>(val >> 24U);
      val <<= 8U;
    }

    return *this;
  }

  // Reassembles the stored value. The accumulation is done in unsigned
  // arithmetic: the previous form shifted an int-promoted byte left by 24,
  // which overflows a signed int when the top bit is set, and its default
  // case indexed m_buf[3] even in instantiations with N < 4.
  inline operator unsigned int() const {
    unsigned int result = 0;
    for (unsigned int i = 0; i < N; i++)
      result = (result << 8U) | static_cast<unsigned int>(m_buf[i]);
    return result;
  }

  unsigned char m_buf[N];
};
return "DXT1"; case PIXEL_FMT_DXT1A: return "DXT1A"; case PIXEL_FMT_DXT2: return "DXT2"; case PIXEL_FMT_DXT3: return "DXT3"; case PIXEL_FMT_DXT4: return "DXT4"; case PIXEL_FMT_DXT5: return "DXT5"; case PIXEL_FMT_3DC: return "3DC"; case PIXEL_FMT_DXN: return "DXN"; case PIXEL_FMT_DXT5A: return "DXT5A"; case PIXEL_FMT_DXT5_CCxY: return "DXT5_CCxY"; case PIXEL_FMT_DXT5_xGxR: return "DXT5_xGxR"; case PIXEL_FMT_DXT5_xGBR: return "DXT5_xGBR"; case PIXEL_FMT_DXT5_AGBR: return "DXT5_AGBR"; case PIXEL_FMT_ETC1: return "ETC1"; case PIXEL_FMT_ETC2: return "ETC2"; case PIXEL_FMT_ETC2A: return "ETC2A"; case PIXEL_FMT_ETC1S: return "ETC1S"; case PIXEL_FMT_ETC2AS: return "ETC2AS"; case PIXEL_FMT_R8G8B8: return "R8G8B8"; case PIXEL_FMT_A8R8G8B8: return "A8R8G8B8"; case PIXEL_FMT_A8: return "A8"; case PIXEL_FMT_L8: return "L8"; case PIXEL_FMT_A8L8: return "A8L8"; default: break; } CRNLIB_ASSERT(false); return "?"; } const char* get_crn_format_string(crn_format fmt) { switch (fmt) { case cCRNFmtDXT1: return "DXT1"; case cCRNFmtDXT3: return "DXT3"; case cCRNFmtDXT5: return "DXT5"; case cCRNFmtDXT5_CCxY: return "DXT5_CCxY"; case cCRNFmtDXT5_xGBR: return "DXT5_xGBR"; case cCRNFmtDXT5_AGBR: return "DXT5_AGBR"; case cCRNFmtDXT5_xGxR: return "DXT5_xGxR"; case cCRNFmtDXN_XY: return "DXN_XY"; case cCRNFmtDXN_YX: return "DXN_YX"; case cCRNFmtDXT5A: return "DXT5A"; case cCRNFmtETC1: return "ETC1"; case cCRNFmtETC2: return "ETC2"; case cCRNFmtETC2A: return "ETC2A"; case cCRNFmtETC1S: return "ETC1S"; case cCRNFmtETC2AS: return "ETC2AS"; default: break; } CRNLIB_ASSERT(false); return "?"; } component_flags get_component_flags(pixel_format fmt) { // These flags are for *uncooked* pixels, i.e. after after adding Z to DXN maps, or converting YCC maps to RGB, etc. 
uint flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid | cCompFlagGrayscale; switch (fmt) { case PIXEL_FMT_DXT1: case PIXEL_FMT_ETC1: case PIXEL_FMT_ETC2: case PIXEL_FMT_ETC1S: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid; break; } case PIXEL_FMT_DXT1A: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; break; } case PIXEL_FMT_DXT2: case PIXEL_FMT_DXT3: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; break; } case PIXEL_FMT_DXT4: case PIXEL_FMT_DXT5: case PIXEL_FMT_ETC2A: case PIXEL_FMT_ETC2AS: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; break; } case PIXEL_FMT_DXT5A: { flags = cCompFlagAValid; break; } case PIXEL_FMT_DXT5_CCxY: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagLumaChroma; break; } case PIXEL_FMT_DXT5_xGBR: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; break; } case PIXEL_FMT_DXT5_AGBR: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid | cCompFlagNormalMap; break; } case PIXEL_FMT_DXT5_xGxR: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; break; } case PIXEL_FMT_3DC: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; break; } case PIXEL_FMT_DXN: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagNormalMap; break; } case PIXEL_FMT_R8G8B8: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid; break; } case PIXEL_FMT_A8R8G8B8: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid; break; } case PIXEL_FMT_A8: { flags = cCompFlagAValid; break; } case PIXEL_FMT_L8: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagGrayscale; break; } case PIXEL_FMT_A8L8: { flags = cCompFlagRValid | cCompFlagGValid | cCompFlagBValid | cCompFlagAValid | cCompFlagGrayscale; break; } default: { 
CRNLIB_ASSERT(0); break; } } return static_cast(flags); } crn_format convert_pixel_format_to_best_crn_format(pixel_format crn_fmt) { crn_format fmt = cCRNFmtDXT1; switch (crn_fmt) { case PIXEL_FMT_DXT1: case PIXEL_FMT_DXT1A: fmt = cCRNFmtDXT1; break; case PIXEL_FMT_DXT2: case PIXEL_FMT_DXT3: case PIXEL_FMT_DXT4: case PIXEL_FMT_DXT5: fmt = cCRNFmtDXT5; break; case PIXEL_FMT_3DC: fmt = cCRNFmtDXN_YX; break; case PIXEL_FMT_DXN: fmt = cCRNFmtDXN_XY; break; case PIXEL_FMT_DXT5A: fmt = cCRNFmtDXT5A; break; case PIXEL_FMT_R8G8B8: case PIXEL_FMT_L8: fmt = cCRNFmtDXT1; break; case PIXEL_FMT_A8R8G8B8: case PIXEL_FMT_A8: case PIXEL_FMT_A8L8: fmt = cCRNFmtDXT5; break; case PIXEL_FMT_DXT5_CCxY: fmt = cCRNFmtDXT5_CCxY; break; case PIXEL_FMT_DXT5_xGBR: fmt = cCRNFmtDXT5_xGBR; break; case PIXEL_FMT_DXT5_AGBR: fmt = cCRNFmtDXT5_AGBR; break; case PIXEL_FMT_DXT5_xGxR: fmt = cCRNFmtDXT5_xGxR; break; case PIXEL_FMT_ETC1: fmt = cCRNFmtETC1; break; case PIXEL_FMT_ETC2: fmt = cCRNFmtETC2; break; case PIXEL_FMT_ETC2A: fmt = cCRNFmtETC2A; break; case PIXEL_FMT_ETC1S: fmt = cCRNFmtETC1S; break; case PIXEL_FMT_ETC2AS: fmt = cCRNFmtETC2AS; break; default: { CRNLIB_ASSERT(false); break; } } return fmt; } pixel_format convert_crn_format_to_pixel_format(crn_format fmt) { switch (fmt) { case cCRNFmtDXT1: return PIXEL_FMT_DXT1; case cCRNFmtDXT3: return PIXEL_FMT_DXT3; case cCRNFmtDXT5: return PIXEL_FMT_DXT5; case cCRNFmtDXT5_CCxY: return PIXEL_FMT_DXT5_CCxY; case cCRNFmtDXT5_xGxR: return PIXEL_FMT_DXT5_xGxR; case cCRNFmtDXT5_xGBR: return PIXEL_FMT_DXT5_xGBR; case cCRNFmtDXT5_AGBR: return PIXEL_FMT_DXT5_AGBR; case cCRNFmtDXN_XY: return PIXEL_FMT_DXN; case cCRNFmtDXN_YX: return PIXEL_FMT_3DC; case cCRNFmtDXT5A: return PIXEL_FMT_DXT5A; case cCRNFmtETC1: return PIXEL_FMT_ETC1; case cCRNFmtETC2: return PIXEL_FMT_ETC2; case cCRNFmtETC2A: return PIXEL_FMT_ETC2A; case cCRNFmtETC1S: return PIXEL_FMT_ETC1S; case cCRNFmtETC2AS: return PIXEL_FMT_ETC2AS; default: { CRNLIB_ASSERT(false); break; } } return 
PIXEL_FMT_INVALID; } } // namespace pixel_format } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_pixel_format.h000066400000000000000000000173741503722002600230100ustar00rootroot00000000000000// File: crn_pixel_format.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dxt.h" #include "../inc/crnlib.h" #include "../inc/dds_defs.h" namespace crnlib { namespace pixel_format_helpers { uint get_num_formats(); pixel_format get_pixel_format_by_index(uint index); const char* get_pixel_format_string(pixel_format fmt); const char* get_crn_format_string(crn_format fmt); inline bool is_grayscale(pixel_format fmt) { switch (fmt) { case PIXEL_FMT_L8: case PIXEL_FMT_A8L8: return true; default: break; } return false; } inline bool is_dxt1(pixel_format fmt) { return (fmt == PIXEL_FMT_DXT1) || (fmt == PIXEL_FMT_DXT1A); } // has_alpha() should probably be called "has_opacity()" - it indicates if the format encodes opacity // because some swizzled DXT5 formats do not encode opacity. 
// True for the formats that carry nothing but an alpha channel: A8 and DXT5A.
inline bool is_alpha_only(pixel_format fmt) {
  return (fmt == PIXEL_FMT_A8) || (fmt == PIXEL_FMT_DXT5A);
}
PIXEL_FMT_DXT5_xGBR: return cDXT5; case PIXEL_FMT_DXT5_AGBR: return cDXT5; case PIXEL_FMT_ETC1: return cETC1; case PIXEL_FMT_ETC2: return cETC2; case PIXEL_FMT_ETC2A: return cETC2A; case PIXEL_FMT_ETC1S: return cETC1S; case PIXEL_FMT_ETC2AS: return cETC2AS; default: break; } return cDXTInvalid; } inline pixel_format from_dxt_format(dxt_format dxt_fmt) { switch (dxt_fmt) { case cDXT1: return PIXEL_FMT_DXT1; case cDXT1A: return PIXEL_FMT_DXT1A; case cDXT3: return PIXEL_FMT_DXT3; case cDXT5: return PIXEL_FMT_DXT5; case cDXN_XY: return PIXEL_FMT_DXN; case cDXN_YX: return PIXEL_FMT_3DC; case cDXT5A: return PIXEL_FMT_DXT5A; case cETC1: return PIXEL_FMT_ETC1; case cETC2: return PIXEL_FMT_ETC2; case cETC2A: return PIXEL_FMT_ETC2A; case cETC1S: return PIXEL_FMT_ETC1S; case cETC2AS: return PIXEL_FMT_ETC2AS; default: break; } CRNLIB_ASSERT(false); return PIXEL_FMT_INVALID; } inline bool is_pixel_format_non_srgb(pixel_format fmt) { switch (fmt) { case PIXEL_FMT_3DC: case PIXEL_FMT_DXN: case PIXEL_FMT_DXT5A: case PIXEL_FMT_DXT5_CCxY: case PIXEL_FMT_DXT5_xGxR: case PIXEL_FMT_DXT5_xGBR: case PIXEL_FMT_DXT5_AGBR: return true; default: break; } return false; } inline bool is_crn_format_non_srgb(crn_format fmt) { switch (fmt) { case cCRNFmtDXN_XY: case cCRNFmtDXN_YX: case cCRNFmtDXT5A: case cCRNFmtDXT5_CCxY: case cCRNFmtDXT5_xGxR: case cCRNFmtDXT5_xGBR: case cCRNFmtDXT5_AGBR: return true; default: break; } return false; } inline uint get_bpp(pixel_format fmt) { switch (fmt) { case PIXEL_FMT_DXT1: return 4; case PIXEL_FMT_DXT1A: return 4; case PIXEL_FMT_ETC1: return 4; case PIXEL_FMT_ETC2: return 4; case PIXEL_FMT_ETC2A: return 8; case PIXEL_FMT_ETC1S: return 4; case PIXEL_FMT_ETC2AS: return 8; case PIXEL_FMT_DXT2: return 8; case PIXEL_FMT_DXT3: return 8; case PIXEL_FMT_DXT4: return 8; case PIXEL_FMT_DXT5: return 8; case PIXEL_FMT_3DC: return 8; case PIXEL_FMT_DXT5A: return 4; case PIXEL_FMT_R8G8B8: return 24; case PIXEL_FMT_A8R8G8B8: return 32; case PIXEL_FMT_A8: return 8; case 
// Lower-cases, in place, at most n characters of the NUL-terminated string p
// and returns p. A NULL p is returned unchanged.
char* crnlib_strnlwr(char* p, size_t n) {
  if (!p)
    return p;

  for (size_t i = 0; (i < n) && p[i]; i++) {
    // tolower() is only defined for EOF and values representable as unsigned
    // char; a plain (possibly negative) char must be converted first.
    p[i] = static_cast<char>(tolower(static_cast<unsigned char>(p[i])));
  }

  return p;
}

// Upper-cases, in place, at most n characters of the NUL-terminated string p
// and returns p. A NULL p is returned unchanged.
char* crnlib_strnupr(char* p, size_t n) {
  if (!p)
    return p;

  for (size_t i = 0; (i < n) && p[i]; i++) {
    // Same unsigned char conversion as crnlib_strnlwr, for the same reason.
    p[i] = static_cast<char>(toupper(static_cast<unsigned char>(p[i])));
  }

  return p;
}
#define _FILE_OFFSET_BITS 64 #define crn_fopen(pDstFile, f, m) *(pDstFile) = fopen(f, m) #define crn_fseek fseeko #define crn_ftell ftello #else #define crn_fopen(pDstFile, f, m) *(pDstFile) = fopen(f, m) #define crn_fseek(s, o, w) fseek(s, static_cast(o), w) #define crn_ftell ftell #endif #if CRNLIB_USE_WIN32_API #define CRNLIB_BREAKPOINT DebugBreak(); #define CRNLIB_BUILTIN_EXPECT(c, v) c #elif defined(__GNUC__) #if defined(__i386__) || defined(__x86_64__) #define CRNLIB_BREAKPOINT asm("int $3"); #else #define CRNLIB_BREAKPOINT #endif #define CRNLIB_BUILTIN_EXPECT(c, v) __builtin_expect(c, v) #else #define CRNLIB_BREAKPOINT #define CRNLIB_BUILTIN_EXPECT(c, v) c #endif #if defined(__GNUC__) #define CRNLIB_ALIGNED(x) __attribute__((aligned(x))) #define CRNLIB_NOINLINE __attribute__((noinline)) #elif defined(_MSC_VER) #define CRNLIB_ALIGNED(x) __declspec(align(x)) #define CRNLIB_NOINLINE __declspec(noinline) #else #define CRNLIB_ALIGNED(x) #define CRNLIB_NOINLINE #endif #define CRNLIB_GET_ALIGNMENT(v) ((!sizeof(v)) ? 1 : (__alignof(v) ? 
__alignof(v) : sizeof(uint32))) #if defined(_WIN32) #define crnlib_snprintf sprintf_s #define crnlib_vsnprintf vsprintf_s #define crnlib_strnlwr _strlwr_s #define crnlib_strnupr _strupr_s #define crnlib_stricmp _stricmp #define crnlib_strnicmp _strnicmp #else #define crnlib_snprintf snprintf #define crnlib_vsnprintf vsnprintf char* crnlib_strnlwr(char* p, size_t n); char* crnlib_strnupr(char* p, size_t n); #define crnlib_stricmp strcasecmp #define crnlib_strnicmp strncasecmp #endif inline bool crnlib_is_little_endian() { return c_crnlib_little_endian_platform; } inline bool crnlib_is_big_endian() { return c_crnlib_big_endian_platform; } inline bool crnlib_is_pc() { #ifdef CRNLIB_PLATFORM_PC return true; #else return false; #endif } inline bool crnlib_is_x86() { #ifdef CRNLIB_PLATFORM_PC_X86 return true; #else return false; #endif } inline bool crnlib_is_x64() { #ifdef CRNLIB_PLATFORM_PC_X64 return true; #else return false; #endif } DaemonEngine-crunch-ef4d32f/crnlib/crn_prefix_coding.cpp000066400000000000000000000200701503722002600234550ustar00rootroot00000000000000// File: crn_prefix_coding.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_prefix_coding.h" //#include "rand.h" #ifdef CRNLIB_BUILD_DEBUG //#define TEST_DECODER_TABLES #endif namespace crnlib { namespace prefix_coding { bool limit_max_code_size(uint num_syms, uint8* pCodesizes, uint max_code_size) { const uint cMaxEverCodeSize = 34; if ((!num_syms) || (num_syms > cMaxSupportedSyms) || (max_code_size < 1) || (max_code_size > cMaxEverCodeSize)) return false; uint num_codes[cMaxEverCodeSize + 1]; utils::zero_object(num_codes); bool should_limit = false; for (uint i = 0; i < num_syms; i++) { uint c = pCodesizes[i]; if (c) { CRNLIB_ASSERT(c <= cMaxEverCodeSize); num_codes[c]++; if (c > max_code_size) should_limit = true; } } if (!should_limit) return true; uint ofs = 0; uint next_sorted_ofs[cMaxEverCodeSize + 1]; for (uint i = 1; i <= cMaxEverCodeSize; 
i++) { next_sorted_ofs[i] = ofs; ofs += num_codes[i]; } if ((ofs < 2) || (ofs > cMaxSupportedSyms)) return true; if (ofs > (1U << max_code_size)) return false; for (uint i = max_code_size + 1; i <= cMaxEverCodeSize; i++) num_codes[max_code_size] += num_codes[i]; // Technique of adjusting tree to enforce maximum code size from LHArc. uint total = 0; for (uint i = max_code_size; i; --i) total += (num_codes[i] << (max_code_size - i)); if (total == (1U << max_code_size)) return true; do { num_codes[max_code_size]--; uint i; for (i = max_code_size - 1; i; --i) { if (!num_codes[i]) continue; num_codes[i]--; num_codes[i + 1] += 2; break; } if (!i) return false; total--; } while (total != (1U << max_code_size)); uint8 new_codesizes[cMaxSupportedSyms]; uint8* p = new_codesizes; for (uint i = 1; i <= max_code_size; i++) { uint n = num_codes[i]; if (n) { memset(p, i, n); p += n; } } for (uint i = 0; i < num_syms; i++) { const uint c = pCodesizes[i]; if (c) { uint ofs = next_sorted_ofs[c]; next_sorted_ofs[c] = ofs + 1; pCodesizes[i] = static_cast(new_codesizes[ofs]); } } return true; } bool generate_codes(uint num_syms, const uint8* pCodesizes, uint16* pCodes) { uint num_codes[cMaxExpectedCodeSize + 1]; utils::zero_object(num_codes); for (uint i = 0; i < num_syms; i++) { uint c = pCodesizes[i]; if (c) { CRNLIB_ASSERT(c <= cMaxExpectedCodeSize); num_codes[c]++; } } uint code = 0; uint next_code[cMaxExpectedCodeSize + 1]; next_code[0] = 0; for (uint i = 1; i <= cMaxExpectedCodeSize; i++) { next_code[i] = code; code = (code + num_codes[i]) << 1; } if (code != (1 << (cMaxExpectedCodeSize + 1))) { uint t = 0; for (uint i = 1; i <= cMaxExpectedCodeSize; i++) { t += num_codes[i]; if (t > 1) return false; } } for (uint i = 0; i < num_syms; i++) { uint c = pCodesizes[i]; if (c) { CRNLIB_ASSERT(next_code[c] <= cUINT16_MAX); pCodes[i] = static_cast(next_code[c]++); CRNLIB_ASSERT(math::total_bits(pCodes[i]) <= pCodesizes[i]); } } return true; } bool generate_decoder_tables(uint num_syms, 
const uint8* pCodesizes, decoder_tables* pTables, uint table_bits) { uint min_codes[cMaxExpectedCodeSize]; if ((!num_syms) || (table_bits > cMaxTableBits)) return false; pTables->m_num_syms = num_syms; uint num_codes[cMaxExpectedCodeSize + 1]; utils::zero_object(num_codes); for (uint i = 0; i < num_syms; i++) { uint c = pCodesizes[i]; if (c) num_codes[c]++; } uint sorted_positions[cMaxExpectedCodeSize + 1]; uint code = 0; uint total_used_syms = 0; uint max_code_size = 0; uint min_code_size = UINT_MAX; for (uint i = 1; i <= cMaxExpectedCodeSize; i++) { const uint n = num_codes[i]; if (!n) pTables->m_max_codes[i - 1] = 0; //UINT_MAX; else { min_code_size = math::minimum(min_code_size, i); max_code_size = math::maximum(max_code_size, i); min_codes[i - 1] = code; pTables->m_max_codes[i - 1] = code + n - 1; pTables->m_max_codes[i - 1] = 1 + ((pTables->m_max_codes[i - 1] << (16 - i)) | ((1 << (16 - i)) - 1)); pTables->m_val_ptrs[i - 1] = total_used_syms; sorted_positions[i] = total_used_syms; code += n; total_used_syms += n; } code <<= 1; } pTables->m_total_used_syms = total_used_syms; if (total_used_syms > pTables->m_cur_sorted_symbol_order_size) { pTables->m_cur_sorted_symbol_order_size = total_used_syms; if (!math::is_power_of_2(total_used_syms)) pTables->m_cur_sorted_symbol_order_size = math::minimum(num_syms, math::next_pow2(total_used_syms)); if (pTables->m_sorted_symbol_order) { crnlib_delete_array(pTables->m_sorted_symbol_order); pTables->m_sorted_symbol_order = NULL; } pTables->m_sorted_symbol_order = crnlib_new_array(pTables->m_cur_sorted_symbol_order_size); } pTables->m_min_code_size = static_cast(min_code_size); pTables->m_max_code_size = static_cast(max_code_size); for (uint i = 0; i < num_syms; i++) { uint c = pCodesizes[i]; if (c) { CRNLIB_ASSERT(num_codes[c]); uint sorted_pos = sorted_positions[c]++; CRNLIB_ASSERT(sorted_pos < total_used_syms); pTables->m_sorted_symbol_order[sorted_pos] = static_cast(i); } } if (table_bits <= pTables->m_min_code_size) 
table_bits = 0; pTables->m_table_bits = table_bits; if (table_bits) { uint table_size = 1 << table_bits; if (table_size > pTables->m_cur_lookup_size) { pTables->m_cur_lookup_size = table_size; if (pTables->m_lookup) { crnlib_delete_array(pTables->m_lookup); pTables->m_lookup = NULL; } pTables->m_lookup = crnlib_new_array(table_size); } memset(pTables->m_lookup, 0xFF, static_cast(sizeof(pTables->m_lookup[0])) * (1UL << table_bits)); for (uint codesize = 1; codesize <= table_bits; codesize++) { if (!num_codes[codesize]) continue; const uint fillsize = table_bits - codesize; const uint fillnum = 1 << fillsize; const uint min_code = min_codes[codesize - 1]; const uint max_code = pTables->get_unshifted_max_code(codesize); const uint val_ptr = pTables->m_val_ptrs[codesize - 1]; for (uint code = min_code; code <= max_code; code++) { const uint sym_index = pTables->m_sorted_symbol_order[val_ptr + code - min_code]; CRNLIB_ASSERT(pCodesizes[sym_index] == codesize); for (uint j = 0; j < fillnum; j++) { const uint t = j + (code << fillsize); CRNLIB_ASSERT(t < (1U << table_bits)); CRNLIB_ASSERT(pTables->m_lookup[t] == cUINT32_MAX); pTables->m_lookup[t] = sym_index | (codesize << 16U); } } } } for (uint i = 0; i < cMaxExpectedCodeSize; i++) pTables->m_val_ptrs[i] -= min_codes[i]; pTables->m_table_max_code = 0; pTables->m_decode_start_code_size = pTables->m_min_code_size; if (table_bits) { uint i; for (i = table_bits; i >= 1; i--) { if (num_codes[i]) { pTables->m_table_max_code = pTables->m_max_codes[i - 1]; break; } } if (i >= 1) { pTables->m_decode_start_code_size = table_bits + 1; for (uint i = table_bits + 1; i <= max_code_size; i++) { if (num_codes[i]) { pTables->m_decode_start_code_size = i; break; } } } } // sentinels pTables->m_max_codes[cMaxExpectedCodeSize] = UINT_MAX; pTables->m_val_ptrs[cMaxExpectedCodeSize] = 0xFFFFF; pTables->m_table_shift = 32 - pTables->m_table_bits; return true; } } // namespace prefix_codig } // namespace crnlib 
DaemonEngine-crunch-ef4d32f/crnlib/crn_prefix_coding.h000066400000000000000000000056651503722002600231370ustar00rootroot00000000000000// File: crn_prefix_coding.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { namespace prefix_coding { const uint cMaxExpectedCodeSize = 16; const uint cMaxSupportedSyms = 8192; const uint cMaxTableBits = 11; bool limit_max_code_size(uint num_syms, uint8* pCodesizes, uint max_code_size); bool generate_codes(uint num_syms, const uint8* pCodesizes, uint16* pCodes); class decoder_tables { public: inline decoder_tables() : m_table_shift(0), m_table_max_code(0), m_decode_start_code_size(0), m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) { } inline decoder_tables(const decoder_tables& other) : m_table_shift(0), m_table_max_code(0), m_decode_start_code_size(0), m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) { *this = other; } decoder_tables& operator=(const decoder_tables& other) { if (this == &other) return *this; clear(); memcpy(this, &other, sizeof(*this)); if (other.m_lookup) { m_lookup = crnlib_new_array(m_cur_lookup_size); memcpy(m_lookup, other.m_lookup, sizeof(m_lookup[0]) * m_cur_lookup_size); } if (other.m_sorted_symbol_order) { m_sorted_symbol_order = crnlib_new_array(m_cur_sorted_symbol_order_size); memcpy(m_sorted_symbol_order, other.m_sorted_symbol_order, sizeof(m_sorted_symbol_order[0]) * m_cur_sorted_symbol_order_size); } return *this; } inline void clear() { if (m_lookup) { crnlib_delete_array(m_lookup); m_lookup = 0; m_cur_lookup_size = 0; } if (m_sorted_symbol_order) { crnlib_delete_array(m_sorted_symbol_order); m_sorted_symbol_order = NULL; m_cur_sorted_symbol_order_size = 0; } } inline ~decoder_tables() { if (m_lookup) crnlib_delete_array(m_lookup); if (m_sorted_symbol_order) crnlib_delete_array(m_sorted_symbol_order); } // DO NOT use any complex classes here - it is 
bitwise copied. uint m_num_syms; uint m_total_used_syms; uint m_table_bits; uint m_table_shift; uint m_table_max_code; uint m_decode_start_code_size; uint8 m_min_code_size; uint8 m_max_code_size; uint m_max_codes[cMaxExpectedCodeSize + 1]; int m_val_ptrs[cMaxExpectedCodeSize + 1]; uint m_cur_lookup_size; uint32* m_lookup; uint m_cur_sorted_symbol_order_size; uint16* m_sorted_symbol_order; inline uint get_unshifted_max_code(uint len) const { CRNLIB_ASSERT((len >= 1) && (len <= cMaxExpectedCodeSize)); uint k = m_max_codes[len - 1]; if (!k) return UINT_MAX; return (k - 1) >> (16 - len); } }; bool generate_decoder_tables(uint num_syms, const uint8* pCodesizes, decoder_tables* pTables, uint table_bits); } // namespace prefix_coding } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_qdxt1.cpp000066400000000000000000000731021503722002600217020ustar00rootroot00000000000000// File: crn_qdxt.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_qdxt1.h" #include "crn_dxt1.h" #include "crn_dxt_fast.h" #include "crn_image_utils.h" #include "crn_dxt_hc_common.h" #define GENERATE_DEBUG_IMAGES 0 namespace crnlib { qdxt1::qdxt1(task_pool& task_pool) : m_pTask_pool(&task_pool), m_main_thread_id(0), m_canceled(false), m_progress_start(0), m_progress_range(100), m_num_blocks(0), m_pBlocks(NULL), m_pDst_elements(NULL), m_elements_per_block(0), m_max_selector_clusters(0), m_prev_percentage_complete(-1), m_selector_clusterizer(task_pool) { } qdxt1::~qdxt1() { } void qdxt1::clear() { m_main_thread_id = 0; m_num_blocks = 0; m_pBlocks = 0; m_pDst_elements = NULL; m_elements_per_block = 0; m_params.clear(); m_endpoint_clusterizer.clear(); m_endpoint_cluster_indices.clear(); m_max_selector_clusters = 0; m_canceled = false; m_progress_start = 0; m_progress_range = 100; m_selector_clusterizer.clear(); for (uint i = 0; i <= qdxt1_params::cMaxQuality; i++) m_cached_selector_cluster_indices[i].clear(); m_cluster_hash.clear(); 
m_prev_percentage_complete = -1; } bool qdxt1::init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& params) { clear(); CRNLIB_ASSERT(n && pBlocks); m_main_thread_id = crn_get_current_thread_id(); m_num_blocks = n; m_pBlocks = pBlocks; m_params = params; m_endpoint_clusterizer.reserve_training_vecs(m_num_blocks); m_progress_start = 0; m_progress_range = 75; const bool debugging = false; image_u8 debug_img; if ((m_params.m_hierarchical) && (m_params.m_num_mips)) { vec6F_clusterizer::training_vec_array& training_vecs = m_endpoint_clusterizer.get_training_vecs(); training_vecs.resize(m_num_blocks); uint encoding_hist[cNumChunkEncodings]; utils::zero_object(encoding_hist); uint total_processed_blocks = 0; uint next_progress_threshold = 512; for (uint level = 0; level < m_params.m_num_mips; level++) { const qdxt1_params::mip_desc& level_desc = m_params.m_mip_desc[level]; const uint num_chunks_x = (level_desc.m_block_width + cChunkBlockWidth - 1) / cChunkBlockWidth; const uint num_chunks_y = (level_desc.m_block_height + cChunkBlockHeight - 1) / cChunkBlockHeight; const uint level_width = level_desc.m_block_width * 4; const uint level_height = level_desc.m_block_height * 4; if (debugging) debug_img.resize(num_chunks_x * cChunkPixelWidth, num_chunks_y * cChunkPixelHeight); float adaptive_tile_color_psnr_derating = 1.5f; // was 2.4f if ((level) && (adaptive_tile_color_psnr_derating > .25f)) { adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.1f, static_cast(level))); // was 3.0f } for (uint chunk_y = 0; chunk_y < num_chunks_y; chunk_y++) { for (uint chunk_x = 0; chunk_x < num_chunks_x; chunk_x++) { color_quad_u8 chunk_pixels[cChunkPixelWidth * cChunkPixelHeight]; for (uint y = 0; y < cChunkPixelHeight; y++) { const uint pix_y = math::minimum(chunk_y * cChunkPixelHeight + y, level_height - 1); const uint outer_block_index = level_desc.m_first_block + ((pix_y >> 2) * level_desc.m_block_width); for (uint x = 0; x < 
cChunkPixelWidth; x++) { const uint pix_x = math::minimum(chunk_x * cChunkPixelWidth + x, level_width - 1); const uint block_index = outer_block_index + (pix_x >> 2); const dxt_pixel_block& block = m_pBlocks[block_index]; const color_quad_u8& p = block.m_pixels[pix_y & 3][pix_x & 3]; chunk_pixels[x + y * 8] = p; } } struct layout_results { uint m_low_color; uint m_high_color; uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight]; uint64 m_error; //float m_penalty; }; layout_results layouts[cNumChunkTileLayouts]; for (uint l = 0; l < cNumChunkTileLayouts; l++) { const uint width = g_chunk_tile_layouts[l].m_width; const uint height = g_chunk_tile_layouts[l].m_height; const uint x_ofs = g_chunk_tile_layouts[l].m_x_ofs; const uint y_ofs = g_chunk_tile_layouts[l].m_y_ofs; color_quad_u8 layout_pixels[cChunkPixelWidth * cChunkPixelHeight]; for (uint y = 0; y < height; y++) for (uint x = 0; x < width; x++) layout_pixels[x + y * width] = chunk_pixels[(x_ofs + x) + (y_ofs + y) * cChunkPixelWidth]; const uint n = width * height; dxt_fast::compress_color_block(n, layout_pixels, layouts[l].m_low_color, layouts[l].m_high_color, layouts[l].m_selectors); color_quad_u8 c[4]; dxt1_block::get_block_colors(c, static_cast(layouts[l].m_low_color), static_cast(layouts[l].m_high_color)); uint64 error = 0; for (uint i = 0; i < n; i++) error += color::elucidian_distance(layout_pixels[i], c[layouts[l].m_selectors[i]], false); layouts[l].m_error = error; #if 0 if ((width > 4) || (height > 4)) { const uint dist = color::elucidian_distance( dxt1_block::unpack_color(static_cast(layouts[l].m_low_color), true), dxt1_block::unpack_color(static_cast(layouts[l].m_high_color), true), false); layouts[l].m_penalty = math::clamp((sqrt((float)dist) - 75.0f) / 150.0f, 0.0f, 2.0f); if ((width == 8) && (height == 8)) layouts[l].m_penalty *= 2.0f; } else { layouts[l].m_penalty = 0.0f; } #endif } double best_peak_snr = -1.0f; uint best_encoding = 0; for (uint e = 0; e < cNumChunkEncodings; e++) { const 
chunk_encoding_desc& encoding_desc = g_chunk_encodings[e]; double total_error = 0; for (uint t = 0; t < encoding_desc.m_num_tiles; t++) total_error += (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_error; //double mean_squared = total_error * (1.0f / (16.0f * 3.0f)); double mean_squared = total_error * (1.0f / (64.0f * 3.0f)); double root_mean_squared = sqrt(mean_squared); double peak_snr = 999999.0f; if (mean_squared) peak_snr = math::clamp(log10(255.0f / root_mean_squared) * 20.0f, 0.0f, 500.0f); //if (level) // adaptive_tile_color_psnr_derating = math::lerp(adaptive_tile_color_psnr_derating * .5f, .3f, math::maximum((level - 1) / float(m_params.m_num_mips - 2), 1.0f)); float color_derating = math::lerp(0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f); peak_snr = peak_snr - color_derating; //for (uint t = 0; t < encoding_desc.m_num_tiles; t++) // peak_snr -= (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_penalty; if (peak_snr > best_peak_snr) { best_peak_snr = peak_snr; best_encoding = e; } } encoding_hist[best_encoding]++; const chunk_encoding_desc& encoding_desc = g_chunk_encodings[best_encoding]; for (uint t = 0; t < encoding_desc.m_num_tiles; t++) { const chunk_tile_desc& tile_desc = encoding_desc.m_tiles[t]; uint layout_index = tile_desc.m_layout_index; const layout_results& layout = layouts[layout_index]; color_quad_u8 c[4]; if (debugging) dxt1_block::get_block_colors(c, static_cast(layout.m_low_color), static_cast(layout.m_high_color)); color_quad_u8 tile_pixels[cChunkPixelWidth * cChunkPixelHeight]; for (uint y = 0; y < tile_desc.m_height; y++) { const uint pix_y = y + tile_desc.m_y_ofs; for (uint x = 0; x < tile_desc.m_width; x++) { const uint pix_x = x + tile_desc.m_x_ofs; tile_pixels[x + y * tile_desc.m_width] = chunk_pixels[pix_x + pix_y * cChunkPixelWidth]; if (debugging) debug_img(chunk_x * 8 + pix_x, chunk_y * 8 + pix_y) = c[layout.m_selectors[x + y * tile_desc.m_width]]; } } 
color_quad_u8 l, h; dxt_fast::find_representative_colors(tile_desc.m_width * tile_desc.m_height, tile_pixels, l, h); //const uint dist = color::color_distance(m_params.m_perceptual, l, h, false); const uint dist = color::elucidian_distance(l, h, false); const uint cColorDistToWeight = 5000; const uint cMaxWeight = 8; uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); vec6F ev; ev[0] = l[0]; ev[1] = l[1]; ev[2] = l[2]; ev[3] = h[0]; ev[4] = h[1]; ev[5] = h[2]; for (uint y = 0; y < (tile_desc.m_height >> 2); y++) { uint block_y = chunk_y * cChunkBlockHeight + y + (tile_desc.m_y_ofs >> 2); if (block_y >= level_desc.m_block_height) continue; for (uint x = 0; x < (tile_desc.m_width >> 2); x++) { uint block_x = chunk_x * cChunkBlockWidth + x + (tile_desc.m_x_ofs >> 2); if (block_x >= level_desc.m_block_width) break; uint block_index = level_desc.m_first_block + block_x + block_y * level_desc.m_block_width; training_vecs[block_index].first = ev; training_vecs[block_index].second = weight; total_processed_blocks++; //if (debugging) //{ // debug_img(block_x, block_y) = l; // debug_img(block_x + level_desc.m_block_width, block_y) = h; //} } // x } // y } //t if (total_processed_blocks >= next_progress_threshold) { next_progress_threshold += 512; if (!update_progress(total_processed_blocks, m_num_blocks - 1)) return false; } } // chunk_x } // chunk_y #if GENERATE_DEBUG_IMAGES if (debugging) image_utils::write_to_file(dynamic_string(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cWriteFlagIgnoreAlpha); #endif } // level #if 0 trace("chunk encoding hist: "); for (uint i = 0; i < cNumChunkEncodings; i++) trace("%u ", encoding_hist[i]); trace("\n"); #endif } else { for (uint block_index = 0; block_index < m_num_blocks; block_index++) { if ((block_index & 511) == 0) { if (!update_progress(block_index, m_num_blocks - 1)) return false; } color_quad_u8 l, h; dxt_fast::find_representative_colors(cDXTBlockSize * cDXTBlockSize, 
&m_pBlocks[block_index].m_pixels[0][0], l, h); //const uint dist = color::color_distance(m_params.m_perceptual, l, h, false); const uint dist = color::elucidian_distance(l, h, false); const uint cColorDistToWeight = 5000; const uint cMaxWeight = 8; uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); vec6F ev; ev[0] = l[0]; ev[1] = l[1]; ev[2] = l[2]; ev[3] = h[0]; ev[4] = h[1]; ev[5] = h[2]; m_endpoint_clusterizer.add_training_vec(ev, weight); } } const uint cMaxEndpointClusters = 65535U; m_progress_start = 75; m_progress_range = 20; if (!m_endpoint_clusterizer.generate_codebook(cMaxEndpointClusters, generate_codebook_progress_callback, this)) return false; crnlib::hash_map selector_hash; m_progress_start = 95; m_progress_range = 5; for (uint block_index = 0; block_index < m_num_blocks; block_index++) { if ((block_index & 511) == 0) { if (!update_progress(block_index, m_num_blocks - 1)) return false; } dxt1_block dxt_blk; dxt_fast::compress_color_block(&dxt_blk, &m_pBlocks[block_index].m_pixels[0][0]); uint selectors = dxt_blk.m_selectors[0] | (dxt_blk.m_selectors[1] << 8) | (dxt_blk.m_selectors[2] << 16) | (dxt_blk.m_selectors[3] << 24); selector_hash.insert(selectors); } m_max_selector_clusters = selector_hash.size() + 128; // trace("max endpoint clusters: %u\n", m_endpoint_clusterizer.get_codebook_size()); // trace("max selector clusters: %u\n", m_max_selector_clusters); update_progress(1, 1); return true; } bool qdxt1::update_progress(uint value, uint max_value) { if (!m_params.m_pProgress_func) return true; uint percentage = max_value ? 
(m_progress_start + (value * m_progress_range + (max_value / 2)) / max_value) : 100; if ((int)percentage == m_prev_percentage_complete) return true; m_prev_percentage_complete = percentage; if (!m_params.m_pProgress_func(m_params.m_progress_start + (percentage * m_params.m_progress_range) / 100U, m_params.m_pProgress_data)) { m_canceled = true; return false; } return true; } void qdxt1::pack_endpoints_task(uint64 data, void*) { const uint thread_index = static_cast(data); crnlib::vector cluster_pixels; cluster_pixels.reserve(1024); crnlib::vector selectors; selectors.reserve(1024); dxt1_endpoint_optimizer optimizer; dxt1_endpoint_optimizer::params p; dxt1_endpoint_optimizer::results r; p.m_quality = m_params.m_dxt_quality; p.m_use_alpha_blocks = m_params.m_use_alpha_blocks; p.m_dxt1a_alpha_threshold = m_params.m_dxt1a_alpha_threshold; p.m_perceptual = m_params.m_perceptual; uint cluster_index_progress_mask = math::next_pow2(m_endpoint_cluster_indices.size() / 100); cluster_index_progress_mask /= 2; cluster_index_progress_mask = math::maximum(cluster_index_progress_mask, 8); cluster_index_progress_mask -= 1; cluster_id cid; const crnlib::vector& indices = cid.m_cells; for (uint cluster_index = 0; cluster_index < m_endpoint_cluster_indices.size(); cluster_index++) { if (m_canceled) return; if ((cluster_index & cluster_index_progress_mask) == 0) { if (crn_get_current_thread_id() == m_main_thread_id) { if (!update_progress(cluster_index, m_endpoint_cluster_indices.size() - 1)) return; } } if (m_pTask_pool->get_num_threads()) { if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) continue; } const crnlib::vector& cluster_indices = m_endpoint_cluster_indices[cluster_index]; selectors.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); bool found = false; uint32 found_endpoints = 0; cid.set(cluster_indices); { scoped_spinlock lock(m_cluster_hash_lock); cluster_hash::const_iterator it(m_cluster_hash.find(cid)); if (it != 
m_cluster_hash.end()) { CRNLIB_ASSERT(cid == it->first); found = true; found_endpoints = it->second; } } if (found) { const uint16 low_color = static_cast(found_endpoints); const uint16 high_color = static_cast((found_endpoints >> 16U)); color_quad_u8 block_colors[4]; dxt1_block::get_block_colors(block_colors, low_color, high_color); const bool is_alpha_block = (low_color <= high_color); for (uint block_iter = 0; block_iter < indices.size(); block_iter++) { const uint block_index = indices[block_iter]; const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { dxt1_block& dxt_block = get_block(block_index); dxt_block.set_low_color(static_cast(low_color)); dxt_block.set_high_color(static_cast(high_color)); uint mask = 0; for (int i = 15; i >= 0; i--) { mask <<= 2; const color_quad_u8& c = pSrc_pixels[i]; uint dist0 = color::color_distance(m_params.m_perceptual, c, block_colors[0], false); uint dist1 = color::color_distance(m_params.m_perceptual, c, block_colors[1], false); uint dist2 = color::color_distance(m_params.m_perceptual, c, block_colors[2], false); uint selector = 0, best_dist = dist0; if (dist1 < best_dist) { selector = 1; best_dist = dist1; } if (dist2 < best_dist) { selector = 2; best_dist = dist2; } if (!is_alpha_block) { uint dist3 = color::color_distance(m_params.m_perceptual, c, block_colors[3], false); if (dist3 < best_dist) { selector = 3; } } else { if (c.a < m_params.m_dxt1a_alpha_threshold) selector = 3; } mask |= selector; } dxt_block.m_selectors[0] = static_cast(mask & 0xFF); dxt_block.m_selectors[1] = static_cast((mask >> 8) & 0xFF); dxt_block.m_selectors[2] = static_cast((mask >> 16) & 0xFF); dxt_block.m_selectors[3] = static_cast((mask >> 24) & 0xFF); } } } else { cluster_pixels.resize(indices.size() * cDXTBlockSize * cDXTBlockSize); color_quad_u8* pDst = &cluster_pixels[0]; bool has_alpha_pixels = false; for (uint block_iter = 0; block_iter < indices.size(); 
block_iter++) { const uint block_index = indices[block_iter]; //const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels; for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { const color_quad_u8& src = pSrc_pixels[i]; if (src.a < m_params.m_dxt1a_alpha_threshold) has_alpha_pixels = true; *pDst++ = src; } } p.m_block_index = cluster_index; p.m_num_pixels = cluster_pixels.size(); p.m_pPixels = cluster_pixels.begin(); r.m_pSelectors = selectors.begin(); uint low_color, high_color; if ((m_params.m_dxt_quality != cCRNDXTQualitySuperFast) || (has_alpha_pixels)) { p.m_pixels_have_alpha = has_alpha_pixels; optimizer.compute(p, r); low_color = r.m_low_color; high_color = r.m_high_color; } else { dxt_fast::compress_color_block(cluster_pixels.size(), cluster_pixels.begin(), low_color, high_color, selectors.begin(), true); } const uint8* pSrc_selectors = selectors.begin(); for (uint block_iter = 0; block_iter < indices.size(); block_iter++) { const uint block_index = indices[block_iter]; dxt1_block& dxt_block = get_block(block_index); dxt_block.set_low_color(static_cast(low_color)); dxt_block.set_high_color(static_cast(high_color)); uint mask = 0; for (int i = 15; i >= 0; i--) { mask <<= 2; mask |= pSrc_selectors[i]; } pSrc_selectors += (cDXTBlockSize * cDXTBlockSize); dxt_block.m_selectors[0] = static_cast(mask & 0xFF); dxt_block.m_selectors[1] = static_cast((mask >> 8) & 0xFF); dxt_block.m_selectors[2] = static_cast((mask >> 16) & 0xFF); dxt_block.m_selectors[3] = static_cast((mask >> 24) & 0xFF); } { scoped_spinlock lock(m_cluster_hash_lock); m_cluster_hash.insert(cid, low_color | (high_color << 16)); } } } } struct optimize_selectors_params { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(optimize_selectors_params); optimize_selectors_params( crnlib::vector >& selector_cluster_indices) : m_selector_cluster_indices(selector_cluster_indices) { } crnlib::vector >& 
m_selector_cluster_indices; }; void qdxt1::optimize_selectors_task(uint64 data, void* pData_ptr) { const uint thread_index = static_cast(data); optimize_selectors_params& task_params = *static_cast(pData_ptr); crnlib::vector block_categories[2]; block_categories[0].reserve(2048); block_categories[1].reserve(2048); for (uint cluster_index = 0; cluster_index < task_params.m_selector_cluster_indices.size(); cluster_index++) { if (m_canceled) return; if ((cluster_index & 255) == 0) { if (crn_get_current_thread_id() == m_main_thread_id) { if (!update_progress(cluster_index, task_params.m_selector_cluster_indices.size() - 1)) return; } } if (m_pTask_pool->get_num_threads()) { if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) continue; } const crnlib::vector& selector_indices = task_params.m_selector_cluster_indices[cluster_index]; if (selector_indices.size() <= 1) continue; block_categories[0].resize(0); block_categories[1].resize(0); for (uint block_iter = 0; block_iter < selector_indices.size(); block_iter++) { const uint block_index = selector_indices[block_iter]; const dxt1_block& src_block = get_block(block_index); if (!src_block.is_alpha_block()) block_categories[0].push_back(block_index); else { bool has_alpha_pixels = false; if (m_params.m_dxt1a_alpha_threshold > 0) { const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels; for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { const color_quad_u8& src = pSrc_pixels[i]; if (src.a < m_params.m_dxt1a_alpha_threshold) { has_alpha_pixels = true; break; } } } if (has_alpha_pixels) continue; block_categories[1].push_back(block_index); } } dxt1_block blk; utils::zero_object(blk); for (uint block_type = 0; block_type <= 1; block_type++) { const crnlib::vector& block_indices = block_categories[block_type]; if (block_indices.size() <= 1) continue; for (uint y = 0; y < 4; y++) { for (uint x = 0; x < 4; x++) { uint best_s = 0; uint64 best_error = 
0xFFFFFFFFFFULL; uint max_s = 4; if (block_type == 1) max_s = 3; for (uint s = 0; s < max_s; s++) { uint64 total_error = 0; for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) { const uint block_index = block_indices[block_iter]; const color_quad_u8& orig_color = m_pBlocks[block_index].m_pixels[y][x]; const dxt1_block& dst_block = get_block(block_index); color_quad_u8 colors[4]; dxt1_block::get_block_colors(colors, static_cast(dst_block.get_low_color()), static_cast(dst_block.get_high_color())); uint error = color::color_distance(m_params.m_perceptual, orig_color, colors[s], false); total_error += error; } if (total_error < best_error) { best_error = total_error; best_s = s; } } blk.set_selector(x, y, best_s); } // x } // y for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) { const uint block_index = block_indices[block_iter]; dxt1_block& dst_block = get_block(block_index); memcpy(dst_block.m_selectors, blk.m_selectors, sizeof(dst_block.m_selectors)); } } } // cluster_index } bool qdxt1::generate_codebook_progress_callback(uint percentage_completed, void* pData) { return static_cast(pData)->update_progress(percentage_completed, 100U); } bool qdxt1::create_selector_clusters(uint max_selector_clusters, crnlib::vector >& selector_cluster_indices) { m_progress_start = m_progress_range; m_progress_range = 33; weighted_selector_vec_array selector_vecs(m_num_blocks); for (uint block_iter = 0; block_iter < m_num_blocks; block_iter++) { dxt1_block& dxt1_block = get_block(block_iter); vec16F sv; float* pDst = &sv[0]; for (uint y = 0; y < 4; y++) for (uint x = 0; x < 4; x++) *pDst++ = g_dxt1_to_linear[dxt1_block.get_selector(x, y)]; const color_quad_u8 first_color(dxt1_block::unpack_color((uint16)dxt1_block.get_low_color(), true)); const color_quad_u8 second_color(dxt1_block::unpack_color((uint16)dxt1_block.get_high_color(), true)); const uint dist = color::color_distance(m_params.m_perceptual, first_color, second_color, false); 
const uint cColorDistToWeight = 2000; const uint cMaxWeight = 2048; uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); selector_vecs[block_iter].m_vec = sv; selector_vecs[block_iter].m_weight = weight; } return m_selector_clusterizer.create_clusters( selector_vecs, max_selector_clusters, selector_cluster_indices, generate_codebook_progress_callback, this); } bool qdxt1::pack(dxt1_block* pDst_elements, uint elements_per_block, const qdxt1_params& params, float quality_power_mul) { CRNLIB_ASSERT(m_num_blocks); m_main_thread_id = crn_get_current_thread_id(); m_canceled = false; m_pDst_elements = pDst_elements; m_elements_per_block = elements_per_block; m_params = params; if (!m_params.m_use_alpha_blocks) m_params.m_dxt1a_alpha_threshold = 0; m_prev_percentage_complete = -1; CRNLIB_ASSERT(m_params.m_quality_level <= qdxt1_params::cMaxQuality); const float quality = m_params.m_quality_level / (float)qdxt1_params::cMaxQuality; const float endpoint_quality = powf(quality, 1.8f * quality_power_mul); const float selector_quality = powf(quality, 1.65f * quality_power_mul); //const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 128U, m_endpoint_clusterizer.get_codebook_size()); //const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 150U, m_max_selector_clusters); const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 96U, m_endpoint_clusterizer.get_codebook_size()); const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 128U, m_max_selector_clusters); if (quality >= 1.0f) { m_endpoint_cluster_indices.resize(m_num_blocks); for (uint i = 0; i < m_num_blocks; i++) { m_endpoint_cluster_indices[i].resize(1); m_endpoint_cluster_indices[i][0] = i; } } else m_endpoint_clusterizer.retrieve_clusters(max_endpoint_clusters, 
m_endpoint_cluster_indices); // trace("endpoint clusters: %u\n", m_endpoint_cluster_indices.size()); #if 0 uint total_blocks = 0; #endif uint max_blocks = 0; for (uint i = 0; i < m_endpoint_cluster_indices.size(); i++) { uint num = m_endpoint_cluster_indices[i].size(); #if 0 total_blocks += num; #endif max_blocks = math::maximum(max_blocks, num); } #if 0 trace("Num clusters: %u, Average blocks per cluster: %u, Max blocks per cluster: %u\n", m_endpoint_cluster_indices.size(), total_blocks / m_endpoint_cluster_indices.size(), max_blocks); #endif crnlib::vector >& selector_cluster_indices = m_cached_selector_cluster_indices[params.m_quality_level]; m_progress_start = 0; if (quality >= 1.0f) m_progress_range = 100; else if (selector_cluster_indices.empty()) m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 33; else m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 50; for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) m_pTask_pool->queue_object_task(this, &qdxt1::pack_endpoints_task, i); m_pTask_pool->join(); if (m_canceled) return false; if (quality >= 1.0f) return true; if (selector_cluster_indices.empty()) { create_selector_clusters(max_selector_clusters, selector_cluster_indices); if (m_canceled) { selector_cluster_indices.clear(); return false; } } m_progress_start += m_progress_range; m_progress_range = 100 - m_progress_start; optimize_selectors_params optimize_selectors_task_params(selector_cluster_indices); for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) m_pTask_pool->queue_object_task(this, &qdxt1::optimize_selectors_task, i, &optimize_selectors_task_params); m_pTask_pool->join(); return !m_canceled; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_qdxt1.h000066400000000000000000000113621503722002600213470ustar00rootroot00000000000000// File: crn_qdxt1.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dxt.h" #include 
"crn_hash_map.h" #include "crn_clusterizer.h" #include "crn_hash.h" #include "crn_threaded_clusterizer.h" #include "crn_dxt_image.h" namespace crnlib { struct qdxt1_params { qdxt1_params() { clear(); } void clear() { m_quality_level = cMaxQuality; m_dxt_quality = cCRNDXTQualityUber; m_perceptual = true; m_dxt1a_alpha_threshold = 0; m_use_alpha_blocks = true; m_pProgress_func = NULL; m_pProgress_data = NULL; m_num_mips = 0; m_hierarchical = true; utils::zero_object(m_mip_desc); m_progress_start = 0; m_progress_range = 100; } void init(const dxt_image::pack_params& pp, int quality_level, bool hierarchical) { m_dxt_quality = pp.m_quality; m_hierarchical = hierarchical; m_perceptual = pp.m_perceptual; m_use_alpha_blocks = pp.m_use_both_block_types; m_quality_level = quality_level; m_dxt1a_alpha_threshold = pp.m_dxt1a_alpha_threshold; } enum { cMaxQuality = cCRNMaxQualityLevel }; uint m_quality_level; uint m_dxt1a_alpha_threshold; crn_dxt_quality m_dxt_quality; bool m_perceptual; bool m_use_alpha_blocks; bool m_hierarchical; struct mip_desc { uint m_first_block; uint m_block_width; uint m_block_height; }; uint m_num_mips; enum { cMaxMips = 128 }; mip_desc m_mip_desc[cMaxMips]; typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data); progress_callback_func m_pProgress_func; void* m_pProgress_data; uint m_progress_start; uint m_progress_range; }; class qdxt1 { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(qdxt1); public: qdxt1(task_pool& task_pool); ~qdxt1(); void clear(); bool init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& params); uint get_num_blocks() const { return m_num_blocks; } const dxt_pixel_block* get_blocks() const { return m_pBlocks; } bool pack(dxt1_block* pDst_elements, uint elements_per_block, const qdxt1_params& params, float quality_power_mul); private: task_pool* m_pTask_pool; crn_thread_id_t m_main_thread_id; bool m_canceled; uint m_progress_start; uint m_progress_range; uint m_num_blocks; const dxt_pixel_block* 
m_pBlocks; dxt1_block* m_pDst_elements; uint m_elements_per_block; qdxt1_params m_params; uint m_max_selector_clusters; int m_prev_percentage_complete; typedef vec<6, float> vec6F; typedef clusterizer vec6F_clusterizer; vec6F_clusterizer m_endpoint_clusterizer; crnlib::vector > m_endpoint_cluster_indices; typedef vec<16, float> vec16F; typedef threaded_clusterizer vec16F_clusterizer; typedef vec16F_clusterizer::weighted_vec weighted_selector_vec; typedef vec16F_clusterizer::weighted_vec_array weighted_selector_vec_array; vec16F_clusterizer m_selector_clusterizer; crnlib::vector > m_cached_selector_cluster_indices[qdxt1_params::cMaxQuality + 1]; struct cluster_id { cluster_id() : m_hash(0) { } cluster_id(const crnlib::vector& indices) { set(indices); } void set(const crnlib::vector& indices) { m_cells.resize(indices.size()); for (uint i = 0; i < indices.size(); i++) m_cells[i] = static_cast(indices[i]); std::sort(m_cells.begin(), m_cells.end()); m_hash = fast_hash(&m_cells[0], sizeof(m_cells[0]) * m_cells.size()); } bool operator<(const cluster_id& rhs) const { return m_cells < rhs.m_cells; } bool operator==(const cluster_id& rhs) const { if (m_hash != rhs.m_hash) return false; return m_cells == rhs.m_cells; } crnlib::vector m_cells; size_t m_hash; operator size_t() const { return m_hash; } }; typedef crnlib::hash_map cluster_hash; cluster_hash m_cluster_hash; spinlock m_cluster_hash_lock; static bool generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData); static bool generate_codebook_progress_callback(uint percentage_completed, void* pData); bool update_progress(uint value, uint max_value); void pack_endpoints_task(uint64 data, void* pData_ptr); void optimize_selectors_task(uint64 data, void* pData_ptr); bool create_selector_clusters(uint max_selector_clusters, crnlib::vector >& selector_cluster_indices); inline dxt1_block& get_block(uint index) const { return m_pDst_elements[index * m_elements_per_block]; } }; 
CRNLIB_DEFINE_BITWISE_MOVABLE(qdxt1::cluster_id); } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_qdxt5.cpp000066400000000000000000000627301503722002600217130ustar00rootroot00000000000000// File: crn_qdxt5.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_qdxt5.h" #include "crn_dxt5a.h" #include "crn_image.h" #include "crn_image_utils.h" #include "crn_dxt_fast.h" #include "crn_dxt_hc_common.h" #define QDXT5_DEBUGGING 0 namespace crnlib { qdxt5::qdxt5(task_pool& task_pool) : m_pTask_pool(&task_pool), m_main_thread_id(0), m_canceled(false), m_progress_start(0), m_progress_range(100), m_num_blocks(0), m_pBlocks(NULL), m_pDst_elements(NULL), m_elements_per_block(0), m_max_selector_clusters(0), m_prev_percentage_complete(-1), m_selector_clusterizer(task_pool) { } qdxt5::~qdxt5() { } void qdxt5::clear() { m_main_thread_id = 0; m_num_blocks = 0; m_pBlocks = 0; m_pDst_elements = NULL; m_elements_per_block = 0; m_params.clear(); m_endpoint_clusterizer.clear(); m_endpoint_cluster_indices.clear(); m_max_selector_clusters = 0; m_canceled = false; m_progress_start = 0; m_progress_range = 100; m_selector_clusterizer.clear(); for (uint i = 0; i <= qdxt5_params::cMaxQuality; i++) m_cached_selector_cluster_indices[i].clear(); m_cluster_hash.clear(); m_prev_percentage_complete = -1; } bool qdxt5::init(uint n, const dxt_pixel_block* pBlocks, const qdxt5_params& params) { clear(); CRNLIB_ASSERT(n && pBlocks); m_main_thread_id = crn_get_current_thread_id(); m_num_blocks = n; m_pBlocks = pBlocks; m_params = params; m_endpoint_clusterizer.reserve_training_vecs(m_num_blocks); m_progress_start = 0; m_progress_range = 75; image_u8 debug_img; const bool debugging = true; if ((m_params.m_hierarchical) && (m_params.m_num_mips)) { vec2F_clusterizer::training_vec_array& training_vecs = m_endpoint_clusterizer.get_training_vecs(); training_vecs.resize(m_num_blocks); uint encoding_hist[cNumChunkEncodings]; 
utils::zero_object(encoding_hist); uint total_processed_blocks = 0; uint next_progress_threshold = 512; for (uint level = 0; level < m_params.m_num_mips; level++) { const qdxt5_params::mip_desc& level_desc = m_params.m_mip_desc[level]; const uint num_chunks_x = (level_desc.m_block_width + cChunkBlockWidth - 1) / cChunkBlockWidth; const uint num_chunks_y = (level_desc.m_block_height + cChunkBlockHeight - 1) / cChunkBlockHeight; const uint level_width = level_desc.m_block_width * 4; const uint level_height = level_desc.m_block_height * 4; if (debugging) debug_img.resize(num_chunks_x * cChunkPixelWidth, num_chunks_y * cChunkPixelHeight); for (uint chunk_y = 0; chunk_y < num_chunks_y; chunk_y++) { for (uint chunk_x = 0; chunk_x < num_chunks_x; chunk_x++) { color_quad_u8 chunk_pixels[cChunkPixelWidth * cChunkPixelHeight]; for (uint y = 0; y < cChunkPixelHeight; y++) { const uint pix_y = math::minimum(chunk_y * cChunkPixelHeight + y, level_height - 1); const uint outer_block_index = level_desc.m_first_block + ((pix_y >> 2) * level_desc.m_block_width); for (uint x = 0; x < cChunkPixelWidth; x++) { const uint pix_x = math::minimum(chunk_x * cChunkPixelWidth + x, level_width - 1); const uint block_index = outer_block_index + (pix_x >> 2); const dxt_pixel_block& block = m_pBlocks[block_index]; const color_quad_u8& p = block.m_pixels[pix_y & 3][pix_x & 3]; chunk_pixels[x + y * 8] = p; } } struct layout_results { uint m_low_color; uint m_high_color; uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight]; uint64 m_error; //float m_penalty; }; layout_results layouts[cNumChunkTileLayouts]; for (uint l = 0; l < cNumChunkTileLayouts; l++) { const uint width = g_chunk_tile_layouts[l].m_width; const uint height = g_chunk_tile_layouts[l].m_height; const uint x_ofs = g_chunk_tile_layouts[l].m_x_ofs; const uint y_ofs = g_chunk_tile_layouts[l].m_y_ofs; color_quad_u8 layout_pixels[cChunkPixelWidth * cChunkPixelHeight]; for (uint y = 0; y < height; y++) for (uint x = 0; x < width; x++) 
layout_pixels[x + y * width] = chunk_pixels[(x_ofs + x) + (y_ofs + y) * cChunkPixelWidth]; const uint n = width * height; dxt_fast::compress_alpha_block(n, layout_pixels, layouts[l].m_low_color, layouts[l].m_high_color, layouts[l].m_selectors, m_params.m_comp_index); uint c[dxt5_block::cMaxSelectorValues]; dxt5_block::get_block_values(c, layouts[l].m_low_color, layouts[l].m_high_color); uint64 error = 0; for (uint i = 0; i < n; i++) error += math::square((int)layout_pixels[i][m_params.m_comp_index] - (int)c[layouts[l].m_selectors[i]]); layouts[l].m_error = error; } double best_peak_snr = -1.0f; uint best_encoding = 0; for (uint e = 0; e < cNumChunkEncodings; e++) { const chunk_encoding_desc& encoding_desc = g_chunk_encodings[e]; double total_error = 0; for (uint t = 0; t < encoding_desc.m_num_tiles; t++) total_error += (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_error; double mean_squared = total_error * (1.0f / 64.0f); double root_mean_squared = sqrt(mean_squared); double peak_snr = 999999.0f; if (mean_squared) peak_snr = math::clamp(log10(255.0f / root_mean_squared) * 20.0f, 0.0f, 500.0f); float adaptive_tile_alpha_psnr_derating = 2.4f; //if (level) // adaptive_tile_alpha_psnr_derating = math::lerp(adaptive_tile_alpha_psnr_derating * .5f, .3f, math::maximum((level - 1) / float(m_params.m_num_mips - 2), 1.0f)); if ((level) && (adaptive_tile_alpha_psnr_derating > .25f)) { adaptive_tile_alpha_psnr_derating = math::maximum(.25f, adaptive_tile_alpha_psnr_derating / powf(3.0f, static_cast(level))); } float alpha_derating = math::lerp(0.0f, adaptive_tile_alpha_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f); peak_snr = peak_snr - alpha_derating; //for (uint t = 0; t < encoding_desc.m_num_tiles; t++) // peak_snr -= (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_penalty; if (peak_snr > best_peak_snr) { best_peak_snr = peak_snr; best_encoding = e; } } encoding_hist[best_encoding]++; const chunk_encoding_desc& encoding_desc = 
g_chunk_encodings[best_encoding]; for (uint t = 0; t < encoding_desc.m_num_tiles; t++) { const chunk_tile_desc& tile_desc = encoding_desc.m_tiles[t]; uint layout_index = tile_desc.m_layout_index; const layout_results& layout = layouts[layout_index]; uint c[dxt5_block::cMaxSelectorValues]; if (debugging) dxt5_block::get_block_values(c, layout.m_low_color, layout.m_high_color); color_quad_u8 tile_pixels[cChunkPixelWidth * cChunkPixelHeight]; for (uint y = 0; y < tile_desc.m_height; y++) { const uint pix_y = y + tile_desc.m_y_ofs; for (uint x = 0; x < tile_desc.m_width; x++) { const uint pix_x = x + tile_desc.m_x_ofs; uint a = chunk_pixels[pix_x + pix_y * cChunkPixelWidth][m_params.m_comp_index]; tile_pixels[x + y * tile_desc.m_width].set(a, a, a, 255); if (debugging) debug_img(chunk_x * 8 + pix_x, chunk_y * 8 + pix_y) = c[layout.m_selectors[x + y * tile_desc.m_width]]; } } color_quad_u8 l, h; dxt_fast::find_representative_colors(tile_desc.m_width * tile_desc.m_height, tile_pixels, l, h); const uint dist = math::square((int)l[0] - (int)h[0]); const int cAlphaErrorToWeight = 8; const uint cMaxWeight = 8; uint weight = math::clamp(dist / cAlphaErrorToWeight, 1, cMaxWeight); vec2F ev; ev[0] = l[0]; ev[1] = h[0]; for (uint y = 0; y < (tile_desc.m_height >> 2); y++) { uint block_y = chunk_y * cChunkBlockHeight + y + (tile_desc.m_y_ofs >> 2); if (block_y >= level_desc.m_block_height) continue; for (uint x = 0; x < (tile_desc.m_width >> 2); x++) { uint block_x = chunk_x * cChunkBlockWidth + x + (tile_desc.m_x_ofs >> 2); if (block_x >= level_desc.m_block_width) break; uint block_index = level_desc.m_first_block + block_x + block_y * level_desc.m_block_width; training_vecs[block_index].first = ev; training_vecs[block_index].second = weight; total_processed_blocks++; } // x } // y } //t if (total_processed_blocks >= next_progress_threshold) { next_progress_threshold += 512; if (!update_progress(total_processed_blocks, m_num_blocks - 1)) return false; } } // chunk_x } // chunk_y 
#if QDXT5_DEBUGGING if (debugging) image_utils::write_to_file(dynamic_wstring(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cWriteFlagIgnoreAlpha); #endif } // level #if 0 trace("chunk encoding hist: "); for (uint i = 0; i < cNumChunkEncodings; i++) trace("%u ", encoding_hist[i]); trace("\n"); #endif } else { for (uint block_index = 0; block_index < m_num_blocks; block_index++) { if ((block_index & 511) == 0) { if (!update_progress(block_index, m_num_blocks - 1)) return false; } color_quad_u8 c[16]; for (uint y = 0; y < cDXTBlockSize; y++) for (uint x = 0; x < cDXTBlockSize; x++) c[x + y * cDXTBlockSize].set(m_pBlocks[block_index].m_pixels[y][x][m_params.m_comp_index], 255); color_quad_u8 l, h; dxt_fast::find_representative_colors(cDXTBlockSize * cDXTBlockSize, c, l, h); const uint dist = math::square((int)l[0] - (int)h[0]); const int cAlphaErrorToWeight = 8; const uint cMaxWeight = 8; uint weight = math::clamp(dist / cAlphaErrorToWeight, 1, cMaxWeight); vec2F ev; ev[0] = l[0]; ev[1] = h[0]; m_endpoint_clusterizer.add_training_vec(ev, weight); } } const uint cMaxEndpointClusters = 65535U; m_progress_start = 75; m_progress_range = 20; if (!m_endpoint_clusterizer.generate_codebook(cMaxEndpointClusters, generate_codebook_progress_callback, this)) return false; crnlib::hash_map selector_hash; m_progress_start = 95; m_progress_range = 5; for (uint block_index = 0; block_index < m_num_blocks; block_index++) { if ((block_index & 511) == 0) { if (!update_progress(block_index, m_num_blocks - 1)) return false; } dxt5_block dxt_blk; dxt_fast::compress_alpha_block(&dxt_blk, &m_pBlocks[block_index].m_pixels[0][0], m_params.m_comp_index); uint64 selectors = 0; for (uint i = 0; i < dxt5_block::cNumSelectorBytes; i++) selectors |= static_cast(dxt_blk.m_selectors[i]) << (i * 8U); selector_hash.insert(selectors); } m_max_selector_clusters = selector_hash.size() + 128; update_progress(1, 1); return true; } bool qdxt5::update_progress(uint value, uint max_value) { 
if (!m_params.m_pProgress_func) return true; uint percentage = max_value ? (m_progress_start + (value * m_progress_range + (max_value / 2)) / max_value) : 100; if ((int)percentage == m_prev_percentage_complete) return true; m_prev_percentage_complete = percentage; if (!m_params.m_pProgress_func(m_params.m_progress_start + (percentage * m_params.m_progress_range) / 100U, m_params.m_pProgress_data)) { m_canceled = true; return false; } return true; } void qdxt5::pack_endpoints_task(uint64 data, void*) { const uint thread_index = static_cast(data); crnlib::vector cluster_pixels; cluster_pixels.reserve(1024); crnlib::vector selectors; selectors.reserve(1024); dxt5_endpoint_optimizer optimizer; dxt5_endpoint_optimizer::params p; dxt5_endpoint_optimizer::results r; p.m_quality = m_params.m_dxt_quality; p.m_comp_index = m_params.m_comp_index; p.m_use_both_block_types = m_params.m_use_both_block_types; uint cluster_index_progress_mask = math::next_pow2(m_endpoint_cluster_indices.size() / 100); cluster_index_progress_mask /= 2; cluster_index_progress_mask = math::maximum(cluster_index_progress_mask, 8); cluster_index_progress_mask -= 1; for (uint cluster_index = 0; cluster_index < m_endpoint_cluster_indices.size(); cluster_index++) { if (m_canceled) return; if ((cluster_index & cluster_index_progress_mask) == 0) { if (crn_get_current_thread_id() == m_main_thread_id) { if (!update_progress(cluster_index, m_endpoint_cluster_indices.size() - 1)) return; } } if (m_pTask_pool->get_num_threads()) { if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) continue; } const crnlib::vector& cluster_indices = m_endpoint_cluster_indices[cluster_index]; selectors.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); cluster_pixels.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); color_quad_u8* pDst = &cluster_pixels[0]; for (uint block_iter = 0; block_iter < cluster_indices.size(); block_iter++) { const uint block_index = 
cluster_indices[block_iter]; //const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels; for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { const color_quad_u8& src = pSrc_pixels[i]; *pDst++ = src; } } p.m_block_index = cluster_index; p.m_num_pixels = cluster_pixels.size(); p.m_pPixels = cluster_pixels.begin(); r.m_pSelectors = selectors.begin(); uint low_color; uint high_color; if (m_params.m_dxt_quality != cCRNDXTQualitySuperFast) { optimizer.compute(p, r); low_color = r.m_first_endpoint; high_color = r.m_second_endpoint; } else { dxt_fast::compress_alpha_block(cluster_pixels.size(), cluster_pixels.begin(), low_color, high_color, selectors.begin(), m_params.m_comp_index); } const uint8* pSrc_selectors = selectors.begin(); for (uint block_iter = 0; block_iter < cluster_indices.size(); block_iter++) { const uint block_index = cluster_indices[block_iter]; dxt5_block& dxt_block = get_block(block_index); dxt_block.set_low_alpha(low_color); dxt_block.set_high_alpha(high_color); for (uint y = 0; y < 4; y++) for (uint x = 0; x < 4; x++) dxt_block.set_selector(x, y, *pSrc_selectors++); } } } struct optimize_selectors_params { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(optimize_selectors_params); optimize_selectors_params( crnlib::vector >& selector_cluster_indices) : m_selector_cluster_indices(selector_cluster_indices) { } crnlib::vector >& m_selector_cluster_indices; }; void qdxt5::optimize_selectors_task(uint64 data, void* pData_ptr) { const uint thread_index = static_cast(data); optimize_selectors_params& task_params = *static_cast(pData_ptr); crnlib::vector block_categories[2]; block_categories[0].reserve(2048); block_categories[1].reserve(2048); for (uint cluster_index = 0; cluster_index < task_params.m_selector_cluster_indices.size(); cluster_index++) { if (m_canceled) return; if ((cluster_index & 255) == 0) { if (crn_get_current_thread_id() == m_main_thread_id) { 
if (!update_progress(cluster_index, task_params.m_selector_cluster_indices.size() - 1)) return; } } if (m_pTask_pool->get_num_threads()) { if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) continue; } const crnlib::vector& selector_indices = task_params.m_selector_cluster_indices[cluster_index]; if (selector_indices.size() <= 1) continue; block_categories[0].resize(0); block_categories[1].resize(0); for (uint block_iter = 0; block_iter < selector_indices.size(); block_iter++) { const uint block_index = selector_indices[block_iter]; const dxt5_block& src_block = get_block(block_index); block_categories[src_block.is_alpha6_block()].push_back(block_index); } dxt5_block blk; utils::zero_object(blk); for (uint block_type = 0; block_type <= 1; block_type++) { const crnlib::vector& block_indices = block_categories[block_type]; if (block_indices.size() <= 1) continue; for (uint y = 0; y < cDXTBlockSize; y++) { for (uint x = 0; x < cDXTBlockSize; x++) { uint best_s = 0; uint64 best_error = 0xFFFFFFFFFFULL; for (uint s = 0; s < dxt5_block::cMaxSelectorValues; s++) { uint64 total_error = 0; for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) { const uint block_index = block_indices[block_iter]; const color_quad_u8& orig_color = m_pBlocks[block_index].m_pixels[y][x]; const dxt5_block& dst_block = get_block(block_index); uint values[dxt5_block::cMaxSelectorValues]; dxt5_block::get_block_values(values, dst_block.get_low_alpha(), dst_block.get_high_alpha()); int error = math::square((int)orig_color[m_params.m_comp_index] - (int)values[s]); total_error += error; } if (total_error < best_error) { best_error = total_error; best_s = s; } } blk.set_selector(x, y, best_s); } // x } // y for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) { const uint block_index = block_indices[block_iter]; dxt5_block& dst_block = get_block(block_index); memcpy(dst_block.m_selectors, blk.m_selectors, 
sizeof(dst_block.m_selectors)); } } } // cluster_index } bool qdxt5::generate_codebook_progress_callback(uint percentage_completed, void* pData) { return static_cast(pData)->update_progress(percentage_completed, 100U); } bool qdxt5::create_selector_clusters(uint max_selector_clusters, crnlib::vector >& selector_cluster_indices) { weighted_selector_vec_array selector_vecs[2]; crnlib::vector selector_vec_remap[2]; for (uint block_type = 0; block_type < 2; block_type++) { for (uint block_iter = 0; block_iter < m_num_blocks; block_iter++) { dxt5_block& dxt5_block = get_block(block_iter); if ((uint)dxt5_block.is_alpha6_block() != block_type) continue; vec16F sv; float* pDst = &sv[0]; bool uses_absolute_values = false; for (uint y = 0; y < 4; y++) { for (uint x = 0; x < 4; x++) { const uint s = dxt5_block.get_selector(x, y); float f; if (dxt5_block.is_alpha6_block()) { if (s >= 6) { uses_absolute_values = true; f = 0.0f; } else f = g_dxt5_alpha6_to_linear[s]; } else f = g_dxt5_to_linear[s]; *pDst++ = f; } } if (uses_absolute_values) continue; int low_alpha = dxt5_block.get_low_alpha(); int high_alpha = dxt5_block.get_high_alpha(); int dist = math::square(low_alpha - high_alpha); const uint cAlphaDistToWeight = 8; const uint cMaxWeight = 2048; uint weight = math::clamp(dist / cAlphaDistToWeight, 1, cMaxWeight); selector_vecs[block_type].resize(selector_vecs[block_type].size() + 1); selector_vecs[block_type].back().m_vec = sv; selector_vecs[block_type].back().m_weight = weight; selector_vec_remap[block_type].push_back(block_iter); } } selector_cluster_indices.clear(); for (uint block_type = 0; block_type < 2; block_type++) { if (selector_vecs[block_type].empty()) continue; if ((selector_vecs[block_type].size() / (float)m_num_blocks) < .01f) continue; uint max_clusters = static_cast((math::emulu(selector_vecs[block_type].size(), max_selector_clusters) + (m_num_blocks - 1)) / m_num_blocks); max_clusters = math::minimum(math::maximum(64U, max_clusters), 
selector_vecs[block_type].size()); if (max_clusters >= selector_vecs[block_type].size()) continue; #if QDXT5_DEBUGGING trace("max_clusters (%u): %u\n", block_type, max_clusters); #endif crnlib::vector > block_type_selector_cluster_indices; if (!block_type) { m_progress_start = m_progress_range; m_progress_range = 16; } else { m_progress_start = m_progress_range + 16; m_progress_range = 17; } if (!m_selector_clusterizer.create_clusters( selector_vecs[block_type], max_clusters, block_type_selector_cluster_indices, generate_codebook_progress_callback, this)) { return false; } const uint first_cluster = selector_cluster_indices.size(); selector_cluster_indices.enlarge(block_type_selector_cluster_indices.size()); for (uint i = 0; i < block_type_selector_cluster_indices.size(); i++) { crnlib::vector& indices = selector_cluster_indices[first_cluster + i]; indices.swap(block_type_selector_cluster_indices[i]); for (uint j = 0; j < indices.size(); j++) indices.at(j) = selector_vec_remap[block_type][indices.at(j)]; } } return true; } bool qdxt5::pack(dxt5_block* pDst_elements, uint elements_per_block, const qdxt5_params& params) { CRNLIB_ASSERT(m_num_blocks); m_main_thread_id = crn_get_current_thread_id(); m_canceled = false; m_pDst_elements = pDst_elements; m_elements_per_block = elements_per_block; m_params = params; m_prev_percentage_complete = -1; CRNLIB_ASSERT(m_params.m_quality_level <= qdxt5_params::cMaxQuality); const float quality = m_params.m_quality_level / (float)qdxt5_params::cMaxQuality; const float endpoint_quality = powf(quality, 2.1f); const float selector_quality = powf(quality, 1.65f); const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 16U, m_endpoint_clusterizer.get_codebook_size()); const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 32U, m_max_selector_clusters); #if QDXT5_DEBUGGING trace("max endpoint clusters: %u\n", 
max_endpoint_clusters); trace("max selector clusters: %u\n", max_selector_clusters); #endif if (quality >= 1.0f) { m_endpoint_cluster_indices.resize(m_num_blocks); for (uint i = 0; i < m_num_blocks; i++) { m_endpoint_cluster_indices[i].resize(1); m_endpoint_cluster_indices[i][0] = i; } } else m_endpoint_clusterizer.retrieve_clusters(max_endpoint_clusters, m_endpoint_cluster_indices); uint max_blocks = 0; for (uint i = 0; i < m_endpoint_cluster_indices.size(); i++) { uint num = m_endpoint_cluster_indices[i].size(); max_blocks = math::maximum(max_blocks, num); } crnlib::vector >& selector_cluster_indices = m_cached_selector_cluster_indices[params.m_quality_level]; m_progress_start = 0; if (quality >= 1.0f) m_progress_range = 100; else if (selector_cluster_indices.empty()) m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 33; else m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 50; for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) m_pTask_pool->queue_object_task(this, &qdxt5::pack_endpoints_task, i); m_pTask_pool->join(); if (m_canceled) return false; if (quality >= 1.0f) return true; if (selector_cluster_indices.empty()) { create_selector_clusters(max_selector_clusters, selector_cluster_indices); if (m_canceled) { selector_cluster_indices.clear(); return false; } } m_progress_start += m_progress_range; m_progress_range = 100 - m_progress_start; optimize_selectors_params optimize_selectors_task_params(selector_cluster_indices); for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) m_pTask_pool->queue_object_task(this, &qdxt5::optimize_selectors_task, i, &optimize_selectors_task_params); m_pTask_pool->join(); return !m_canceled; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_qdxt5.h000066400000000000000000000111061503722002600213470ustar00rootroot00000000000000// File: crn_qdxt5.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_hash_map.h" 
#include "crn_clusterizer.h"
#include "crn_hash.h"
#include "crn_threaded_clusterizer.h"
#include "crn_dxt.h"
#include "crn_dxt_image.h"

// NOTE(review): several declarations below read as if their template argument lists were
// lost when this text was extracted (e.g. "crnlib::vector >", "static_cast(...)",
// "typedef clusterizer vec2F_clusterizer") -- confirm against the upstream header.

namespace crnlib {
// Settings for a qdxt5 pack. A default-constructed object holds the values set by clear().
struct qdxt5_params {
  qdxt5_params() {
    clear();
  }

  // Resets every field to its default: maximum quality level, cCRNDXTQualityUber, no progress
  // callback, no mip descriptions, hierarchical mode on, component index 3, progress window
  // 0..100, both block types allowed.
  void clear() {
    m_quality_level = cMaxQuality;
    m_dxt_quality = cCRNDXTQualityUber;
    m_pProgress_func = NULL;
    m_pProgress_data = NULL;
    m_num_mips = 0;
    m_hierarchical = true;
    utils::zero_object(m_mip_desc);
    m_comp_index = 3;
    m_progress_start = 0;
    m_progress_range = 100;
    m_use_both_block_types = true;
  }

  // Copies the DXT quality and block-type switch from pp and stores the remaining arguments.
  // The progress and mip fields are left untouched.
  void init(const dxt_image::pack_params& pp, int quality_level, bool hierarchical, int comp_index = 3) {
    m_dxt_quality = pp.m_quality;
    m_hierarchical = hierarchical;
    m_comp_index = comp_index;
    m_use_both_block_types = pp.m_use_both_block_types;
    m_quality_level = quality_level;
  }

  enum { cMaxQuality = cCRNMaxQualityLevel };
  // Must be in [0, cMaxQuality]; pack() asserts this.
  uint m_quality_level;
  crn_dxt_quality m_dxt_quality;
  bool m_hierarchical;

  // One mip level: index of its first block and its size in blocks.
  struct mip_desc {
    uint m_first_block;
    uint m_block_width;
    uint m_block_height;
  };

  uint m_num_mips;
  enum { cMaxMips = 128 };
  mip_desc m_mip_desc[cMaxMips];

  // Progress callback; returning false makes update_progress() report cancellation.
  typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data);
  progress_callback_func m_pProgress_func;
  void* m_pProgress_data;
  // Window into which the 0..100 percentage is rescaled before the callback is called.
  uint m_progress_start;
  uint m_progress_range;
  // Index of the pixel component that is encoded.
  uint m_comp_index;
  bool m_use_both_block_types;
};

// Clustered DXT5 alpha-block encoder: init() takes the source blocks, pack() writes encoded
// blocks at the quality level given in its params.
class qdxt5 {
  CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(qdxt5);

 public:
  qdxt5(task_pool& task_pool);
  ~qdxt5();
  void clear();
  bool init(uint n, const dxt_pixel_block* pBlocks, const qdxt5_params& params);
  uint get_num_blocks() const { return m_num_blocks; }
  const dxt_pixel_block* get_blocks() const { return m_pBlocks; }
  // Block i is written to pDst_elements[i * elements_per_block] (see get_block()).
  bool pack(dxt5_block* pDst_elements, uint elements_per_block, const qdxt5_params& params);

 private:
  task_pool* m_pTask_pool;
  // Thread that called pack(); the worker tasks only report progress from this thread.
  crn_thread_id_t m_main_thread_id;
  // Set by update_progress() when the progress callback returns false.
  bool m_canceled;
  // Progress window of the phase currently running.
  uint m_progress_start;
  uint m_progress_range;
  uint m_num_blocks;
  const dxt_pixel_block* m_pBlocks;
  dxt5_block* m_pDst_elements;
  uint m_elements_per_block;
  qdxt5_params m_params;
  uint m_max_selector_clusters;
  // Last percentage computed by update_progress(); pack() resets it to -1.
  int m_prev_percentage_complete;
  typedef vec<2, float> vec2F;
  typedef clusterizer vec2F_clusterizer;
  vec2F_clusterizer m_endpoint_clusterizer;
  // Block indices of each endpoint cluster used by the current pack().
  crnlib::vector > m_endpoint_cluster_indices;
  typedef vec<16, float> vec16F;
  typedef threaded_clusterizer vec16F_clusterizer;
  typedef vec16F_clusterizer::weighted_vec weighted_selector_vec;
  typedef vec16F_clusterizer::weighted_vec_array weighted_selector_vec_array;
  vec16F_clusterizer m_selector_clusterizer;
  // Selector clusters cached per quality level; pack() rebuilds an entry only when it is empty.
  crnlib::vector > m_cached_selector_cluster_indices[qdxt5_params::cMaxQuality + 1];

  // Hashable, order-independent identity of a set of indices: the indices are stored sorted
  // together with a hash of the sorted array.
  struct cluster_id {
    cluster_id()
        : m_hash(0) {
    }
    cluster_id(const crnlib::vector& indices) {
      set(indices);
    }
    void set(const crnlib::vector& indices) {
      m_cells.resize(indices.size());
      for (uint i = 0; i < indices.size(); i++)
        m_cells[i] = static_cast(indices[i]);
      std::sort(m_cells.begin(), m_cells.end());
      // NOTE(review): &m_cells[0] is evaluated even when indices is empty -- confirm callers
      // never pass an empty set.
      m_hash = fast_hash(&m_cells[0], sizeof(m_cells[0]) * m_cells.size());
    }
    bool operator<(const cluster_id& rhs) const {
      return m_cells < rhs.m_cells;
    }
    bool operator==(const cluster_id& rhs) const {
      // Compare the hashes first so that most mismatches skip the element-wise comparison.
      if (m_hash != rhs.m_hash)
        return false;
      return m_cells == rhs.m_cells;
    }
    crnlib::vector m_cells;
    size_t m_hash;
    operator size_t() const {
      return m_hash;
    }
  };
  typedef crnlib::hash_map cluster_hash;
  cluster_hash m_cluster_hash;
  spinlock m_cluster_hash_lock;
  static bool generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData);
  static bool generate_codebook_progress_callback(uint percentage_completed, void* pData);
  bool update_progress(uint value, uint max_value);
  void pack_endpoints_task(uint64 data, void* pData_ptr);
  void optimize_selectors_task(uint64 data, void* pData_ptr);
  bool create_selector_clusters(uint max_selector_clusters, crnlib::vector >& selector_cluster_indices);
  // Destination element of block `index`; consecutive blocks are m_elements_per_block apart.
  inline dxt5_block& get_block(uint index) const {
    return m_pDst_elements[index * m_elements_per_block];
  }
};
} // namespace crnlib
DaemonEngine-crunch-ef4d32f/crnlib/crn_radix_sort.h000066400000000000000000000163421503722002600224670ustar00rootroot00000000000000// File: crn_radix_sort.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { // Returns pointer to sorted array. template T* radix_sort(uint num_vals, T* pBuf0, T* pBuf1, uint key_ofs, uint key_size) { CRNLIB_ASSERT_OPEN_RANGE(key_ofs, 0, sizeof(T)); CRNLIB_ASSERT_CLOSED_RANGE(key_size, 1, 4); uint hist[256 * 4]; memset(hist, 0, sizeof(hist[0]) * 256 * key_size); #define CRNLIB_GET_KEY(p) (*(uint*)((uint8*)(p) + key_ofs)) if (key_size == 4) { T* p = pBuf0; T* q = pBuf0 + num_vals; for (; p != q; p++) { const uint key = CRNLIB_GET_KEY(p); hist[key & 0xFF]++; hist[256 + ((key >> 8) & 0xFF)]++; hist[512 + ((key >> 16) & 0xFF)]++; hist[768 + ((key >> 24) & 0xFF)]++; } } else if (key_size == 3) { T* p = pBuf0; T* q = pBuf0 + num_vals; for (; p != q; p++) { const uint key = CRNLIB_GET_KEY(p); hist[key & 0xFF]++; hist[256 + ((key >> 8) & 0xFF)]++; hist[512 + ((key >> 16) & 0xFF)]++; } } else if (key_size == 2) { T* p = pBuf0; T* q = pBuf0 + (num_vals >> 1) * 2; for (; p != q; p += 2) { const uint key0 = CRNLIB_GET_KEY(p); const uint key1 = CRNLIB_GET_KEY(p + 1); hist[key0 & 0xFF]++; hist[256 + ((key0 >> 8) & 0xFF)]++; hist[key1 & 0xFF]++; hist[256 + ((key1 >> 8) & 0xFF)]++; } if (num_vals & 1) { const uint key = CRNLIB_GET_KEY(p); hist[key & 0xFF]++; hist[256 + ((key >> 8) & 0xFF)]++; } } else { CRNLIB_ASSERT(key_size == 1); if (key_size != 1) return NULL; T* p = pBuf0; T* q = pBuf0 + (num_vals >> 1) * 2; for (; p != q; p += 2) { const uint key0 = CRNLIB_GET_KEY(p); const uint key1 = CRNLIB_GET_KEY(p + 1); hist[key0 & 0xFF]++; hist[key1 & 0xFF]++; } if (num_vals & 1) { const uint key = CRNLIB_GET_KEY(p); hist[key & 0xFF]++; } } T* pCur = pBuf0; T* pNew = pBuf1; for (uint pass = 0; pass < key_size; pass++) { const uint* pHist = &hist[pass << 8]; uint offsets[256]; uint cur_ofs = 0; for (uint i = 0; i 
< 256; i += 2) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; offsets[i + 1] = cur_ofs; cur_ofs += pHist[i + 1]; } const uint pass_shift = pass << 3; T* p = pCur; T* q = pCur + (num_vals >> 1) * 2; for (; p != q; p += 2) { uint c0 = (CRNLIB_GET_KEY(p) >> pass_shift) & 0xFF; uint c1 = (CRNLIB_GET_KEY(p + 1) >> pass_shift) & 0xFF; if (c0 == c1) { uint dst_offset0 = offsets[c0]; offsets[c0] = dst_offset0 + 2; pNew[dst_offset0] = p[0]; pNew[dst_offset0 + 1] = p[1]; } else { uint dst_offset0 = offsets[c0]++; uint dst_offset1 = offsets[c1]++; pNew[dst_offset0] = p[0]; pNew[dst_offset1] = p[1]; } } if (num_vals & 1) { uint c = (CRNLIB_GET_KEY(p) >> pass_shift) & 0xFF; uint dst_offset = offsets[c]; offsets[c] = dst_offset + 1; pNew[dst_offset] = *p; } T* t = pCur; pCur = pNew; pNew = t; } return pCur; } #undef CRNLIB_GET_KEY // Returns pointer to sorted array. template T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices) { CRNLIB_ASSERT_OPEN_RANGE(key_ofs, 0, sizeof(T)); CRNLIB_ASSERT_CLOSED_RANGE(key_size, 1, 4); if (init_indices) { T* p = pIndices0; T* q = pIndices0 + (num_indices >> 1) * 2; uint i; for (i = 0; p != q; p += 2, i += 2) { p[0] = static_cast(i); p[1] = static_cast(i + 1); } if (num_indices & 1) *p = static_cast(i); } uint hist[256 * 4]; memset(hist, 0, sizeof(hist[0]) * 256 * key_size); #define CRNLIB_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs)) #define CRNLIB_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs)) if (key_size == 4) { T* p = pIndices0; T* q = pIndices0 + num_indices; for (; p != q; p++) { const uint key = CRNLIB_GET_KEY(p); hist[key & 0xFF]++; hist[256 + ((key >> 8) & 0xFF)]++; hist[512 + ((key >> 16) & 0xFF)]++; hist[768 + ((key >> 24) & 0xFF)]++; } } else if (key_size == 3) { T* p = pIndices0; T* q = pIndices0 + num_indices; for (; p != q; p++) { const uint key = CRNLIB_GET_KEY(p); hist[key & 0xFF]++; hist[256 + 
((key >> 8) & 0xFF)]++; hist[512 + ((key >> 16) & 0xFF)]++; } } else if (key_size == 2) { T* p = pIndices0; T* q = pIndices0 + (num_indices >> 1) * 2; for (; p != q; p += 2) { const uint key0 = CRNLIB_GET_KEY(p); const uint key1 = CRNLIB_GET_KEY(p + 1); hist[key0 & 0xFF]++; hist[256 + ((key0 >> 8) & 0xFF)]++; hist[key1 & 0xFF]++; hist[256 + ((key1 >> 8) & 0xFF)]++; } if (num_indices & 1) { const uint key = CRNLIB_GET_KEY(p); hist[key & 0xFF]++; hist[256 + ((key >> 8) & 0xFF)]++; } } else { CRNLIB_ASSERT(key_size == 1); if (key_size != 1) return NULL; T* p = pIndices0; T* q = pIndices0 + (num_indices >> 1) * 2; for (; p != q; p += 2) { const uint key0 = CRNLIB_GET_KEY(p); const uint key1 = CRNLIB_GET_KEY(p + 1); hist[key0 & 0xFF]++; hist[key1 & 0xFF]++; } if (num_indices & 1) { const uint key = CRNLIB_GET_KEY(p); hist[key & 0xFF]++; } } T* pCur = pIndices0; T* pNew = pIndices1; for (uint pass = 0; pass < key_size; pass++) { const uint* pHist = &hist[pass << 8]; uint offsets[256]; uint cur_ofs = 0; for (uint i = 0; i < 256; i += 2) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; offsets[i + 1] = cur_ofs; cur_ofs += pHist[i + 1]; } const uint pass_shift = pass << 3; T* p = pCur; T* q = pCur + (num_indices >> 1) * 2; for (; p != q; p += 2) { uint index0 = p[0]; uint index1 = p[1]; uint c0 = (CRNLIB_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF; uint c1 = (CRNLIB_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF; if (c0 == c1) { uint dst_offset0 = offsets[c0]; offsets[c0] = dst_offset0 + 2; pNew[dst_offset0] = static_cast(index0); pNew[dst_offset0 + 1] = static_cast(index1); } else { uint dst_offset0 = offsets[c0]++; uint dst_offset1 = offsets[c1]++; pNew[dst_offset0] = static_cast(index0); pNew[dst_offset1] = static_cast(index1); } } if (num_indices & 1) { uint index = *p; uint c = (CRNLIB_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF; uint dst_offset = offsets[c]; offsets[c] = dst_offset + 1; pNew[dst_offset] = static_cast(index); } T* t = pCur; pCur = pNew; pNew = 
t; } return pCur; } #undef CRNLIB_GET_KEY #undef CRNLIB_GET_KEY_FROM_INDEX } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_rand.cpp000066400000000000000000000202161503722002600215630ustar00rootroot00000000000000// File: crn_rand.cpp // See Copyright Notice and license at the end of inc/crnlib.h // See: // http://www.ciphersbyritter.com/NEWS4/RANDC.HTM // http://burtleburtle.net/bob/rand/smallprng.html // http://www.cs.ucl.ac.uk/staff/d.jones/GoodPracticeRNG.pdf // See GPG7, page 120, or https://www.lomont.org/papers/2008/Lomont_PRNG_2008.pdf #include "crn_core.h" #include "crn_rand.h" #include "crn_hash.h" #define znew (z = 36969 * (z & 65535) + (z >> 16)) #define wnew (w = 18000 * (w & 65535) + (w >> 16)) #define MWC ((znew << 16) + wnew) #define SHR3 (jsr ^= (jsr << 17), jsr ^= (jsr >> 13), jsr ^= (jsr << 5)) #define CONG (jcong = 69069 * jcong + 1234567) #define FIB ((b = a + b), (a = b - a)) #define KISS ((MWC ^ CONG) + SHR3) #define LFIB4 (c++, t[c] = t[c] + t[UC(c + 58)] + t[UC(c + 119)] + t[UC(c + 178)]) #define SWB (c++, bro = (x < y), t[c] = (x = t[UC(c + 34)]) - (y = t[UC(c + 19)] + bro)) #define UNI (KISS * 2.328306e-10) #define VNI ((long)KISS) * 4.656613e-10 #define UC (unsigned char)/*a cast operation*/ //#define rot(x,k) (((x)<<(k))|((x)>>(32-(k)))) #define rot(x, k) CRNLIB_ROTATE_LEFT(x, k) namespace crnlib { static const double cNorm = 1.0 / (double)0x100000000ULL; kiss99::kiss99() { x = 123456789; y = 362436000; z = 521288629; c = 7654321; } void kiss99::seed(uint32 i, uint32 j, uint32 k) { x = i; y = j; z = k; c = 7654321; } inline uint32 kiss99::next() { x = 69069 * x + 12345; y ^= (y << 13); y ^= (y >> 17); y ^= (y << 5); uint64 t = c; t += (698769069ULL * z); c = static_cast(t >> 32); z = static_cast(t); return (x + y + z); } inline uint32 ranctx::next() { uint32 e = a - rot(b, 27); a = b ^ rot(c, 17); b = c + d; c = d + e; d = e + a; return d; } void ranctx::seed(uint32 seed) { a = 0xf1ea5eed, b = c = d = seed; for (uint32 i = 0; 
i < 20; ++i) next(); } well512::well512() { seed(0xDEADBE3F); } void well512::seed(uint32 seed[well512::cStateSize]) { memcpy(m_state, seed, sizeof(m_state)); m_index = 0; } void well512::seed(uint32 seed) { uint32 jsr = utils::swap32(seed) ^ 0xAAC29377; for (uint i = 0; i < cStateSize; i++) { SHR3; seed = bitmix32c(seed); m_state[i] = seed ^ jsr; } m_index = 0; } void well512::seed(uint32 seed1, uint32 seed2, uint32 seed3) { uint32 jsr = seed2; uint32 jcong = seed3; for (uint i = 0; i < cStateSize; i++) { SHR3; seed1 = bitmix32c(seed1); CONG; m_state[i] = seed1 ^ jsr ^ jcong; } m_index = 0; } inline uint32 well512::next() { uint32 a, b, c, d; a = m_state[m_index]; c = m_state[(m_index + 13) & 15]; b = a ^ c ^ (a << 16) ^ (c << 15); c = m_state[(m_index + 9) & 15]; c ^= (c >> 11); a = m_state[m_index] = b ^ c; d = a ^ ((a << 5) & 0xDA442D20UL); m_index = (m_index + 15) & 15; a = m_state[m_index]; m_state[m_index] = a ^ b ^ d ^ (a << 2) ^ (b << 18) ^ (c << 28); return m_state[m_index]; } random::random() { seed(12345, 65435, 34221); } random::random(uint32 i) { seed(i); } void random::seed(uint32 i1, uint32 i2, uint32 i3) { m_ranctx.seed(i1 ^ i2 ^ i3); m_kiss99.seed(i1, i2, i3); m_well512.seed(i1, i2, i3); for (uint i = 0; i < 100; i++) urand32(); } void random::seed(uint32 i) { uint32 jsr = i; SHR3; SHR3; uint32 jcong = utils::swap32(~jsr); CONG; CONG; uint32 i1 = SHR3 ^ CONG; uint32 i2 = SHR3 ^ CONG; uint32 i3 = SHR3 + CONG; seed(i1, i2, i3); } uint32 random::urand32() { return m_kiss99.next() ^ (m_ranctx.next() + m_well512.next()); } uint64 random::urand64() { uint64 result = urand32(); result <<= 32ULL; result |= urand32(); return result; } uint32 random::fast_urand32() { return m_well512.next(); } uint32 random::bit() { uint32 k = urand32(); return (k ^ (k >> 6) ^ (k >> 10) ^ (k >> 30)) & 1; } double random::drand(double l, double h) { CRNLIB_ASSERT(l <= h); if (l >= h) return l; return math::clamp(l + (h - l) * (urand32() * cNorm), l, h); } float 
random::frand(float l, float h) { CRNLIB_ASSERT(l <= h); if (l >= h) return l; float r = static_cast(l + (h - l) * (urand32() * cNorm)); return math::clamp(r, l, h); } int random::irand(int l, int h) { CRNLIB_ASSERT(l < h); if (l >= h) return l; uint32 range = static_cast(h - l); uint32 rnd = urand32(); #if defined(_M_IX86) && defined(_MSC_VER) //uint32 rnd_range = static_cast(__emulu(range, rnd) >> 32U); uint32 x[2]; *reinterpret_cast(x) = __emulu(range, rnd); uint32 rnd_range = x[1]; #else uint32 rnd_range = static_cast((((uint64)range) * ((uint64)rnd)) >> 32U); #endif int result = l + rnd_range; CRNLIB_ASSERT((result >= l) && (result < h)); return result; } int random::irand_inclusive(int l, int h) { CRNLIB_ASSERT(h < cINT32_MAX); return irand(l, h + 1); } /* ALGORITHM 712, COLLECTED ALGORITHMS FROM ACM. THIS WORK PUBLISHED IN TRANSACTIONS ON MATHEMATICAL SOFTWARE, VOL. 18, NO. 4, DECEMBER, 1992, PP. 434-435. The function returns a normally distributed pseudo-random number with a given mean and standard devaiation. Calls are made to a function subprogram which must return independent random numbers uniform in the interval (0,1). The algorithm uses the ratio of uniforms method of A.J. Kinderman and J.F. Monahan augmented with quadratic bounding curves. */ double random::gaussian(double mean, double stddev) { double q, u, v, x, y; /* Generate P = (u,v) uniform in rect. enclosing acceptance region Make sure that any random numbers <= 0 are rejected, since gaussian() requires uniforms > 0, but RandomUniform() delivers >= 0. 
*/ do { u = drand(0, 1); v = drand(0, 1); if (u <= 0.0 || v <= 0.0) { u = 1.0; v = 1.0; } v = 1.7156 * (v - 0.5); /* Evaluate the quadratic form */ x = u - 0.449871; y = fabs(v) + 0.386595; q = x * x + y * (0.19600 * y - 0.25472 * x); /* Accept P if inside inner ellipse */ if (q < 0.27597) break; /* Reject P if outside outer ellipse, or outside acceptance region */ } while ((q > 0.27846) || (v * v > -4.0 * log(u) * u * u)); /* Return ratio of P's coordinates as the normal deviate */ return (mean + stddev * v / u); } void random::test() { } fast_random::fast_random() : jsr(0xABCD917A), jcong(0x17F3DEAD) { } fast_random::fast_random(const fast_random& other) : jsr(other.jsr), jcong(other.jcong) { } fast_random::fast_random(uint32 i) { seed(i); } fast_random& fast_random::operator=(const fast_random& other) { jsr = other.jsr; jcong = other.jcong; return *this; } void fast_random::seed(uint32 i) { jsr = i; SHR3; SHR3; jcong = (~i) ^ 0xDEADBEEF; SHR3; CONG; } uint32 fast_random::urand32() { return SHR3 ^ CONG; } uint64 fast_random::urand64() { uint64 result = urand32(); result <<= 32ULL; result |= urand32(); return result; } int fast_random::irand(int l, int h) { CRNLIB_ASSERT(l < h); if (l >= h) return l; uint32 range = static_cast(h - l); uint32 rnd = urand32(); #if defined(_M_IX86) && defined(_MSC_VER) //uint32 rnd_range = static_cast(__emulu(range, rnd) >> 32U); uint32 x[2]; *reinterpret_cast(x) = __emulu(range, rnd); uint32 rnd_range = x[1]; #else uint32 rnd_range = static_cast((((uint64)range) * ((uint64)rnd)) >> 32U); #endif int result = l + rnd_range; CRNLIB_ASSERT((result >= l) && (result < h)); return result; } double fast_random::drand(double l, double h) { CRNLIB_ASSERT(l <= h); if (l >= h) return l; return math::clamp(l + (h - l) * (urand32() * cNorm), l, h); } float fast_random::frand(float l, float h) { CRNLIB_ASSERT(l <= h); if (l >= h) return l; float r = static_cast(l + (h - l) * (urand32() * cNorm)); return math::clamp(r, l, h); } } // namespace 
crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_rand.h000066400000000000000000000035541503722002600212360ustar00rootroot00000000000000// File: crn_rand.h
// See Copyright Notice and license at the end of inc/crnlib.h
#pragma once

namespace crnlib {
// Generator with four 32-bit state words; seed() sets x, y and z, and c is reset to a constant.
class kiss99 {
 public:
  kiss99();

  void seed(uint32 i, uint32 j, uint32 k);

  inline uint32 next();

 private:
  uint32 x;
  uint32 y;
  uint32 z;
  uint32 c;
};

// Generator with cStateSize (16) 32-bit state words plus a rotating index.
class well512 {
 public:
  well512();

  enum { cStateSize = 16 };
  // Copies a complete state.
  void seed(uint32 seed[cStateSize]);
  // Expands one word into a complete state.
  void seed(uint32 seed);
  // Expands three words into a complete state.
  void seed(uint32 seed1, uint32 seed2, uint32 seed3);

  inline uint32 next();

 private:
  uint32 m_state[cStateSize];
  uint32 m_index;
};

// Generator with four 32-bit state words; seed() also discards the first 20 outputs.
class ranctx {
 public:
  ranctx() { seed(0xDE149737); }

  void seed(uint32 seed);

  inline uint32 next();

 private:
  uint32 a;
  uint32 b;
  uint32 c;
  uint32 d;
};

// General-purpose PRNG that combines the outputs of ranctx, kiss99 and well512.
class random {
 public:
  random();
  random(uint32 i);

  void seed(uint32 i);
  void seed(uint32 i1, uint32 i2, uint32 i3);

  uint32 urand32();
  uint64 urand64();

  // "Fast" variant uses no multiplies.
  uint32 fast_urand32();

  // Returns 0 or 1.
  uint32 bit();

  // Returns a value in [l, h]: l plus a fraction in [0, 1) of (h - l), clamped to the
  // range. Returns l when l >= h.
  double drand(double l, double h);

  // Single-precision variant of drand().
  float frand(float l, float h);

  // Returns random between [l, h)
  int irand(int l, int h);

  // Returns random between [l, h]
  int irand_inclusive(int l, int h);

  // Normally distributed value with the given mean and standard deviation.
  double gaussian(double mean, double stddev);

  void test();

 private:
  ranctx m_ranctx;
  kiss99 m_kiss99;
  well512 m_well512;
};

// Simpler, minimal state PRNG
class fast_random {
 public:
  fast_random();
  fast_random(uint32 i);
  fast_random(const fast_random& other);
  fast_random& operator=(const fast_random& other);

  void seed(uint32 i);

  uint32 urand32();
  uint64 urand64();

  // Returns an integer in [l, h).
  int irand(int l, int h);

  // Same contracts as random::drand and random::frand.
  double drand(double l, double h);

  float frand(float l, float h);

 private:
  uint32 jsr;
  uint32 jcong;
};

} // namespace crnlib
DaemonEngine-crunch-ef4d32f/crnlib/crn_ray.h000066400000000000000000000024701503722002600211010ustar00rootroot00000000000000// File: crn_ray.h
// See Copyright Notice and license at the end of inc/crnlib.h
#pragma once

#include "crn_vec.h"

// NOTE(review): "template class ray" and "typedef ray ray2F" below read as if their template
// parameter/argument lists were lost when this text was extracted -- confirm against the
// upstream header.

namespace crnlib {
// A ray stored as an origin and a direction vector.
template class ray {
 public:
  typedef vector_type vector_t;
  typedef typename vector_type::scalar_type scalar_type;

  // The default constructor leaves both members as vector_type's default constructor leaves them.
  inline ray() {}
  inline ray(eClear) { clear(); }
  inline ray(const vector_type& origin, const vector_type& direction)
      : m_origin(origin), m_direction(direction) {}

  inline void clear() {
    m_origin.clear();
    m_direction.clear();
  }

  inline const vector_type& get_origin(void) const { return m_origin; }
  inline void set_origin(const vector_type& origin) { m_origin = origin; }

  inline const vector_type& get_direction(void) const { return m_direction; }
  inline void set_direction(const vector_type& direction) { m_direction = direction; }

  // Points the ray from start towards end and returns the result of normalizing the
  // direction; def is handed to normalize().
  // NOTE(review): presumably normalize() returns the original length and falls back to def
  // for a zero-length direction -- confirm in crn_vec.h.
  inline scalar_type set_endpoints(const vector_type& start, const vector_type& end, const vector_type& def) {
    m_origin = start;

    m_direction = end - start;
    return m_direction.normalize(&def);
  }

  // Point on the ray at parameter t: origin + direction * t.
  inline vector_type eval(scalar_type t) const {
    return m_origin + m_direction * t;
  }

 private:
  vector_type m_origin;
  vector_type m_direction;
};

typedef ray ray2F;
typedef ray ray3F;

} // namespace crnlib
DaemonEngine-crunch-ef4d32f/crnlib/crn_rect.h000066400000000000000000000105001503722002600212340ustar00rootroot00000000000000// File: crn_rect.h
// See Copyright Notice and license at the end of inc/crnlib.h
#pragma once

#include "crn_vec.h"
#include "crn_hash.h"

namespace crnlib {
class rect {
 public:
  inline rect() {
  }

  inline rect(eClear) {
    clear();
  }

  // up to, but not including right/bottom
  inline rect(int left, int top, int right, int bottom) {
    set(left, top, right, bottom);
  }

  inline rect(const vec2I& lo, const vec2I& hi) {
    m_corner[0] = lo;
    m_corner[1] = hi;
  }

  inline rect(const vec2I& point) {
    m_corner[0] = point;
    m_corner[1].set(point[0] + 1, point[1] + 1);
  }

  inline bool operator==(const rect& r) const {
    return (m_corner[0] == r.m_corner[0]) && (m_corner[1] == r.m_corner[1]);
  }

  inline bool operator<(const rect& r) const {
    for (uint i = 0; i < 2; i++) {
      if (m_corner[i] < r.m_corner[i])
        return true;
      else if
(!(m_corner[i] == r.m_corner[i])) return false; } return false; } inline void clear() { m_corner[0].clear(); m_corner[1].clear(); } inline void set(int left, int top, int right, int bottom) { m_corner[0].set(left, top); m_corner[1].set(right, bottom); } inline void set(const vec2I& lo, const vec2I& hi) { m_corner[0] = lo; m_corner[1] = hi; } inline void set(const vec2I& point) { m_corner[0] = point; m_corner[1].set(point[0] + 1, point[1] + 1); } inline uint get_width() const { return m_corner[1][0] - m_corner[0][0]; } inline uint get_height() const { return m_corner[1][1] - m_corner[0][1]; } inline int get_left() const { return m_corner[0][0]; } inline int get_top() const { return m_corner[0][1]; } inline int get_right() const { return m_corner[1][0]; } inline int get_bottom() const { return m_corner[1][1]; } inline bool is_empty() const { return (m_corner[1][0] <= m_corner[0][0]) || (m_corner[1][1] <= m_corner[0][1]); } inline uint get_dimension(uint axis) const { return m_corner[1][axis] - m_corner[0][axis]; } inline uint get_area() const { return get_dimension(0) * get_dimension(1); } inline const vec2I& operator[](uint i) const { CRNLIB_ASSERT(i < 2); return m_corner[i]; } inline vec2I& operator[](uint i) { CRNLIB_ASSERT(i < 2); return m_corner[i]; } inline rect& translate(int x_ofs, int y_ofs) { m_corner[0][0] += x_ofs; m_corner[0][1] += y_ofs; m_corner[1][0] += x_ofs; m_corner[1][1] += y_ofs; return *this; } inline rect& init_expand() { m_corner[0].set(INT_MAX); m_corner[1].set(INT_MIN); return *this; } inline rect& expand(int x, int y) { m_corner[0][0] = math::minimum(m_corner[0][0], x); m_corner[0][1] = math::minimum(m_corner[0][1], y); m_corner[1][0] = math::maximum(m_corner[1][0], x + 1); m_corner[1][1] = math::maximum(m_corner[1][1], y + 1); return *this; } inline rect& expand(const rect& r) { m_corner[0][0] = math::minimum(m_corner[0][0], r[0][0]); m_corner[0][1] = math::minimum(m_corner[0][1], r[0][1]); m_corner[1][0] = math::maximum(m_corner[1][0], 
r[1][0]); m_corner[1][1] = math::maximum(m_corner[1][1], r[1][1]); return *this; } inline bool touches(const rect& r) const { for (uint i = 0; i < 2; i++) { if (r[1][i] <= m_corner[0][i]) return false; else if (r[0][i] >= m_corner[1][i]) return false; } return true; } inline bool within(const rect& r) const { for (uint i = 0; i < 2; i++) { if (m_corner[0][i] < r[0][i]) return false; else if (m_corner[1][i] > r[1][i]) return false; } return true; } inline bool intersect(const rect& r) { if (!touches(r)) { clear(); return false; } for (uint i = 0; i < 2; i++) { m_corner[0][i] = math::maximum(m_corner[0][i], r[0][i]); m_corner[1][i] = math::minimum(m_corner[1][i], r[1][i]); } return true; } inline bool contains(int x, int y) const { return (x >= m_corner[0][0]) && (x < m_corner[1][0]) && (y >= m_corner[0][1]) && (y < m_corner[1][1]); } inline bool contains(const vec2I& p) const { return contains(p[0], p[1]); } private: vec2I m_corner[2]; }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_resample_filters.cpp000066400000000000000000000172311503722002600242020ustar00rootroot00000000000000// File: crn_resample_filters.cpp // RG: This is public domain code, originally derived from Graphics Gems 3, see: http://code.google.com/p/imageresampler/ #include "crn_core.h" #include "crn_resample_filters.h" namespace crnlib { #ifndef M_PI #define M_PI 3.14159265358979323846 #endif // To add your own filter, insert the new function below and update the filter table. // There is no need to make the filter function particularly fast, because it's // only called during initializing to create the X and Y axis contributor tables. 
// Box filter (pulse / Fourier window): unit weight on the half-open interval [-0.5, 0.5).
#define BOX_FILTER_SUPPORT (0.5f)
static float box_filter(float t) {
  // make_clist() calls the filter function with t inverted (pos = left, neg = right),
  // which is why the interval is closed on the left and open on the right.
  const bool inside = (t >= -0.5f) && (t < 0.5f);
  return inside ? 1.0f : 0.0f;
}

// Tent filter (box (*) box, bilinear/triangle): falls linearly from 1 at t = 0 to 0 at |t| = 1.
#define TENT_FILTER_SUPPORT (1.0f)
static float tent_filter(float t) {
  const float a = (t < 0.0f) ? -t : t;
  return (a < 1.0f) ? (1.0f - a) : 0.0f;
}

// Bell filter (box (*) box (*) box): piecewise quadratic with support |t| < 1.5.
#define BELL_SUPPORT (1.5f)
static float bell_filter(float t) {
  const float a = (t < 0.0f) ? -t : t;

  // Central lobe.
  if (a < .5f)
    return (.75f - (a * a));

  // Outer shoulders.
  if (a < 1.5f) {
    const float d = (a - 1.5f);
    return (.5f * (d * d));
  }

  return (0.0f);
}

// Cubic B-spline filter (box (*) box (*) box (*) box): piecewise cubic with support |t| < 2.
#define B_SPLINE_SUPPORT (2.0f)
static float B_spline_filter(float t) {
  const float a = (t < 0.0f) ? -t : t;

  if (a < 1.0f) {
    const float sq = a * a;
    return ((.5f * sq * a) - sq + (2.0f / 3.0f));
  }

  if (a < 2.0f) {
    const float r = 2.0f - a;
    return ((1.0f / 6.0f) * (r * r * r));
  }

  return (0.0f);
}

// Dodgson, N., "Quadratic Interpolation for Image Resampling"
// R selects the member of the quadratic family; see the three wrappers below.
#define QUADRATIC_SUPPORT 1.5f
static float quadratic(float t, const float R) {
  const float a = (t < 0.0f) ? -t : t;

  // Outside the support (or not comparable, e.g. NaN): no contribution.
  if (!(a < QUADRATIC_SUPPORT))
    return 0.0f;

  const float sq = a * a;
  if (a <= .5f)
    return (-2.0f * R) * sq + .5f * (R + 1.0f);

  return (R * sq) + (-2.0f * R - .5f) * a + (3.0f / 4.0f) * (R + 1.0f);
}

// Quadratic family with R = 1.
static float quadratic_interp_filter(float t) {
  return quadratic(t, 1.0f);
}

// Quadratic family with R = 0.5.
static float quadratic_approx_filter(float t) {
  return quadratic(t, .5f);
}

// Quadratic family with R = 0.8.
static float quadratic_mix_filter(float t) {
  return quadratic(t, .8f);
}

// Mitchell, D. and A. Netravali, "Reconstruction Filters in Computer Graphics."
// Computer Graphics, Vol. 22, No. 4, pp. 221-228.
// (B, C)
// (1/3, 1/3) - Defaults recommended by Mitchell and Netravali
// (1, 0) - Equivalent to the Cubic B-Spline
// (0, 0.5) - Equivalent to the Catmull-Rom Spline
// (0, C) - The family of Cardinal Cubic Splines
// (B, 0) - Duff's tensioned B-Splines.
// Evaluates the two-parameter (B, C) piecewise cubic described above at t.
// The kernel is symmetric and is zero for |t| >= 2.
static float mitchell(float t, const float B, const float C) {
  // t * t does not depend on the sign of t, so it is taken before folding t to |t|.
  const float sq = t * t;
  const float a = (t < 0.0f) ? -t : t;

  if (a < 1.0f) {
    const float inner = (((12.0f - 9.0f * B - 6.0f * C) * (a * sq)) + ((-18.0f + 12.0f * B + 6.0f * C) * sq) + (6.0f - 2.0f * B));
    return (inner / 6.0f);
  }

  if (a < 2.0f) {
    const float outer = (((-1.0f * B - 6.0f * C) * (a * sq)) + ((6.0f * B + 30.0f * C) * sq) + ((-12.0f * B - 48.0f * C) * a) + (8.0f * B + 24.0f * C));
    return (outer / 6.0f);
  }

  return (0.0f);
}

// Mitchell-Netravali filter with B = C = 1/3.
#define MITCHELL_SUPPORT (2.0f)
static float mitchell_filter(float t) {
  return mitchell(t, 1.0f / 3.0f, 1.0f / 3.0f);
}

// Catmull-Rom spline: B = 0, C = 0.5.
#define CATMULL_ROM_SUPPORT (2.0f)
static float catmull_rom_filter(float t) {
  return mitchell(t, 0.0f, .5f);
}

// Normalized sinc, sin(pi * x) / (pi * x).
// Close to zero a short polynomial is used instead of the quotient.
static double sinc(double x) {
  const double px = (x * M_PI);

  const bool near_zero = (px < 0.01f) && (px > -0.01f);
  if (near_zero)
    return 1.0f + px * px * (-1.0f / 6.0f + px * px * 1.0f / 120.0f);

  return sin(px) / px;
}

// Flushes values whose magnitude is below a small epsilon to exactly zero and narrows to float.
static float clean(double t) {
  const float EPSILON = .0000125f;
  return (fabs(t) < EPSILON) ? 0.0f : (float)t;
}

//static double blackman_window(double x)
//{
//  return .42f + .50f * cos(M_PI*x) + .08f * cos(2.0f*M_PI*x);
//}

// Three-term cosine window using the "exact" Blackman coefficients.
static double blackman_exact_window(double x) {
  const double c1 = cos(M_PI * x);
  const double c2 = cos(2.0f * M_PI * x);
  return 0.42659071f + 0.49656062f * c1 + 0.07684867f * c2;
}

// Sinc windowed by the exact Blackman window over |t| < 3.
#define BLACKMAN_SUPPORT (3.0f)
static float blackman_filter(float t) {
  const float a = (t < 0.0f) ? -t : t;

  if (a < 3.0f) {
    //return clean(sinc(a) * blackman_window(a / 3.0f));
    return clean(sinc(a) * blackman_exact_window(a / 3.0f));
  }

  return (0.0f);
}

// Gaussian with a Blackman window applied over |t| < GAUSSIAN_SUPPORT.
#define GAUSSIAN_SUPPORT (1.25f)
static float gaussian_filter(float t) {
  const float a = (t < 0) ? -t : t;

  if (a < GAUSSIAN_SUPPORT)
    return clean(exp(-2.0f * a * a) * sqrt(2.0f / M_PI) * blackman_exact_window(a / GAUSSIAN_SUPPORT));

  return 0.0f;
}

// Windowed sinc -- see "Jim Blinn's Corner: Dirty Pixels" pg. 26.
#define LANCZOS3_SUPPORT (3.0f) static float lanczos3_filter(float t) { if (t < 0.0f) t = -t; if (t < 3.0f) return clean(sinc(t) * sinc(t / 3.0f)); else return (0.0f); } #define LANCZOS4_SUPPORT (4.0f) static float lanczos4_filter(float t) { if (t < 0.0f) t = -t; if (t < 4.0f) return clean(sinc(t) * sinc(t / 4.0f)); else return (0.0f); } #define LANCZOS6_SUPPORT (6.0f) static float lanczos6_filter(float t) { if (t < 0.0f) t = -t; if (t < 6.0f) return clean(sinc(t) * sinc(t / 6.0f)); else return (0.0f); } #define LANCZOS12_SUPPORT (12.0f) static float lanczos12_filter(float t) { if (t < 0.0f) t = -t; if (t < 12.0f) return clean(sinc(t) * sinc(t / 12.0f)); else return (0.0f); } static double bessel0(double x) { const double EPSILON_RATIO = 1E-16; double xh, sum, pow, ds; int k; xh = 0.5 * x; sum = 1.0; pow = 1.0; k = 0; ds = 1.0; while (ds > sum * EPSILON_RATIO) // FIXME: Shouldn't this stop after X iterations for max. safety? { ++k; pow = pow * (xh / k); ds = pow * pow; sum = sum + ds; } return sum; } static double kaiser(double alpha, double half_width, double x) { const double ratio = (x / half_width); return bessel0(alpha * sqrt(1 - ratio * ratio)) / bessel0(alpha); } #define KAISER_SUPPORT 3 static float kaiser_filter(float t) { if (t < 0.0f) t = -t; if (t < KAISER_SUPPORT) { // db atten const float att = 40.0f; const float alpha = (float)(exp(log((double)0.58417 * (att - 20.96)) * 0.4) + 0.07886 * (att - 20.96)); return (float)clean(sinc(t) * kaiser(alpha, KAISER_SUPPORT, t)); } return 0.0f; } const resample_filter g_resample_filters[] = { {"box", box_filter, BOX_FILTER_SUPPORT}, {"tent", tent_filter, TENT_FILTER_SUPPORT}, {"bell", bell_filter, BELL_SUPPORT}, {"b-spline", B_spline_filter, B_SPLINE_SUPPORT}, {"mitchell", mitchell_filter, MITCHELL_SUPPORT}, {"lanczos3", lanczos3_filter, LANCZOS3_SUPPORT}, {"blackman", blackman_filter, BLACKMAN_SUPPORT}, {"lanczos4", lanczos4_filter, LANCZOS4_SUPPORT}, {"lanczos6", lanczos6_filter, LANCZOS6_SUPPORT}, {"lanczos12", 
lanczos12_filter, LANCZOS12_SUPPORT}, {"kaiser", kaiser_filter, KAISER_SUPPORT}, {"gaussian", gaussian_filter, GAUSSIAN_SUPPORT}, {"catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT}, {"quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT}, {"quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT}, {"quadratic_mix", quadratic_mix_filter, QUADRATIC_SUPPORT}, }; const int g_num_resample_filters = sizeof(g_resample_filters) / sizeof(g_resample_filters[0]); int find_resample_filter(const char* pName) { for (int i = 0; i < g_num_resample_filters; i++) if (crnlib_stricmp(pName, g_resample_filters[i].name) == 0) return i; return cInvalidIndex; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_resample_filters.h000066400000000000000000000007751503722002600236540ustar00rootroot00000000000000// File: crn_resample_filters.h // RG: This is public domain code, originally derived from Graphics Gems 3, see: http://code.google.com/p/imageresampler/ #pragma once namespace crnlib { typedef float (*resample_filter_func)(float t); struct resample_filter { char name[32]; resample_filter_func func; float support; }; extern const resample_filter g_resample_filters[]; extern const int g_num_resample_filters; int find_resample_filter(const char* pName); } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_resampler.cpp000066400000000000000000000522411503722002600226340ustar00rootroot00000000000000// File: crn_resampler.h // RG: This is public domain code, originally derived from Graphics Gems 3, see: http://code.google.com/p/imageresampler/ #include "crn_core.h" #include "crn_resampler.h" #include "crn_resample_filters.h" namespace crnlib { #define resampler_assert CRNLIB_ASSERT static inline int resampler_range_check(int v, int h) { (void)h; resampler_assert((v >= 0) && (v < h)); return v; } #ifndef max #define max(a, b) (((a) > (b)) ? (a) : (b)) #endif #ifndef min #define min(a, b) (((a) < (b)) ? 
(a) : (b)) #endif #ifndef TRUE #define TRUE (1) #endif #ifndef FALSE #define FALSE (0) #endif #define RESAMPLER_DEBUG 0 // (x mod y) with special handling for negative x values. static inline int posmod(int x, int y) { if (x >= 0) return (x % y); else { int m = (-x) % y; if (m != 0) m = y - m; return (m); } } // Float to int cast with truncation. static inline int cast_to_int(Resample_Real i) { return (int)i; } /* Ensure that the contributing source sample is * within bounds. If not, reflect, clamp, or wrap. */ int Resampler::reflect(const int j, const int src_x, const Boundary_Op boundary_op) { int n; if (j < 0) { if (boundary_op == BOUNDARY_REFLECT) { n = -j; if (n >= src_x) n = src_x - 1; } else if (boundary_op == BOUNDARY_WRAP) n = posmod(j, src_x); else n = 0; } else if (j >= src_x) { if (boundary_op == BOUNDARY_REFLECT) { n = (src_x - j) + (src_x - 1); if (n < 0) n = 0; } else if (boundary_op == BOUNDARY_WRAP) n = posmod(j, src_x); else n = src_x - 1; } else n = j; return n; } // The make_clist() method generates, for all destination samples, // the list of all source samples with non-zero weighted contributions. Resampler::Contrib_List* Resampler::make_clist( int src_x, int dst_x, Boundary_Op boundary_op, Resample_Real (*Pfilter)(Resample_Real), Resample_Real filter_support, Resample_Real filter_scale, Resample_Real src_ofs) { typedef struct { // The center of the range in DISCRETE coordinates (pixel center = 0.0f). 
Resample_Real center; int left, right; } Contrib_Bounds; int i, j, k, n, left, right; Resample_Real total_weight; Resample_Real xscale, center, half_width, weight; Contrib_List* Pcontrib; Contrib* Pcpool; Contrib* Pcpool_next; Contrib_Bounds* Pcontrib_bounds; if ((Pcontrib = (Contrib_List*)crnlib_calloc(dst_x, sizeof(Contrib_List))) == NULL) return NULL; Pcontrib_bounds = (Contrib_Bounds*)crnlib_calloc(dst_x, sizeof(Contrib_Bounds)); if (!Pcontrib_bounds) { crnlib_free(Pcontrib); return (NULL); } const Resample_Real oo_filter_scale = 1.0f / filter_scale; const Resample_Real NUDGE = 0.5f; xscale = dst_x / (Resample_Real)src_x; if (xscale < 1.0f) { int total; (void)total; /* Handle case when there are fewer destination * samples than source samples (downsampling/minification). */ // stretched half width of filter half_width = (filter_support / xscale) * filter_scale; // Find the range of source sample(s) that will contribute to each destination sample. for (i = 0, n = 0; i < dst_x; i++) { // Convert from discrete to continuous coordinates, scale, then convert back to discrete. center = ((Resample_Real)i + NUDGE) / xscale; center -= NUDGE; center += src_ofs; left = cast_to_int((Resample_Real)floor(center - half_width)); right = cast_to_int((Resample_Real)ceil(center + half_width)); Pcontrib_bounds[i].center = center; Pcontrib_bounds[i].left = left; Pcontrib_bounds[i].right = right; n += (right - left + 1); } /* Allocate memory for contributors. */ if ((n == 0) || ((Pcpool = (Contrib*)crnlib_calloc(n, sizeof(Contrib))) == NULL)) { crnlib_free(Pcontrib); crnlib_free(Pcontrib_bounds); return NULL; } total = n; Pcpool_next = Pcpool; /* Create the list of source samples which * contribute to each destination sample. 
*/ for (i = 0; i < dst_x; i++) { int max_k = -1; Resample_Real max_w = -1e+20f; center = Pcontrib_bounds[i].center; left = Pcontrib_bounds[i].left; right = Pcontrib_bounds[i].right; Pcontrib[i].n = 0; Pcontrib[i].p = Pcpool_next; Pcpool_next += (right - left + 1); resampler_assert((Pcpool_next - Pcpool) <= total); total_weight = 0; for (j = left; j <= right; j++) total_weight += (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale); const Resample_Real norm = static_cast(1.0f / total_weight); total_weight = 0; #if RESAMPLER_DEBUG printf("%i: ", i); #endif for (j = left; j <= right; j++) { weight = (*Pfilter)((center - (Resample_Real)j) * xscale * oo_filter_scale) * norm; if (weight == 0.0f) continue; n = reflect(j, src_x, boundary_op); #if RESAMPLER_DEBUG printf("%i(%f), ", n, weight); #endif /* Increment the number of source * samples which contribute to the * current destination sample. */ k = Pcontrib[i].n++; Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */ Pcontrib[i].p[k].weight = weight; /* store src sample weight */ total_weight += weight; /* total weight of all contributors */ if (weight > max_w) { max_w = weight; max_k = k; } } #if RESAMPLER_DEBUG printf("\n\n"); #endif //resampler_assert(Pcontrib[i].n); //resampler_assert(max_k != -1); if ((max_k == -1) || (Pcontrib[i].n == 0)) { crnlib_free(Pcpool); crnlib_free(Pcontrib); crnlib_free(Pcontrib_bounds); return NULL; } if (total_weight != 1.0f) Pcontrib[i].p[max_k].weight += 1.0f - total_weight; } } else { /* Handle case when there are more * destination samples than source * samples (upsampling). */ half_width = filter_support * filter_scale; // Find the source sample(s) that contribute to each destination sample. for (i = 0, n = 0; i < dst_x; i++) { // Convert from discrete to continuous coordinates, scale, then convert back to discrete. 
center = ((Resample_Real)i + NUDGE) / xscale; center -= NUDGE; center += src_ofs; left = cast_to_int((Resample_Real)floor(center - half_width)); right = cast_to_int((Resample_Real)ceil(center + half_width)); Pcontrib_bounds[i].center = center; Pcontrib_bounds[i].left = left; Pcontrib_bounds[i].right = right; n += (right - left + 1); } /* Allocate memory for contributors. */ int total = n; if ((total == 0) || ((Pcpool = (Contrib*)crnlib_calloc(total, sizeof(Contrib))) == NULL)) { crnlib_free(Pcontrib); crnlib_free(Pcontrib_bounds); return NULL; } Pcpool_next = Pcpool; /* Create the list of source samples which * contribute to each destination sample. */ for (i = 0; i < dst_x; i++) { int max_k = -1; Resample_Real max_w = -1e+20f; center = Pcontrib_bounds[i].center; left = Pcontrib_bounds[i].left; right = Pcontrib_bounds[i].right; Pcontrib[i].n = 0; Pcontrib[i].p = Pcpool_next; Pcpool_next += (right - left + 1); resampler_assert((Pcpool_next - Pcpool) <= total); total_weight = 0; for (j = left; j <= right; j++) total_weight += (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale); const Resample_Real norm = static_cast(1.0f / total_weight); total_weight = 0; #if RESAMPLER_DEBUG printf("%i: ", i); #endif for (j = left; j <= right; j++) { weight = (*Pfilter)((center - (Resample_Real)j) * oo_filter_scale) * norm; if (weight == 0.0f) continue; n = reflect(j, src_x, boundary_op); #if RESAMPLER_DEBUG printf("%i(%f), ", n, weight); #endif /* Increment the number of source * samples which contribute to the * current destination sample. 
*/ k = Pcontrib[i].n++; Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */ Pcontrib[i].p[k].weight = weight; /* store src sample weight */ total_weight += weight; /* total weight of all contributors */ if (weight > max_w) { max_w = weight; max_k = k; } } #if RESAMPLER_DEBUG printf("\n\n"); #endif //resampler_assert(Pcontrib[i].n); //resampler_assert(max_k != -1); if ((max_k == -1) || (Pcontrib[i].n == 0)) { crnlib_free(Pcpool); crnlib_free(Pcontrib); crnlib_free(Pcontrib_bounds); return NULL; } if (total_weight != 1.0f) Pcontrib[i].p[max_k].weight += 1.0f - total_weight; } } #if RESAMPLER_DEBUG printf("*******\n"); #endif crnlib_free(Pcontrib_bounds); return Pcontrib; } void Resampler::resample_x(Sample* Pdst, const Sample* Psrc) { resampler_assert(Pdst); resampler_assert(Psrc); int i, j; Sample total; Contrib_List* Pclist = m_Pclist_x; Contrib* p; for (i = m_resample_dst_x; i > 0; i--, Pclist++) { #if CRNLIB_RESAMPLER_DEBUG_OPS total_ops += Pclist->n; #endif for (j = Pclist->n, p = Pclist->p, total = 0; j > 0; j--, p++) total += Psrc[p->pixel] * p->weight; *Pdst++ = total; } } void Resampler::scale_y_mov(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x) { int i; #if CRNLIB_RESAMPLER_DEBUG_OPS total_ops += dst_x; #endif // Not += because temp buf wasn't cleared. for (i = dst_x; i > 0; i--) *Ptmp++ = *Psrc++ * weight; } void Resampler::scale_y_add(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x) { #if CRNLIB_RESAMPLER_DEBUG_OPS total_ops += dst_x; #endif for (int i = dst_x; i > 0; i--) (*Ptmp++) += *Psrc++ * weight; } void Resampler::clamp(Sample* Pdst, int n) { while (n > 0) { Sample x = *Pdst; *Pdst++ = clamp_sample(x); n--; } } void Resampler::resample_y(Sample* Pdst) { int i, j; Sample* Psrc; Contrib_List* Pclist = &m_Pclist_y[m_cur_dst_y]; Sample* Ptmp = m_delay_x_resample ? m_Ptmp_buf : Pdst; resampler_assert(Ptmp); /* Process each contributor. 
*/ for (i = 0; i < Pclist->n; i++) { /* locate the contributor's location in the scan * buffer -- the contributor must always be found! */ for (j = 0; j < MAX_SCAN_BUF_SIZE; j++) if (m_Pscan_buf->scan_buf_y[j] == Pclist->p[i].pixel) break; resampler_assert(j < MAX_SCAN_BUF_SIZE); Psrc = m_Pscan_buf->scan_buf_l[j]; if (!i) scale_y_mov(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x); else scale_y_add(Ptmp, Psrc, Pclist->p[i].weight, m_intermediate_x); /* If this source line doesn't contribute to any * more destination lines then mark the scanline buffer slot * which holds this source line as free. * (The max. number of slots used depends on the Y * axis sampling factor and the scaled filter width.) */ if (--m_Psrc_y_count[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] == 0) { m_Psrc_y_flag[resampler_range_check(Pclist->p[i].pixel, m_resample_src_y)] = FALSE; m_Pscan_buf->scan_buf_y[j] = -1; } } /* Now generate the destination line */ if (m_delay_x_resample) // Was X resampling delayed until after Y resampling? { resampler_assert(Pdst != Ptmp); resample_x(Pdst, Ptmp); } else { resampler_assert(Pdst == Ptmp); } if (m_lo < m_hi) clamp(Pdst, m_resample_dst_x); } bool Resampler::put_line(const Sample* Psrc) { int i; if (m_cur_src_y >= m_resample_src_y) return false; /* Does this source line contribute * to any destination line? if not, * exit now. */ if (!m_Psrc_y_count[resampler_range_check(m_cur_src_y, m_resample_src_y)]) { m_cur_src_y++; return true; } /* Find an empty slot in the scanline buffer. (FIXME: Perf. is terrible here with extreme scaling ratios.) */ for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) if (m_Pscan_buf->scan_buf_y[i] == -1) break; /* If the buffer is full, exit with an error. */ if (i == MAX_SCAN_BUF_SIZE) { m_status = STATUS_SCAN_BUFFER_FULL; return false; } m_Psrc_y_flag[resampler_range_check(m_cur_src_y, m_resample_src_y)] = TRUE; m_Pscan_buf->scan_buf_y[i] = m_cur_src_y; /* Does this slot have any memory allocated to it? 
*/ if (!m_Pscan_buf->scan_buf_l[i]) { if ((m_Pscan_buf->scan_buf_l[i] = (Sample*)crnlib_malloc(m_intermediate_x * sizeof(Sample))) == NULL) { m_status = STATUS_OUT_OF_MEMORY; return false; } } // Resampling on the X axis first? if (m_delay_x_resample) { resampler_assert(m_intermediate_x == m_resample_src_x); // Y-X resampling order memcpy(m_Pscan_buf->scan_buf_l[i], Psrc, m_intermediate_x * sizeof(Sample)); } else { resampler_assert(m_intermediate_x == m_resample_dst_x); // X-Y resampling order resample_x(m_Pscan_buf->scan_buf_l[i], Psrc); } m_cur_src_y++; return true; } const Resampler::Sample* Resampler::get_line() { int i; /* If all the destination lines have been * generated, then always return NULL. */ if (m_cur_dst_y == m_resample_dst_y) return NULL; /* Check to see if all the required * contributors are present, if not, * return NULL. */ for (i = 0; i < m_Pclist_y[m_cur_dst_y].n; i++) if (!m_Psrc_y_flag[resampler_range_check(m_Pclist_y[m_cur_dst_y].p[i].pixel, m_resample_src_y)]) return NULL; resample_y(m_Pdst_buf); m_cur_dst_y++; return m_Pdst_buf; } Resampler::~Resampler() { int i; #if CRNLIB_RESAMPLER_DEBUG_OPS printf("actual ops: %i\n", total_ops); #endif crnlib_free(m_Pdst_buf); m_Pdst_buf = NULL; if (m_Ptmp_buf) { crnlib_free(m_Ptmp_buf); m_Ptmp_buf = NULL; } /* Don't deallocate a contibutor list * if the user passed us one of their own. 
*/ if ((m_Pclist_x) && (!m_clist_x_forced)) { crnlib_free(m_Pclist_x->p); crnlib_free(m_Pclist_x); m_Pclist_x = NULL; } if ((m_Pclist_y) && (!m_clist_y_forced)) { crnlib_free(m_Pclist_y->p); crnlib_free(m_Pclist_y); m_Pclist_y = NULL; } crnlib_free(m_Psrc_y_count); m_Psrc_y_count = NULL; crnlib_free(m_Psrc_y_flag); m_Psrc_y_flag = NULL; if (m_Pscan_buf) { for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) crnlib_free(m_Pscan_buf->scan_buf_l[i]); crnlib_free(m_Pscan_buf); m_Pscan_buf = NULL; } } void Resampler::restart() { if (STATUS_OKAY != m_status) return; m_cur_src_y = m_cur_dst_y = 0; int i, j; for (i = 0; i < m_resample_src_y; i++) { m_Psrc_y_count[i] = 0; m_Psrc_y_flag[i] = FALSE; } for (i = 0; i < m_resample_dst_y; i++) { for (j = 0; j < m_Pclist_y[i].n; j++) m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; } for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) { m_Pscan_buf->scan_buf_y[i] = -1; crnlib_free(m_Pscan_buf->scan_buf_l[i]); m_Pscan_buf->scan_buf_l[i] = NULL; } } Resampler::Resampler(int src_x, int src_y, int dst_x, int dst_y, Boundary_Op boundary_op, Resample_Real sample_low, Resample_Real sample_high, const char* Pfilter_name, Contrib_List* Pclist_x, Contrib_List* Pclist_y, Resample_Real filter_x_scale, Resample_Real filter_y_scale, Resample_Real src_x_ofs, Resample_Real src_y_ofs) { int i, j; Resample_Real support, (*func)(Resample_Real); resampler_assert(src_x > 0); resampler_assert(src_y > 0); resampler_assert(dst_x > 0); resampler_assert(dst_y > 0); #if CRNLIB_RESAMPLER_DEBUG_OPS total_ops = 0; #endif m_lo = sample_low; m_hi = sample_high; m_delay_x_resample = false; m_intermediate_x = 0; m_Pdst_buf = NULL; m_Ptmp_buf = NULL; m_clist_x_forced = false; m_Pclist_x = NULL; m_clist_y_forced = false; m_Pclist_y = NULL; m_Psrc_y_count = NULL; m_Psrc_y_flag = NULL; m_Pscan_buf = NULL; m_status = STATUS_OKAY; m_resample_src_x = src_x; m_resample_src_y = src_y; m_resample_dst_x = dst_x; m_resample_dst_y = dst_y; m_boundary_op = 
boundary_op; if ((m_Pdst_buf = (Sample*)crnlib_malloc(m_resample_dst_x * sizeof(Sample))) == NULL) { m_status = STATUS_OUT_OF_MEMORY; return; } // Find the specified filter. if (Pfilter_name == NULL) Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER; for (i = 0; i < g_num_resample_filters; i++) if (strcmp(Pfilter_name, g_resample_filters[i].name) == 0) break; if (i == g_num_resample_filters) { m_status = STATUS_BAD_FILTER_NAME; return; } func = g_resample_filters[i].func; support = g_resample_filters[i].support; /* Create contributor lists, unless the user supplied custom lists. */ if (!Pclist_x) { m_Pclist_x = make_clist(m_resample_src_x, m_resample_dst_x, m_boundary_op, func, support, filter_x_scale, src_x_ofs); if (!m_Pclist_x) { m_status = STATUS_OUT_OF_MEMORY; return; } } else { m_Pclist_x = Pclist_x; m_clist_x_forced = true; } if (!Pclist_y) { m_Pclist_y = make_clist(m_resample_src_y, m_resample_dst_y, m_boundary_op, func, support, filter_y_scale, src_y_ofs); if (!m_Pclist_y) { m_status = STATUS_OUT_OF_MEMORY; return; } } else { m_Pclist_y = Pclist_y; m_clist_y_forced = true; } if ((m_Psrc_y_count = (int*)crnlib_calloc(m_resample_src_y, sizeof(int))) == NULL) { m_status = STATUS_OUT_OF_MEMORY; return; } if ((m_Psrc_y_flag = (unsigned char*)crnlib_calloc(m_resample_src_y, sizeof(unsigned char))) == NULL) { m_status = STATUS_OUT_OF_MEMORY; return; } /* Count how many times each source line * contributes to a destination line. */ for (i = 0; i < m_resample_dst_y; i++) for (j = 0; j < m_Pclist_y[i].n; j++) m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++; if ((m_Pscan_buf = (Scan_Buf*)crnlib_malloc(sizeof(Scan_Buf))) == NULL) { m_status = STATUS_OUT_OF_MEMORY; return; } for (i = 0; i < MAX_SCAN_BUF_SIZE; i++) { m_Pscan_buf->scan_buf_y[i] = -1; m_Pscan_buf->scan_buf_l[i] = NULL; } m_cur_src_y = m_cur_dst_y = 0; { // Determine which axis to resample first by comparing the number of multiplies required // for each possibility. 
int x_ops = count_ops(m_Pclist_x, m_resample_dst_x); int y_ops = count_ops(m_Pclist_y, m_resample_dst_y); // Hack 10/2000: Weight Y axis ops a little more than X axis ops. // (Y axis ops use more cache resources.) int xy_ops = x_ops * m_resample_src_y + (4 * y_ops * m_resample_dst_x) / 3; int yx_ops = (4 * y_ops * m_resample_src_x) / 3 + x_ops * m_resample_dst_y; #if CRNLIB_RESAMPLER_DEBUG_OPS printf("src: %i %i\n", m_resample_src_x, m_resample_src_y); printf("dst: %i %i\n", m_resample_dst_x, m_resample_dst_y); printf("x_ops: %i\n", x_ops); printf("y_ops: %i\n", y_ops); printf("xy_ops: %i\n", xy_ops); printf("yx_ops: %i\n", yx_ops); #endif // Now check which resample order is better. In case of a tie, choose the order // which buffers the least amount of data. if ((xy_ops > yx_ops) || ((xy_ops == yx_ops) && (m_resample_src_x < m_resample_dst_x))) { m_delay_x_resample = true; m_intermediate_x = m_resample_src_x; } else { m_delay_x_resample = false; m_intermediate_x = m_resample_dst_x; } #if CRNLIB_RESAMPLER_DEBUG_OPS printf("delaying: %i\n", m_delay_x_resample); #endif } if (m_delay_x_resample) { if ((m_Ptmp_buf = (Sample*)crnlib_malloc(m_intermediate_x * sizeof(Sample))) == NULL) { m_status = STATUS_OUT_OF_MEMORY; return; } } } void Resampler::get_clists(Contrib_List** ptr_clist_x, Contrib_List** ptr_clist_y) { if (ptr_clist_x) *ptr_clist_x = m_Pclist_x; if (ptr_clist_y) *ptr_clist_y = m_Pclist_y; } int Resampler::get_filter_num() { return g_num_resample_filters; } const char* Resampler::get_filter_name(int filter_num) { if ((filter_num < 0) || (filter_num >= g_num_resample_filters)) return NULL; else return g_resample_filters[filter_num].name; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_resampler.h000066400000000000000000000105661503722002600223050ustar00rootroot00000000000000// File: crn_resampler.h // RG: This is public domain code, originally derived from Graphics Gems 3, see: http://code.google.com/p/imageresampler/ #pragma once namespace 
// Set to 1 to compile in the operation-count diagnostics.
#define CRNLIB_RESAMPLER_DEBUG_OPS 0
#define CRNLIB_RESAMPLER_DEFAULT_FILTER "lanczos4"
#define CRNLIB_RESAMPLER_MAX_DIMENSION 16384

// float or double
typedef float Resample_Real;

// Streaming image resampler: source scanlines are pushed in with put_line()
// and resampled scanlines are pulled out with get_line().
class Resampler {
 public:
  typedef Resample_Real Sample;

  // One weighted source pixel contributing to a destination pixel.
  struct Contrib {
    Resample_Real weight;
    unsigned short pixel;
  };

  // The n contributors (array p) of a single destination pixel.
  struct Contrib_List {
    unsigned short n;
    Contrib* p;
  };

  enum Boundary_Op {
    BOUNDARY_WRAP = 0,
    BOUNDARY_REFLECT = 1,
    BOUNDARY_CLAMP = 2
  };

  enum Status {
    STATUS_OKAY = 0,
    STATUS_OUT_OF_MEMORY = 1,
    STATUS_BAD_FILTER_NAME = 2,
    STATUS_SCAN_BUFFER_FULL = 3
  };

  // src_x/src_y - Input dimensions
  // dst_x/dst_y - Output dimensions
  // boundary_op - How to sample pixels near the image boundaries
  // sample_low/sample_high - Clamp output samples to specified range, or disable clamping if sample_low >= sample_high
  // Pclist_x/Pclist_y - Optional pointers to contributor lists from another instance of a Resampler
  // src_x_ofs/src_y_ofs - Offset input image by specified amount (fractional values okay)
  Resampler(
      int src_x, int src_y,
      int dst_x, int dst_y,
      Boundary_Op boundary_op = BOUNDARY_CLAMP,
      Resample_Real sample_low = 0.0f, Resample_Real sample_high = 0.0f,
      const char* Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER,
      Contrib_List* Pclist_x = NULL,
      Contrib_List* Pclist_y = NULL,
      Resample_Real filter_x_scale = 1.0f,
      Resample_Real filter_y_scale = 1.0f,
      Resample_Real src_x_ofs = 0.0f,
      Resample_Real src_y_ofs = 0.0f);

  ~Resampler();

  // Reinits resampler so it can handle another frame.
  void restart();

  // false on out of memory.
  bool put_line(const Sample* Psrc);

  // NULL if no scanlines are currently available (give the resampler more scanlines!)
  const Sample* get_line();

  Status status() const { return m_status; }

  // Returned contributor lists can be shared with another Resampler.
  void get_clists(Contrib_List** ptr_clist_x, Contrib_List** ptr_clist_y);
  Contrib_List* get_clist_x() const { return m_Pclist_x; }
  Contrib_List* get_clist_y() const { return m_Pclist_y; }

  // Filter accessors.
  static int get_filter_num();
  static const char* get_filter_name(int filter_num);

  static Contrib_List* make_clist(
      int src_x, int dst_x, Boundary_Op boundary_op,
      Resample_Real (*Pfilter)(Resample_Real),
      Resample_Real filter_support,
      Resample_Real filter_scale,
      Resample_Real src_ofs);

 private:
  // Declared but not defined here: default construction and copying are not
  // part of the public interface.
  Resampler();
  Resampler(const Resampler& o);
  Resampler& operator=(const Resampler& o);

  // CRNLIB_RESAMPLER_DEBUG_OPS is always defined (as 0 or 1), so it has to be
  // tested by value with #if; #ifdef would compile the counter in even when
  // the diagnostics are switched off.
  // NOTE(review): every use of total_ops in the .cpp must sit under the same
  // #if guard - confirm.
#if CRNLIB_RESAMPLER_DEBUG_OPS
  int total_ops;
#endif

  int m_intermediate_x;

  int m_resample_src_x;
  int m_resample_src_y;
  int m_resample_dst_x;
  int m_resample_dst_y;

  Boundary_Op m_boundary_op;

  Sample* m_Pdst_buf;
  Sample* m_Ptmp_buf;

  Contrib_List* m_Pclist_x;
  Contrib_List* m_Pclist_y;

  bool m_clist_x_forced;
  bool m_clist_y_forced;

  bool m_delay_x_resample;

  int* m_Psrc_y_count;
  unsigned char* m_Psrc_y_flag;

  // The maximum number of scanlines that can be buffered at one time.
  enum { MAX_SCAN_BUF_SIZE = CRNLIB_RESAMPLER_MAX_DIMENSION };

  struct Scan_Buf {
    int scan_buf_y[MAX_SCAN_BUF_SIZE];
    Sample* scan_buf_l[MAX_SCAN_BUF_SIZE];
  };

  Scan_Buf* m_Pscan_buf;

  int m_cur_src_y;
  int m_cur_dst_y;

  Status m_status;

  void resample_x(Sample* Pdst, const Sample* Psrc);
  void scale_y_mov(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x);
  void scale_y_add(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x);
  void clamp(Sample* Pdst, int n);
  void resample_y(Sample* Pdst);

  static int reflect(const int j, const int src_x, const Boundary_Op boundary_op);

  // Sums the contributor counts (n) of the first k lists in Pclist.
  inline int count_ops(Contrib_List* Pclist, int k) {
    int i, t = 0;
    for (i = 0; i < k; i++)
      t += Pclist[i].n;
    return (t);
  }

  Resample_Real m_lo;
  Resample_Real m_hi;

  // Limits f to the closed range [m_lo, m_hi].
  inline Resample_Real clamp_sample(Resample_Real f) const {
    if (f < m_lo)
      f = m_lo;
    else if (f > m_hi)
      f = m_hi;
    return f;
  }
};
// Small generic helpers used throughout rg_etc1. The template parameter
// lists are spelled out explicitly; without them these are not valid C++.

// Returns the smaller of a and b (b when they compare equal).
template <typename T>
inline T minimum(T a, T b) {
  return (a < b) ? a : b;
}

// Returns the smallest of a, b and c.
template <typename T>
inline T minimum(T a, T b, T c) {
  return minimum(minimum(a, b), c);
}

// Returns the larger of a and b (b when they compare equal).
template <typename T>
inline T maximum(T a, T b) {
  return (a > b) ? a : b;
}

// Returns the largest of a, b and c.
template <typename T>
inline T maximum(T a, T b, T c) {
  return maximum(maximum(a, b), c);
}

// Limits value to the closed range [low, high].
template <typename T>
inline T clamp(T value, T low, T high) {
  return (value < low) ? low : ((value > high) ? high : value);
}

// Returns value * value.
template <typename T>
inline T square(T value) {
  return value * value;
}

// Fills every byte of obj with zero.
template <typename T>
inline void zero_object(T& obj) {
  memset((void*)&obj, 0, sizeof(obj));
}

// Fills every byte of the object pObj points at with zero.
template <typename T>
inline void zero_this(T* pObj) {
  memset((void*)pObj, 0, sizeof(*pObj));
}

// Declaration only: used inside sizeof() to obtain the element type of an array.
template <class T, size_t N>
T decay_array_to_subtype(T (&a)[N]);

// Number of elements in the array X. Passing a pointer is a compile error
// rather than a silently wrong result.
#define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X)))
high : value); } template inline T square(T value) { return value * value; } template inline void zero_object(T& obj) { memset((void*)&obj, 0, sizeof(obj)); } template inline void zero_this(T* pObj) { memset((void*)pObj, 0, sizeof(*pObj)); } template T decay_array_to_subtype(T (&a)[N]); #define RG_ETC1_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X))) enum eNoClamp { cNoClamp }; struct color_quad_u8 { static inline int clamp(int v) { if (v & 0xFFFFFF00U) v = (~(static_cast(v) >> 31)) & 0xFF; return v; } struct component_traits { enum { cSigned = false, cFloat = false, cMin = 0U, cMax = 255U }; }; public: typedef unsigned char component_t; typedef int parameter_t; enum { cNumComps = 4 }; union { struct { component_t r; component_t g; component_t b; component_t a; }; component_t c[cNumComps]; uint32 m_u32; }; inline color_quad_u8() { } inline color_quad_u8(const color_quad_u8& other) : m_u32(other.m_u32) { } explicit inline color_quad_u8(parameter_t y, parameter_t alpha = component_traits::cMax) { set(y, alpha); } inline color_quad_u8(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) { set(red, green, blue, alpha); } explicit inline color_quad_u8(eNoClamp, parameter_t y, parameter_t alpha = component_traits::cMax) { set_noclamp_y_alpha(y, alpha); } inline color_quad_u8(eNoClamp, parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) { set_noclamp_rgba(red, green, blue, alpha); } inline void clear() { m_u32 = 0; } inline color_quad_u8& operator=(const color_quad_u8& other) { m_u32 = other.m_u32; return *this; } inline color_quad_u8& set_rgb(const color_quad_u8& other) { r = other.r; g = other.g; b = other.b; return *this; } inline color_quad_u8& operator=(parameter_t y) { set(y, component_traits::cMax); return *this; } inline color_quad_u8& set(parameter_t y, parameter_t alpha = component_traits::cMax) { y = clamp(y); alpha = clamp(alpha); r = static_cast(y); g = 
static_cast(y); b = static_cast(y); a = static_cast(alpha); return *this; } inline color_quad_u8& set_noclamp_y_alpha(parameter_t y, parameter_t alpha = component_traits::cMax) { RG_ETC1_ASSERT((y >= component_traits::cMin) && (y <= component_traits::cMax)); RG_ETC1_ASSERT((alpha >= component_traits::cMin) && (alpha <= component_traits::cMax)); r = static_cast(y); g = static_cast(y); b = static_cast(y); a = static_cast(alpha); return *this; } inline color_quad_u8& set(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha = component_traits::cMax) { r = static_cast(clamp(red)); g = static_cast(clamp(green)); b = static_cast(clamp(blue)); a = static_cast(clamp(alpha)); return *this; } inline color_quad_u8& set_noclamp_rgba(parameter_t red, parameter_t green, parameter_t blue, parameter_t alpha) { RG_ETC1_ASSERT((red >= component_traits::cMin) && (red <= component_traits::cMax)); RG_ETC1_ASSERT((green >= component_traits::cMin) && (green <= component_traits::cMax)); RG_ETC1_ASSERT((blue >= component_traits::cMin) && (blue <= component_traits::cMax)); RG_ETC1_ASSERT((alpha >= component_traits::cMin) && (alpha <= component_traits::cMax)); r = static_cast(red); g = static_cast(green); b = static_cast(blue); a = static_cast(alpha); return *this; } inline color_quad_u8& set_noclamp_rgb(parameter_t red, parameter_t green, parameter_t blue) { RG_ETC1_ASSERT((red >= component_traits::cMin) && (red <= component_traits::cMax)); RG_ETC1_ASSERT((green >= component_traits::cMin) && (green <= component_traits::cMax)); RG_ETC1_ASSERT((blue >= component_traits::cMin) && (blue <= component_traits::cMax)); r = static_cast(red); g = static_cast(green); b = static_cast(blue); return *this; } static inline parameter_t get_min_comp() { return component_traits::cMin; } static inline parameter_t get_max_comp() { return component_traits::cMax; } static inline bool get_comps_are_signed() { return component_traits::cSigned; } inline component_t operator[](uint i) const { 
RG_ETC1_ASSERT(i < cNumComps); return c[i]; } inline component_t& operator[](uint i) { RG_ETC1_ASSERT(i < cNumComps); return c[i]; } inline color_quad_u8& set_component(uint i, parameter_t f) { RG_ETC1_ASSERT(i < cNumComps); c[i] = static_cast(clamp(f)); return *this; } inline color_quad_u8& set_grayscale(parameter_t l) { component_t x = static_cast(clamp(l)); c[0] = x; c[1] = x; c[2] = x; return *this; } inline color_quad_u8& clamp(const color_quad_u8& l, const color_quad_u8& h) { for (uint i = 0; i < cNumComps; i++) c[i] = static_cast(rg_etc1::clamp(c[i], l[i], h[i])); return *this; } inline color_quad_u8& clamp(parameter_t l, parameter_t h) { for (uint i = 0; i < cNumComps; i++) c[i] = static_cast(rg_etc1::clamp(c[i], l, h)); return *this; } // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). inline parameter_t get_luma() const { return static_cast((19595U * r + 38470U * g + 7471U * b + 32768U) >> 16U); } // Returns REC 709 luma. inline parameter_t get_luma_rec709() const { return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); } inline uint squared_distance_rgb(const color_quad_u8& c) const { return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b); } inline uint squared_distance_rgba(const color_quad_u8& c) const { return rg_etc1::square(r - c.r) + rg_etc1::square(g - c.g) + rg_etc1::square(b - c.b) + rg_etc1::square(a - c.a); } inline bool rgb_equals(const color_quad_u8& rhs) const { return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); } inline bool operator==(const color_quad_u8& rhs) const { return m_u32 == rhs.m_u32; } color_quad_u8& operator+=(const color_quad_u8& other) { for (uint i = 0; i < 4; i++) c[i] = static_cast(clamp(c[i] + other.c[i])); return *this; } color_quad_u8& operator-=(const color_quad_u8& other) { for (uint i = 0; i < 4; i++) c[i] = static_cast(clamp(c[i] - other.c[i])); return *this; } friend color_quad_u8 operator+(const color_quad_u8& lhs, const color_quad_u8& rhs) { 
color_quad_u8 result(lhs); result += rhs; return result; } friend color_quad_u8 operator-(const color_quad_u8& lhs, const color_quad_u8& rhs) { color_quad_u8 result(lhs); result -= rhs; return result; } }; // class color_quad_u8 struct vec3F { float m_s[3]; inline vec3F() {} inline vec3F(float s) { m_s[0] = s; m_s[1] = s; m_s[2] = s; } inline vec3F(float x, float y, float z) { m_s[0] = x; m_s[1] = y; m_s[2] = z; } inline float operator[](uint i) const { RG_ETC1_ASSERT(i < 3); return m_s[i]; } inline vec3F& operator+=(const vec3F& other) { for (uint i = 0; i < 3; i++) m_s[i] += other.m_s[i]; return *this; } inline vec3F& operator*=(float s) { for (uint i = 0; i < 3; i++) m_s[i] *= s; return *this; } }; enum etc_constants { cETC1BytesPerBlock = 8U, cETC1SelectorBits = 2U, cETC1SelectorValues = 1U << cETC1SelectorBits, cETC1SelectorMask = cETC1SelectorValues - 1U, cETC1BlockShift = 2U, cETC1BlockSize = 1U << cETC1BlockShift, cETC1LSBSelectorIndicesBitOffset = 0, cETC1MSBSelectorIndicesBitOffset = 16, cETC1FlipBitOffset = 32, cETC1DiffBitOffset = 33, cETC1IntenModifierNumBits = 3, cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, cETC1RightIntenModifierTableBitOffset = 34, cETC1LeftIntenModifierTableBitOffset = 37, // Base+Delta encoding (5 bit bases, 3 bit delta) cETC1BaseColorCompNumBits = 5, cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, cETC1DeltaColorCompNumBits = 3, cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, cETC1BaseColor5RBitOffset = 59, cETC1BaseColor5GBitOffset = 51, cETC1BaseColor5BBitOffset = 43, cETC1DeltaColor3RBitOffset = 56, cETC1DeltaColor3GBitOffset = 48, cETC1DeltaColor3BBitOffset = 40, // Absolute (non-delta) encoding (two 4-bit per component bases) cETC1AbsColorCompNumBits = 4, cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, cETC1AbsColor4R1BitOffset = 60, cETC1AbsColor4G1BitOffset = 52, cETC1AbsColor4B1BitOffset = 44, cETC1AbsColor4R2BitOffset = 56, 
// Layout constants for a packed ETC1 block.
// The *BitOffset values are bit positions inside the block's 64-bit word as
// consumed by etc1_block::get_byte_bits()/set_byte_bits(), which map bit
// offset `ofs` to byte index 7 - (ofs >> 3).
enum etc_constants {
  cETC1BytesPerBlock = 8U,

  // Each pixel selector is 2 bits wide, giving 4 selector values (mask 3).
  cETC1SelectorBits = 2U,
  cETC1SelectorValues = 1U << cETC1SelectorBits,
  cETC1SelectorMask = cETC1SelectorValues - 1U,

  // 1 << 2 = 4.
  cETC1BlockShift = 2U,
  cETC1BlockSize = 1U << cETC1BlockShift,

  cETC1LSBSelectorIndicesBitOffset = 0,
  cETC1MSBSelectorIndicesBitOffset = 16,

  cETC1FlipBitOffset = 32,
  cETC1DiffBitOffset = 33,

  // 3-bit intensity modifier table index, i.e. 8 tables.
  cETC1IntenModifierNumBits = 3,
  cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits,
  cETC1RightIntenModifierTableBitOffset = 34,
  cETC1LeftIntenModifierTableBitOffset = 37,

  // Base+Delta encoding (5 bit bases, 3 bit delta)
  // The *CompMax values equal 1 << NumBits, i.e. the number of representable
  // values. cETC1DeltaColorComp and cETC1DeltaColorCompMax are the same value.
  cETC1BaseColorCompNumBits = 5,
  cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits,

  cETC1DeltaColorCompNumBits = 3,
  cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits,
  cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits,

  cETC1BaseColor5RBitOffset = 59,
  cETC1BaseColor5GBitOffset = 51,
  cETC1BaseColor5BBitOffset = 43,

  cETC1DeltaColor3RBitOffset = 56,
  cETC1DeltaColor3GBitOffset = 48,
  cETC1DeltaColor3BBitOffset = 40,

  // Absolute (non-delta) encoding (two 4-bit per component bases)
  cETC1AbsColorCompNumBits = 4,
  cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits,

  cETC1AbsColor4R1BitOffset = 60,
  cETC1AbsColor4G1BitOffset = 52,
  cETC1AbsColor4B1BitOffset = 44,

  cETC1AbsColor4R2BitOffset = 56,
  cETC1AbsColor4G2BitOffset = 48,
  cETC1AbsColor4B2BitOffset = 40,

  // Range of a signed 3-bit delta.
  cETC1ColorDeltaMin = -4,
  cETC1ColorDeltaMax = 3,

  // Delta3:
  // 0   1   2   3   4   5   6   7
  // 000 001 010 011 100 101 110 111
  // 0   1   2   3   -4  -3  -2  -1
};
// To pack: diff | (inten << 1) | (selector << 4) | (packed_c << 8) static const uint16 g_color8_to_etc_block_config_0_255[2][33] = { {0x0000, 0x0010, 0x0002, 0x0012, 0x0004, 0x0014, 0x0006, 0x0016, 0x0008, 0x0018, 0x000A, 0x001A, 0x000C, 0x001C, 0x000E, 0x001E, 0x0001, 0x0011, 0x0003, 0x0013, 0x0005, 0x0015, 0x0007, 0x0017, 0x0009, 0x0019, 0x000B, 0x001B, 0x000D, 0x001D, 0x000F, 0x001F, 0xFFFF}, {0x0F20, 0x0F30, 0x0E32, 0x0F22, 0x0E34, 0x0F24, 0x0D36, 0x0F26, 0x0C38, 0x0E28, 0x0B3A, 0x0E2A, 0x093C, 0x0E2C, 0x053E, 0x0D2E, 0x1E31, 0x1F21, 0x1D33, 0x1F23, 0x1C35, 0x1E25, 0x1A37, 0x1E27, 0x1839, 0x1D29, 0x163B, 0x1C2B, 0x133D, 0x1B2D, 0x093F, 0x1A2F, 0xFFFF}, }; // Really only [254][11]. static const uint16 g_color8_to_etc_block_config_1_to_254[254][12] = { {0x021C, 0x0D0D, 0xFFFF}, {0x0020, 0x0021, 0x0A0B, 0x061F, 0xFFFF}, {0x0113, 0x0217, 0xFFFF}, {0x0116, 0x031E, 0x0B0E, 0x0405, 0xFFFF}, {0x0022, 0x0204, 0x050A, 0x0023, 0xFFFF}, {0x0111, 0x0319, 0x0809, 0x170F, 0xFFFF}, {0x0303, 0x0215, 0x0607, 0xFFFF}, {0x0030, 0x0114, 0x0408, 0x0031, 0x0201, 0x051D, 0xFFFF}, {0x0100, 0x0024, 0x0306, 0x0025, 0x041B, 0x0E0D, 0xFFFF}, {0x021A, 0x0121, 0x0B0B, 0x071F, 0xFFFF}, {0x0213, 0x0317, 0xFFFF}, {0x0112, 0x0505, 0xFFFF}, {0x0026, 0x070C, 0x0123, 0x0027, 0xFFFF}, {0x0211, 0x0909, 0xFFFF}, {0x0110, 0x0315, 0x0707, 0x0419, 0x180F, 0xFFFF}, {0x0218, 0x0131, 0x0301, 0x0403, 0x061D, 0xFFFF}, {0x0032, 0x0202, 0x0033, 0x0125, 0x051B, 0x0F0D, 0xFFFF}, {0x0028, 0x031C, 0x0221, 0x0029, 0xFFFF}, {0x0120, 0x0313, 0x0C0B, 0x081F, 0xFFFF}, {0x0605, 0x0417, 0xFFFF}, {0x0216, 0x041E, 0x0C0E, 0x0223, 0x0127, 0xFFFF}, {0x0122, 0x0304, 0x060A, 0x0311, 0x0A09, 0xFFFF}, {0x0519, 0x190F, 0xFFFF}, {0x002A, 0x0231, 0x0503, 0x0415, 0x0807, 0x002B, 0x071D, 0xFFFF}, {0x0130, 0x0214, 0x0508, 0x0401, 0x0133, 0x0225, 0x061B, 0xFFFF}, {0x0200, 0x0124, 0x0406, 0x0321, 0x0129, 0x100D, 0xFFFF}, {0x031A, 0x0D0B, 0x091F, 0xFFFF}, {0x0413, 0x0705, 0x0517, 0xFFFF}, {0x0212, 0x0034, 0x0323, 0x0035, 0x0227, 0xFFFF}, 
{0x0126, 0x080C, 0x0B09, 0xFFFF}, {0x0411, 0x0619, 0x1A0F, 0xFFFF}, {0x0210, 0x0331, 0x0603, 0x0515, 0x0907, 0x012B, 0xFFFF}, {0x0318, 0x002C, 0x0501, 0x0233, 0x0325, 0x071B, 0x002D, 0x081D, 0xFFFF}, {0x0132, 0x0302, 0x0229, 0x110D, 0xFFFF}, {0x0128, 0x041C, 0x0421, 0x0E0B, 0x0A1F, 0xFFFF}, {0x0220, 0x0513, 0x0617, 0xFFFF}, {0x0135, 0x0805, 0x0327, 0xFFFF}, {0x0316, 0x051E, 0x0D0E, 0x0423, 0xFFFF}, {0x0222, 0x0404, 0x070A, 0x0511, 0x0719, 0x0C09, 0x1B0F, 0xFFFF}, {0x0703, 0x0615, 0x0A07, 0x022B, 0xFFFF}, {0x012A, 0x0431, 0x0601, 0x0333, 0x012D, 0x091D, 0xFFFF}, {0x0230, 0x0314, 0x0036, 0x0608, 0x0425, 0x0037, 0x0329, 0x081B, 0x120D, 0xFFFF}, {0x0300, 0x0224, 0x0506, 0x0521, 0x0F0B, 0x0B1F, 0xFFFF}, {0x041A, 0x0613, 0x0717, 0xFFFF}, {0x0235, 0x0905, 0xFFFF}, {0x0312, 0x0134, 0x0523, 0x0427, 0xFFFF}, {0x0226, 0x090C, 0x002E, 0x0611, 0x0D09, 0x002F, 0xFFFF}, {0x0715, 0x0B07, 0x0819, 0x032B, 0x1C0F, 0xFFFF}, {0x0310, 0x0531, 0x0701, 0x0803, 0x022D, 0x0A1D, 0xFFFF}, {0x0418, 0x012C, 0x0433, 0x0525, 0x0137, 0x091B, 0x130D, 0xFFFF}, {0x0232, 0x0402, 0x0621, 0x0429, 0xFFFF}, {0x0228, 0x051C, 0x0713, 0x100B, 0x0C1F, 0xFFFF}, {0x0320, 0x0335, 0x0A05, 0x0817, 0xFFFF}, {0x0623, 0x0527, 0xFFFF}, {0x0416, 0x061E, 0x0E0E, 0x0711, 0x0E09, 0x012F, 0xFFFF}, {0x0322, 0x0504, 0x080A, 0x0919, 0x1D0F, 0xFFFF}, {0x0631, 0x0903, 0x0815, 0x0C07, 0x042B, 0x032D, 0x0B1D, 0xFFFF}, {0x022A, 0x0801, 0x0533, 0x0625, 0x0237, 0x0A1B, 0xFFFF}, {0x0330, 0x0414, 0x0136, 0x0708, 0x0721, 0x0529, 0x140D, 0xFFFF}, {0x0400, 0x0324, 0x0606, 0x0038, 0x0039, 0x110B, 0x0D1F, 0xFFFF}, {0x051A, 0x0813, 0x0B05, 0x0917, 0xFFFF}, {0x0723, 0x0435, 0x0627, 0xFFFF}, {0x0412, 0x0234, 0x0F09, 0x022F, 0xFFFF}, {0x0326, 0x0A0C, 0x012E, 0x0811, 0x0A19, 0x1E0F, 0xFFFF}, {0x0731, 0x0A03, 0x0915, 0x0D07, 0x052B, 0xFFFF}, {0x0410, 0x0901, 0x0633, 0x0725, 0x0337, 0x0B1B, 0x042D, 0x0C1D, 0xFFFF}, {0x0518, 0x022C, 0x0629, 0x150D, 0xFFFF}, {0x0332, 0x0502, 0x0821, 0x0139, 0x120B, 0x0E1F, 0xFFFF}, {0x0328, 0x061C, 0x0913, 0x0A17, 
0xFFFF}, {0x0420, 0x0535, 0x0C05, 0x0727, 0xFFFF}, {0x0823, 0x032F, 0xFFFF}, {0x0516, 0x071E, 0x0F0E, 0x0911, 0x0B19, 0x1009, 0x1F0F, 0xFFFF}, {0x0422, 0x0604, 0x090A, 0x0B03, 0x0A15, 0x0E07, 0x062B, 0xFFFF}, {0x0831, 0x0A01, 0x0733, 0x052D, 0x0D1D, 0xFFFF}, {0x032A, 0x0825, 0x0437, 0x0729, 0x0C1B, 0x160D, 0xFFFF}, {0x0430, 0x0514, 0x0236, 0x0808, 0x0921, 0x0239, 0x130B, 0x0F1F, 0xFFFF}, {0x0500, 0x0424, 0x0706, 0x0138, 0x0A13, 0x0B17, 0xFFFF}, {0x061A, 0x0635, 0x0D05, 0xFFFF}, {0x0923, 0x0827, 0xFFFF}, {0x0512, 0x0334, 0x003A, 0x0A11, 0x1109, 0x003B, 0x042F, 0xFFFF}, {0x0426, 0x0B0C, 0x022E, 0x0B15, 0x0F07, 0x0C19, 0x072B, 0xFFFF}, {0x0931, 0x0B01, 0x0C03, 0x062D, 0x0E1D, 0xFFFF}, {0x0510, 0x0833, 0x0925, 0x0537, 0x0D1B, 0x170D, 0xFFFF}, {0x0618, 0x032C, 0x0A21, 0x0339, 0x0829, 0xFFFF}, {0x0432, 0x0602, 0x0B13, 0x140B, 0x101F, 0xFFFF}, {0x0428, 0x071C, 0x0735, 0x0E05, 0x0C17, 0xFFFF}, {0x0520, 0x0A23, 0x0927, 0xFFFF}, {0x0B11, 0x1209, 0x013B, 0x052F, 0xFFFF}, {0x0616, 0x081E, 0x0D19, 0xFFFF}, {0x0522, 0x0704, 0x0A0A, 0x0A31, 0x0D03, 0x0C15, 0x1007, 0x082B, 0x072D, 0x0F1D, 0xFFFF}, {0x0C01, 0x0933, 0x0A25, 0x0637, 0x0E1B, 0xFFFF}, {0x042A, 0x0B21, 0x0929, 0x180D, 0xFFFF}, {0x0530, 0x0614, 0x0336, 0x0908, 0x0439, 0x150B, 0x111F, 0xFFFF}, {0x0600, 0x0524, 0x0806, 0x0238, 0x0C13, 0x0F05, 0x0D17, 0xFFFF}, {0x071A, 0x0B23, 0x0835, 0x0A27, 0xFFFF}, {0x1309, 0x023B, 0x062F, 0xFFFF}, {0x0612, 0x0434, 0x013A, 0x0C11, 0x0E19, 0xFFFF}, {0x0526, 0x0C0C, 0x032E, 0x0B31, 0x0E03, 0x0D15, 0x1107, 0x092B, 0xFFFF}, {0x0D01, 0x0A33, 0x0B25, 0x0737, 0x0F1B, 0x082D, 0x101D, 0xFFFF}, {0x0610, 0x0A29, 0x190D, 0xFFFF}, {0x0718, 0x042C, 0x0C21, 0x0539, 0x160B, 0x121F, 0xFFFF}, {0x0532, 0x0702, 0x0D13, 0x0E17, 0xFFFF}, {0x0528, 0x081C, 0x0935, 0x1005, 0x0B27, 0xFFFF}, {0x0620, 0x0C23, 0x033B, 0x072F, 0xFFFF}, {0x0D11, 0x0F19, 0x1409, 0xFFFF}, {0x0716, 0x003C, 0x091E, 0x0F03, 0x0E15, 0x1207, 0x0A2B, 0x003D, 0xFFFF}, {0x0622, 0x0804, 0x0B0A, 0x0C31, 0x0E01, 0x0B33, 0x092D, 0x111D, 0xFFFF}, 
{0x0C25, 0x0837, 0x0B29, 0x101B, 0x1A0D, 0xFFFF}, {0x052A, 0x0D21, 0x0639, 0x170B, 0x131F, 0xFFFF}, {0x0630, 0x0714, 0x0436, 0x0A08, 0x0E13, 0x0F17, 0xFFFF}, {0x0700, 0x0624, 0x0906, 0x0338, 0x0A35, 0x1105, 0xFFFF}, {0x081A, 0x0D23, 0x0C27, 0xFFFF}, {0x0E11, 0x1509, 0x043B, 0x082F, 0xFFFF}, {0x0712, 0x0534, 0x023A, 0x0F15, 0x1307, 0x1019, 0x0B2B, 0x013D, 0xFFFF}, {0x0626, 0x0D0C, 0x042E, 0x0D31, 0x0F01, 0x1003, 0x0A2D, 0x121D, 0xFFFF}, {0x0C33, 0x0D25, 0x0937, 0x111B, 0x1B0D, 0xFFFF}, {0x0710, 0x0E21, 0x0739, 0x0C29, 0xFFFF}, {0x0818, 0x052C, 0x0F13, 0x180B, 0x141F, 0xFFFF}, {0x0632, 0x0802, 0x0B35, 0x1205, 0x1017, 0xFFFF}, {0x0628, 0x091C, 0x0E23, 0x0D27, 0xFFFF}, {0x0720, 0x0F11, 0x1609, 0x053B, 0x092F, 0xFFFF}, {0x1119, 0x023D, 0xFFFF}, {0x0816, 0x013C, 0x0A1E, 0x0E31, 0x1103, 0x1015, 0x1407, 0x0C2B, 0x0B2D, 0x131D, 0xFFFF}, {0x0722, 0x0904, 0x0C0A, 0x1001, 0x0D33, 0x0E25, 0x0A37, 0x121B, 0xFFFF}, {0x0F21, 0x0D29, 0x1C0D, 0xFFFF}, {0x062A, 0x0839, 0x190B, 0x151F, 0xFFFF}, {0x0730, 0x0814, 0x0536, 0x0B08, 0x1013, 0x1305, 0x1117, 0xFFFF}, {0x0800, 0x0724, 0x0A06, 0x0438, 0x0F23, 0x0C35, 0x0E27, 0xFFFF}, {0x091A, 0x1709, 0x063B, 0x0A2F, 0xFFFF}, {0x1011, 0x1219, 0x033D, 0xFFFF}, {0x0812, 0x0634, 0x033A, 0x0F31, 0x1203, 0x1115, 0x1507, 0x0D2B, 0xFFFF}, {0x0726, 0x0E0C, 0x052E, 0x1101, 0x0E33, 0x0F25, 0x0B37, 0x131B, 0x0C2D, 0x141D, 0xFFFF}, {0x0E29, 0x1D0D, 0xFFFF}, {0x0810, 0x1021, 0x0939, 0x1A0B, 0x161F, 0xFFFF}, {0x0918, 0x062C, 0x1113, 0x1217, 0xFFFF}, {0x0732, 0x0902, 0x0D35, 0x1405, 0x0F27, 0xFFFF}, {0x0728, 0x0A1C, 0x1023, 0x073B, 0x0B2F, 0xFFFF}, {0x0820, 0x1111, 0x1319, 0x1809, 0xFFFF}, {0x1303, 0x1215, 0x1607, 0x0E2B, 0x043D, 0xFFFF}, {0x0916, 0x023C, 0x0B1E, 0x1031, 0x1201, 0x0F33, 0x0D2D, 0x151D, 0xFFFF}, {0x0822, 0x0A04, 0x0D0A, 0x1025, 0x0C37, 0x0F29, 0x141B, 0x1E0D, 0xFFFF}, {0x1121, 0x0A39, 0x1B0B, 0x171F, 0xFFFF}, {0x072A, 0x1213, 0x1317, 0xFFFF}, {0x0830, 0x0914, 0x0636, 0x0C08, 0x0E35, 0x1505, 0xFFFF}, {0x0900, 0x0824, 0x0B06, 0x0538, 0x1123, 
0x1027, 0xFFFF}, {0x0A1A, 0x1211, 0x1909, 0x083B, 0x0C2F, 0xFFFF}, {0x1315, 0x1707, 0x1419, 0x0F2B, 0x053D, 0xFFFF}, {0x0912, 0x0734, 0x043A, 0x1131, 0x1301, 0x1403, 0x0E2D, 0x161D, 0xFFFF}, {0x0826, 0x0F0C, 0x062E, 0x1033, 0x1125, 0x0D37, 0x151B, 0x1F0D, 0xFFFF}, {0x1221, 0x0B39, 0x1029, 0xFFFF}, {0x0910, 0x1313, 0x1C0B, 0x181F, 0xFFFF}, {0x0A18, 0x072C, 0x0F35, 0x1605, 0x1417, 0xFFFF}, {0x0832, 0x0A02, 0x1223, 0x1127, 0xFFFF}, {0x0828, 0x0B1C, 0x1311, 0x1A09, 0x093B, 0x0D2F, 0xFFFF}, {0x0920, 0x1519, 0x063D, 0xFFFF}, {0x1231, 0x1503, 0x1415, 0x1807, 0x102B, 0x0F2D, 0x171D, 0xFFFF}, {0x0A16, 0x033C, 0x0C1E, 0x1401, 0x1133, 0x1225, 0x0E37, 0x161B, 0xFFFF}, {0x0922, 0x0B04, 0x0E0A, 0x1321, 0x1129, 0xFFFF}, {0x0C39, 0x1D0B, 0x191F, 0xFFFF}, {0x082A, 0x1413, 0x1705, 0x1517, 0xFFFF}, {0x0930, 0x0A14, 0x0736, 0x0D08, 0x1323, 0x1035, 0x1227, 0xFFFF}, {0x0A00, 0x0924, 0x0C06, 0x0638, 0x1B09, 0x0A3B, 0x0E2F, 0xFFFF}, {0x0B1A, 0x1411, 0x1619, 0x073D, 0xFFFF}, {0x1331, 0x1603, 0x1515, 0x1907, 0x112B, 0xFFFF}, {0x0A12, 0x0834, 0x053A, 0x1501, 0x1233, 0x1325, 0x0F37, 0x171B, 0x102D, 0x181D, 0xFFFF}, {0x0926, 0x072E, 0x1229, 0xFFFF}, {0x1421, 0x0D39, 0x1E0B, 0x1A1F, 0xFFFF}, {0x0A10, 0x1513, 0x1617, 0xFFFF}, {0x0B18, 0x082C, 0x1135, 0x1805, 0x1327, 0xFFFF}, {0x0932, 0x0B02, 0x1423, 0x0B3B, 0x0F2F, 0xFFFF}, {0x0928, 0x0C1C, 0x1511, 0x1719, 0x1C09, 0xFFFF}, {0x0A20, 0x1703, 0x1615, 0x1A07, 0x122B, 0x083D, 0xFFFF}, {0x1431, 0x1601, 0x1333, 0x112D, 0x191D, 0xFFFF}, {0x0B16, 0x043C, 0x0D1E, 0x1425, 0x1037, 0x1329, 0x181B, 0xFFFF}, {0x0A22, 0x0C04, 0x0F0A, 0x1521, 0x0E39, 0x1F0B, 0x1B1F, 0xFFFF}, {0x1613, 0x1717, 0xFFFF}, {0x092A, 0x1235, 0x1905, 0xFFFF}, {0x0A30, 0x0B14, 0x0836, 0x0E08, 0x1523, 0x1427, 0xFFFF}, {0x0B00, 0x0A24, 0x0D06, 0x0738, 0x1611, 0x1D09, 0x0C3B, 0x102F, 0xFFFF}, {0x0C1A, 0x1715, 0x1B07, 0x1819, 0x132B, 0x093D, 0xFFFF}, {0x1531, 0x1701, 0x1803, 0x122D, 0x1A1D, 0xFFFF}, {0x0B12, 0x0934, 0x063A, 0x1433, 0x1525, 0x1137, 0x191B, 0xFFFF}, {0x0A26, 0x003E, 0x082E, 
0x1621, 0x0F39, 0x1429, 0x003F, 0xFFFF}, {0x1713, 0x1C1F, 0xFFFF}, {0x0B10, 0x1335, 0x1A05, 0x1817, 0xFFFF}, {0x0C18, 0x092C, 0x1623, 0x1527, 0xFFFF}, {0x0A32, 0x0C02, 0x1711, 0x1E09, 0x0D3B, 0x112F, 0xFFFF}, {0x0A28, 0x0D1C, 0x1919, 0x0A3D, 0xFFFF}, {0x0B20, 0x1631, 0x1903, 0x1815, 0x1C07, 0x142B, 0x132D, 0x1B1D, 0xFFFF}, {0x1801, 0x1533, 0x1625, 0x1237, 0x1A1B, 0xFFFF}, {0x0C16, 0x053C, 0x0E1E, 0x1721, 0x1529, 0x013F, 0xFFFF}, {0x0B22, 0x0D04, 0x1039, 0x1D1F, 0xFFFF}, {0x1813, 0x1B05, 0x1917, 0xFFFF}, {0x0A2A, 0x1723, 0x1435, 0x1627, 0xFFFF}, {0x0B30, 0x0C14, 0x0936, 0x0F08, 0x1F09, 0x0E3B, 0x122F, 0xFFFF}, {0x0C00, 0x0B24, 0x0E06, 0x0838, 0x1811, 0x1A19, 0x0B3D, 0xFFFF}, {0x0D1A, 0x1731, 0x1A03, 0x1915, 0x1D07, 0x152B, 0xFFFF}, {0x1901, 0x1633, 0x1725, 0x1337, 0x1B1B, 0x142D, 0x1C1D, 0xFFFF}, {0x0C12, 0x0A34, 0x073A, 0x1629, 0x023F, 0xFFFF}, {0x0B26, 0x013E, 0x092E, 0x1821, 0x1139, 0x1E1F, 0xFFFF}, {0x1913, 0x1A17, 0xFFFF}, {0x0C10, 0x1535, 0x1C05, 0x1727, 0xFFFF}, {0x0D18, 0x0A2C, 0x1823, 0x0F3B, 0x132F, 0xFFFF}, {0x0B32, 0x0D02, 0x1911, 0x1B19, 0xFFFF}, {0x0B28, 0x0E1C, 0x1B03, 0x1A15, 0x1E07, 0x162B, 0x0C3D, 0xFFFF}, {0x0C20, 0x1831, 0x1A01, 0x1733, 0x152D, 0x1D1D, 0xFFFF}, {0x1825, 0x1437, 0x1729, 0x1C1B, 0x033F, 0xFFFF}, {0x0D16, 0x063C, 0x0F1E, 0x1921, 0x1239, 0x1F1F, 0xFFFF}, {0x0C22, 0x0E04, 0x1A13, 0x1B17, 0xFFFF}, {0x1635, 0x1D05, 0xFFFF}, {0x0B2A, 0x1923, 0x1827, 0xFFFF}, {0x0C30, 0x0D14, 0x0A36, 0x1A11, 0x103B, 0x142F, 0xFFFF}, {0x0D00, 0x0C24, 0x0F06, 0x0938, 0x1B15, 0x1F07, 0x1C19, 0x172B, 0x0D3D, 0xFFFF}, {0x0E1A, 0x1931, 0x1B01, 0x1C03, 0x162D, 0x1E1D, 0xFFFF}, {0x1833, 0x1925, 0x1537, 0x1D1B, 0xFFFF}, {0x0D12, 0x0B34, 0x083A, 0x1A21, 0x1339, 0x1829, 0x043F, 0xFFFF}, {0x0C26, 0x023E, 0x0A2E, 0x1B13, 0xFFFF}, {0x1735, 0x1E05, 0x1C17, 0xFFFF}, {0x0D10, 0x1A23, 0x1927, 0xFFFF}, {0x0E18, 0x0B2C, 0x1B11, 0x113B, 0x152F, 0xFFFF}, {0x0C32, 0x0E02, 0x1D19, 0x0E3D, 0xFFFF}, {0x0C28, 0x0F1C, 0x1A31, 0x1D03, 0x1C15, 0x182B, 0x172D, 0x1F1D, 0xFFFF}, {0x0D20, 
0x1C01, 0x1933, 0x1A25, 0x1637, 0x1E1B, 0xFFFF}, {0x1B21, 0x1929, 0x053F, 0xFFFF}, {0x0E16, 0x073C, 0x1439, 0xFFFF}, {0x0D22, 0x0F04, 0x1C13, 0x1F05, 0x1D17, 0xFFFF}, {0x1B23, 0x1835, 0x1A27, 0xFFFF}, {0x0C2A, 0x123B, 0x162F, 0xFFFF}, {0x0D30, 0x0E14, 0x0B36, 0x1C11, 0x1E19, 0x0F3D, 0xFFFF}, {0x0E00, 0x0D24, 0x0A38, 0x1B31, 0x1E03, 0x1D15, 0x192B, 0xFFFF}, {0x0F1A, 0x1D01, 0x1A33, 0x1B25, 0x1737, 0x1F1B, 0x182D, 0xFFFF}, {0x1A29, 0x063F, 0xFFFF}, {0x0E12, 0x0C34, 0x093A, 0x1C21, 0x1539, 0xFFFF}, {0x0D26, 0x033E, 0x0B2E, 0x1D13, 0x1E17, 0xFFFF}, {0x1935, 0x1B27, 0xFFFF}, {0x0E10, 0x1C23, 0x133B, 0x172F, 0xFFFF}, {0x0F18, 0x0C2C, 0x1D11, 0x1F19, 0xFFFF}, {0x0D32, 0x0F02, 0x1F03, 0x1E15, 0x1A2B, 0x103D, 0xFFFF}, {0x0D28, 0x1C31, 0x1E01, 0x1B33, 0x192D, 0xFFFF}, {0x0E20, 0x1C25, 0x1837, 0x1B29, 0x073F, 0xFFFF}, {0x1D21, 0x1639, 0xFFFF}, {0x0F16, 0x083C, 0x1E13, 0x1F17, 0xFFFF}, {0x0E22, 0x1A35, 0xFFFF}, {0x1D23, 0x1C27, 0xFFFF}, {0x0D2A, 0x1E11, 0x143B, 0x182F, 0xFFFF}, {0x0E30, 0x0F14, 0x0C36, 0x1F15, 0x1B2B, 0x113D, 0xFFFF}, {0x0F00, 0x0E24, 0x0B38, 0x1D31, 0x1F01, 0x1A2D, 0xFFFF}, {0x1C33, 0x1D25, 0x1937, 0xFFFF}, {0x1E21, 0x1739, 0x1C29, 0x083F, 0xFFFF}, {0x0F12, 0x0D34, 0x0A3A, 0x1F13, 0xFFFF}, {0x0E26, 0x043E, 0x0C2E, 0x1B35, 0xFFFF}, {0x1E23, 0x1D27, 0xFFFF}, {0x0F10, 0x1F11, 0x153B, 0x192F, 0xFFFF}, {0x0D2C, 0x123D, 0xFFFF}, }; struct etc1_block { // big endian uint64: // bit ofs: 56 48 40 32 24 16 8 0 // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 union { uint64 m_uint64; uint8 m_bytes[8]; }; uint8 m_low_color[2]; uint8 m_high_color[2]; enum { cNumSelectorBytes = 4 }; uint8 m_selectors[cNumSelectorBytes]; inline void clear() { zero_this(this); } inline uint get_byte_bits(uint ofs, uint num) const { RG_ETC1_ASSERT((ofs + num) <= 64U); RG_ETC1_ASSERT(num && (num <= 8U)); RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); const uint byte_ofs = 7 - (ofs >> 3); const uint byte_bit_ofs = ofs & 7; return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1); } 
inline void set_byte_bits(uint ofs, uint num, uint bits) { RG_ETC1_ASSERT((ofs + num) <= 64U); RG_ETC1_ASSERT(num && (num < 32U)); RG_ETC1_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); RG_ETC1_ASSERT(bits < (1U << num)); const uint byte_ofs = 7 - (ofs >> 3); const uint byte_bit_ofs = ofs & 7; const uint mask = (1 << num) - 1; m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs); m_bytes[byte_ofs] |= (bits << byte_bit_ofs); } // false = left/right subblocks // true = upper/lower subblocks inline bool get_flip_bit() const { return (m_bytes[3] & 1) != 0; } inline void set_flip_bit(bool flip) { m_bytes[3] &= ~1; m_bytes[3] |= static_cast(flip); } inline bool get_diff_bit() const { return (m_bytes[3] & 2) != 0; } inline void set_diff_bit(bool diff) { m_bytes[3] &= ~2; m_bytes[3] |= (static_cast(diff) << 1); } // Returns intensity modifier table (0-7) used by subblock subblock_id. // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2) inline uint get_inten_table(uint subblock_id) const { RG_ETC1_ASSERT(subblock_id < 2); const uint ofs = subblock_id ? 2 : 5; return (m_bytes[3] >> ofs) & 7; } // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1) inline void set_inten_table(uint subblock_id, uint t) { RG_ETC1_ASSERT(subblock_id < 2); RG_ETC1_ASSERT(t < 8); const uint ofs = subblock_id ? 2 : 5; m_bytes[3] &= ~(7 << ofs); m_bytes[3] |= (t << ofs); } // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. inline uint get_selector(uint x, uint y) const { RG_ETC1_ASSERT((x | y) < 4); const uint bit_index = x * 4 + y; const uint byte_bit_ofs = bit_index & 7; const uint8* p = &m_bytes[7 - (bit_index >> 3)]; const uint lsb = (p[0] >> byte_bit_ofs) & 1; const uint msb = (p[-2] >> byte_bit_ofs) & 1; const uint val = lsb | (msb << 1); return g_etc1_to_selector_index[val]; } // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables. 
inline void set_selector(uint x, uint y, uint val) { RG_ETC1_ASSERT((x | y | val) < 4); const uint bit_index = x * 4 + y; uint8* p = &m_bytes[7 - (bit_index >> 3)]; const uint byte_bit_ofs = bit_index & 7; const uint mask = 1 << byte_bit_ofs; const uint etc1_val = g_selector_index_to_etc1[val]; const uint lsb = etc1_val & 1; const uint msb = etc1_val >> 1; p[0] &= ~mask; p[0] |= (lsb << byte_bit_ofs); p[-2] &= ~mask; p[-2] |= (msb << byte_bit_ofs); } inline void set_base4_color(uint idx, uint16 c) { if (idx) { set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15); set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15); set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15); } else { set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15); set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15); set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15); } } inline uint16 get_base4_color(uint idx) const { uint r, g, b; if (idx) { r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); } else { r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); } return static_cast(b | (g << 4U) | (r << 8U)); } inline void set_base5_color(uint16 c) { set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31); set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31); set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31); } inline uint16 get_base5_color() const { const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5); const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5); const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5); return static_cast(b | (g << 5U) | (r << 10U)); } void set_delta3_color(uint16 c) { set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7); set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7); set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7); } 
inline uint16 get_delta3_color() const { const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); return static_cast(b | (g << 3U) | (r << 6U)); } // Base color 5 static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U); static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U); static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U); static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled); static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); // Delta color 3 // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) static uint16 pack_delta3(int r, int g, int b); // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3); // Abs color 4 static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U); static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U); static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U); static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled); // subblock colors static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx); static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx); static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx); static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4) { if (color4) { dst.r = src.r 
| (src.r << 4); dst.g = src.g | (src.g << 4); dst.b = src.b | (src.b << 4); } else { dst.r = (src.r >> 2) | (src.r << 3); dst.g = (src.g >> 2) | (src.g << 3); dst.b = (src.b >> 2) | (src.b << 3); } dst.a = src.a; } }; // Returns pointer to sorted array. template T* indirect_radix_sort(uint num_indices, T* pIndices0, T* pIndices1, const Q* pKeys, uint key_ofs, uint key_size, bool init_indices) { RG_ETC1_ASSERT((key_ofs >= 0) && (key_ofs < sizeof(T))); RG_ETC1_ASSERT((key_size >= 1) && (key_size <= 4)); if (init_indices) { T* p = pIndices0; T* q = pIndices0 + (num_indices >> 1) * 2; uint i; for (i = 0; p != q; p += 2, i += 2) { p[0] = static_cast(i); p[1] = static_cast(i + 1); } if (num_indices & 1) *p = static_cast(i); } uint hist[256 * 4]; memset(hist, 0, sizeof(hist[0]) * 256 * key_size); #define RG_ETC1_GET_KEY(p) (*(const uint*)((const uint8*)(pKeys + *(p)) + key_ofs)) #define RG_ETC1_GET_KEY_FROM_INDEX(i) (*(const uint*)((const uint8*)(pKeys + (i)) + key_ofs)) if (key_size == 4) { T* p = pIndices0; T* q = pIndices0 + num_indices; for (; p != q; p++) { const uint key = RG_ETC1_GET_KEY(p); hist[key & 0xFF]++; hist[256 + ((key >> 8) & 0xFF)]++; hist[512 + ((key >> 16) & 0xFF)]++; hist[768 + ((key >> 24) & 0xFF)]++; } } else if (key_size == 3) { T* p = pIndices0; T* q = pIndices0 + num_indices; for (; p != q; p++) { const uint key = RG_ETC1_GET_KEY(p); hist[key & 0xFF]++; hist[256 + ((key >> 8) & 0xFF)]++; hist[512 + ((key >> 16) & 0xFF)]++; } } else if (key_size == 2) { T* p = pIndices0; T* q = pIndices0 + (num_indices >> 1) * 2; for (; p != q; p += 2) { const uint key0 = RG_ETC1_GET_KEY(p); const uint key1 = RG_ETC1_GET_KEY(p + 1); hist[key0 & 0xFF]++; hist[256 + ((key0 >> 8) & 0xFF)]++; hist[key1 & 0xFF]++; hist[256 + ((key1 >> 8) & 0xFF)]++; } if (num_indices & 1) { const uint key = RG_ETC1_GET_KEY(p); hist[key & 0xFF]++; hist[256 + ((key >> 8) & 0xFF)]++; } } else { RG_ETC1_ASSERT(key_size == 1); if (key_size != 1) return NULL; T* p = pIndices0; T* q = 
pIndices0 + (num_indices >> 1) * 2; for (; p != q; p += 2) { const uint key0 = RG_ETC1_GET_KEY(p); const uint key1 = RG_ETC1_GET_KEY(p + 1); hist[key0 & 0xFF]++; hist[key1 & 0xFF]++; } if (num_indices & 1) { const uint key = RG_ETC1_GET_KEY(p); hist[key & 0xFF]++; } } T* pCur = pIndices0; T* pNew = pIndices1; for (uint pass = 0; pass < key_size; pass++) { const uint* pHist = &hist[pass << 8]; uint offsets[256]; uint cur_ofs = 0; for (uint i = 0; i < 256; i += 2) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; offsets[i + 1] = cur_ofs; cur_ofs += pHist[i + 1]; } const uint pass_shift = pass << 3; T* p = pCur; T* q = pCur + (num_indices >> 1) * 2; for (; p != q; p += 2) { uint index0 = p[0]; uint index1 = p[1]; uint c0 = (RG_ETC1_GET_KEY_FROM_INDEX(index0) >> pass_shift) & 0xFF; uint c1 = (RG_ETC1_GET_KEY_FROM_INDEX(index1) >> pass_shift) & 0xFF; if (c0 == c1) { uint dst_offset0 = offsets[c0]; offsets[c0] = dst_offset0 + 2; pNew[dst_offset0] = static_cast(index0); pNew[dst_offset0 + 1] = static_cast(index1); } else { uint dst_offset0 = offsets[c0]++; uint dst_offset1 = offsets[c1]++; pNew[dst_offset0] = static_cast(index0); pNew[dst_offset1] = static_cast(index1); } } if (num_indices & 1) { uint index = *p; uint c = (RG_ETC1_GET_KEY_FROM_INDEX(index) >> pass_shift) & 0xFF; uint dst_offset = offsets[c]; offsets[c] = dst_offset + 1; pNew[dst_offset] = static_cast(index); } T* t = pCur; pCur = pNew; pNew = t; } return pCur; } #undef RG_ETC1_GET_KEY #undef RG_ETC1_GET_KEY_FROM_INDEX uint16 etc1_block::pack_color5(const color_quad_u8& color, bool scaled, uint bias) { return pack_color5(color.r, color.g, color.b, scaled, bias); } uint16 etc1_block::pack_color5(uint r, uint g, uint b, bool scaled, uint bias) { if (scaled) { r = (r * 31U + bias) / 255U; g = (g * 31U + bias) / 255U; b = (b * 31U + bias) / 255U; } r = rg_etc1::minimum(r, 31U); g = rg_etc1::minimum(g, 31U); b = rg_etc1::minimum(b, 31U); return static_cast(b | (g << 5U) | (r << 10U)); } color_quad_u8 
etc1_block::unpack_color5(uint16 packed_color5, bool scaled, uint alpha) { uint b = packed_color5 & 31U; uint g = (packed_color5 >> 5U) & 31U; uint r = (packed_color5 >> 10U) & 31U; if (scaled) { b = (b << 3U) | (b >> 2U); g = (g << 3U) | (g >> 2U); r = (r << 3U) | (r >> 2U); } return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U)); } void etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, bool scaled) { color_quad_u8 c(unpack_color5(packed_color5, scaled, 0)); r = c.r; g = c.g; b = c.b; } bool etc1_block::unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) { int dc_r, dc_g, dc_b; unpack_delta3(dc_r, dc_g, dc_b, packed_delta3); int b = (packed_color5 & 31U) + dc_b; int g = ((packed_color5 >> 5U) & 31U) + dc_g; int r = ((packed_color5 >> 10U) & 31U) + dc_r; bool success = true; if (static_cast(r | g | b) > 31U) { success = false; r = rg_etc1::clamp(r, 0, 31); g = rg_etc1::clamp(g, 0, 31); b = rg_etc1::clamp(b, 0, 31); } if (scaled) { b = (b << 3U) | (b >> 2U); g = (g << 3U) | (g >> 2U); r = (r << 3U) | (r >> 2U); } result.set_noclamp_rgba(r, g, b, rg_etc1::minimum(alpha, 255U)); return success; } bool etc1_block::unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha) { color_quad_u8 result; const bool success = unpack_color5(result, packed_color5, packed_delta3, scaled, alpha); r = result.r; g = result.g; b = result.b; return success; } uint16 etc1_block::pack_delta3(int r, int g, int b) { RG_ETC1_ASSERT((r >= cETC1ColorDeltaMin) && (r <= cETC1ColorDeltaMax)); RG_ETC1_ASSERT((g >= cETC1ColorDeltaMin) && (g <= cETC1ColorDeltaMax)); RG_ETC1_ASSERT((b >= cETC1ColorDeltaMin) && (b <= cETC1ColorDeltaMax)); if (r < 0) r += 8; if (g < 0) g += 8; if (b < 0) b += 8; return static_cast(b | (g << 3) | (r << 6)); } void etc1_block::unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3) { r = (packed_delta3 >> 6) & 7; g = 
(packed_delta3 >> 3) & 7; b = packed_delta3 & 7; if (r >= 4) r -= 8; if (g >= 4) g -= 8; if (b >= 4) b -= 8; } uint16 etc1_block::pack_color4(const color_quad_u8& color, bool scaled, uint bias) { return pack_color4(color.r, color.g, color.b, scaled, bias); } uint16 etc1_block::pack_color4(uint r, uint g, uint b, bool scaled, uint bias) { if (scaled) { r = (r * 15U + bias) / 255U; g = (g * 15U + bias) / 255U; b = (b * 15U + bias) / 255U; } r = rg_etc1::minimum(r, 15U); g = rg_etc1::minimum(g, 15U); b = rg_etc1::minimum(b, 15U); return static_cast(b | (g << 4U) | (r << 8U)); } color_quad_u8 etc1_block::unpack_color4(uint16 packed_color4, bool scaled, uint alpha) { uint b = packed_color4 & 15U; uint g = (packed_color4 >> 4U) & 15U; uint r = (packed_color4 >> 8U) & 15U; if (scaled) { b = (b << 4U) | b; g = (g << 4U) | g; r = (r << 4U) | r; } return color_quad_u8(cNoClamp, r, g, b, rg_etc1::minimum(alpha, 255U)); } void etc1_block::unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled) { color_quad_u8 c(unpack_color4(packed_color4, scaled, 0)); r = c.r; g = c.g; b = c.b; } void etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx) { RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; uint r, g, b; unpack_color5(r, g, b, packed_color5, true); const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); const int y0 = pInten_modifer_table[0]; pDst[0].set(ir + y0, ig + y0, ib + y0); const int y1 = pInten_modifer_table[1]; pDst[1].set(ir + y1, ig + y1, ib + y1); const int y2 = pInten_modifer_table[2]; pDst[2].set(ir + y2, ig + y2, ib + y2); const int y3 = pInten_modifer_table[3]; pDst[3].set(ir + y3, ig + y3, ib + y3); } bool etc1_block::get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx) { RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); const int* pInten_modifer_table = 
&g_etc1_inten_tables[table_idx][0]; uint r, g, b; bool success = unpack_color5(r, g, b, packed_color5, packed_delta3, true); const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); const int y0 = pInten_modifer_table[0]; pDst[0].set(ir + y0, ig + y0, ib + y0); const int y1 = pInten_modifer_table[1]; pDst[1].set(ir + y1, ig + y1, ib + y1); const int y2 = pInten_modifer_table[2]; pDst[2].set(ir + y2, ig + y2, ib + y2); const int y3 = pInten_modifer_table[3]; pDst[3].set(ir + y3, ig + y3, ib + y3); return success; } void etc1_block::get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx) { RG_ETC1_ASSERT(table_idx < cETC1IntenModifierValues); const int* pInten_modifer_table = &g_etc1_inten_tables[table_idx][0]; uint r, g, b; unpack_color4(r, g, b, packed_color4, true); const int ir = static_cast(r), ig = static_cast(g), ib = static_cast(b); const int y0 = pInten_modifer_table[0]; pDst[0].set(ir + y0, ig + y0, ib + y0); const int y1 = pInten_modifer_table[1]; pDst[1].set(ir + y1, ig + y1, ib + y1); const int y2 = pInten_modifer_table[2]; pDst[2].set(ir + y2, ig + y2, ib + y2); const int y3 = pInten_modifer_table[3]; pDst[3].set(ir + y3, ig + y3, ib + y3); } bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha) { color_quad_u8* pDst = reinterpret_cast(pDst_pixels_rgba); const etc1_block& block = *static_cast(pETC1_block); const bool diff_flag = block.get_diff_bit(); const bool flip_flag = block.get_flip_bit(); const uint table_index0 = block.get_inten_table(0); const uint table_index1 = block.get_inten_table(1); color_quad_u8 subblock_colors0[4]; color_quad_u8 subblock_colors1[4]; bool success = true; if (diff_flag) { const uint16 base_color5 = block.get_base5_color(); const uint16 delta_color3 = block.get_delta3_color(); etc1_block::get_diff_subblock_colors(subblock_colors0, base_color5, table_index0); if (!etc1_block::get_diff_subblock_colors(subblock_colors1, base_color5, 
delta_color3, table_index1)) success = false; } else { const uint16 base_color4_0 = block.get_base4_color(0); etc1_block::get_abs_subblock_colors(subblock_colors0, base_color4_0, table_index0); const uint16 base_color4_1 = block.get_base4_color(1); etc1_block::get_abs_subblock_colors(subblock_colors1, base_color4_1, table_index1); } if (preserve_alpha) { if (flip_flag) { for (uint y = 0; y < 2; y++) { pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); pDst[2].set_rgb(subblock_colors0[block.get_selector(2, y)]); pDst[3].set_rgb(subblock_colors0[block.get_selector(3, y)]); pDst += 4; } for (uint y = 2; y < 4; y++) { pDst[0].set_rgb(subblock_colors1[block.get_selector(0, y)]); pDst[1].set_rgb(subblock_colors1[block.get_selector(1, y)]); pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); pDst += 4; } } else { for (uint y = 0; y < 4; y++) { pDst[0].set_rgb(subblock_colors0[block.get_selector(0, y)]); pDst[1].set_rgb(subblock_colors0[block.get_selector(1, y)]); pDst[2].set_rgb(subblock_colors1[block.get_selector(2, y)]); pDst[3].set_rgb(subblock_colors1[block.get_selector(3, y)]); pDst += 4; } } } else { if (flip_flag) { // 0000 // 0000 // 1111 // 1111 for (uint y = 0; y < 2; y++) { pDst[0] = subblock_colors0[block.get_selector(0, y)]; pDst[1] = subblock_colors0[block.get_selector(1, y)]; pDst[2] = subblock_colors0[block.get_selector(2, y)]; pDst[3] = subblock_colors0[block.get_selector(3, y)]; pDst += 4; } for (uint y = 2; y < 4; y++) { pDst[0] = subblock_colors1[block.get_selector(0, y)]; pDst[1] = subblock_colors1[block.get_selector(1, y)]; pDst[2] = subblock_colors1[block.get_selector(2, y)]; pDst[3] = subblock_colors1[block.get_selector(3, y)]; pDst += 4; } } else { // 0011 // 0011 // 0011 // 0011 for (uint y = 0; y < 4; y++) { pDst[0] = subblock_colors0[block.get_selector(0, y)]; pDst[1] = 
subblock_colors0[block.get_selector(1, y)]; pDst[2] = subblock_colors1[block.get_selector(2, y)]; pDst[3] = subblock_colors1[block.get_selector(3, y)]; pDst += 4; } } } return success; } bool unpack_etc2_color(const void* pBlock, unsigned int* pDst_pixels_rgba, bool preserve_alpha) { if (unpack_etc1_block(pBlock, pDst_pixels_rgba, preserve_alpha)) return true; color_quad_u8* pDst = reinterpret_cast(pDst_pixels_rgba); const etc1_block& block = *static_cast(pBlock); const uint8* B = block.m_bytes; const bool rOverflow = ((int8(B[0] << 5) >> 5) + (B[0] >> 3)) & 0x20; const bool gOverflow = ((int8(B[1] << 5) >> 5) + (B[1] >> 3)) & 0x20; if (rOverflow || gOverflow) { color_quad_u8 block_colors[4]; uint8 unpacked[3]; if (rOverflow) { unpacked[0] = (B[0] & 0x3) | (B[0] >> 1 & 0xC) | (B[2] & 0xF0); unpacked[1] = B[1] >> 4 | B[2] << 4; unpacked[2] = (B[1] & 0xF) | (B[3] & 0xF0); uint8 delta = g_etc2_modifier_table[(B[3] & 1) | (B[3] >> 1 & 6)]; for (uint c = 0; c < 3; c++) { block_colors[2][c] = unpacked[c] << 4 | (unpacked[c] & 0xF); block_colors[1][c] = unpacked[c] >> 4 | (unpacked[c] & 0xF0); block_colors[0][c] = math::maximum(0, block_colors[1][c] - delta); block_colors[3][c] = math::minimum(255, block_colors[1][c] + delta); } } else { unpacked[0] = (B[0] >> 3 & 0xF) | (B[2] << 1 & 0xF0); unpacked[1] = (B[1] >> 4 & 0x1) | (B[0] << 1 & 0xE) | (B[3] >> 3 & 0x10) | B[2] << 5; unpacked[2] = B[2] >> 7 | (B[1] << 1 & 0x6) | (B[1] & 0x8) | (B[3] << 1 & 0xF0); uint8 modifier = (B[3] & 4) | (B[3] << 1 & 2) | 1; for (int d = 0, c = 0; !d && c < 3; c++, modifier &= d < 0 ? 
6 : 7) d = (unpacked[c] & 0xF) - (unpacked[c] >> 4); uint8 delta = g_etc2_modifier_table[modifier]; for (uint c = 0; c < 3; c++) { uint8 c0 = unpacked[c] << 4 | (unpacked[c] & 0xF); uint8 c1 = unpacked[c] >> 4 | (unpacked[c] & 0xF0); block_colors[0][c] = math::maximum(0, c1 - delta); block_colors[1][c] = math::minimum(255, c1 + delta); block_colors[2][c] = math::minimum(255, c0 + delta); block_colors[3][c] = math::maximum(0, c0 - delta); } } for (uint i = 0; i < 4; i++) { for (uint j = 0; j < 4; j++, pDst++) { pDst->set_rgb(block_colors[block.get_selector(j, i)]); if (!preserve_alpha) pDst->a = 255; } } } else { int16 base[3], dj[3], di[3], color[3]; base[0] = (B[0] << 1 & 0xFC) | (B[0] >> 5 & 3); base[1] = (B[0] << 7 & 0x80) | (B[1] & 0x7E) | (B[0] & 1); base[2] = (B[1] << 7 & 0x80) | (B[2] << 2 & 0x60) | (B[2] << 3 & 0x18) | (B[3] >> 5 & 4) | (B[1] << 1 & 2) | (B[2] >> 4 & 1); di[0] = ((B[5] << 5 & 0xE0) | (B[6] >> 3 & 0x1C) | (B[5] >> 1 & 0x3)) - base[0]; di[1] = ((B[6] << 3 & 0xF8) | (B[7] >> 5 & 0x6) | (B[6] >> 4 & 0x1)) - base[1]; di[2] = ((B[7] << 2 & 0xFC) | (B[7] >> 4 & 0x3)) - base[2]; dj[0] = ((B[3] << 1 & 0xF8) | (B[3] << 2 & 0x4) | (B[3] >> 5 & 0x3)) - base[0]; dj[1] = ((B[4] & 0xFE) | B[4] >> 7) - base[1]; dj[2] = ((B[4] << 7 & 0x80) | (B[5] >> 1 & 0x7C) | (B[4] << 1 & 0x2) | B[5] >> 7) - base[2]; for (uint c = 0; c < 3; c++) base[c] = (base[c] << 2) + 2; for (uint i = 0; i < 4; i++) { for (uint c = 0; c < 3; base[c] += di[c], c++) color[c] = base[c]; for (uint j = 0; j < 4; j++, pDst++) { for (uint c = 0; c < 3; color[c] += dj[c], c++) pDst->c[c] = math::clamp(color[c], 0, 1020) >> 2; if (!preserve_alpha) pDst->a = 255; } } } return true; } bool unpack_etc2_alpha(const void* pBlock, unsigned int* pDst_pixels_rgba, int comp_index) { color_quad_u8* pDst = (color_quad_u8*)pDst_pixels_rgba; const uint8* B = (const uint8*)pBlock; const int* modifier = g_etc2a_modifier_table[B[1] & 0xF]; uint8 values[8]; for (int base_codeword = B[0], multiplier = B[1] >> 
4, i = 0; i < 8; i++) values[i] = math::clamp(base_codeword + modifier[i] * multiplier, 0, 255); for (uint d0 = 3, i = 0; i < 4; i++, d0 += 3) { for (uint d = d0, j = 0; j < 4; j++, pDst++, d += 12) { int byte_offset = 2 + (d >> 3); int bit_offset = d & 7; int s = B[byte_offset] >> (8 - bit_offset) & 7; if (bit_offset < 3) s |= B[byte_offset - 1] << bit_offset & 7; pDst->c[comp_index] = values[s]; } } return true; } struct etc1_solution_coordinates { inline etc1_solution_coordinates() : m_unscaled_color(0, 0, 0, 0), m_inten_table(0), m_color4(false) { } inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) : m_unscaled_color(r, g, b, 255), m_inten_table(inten_table), m_color4(color4) { } inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) : m_unscaled_color(c), m_inten_table(inten_table), m_color4(color4) { } inline etc1_solution_coordinates(const etc1_solution_coordinates& other) { *this = other; } inline etc1_solution_coordinates& operator=(const etc1_solution_coordinates& rhs) { m_unscaled_color = rhs.m_unscaled_color; m_inten_table = rhs.m_inten_table; m_color4 = rhs.m_color4; return *this; } inline void clear() { m_unscaled_color.clear(); m_inten_table = 0; m_color4 = false; } inline color_quad_u8 get_scaled_color() const { int br, bg, bb; if (m_color4) { br = m_unscaled_color.r | (m_unscaled_color.r << 4); bg = m_unscaled_color.g | (m_unscaled_color.g << 4); bb = m_unscaled_color.b | (m_unscaled_color.b << 4); } else { br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); } return color_quad_u8(br, bg, bb); } inline void get_block_colors(color_quad_u8* pBlock_colors) { int br, bg, bb; if (m_color4) { br = m_unscaled_color.r | (m_unscaled_color.r << 4); bg = m_unscaled_color.g | (m_unscaled_color.g << 4); bb = m_unscaled_color.b | (m_unscaled_color.b << 4); } else { 
br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); } const int* pInten_table = g_etc1_inten_tables[m_inten_table]; pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]); pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]); pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]); pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]); } color_quad_u8 m_unscaled_color; uint m_inten_table; bool m_color4; }; class etc1_optimizer { etc1_optimizer(const etc1_optimizer&); etc1_optimizer& operator=(const etc1_optimizer&); public: etc1_optimizer() { clear(); } void clear() { m_pParams = NULL; m_pResult = NULL; m_pSorted_luma = NULL; m_pSorted_luma_indices = NULL; } struct params : etc1_pack_params { params() { clear(); } params(const etc1_pack_params& base_params) : etc1_pack_params(base_params) { clear_optimizer_params(); } void clear() { etc1_pack_params::clear(); clear_optimizer_params(); } void clear_optimizer_params() { m_num_src_pixels = 0; m_pSrc_pixels = 0; m_use_color4 = false; static const int s_default_scan_delta[] = {0}; m_pScan_deltas = s_default_scan_delta; m_scan_delta_size = 1; m_base_color5.clear(); m_constrain_against_base_color5 = false; } uint m_num_src_pixels; const color_quad_u8* m_pSrc_pixels; bool m_use_color4; const int* m_pScan_deltas; uint m_scan_delta_size; color_quad_u8 m_base_color5; bool m_constrain_against_base_color5; }; struct results { uint64 m_error; color_quad_u8 m_block_color_unscaled; uint m_block_inten_table; uint m_n; uint8* m_pSelectors; bool m_block_color4; inline results& operator=(const results& rhs) { m_block_color_unscaled = rhs.m_block_color_unscaled; m_block_color4 = rhs.m_block_color4; m_block_inten_table = rhs.m_block_inten_table; m_error = rhs.m_error; 
RG_ETC1_ASSERT(m_n == rhs.m_n); memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n); return *this; } }; void init(const params& params, results& result); bool compute(); private: struct potential_solution { potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false) { } etc1_solution_coordinates m_coords; uint8 m_selectors[8]; uint64 m_error; bool m_valid; void clear() { m_coords.clear(); m_error = cUINT64_MAX; m_valid = false; } }; const params* m_pParams; results* m_pResult; int m_limit; vec3F m_avg_color; int m_br, m_bg, m_bb; uint16 m_luma[8]; uint32 m_sorted_luma[2][8]; const uint32* m_pSorted_luma_indices; uint32* m_pSorted_luma; uint8 m_selectors[8]; uint8 m_best_selectors[8]; potential_solution m_best_solution; potential_solution m_trial_solution; uint8 m_temp_selectors[8]; bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); }; bool etc1_optimizer::compute() { const uint n = m_pParams->m_num_src_pixels; const int scan_delta_size = m_pParams->m_scan_delta_size; // Scan through a subset of the 3D lattice centered around the avg block color trying each 3D (555 or 444) lattice point as a potential block color. // Each time a better solution is found try to refine the current solution's block color based of the current selectors and intensity table index. 
for (int zdi = 0; zdi < scan_delta_size; zdi++) { const int zd = m_pParams->m_pScan_deltas[zdi]; const int mbb = m_bb + zd; if (mbb < 0) continue; else if (mbb > m_limit) break; for (int ydi = 0; ydi < scan_delta_size; ydi++) { const int yd = m_pParams->m_pScan_deltas[ydi]; const int mbg = m_bg + yd; if (mbg < 0) continue; else if (mbg > m_limit) break; for (int xdi = 0; xdi < scan_delta_size; xdi++) { const int xd = m_pParams->m_pScan_deltas[xdi]; const int mbr = m_br + xd; if (mbr < 0) continue; else if (mbr > m_limit) break; etc1_solution_coordinates coords(mbr, mbg, mbb, 0, m_pParams->m_use_color4); if (m_pParams->m_quality == cHighQuality) { if (!evaluate_solution(coords, m_trial_solution, &m_best_solution)) continue; } else { if (!evaluate_solution_fast(coords, m_trial_solution, &m_best_solution)) continue; } // Now we have the input block, the avg. color of the input pixels, a set of trial selector indices, and the block color+intensity index. // Now, for each component, attempt to refine the current solution by solving a simple linear equation. 
For example, for 4 colors: // The goal is: // pixel0 - (block_color+inten_table[selector0]) + pixel1 - (block_color+inten_table[selector1]) + pixel2 - (block_color+inten_table[selector2]) + pixel3 - (block_color+inten_table[selector3]) = 0 // Rearranging this: // (pixel0 + pixel1 + pixel2 + pixel3) - (block_color+inten_table[selector0]) - (block_color+inten_table[selector1]) - (block_color+inten_table[selector2]) - (block_color+inten_table[selector3]) = 0 // (pixel0 + pixel1 + pixel2 + pixel3) - block_color - inten_table[selector0] - block_color-inten_table[selector1] - block_color-inten_table[selector2] - block_color-inten_table[selector3] = 0 // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - inten_table[selector0] - inten_table[selector1] - inten_table[selector2] - inten_table[selector3] = 0 // (pixel0 + pixel1 + pixel2 + pixel3) - 4*block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3]) = 0 // (pixel0 + pixel1 + pixel2 + pixel3)/4 - block_color - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 = 0 // block_color = (pixel0 + pixel1 + pixel2 + pixel3)/4 - (inten_table[selector0] + inten_table[selector1] + inten_table[selector2] + inten_table[selector3])/4 // So what this means: // optimal_block_color = avg_input - avg_inten_delta // So the optimal block color can be computed by taking the average block color and subtracting the current average of the intensity delta. // Unfortunately, optimal_block_color must then be quantized to 555 or 444 so it's not always possible to improve matters using this formula. // Also, the above formula is for unclamped intensity deltas. The actual implementation takes into account clamping. const uint max_refinement_trials = (m_pParams->m_quality == cLowQuality) ? 2 : (((xd | yd | zd) == 0) ? 
4 : 2); for (uint refinement_trial = 0; refinement_trial < max_refinement_trials; refinement_trial++) { const uint8* pSelectors = m_best_solution.m_selectors; const int* pInten_table = g_etc1_inten_tables[m_best_solution.m_coords.m_inten_table]; int delta_sum_r = 0, delta_sum_g = 0, delta_sum_b = 0; const color_quad_u8 base_color(m_best_solution.m_coords.get_scaled_color()); for (uint r = 0; r < n; r++) { const uint s = *pSelectors++; const int yd = pInten_table[s]; // Compute actual delta being applied to each pixel, taking into account clamping. delta_sum_r += rg_etc1::clamp(base_color.r + yd, 0, 255) - base_color.r; delta_sum_g += rg_etc1::clamp(base_color.g + yd, 0, 255) - base_color.g; delta_sum_b += rg_etc1::clamp(base_color.b + yd, 0, 255) - base_color.b; } if ((!delta_sum_r) && (!delta_sum_g) && (!delta_sum_b)) break; const float avg_delta_r_f = static_cast(delta_sum_r) / n; const float avg_delta_g_f = static_cast(delta_sum_g) / n; const float avg_delta_b_f = static_cast(delta_sum_b) / n; const int br1 = rg_etc1::clamp(static_cast((m_avg_color[0] - avg_delta_r_f) * m_limit / 255.0f + .5f), 0, m_limit); const int bg1 = rg_etc1::clamp(static_cast((m_avg_color[1] - avg_delta_g_f) * m_limit / 255.0f + .5f), 0, m_limit); const int bb1 = rg_etc1::clamp(static_cast((m_avg_color[2] - avg_delta_b_f) * m_limit / 255.0f + .5f), 0, m_limit); bool skip = false; if ((mbr == br1) && (mbg == bg1) && (mbb == bb1)) skip = true; else if ((br1 == m_best_solution.m_coords.m_unscaled_color.r) && (bg1 == m_best_solution.m_coords.m_unscaled_color.g) && (bb1 == m_best_solution.m_coords.m_unscaled_color.b)) skip = true; else if ((m_br == br1) && (m_bg == bg1) && (m_bb == bb1)) skip = true; if (skip) break; etc1_solution_coordinates coords1(br1, bg1, bb1, 0, m_pParams->m_use_color4); if (m_pParams->m_quality == cHighQuality) { if (!evaluate_solution(coords1, m_trial_solution, &m_best_solution)) break; } else { if (!evaluate_solution_fast(coords1, m_trial_solution, &m_best_solution)) 
break; } } // refinement_trial } // xdi } // ydi } // zdi if (!m_best_solution.m_valid) { m_pResult->m_error = cUINT32_MAX; return false; } const uint8* pSelectors = m_best_solution.m_selectors; #ifdef RG_ETC1_BUILD_DEBUG { color_quad_u8 block_colors[4]; m_best_solution.m_coords.get_block_colors(block_colors); const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; uint64 actual_error = 0; for (uint i = 0; i < n; i++) actual_error += pSrc_pixels[i].squared_distance_rgb(block_colors[pSelectors[i]]); RG_ETC1_ASSERT(actual_error == m_best_solution.m_error); } #endif m_pResult->m_error = m_best_solution.m_error; m_pResult->m_block_color_unscaled = m_best_solution.m_coords.m_unscaled_color; m_pResult->m_block_color4 = m_best_solution.m_coords.m_color4; m_pResult->m_block_inten_table = m_best_solution.m_coords.m_inten_table; memcpy(m_pResult->m_pSelectors, pSelectors, n); m_pResult->m_n = n; return true; } void etc1_optimizer::init(const params& p, results& r) { // This version is hardcoded for 8 pixel subblocks. RG_ETC1_ASSERT(p.m_num_src_pixels == 8); m_pParams = &p; m_pResult = &r; const uint n = 8; m_limit = m_pParams->m_use_color4 ? 
15 : 31; vec3F avg_color(0.0f); for (uint i = 0; i < n; i++) { const color_quad_u8& c = m_pParams->m_pSrc_pixels[i]; const vec3F fc(c.r, c.g, c.b); avg_color += fc; m_luma[i] = static_cast(c.r + c.g + c.b); m_sorted_luma[0][i] = i; } avg_color *= (1.0f / static_cast(n)); m_avg_color = avg_color; m_br = rg_etc1::clamp(static_cast(m_avg_color[0] * m_limit / 255.0f + .5f), 0, m_limit); m_bg = rg_etc1::clamp(static_cast(m_avg_color[1] * m_limit / 255.0f + .5f), 0, m_limit); m_bb = rg_etc1::clamp(static_cast(m_avg_color[2] * m_limit / 255.0f + .5f), 0, m_limit); if (m_pParams->m_quality <= cMediumQuality) { m_pSorted_luma_indices = indirect_radix_sort(n, m_sorted_luma[0], m_sorted_luma[1], m_luma, 0, sizeof(m_luma[0]), false); m_pSorted_luma = m_sorted_luma[0]; if (m_pSorted_luma_indices == m_sorted_luma[0]) m_pSorted_luma = m_sorted_luma[1]; for (uint i = 0; i < n; i++) m_pSorted_luma[i] = m_luma[m_pSorted_luma_indices[i]]; } m_best_solution.m_coords.clear(); m_best_solution.m_valid = false; m_best_solution.m_error = cUINT64_MAX; } bool etc1_optimizer::evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution) { trial_solution.m_valid = false; if (m_pParams->m_constrain_against_base_color5) { const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax)) return false; } const color_quad_u8 base_color(coords.get_scaled_color()); const uint n = 8; trial_solution.m_error = cUINT64_MAX; for (uint inten_table = 0; inten_table < cETC1IntenModifierValues; inten_table++) { const int* pInten_table = g_etc1_inten_tables[inten_table]; color_quad_u8 block_colors[4]; for (uint s = 0; s < 4; s++) { const int yd = pInten_table[s]; 
block_colors[s].set(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); } uint64 total_error = 0; const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; for (uint c = 0; c < n; c++) { const color_quad_u8& src_pixel = *pSrc_pixels++; uint best_selector_index = 0; uint best_error = rg_etc1::square(src_pixel.r - block_colors[0].r) + rg_etc1::square(src_pixel.g - block_colors[0].g) + rg_etc1::square(src_pixel.b - block_colors[0].b); uint trial_error = rg_etc1::square(src_pixel.r - block_colors[1].r) + rg_etc1::square(src_pixel.g - block_colors[1].g) + rg_etc1::square(src_pixel.b - block_colors[1].b); if (trial_error < best_error) { best_error = trial_error; best_selector_index = 1; } trial_error = rg_etc1::square(src_pixel.r - block_colors[2].r) + rg_etc1::square(src_pixel.g - block_colors[2].g) + rg_etc1::square(src_pixel.b - block_colors[2].b); if (trial_error < best_error) { best_error = trial_error; best_selector_index = 2; } trial_error = rg_etc1::square(src_pixel.r - block_colors[3].r) + rg_etc1::square(src_pixel.g - block_colors[3].g) + rg_etc1::square(src_pixel.b - block_colors[3].b); if (trial_error < best_error) { best_error = trial_error; best_selector_index = 3; } m_temp_selectors[c] = static_cast(best_selector_index); total_error += best_error; if (total_error >= trial_solution.m_error) break; } if (total_error < trial_solution.m_error) { trial_solution.m_error = total_error; trial_solution.m_coords.m_inten_table = inten_table; memcpy(trial_solution.m_selectors, m_temp_selectors, 8); trial_solution.m_valid = true; } } trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; bool success = false; if (pBest_solution) { if (trial_solution.m_error < pBest_solution->m_error) { *pBest_solution = trial_solution; success = true; } } return success; } bool etc1_optimizer::evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, 
potential_solution* pBest_solution) { if (m_pParams->m_constrain_against_base_color5) { const int dr = coords.m_unscaled_color.r - m_pParams->m_base_color5.r; const int dg = coords.m_unscaled_color.g - m_pParams->m_base_color5.g; const int db = coords.m_unscaled_color.b - m_pParams->m_base_color5.b; if ((rg_etc1::minimum(dr, dg, db) < cETC1ColorDeltaMin) || (rg_etc1::maximum(dr, dg, db) > cETC1ColorDeltaMax)) { trial_solution.m_valid = false; return false; } } const color_quad_u8 base_color(coords.get_scaled_color()); const uint n = 8; trial_solution.m_error = cUINT64_MAX; for (int inten_table = cETC1IntenModifierValues - 1; inten_table >= 0; --inten_table) { const int* pInten_table = g_etc1_inten_tables[inten_table]; uint block_inten[4]; color_quad_u8 block_colors[4]; for (uint s = 0; s < 4; s++) { const int yd = pInten_table[s]; color_quad_u8 block_color(base_color.r + yd, base_color.g + yd, base_color.b + yd, 0); block_colors[s] = block_color; block_inten[s] = block_color.r + block_color.g + block_color.b; } // evaluate_solution_fast() enforces/assumesd a total ordering of the input colors along the intensity (1,1,1) axis to more quickly classify the inputs to selectors. // The inputs colors have been presorted along the projection onto this axis, and ETC1 block colors are always ordered along the intensity axis, so this classification is fast. 
// 0 1 2 3 // 01 12 23 const uint block_inten_midpoints[3] = {block_inten[0] + block_inten[1], block_inten[1] + block_inten[2], block_inten[2] + block_inten[3]}; uint64 total_error = 0; const color_quad_u8* pSrc_pixels = m_pParams->m_pSrc_pixels; if ((m_pSorted_luma[n - 1] * 2) < block_inten_midpoints[0]) { if (block_inten[0] > m_pSorted_luma[n - 1]) { const uint min_error = block_inten[0] - m_pSorted_luma[n - 1]; if (min_error >= trial_solution.m_error) continue; } memset(&m_temp_selectors[0], 0, n); for (uint c = 0; c < n; c++) total_error += block_colors[0].squared_distance_rgb(pSrc_pixels[c]); } else if ((m_pSorted_luma[0] * 2) >= block_inten_midpoints[2]) { if (m_pSorted_luma[0] > block_inten[3]) { const uint min_error = m_pSorted_luma[0] - block_inten[3]; if (min_error >= trial_solution.m_error) continue; } memset(&m_temp_selectors[0], 3, n); for (uint c = 0; c < n; c++) total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[c]); } else { uint cur_selector = 0, c; for (c = 0; c < n; c++) { const uint y = m_pSorted_luma[c]; while ((y * 2) >= block_inten_midpoints[cur_selector]) if (++cur_selector > 2) goto done; const uint sorted_pixel_index = m_pSorted_luma_indices[c]; m_temp_selectors[sorted_pixel_index] = static_cast(cur_selector); total_error += block_colors[cur_selector].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]); } done: while (c < n) { const uint sorted_pixel_index = m_pSorted_luma_indices[c]; m_temp_selectors[sorted_pixel_index] = 3; total_error += block_colors[3].squared_distance_rgb(pSrc_pixels[sorted_pixel_index]); ++c; } } if (total_error < trial_solution.m_error) { trial_solution.m_error = total_error; trial_solution.m_coords.m_inten_table = inten_table; memcpy(trial_solution.m_selectors, m_temp_selectors, n); trial_solution.m_valid = true; if (!total_error) break; } } trial_solution.m_coords.m_unscaled_color = coords.m_unscaled_color; trial_solution.m_coords.m_color4 = m_pParams->m_use_color4; bool success = false; if 
(pBest_solution) { if (trial_solution.m_error < pBest_solution->m_error) { *pBest_solution = trial_solution; success = true; } } return success; } static uint etc1_decode_value(uint diff, uint inten, uint selector, uint packed_c) { RG_ETC1_ASSERT((diff < 2) && (inten < 8) && (selector < 4) && (packed_c < (diff ? 32 : 16))); int c; if (diff) c = (packed_c >> 2) | (packed_c << 3); else c = packed_c | (packed_c << 4); c += g_etc1_inten_tables[inten][selector]; c = rg_etc1::clamp(c, 0, 255); return c; } static inline int mul_8bit(int a, int b) { int t = a * b + 128; return (t + (t >> 8)) >> 8; } void pack_etc1_block_init() { for (uint diff = 0; diff < 2; diff++) { const uint limit = diff ? 32 : 16; for (uint inten = 0; inten < 8; inten++) { for (uint selector = 0; selector < 4; selector++) { const uint inverse_table_index = diff + (inten << 1) + (selector << 4); for (int color = 0; color < 256; color++) { uint best_error = cUINT32_MAX, best_packed_c = 0; for (uint packed_c = 0; packed_c < limit; packed_c++) { int v = etc1_decode_value(diff, inten, selector, packed_c); uint err = labs(v - color); if (err < best_error) { best_error = err; best_packed_c = packed_c; if (!best_error) break; } } RG_ETC1_ASSERT(best_error <= 255); g_etc1_inverse_lookup[inverse_table_index][color] = static_cast(best_packed_c | (best_error << 8)); } } } } uint expand5[32]; for (int i = 0; i < 32; i++) expand5[i] = (i << 3) | (i >> 2); for (int i = 0; i < 256 + 16; i++) { int v = clamp(i - 8, 0, 255); g_quant5_tab[i] = static_cast(expand5[mul_8bit(v, 31)]); } } // Packs solid color blocks efficiently using a set of small precomputed tables. // For random 888 inputs, MSE results are better than Erricson's ETC1 packer in "slow" mode ~9.5% of the time, is slightly worse only ~.01% of the time, and is equal the rest of the time. 
static uint64 pack_etc1_block_solid_color(etc1_block& block, const uint8* pColor) { RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); static uint s_next_comp[4] = {1, 2, 0, 1}; uint best_error = cUINT32_MAX, best_i = 0; int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. for (uint i = 0; i < 3; i++) { const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; const int delta_range = 1; for (int delta = -delta_range; delta <= delta_range; delta++) { const int c_plus_delta = rg_etc1::clamp(pColor[i] + delta, 0, 255); const uint16* pTable; if (!c_plus_delta) pTable = g_color8_to_etc_block_config_0_255[0]; else if (c_plus_delta == 255) pTable = g_color8_to_etc_block_config_0_255[1]; else pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; do { const uint x = *pTable++; #ifdef RG_ETC1_BUILD_DEBUG const uint diff = x & 1; const uint inten = (x >> 1) & 7; const uint selector = (x >> 4) & 3; const uint p0 = (x >> 8) & 255; RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); #endif const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; uint16 p1 = pInverse_table[c1]; uint16 p2 = pInverse_table[c2]; const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8); if (trial_error < best_error) { best_error = trial_error; best_x = x; best_packed_c1 = p1 & 0xFF; best_packed_c2 = p2 & 0xFF; best_i = i; if (!best_error) goto found_perfect_match; } } while (*pTable != 0xFFFF); } } found_perfect_match: const uint diff = best_x & 1; const uint inten = (best_x >> 1) & 7; block.m_bytes[3] = static_cast(((inten | (inten << 3)) << 2) | (diff << 1)); const uint etc1_selector = g_selector_index_to_etc1[(best_x >> 4) & 3]; *reinterpret_cast(&block.m_bytes[4]) = (etc1_selector & 2) ? 
0xFFFF : 0; *reinterpret_cast(&block.m_bytes[6]) = (etc1_selector & 1) ? 0xFFFF : 0; const uint best_packed_c0 = (best_x >> 8) & 255; if (diff) { block.m_bytes[best_i] = static_cast(best_packed_c0 << 3); block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 << 3); block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 << 3); } else { block.m_bytes[best_i] = static_cast(best_packed_c0 | (best_packed_c0 << 4)); block.m_bytes[s_next_comp[best_i]] = static_cast(best_packed_c1 | (best_packed_c1 << 4)); block.m_bytes[s_next_comp[best_i + 1]] = static_cast(best_packed_c2 | (best_packed_c2 << 4)); } return best_error; } static uint pack_etc1_block_solid_color_constrained( etc1_optimizer::results& results, uint num_colors, const uint8* pColor, bool use_diff, const color_quad_u8* pBase_color5_unscaled) { RG_ETC1_ASSERT(g_etc1_inverse_lookup[0][255]); static uint s_next_comp[4] = {1, 2, 0, 1}; uint best_error = cUINT32_MAX, best_i = 0; int best_x = 0, best_packed_c1 = 0, best_packed_c2 = 0; // For each possible 8-bit value, there is a precomputed list of diff/inten/selector configurations that allow that 8-bit value to be encoded with no error. 
for (uint i = 0; i < 3; i++) { const uint c1 = pColor[s_next_comp[i]], c2 = pColor[s_next_comp[i + 1]]; const int delta_range = 1; for (int delta = -delta_range; delta <= delta_range; delta++) { const int c_plus_delta = rg_etc1::clamp(pColor[i] + delta, 0, 255); const uint16* pTable; if (!c_plus_delta) pTable = g_color8_to_etc_block_config_0_255[0]; else if (c_plus_delta == 255) pTable = g_color8_to_etc_block_config_0_255[1]; else pTable = g_color8_to_etc_block_config_1_to_254[c_plus_delta - 1]; do { const uint x = *pTable++; const uint diff = x & 1; if (static_cast(use_diff) != diff) { if (*pTable == 0xFFFF) break; continue; } if ((diff) && (pBase_color5_unscaled)) { const int p0 = (x >> 8) & 255; int delta = p0 - static_cast(pBase_color5_unscaled->c[i]); if ((delta < cETC1ColorDeltaMin) || (delta > cETC1ColorDeltaMax)) { if (*pTable == 0xFFFF) break; continue; } } #ifdef RG_ETC1_BUILD_DEBUG { const uint inten = (x >> 1) & 7; const uint selector = (x >> 4) & 3; const uint p0 = (x >> 8) & 255; RG_ETC1_ASSERT(etc1_decode_value(diff, inten, selector, p0) == (uint)c_plus_delta); } #endif const uint16* pInverse_table = g_etc1_inverse_lookup[x & 0xFF]; uint16 p1 = pInverse_table[c1]; uint16 p2 = pInverse_table[c2]; if ((diff) && (pBase_color5_unscaled)) { int delta1 = (p1 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i]]); int delta2 = (p2 & 0xFF) - static_cast(pBase_color5_unscaled->c[s_next_comp[i + 1]]); if ((delta1 < cETC1ColorDeltaMin) || (delta1 > cETC1ColorDeltaMax) || (delta2 < cETC1ColorDeltaMin) || (delta2 > cETC1ColorDeltaMax)) { if (*pTable == 0xFFFF) break; continue; } } const uint trial_error = rg_etc1::square(c_plus_delta - pColor[i]) + rg_etc1::square(p1 >> 8) + rg_etc1::square(p2 >> 8); if (trial_error < best_error) { best_error = trial_error; best_x = x; best_packed_c1 = p1 & 0xFF; best_packed_c2 = p2 & 0xFF; best_i = i; if (!best_error) goto found_perfect_match; } } while (*pTable != 0xFFFF); } } found_perfect_match: if (best_error == 
cUINT32_MAX) return best_error; best_error *= num_colors; results.m_n = num_colors; results.m_block_color4 = !(best_x & 1); results.m_block_inten_table = (best_x >> 1) & 7; memset(results.m_pSelectors, (best_x >> 4) & 3, num_colors); const uint best_packed_c0 = (best_x >> 8) & 255; results.m_block_color_unscaled[best_i] = static_cast(best_packed_c0); results.m_block_color_unscaled[s_next_comp[best_i]] = static_cast(best_packed_c1); results.m_block_color_unscaled[s_next_comp[best_i + 1]] = static_cast(best_packed_c2); results.m_error = best_error; return best_error; } // Function originally from RYG's public domain real-time DXT1 compressor, modified for 555. static void dither_block_555(color_quad_u8* dest, const color_quad_u8* block) { int err[8], *ep1 = err, *ep2 = err + 4; uint8* quant = g_quant5_tab + 8; memset(dest, 0xFF, sizeof(color_quad_u8) * 16); // process channels seperately for (int ch = 0; ch < 3; ch++) { uint8* bp = (uint8*)block; uint8* dp = (uint8*)dest; bp += ch; dp += ch; memset(err, 0, sizeof(err)); for (int y = 0; y < 4; y++) { // pixel 0 dp[0] = quant[bp[0] + ((3 * ep2[1] + 5 * ep2[0]) >> 4)]; ep1[0] = bp[0] - dp[0]; // pixel 1 dp[4] = quant[bp[4] + ((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) >> 4)]; ep1[1] = bp[4] - dp[4]; // pixel 2 dp[8] = quant[bp[8] + ((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) >> 4)]; ep1[2] = bp[8] - dp[8]; // pixel 3 dp[12] = quant[bp[12] + ((7 * ep1[2] + 5 * ep2[3] + ep2[2]) >> 4)]; ep1[3] = bp[12] - dp[12]; // advance to next line int* tmp = ep1; ep1 = ep2; ep2 = tmp; bp += 16; dp += 16; } } } unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params) { const color_quad_u8* pSrc_pixels = reinterpret_cast(pSrc_pixels_rgba); etc1_block& dst_block = *static_cast(pETC1_block); color_quad_u8 src_pixel0(pSrc_pixels[0]); // Check for solid block. 
const uint32 first_pixel_u32 = pSrc_pixels->m_u32; int r; for (r = 15; r >= 1; --r) if (pSrc_pixels[r].m_u32 != first_pixel_u32) break; if (!r) return static_cast(16 * pack_etc1_block_solid_color(dst_block, &pSrc_pixels[0].r)); color_quad_u8 dithered_pixels[16]; if (pack_params.m_dithering) { dither_block_555(dithered_pixels, pSrc_pixels); pSrc_pixels = dithered_pixels; } etc1_optimizer optimizer; uint64 best_error = cUINT64_MAX; uint best_flip = false, best_use_color4 = false; uint8 best_selectors[2][8]; etc1_optimizer::results best_results[2]; for (uint i = 0; i < 2; i++) { best_results[i].m_n = 8; best_results[i].m_pSelectors = best_selectors[i]; } uint8 selectors[3][8]; etc1_optimizer::results results[3]; for (uint i = 0; i < 3; i++) { results[i].m_n = 8; results[i].m_pSelectors = selectors[i]; } color_quad_u8 subblock_pixels[8]; etc1_optimizer::params params(pack_params); params.m_num_src_pixels = 8; params.m_pSrc_pixels = subblock_pixels; for (uint flip = 0; flip < 2; flip++) { for (uint use_color4 = 0; use_color4 < 2; use_color4++) { uint64 trial_error = 0; uint subblock; for (subblock = 0; subblock < 2; subblock++) { if (flip) memcpy(subblock_pixels, pSrc_pixels + subblock * 8, sizeof(color_quad_u8) * 8); else { const color_quad_u8* pSrc_col = pSrc_pixels + subblock * 2; subblock_pixels[0] = pSrc_col[0]; subblock_pixels[1] = pSrc_col[4]; subblock_pixels[2] = pSrc_col[8]; subblock_pixels[3] = pSrc_col[12]; subblock_pixels[4] = pSrc_col[1]; subblock_pixels[5] = pSrc_col[5]; subblock_pixels[6] = pSrc_col[9]; subblock_pixels[7] = pSrc_col[13]; } results[2].m_error = cUINT64_MAX; if ((params.m_quality >= cMediumQuality) && ((subblock) || (use_color4))) { const uint32 subblock_pixel0_u32 = subblock_pixels[0].m_u32; for (r = 7; r >= 1; --r) if (subblock_pixels[r].m_u32 != subblock_pixel0_u32) break; if (!r) { pack_etc1_block_solid_color_constrained(results[2], 8, &subblock_pixels[0].r, !use_color4, (subblock && !use_color4) ? 
&results[0].m_block_color_unscaled : NULL); } } params.m_use_color4 = (use_color4 != 0); params.m_constrain_against_base_color5 = false; if ((!use_color4) && (subblock)) { params.m_constrain_against_base_color5 = true; params.m_base_color5 = results[0].m_block_color_unscaled; } if (params.m_quality == cHighQuality) { static const int s_scan_delta_0_to_4[] = {-4, -3, -2, -1, 0, 1, 2, 3, 4}; params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_4); params.m_pScan_deltas = s_scan_delta_0_to_4; } else if (params.m_quality == cMediumQuality) { static const int s_scan_delta_0_to_1[] = {-1, 0, 1}; params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0_to_1); params.m_pScan_deltas = s_scan_delta_0_to_1; } else { static const int s_scan_delta_0[] = {0}; params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_0); params.m_pScan_deltas = s_scan_delta_0; } optimizer.init(params, results[subblock]); if (!optimizer.compute()) break; if (params.m_quality >= cMediumQuality) { // TODO: Fix fairly arbitrary/unrefined thresholds that control how far away to scan for potentially better solutions. 
const uint refinement_error_thresh0 = 3000; const uint refinement_error_thresh1 = 6000; if (results[subblock].m_error > refinement_error_thresh0) { if (params.m_quality == cMediumQuality) { static const int s_scan_delta_2_to_3[] = {-3, -2, 2, 3}; params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_2_to_3); params.m_pScan_deltas = s_scan_delta_2_to_3; } else { static const int s_scan_delta_5_to_5[] = {-5, 5}; static const int s_scan_delta_5_to_8[] = {-8, -7, -6, -5, 5, 6, 7, 8}; if (results[subblock].m_error > refinement_error_thresh1) { params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_8); params.m_pScan_deltas = s_scan_delta_5_to_8; } else { params.m_scan_delta_size = RG_ETC1_ARRAY_SIZE(s_scan_delta_5_to_5); params.m_pScan_deltas = s_scan_delta_5_to_5; } } if (!optimizer.compute()) break; } if (results[2].m_error < results[subblock].m_error) results[subblock] = results[2]; } trial_error += results[subblock].m_error; if (trial_error >= best_error) break; } if (subblock < 2) continue; best_error = trial_error; best_results[0] = results[0]; best_results[1] = results[1]; best_flip = flip; best_use_color4 = use_color4; } // use_color4 } // flip int dr = best_results[1].m_block_color_unscaled.r - best_results[0].m_block_color_unscaled.r; int dg = best_results[1].m_block_color_unscaled.g - best_results[0].m_block_color_unscaled.g; int db = best_results[1].m_block_color_unscaled.b - best_results[0].m_block_color_unscaled.b; RG_ETC1_ASSERT(best_use_color4 || (rg_etc1::minimum(dr, dg, db) >= cETC1ColorDeltaMin) && (rg_etc1::maximum(dr, dg, db) <= cETC1ColorDeltaMax)); if (best_use_color4) { dst_block.m_bytes[0] = static_cast(best_results[1].m_block_color_unscaled.r | (best_results[0].m_block_color_unscaled.r << 4)); dst_block.m_bytes[1] = static_cast(best_results[1].m_block_color_unscaled.g | (best_results[0].m_block_color_unscaled.g << 4)); dst_block.m_bytes[2] = static_cast(best_results[1].m_block_color_unscaled.b | 
(best_results[0].m_block_color_unscaled.b << 4)); } else { if (dr < 0) dr += 8; dst_block.m_bytes[0] = static_cast((best_results[0].m_block_color_unscaled.r << 3) | dr); if (dg < 0) dg += 8; dst_block.m_bytes[1] = static_cast((best_results[0].m_block_color_unscaled.g << 3) | dg); if (db < 0) db += 8; dst_block.m_bytes[2] = static_cast((best_results[0].m_block_color_unscaled.b << 3) | db); } dst_block.m_bytes[3] = static_cast((best_results[1].m_block_inten_table << 2) | (best_results[0].m_block_inten_table << 5) | ((~best_use_color4 & 1) << 1) | best_flip); uint selector0 = 0, selector1 = 0; if (best_flip) { // flipped: // { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, // { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 } // // { 0, 2 }, { 1, 2 }, { 2, 2 }, { 3, 2 }, // { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 } const uint8* pSelectors0 = best_results[0].m_pSelectors; const uint8* pSelectors1 = best_results[1].m_pSelectors; for (int x = 3; x >= 0; --x) { uint b; b = g_selector_index_to_etc1[pSelectors1[4 + x]]; selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); b = g_selector_index_to_etc1[pSelectors1[x]]; selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); b = g_selector_index_to_etc1[pSelectors0[4 + x]]; selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); b = g_selector_index_to_etc1[pSelectors0[x]]; selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); } } else { // non-flipped: // { 0, 0 }, { 0, 1 }, { 0, 2 }, { 0, 3 }, // { 1, 0 }, { 1, 1 }, { 1, 2 }, { 1, 3 } // // { 2, 0 }, { 2, 1 }, { 2, 2 }, { 2, 3 }, // { 3, 0 }, { 3, 1 }, { 3, 2 }, { 3, 3 } for (int subblock = 1; subblock >= 0; --subblock) { const uint8* pSelectors = best_results[subblock].m_pSelectors + 4; for (uint i = 0; i < 2; i++) { uint b; b = g_selector_index_to_etc1[pSelectors[3]]; selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); b = g_selector_index_to_etc1[pSelectors[2]]; 
selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); b = g_selector_index_to_etc1[pSelectors[1]]; selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); b = g_selector_index_to_etc1[pSelectors[0]]; selector0 = (selector0 << 1) | (b & 1); selector1 = (selector1 << 1) | (b >> 1); pSelectors -= 4; } } } dst_block.m_bytes[4] = static_cast(selector1 >> 8); dst_block.m_bytes[5] = static_cast(selector1 & 0xFF); dst_block.m_bytes[6] = static_cast(selector0 >> 8); dst_block.m_bytes[7] = static_cast(selector0 & 0xFF); return static_cast(best_error); } unsigned int pack_etc2_alpha(void* pBlock, const unsigned int* pSrc_pixels_rgba, etc2a_pack_params& pack_params) { crnlib::color_quad_u8* pixels = (crnlib::color_quad_u8*)pSrc_pixels_rgba; dxt5_endpoint_optimizer dxt5_optimizer; dxt5_endpoint_optimizer::results results; uint8 selectors[16]; results.m_pSelectors = selectors; dxt5_endpoint_optimizer::params params; params.m_pPixels = pixels; params.m_num_pixels = 16; params.m_comp_index = pack_params.comp_index; params.m_quality = pack_params.m_quality == cHighQuality ? cCRNDXTQualityUber : pack_params.m_quality == cMediumQuality ? 
cCRNDXTQualityNormal : cCRNDXTQualityFast; params.m_use_both_block_types = false; dxt5_optimizer.compute(params, results); uint base_codeword = (results.m_first_endpoint + results.m_second_endpoint + 1) >> 1; uint best_error = cUINT32_MAX; for (int modifier_index = 0; modifier_index < 16; modifier_index++) { const int* modifier = g_etc2a_modifier_table[modifier_index]; int multiplier = math::clamp((results.m_first_endpoint - results.m_second_endpoint + modifier[7] + (modifier[7] >> 1)) / (modifier[7] << 1), 1, 15); uint8 data[8] = {(uint8)base_codeword, (uint8)(multiplier << 4 | modifier_index)}, values[8]; for (int i = 0; i < 8; i++) values[i] = math::clamp(base_codeword + modifier[i] * multiplier, 0, 255); uint error = 0; for (uint d0 = 3, t = 0, i = 0; i < 4; i++, d0 += 3) { for (uint d = d0, j = 0; j < 4; j++, t++, d += 12) { int a = pixels[t].a; uint byte_offset = 2 + (d >> 3); uint bit_offset = d & 7; uint best_s = 0; uint best_delta = cUINT32_MAX; for (uint s = 0; s < 8; s++) { uint delta = abs(a - values[s]); if (delta < best_delta) { best_s = s; best_delta = delta; } } error += best_delta * best_delta; data[byte_offset] |= best_s << (8 - bit_offset); if (bit_offset < 3) data[byte_offset - 1] |= best_s >> bit_offset; } } if (error < best_error) { memcpy(pBlock, data, 8); best_error = error; } } return best_error; } } // namespace rg_etc1 } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_rg_etc1.h000066400000000000000000000071561503722002600216400ustar00rootroot00000000000000// File: rg_etc1.h - Fast, high quality ETC1 block packer/unpacker - Rich Geldreich // Please see ZLIB license at the end of this file. #pragma once namespace crnlib { namespace rg_etc1 { // Unpacks an 8-byte ETC1 compressed block to a block of 4x4 32bpp RGBA pixels. // Returns false if the block is invalid. Invalid blocks will still be unpacked with clamping. // This function is thread safe, and does not dynamically allocate any memory. 
// If preserve_alpha is true, the alpha channel of the destination pixels will not be overwritten. Otherwise, alpha will be set to 255. bool unpack_etc1_block(const void* pETC1_block, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false); bool unpack_etc2_color(const void* pBlock, unsigned int* pDst_pixels_rgba, bool preserve_alpha = false); bool unpack_etc2_alpha(const void* pBlock, unsigned int* pDst_pixels_rgba, int comp_index = 3); // Quality setting = the higher the quality, the slower. // To pack large textures, it is highly recommended to call pack_etc1_block() in parallel, on different blocks, from multiple threads (particularly when using cHighQuality). enum etc1_quality { cLowQuality, cMediumQuality, cHighQuality, }; struct etc1_pack_params { etc1_quality m_quality; bool m_dithering; inline etc1_pack_params() { clear(); } void clear() { m_quality = cHighQuality; m_dithering = false; } }; struct etc2a_pack_params { etc1_quality m_quality; int comp_index; inline etc2a_pack_params() { clear(); } void clear() { m_quality = cHighQuality; comp_index = 3; } }; // Important: pack_etc1_block_init() must be called before calling pack_etc1_block(). void pack_etc1_block_init(); // Packs a 4x4 block of 32bpp RGBA pixels to an 8-byte ETC1 block. // 32-bit RGBA pixels must always be arranged as (R,G,B,A) (R first, A last) in memory, independent of platform endianness. A should always be 255. // Returns squared error of result. // This function is thread safe, and does not dynamically allocate any memory. // pack_etc1_block() does not currently support "perceptual" colorspace metrics - it primarily optimizes for RGB RMSE. 
unsigned int pack_etc1_block(void* pETC1_block, const unsigned int* pSrc_pixels_rgba, etc1_pack_params& pack_params); unsigned int pack_etc2_alpha(void* pBlock, const unsigned int* pSrc_pixels_rgba, etc2a_pack_params& pack_params); } // namespace rg_etc1 } // namespace crnlib //------------------------------------------------------------------------------ // // rg_etc1 uses the ZLIB license: // http://opensource.org/licenses/Zlib // // Copyright (c) 2012 Rich Geldreich // // This software is provided 'as-is', without any express or implied // warranty. In no event will the authors be held liable for any damages // arising from the use of this software. // // Permission is granted to anyone to use this software for any purpose, // including commercial applications, and to alter it and redistribute it // freely, subject to the following restrictions: // // 1. The origin of this software must not be misrepresented; you must not // claim that you wrote the original software. If you use this software // in a product, an acknowledgment in the product documentation would be // appreciated but is not required. // // 2. Altered source versions must be plainly marked as such, and must not be // misrepresented as being the original software. // // 3. This notice may not be removed or altered from any source distribution. // //------------------------------------------------------------------------------ DaemonEngine-crunch-ef4d32f/crnlib/crn_ryg_dxt.cpp000066400000000000000000000341441503722002600223240ustar00rootroot00000000000000// File: crn_ryg_dxt.cpp // RYG's real-time DXT compressor - Public domain. #include "crn_core.h" #include "crn_ryg_types.hpp" #include "crn_ryg_dxt.hpp" #ifdef _MSC_VER #pragma warning(disable : 4244) // conversion from 'a' to 'b', possible loss of data #endif namespace ryg_dxt { // Couple of tables... 
sU8 Expand5[32]; sU8 Expand6[64]; sU8 OMatch5[256][2]; sU8 OMatch6[256][2]; sU8 OMatch5_3[256][2]; sU8 OMatch6_3[256][2]; sU8 QuantRBTab[256 + 16]; sU8 QuantGTab[256 + 16]; static sInt Mul8Bit(sInt a, sInt b) { sInt t = a * b + 128; return (t + (t >> 8)) >> 8; } union Pixel { struct { sU8 b, g, r, a; }; sU32 v; void From16Bit(sU16 v) { sInt rv = (v & 0xf800) >> 11; sInt gv = (v & 0x07e0) >> 5; sInt bv = (v & 0x001f) >> 0; a = 0; r = Expand5[rv]; g = Expand6[gv]; b = Expand5[bv]; } sU16 As16Bit() const { return (Mul8Bit(r, 31) << 11) + (Mul8Bit(g, 63) << 5) + Mul8Bit(b, 31); } void LerpRGB(const Pixel& p1, const Pixel& p2, sInt f) { r = p1.r + Mul8Bit(p2.r - p1.r, f); g = p1.g + Mul8Bit(p2.g - p1.g, f); b = p1.b + Mul8Bit(p2.b - p1.b, f); } }; /****************************************************************************/ static void PrepareOptTable4(sU8* Table, const sU8* expand, sInt size) { for (sInt i = 0; i < 256; i++) { sInt bestErr = 256; for (sInt min = 0; min < size; min++) { for (sInt max = 0; max < size; max++) { sInt mine = expand[min]; sInt maxe = expand[max]; //sInt err = sAbs(maxe + Mul8Bit(mine-maxe,0x55) - i); sInt err = sAbs(((maxe * 2 + mine) / 3) - i); err += ((sAbs(maxe - mine) * 8) >> 8); // approx. .03f if (err < bestErr) { Table[i * 2 + 0] = max; Table[i * 2 + 1] = min; bestErr = err; } } } } } static void PrepareOptTable3(sU8* Table, const sU8* expand, sInt size) { for (sInt i = 0; i < 256; i++) { sInt bestErr = 256; for (sInt min = 0; min < size; min++) { for (sInt max = 0; max < size; max++) { sInt mine = expand[min]; sInt maxe = expand[max]; sInt err = sAbs(((mine + maxe) >> 1) - i); err += ((sAbs(maxe - mine) * 8) >> 8); // approx. 
.03f if (err < bestErr) { Table[i * 2 + 0] = max; Table[i * 2 + 1] = min; bestErr = err; } } } } } static inline void EvalColors(Pixel* color, sU16 c0, sU16 c1) { color[0].From16Bit(c0); color[1].From16Bit(c1); color[2].LerpRGB(color[0], color[1], 0x55); color[3].LerpRGB(color[0], color[1], 0xaa); } // Block dithering function. Simply dithers a block to 565 RGB. // (Floyd-Steinberg) static void DitherBlock(Pixel* dest, const Pixel* block) { sInt err[8], *ep1 = err, *ep2 = err + 4; // process channels seperately for (sInt ch = 0; ch < 3; ch++) { sU8* bp = (sU8*)block; sU8* dp = (sU8*)dest; sU8* quant = (ch == 1) ? QuantGTab + 8 : QuantRBTab + 8; bp += ch; dp += ch; sSetMem(err, 0, sizeof(err)); for (sInt y = 0; y < 4; y++) { // pixel 0 dp[0] = quant[bp[0] + ((3 * ep2[1] + 5 * ep2[0]) >> 4)]; ep1[0] = bp[0] - dp[0]; // pixel 1 dp[4] = quant[bp[4] + ((7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]) >> 4)]; ep1[1] = bp[4] - dp[4]; // pixel 2 dp[8] = quant[bp[8] + ((7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]) >> 4)]; ep1[2] = bp[8] - dp[8]; // pixel 3 dp[12] = quant[bp[12] + ((7 * ep1[2] + 5 * ep2[3] + ep2[2]) >> 4)]; ep1[3] = bp[12] - dp[12]; // advance to next line sSwap(ep1, ep2); bp += 16; dp += 16; } } } // The color matching function static sU32 MatchColorsBlock(const Pixel* block, const Pixel* color, sBool dither) { sU32 mask = 0; sInt dirr = color[0].r - color[1].r; sInt dirg = color[0].g - color[1].g; sInt dirb = color[0].b - color[1].b; sInt dots[16]; for (sInt i = 0; i < 16; i++) dots[i] = block[i].r * dirr + block[i].g * dirg + block[i].b * dirb; sInt stops[4]; for (sInt i = 0; i < 4; i++) stops[i] = color[i].r * dirr + color[i].g * dirg + color[i].b * dirb; sInt c0Point = (stops[1] + stops[3]) >> 1; sInt halfPoint = (stops[3] + stops[2]) >> 1; sInt c3Point = (stops[2] + stops[0]) >> 1; if (!dither) { // the version without dithering is straightforward for (sInt i = 15; i >= 0; i--) { mask <<= 2; sInt dot = dots[i]; if (dot < halfPoint) mask |= (dot < 
c0Point) ? 1 : 3; else mask |= (dot < c3Point) ? 2 : 0; } } else { // with floyd-steinberg dithering (see above) sInt err[8], *ep1 = err, *ep2 = err + 4; sInt* dp = dots; c0Point <<= 4; halfPoint <<= 4; c3Point <<= 4; for (sInt i = 0; i < 8; i++) err[i] = 0; for (sInt y = 0; y < 4; y++) { sInt dot, lmask, step; // pixel 0 dot = (dp[0] << 4) + (3 * ep2[1] + 5 * ep2[0]); if (dot < halfPoint) step = (dot < c0Point) ? 1 : 3; else step = (dot < c3Point) ? 2 : 0; ep1[0] = dp[0] - stops[step]; lmask = step; // pixel 1 dot = (dp[1] << 4) + (7 * ep1[0] + 3 * ep2[2] + 5 * ep2[1] + ep2[0]); if (dot < halfPoint) step = (dot < c0Point) ? 1 : 3; else step = (dot < c3Point) ? 2 : 0; ep1[1] = dp[1] - stops[step]; lmask |= step << 2; // pixel 2 dot = (dp[2] << 4) + (7 * ep1[1] + 3 * ep2[3] + 5 * ep2[2] + ep2[1]); if (dot < halfPoint) step = (dot < c0Point) ? 1 : 3; else step = (dot < c3Point) ? 2 : 0; ep1[2] = dp[2] - stops[step]; lmask |= step << 4; // pixel 3 dot = (dp[3] << 4) + (7 * ep1[2] + 5 * ep2[3] + ep2[2]); if (dot < halfPoint) step = (dot < c0Point) ? 1 : 3; else step = (dot < c3Point) ? 2 : 0; ep1[3] = dp[3] - stops[step]; lmask |= step << 6; // advance to next line sSwap(ep1, ep2); dp += 4; mask |= lmask << (y * 8); } } return mask; } // The color optimization function. 
(Clever code, part 1) static void OptimizeColorsBlock(const Pixel* block, sU16& max16, sU16& min16) { static const sInt nIterPower = 4; // determine color distribution sInt mu[3], min[3], max[3]; for (sInt ch = 0; ch < 3; ch++) { const sU8* bp = ((const sU8*)block) + ch; sInt muv, minv, maxv; muv = minv = maxv = bp[0]; for (sInt i = 4; i < 64; i += 4) { muv += bp[i]; minv = sMin(minv, bp[i]); maxv = sMax(maxv, bp[i]); } mu[ch] = (muv + 8) >> 4; min[ch] = minv; max[ch] = maxv; } // determine covariance matrix sInt cov[6]; for (sInt i = 0; i < 6; i++) cov[i] = 0; for (sInt i = 0; i < 16; i++) { sInt r = block[i].r - mu[2]; sInt g = block[i].g - mu[1]; sInt b = block[i].b - mu[0]; cov[0] += r * r; cov[1] += r * g; cov[2] += r * b; cov[3] += g * g; cov[4] += g * b; cov[5] += b * b; } // convert covariance matrix to float, find principal axis via power iter sF32 covf[6], vfr, vfg, vfb; for (sInt i = 0; i < 6; i++) covf[i] = cov[i] / 255.0f; vfr = max[2] - min[2]; vfg = max[1] - min[1]; vfb = max[0] - min[0]; for (sInt iter = 0; iter < nIterPower; iter++) { sF32 r = vfr * covf[0] + vfg * covf[1] + vfb * covf[2]; sF32 g = vfr * covf[1] + vfg * covf[3] + vfb * covf[4]; sF32 b = vfr * covf[2] + vfg * covf[4] + vfb * covf[5]; vfr = r; vfg = g; vfb = b; } sF32 magn = sMax(sMax(sFAbs(vfr), sFAbs(vfg)), sFAbs(vfb)); sInt v_r, v_g, v_b; if (magn < 4.0f) // too small, default to luminance { v_r = 148; v_g = 300; v_b = 58; } else { magn = 512.0f / magn; v_r = vfr * magn; v_g = vfg * magn; v_b = vfb * magn; } // Pick colors at extreme points sInt mind = 0x7fffffff, maxd = -0x7fffffff; Pixel minp, maxp; for (sInt i = 0; i < 16; i++) { sInt dot = block[i].r * v_r + block[i].g * v_g + block[i].b * v_b; if (dot < mind) { mind = dot; minp = block[i]; } if (dot > maxd) { maxd = dot; maxp = block[i]; } } // Reduce to 16 bit colors max16 = maxp.As16Bit(); min16 = minp.As16Bit(); } // The refinement function. (Clever code, part 2) // Tries to optimize colors to suit block contents better. 
// (By solving a least squares system via normal equations+Cramer's rule) static sBool RefineBlock(const Pixel* block, sU16& max16, sU16& min16, sU32 mask) { static const sInt w1Tab[4] = {3, 0, 2, 1}; static const sInt prods[4] = {0x090000, 0x000900, 0x040102, 0x010402}; // ^some magic to save a lot of multiplies in the accumulating loop... sInt akku = 0; sInt At1_r, At1_g, At1_b; sInt At2_r, At2_g, At2_b; sU32 cm = mask; At1_r = At1_g = At1_b = 0; At2_r = At2_g = At2_b = 0; for (sInt i = 0; i < 16; i++, cm >>= 2) { sInt step = cm & 3; sInt w1 = w1Tab[step]; sInt r = block[i].r; sInt g = block[i].g; sInt b = block[i].b; akku += prods[step]; At1_r += w1 * r; At1_g += w1 * g; At1_b += w1 * b; At2_r += r; At2_g += g; At2_b += b; } At2_r = 3 * At2_r - At1_r; At2_g = 3 * At2_g - At1_g; At2_b = 3 * At2_b - At1_b; // extract solutions and decide solvability sInt xx = akku >> 16; sInt yy = (akku >> 8) & 0xff; sInt xy = (akku >> 0) & 0xff; if (!yy || !xx || xx * yy == xy * xy) return sFALSE; sF32 frb = 3.0f * 31.0f / 255.0f / (xx * yy - xy * xy); sF32 fg = frb * 63.0f / 31.0f; sU16 oldMin = min16; sU16 oldMax = max16; // solve. 
max16 = sClamp((At1_r * yy - At2_r * xy) * frb + 0.5f, 0, 31) << 11; max16 |= sClamp((At1_g * yy - At2_g * xy) * fg + 0.5f, 0, 63) << 5; max16 |= sClamp((At1_b * yy - At2_b * xy) * frb + 0.5f, 0, 31) << 0; min16 = sClamp((At2_r * xx - At1_r * xy) * frb + 0.5f, 0, 31) << 11; min16 |= sClamp((At2_g * xx - At1_g * xy) * fg + 0.5f, 0, 63) << 5; min16 |= sClamp((At2_b * xx - At1_b * xy) * frb + 0.5f, 0, 31) << 0; return oldMin != min16 || oldMax != max16; } // Color block compression static void CompressColorBlock(sU8* dest, const sU32* src, sInt quality) { const Pixel* block = (const Pixel*)src; Pixel dblock[16], color[4]; // check if block is constant sU32 min, max; min = max = block[0].v; for (sInt i = 1; i < 16; i++) { min = sMin(min, block[i].v); max = sMax(max, block[i].v); } // perform block compression sU16 min16, max16; sU32 mask; if (min != max) // no constant color { // first step: compute dithered version for PCA if desired if (quality) DitherBlock(dblock, block); // second step: pca+map along principal axis OptimizeColorsBlock(quality ? dblock : block, max16, min16); if (max16 != min16) { EvalColors(color, max16, min16); mask = MatchColorsBlock(block, color, quality != 0); } else mask = 0; // third step: refine if (RefineBlock(quality ? 
dblock : block, max16, min16, mask)) { if (max16 != min16) { EvalColors(color, max16, min16); mask = MatchColorsBlock(block, color, quality != 0); } else mask = 0; } } else // constant color { sInt r = block[0].r; sInt g = block[0].g; sInt b = block[0].b; mask = 0xaaaaaaaa; max16 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5) | OMatch5[b][0]; min16 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5) | OMatch5[b][1]; } // write the color block if (max16 < min16) { sSwap(max16, min16); mask ^= 0x55555555; } ((sU16*)dest)[0] = max16; ((sU16*)dest)[1] = min16; ((sU32*)dest)[1] = mask; } // Alpha block compression (this is easy for a change) static void CompressAlphaBlock(sU8* dest, const sU32* src) { const Pixel* block = (const Pixel*)src; // find min/max color sInt min, max; min = max = block[0].a; for (sInt i = 1; i < 16; i++) { min = sMin(min, block[i].a); max = sMax(max, block[i].a); } // encode them *dest++ = max; *dest++ = min; // determine bias and emit color indices sInt dist = max - min; sInt bias = min * 7 - (dist >> 1); sInt dist4 = dist * 4; sInt dist2 = dist * 2; sInt bits = 0, mask = 0; for (sInt i = 0; i < 16; i++) { sInt a = block[i].a * 7 - bias; sInt ind, t; // select index (hooray for bit magic) t = (dist4 - a) >> 31; ind = t & 4; a -= dist4 & t; t = (dist2 - a) >> 31; ind += t & 2; a -= dist2 & t; t = (dist - a) >> 31; ind += t & 1; ind = -ind & 7; ind ^= (2 > ind); // write index mask |= ind << bits; if ((bits += 3) >= 8) { *dest++ = mask; mask >>= 8; bits -= 8; } } } /****************************************************************************/ void sInitDXT() { for (sInt i = 0; i < 32; i++) Expand5[i] = (i << 3) | (i >> 2); for (sInt i = 0; i < 64; i++) Expand6[i] = (i << 2) | (i >> 4); for (sInt i = 0; i < 256 + 16; i++) { sInt v = sClamp(i - 8, 0, 255); QuantRBTab[i] = Expand5[Mul8Bit(v, 31)]; QuantGTab[i] = Expand6[Mul8Bit(v, 63)]; } PrepareOptTable4(&OMatch5[0][0], Expand5, 32); PrepareOptTable4(&OMatch6[0][0], Expand6, 64); 
PrepareOptTable3(&OMatch5_3[0][0], Expand5, 32); PrepareOptTable3(&OMatch6_3[0][0], Expand6, 64); } void sCompressDXTBlock(sU8* dest, const sU32* src, sBool alpha, sInt quality) { CRNLIB_ASSERT(Expand5[1]); // if alpha specified, compress alpha as well if (alpha) { CompressAlphaBlock(dest, src); dest += 8; } // compress the color part CompressColorBlock(dest, src, quality); } void sCompressDXT5ABlock(sU8* dest, const sU32* src) { CRNLIB_ASSERT(Expand5[1]); CompressAlphaBlock(dest, src); } } // namespace ryg_dxt DaemonEngine-crunch-ef4d32f/crnlib/crn_ryg_dxt.hpp000066400000000000000000000013641503722002600223270ustar00rootroot00000000000000// File: ryg_dxt.hpp #pragma once #include "crn_ryg_types.hpp" namespace ryg_dxt { extern sU8 Expand5[32]; extern sU8 Expand6[64]; extern sU8 OMatch5[256][2]; extern sU8 OMatch6[256][2]; extern sU8 OMatch5_3[256][2]; extern sU8 OMatch6_3[256][2]; extern sU8 QuantRBTab[256 + 16]; extern sU8 QuantGTab[256 + 16]; // initialize DXT codec. only needs to be called once. void sInitDXT(); // input: a 4x4 pixel block, A8R8G8B8. you need to handle boundary cases // yourself. 
// alpha=sTRUE => use DXT5 (else use DXT1) // quality: 0=fastest (no dither), 1=medium (dither) void sCompressDXTBlock(sU8* dest, const sU32* src, sBool alpha, sInt quality); void sCompressDXT5ABlock(sU8* dest, const sU32* src); } // namespace ryg_dxt DaemonEngine-crunch-ef4d32f/crnlib/crn_ryg_types.hpp000066400000000000000000000120051503722002600226660ustar00rootroot00000000000000// File: ryg_types.hpp #pragma once #ifndef __TP_TYPES_HPP__ #define __TP_TYPES_HPP__ #include #include #include #include #ifdef _MSC_VER // microsoft C++ #define sCONFIG_NATIVEINT int _w64 // sDInt: an int of the same size as a pointer #define sCONFIG_INT64 __int64 // sS64, sU64: a 64 bit int #define sINLINE __forceinline // use this to inline #endif #ifdef __GNUC__ // GNU C++ #define sCONFIG_NATIVEINT int #define sCONFIG_INT64 long long #define sINLINE __inline__ #endif /****************************************************************************/ /*** ***/ /*** Basic Types and Functions ***/ /*** ***/ /****************************************************************************/ typedef unsigned char sU8; // for packed arrays typedef unsigned short sU16; // for packed arrays typedef unsigned int sU32; // for packed arrays and bitfields typedef unsigned sCONFIG_INT64 sU64; // use as needed typedef signed char sS8; // for packed arrays typedef short sS16; // for packed arrays typedef int sS32; // for packed arrays typedef signed sCONFIG_INT64 sS64; // use as needed typedef float sF32; // basic floatingpoint typedef double sF64; // use as needed typedef int sInt; // use this most! typedef signed sCONFIG_NATIVEINT sDInt; // type for pointer diff typedef bool sBool; // use for boolean function results /****************************************************************************/ #define sTRUE true #define sFALSE false /****************************************************************************/ template sINLINE Type sMin(Type a, Type b) { return (a < b) ? 
a : b; } template sINLINE Type sMax(Type a, Type b) { return (a > b) ? a : b; } template sINLINE Type sSign(Type a) { return (a == 0) ? Type(0) : (a > 0) ? Type(1) : Type(-1); } template sINLINE Type sClamp(Type a, Type min, Type max) { return (a >= max) ? max : (a <= min) ? min : a; } template sINLINE void sSwap(Type& a, Type& b) { Type s; s = a; a = b; b = s; } template sINLINE Type sAlign(Type a, sInt b) { return (Type)((((sDInt)a) + b - 1) & (~(b - 1))); } template sINLINE Type sSquare(Type a) { return a * a; } /****************************************************************************/ #define sPI 3.1415926535897932384626433832795 #define sPI2 6.28318530717958647692528676655901 #define sPIF 3.1415926535897932384626433832795f #define sPI2F 6.28318530717958647692528676655901f #define sSQRT2 1.4142135623730950488016887242097 #define sSQRT2F 1.4142135623730950488016887242097f sINLINE sInt sAbs(sInt i) { return abs(i); } sINLINE void sSetMem(void* dd, sInt s, sInt c) { memset(dd, s, c); } sINLINE void sCopyMem(void* dd, const void* ss, sInt c) { memcpy(dd, ss, c); } sINLINE sInt sCmpMem(const void* dd, const void* ss, sInt c) { return (sInt)memcmp(dd, ss, c); } sINLINE sF64 sFATan(sF64 f) { return atan(f); } sINLINE sF64 sFATan2(sF64 a, sF64 b) { return atan2(a, b); } sINLINE sF64 sFCos(sF64 f) { return cos(f); } sINLINE sF64 sFAbs(sF64 f) { return fabs(f); } sINLINE sF32 sFAbs(sF32 f) { return fabsf(f); } sINLINE sF64 sFLog(sF64 f) { return log(f); } sINLINE sF64 sFLog10(sF64 f) { return log10(f); } sINLINE sF64 sFSin(sF64 f) { return sin(f); } sINLINE sF64 sFSqrt(sF64 f) { return sqrt(f); } sINLINE sF64 sFTan(sF64 f) { return tan(f); } sINLINE sF64 sFACos(sF64 f) { return acos(f); } sINLINE sF64 sFASin(sF64 f) { return asin(f); } sINLINE sF64 sFCosH(sF64 f) { return cosh(f); } sINLINE sF64 sFSinH(sF64 f) { return sinh(f); } sINLINE sF64 sFTanH(sF64 f) { return tanh(f); } sINLINE sF64 sFInvSqrt(sF64 f) { return 1.0 / sqrt(f); } sINLINE sF64 sFFloor(sF64 f) { 
return floor(f); } sINLINE sF64 sFPow(sF64 a, sF64 b) { return pow(a, b); } sINLINE sF64 sFMod(sF64 a, sF64 b) { return fmod(a, b); } sINLINE sF64 sFExp(sF64 f) { return exp(f); } /****************************************************************************/ /*** ***/ /*** Debugging ***/ /*** ***/ /****************************************************************************/ #define sVERIFY(x) \ { assert(x); } #define sVERIFYFALSE \ { assert(false); } /****************************************************************************/ #endif DaemonEngine-crunch-ef4d32f/crnlib/crn_sparse_array.h000066400000000000000000000170731503722002600230060ustar00rootroot00000000000000// File: crn_sparse_array.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { template class sparse_array_traits { public: static inline void* alloc_space(uint size) { return crnlib_malloc(size); } static inline void free_space(void* p) { crnlib_free(p); } static inline void construct_group(T* p) { scalar_type::construct_array(p, 1U << Log2N); } static inline void destruct_group(T* p) { scalar_type::destruct_array(p, 1U << Log2N); } static inline void construct_element(T* p) { scalar_type::construct(p); } static inline void destruct_element(T* p) { scalar_type::destruct(p); } static inline void copy_group(T* pDst, const T* pSrc) { for (uint j = 0; j < (1U << Log2N); j++) pDst[j] = pSrc[j]; } }; template class Traits = sparse_array_traits> class sparse_array : public Traits { public: enum { N = 1U << Log2N }; inline sparse_array() : m_size(0), m_num_active_groups(0) { init_default(); } inline sparse_array(uint size) : m_size(0), m_num_active_groups(0) { init_default(); resize(size); } inline sparse_array(const sparse_array& other) : m_size(0), m_num_active_groups(0) { init_default(); *this = other; } inline ~sparse_array() { for (uint i = 0; (i < m_groups.size()) && m_num_active_groups; i++) free_group(m_groups[i]); deinit_default(); } bool assign(const 
sparse_array& other) { if (this == &other) return true; if (!try_resize(other.size())) return false; for (uint i = 0; i < other.m_groups.size(); i++) { const T* p = other.m_groups[i]; T* q = m_groups[i]; if (p) { if (!q) { q = alloc_group(true); if (!q) return false; m_groups[i] = q; } copy_group(q, p); } else if (q) { free_group(q); m_groups[i] = NULL; } } return true; } sparse_array& operator=(const sparse_array& other) { if (!assign(other)) { CRNLIB_FAIL("Out of memory"); } return *this; } bool operator==(const sparse_array& other) const { if (m_size != other.m_size) return false; for (uint i = 0; i < m_size; i++) if (!((*this)[i] == other[i])) return false; return true; } bool operator<(const sparse_array& rhs) const { const uint min_size = math::minimum(m_size, rhs.m_size); uint i; for (i = 0; i < min_size; i++) if (!((*this)[i] == rhs[i])) break; if (i < min_size) return (*this)[i] < rhs[i]; return m_size < rhs.m_size; } void clear() { if (m_groups.size()) { for (uint i = 0; (i < m_groups.size()) && m_num_active_groups; i++) free_group(m_groups[i]); m_groups.clear(); } m_size = 0; CRNLIB_ASSERT(!m_num_active_groups); } bool try_resize(uint size) { if (m_size == size) return true; const uint new_num_groups = (size + N - 1) >> Log2N; if (new_num_groups != m_groups.size()) { for (uint i = new_num_groups; i < m_groups.size(); i++) free_group(m_groups[i]); if (!m_groups.try_resize(new_num_groups)) return false; } m_size = size; return true; } void resize(uint size) { if (!try_resize(size)) { CRNLIB_FAIL("Out of memory"); } } inline uint size() const { return m_size; } inline bool empty() const { return 0 == m_size; } inline uint capacity() const { return m_groups.size(); } inline const T& operator[](uint i) const { CRNLIB_ASSERT(i < m_size); const T* p = m_groups[i >> Log2N]; const void* t = m_default; return p ? p[i & (N - 1)] : *reinterpret_cast(t); } inline const T* get(uint i) const { CRNLIB_ASSERT(i < m_size); const T* p = m_groups[i >> Log2N]; return p ? 
&p[i & (N - 1)] : NULL; } inline T* get(uint i) { CRNLIB_ASSERT(i < m_size); T* p = m_groups[i >> Log2N]; return p ? &p[i & (N - 1)] : NULL; } inline bool is_present(uint i) const { CRNLIB_ASSERT(i < m_size); return m_groups[i >> Log2N] != NULL; } inline uint get_num_groups() const { return m_groups.size(); } inline const T* get_group(uint group_index) const { return m_groups[group_index]; } inline T* get_group(uint group_index) { return m_groups[group_index]; } inline uint get_group_size() const { return N; } inline T* ensure_valid(uint index) { CRNLIB_ASSERT(index <= m_size); const uint group_index = index >> Log2N; if (group_index >= m_groups.size()) { T* p = alloc_group(true); if (!p) return NULL; if (!m_groups.try_push_back(p)) { free_group(p); return NULL; } } T* p = m_groups[group_index]; if (!p) { p = alloc_group(true); if (!p) return NULL; m_groups[group_index] = p; } m_size = math::maximum(index + 1, m_size); return p + (index & (N - 1)); } inline bool set(uint index, const T& obj) { T* p = ensure_valid(index); if (!p) return false; *p = obj; return true; } inline void push_back(const T& obj) { if (!set(m_size, obj)) { CRNLIB_FAIL("Out of memory"); } } inline bool try_push_back(const T& obj) { return set(m_size, obj); } inline void pop_back() { CRNLIB_ASSERT(m_size); if (m_size) resize(m_size - 1); } inline void unset_range(uint start, uint num) { if (!num) return; CRNLIB_ASSERT((start + num) <= capacity()); const uint num_to_skip = math::minimum(math::get_align_up_value_delta(start, N), num); num -= num_to_skip; const uint first_group = (start + num_to_skip) >> Log2N; const uint num_groups = num >> Log2N; for (uint i = 0; i < num_groups; i++) { T* p = m_groups[first_group + i]; if (p) { free_group(p); m_groups[i] = NULL; } } } inline void unset_all() { unset_range(0, m_groups.size() << Log2N); } inline void swap(sparse_array& other) { utils::swap(m_size, other.m_size); m_groups.swap(other.m_groups); utils::swap(m_num_active_groups, 
other.m_num_active_groups); } private: uint m_size; uint m_num_active_groups; crnlib::vector m_groups; uint64 m_default[(sizeof(T) + sizeof(uint64) - 1) / sizeof(uint64)]; inline T* alloc_group(bool nofail = false) { T* p = static_cast(sparse_array_traits::alloc_space(N * sizeof(T))); if (!p) { if (nofail) return NULL; CRNLIB_FAIL("Out of memory"); } sparse_array_traits::construct_group(p); m_num_active_groups++; return p; } inline void free_group(T* p) { if (p) { CRNLIB_ASSERT(m_num_active_groups); m_num_active_groups--; sparse_array_traits::destruct_group(p); sparse_array_traits::free_space(p); } } inline void init_default() { sparse_array_traits::construct_element(reinterpret_cast(m_default)); } inline void deinit_default() { sparse_array_traits::destruct_element(reinterpret_cast(m_default)); } }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_sparse_bit_array.cpp000066400000000000000000000262731503722002600242010ustar00rootroot00000000000000// File: crn_sparse_bit_array.h // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_sparse_bit_array.h" namespace crnlib { sparse_bit_array::sparse_bit_array() : m_num_groups(0), m_ppGroups(NULL) { } sparse_bit_array::sparse_bit_array(uint size) : m_num_groups(0), m_ppGroups(NULL) { resize(size); } sparse_bit_array::sparse_bit_array(sparse_bit_array& other) { m_num_groups = other.m_num_groups; m_ppGroups = (uint32**)crnlib_malloc(m_num_groups * sizeof(uint32*)); CRNLIB_VERIFY(m_ppGroups); for (uint i = 0; i < m_num_groups; i++) { if (other.m_ppGroups[i]) { m_ppGroups[i] = alloc_group(false); memcpy(m_ppGroups[i], other.m_ppGroups[i], cBytesPerGroup); } else m_ppGroups[i] = NULL; } } sparse_bit_array::~sparse_bit_array() { clear(); } sparse_bit_array& sparse_bit_array::operator=(sparse_bit_array& other) { if (this == &other) return *this; if (m_num_groups != other.m_num_groups) { clear(); m_num_groups = other.m_num_groups; m_ppGroups = 
(uint32**)crnlib_calloc(m_num_groups, sizeof(uint32*)); CRNLIB_VERIFY(m_ppGroups); } for (uint i = 0; i < m_num_groups; i++) { if (other.m_ppGroups[i]) { if (!m_ppGroups[i]) m_ppGroups[i] = alloc_group(false); memcpy(m_ppGroups[i], other.m_ppGroups[i], cBytesPerGroup); } else if (m_ppGroups[i]) { free_group(m_ppGroups[i]); m_ppGroups[i] = NULL; } } return *this; } void sparse_bit_array::clear() { if (!m_num_groups) return; for (uint i = 0; i < m_num_groups; i++) free_group(m_ppGroups[i]); crnlib_free(m_ppGroups); m_ppGroups = NULL; m_num_groups = 0; } void sparse_bit_array::swap(sparse_bit_array& other) { utils::swap(m_ppGroups, other.m_ppGroups); utils::swap(m_num_groups, other.m_num_groups); } void sparse_bit_array::optimize() { for (uint i = 0; i < m_num_groups; i++) { uint32* s = m_ppGroups[i]; if (s) { uint j; for (j = 0; j < cDWORDsPerGroup; j++) if (s[j]) break; if (j == cDWORDsPerGroup) { free_group(s); m_ppGroups[i] = NULL; } } } } void sparse_bit_array::set_bit_range(uint index, uint num) { CRNLIB_ASSERT((index + num) <= (m_num_groups << cBitsPerGroupShift)); if (!num) return; else if (num == 1) { set_bit(index); return; } while ((index & cBitsPerGroupMask) || (num <= cBitsPerGroup)) { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); uint32* pGroup = m_ppGroups[group_index]; if (!pGroup) { pGroup = alloc_group(true); m_ppGroups[group_index] = pGroup; } const uint group_bit_ofs = index & cBitsPerGroupMask; const uint dword_bit_ofs = group_bit_ofs & 31; const uint max_bits_to_set = 32 - dword_bit_ofs; const uint bits_to_set = math::minimum(max_bits_to_set, num); const uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); pGroup[group_bit_ofs >> 5] |= (msk << dword_bit_ofs); num -= bits_to_set; if (!num) return; index += bits_to_set; } while (num >= cBitsPerGroup) { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); uint32* pGroup = m_ppGroups[group_index]; if (!pGroup) { pGroup = 
alloc_group(true); m_ppGroups[group_index] = pGroup; } memset(pGroup, 0xFF, sizeof(uint32) * cDWORDsPerGroup); num -= cBitsPerGroup; index += cBitsPerGroup; } while (num) { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); uint32* pGroup = m_ppGroups[group_index]; if (!pGroup) { pGroup = alloc_group(true); m_ppGroups[group_index] = pGroup; } uint group_bit_ofs = index & cBitsPerGroupMask; uint bits_to_set = math::minimum(32U, num); uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); pGroup[group_bit_ofs >> 5] |= (msk << (group_bit_ofs & 31)); num -= bits_to_set; index += bits_to_set; } } void sparse_bit_array::clear_all_bits() { for (uint i = 0; i < m_num_groups; i++) { uint32* pGroup = m_ppGroups[i]; if (pGroup) memset(pGroup, 0, sizeof(uint32) * cDWORDsPerGroup); } } void sparse_bit_array::clear_bit_range(uint index, uint num) { CRNLIB_ASSERT((index + num) <= (m_num_groups << cBitsPerGroupShift)); if (!num) return; else if (num == 1) { clear_bit(index); return; } while ((index & cBitsPerGroupMask) || (num <= cBitsPerGroup)) { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); const uint group_bit_ofs = index & cBitsPerGroupMask; const uint dword_bit_ofs = group_bit_ofs & 31; const uint max_bits_to_set = 32 - dword_bit_ofs; const uint bits_to_set = math::minimum(max_bits_to_set, num); uint32* pGroup = m_ppGroups[group_index]; if (pGroup) { const uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); pGroup[group_bit_ofs >> 5] &= (~(msk << dword_bit_ofs)); } num -= bits_to_set; if (!num) return; index += bits_to_set; } while (num >= cBitsPerGroup) { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); uint32* pGroup = m_ppGroups[group_index]; if (pGroup) { free_group(pGroup); m_ppGroups[group_index] = NULL; } num -= cBitsPerGroup; index += cBitsPerGroup; } while (num) { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < 
m_num_groups); uint bits_to_set = math::minimum(32u, num); uint32* pGroup = m_ppGroups[group_index]; if (pGroup) { uint group_bit_ofs = index & cBitsPerGroupMask; uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_set)); pGroup[group_bit_ofs >> 5] &= (~(msk << (group_bit_ofs & 31))); } num -= bits_to_set; index += bits_to_set; } } void sparse_bit_array::resize(uint size) { uint num_groups = (size + cBitsPerGroup - 1) >> cBitsPerGroupShift; if (num_groups == m_num_groups) return; if (!num_groups) { clear(); return; } sparse_bit_array temp; temp.swap(*this); m_num_groups = num_groups; m_ppGroups = (uint32**)crnlib_calloc(m_num_groups, sizeof(uint32*)); CRNLIB_VERIFY(m_ppGroups); uint n = math::minimum(temp.m_num_groups, m_num_groups); for (uint i = 0; i < n; i++) { uint32* p = temp.m_ppGroups[i]; if (p) { m_ppGroups[i] = temp.m_ppGroups[i]; temp.m_ppGroups[i] = NULL; } } } sparse_bit_array& sparse_bit_array::operator&=(const sparse_bit_array& other) { if (this == &other) return *this; CRNLIB_VERIFY(other.m_num_groups == m_num_groups); for (uint i = 0; i < m_num_groups; i++) { uint32* d = m_ppGroups[i]; if (!d) continue; uint32* s = other.m_ppGroups[i]; if (!s) { free_group(d); m_ppGroups[i] = NULL; } else { uint32 oc = 0; for (uint j = 0; j < cDWORDsPerGroup; j++) { uint32 c = d[j] & s[j]; d[j] = c; oc |= c; } if (!oc) { free_group(d); m_ppGroups[i] = NULL; } } } return *this; } sparse_bit_array& sparse_bit_array::operator|=(const sparse_bit_array& other) { if (this == &other) return *this; CRNLIB_VERIFY(other.m_num_groups == m_num_groups); for (uint i = 0; i < m_num_groups; i++) { uint32* s = other.m_ppGroups[i]; if (!s) continue; uint32* d = m_ppGroups[i]; if (!d) { d = alloc_group(true); m_ppGroups[i] = d; memcpy(d, s, cBytesPerGroup); } else { uint32 oc = 0; for (uint j = 0; j < cDWORDsPerGroup; j++) { uint32 c = d[j] | s[j]; d[j] = c; oc |= c; } if (!oc) { free_group(d); m_ppGroups[i] = NULL; } } } return *this; } sparse_bit_array& sparse_bit_array::and_not(const 
sparse_bit_array& other) { if (this == &other) return *this; CRNLIB_VERIFY(other.m_num_groups == m_num_groups); for (uint i = 0; i < m_num_groups; i++) { uint32* d = m_ppGroups[i]; if (!d) continue; uint32* s = other.m_ppGroups[i]; if (!s) continue; uint32 oc = 0; for (uint j = 0; j < cDWORDsPerGroup; j++) { uint32 c = d[j] & (~s[j]); d[j] = c; oc |= c; } if (!oc) { free_group(d); m_ppGroups[i] = NULL; } } return *this; } int sparse_bit_array::find_first_set_bit(uint index, uint num) const { CRNLIB_ASSERT((index + num) <= (m_num_groups << cBitsPerGroupShift)); if (!num) return -1; while ((index & cBitsPerGroupMask) || (num <= cBitsPerGroup)) { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); const uint group_bit_ofs = index & cBitsPerGroupMask; const uint dword_bit_ofs = group_bit_ofs & 31; const uint max_bits_to_examine = 32 - dword_bit_ofs; const uint bits_to_examine = math::minimum(max_bits_to_examine, num); uint32* pGroup = m_ppGroups[group_index]; if (pGroup) { const uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_examine)); uint bits = pGroup[group_bit_ofs >> 5] & (msk << dword_bit_ofs); if (bits) { uint num_trailing_zeros = math::count_trailing_zero_bits(bits); int set_index = num_trailing_zeros + (index & ~31); CRNLIB_ASSERT(get_bit(set_index)); return set_index; } } num -= bits_to_examine; if (!num) return -1; index += bits_to_examine; } while (num >= cBitsPerGroup) { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); uint32* pGroup = m_ppGroups[group_index]; if (pGroup) { for (uint i = 0; i < cDWORDsPerGroup; i++) { uint32 bits = pGroup[i]; if (bits) { uint num_trailing_zeros = math::count_trailing_zero_bits(bits); int set_index = num_trailing_zeros + index + (i << 5); CRNLIB_ASSERT(get_bit(set_index)); return set_index; } } } num -= cBitsPerGroup; index += cBitsPerGroup; } while (num) { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); 
uint bits_to_examine = math::minimum(32U, num); uint32* pGroup = m_ppGroups[group_index]; if (pGroup) { uint group_bit_ofs = index & cBitsPerGroupMask; uint32 msk = (0xFFFFFFFFU >> (32 - bits_to_examine)); uint32 bits = pGroup[group_bit_ofs >> 5] & (msk << (group_bit_ofs & 31)); if (bits) { uint num_trailing_zeros = math::count_trailing_zero_bits(bits); int set_index = num_trailing_zeros + (index & ~31); CRNLIB_ASSERT(get_bit(set_index)); return set_index; } } num -= bits_to_examine; index += bits_to_examine; } return -1; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_sparse_bit_array.h000066400000000000000000000100741503722002600236360ustar00rootroot00000000000000// File: crn_sparse_bit_array.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { class sparse_bit_array { public: sparse_bit_array(); sparse_bit_array(uint size); sparse_bit_array(sparse_bit_array& other); ~sparse_bit_array(); sparse_bit_array& operator=(sparse_bit_array& other); void clear(); inline uint get_size() { return (m_num_groups << cBitsPerGroupShift); } void resize(uint size); sparse_bit_array& operator&=(const sparse_bit_array& other); sparse_bit_array& operator|=(const sparse_bit_array& other); sparse_bit_array& and_not(const sparse_bit_array& other); void swap(sparse_bit_array& other); void optimize(); void set_bit_range(uint index, uint num); void clear_bit_range(uint index, uint num); void clear_all_bits(); inline void set_bit(uint index) { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); uint32* pGroup = m_ppGroups[group_index]; if (!pGroup) { pGroup = alloc_group(true); m_ppGroups[group_index] = pGroup; } uint bit_ofs = index & (cBitsPerGroup - 1); pGroup[bit_ofs >> 5] |= (1U << (bit_ofs & 31)); } inline void clear_bit(uint index) { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); uint32* pGroup = m_ppGroups[group_index]; if (!pGroup) { pGroup 
= alloc_group(true); m_ppGroups[group_index] = pGroup; } uint bit_ofs = index & (cBitsPerGroup - 1); pGroup[bit_ofs >> 5] &= (~(1U << (bit_ofs & 31))); } inline void set(uint index, bool value) { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); uint32* pGroup = m_ppGroups[group_index]; if (!pGroup) { pGroup = alloc_group(true); m_ppGroups[group_index] = pGroup; } uint bit_ofs = index & (cBitsPerGroup - 1); uint bit = (1U << (bit_ofs & 31)); uint c = pGroup[bit_ofs >> 5]; uint mask = (uint)(-(int)value); pGroup[bit_ofs >> 5] = (c & ~bit) | (mask & bit); } inline bool get_bit(uint index) const { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); uint32* pGroup = m_ppGroups[group_index]; if (!pGroup) return 0; uint bit_ofs = index & (cBitsPerGroup - 1); uint bit = (1U << (bit_ofs & 31)); return (pGroup[bit_ofs >> 5] & bit) != 0; } inline uint32 get_uint32(uint index) const { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); uint32* pGroup = m_ppGroups[group_index]; if (!pGroup) return 0; uint bit_ofs = index & (cBitsPerGroup - 1); return pGroup[bit_ofs >> 5]; } inline void set_uint32(uint index, uint32 value) const { uint group_index = index >> cBitsPerGroupShift; CRNLIB_ASSERT(group_index < m_num_groups); uint32* pGroup = m_ppGroups[group_index]; if (!pGroup) { pGroup = alloc_group(true); m_ppGroups[group_index] = pGroup; } uint bit_ofs = index & (cBitsPerGroup - 1); pGroup[bit_ofs >> 5] = value; } int find_first_set_bit(uint index, uint num) const; enum { cDWORDsPerGroupShift = 4U, cDWORDsPerGroup = 1U << cDWORDsPerGroupShift, cBitsPerGroupShift = cDWORDsPerGroupShift + 5, cBitsPerGroup = 1U << cBitsPerGroupShift, cBitsPerGroupMask = cBitsPerGroup - 1U, cBytesPerGroup = cDWORDsPerGroup * sizeof(uint32) }; uint get_num_groups() const { return m_num_groups; } uint32** get_groups() { return m_ppGroups; } private: uint m_num_groups; uint32** 
m_ppGroups; static inline uint32* alloc_group(bool clear) { uint32* p = (uint32*)crnlib_malloc(cBytesPerGroup); CRNLIB_VERIFY(p); if (clear) memset(p, 0, cBytesPerGroup); return p; } static inline void free_group(void* p) { if (p) crnlib_free(p); } }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_stb_image.cpp000066400000000000000000000002001503722002600225600ustar00rootroot00000000000000#define STB_IMAGE_IMPLEMENTATION #include "stb_image.h" #define STB_IMAGE_WRITE_IMPLEMENTATION #include "stb_image_write.h" DaemonEngine-crunch-ef4d32f/crnlib/crn_strutils.cpp000066400000000000000000000235241503722002600225350ustar00rootroot00000000000000// File: crn_strutils.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_strutils.h" namespace crnlib { char* crn_strdup(const char* pStr) { if (!pStr) pStr = ""; size_t l = strlen(pStr) + 1; char* p = (char*)crnlib_malloc(l); if (p) memcpy(p, pStr, l); return p; } char* strcpy_safe(char* pDst, size_t dst_len, const char* pSrc) { CRNLIB_ASSERT(pDst && pSrc && dst_len); if (!dst_len) return pDst; char* q = pDst; char c; do { if (dst_len == 1) { *q++ = '\0'; break; } c = *pSrc++; *q++ = c; dst_len--; } while (c); CRNLIB_ASSERT((q - pDst) <= (int)dst_len); return pDst; } bool int_to_string(int value, char* pDst, uint len) { CRNLIB_ASSERT(pDst); const uint cBufSize = 16; char buf[cBufSize]; uint j = static_cast((value < 0) ? 
-value : value); char* p = buf + cBufSize - 1; *p-- = '\0'; do { *p-- = static_cast('0' + (j % 10)); j /= 10; } while (j); if (value < 0) *p-- = '-'; const size_t total_bytes = (buf + cBufSize - 1) - p; if (total_bytes > len) return false; for (size_t i = 0; i < total_bytes; i++) pDst[i] = p[1 + i]; return true; } bool uint_to_string(uint value, char* pDst, uint len) { CRNLIB_ASSERT(pDst); const uint cBufSize = 16; char buf[cBufSize]; char* p = buf + cBufSize - 1; *p-- = '\0'; do { *p-- = static_cast('0' + (value % 10)); value /= 10; } while (value); const size_t total_bytes = (buf + cBufSize - 1) - p; if (total_bytes > len) return false; for (size_t i = 0; i < total_bytes; i++) pDst[i] = p[1 + i]; return true; } bool string_to_int(const char*& pBuf, int& value) { value = 0; CRNLIB_ASSERT(pBuf); const char* p = pBuf; while (*p && isspace(*p)) p++; uint result = 0; bool negative = false; if (!isdigit(*p)) { if (p[0] == '-') { negative = true; p++; } else return false; } while (*p && isdigit(*p)) { if (result & 0xE0000000U) return false; const uint result8 = result << 3U; const uint result2 = result << 1U; if (result2 > (0xFFFFFFFFU - result8)) return false; result = result8 + result2; uint c = p[0] - '0'; if (c > (0xFFFFFFFFU - result)) return false; result += c; p++; } if (negative) { if (result > 0x80000000U) { value = 0; return false; } value = -static_cast(result); } else { if (result > 0x7FFFFFFFU) { value = 0; return false; } value = static_cast(result); } pBuf = p; return true; } bool string_to_int64(const char*& pBuf, int64& value) { value = 0; CRNLIB_ASSERT(pBuf); const char* p = pBuf; while (*p && isspace(*p)) p++; uint64 result = 0; bool negative = false; if (!isdigit(*p)) { if (p[0] == '-') { negative = true; p++; } else return false; } while (*p && isdigit(*p)) { if (result & 0xE000000000000000ULL) return false; const uint64 result8 = result << 3U; const uint64 result2 = result << 1U; if (result2 > (0xFFFFFFFFFFFFFFFFULL - result8)) return false; result 
= result8 + result2; uint c = p[0] - '0'; if (c > (0xFFFFFFFFFFFFFFFFULL - result)) return false; result += c; p++; } if (negative) { if (result > 0x8000000000000000ULL) { value = 0; return false; } value = -static_cast(result); } else { if (result > 0x7FFFFFFFFFFFFFFFULL) { value = 0; return false; } value = static_cast(result); } pBuf = p; return true; } bool string_to_uint(const char*& pBuf, uint& value) { value = 0; CRNLIB_ASSERT(pBuf); const char* p = pBuf; while (*p && isspace(*p)) p++; uint result = 0; if (!isdigit(*p)) return false; while (*p && isdigit(*p)) { if (result & 0xE0000000U) return false; const uint result8 = result << 3U; const uint result2 = result << 1U; if (result2 > (0xFFFFFFFFU - result8)) return false; result = result8 + result2; uint c = p[0] - '0'; if (c > (0xFFFFFFFFU - result)) return false; result += c; p++; } value = result; pBuf = p; return true; } bool string_to_uint64(const char*& pBuf, uint64& value) { value = 0; CRNLIB_ASSERT(pBuf); const char* p = pBuf; while (*p && isspace(*p)) p++; uint64 result = 0; if (!isdigit(*p)) return false; while (*p && isdigit(*p)) { if (result & 0xE000000000000000ULL) return false; const uint64 result8 = result << 3U; const uint64 result2 = result << 1U; if (result2 > (0xFFFFFFFFFFFFFFFFULL - result8)) return false; result = result8 + result2; uint c = p[0] - '0'; if (c > (0xFFFFFFFFFFFFFFFFULL - result)) return false; result += c; p++; } value = result; pBuf = p; return true; } bool string_to_bool(const char* p, bool& value) { CRNLIB_ASSERT(p); value = false; if (crnlib_stricmp(p, "false") == 0) return true; if (crnlib_stricmp(p, "true") == 0) { value = true; return true; } const char* q = p; uint v; if (string_to_uint(q, v)) { if (!v) return true; else if (v == 1) { value = true; return true; } } return false; } bool string_to_float(const char*& p, float& value, uint round_digit) { double d; if (!string_to_double(p, d, round_digit)) { value = 0; return false; } value = static_cast(d); return true; 
} bool string_to_double(const char*& p, double& value, uint round_digit) { return string_to_double(p, p + 128, value, round_digit); } // I wrote this approx. 20 years ago in C/assembly using a limited FP emulator package, so it's a bit crude. bool string_to_double(const char*& p, const char* pEnd, double& value, uint round_digit) { CRNLIB_ASSERT(p); value = 0; enum { AF_BLANK = 1, AF_SIGN = 2, AF_DPOINT = 3, AF_BADCHAR = 4, AF_OVRFLOW = 5, AF_EXPONENT = 6, AF_NODIGITS = 7 }; int status = 0; const char* buf = p; int got_sign_flag = 0, got_dp_flag = 0, got_num_flag = 0; int got_e_flag = 0, got_e_sign_flag = 0, e_sign = 0; uint whole_count = 0, frac_count = 0; double whole = 0, frac = 0, scale = 1, exponent = 1; if (p >= pEnd) { status = AF_NODIGITS; goto af_exit; } while (*buf) { if (!isspace(*buf)) break; if (++buf >= pEnd) { status = AF_NODIGITS; goto af_exit; } } p = buf; while (*buf) { p = buf; if (buf >= pEnd) break; int i = *buf++; switch (i) { case 'e': case 'E': { got_e_flag = 1; goto exit_while; } case '+': { if ((got_num_flag) || (got_sign_flag)) { status = AF_SIGN; goto af_exit; } got_sign_flag = 1; break; } case '-': { if ((got_num_flag) || (got_sign_flag)) { status = AF_SIGN; goto af_exit; } got_sign_flag = -1; break; } case '.': { if (got_dp_flag) { status = AF_DPOINT; goto af_exit; } got_dp_flag = 1; break; } default: { if ((i < '0') || (i > '9')) goto exit_while; else { i -= '0'; got_num_flag = 1; if (got_dp_flag) { if (frac_count < round_digit) { frac = frac * 10.0f + i; scale = scale * 10.0f; } else if (frac_count == round_digit) { if (i >= 5) /* check for round */ frac = frac + 1.0f; } frac_count++; } else { whole = whole * 10.0f + i; whole_count++; if (whole > 1e+100) { status = AF_OVRFLOW; goto af_exit; } } } break; } } } exit_while: if (got_e_flag) { if ((got_num_flag == 0) && (got_dp_flag)) { status = AF_EXPONENT; goto af_exit; } int e = 0; e_sign = 1; got_num_flag = 0; got_e_sign_flag = 0; while (*buf) { p = buf; if (buf >= pEnd) break; int i 
= *buf++; if (i == '+') { if ((got_num_flag) || (got_e_sign_flag)) { status = AF_EXPONENT; goto af_exit; } e_sign = 1; got_e_sign_flag = 1; } else if (i == '-') { if ((got_num_flag) || (got_e_sign_flag)) { status = AF_EXPONENT; goto af_exit; } e_sign = -1; got_e_sign_flag = 1; } else if ((i >= '0') && (i <= '9')) { got_num_flag = 1; if ((e = (e * 10) + (i - 48)) > 100) { status = AF_EXPONENT; goto af_exit; } } else break; } for (int i = 1; i <= e; i++) /* compute 10^e */ exponent = exponent * 10.0f; } if (((whole_count + frac_count) == 0) && (got_e_flag == 0)) { status = AF_NODIGITS; goto af_exit; } if (frac) whole = whole + (frac / scale); if (got_e_flag) { if (e_sign > 0) whole = whole * exponent; else whole = whole / exponent; } if (got_sign_flag < 0) whole = -whole; value = whole; af_exit: return (status == 0); } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_strutils.h000066400000000000000000000017721503722002600222030ustar00rootroot00000000000000// File: crn_strutils.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #ifdef WIN32 #define CRNLIB_PATH_SEPERATOR_CHAR '\\' #else #define CRNLIB_PATH_SEPERATOR_CHAR '/' #endif namespace crnlib { char* crn_strdup(const char* pStr); char* strcpy_safe(char* pDst, size_t dst_len, const char* pSrc); bool int_to_string(int value, char* pDst, uint len); bool uint_to_string(uint value, char* pDst, uint len); bool string_to_int(const char*& pBuf, int& value); bool string_to_uint(const char*& pBuf, uint& value); bool string_to_int64(const char*& pBuf, int64& value); bool string_to_uint64(const char*& pBuf, uint64& value); bool string_to_bool(const char* p, bool& value); bool string_to_float(const char*& p, float& value, uint round_digit = 512U); bool string_to_double(const char*& p, double& value, uint round_digit = 512U); bool string_to_double(const char*& p, const char* pEnd, double& value, uint round_digit = 512U); } // namespace crnlib 
DaemonEngine-crunch-ef4d32f/crnlib/crn_symbol_codec.cpp000066400000000000000000001306711503722002600233100ustar00rootroot00000000000000// File: crn_symbol_codec.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_symbol_codec.h" #include "crn_huffman_codes.h" namespace crnlib { static float gProbCost[cSymbolCodecArithProbScale]; //const uint cArithProbMulLenSigBits = 8; //const uint cArithProbMulLenSigScale = 1 << cArithProbMulLenSigBits; class arith_prob_cost_initializer { public: arith_prob_cost_initializer() { const float cInvLn2 = 1.0f / 0.69314718f; for (uint i = 0; i < cSymbolCodecArithProbScale; i++) gProbCost[i] = -logf(i * (1.0f / cSymbolCodecArithProbScale)) * cInvLn2; } }; static arith_prob_cost_initializer g_prob_cost_initializer; double symbol_histogram::calc_entropy() const { double total = 0.0f; for (uint i = 0; i < m_hist.size(); i++) total += m_hist[i]; if (total == 0.0f) return 0.0f; double entropy = 0.0f; double neg_inv_log2 = -1.0f / log(2.0f); double inv_total = 1.0f / total; for (uint i = 0; i < m_hist.size(); i++) { if (m_hist[i]) { double bits = log(m_hist[i] * inv_total) * neg_inv_log2; entropy += bits * m_hist[i]; } } return entropy; } uint64 symbol_histogram::get_total() const { uint64 total = 0; for (uint i = 0; i < m_hist.size(); i++) total += m_hist[i]; return total; } adaptive_huffman_data_model::adaptive_huffman_data_model(bool encoding, uint total_syms) : m_total_syms(0), m_update_cycle(0), m_symbols_until_update(0), m_total_count(0), m_pDecode_tables(NULL), m_decoder_table_bits(0), m_encoding(encoding) { if (total_syms) init(encoding, total_syms); } adaptive_huffman_data_model::adaptive_huffman_data_model(const adaptive_huffman_data_model& other) : m_total_syms(0), m_update_cycle(0), m_symbols_until_update(0), m_total_count(0), m_pDecode_tables(NULL), m_decoder_table_bits(0), m_encoding(false) { *this = other; } adaptive_huffman_data_model::~adaptive_huffman_data_model() { if 
(m_pDecode_tables) crnlib_delete(m_pDecode_tables); } adaptive_huffman_data_model& adaptive_huffman_data_model::operator=(const adaptive_huffman_data_model& rhs) { if (this == &rhs) return *this; m_total_syms = rhs.m_total_syms; m_update_cycle = rhs.m_update_cycle; m_symbols_until_update = rhs.m_symbols_until_update; m_total_count = rhs.m_total_count; m_sym_freq = rhs.m_sym_freq; m_codes = rhs.m_codes; m_code_sizes = rhs.m_code_sizes; if (rhs.m_pDecode_tables) { if (m_pDecode_tables) *m_pDecode_tables = *rhs.m_pDecode_tables; else m_pDecode_tables = crnlib_new(*rhs.m_pDecode_tables); } else { crnlib_delete(m_pDecode_tables); m_pDecode_tables = NULL; } m_decoder_table_bits = rhs.m_decoder_table_bits; m_encoding = rhs.m_encoding; return *this; } void adaptive_huffman_data_model::clear() { m_sym_freq.clear(); m_codes.clear(); m_code_sizes.clear(); m_total_syms = 0; m_update_cycle = 0; m_symbols_until_update = 0; m_decoder_table_bits = 0; m_total_count = 0; if (m_pDecode_tables) { crnlib_delete(m_pDecode_tables); m_pDecode_tables = NULL; } } void adaptive_huffman_data_model::init(bool encoding, uint total_syms) { clear(); m_encoding = encoding; m_sym_freq.resize(total_syms); m_code_sizes.resize(total_syms); m_total_syms = total_syms; if (m_total_syms <= 16) m_decoder_table_bits = 0; else m_decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); if (m_encoding) m_codes.resize(total_syms); else m_pDecode_tables = crnlib_new(); reset(); } void adaptive_huffman_data_model::reset() { if (!m_total_syms) return; for (uint i = 0; i < m_total_syms; i++) m_sym_freq[i] = 1; m_total_count = 0; m_update_cycle = m_total_syms; update(); m_symbols_until_update = m_update_cycle = 8; //(m_total_syms + 6) >> 1; } void adaptive_huffman_data_model::rescale() { uint total_freq = 0; for (uint i = 0; i < m_total_syms; i++) { uint freq = (m_sym_freq[i] + 1) >> 1; total_freq += freq; m_sym_freq[i] = static_cast(freq); } m_total_count = 
total_freq; } void adaptive_huffman_data_model::update() { m_total_count += m_update_cycle; if (m_total_count >= 32768) rescale(); void* pTables = create_generate_huffman_codes_tables(); uint max_code_size, total_freq; bool status = generate_huffman_codes(pTables, m_total_syms, &m_sym_freq[0], &m_code_sizes[0], max_code_size, total_freq); CRNLIB_ASSERT(status); CRNLIB_ASSERT(total_freq == m_total_count); if (max_code_size > prefix_coding::cMaxExpectedCodeSize) prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], prefix_coding::cMaxExpectedCodeSize); free_generate_huffman_codes_tables(pTables); if (m_encoding) status = prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0]); else status = prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, m_decoder_table_bits); CRNLIB_ASSERT(status); (void)status; m_update_cycle = (5 * m_update_cycle) >> 2; uint max_cycle = (m_total_syms + 6) << 3; // this was << 2 - which is ~12% slower but compresses around .5% better if (m_update_cycle > max_cycle) m_update_cycle = max_cycle; m_symbols_until_update = m_update_cycle; } static_huffman_data_model::static_huffman_data_model() : m_total_syms(0), m_pDecode_tables(NULL), m_encoding(false) { } static_huffman_data_model::static_huffman_data_model(const static_huffman_data_model& other) : m_total_syms(0), m_pDecode_tables(NULL), m_encoding(false) { *this = other; } static_huffman_data_model::~static_huffman_data_model() { if (m_pDecode_tables) crnlib_delete(m_pDecode_tables); } static_huffman_data_model& static_huffman_data_model::operator=(const static_huffman_data_model& rhs) { if (this == &rhs) return *this; m_total_syms = rhs.m_total_syms; m_codes = rhs.m_codes; m_code_sizes = rhs.m_code_sizes; if (rhs.m_pDecode_tables) { if (m_pDecode_tables) *m_pDecode_tables = *rhs.m_pDecode_tables; else m_pDecode_tables = crnlib_new(*rhs.m_pDecode_tables); } else { crnlib_delete(m_pDecode_tables); m_pDecode_tables = NULL; } 
// Builds a static Huffman model from 16-bit symbol frequencies.
//
// encoding        - true to generate code words for encoding, false to generate decoder tables.
// total_syms      - number of entries in pSym_freq.
// pSym_freq       - per-symbol frequencies.
// code_size_limit - longest code length wanted; clamped to prefix_coding::cMaxExpectedCodeSize.
//
// Returns false when code generation, code length limiting, or table generation fails.
bool static_huffman_data_model::init(bool encoding, uint total_syms, const uint16* pSym_freq, uint code_size_limit) {
  CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1));

  m_encoding = encoding;

  m_total_syms = total_syms;

  code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize);

  m_code_sizes.resize(total_syms);

  // The scratch tables are only needed while the code lengths are computed, so they are
  // released before the result is checked.
  void* pTables = create_generate_huffman_codes_tables();

  uint max_code_size = 0, total_freq;
  bool status = generate_huffman_codes(pTables, m_total_syms, pSym_freq, &m_code_sizes[0], max_code_size, total_freq);

  free_generate_huffman_codes_tables(pTables);

  if (!status)
    return false;

  if (max_code_size > code_size_limit) {
    if (!prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], code_size_limit))
      return false;
  }

  if (m_encoding) {
    m_codes.resize(total_syms);

    // An encoder never decodes: drop any tables left over from a previous decoder setup.
    if (m_pDecode_tables) {
      crnlib_delete(m_pDecode_tables);
      m_pDecode_tables = NULL;
    }

    if (!prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0]))
      return false;
  } else {
    m_codes.clear();

    // NOTE(review): the template argument of crnlib_new appears to be missing in this copy
    // of the source - confirm against the repository.
    if (!m_pDecode_tables)
      m_pDecode_tables = crnlib_new();

    if (!prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, compute_decoder_table_bits()))
      return false;
  }

  return true;
}
total_syms; i++) sym_freq16[i] = static_cast(pSym_freq[i]); } else { for (uint i = 0; i < total_syms; i++) { uint f = pSym_freq[i]; if (!f) continue; uint64 fl = f; fl = ((fl << 16) - fl) + (max_freq >> 1); fl /= max_freq; if (fl < 1) fl = 1; CRNLIB_ASSERT(fl <= cUINT16_MAX); sym_freq16[i] = static_cast(fl); } } return init(encoding, total_syms, &sym_freq16[0], code_size_limit); } bool static_huffman_data_model::init(bool encoding, uint total_syms, const uint8* pCode_sizes, uint code_size_limit) { CRNLIB_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); m_encoding = encoding; code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); m_code_sizes.resize(total_syms); uint min_code_size = UINT_MAX; uint max_code_size = 0; for (uint i = 0; i < total_syms; i++) { uint s = pCode_sizes[i]; m_code_sizes[i] = static_cast(s); min_code_size = math::minimum(min_code_size, s); max_code_size = math::maximum(max_code_size, s); } if ((max_code_size < 1) || (max_code_size > 32) || (min_code_size > code_size_limit)) return false; if (max_code_size > code_size_limit) { if (!prefix_coding::limit_max_code_size(m_total_syms, &m_code_sizes[0], code_size_limit)) return false; } if (m_encoding) { m_codes.resize(total_syms); if (m_pDecode_tables) { crnlib_delete(m_pDecode_tables); m_pDecode_tables = NULL; } if (!prefix_coding::generate_codes(m_total_syms, &m_code_sizes[0], &m_codes[0])) return false; } else { m_codes.clear(); if (!m_pDecode_tables) m_pDecode_tables = crnlib_new(); if (!prefix_coding::generate_decoder_tables(m_total_syms, &m_code_sizes[0], m_pDecode_tables, compute_decoder_table_bits())) return false; } return true; } bool static_huffman_data_model::init(bool encoding, const symbol_histogram& hist, uint code_size_limit) { return init(encoding, hist.size(), hist.get_ptr(), code_size_limit); } bool static_huffman_data_model::prepare_decoder_tables() { uint total_syms = m_code_sizes.size(); 
// Returns the number of bits to use for the decoder lookup table: 0 for models with 16 or
// fewer symbols, otherwise 1 + ceil_log2i(m_total_syms) capped at prefix_coding::cMaxTableBits.
uint static_huffman_data_model::compute_decoder_table_bits() const {
  uint decoder_table_bits = 0;
  // NOTE(review): the target type of the static_cast below appears to be missing in this
  // copy of the source - confirm against the repository.
  if (m_total_syms > 16)
    decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits));
  return decoder_table_bits;
}
(cSymbolCodecArithProbScale - m_bit_0_prob) : m_bit_0_prob]; } void adaptive_bit_model::update(uint bit) { if (!bit) m_bit_0_prob += ((cSymbolCodecArithProbScale - m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); else m_bit_0_prob -= (m_bit_0_prob >> cSymbolCodecArithProbMoveBits); CRNLIB_ASSERT(m_bit_0_prob >= 1); CRNLIB_ASSERT(m_bit_0_prob < cSymbolCodecArithProbScale); } adaptive_arith_data_model::adaptive_arith_data_model(bool encoding, uint total_syms) { init(encoding, total_syms); } adaptive_arith_data_model::adaptive_arith_data_model(const adaptive_arith_data_model& other) { m_total_syms = other.m_total_syms; m_probs = other.m_probs; } adaptive_arith_data_model::~adaptive_arith_data_model() { } adaptive_arith_data_model& adaptive_arith_data_model::operator=(const adaptive_arith_data_model& rhs) { m_total_syms = rhs.m_total_syms; m_probs = rhs.m_probs; return *this; } void adaptive_arith_data_model::clear() { m_total_syms = 0; m_probs.clear(); } void adaptive_arith_data_model::init(bool, uint total_syms) { if (!total_syms) { clear(); return; } if ((total_syms < 2) || (!math::is_power_of_2(total_syms))) total_syms = math::next_pow2(total_syms); m_total_syms = total_syms; m_probs.resize(m_total_syms); } void adaptive_arith_data_model::reset() { for (uint i = 0; i < m_probs.size(); i++) m_probs[i].clear(); } float adaptive_arith_data_model::get_cost(uint sym) const { uint node = 1; uint bitmask = m_total_syms; float cost = 0.0f; do { bitmask >>= 1; uint bit = (sym & bitmask) ? 
1 : 0; cost += m_probs[node].get_cost(bit); node = (node << 1) + bit; } while (bitmask > 1); return cost; } symbol_codec::symbol_codec() { clear(); } void symbol_codec::clear() { m_pDecode_buf = NULL; m_pDecode_buf_next = NULL; m_pDecode_buf_end = NULL; m_decode_buf_size = 0; m_bit_buf = 0; m_bit_count = 0; m_total_model_updates = 0; m_mode = cNull; m_simulate_encoding = false; m_total_bits_written = 0; m_arith_base = 0; m_arith_value = 0; m_arith_length = 0; m_arith_total_bits = 0; m_output_buf.clear(); m_arith_output_buf.clear(); m_output_syms.clear(); } void symbol_codec::start_encoding(uint expected_file_size) { m_mode = cEncoding; m_total_model_updates = 0; m_total_bits_written = 0; put_bits_init(expected_file_size); m_output_syms.resize(0); arith_start_encoding(); } // Code length encoding symbols: // 0-16 - actual code lengths const uint cMaxCodelengthCodes = 21; const uint cSmallZeroRunCode = 17; const uint cLargeZeroRunCode = 18; const uint cSmallRepeatCode = 19; const uint cLargeRepeatCode = 20; const uint cMinSmallZeroRunSize = 3; const uint cMaxSmallZeroRunSize = 10; const uint cMinLargeZeroRunSize = 11; const uint cMaxLargeZeroRunSize = 138; const uint cSmallMinNonZeroRunSize = 3; const uint cSmallMaxNonZeroRunSize = 6; const uint cLargeMinNonZeroRunSize = 7; const uint cLargeMaxNonZeroRunSize = 70; const uint cSmallZeroRunExtraBits = 3; const uint cLargeZeroRunExtraBits = 7; const uint cSmallNonZeroRunExtraBits = 2; const uint cLargeNonZeroRunExtraBits = 6; static const uint8 g_most_probable_codelength_codes[] = { cSmallZeroRunCode, cLargeZeroRunCode, cSmallRepeatCode, cLargeRepeatCode, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, 16}; const uint cNumMostProbableCodelengthCodes = sizeof(g_most_probable_codelength_codes) / sizeof(g_most_probable_codelength_codes[0]); static inline void end_zero_run(uint& size, crnlib::vector& codes) { if (!size) return; if (size < cMinSmallZeroRunSize) { while (size--) codes.push_back(0); } else if (size <= 
cMaxSmallZeroRunSize) codes.push_back(static_cast(cSmallZeroRunCode | ((size - cMinSmallZeroRunSize) << 8))); else { CRNLIB_ASSERT((size >= cMinLargeZeroRunSize) && (size <= cMaxLargeZeroRunSize)); codes.push_back(static_cast(cLargeZeroRunCode | ((size - cMinLargeZeroRunSize) << 8))); } size = 0; } static inline void end_nonzero_run(uint& size, uint len, crnlib::vector& codes) { if (!size) return; if (size < cSmallMinNonZeroRunSize) { while (size--) codes.push_back(static_cast(len)); } else if (size <= cSmallMaxNonZeroRunSize) { codes.push_back(static_cast(cSmallRepeatCode | ((size - cSmallMinNonZeroRunSize) << 8))); } else { CRNLIB_ASSERT((size >= cLargeMinNonZeroRunSize) && (size <= cLargeMaxNonZeroRunSize)); codes.push_back(static_cast(cLargeRepeatCode | ((size - cLargeMinNonZeroRunSize) << 8))); } size = 0; } uint symbol_codec::encode_transmit_static_huffman_data_model(static_huffman_data_model& model, bool simulate = false, static_huffman_data_model* pDeltaModel) { CRNLIB_ASSERT(m_mode == cEncoding); uint total_used_syms = 0; for (uint i = model.m_total_syms; i > 0; i--) { if (model.m_code_sizes[i - 1]) { total_used_syms = i; break; } } if (!total_used_syms) { if (!simulate) { encode_bits(0, math::total_bits(prefix_coding::cMaxSupportedSyms)); } return math::total_bits(prefix_coding::cMaxSupportedSyms); } crnlib::vector codes; codes.reserve(model.m_total_syms); uint prev_len = UINT_MAX; uint cur_zero_run_size = 0; uint cur_nonzero_run_size = 0; const uint8* pCodesizes = &model.m_code_sizes[0]; crnlib::vector delta_code_sizes; if ((pDeltaModel) && (pDeltaModel->get_total_syms())) { if (pDeltaModel->m_code_sizes.size() < total_used_syms) return 0; delta_code_sizes.resize(total_used_syms); for (uint i = 0; i < total_used_syms; i++) { int delta = (int)model.m_code_sizes[i] - (int)pDeltaModel->m_code_sizes[i]; if (delta < 0) delta += 17; delta_code_sizes[i] = static_cast(delta); } pCodesizes = delta_code_sizes.get_ptr(); } for (uint i = 0; i <= total_used_syms; 
i++) { const uint len = (i < total_used_syms) ? *pCodesizes++ : 0xFF; CRNLIB_ASSERT((len == 0xFF) || (len <= prefix_coding::cMaxExpectedCodeSize)); if (!len) { end_nonzero_run(cur_nonzero_run_size, prev_len, codes); if (++cur_zero_run_size == cMaxLargeZeroRunSize) end_zero_run(cur_zero_run_size, codes); } else { end_zero_run(cur_zero_run_size, codes); if (len != prev_len) { end_nonzero_run(cur_nonzero_run_size, prev_len, codes); if (len != 0xFF) codes.push_back(static_cast(len)); } else if (++cur_nonzero_run_size == cLargeMaxNonZeroRunSize) end_nonzero_run(cur_nonzero_run_size, prev_len, codes); } prev_len = len; } uint16 hist[cMaxCodelengthCodes]; utils::zero_object(hist); for (uint i = 0; i < codes.size(); i++) { uint code = codes[i] & 0xFF; CRNLIB_ASSERT(code < cMaxCodelengthCodes); hist[code] = static_cast(hist[code] + 1); } static_huffman_data_model dm; if (!dm.init(true, cMaxCodelengthCodes, hist, 7)) return 0; uint num_codelength_codes_to_send; for (num_codelength_codes_to_send = cNumMostProbableCodelengthCodes; num_codelength_codes_to_send > 0; num_codelength_codes_to_send--) if (dm.get_cost(g_most_probable_codelength_codes[num_codelength_codes_to_send - 1])) break; uint total_bits = math::total_bits(prefix_coding::cMaxSupportedSyms); total_bits += 5; total_bits += 3 * num_codelength_codes_to_send; if (!simulate) { encode_bits(total_used_syms, math::total_bits(prefix_coding::cMaxSupportedSyms)); encode_bits(num_codelength_codes_to_send, 5); for (uint i = 0; i < num_codelength_codes_to_send; i++) encode_bits(dm.get_cost(g_most_probable_codelength_codes[i]), 3); } for (uint i = 0; i < codes.size(); i++) { uint code = codes[i]; uint extra = code >> 8; code &= 0xFF; uint extra_bits = 0; if (code == cSmallZeroRunCode) extra_bits = cSmallZeroRunExtraBits; else if (code == cLargeZeroRunCode) extra_bits = cLargeZeroRunExtraBits; else if (code == cSmallRepeatCode) extra_bits = cSmallNonZeroRunExtraBits; else if (code == cLargeRepeatCode) extra_bits = 
cLargeNonZeroRunExtraBits; total_bits += dm.get_cost(code); if (!simulate) encode(code, dm); if (extra_bits) { if (!simulate) encode_bits(extra, extra_bits); total_bits += extra_bits; } } return total_bits; } void symbol_codec::encode_bits(uint bits, uint num_bits) { CRNLIB_ASSERT(m_mode == cEncoding); if (!num_bits) return; CRNLIB_ASSERT((num_bits == 32) || (bits <= ((1U << num_bits) - 1))); if (num_bits > 16) { record_put_bits(bits >> 16, num_bits - 16); record_put_bits(bits & 0xFFFF, 16); } else record_put_bits(bits, num_bits); } void symbol_codec::encode_align_to_byte() { CRNLIB_ASSERT(m_mode == cEncoding); if (!m_simulate_encoding) { output_symbol sym; sym.m_bits = 0; sym.m_num_bits = output_symbol::cAlignToByteSym; sym.m_arith_prob0 = 0; m_output_syms.push_back(sym); } else { // We really don't know how many we're going to write, so just be conservative. m_total_bits_written += 7; } } void symbol_codec::encode(uint sym, adaptive_huffman_data_model& model) { CRNLIB_ASSERT(m_mode == cEncoding); CRNLIB_ASSERT(model.m_encoding); record_put_bits(model.m_codes[sym], model.m_code_sizes[sym]); uint freq = model.m_sym_freq[sym]; freq++; model.m_sym_freq[sym] = static_cast(freq); if (freq == cUINT16_MAX) model.rescale(); if (--model.m_symbols_until_update == 0) { m_total_model_updates++; model.update(); } } void symbol_codec::encode(uint sym, static_huffman_data_model& model) { CRNLIB_ASSERT(m_mode == cEncoding); CRNLIB_ASSERT(model.m_encoding); CRNLIB_ASSERT(model.m_code_sizes[sym]); record_put_bits(model.m_codes[sym], model.m_code_sizes[sym]); } void symbol_codec::encode_truncated_binary(uint v, uint n) { CRNLIB_ASSERT((n >= 2) && (v < n)); uint k = math::floor_log2i(n); uint u = (1 << (k + 1)) - n; if (v < u) encode_bits(v, k); else encode_bits(v + u, k + 1); } uint symbol_codec::encode_truncated_binary_cost(uint v, uint n) { CRNLIB_ASSERT((n >= 2) && (v < n)); uint k = math::floor_log2i(n); uint u = (1 << (k + 1)) - n; if (v < u) return k; else return k + 1; } void 
symbol_codec::encode_golomb(uint v, uint m) { CRNLIB_ASSERT(m > 0); uint q = v / m; uint r = v % m; while (q > 16) { encode_bits(0xFFFF, 16); q -= 16; } if (q) encode_bits((1 << q) - 1, q); encode_bits(0, 1); encode_truncated_binary(r, m); } void symbol_codec::encode_rice(uint v, uint m) { CRNLIB_ASSERT(m > 0); uint q = v >> m; uint r = v & ((1 << m) - 1); while (q > 16) { encode_bits(0xFFFF, 16); q -= 16; } if (q) encode_bits((1 << q) - 1, q); encode_bits(0, 1); encode_bits(r, m); } uint symbol_codec::encode_rice_get_cost(uint v, uint m) { CRNLIB_ASSERT(m > 0); uint q = v >> m; //uint r = v & ((1 << m) - 1); return q + 1 + m; } void symbol_codec::arith_propagate_carry() { int index = m_arith_output_buf.size() - 1; while (index >= 0) { uint c = m_arith_output_buf[index]; if (c == 0xFF) m_arith_output_buf[index] = 0; else { m_arith_output_buf[index]++; break; } index--; } } void symbol_codec::arith_renorm_enc_interval() { do { m_arith_output_buf.push_back((m_arith_base >> 24) & 0xFF); m_total_bits_written += 8; m_arith_base <<= 8; } while ((m_arith_length <<= 8) < cSymbolCodecArithMinLen); } void symbol_codec::arith_start_encoding() { m_arith_output_buf.resize(0); m_arith_base = 0; m_arith_value = 0; m_arith_length = cSymbolCodecArithMaxLen; m_arith_total_bits = 0; } void symbol_codec::encode(uint bit, adaptive_bit_model& model, bool update_model) { CRNLIB_ASSERT(m_mode == cEncoding); m_arith_total_bits++; if (!m_simulate_encoding) { output_symbol sym; sym.m_bits = bit; sym.m_num_bits = -1; sym.m_arith_prob0 = model.m_bit_0_prob; m_output_syms.push_back(sym); } //uint x = gArithProbMulTab[model.m_bit_0_prob >> (cSymbolCodecArithProbBits - cSymbolCodecArithProbMulBits)][m_arith_length >> (32 - cSymbolCodecArithProbMulLenSigBits)] << 16; uint x = model.m_bit_0_prob * (m_arith_length >> cSymbolCodecArithProbBits); if (!bit) { if (update_model) model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); m_arith_length = x; 
} else { if (update_model) model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); uint orig_base = m_arith_base; m_arith_base += x; m_arith_length -= x; if (orig_base > m_arith_base) arith_propagate_carry(); } if (m_arith_length < cSymbolCodecArithMinLen) arith_renorm_enc_interval(); } void symbol_codec::encode(uint sym, adaptive_arith_data_model& model) { uint node = 1; uint bitmask = model.m_total_syms; do { bitmask >>= 1; uint bit = (sym & bitmask) ? 1 : 0; encode(bit, model.m_probs[node]); node = (node << 1) + bit; } while (bitmask > 1); } void symbol_codec::arith_stop_encoding() { if (!m_arith_total_bits) return; uint orig_base = m_arith_base; if (m_arith_length > 2 * cSymbolCodecArithMinLen) { m_arith_base += cSymbolCodecArithMinLen; m_arith_length = (cSymbolCodecArithMinLen >> 1); } else { m_arith_base += (cSymbolCodecArithMinLen >> 1); m_arith_length = (cSymbolCodecArithMinLen >> 9); } if (orig_base > m_arith_base) arith_propagate_carry(); arith_renorm_enc_interval(); while (m_arith_output_buf.size() < 4) { m_arith_output_buf.push_back(0); m_total_bits_written += 8; } } void symbol_codec::stop_encoding(bool support_arith) { CRNLIB_ASSERT(m_mode == cEncoding); arith_stop_encoding(); if (!m_simulate_encoding) assemble_output_buf(support_arith); m_mode = cNull; } void symbol_codec::record_put_bits(uint bits, uint num_bits) { CRNLIB_ASSERT(m_mode == cEncoding); CRNLIB_ASSERT(num_bits <= 25); CRNLIB_ASSERT(m_bit_count >= 25); if (!num_bits) return; m_total_bits_written += num_bits; if (!m_simulate_encoding) { output_symbol sym; sym.m_bits = bits; sym.m_num_bits = (uint16)num_bits; sym.m_arith_prob0 = 0; m_output_syms.push_back(sym); } } void symbol_codec::put_bits_init(uint expected_size) { m_bit_buf = 0; m_bit_count = cBitBufSize; m_output_buf.resize(0); m_output_buf.reserve(expected_size); } void symbol_codec::put_bits(uint bits, uint num_bits) { CRNLIB_ASSERT(num_bits <= 25); CRNLIB_ASSERT(m_bit_count >= 25); if (!num_bits) return; 
// Replays the recorded output symbols into the final bit stream (via put_bits()).
//
// support_arith - when true, the stream starts with a 1-bit flag telling whether arithmetic
//                 coded data is present, followed (if so) by the first 4 arithmetic bytes.
//
// Plain symbols are written as recorded and alignment markers pad to the next byte boundary.
// For every arithmetic coded symbol the arithmetic decoder state (m_arith_value /
// m_arith_length) is stepped here, so each further arithmetic byte is written into the
// stream exactly where that renormalization consumes it. The replayed bit is verified
// against the recorded one.
void symbol_codec::assemble_output_buf(bool support_arith) {
  // put_bits() counts every bit it writes, so the total is rebuilt from scratch here.
  m_total_bits_written = 0;

  // Read position inside m_arith_output_buf.
  uint arith_buf_ofs = 0;

  if (support_arith) {
    if (m_arith_output_buf.size()) {
      put_bits(1, 1);

      // Prime the replayed decoder state with the first 4 bytes.
      m_arith_length = cSymbolCodecArithMaxLen;
      m_arith_value = 0;
      for (uint i = 0; i < 4; i++) {
        const uint c = m_arith_output_buf[arith_buf_ofs++];
        m_arith_value = (m_arith_value << 8) | c;
        put_bits(c, 8);
      }
    } else {
      put_bits(0, 1);
    }
  }

  for (uint sym_index = 0; sym_index < m_output_syms.size(); sym_index++) {
    const output_symbol& sym = m_output_syms[sym_index];

    if (sym.m_num_bits == output_symbol::cAlignToByteSym) {
      put_bits_align_to_byte();
    } else if (sym.m_num_bits == output_symbol::cArithSym) {
      // Renormalize: emit arithmetic bytes (zeros once the buffer is exhausted) until the
      // interval is large enough again.
      if (m_arith_length < cSymbolCodecArithMinLen) {
        do {
          const uint c = (arith_buf_ofs < m_arith_output_buf.size()) ? m_arith_output_buf[arith_buf_ofs++] : 0;
          put_bits(c, 8);
          m_arith_value = (m_arith_value << 8) | c;
        } while ((m_arith_length <<= 8) < cSymbolCodecArithMinLen);
      }

      //uint x = gArithProbMulTab[sym.m_arith_prob0 >> (cSymbolCodecArithProbBits - cSymbolCodecArithProbMulBits)][m_arith_length >> (32 - cSymbolCodecArithProbMulLenSigBits)] << 16;
      // x is the part of the interval assigned to a 0 bit, using the probability that was
      // recorded with the symbol.
      uint x = sym.m_arith_prob0 * (m_arith_length >> cSymbolCodecArithProbBits);
      uint bit = (m_arith_value >= x);

      if (bit == 0) {
        m_arith_length = x;
      } else {
        m_arith_value -= x;
        m_arith_length -= x;
      }

      // The replayed bit must match what was recorded for this symbol.
      CRNLIB_VERIFY(bit == sym.m_bits);
    } else {
      put_bits(sym.m_bits, sym.m_num_bits);
    }
  }

  flush_bits();
}
(m_bit_count < (int)num_bits) { uint c = 0; if (m_pDecode_buf_next == m_pDecode_buf_end) { if (!m_decode_buf_eof) { m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; m_pDecode_buf_next = m_pDecode_buf; if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; } } else c = *m_pDecode_buf_next++; m_bit_count += 8; CRNLIB_ASSERT(m_bit_count <= cBitBufSize); m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); } return static_cast(m_bit_buf >> (cBitBufSize - num_bits)); } uint symbol_codec::decode(adaptive_huffman_data_model& model) { CRNLIB_ASSERT(m_mode == cDecoding); CRNLIB_ASSERT(!model.m_encoding); const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; while (m_bit_count < (cBitBufSize - 8)) { uint c = 0; if (m_pDecode_buf_next == m_pDecode_buf_end) { if (!m_decode_buf_eof) { m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; m_pDecode_buf_next = m_pDecode_buf; if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; } } else c = *m_pDecode_buf_next++; m_bit_count += 8; m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); } uint k = static_cast((m_bit_buf >> (cBitBufSize - 16)) + 1); uint sym, len; if (k <= pTables->m_table_max_code) { uint32 t = pTables->m_lookup[m_bit_buf >> (cBitBufSize - pTables->m_table_bits)]; CRNLIB_ASSERT(t != cUINT32_MAX); sym = t & cUINT16_MAX; len = t >> 16; CRNLIB_ASSERT(model.m_code_sizes[sym] == len); } else { len = pTables->m_decode_start_code_size; for (;;) { if (k <= pTables->m_max_codes[len - 1]) break; len++; } int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast((m_bit_buf >> (cBitBufSize - len))); if (((uint)val_ptr >= model.m_total_syms)) { // corrupted stream, or a bug 
CRNLIB_ASSERT(0); return 0; } sym = pTables->m_sorted_symbol_order[val_ptr]; } m_bit_buf <<= len; m_bit_count -= len; uint freq = model.m_sym_freq[sym]; freq++; model.m_sym_freq[sym] = static_cast(freq); if (freq == cUINT16_MAX) model.rescale(); if (--model.m_symbols_until_update == 0) { m_total_model_updates++; model.update(); } return sym; } void symbol_codec::decode_set_input_buffer(const uint8* pBuf, size_t buf_size, const uint8* pBuf_next, bool eof_flag) { CRNLIB_ASSERT(m_mode == cDecoding); m_pDecode_buf = pBuf; m_pDecode_buf_next = pBuf_next; m_decode_buf_size = buf_size; m_pDecode_buf_end = pBuf + buf_size; if (!m_pDecode_need_bytes_func) m_decode_buf_eof = true; else m_decode_buf_eof = eof_flag; } bool symbol_codec::decode_receive_static_huffman_data_model(static_huffman_data_model& model, static_huffman_data_model* pDeltaModel) { CRNLIB_ASSERT(m_mode == cDecoding); const uint total_used_syms = decode_bits(math::total_bits(prefix_coding::cMaxSupportedSyms)); if (!total_used_syms) { model.clear(); return true; } model.m_code_sizes.resize(total_used_syms); memset(&model.m_code_sizes[0], 0, sizeof(model.m_code_sizes[0]) * total_used_syms); const uint num_codelength_codes_to_send = decode_bits(5); if ((num_codelength_codes_to_send < 1) || (num_codelength_codes_to_send > cMaxCodelengthCodes)) return false; static_huffman_data_model dm; dm.m_code_sizes.resize(cMaxCodelengthCodes); for (uint i = 0; i < num_codelength_codes_to_send; i++) dm.m_code_sizes[g_most_probable_codelength_codes[i]] = static_cast(decode_bits(3)); if (!dm.prepare_decoder_tables()) return false; uint ofs = 0; while (ofs < total_used_syms) { const uint num_remaining = total_used_syms - ofs; uint code = decode(dm); if (code <= 16) model.m_code_sizes[ofs++] = static_cast(code); else if (code == cSmallZeroRunCode) { uint len = decode_bits(cSmallZeroRunExtraBits) + cMinSmallZeroRunSize; if (len > num_remaining) return false; ofs += len; } else if (code == cLargeZeroRunCode) { uint len = 
decode_bits(cLargeZeroRunExtraBits) + cMinLargeZeroRunSize; if (len > num_remaining) return false; ofs += len; } else if ((code == cSmallRepeatCode) || (code == cLargeRepeatCode)) { uint len; if (code == cSmallRepeatCode) len = decode_bits(cSmallNonZeroRunExtraBits) + cSmallMinNonZeroRunSize; else len = decode_bits(cLargeNonZeroRunExtraBits) + cLargeMinNonZeroRunSize; if ((!ofs) || (len > num_remaining)) return false; const uint prev = model.m_code_sizes[ofs - 1]; if (!prev) return false; const uint end = ofs + len; while (ofs < end) model.m_code_sizes[ofs++] = static_cast(prev); } else { CRNLIB_ASSERT(0); return false; } } if (ofs != total_used_syms) return false; if ((pDeltaModel) && (pDeltaModel->get_total_syms())) { uint n = math::minimum(pDeltaModel->m_code_sizes.size(), total_used_syms); for (uint i = 0; i < n; i++) { int codesize = model.m_code_sizes[i] + pDeltaModel->m_code_sizes[i]; if (codesize > 16) codesize -= 17; model.m_code_sizes[i] = static_cast(codesize); } } return model.prepare_decoder_tables(); } uint symbol_codec::decode(static_huffman_data_model& model) { CRNLIB_ASSERT(m_mode == cDecoding); CRNLIB_ASSERT(!model.m_encoding); const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; while (m_bit_count < (cBitBufSize - 8)) { uint c = 0; if (m_pDecode_buf_next == m_pDecode_buf_end) { if (!m_decode_buf_eof) { m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; m_pDecode_buf_next = m_pDecode_buf; if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; } } else c = *m_pDecode_buf_next++; m_bit_count += 8; m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); } uint k = static_cast((m_bit_buf >> (cBitBufSize - 16)) + 1); uint sym, len; if (k <= pTables->m_table_max_code) { uint32 t = pTables->m_lookup[m_bit_buf >> (cBitBufSize - pTables->m_table_bits)]; CRNLIB_ASSERT(t != 
cUINT32_MAX); sym = t & cUINT16_MAX; len = t >> 16; CRNLIB_ASSERT(model.m_code_sizes[sym] == len); } else { len = pTables->m_decode_start_code_size; for (;;) { if (k <= pTables->m_max_codes[len - 1]) break; len++; } int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast((m_bit_buf >> (cBitBufSize - len))); if (((uint)val_ptr >= model.m_total_syms)) { // corrupted stream, or a bug CRNLIB_ASSERT(0); return 0; } sym = pTables->m_sorted_symbol_order[val_ptr]; } m_bit_buf <<= len; m_bit_count -= len; return sym; } uint symbol_codec::decode_truncated_binary(uint n) { CRNLIB_ASSERT(n >= 2); uint k = math::floor_log2i(n); uint u = (1 << (k + 1)) - n; uint i = decode_bits(k); if (i >= u) i = ((i << 1) | decode_bits(1)) - u; return i; } uint symbol_codec::decode_golomb(uint m) { CRNLIB_ASSERT(m > 1); uint q = 0; for (;;) { uint k = decode_peek_bits(16); uint l = utils::count_leading_zeros16((~k) & 0xFFFF); q += l; if (l < 16) break; } decode_remove_bits(q + 1); uint r = decode_truncated_binary(m); return (q * m) + r; } uint symbol_codec::decode_rice(uint m) { CRNLIB_ASSERT(m > 0); uint q = 0; for (;;) { uint k = decode_peek_bits(16); uint l = utils::count_leading_zeros16((~k) & 0xFFFF); q += l; decode_remove_bits(l); if (l < 16) break; } decode_remove_bits(1); uint r = decode_bits(m); return (q << m) + r; } uint64 symbol_codec::stop_decoding() { CRNLIB_ASSERT(m_mode == cDecoding); uint64 n = m_pDecode_buf_next - m_pDecode_buf; m_mode = cNull; return n; } void symbol_codec::get_bits_init() { m_bit_buf = 0; m_bit_count = 0; } uint symbol_codec::get_bits(uint num_bits) { CRNLIB_ASSERT(num_bits <= 25); if (!num_bits) return 0; while (m_bit_count < (int)num_bits) { uint c = 0; if (m_pDecode_buf_next == m_pDecode_buf_end) { if (!m_decode_buf_eof) { m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; m_pDecode_buf_next = m_pDecode_buf; if 
(m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; } } else c = *m_pDecode_buf_next++; m_bit_count += 8; CRNLIB_ASSERT(m_bit_count <= cBitBufSize); m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); } uint result = static_cast(m_bit_buf >> (cBitBufSize - num_bits)); m_bit_buf <<= num_bits; m_bit_count -= num_bits; return result; } void symbol_codec::remove_bits(uint num_bits) { CRNLIB_ASSERT(num_bits <= 25); if (!num_bits) return; while (m_bit_count < (int)num_bits) { uint c = 0; if (m_pDecode_buf_next == m_pDecode_buf_end) { if (!m_decode_buf_eof) { m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; m_pDecode_buf_next = m_pDecode_buf; if (m_pDecode_buf_next < m_pDecode_buf_end) c = *m_pDecode_buf_next++; } } else c = *m_pDecode_buf_next++; m_bit_count += 8; CRNLIB_ASSERT(m_bit_count <= cBitBufSize); m_bit_buf |= (static_cast(c) << (cBitBufSize - m_bit_count)); } m_bit_buf <<= num_bits; m_bit_count -= num_bits; } void symbol_codec::decode_align_to_byte() { CRNLIB_ASSERT(m_mode == cDecoding); if (m_bit_count & 7) { remove_bits(m_bit_count & 7); } } int symbol_codec::decode_remove_byte_from_bit_buf() { if (m_bit_count < 8) return -1; int result = static_cast(m_bit_buf >> (cBitBufSize - 8)); m_bit_buf <<= 8; m_bit_count -= 8; return result; } uint symbol_codec::decode(adaptive_bit_model& model, bool update_model) { if (m_arith_length < cSymbolCodecArithMinLen) { uint c = get_bits(8); m_arith_value = (m_arith_value << 8) | c; m_arith_length <<= 8; CRNLIB_ASSERT(m_arith_length >= cSymbolCodecArithMinLen); } CRNLIB_ASSERT(m_arith_length >= cSymbolCodecArithMinLen); //uint x = gArithProbMulTab[model.m_bit_0_prob >> (cSymbolCodecArithProbBits - cSymbolCodecArithProbMulBits)][m_arith_length >> (32 - cSymbolCodecArithProbMulLenSigBits)] << 16; uint x = model.m_bit_0_prob * (m_arith_length >> 
cSymbolCodecArithProbBits); uint bit = (m_arith_value >= x); if (!bit) { if (update_model) model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); m_arith_length = x; } else { if (update_model) model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); m_arith_value -= x; m_arith_length -= x; } return bit; } uint symbol_codec::decode(adaptive_arith_data_model& model) { uint node = 1; do { uint bit = decode(model.m_probs[node]); node = (node << 1) + bit; } while (node < model.m_total_syms); return node - model.m_total_syms; } void symbol_codec::start_arith_decoding() { CRNLIB_ASSERT(m_mode == cDecoding); m_arith_length = cSymbolCodecArithMaxLen; m_arith_value = 0; if (get_bits(1)) { m_arith_value = (get_bits(8) << 24); m_arith_value |= (get_bits(8) << 16); m_arith_value |= (get_bits(8) << 8); m_arith_value |= get_bits(8); } } void symbol_codec::decode_need_bytes() { if (!m_decode_buf_eof) { m_pDecode_need_bytes_func(m_pDecode_buf_next - m_pDecode_buf, m_pDecode_private_data, m_pDecode_buf, m_decode_buf_size, m_decode_buf_eof); m_pDecode_buf_end = m_pDecode_buf + m_decode_buf_size; m_pDecode_buf_next = m_pDecode_buf; } } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_symbol_codec.h000066400000000000000000000631341503722002600227540ustar00rootroot00000000000000// File: crn_symbol_codec.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_prefix_coding.h" namespace crnlib { class symbol_codec; class adaptive_arith_data_model; const uint cSymbolCodecArithMinLen = 0x01000000U; const uint cSymbolCodecArithMaxLen = 0xFFFFFFFFU; const uint cSymbolCodecArithProbBits = 11; const uint cSymbolCodecArithProbScale = 1 << cSymbolCodecArithProbBits; const uint cSymbolCodecArithProbMoveBits = 5; const uint cSymbolCodecArithProbMulBits = 8; const uint cSymbolCodecArithProbMulScale = 1 << cSymbolCodecArithProbMulBits; class symbol_histogram { public: inline 
symbol_histogram(uint size = 0) : m_hist(size) {} inline void clear() { m_hist.clear(); } inline uint size() const { return static_cast(m_hist.size()); } inline void inc_freq(uint x, uint amount = 1) { uint h = m_hist[x]; CRNLIB_ASSERT(amount <= (0xFFFFFFFF - h)); m_hist[x] = h + amount; } inline void set_all(uint val) { for (uint i = 0; i < m_hist.size(); i++) m_hist[i] = val; } inline void resize(uint new_size) { m_hist.resize(new_size); } inline const uint* get_ptr() const { return m_hist.empty() ? NULL : &m_hist.front(); } double calc_entropy() const; uint operator[](uint i) const { return m_hist[i]; } uint& operator[](uint i) { return m_hist[i]; } uint64 get_total() const; private: crnlib::vector m_hist; }; class adaptive_huffman_data_model { public: adaptive_huffman_data_model(bool encoding = true, uint total_syms = 0); adaptive_huffman_data_model(const adaptive_huffman_data_model& other); ~adaptive_huffman_data_model(); adaptive_huffman_data_model& operator=(const adaptive_huffman_data_model& rhs); void clear(); void init(bool encoding, uint total_syms); void reset(); void rescale(); uint get_total_syms() const { return m_total_syms; } uint get_cost(uint sym) const { return m_code_sizes[sym]; } public: uint m_total_syms; uint m_update_cycle; uint m_symbols_until_update; uint m_total_count; crnlib::vector m_sym_freq; crnlib::vector m_codes; crnlib::vector m_code_sizes; prefix_coding::decoder_tables* m_pDecode_tables; uint8 m_decoder_table_bits; bool m_encoding; void update(); friend class symbol_codec; }; class static_huffman_data_model { public: static_huffman_data_model(); static_huffman_data_model(const static_huffman_data_model& other); ~static_huffman_data_model(); static_huffman_data_model& operator=(const static_huffman_data_model& rhs); void clear(); bool init(bool encoding, uint total_syms, const uint16* pSym_freq, uint code_size_limit); bool init(bool encoding, uint total_syms, const uint* pSym_freq, uint code_size_limit); bool init(bool encoding, 
uint total_syms, const uint8* pCode_sizes, uint code_size_limit); bool init(bool encoding, const symbol_histogram& hist, uint code_size_limit); uint get_total_syms() const { return m_total_syms; } uint get_cost(uint sym) const { return m_code_sizes[sym]; } const uint8* get_code_sizes() const { return m_code_sizes.empty() ? NULL : &m_code_sizes[0]; } private: uint m_total_syms; crnlib::vector m_codes; crnlib::vector m_code_sizes; prefix_coding::decoder_tables* m_pDecode_tables; bool m_encoding; bool prepare_decoder_tables(); uint compute_decoder_table_bits() const; friend class symbol_codec; }; class adaptive_bit_model { public: adaptive_bit_model(); adaptive_bit_model(float prob0); adaptive_bit_model(const adaptive_bit_model& other); adaptive_bit_model& operator=(const adaptive_bit_model& rhs); void clear(); void set_probability_0(float prob0); void update(uint bit); float get_cost(uint bit) const; public: uint16 m_bit_0_prob; friend class symbol_codec; friend class adaptive_arith_data_model; }; class adaptive_arith_data_model { public: adaptive_arith_data_model(bool encoding = true, uint total_syms = 0); adaptive_arith_data_model(const adaptive_arith_data_model& other); ~adaptive_arith_data_model(); adaptive_arith_data_model& operator=(const adaptive_arith_data_model& rhs); void clear(); void init(bool encoding, uint total_syms); void reset(); uint get_total_syms() const { return m_total_syms; } float get_cost(uint sym) const; private: uint m_total_syms; typedef crnlib::vector adaptive_bit_model_vector; adaptive_bit_model_vector m_probs; friend class symbol_codec; }; #if defined(_WIN64) #define CRNLIB_SYMBOL_CODEC_USE_64_BIT_BUFFER 1 #else #define CRNLIB_SYMBOL_CODEC_USE_64_BIT_BUFFER 0 #endif class symbol_codec { public: symbol_codec(); void clear(); // Encoding void start_encoding(uint expected_file_size); uint encode_transmit_static_huffman_data_model(static_huffman_data_model& model, bool simulate, static_huffman_data_model* pDelta_model = NULL); void 
encode_bits(uint bits, uint num_bits); void encode_align_to_byte(); void encode(uint sym, adaptive_huffman_data_model& model); void encode(uint sym, static_huffman_data_model& model); void encode_truncated_binary(uint v, uint n); static uint encode_truncated_binary_cost(uint v, uint n); void encode_golomb(uint v, uint m); void encode_rice(uint v, uint m); static uint encode_rice_get_cost(uint v, uint m); void encode(uint bit, adaptive_bit_model& model, bool update_model = true); void encode(uint sym, adaptive_arith_data_model& model); inline void encode_enable_simulation(bool enabled) { m_simulate_encoding = enabled; } inline bool encode_get_simulation() { return m_simulate_encoding; } inline uint encode_get_total_bits_written() const { return m_total_bits_written; } void stop_encoding(bool support_arith); const crnlib::vector& get_encoding_buf() const { return m_output_buf; } crnlib::vector& get_encoding_buf() { return m_output_buf; } // Decoding typedef void (*need_bytes_func_ptr)(size_t num_bytes_consumed, void* pPrivate_data, const uint8*& pBuf, size_t& buf_size, bool& eof_flag); bool start_decoding(const uint8* pBuf, size_t buf_size, bool eof_flag = true, need_bytes_func_ptr pNeed_bytes_func = NULL, void* pPrivate_data = NULL); void decode_set_input_buffer(const uint8* pBuf, size_t buf_size, const uint8* pBuf_next, bool eof_flag = true); inline uint64 decode_get_bytes_consumed() const { return m_pDecode_buf_next - m_pDecode_buf; } inline uint64 decode_get_bits_remaining() const { return ((m_pDecode_buf_end - m_pDecode_buf_next) << 3) + m_bit_count; } void start_arith_decoding(); bool decode_receive_static_huffman_data_model(static_huffman_data_model& model, static_huffman_data_model* pDeltaModel); uint decode_bits(uint num_bits); uint decode_peek_bits(uint num_bits); void decode_remove_bits(uint num_bits); void decode_align_to_byte(); int decode_remove_byte_from_bit_buf(); uint decode(adaptive_huffman_data_model& model); uint decode(static_huffman_data_model& 
model); uint decode_truncated_binary(uint n); uint decode_golomb(uint m); uint decode_rice(uint m); uint decode(adaptive_bit_model& model, bool update_model = true); uint decode(adaptive_arith_data_model& model); uint64 stop_decoding(); uint get_total_model_updates() const { return m_total_model_updates; } public: const uint8* m_pDecode_buf; const uint8* m_pDecode_buf_next; const uint8* m_pDecode_buf_end; size_t m_decode_buf_size; bool m_decode_buf_eof; need_bytes_func_ptr m_pDecode_need_bytes_func; void* m_pDecode_private_data; #if CRNLIB_SYMBOL_CODEC_USE_64_BIT_BUFFER typedef uint64 bit_buf_t; enum { cBitBufSize = 64 }; #else typedef uint32 bit_buf_t; enum { cBitBufSize = 32 }; #endif bit_buf_t m_bit_buf; int m_bit_count; uint m_total_model_updates; crnlib::vector m_output_buf; crnlib::vector m_arith_output_buf; struct output_symbol { uint m_bits; enum { cArithSym = -1, cAlignToByteSym = -2 }; int16 m_num_bits; uint16 m_arith_prob0; }; crnlib::vector m_output_syms; uint m_total_bits_written; bool m_simulate_encoding; uint m_arith_base; uint m_arith_value; uint m_arith_length; uint m_arith_total_bits; bool m_support_arith; void put_bits_init(uint expected_size); void record_put_bits(uint bits, uint num_bits); void arith_propagate_carry(); void arith_renorm_enc_interval(); void arith_start_encoding(); void arith_stop_encoding(); void put_bits(uint bits, uint num_bits); void put_bits_align_to_byte(); void flush_bits(); void assemble_output_buf(bool support_arith); void get_bits_init(); uint get_bits(uint num_bits); void remove_bits(uint num_bits); void decode_need_bytes(); enum { cNull, cEncoding, cDecoding } m_mode; }; #define CRNLIB_SYMBOL_CODEC_USE_MACROS 1 #if defined(_MSC_VER) #define CRNLIB_READ_BIG_ENDIAN_UINT32(p) _byteswap_ulong(*reinterpret_cast(p)) #else #define CRNLIB_READ_BIG_ENDIAN_UINT32(p) utils::swap32(*reinterpret_cast(p)) #endif #if CRNLIB_SYMBOL_CODEC_USE_MACROS #define CRNLIB_SYMBOL_CODEC_DECODE_DECLARE(codec) \ uint arith_value; \ uint 
arith_length; \ symbol_codec::bit_buf_t bit_buf; \ int bit_count; \ const uint8* pDecode_buf_next; #define CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ arith_value = codec.m_arith_value; \ arith_length = codec.m_arith_length; \ bit_buf = codec.m_bit_buf; \ bit_count = codec.m_bit_count; \ pDecode_buf_next = codec.m_pDecode_buf_next; #define CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ codec.m_arith_value = arith_value; \ codec.m_arith_length = arith_length; \ codec.m_bit_buf = bit_buf; \ codec.m_bit_count = bit_count; \ codec.m_pDecode_buf_next = pDecode_buf_next; #define CRNLIB_SYMBOL_CODEC_DECODE_GET_BITS(codec, result, num_bits) \ { \ while (bit_count < (int)(num_bits)) { \ uint c = 0; \ if (pDecode_buf_next == codec.m_pDecode_buf_end) { \ CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ codec.decode_need_bytes(); \ CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ if (pDecode_buf_next < codec.m_pDecode_buf_end) \ c = *pDecode_buf_next++; \ } else \ c = *pDecode_buf_next++; \ bit_count += 8; \ bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ } \ result = num_bits ? 
static_cast(bit_buf >> (symbol_codec::cBitBufSize - (num_bits))) : 0; \ bit_buf <<= (num_bits); \ bit_count -= (num_bits); \ } #define CRNLIB_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, result, model) \ { \ if (arith_length < cSymbolCodecArithMinLen) { \ uint c; \ CRNLIB_SYMBOL_CODEC_DECODE_GET_BITS(codec, c, 8); \ arith_value = (arith_value << 8) | c; \ arith_length <<= 8; \ } \ uint x = model.m_bit_0_prob * (arith_length >> cSymbolCodecArithProbBits); \ result = (arith_value >= x); \ if (!result) { \ model.m_bit_0_prob += ((cSymbolCodecArithProbScale - model.m_bit_0_prob) >> cSymbolCodecArithProbMoveBits); \ arith_length = x; \ } else { \ model.m_bit_0_prob -= (model.m_bit_0_prob >> cSymbolCodecArithProbMoveBits); \ arith_value -= x; \ arith_length -= x; \ } \ } #if CRNLIB_SYMBOL_CODEC_USE_64_BIT_BUFFER #define CRNLIB_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) \ { \ const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; \ if (bit_count < 24) { \ uint c = 0; \ pDecode_buf_next += sizeof(uint32); \ if (pDecode_buf_next >= codec.m_pDecode_buf_end) { \ pDecode_buf_next -= sizeof(uint32); \ while (bit_count < 24) { \ CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ codec.decode_need_bytes(); \ CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ if (pDecode_buf_next < codec.m_pDecode_buf_end) \ c = *pDecode_buf_next++; \ bit_count += 8; \ bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ } \ } else { \ c = CRNLIB_READ_BIG_ENDIAN_UINT32(pDecode_buf_next - sizeof(uint32)); \ bit_count += 32; \ bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ } \ } \ uint k = static_cast((bit_buf >> (symbol_codec::cBitBufSize - 16)) + 1); \ uint len; \ if (k <= pTables->m_table_max_code) { \ uint32 t = pTables->m_lookup[bit_buf >> (symbol_codec::cBitBufSize - pTables->m_table_bits)]; \ result = t & UINT16_MAX; \ len = t >> 16; \ } else { \ len = pTables->m_decode_start_code_size; \ for (;;) { \ if (k <= pTables->m_max_codes[len - 1]) 
\ break; \ len++; \ } \ int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast(bit_buf >> (symbol_codec::cBitBufSize - len)); \ if (((uint)val_ptr >= model.m_total_syms)) \ val_ptr = 0; \ result = pTables->m_sorted_symbol_order[val_ptr]; \ } \ bit_buf <<= len; \ bit_count -= len; \ uint freq = model.m_sym_freq[result]; \ freq++; \ model.m_sym_freq[result] = static_cast(freq); \ if (freq == UINT16_MAX) \ model.rescale(); \ if (--model.m_symbols_until_update == 0) { \ model.update(); \ } \ } #else #define CRNLIB_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) \ { \ const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; \ while (bit_count < (symbol_codec::cBitBufSize - 8)) { \ uint c = 0; \ if (pDecode_buf_next == codec.m_pDecode_buf_end) { \ CRNLIB_SYMBOL_CODEC_DECODE_END(codec) \ codec.decode_need_bytes(); \ CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) \ if (pDecode_buf_next < codec.m_pDecode_buf_end) \ c = *pDecode_buf_next++; \ } else \ c = *pDecode_buf_next++; \ bit_count += 8; \ bit_buf |= (static_cast(c) << (symbol_codec::cBitBufSize - bit_count)); \ } \ uint k = static_cast((bit_buf >> (symbol_codec::cBitBufSize - 16)) + 1); \ uint len; \ if (k <= pTables->m_table_max_code) { \ uint32 t = pTables->m_lookup[bit_buf >> (symbol_codec::cBitBufSize - pTables->m_table_bits)]; \ result = t & UINT16_MAX; \ len = t >> 16; \ } else { \ len = pTables->m_decode_start_code_size; \ for (;;) { \ if (k <= pTables->m_max_codes[len - 1]) \ break; \ len++; \ } \ int val_ptr = pTables->m_val_ptrs[len - 1] + static_cast(bit_buf >> (symbol_codec::cBitBufSize - len)); \ if (((uint)val_ptr >= model.m_total_syms)) \ val_ptr = 0; \ result = pTables->m_sorted_symbol_order[val_ptr]; \ } \ bit_buf <<= len; \ bit_count -= len; \ uint freq = model.m_sym_freq[result]; \ freq++; \ model.m_sym_freq[result] = static_cast(freq); \ if (freq == UINT16_MAX) \ model.rescale(); \ if (--model.m_symbols_until_update == 0) { \ model.update(); \ } \ } #endif #else #define 
CRNLIB_SYMBOL_CODEC_DECODE_DECLARE(codec) #define CRNLIB_SYMBOL_CODEC_DECODE_BEGIN(codec) #define CRNLIB_SYMBOL_CODEC_DECODE_END(codec) #define CRNLIB_SYMBOL_CODEC_DECODE_GET_BITS(codec, result, num_bits) result = codec.decode_bits(num_bits); #define CRNLIB_SYMBOL_CODEC_DECODE_ARITH_BIT(codec, result, model) result = codec.decode(model); #define CRNLIB_SYMBOL_CODEC_DECODE_ADAPTIVE_HUFFMAN(codec, result, model) result = codec.decode(model); #endif } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_texture_comp.cpp000066400000000000000000000406521503722002600233630ustar00rootroot00000000000000// File: crn_texture_comp.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_texture_comp.h" #include "crn_dds_comp.h" #include "crn_console.h" #include "crn_rect.h" namespace crnlib { static itexture_comp* create_texture_comp(crn_file_type file_type) { if (file_type == cCRNFileTypeCRN) return crnlib_new(); else if (file_type == cCRNFileTypeDDS) return crnlib_new(); else return NULL; } bool create_compressed_texture(const crn_comp_params& params, crnlib::vector& comp_data, uint32* pActual_quality_level, float* pActual_bitrate) { crn_comp_params local_params(params); if (pixel_format_helpers::is_crn_format_non_srgb(local_params.m_format)) { if (local_params.get_flag(cCRNCompFlagPerceptual)) { console::info("Output pixel format is swizzled or not RGB, disabling perceptual color metrics"); // Destination compressed pixel format is swizzled or not RGB at all, so be sure perceptual colorspace metrics are disabled. 
local_params.set_flag(cCRNCompFlagPerceptual, false); } } if (pActual_quality_level) *pActual_quality_level = 0; if (pActual_bitrate) *pActual_bitrate = 0.0f; comp_data.resize(0); itexture_comp* pTexture_comp = create_texture_comp(local_params.m_file_type); if (!pTexture_comp) return false; if (!pTexture_comp->compress_init(local_params)) { crnlib_delete(pTexture_comp); return false; } if ((local_params.m_target_bitrate <= 0.0f) || (local_params.m_format == cCRNFmtDXT3) || ((local_params.m_file_type == cCRNFileTypeCRN) && ((local_params.m_flags & cCRNCompFlagManualPaletteSizes) != 0))) { if ((local_params.m_file_type == cCRNFileTypeCRN) || ((local_params.m_file_type == cCRNFileTypeDDS) && (local_params.m_quality_level < cCRNMaxQualityLevel))) { console::info("Compressing using quality level %i", local_params.m_quality_level); } if (local_params.m_format == cCRNFmtDXT3) { if (local_params.m_file_type == cCRNFileTypeCRN) console::warning("CRN format doesn't support DXT3"); else if ((local_params.m_file_type == cCRNFileTypeDDS) && (local_params.m_quality_level < cCRNMaxQualityLevel)) console::warning("Clustered DDS compressor doesn't support DXT3"); } if (!pTexture_comp->compress_pass(local_params, pActual_bitrate)) { crnlib_delete(pTexture_comp); return false; } comp_data.swap(pTexture_comp->get_comp_data()); if ((pActual_quality_level) && (local_params.m_target_bitrate <= 0.0)) *pActual_quality_level = local_params.m_quality_level; crnlib_delete(pTexture_comp); return true; } // Interpolative search to find closest quality level to target bitrate. 
const int cLowestQuality = 0; const int cHighestQuality = cCRNMaxQualityLevel; const int cNumQualityLevels = cHighestQuality - cLowestQuality + 1; float best_bitrate = 1e+10f; int best_quality_level = -1; const uint cMaxIterations = 8; for (;;) { int low_quality = cLowestQuality; int high_quality = cHighestQuality; float cached_bitrates[cNumQualityLevels]; for (int i = 0; i < cNumQualityLevels; i++) cached_bitrates[i] = -1.0f; float highest_bitrate = 0.0f; uint iter_count = 0; bool force_binary_search = false; while (low_quality <= high_quality) { if (params.m_flags & cCRNCompFlagDebugging) { console::debug("Quality level bracket: [%u, %u]", low_quality, high_quality); } int trial_quality = (low_quality + high_quality) / 2; if ((iter_count) && (!force_binary_search)) { int bracket_low = trial_quality; while ((cached_bitrates[bracket_low] < 0) && (bracket_low > cLowestQuality)) bracket_low--; if (cached_bitrates[bracket_low] < 0) trial_quality = static_cast(math::lerp((float)low_quality, (float)high_quality, .33f)); else { int bracket_high = trial_quality + 1; if (bracket_high <= cHighestQuality) { while ((cached_bitrates[bracket_high] < 0) && (bracket_high < cHighestQuality)) bracket_high++; if (cached_bitrates[bracket_high] >= 0) { float bracket_low_bitrate = cached_bitrates[bracket_low]; float bracket_high_bitrate = cached_bitrates[bracket_high]; if ((bracket_low_bitrate < bracket_high_bitrate) && (bracket_low_bitrate < local_params.m_target_bitrate) && (bracket_high_bitrate >= local_params.m_target_bitrate)) { int quality = low_quality + static_cast(((local_params.m_target_bitrate - bracket_low_bitrate) * (high_quality - low_quality)) / (bracket_high_bitrate - bracket_low_bitrate)); if ((quality >= low_quality) && (quality <= high_quality)) { trial_quality = quality; } } } } } } console::info("Compressing to quality level %u", trial_quality); float bitrate = 0.0f; local_params.m_quality_level = trial_quality; if (!pTexture_comp->compress_pass(local_params, 
&bitrate)) { crnlib_delete(pTexture_comp); return false; } cached_bitrates[trial_quality] = bitrate; highest_bitrate = math::maximum(highest_bitrate, bitrate); console::info("\nTried quality level %u, bpp: %3.3f", trial_quality, bitrate); if ((best_quality_level < 0) || ((bitrate <= local_params.m_target_bitrate) && (best_bitrate > local_params.m_target_bitrate)) || (((bitrate <= local_params.m_target_bitrate) || (best_bitrate > local_params.m_target_bitrate)) && (fabs(bitrate - local_params.m_target_bitrate) < fabs(best_bitrate - local_params.m_target_bitrate)))) { best_bitrate = bitrate; comp_data.swap(pTexture_comp->get_comp_data()); best_quality_level = trial_quality; if (params.m_flags & cCRNCompFlagDebugging) { console::debug("Choose new best quality level"); } if ((best_bitrate <= local_params.m_target_bitrate) && (fabs(best_bitrate - local_params.m_target_bitrate) < .005f)) break; } if (bitrate > local_params.m_target_bitrate) high_quality = trial_quality - 1; else low_quality = trial_quality + 1; iter_count++; if (iter_count > cMaxIterations) { force_binary_search = true; } } if (((local_params.m_flags & cCRNCompFlagHierarchical) != 0) && (highest_bitrate < local_params.m_target_bitrate) && (fabs(best_bitrate - local_params.m_target_bitrate) >= .005f)) { console::info("Unable to achieve desired bitrate - disabling adaptive block sizes and retrying search."); local_params.m_flags &= ~cCRNCompFlagHierarchical; crnlib_delete(pTexture_comp); pTexture_comp = create_texture_comp(local_params.m_file_type); if (!pTexture_comp->compress_init(local_params)) { crnlib_delete(pTexture_comp); return false; } } else break; } crnlib_delete(pTexture_comp); pTexture_comp = NULL; if (best_quality_level < 0) return false; if (pActual_quality_level) *pActual_quality_level = best_quality_level; if (pActual_bitrate) *pActual_bitrate = best_bitrate; console::printf("Selected quality level %u bpp: %f", best_quality_level, best_bitrate); return true; } static bool 
create_dds_tex(const crn_comp_params& params, mipmapped_texture& dds_tex) { image_u8 images[cCRNMaxFaces][cCRNMaxLevels]; bool has_alpha = false; for (uint face_index = 0; face_index < params.m_faces; face_index++) { for (uint level_index = 0; level_index < params.m_levels; level_index++) { const uint width = math::maximum(1U, params.m_width >> level_index); const uint height = math::maximum(1U, params.m_height >> level_index); if (!params.m_pImages[face_index][level_index]) return false; images[face_index][level_index].alias((color_quad_u8*)params.m_pImages[face_index][level_index], width, height); if (!has_alpha) has_alpha = image_utils::has_alpha(images[face_index][level_index]); } } for (uint face_index = 0; face_index < params.m_faces; face_index++) for (uint level_index = 0; level_index < params.m_levels; level_index++) images[face_index][level_index].set_component_valid(3, has_alpha); face_vec faces(params.m_faces); for (uint face_index = 0; face_index < params.m_faces; face_index++) { for (uint level_index = 0; level_index < params.m_levels; level_index++) { mip_level* pMip = crnlib_new(); image_u8* pImage = crnlib_new(); pImage->swap(images[face_index][level_index]); pMip->assign(pImage); faces[face_index].push_back(pMip); } } dds_tex.assign(faces); #ifdef CRNLIB_BUILD_DEBUG CRNLIB_ASSERT(dds_tex.check()); #endif return true; } bool create_texture_mipmaps(mipmapped_texture& work_tex, const crn_comp_params& params, const crn_mipmap_params& mipmap_params, bool generate_mipmaps) { bool generate_new_mips = false; switch (mipmap_params.m_mode) { case cCRNMipModeUseSourceOrGenerateMips: { if (work_tex.get_num_levels() == 1) generate_new_mips = true; break; } case cCRNMipModeUseSourceMips: { break; } case cCRNMipModeGenerateMips: { generate_new_mips = true; break; } case cCRNMipModeNoMips: { work_tex.discard_mipmaps(); break; } default: { CRNLIB_ASSERT(0); break; } } rect window_rect(mipmap_params.m_window_left, mipmap_params.m_window_top, 
mipmap_params.m_window_right, mipmap_params.m_window_bottom); if (!window_rect.is_empty()) { if (work_tex.get_num_faces() > 1) { console::warning("Can't crop cubemap textures"); } else { console::info("Cropping input texture from window (%ux%u)-(%ux%u)", window_rect.get_left(), window_rect.get_top(), window_rect.get_right(), window_rect.get_bottom()); if (!work_tex.crop(window_rect.get_left(), window_rect.get_top(), window_rect.get_width(), window_rect.get_height())) console::warning("Failed cropping window rect"); } } int new_width = work_tex.get_width(); int new_height = work_tex.get_height(); if ((mipmap_params.m_clamp_width) && (mipmap_params.m_clamp_height)) { if ((new_width > (int)mipmap_params.m_clamp_width) || (new_height > (int)mipmap_params.m_clamp_height)) { if (!mipmap_params.m_clamp_scale) { if (work_tex.get_num_faces() > 1) { console::warning("Can't crop cubemap textures"); } else { new_width = math::minimum(mipmap_params.m_clamp_width, new_width); new_height = math::minimum(mipmap_params.m_clamp_height, new_height); console::info("Clamping input texture to %ux%u", new_width, new_height); work_tex.crop(0, 0, new_width, new_height); } } } } if (mipmap_params.m_scale_mode != cCRNSMDisabled) { bool is_pow2 = math::is_power_of_2((uint32)new_width) && math::is_power_of_2((uint32)new_height); switch (mipmap_params.m_scale_mode) { case cCRNSMAbsolute: { new_width = (uint)mipmap_params.m_scale_x; new_height = (uint)mipmap_params.m_scale_y; break; } case cCRNSMRelative: { new_width = (uint)(mipmap_params.m_scale_x * new_width + .5f); new_height = (uint)(mipmap_params.m_scale_y * new_height + .5f); break; } case cCRNSMLowerPow2: { if (!is_pow2) math::compute_lower_pow2_dim(new_width, new_height); break; } case cCRNSMNearestPow2: { if (!is_pow2) { int lwidth = new_width; int lheight = new_height; math::compute_lower_pow2_dim(lwidth, lheight); int uwidth = new_width; int uheight = new_height; math::compute_upper_pow2_dim(uwidth, uheight); if (labs(new_width - 
lwidth) < labs(new_width - uwidth)) new_width = lwidth; else new_width = uwidth; if (labs(new_height - lheight) < labs(new_height - uheight)) new_height = lheight; else new_height = uheight; } break; } case cCRNSMNextPow2: { if (!is_pow2) math::compute_upper_pow2_dim(new_width, new_height); break; } default: break; } } if ((mipmap_params.m_clamp_width) && (mipmap_params.m_clamp_height)) { if ((new_width > (int)mipmap_params.m_clamp_width) || (new_height > (int)mipmap_params.m_clamp_height)) { if (mipmap_params.m_clamp_scale) { new_width = math::minimum(mipmap_params.m_clamp_width, new_width); new_height = math::minimum(mipmap_params.m_clamp_height, new_height); } } } new_width = math::clamp(new_width, 1, cCRNMaxLevelResolution); new_height = math::clamp(new_height, 1, cCRNMaxLevelResolution); if ((new_width != (int)work_tex.get_width()) || (new_height != (int)work_tex.get_height()) || (mipmap_params.m_renormalize == true && mipmap_params.m_rtopmip == true)) { console::info("Resampling input texture to %ux%u", new_width, new_height); const char* pFilter = crn_get_mip_filter_name(mipmap_params.m_filter); bool srgb = mipmap_params.m_gamma_filtering != 0; mipmapped_texture::resample_params res_params; res_params.m_pFilter = pFilter; res_params.m_wrapping = mipmap_params.m_tiled != 0; if (work_tex.get_num_faces()) res_params.m_wrapping = false; res_params.m_renormalize = mipmap_params.m_renormalize != 0; res_params.m_filter_scale = 1.0f; res_params.m_gamma = mipmap_params.m_gamma; res_params.m_srgb = srgb; res_params.m_multithreaded = (params.m_num_helper_threads > 0); if (!work_tex.resize(new_width, new_height, res_params)) { console::error("Failed resizing texture!"); return false; } } if ((generate_new_mips) && (generate_mipmaps)) { bool srgb = mipmap_params.m_gamma_filtering != 0; const char* pFilter = crn_get_mip_filter_name(mipmap_params.m_filter); mipmapped_texture::generate_mipmap_params gen_params; gen_params.m_pFilter = pFilter; gen_params.m_wrapping = 
mipmap_params.m_tiled != 0; gen_params.m_renormalize = mipmap_params.m_renormalize != 0; gen_params.m_filter_scale = mipmap_params.m_blurriness; gen_params.m_gamma = mipmap_params.m_gamma; gen_params.m_srgb = srgb; gen_params.m_multithreaded = params.m_num_helper_threads > 0; gen_params.m_max_mips = mipmap_params.m_max_levels; gen_params.m_min_mip_size = mipmap_params.m_min_mip_size; console::info("Generating mipmaps using filter \"%s\"", pFilter); timer tm; tm.start(); if (!work_tex.generate_mipmaps(gen_params, true)) { console::error("Failed generating mipmaps!"); return false; } double t = tm.get_elapsed_secs(); console::info("Generated %u mipmap levels in %3.3fs", work_tex.get_num_levels() - 1, t); } return true; } bool create_compressed_texture(const crn_comp_params& params, const crn_mipmap_params& mipmap_params, crnlib::vector& comp_data, uint32* pActual_quality_level, float* pActual_bitrate) { comp_data.resize(0); if (pActual_bitrate) *pActual_bitrate = 0.0f; if (pActual_quality_level) *pActual_quality_level = 0; mipmapped_texture work_tex; if (!create_dds_tex(params, work_tex)) { console::error("Failed creating DDS texture from crn_comp_params!"); return false; } if (!create_texture_mipmaps(work_tex, params, mipmap_params, true)) return false; crn_comp_params new_params(params); new_params.m_levels = work_tex.get_num_levels(); memset(new_params.m_pImages, 0, sizeof(new_params.m_pImages)); for (uint f = 0; f < work_tex.get_num_faces(); f++) for (uint l = 0; l < work_tex.get_num_levels(); l++) new_params.m_pImages[f][l] = (uint32*)work_tex.get_level(f, l)->get_image()->get_ptr(); return create_compressed_texture(new_params, comp_data, pActual_quality_level, pActual_bitrate); } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_texture_comp.h000066400000000000000000000023131503722002600230200ustar00rootroot00000000000000// File: crn_texture_comp.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "../inc/crnlib.h" 
namespace crnlib { class mipmapped_texture; class itexture_comp { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(itexture_comp); public: itexture_comp() {} virtual ~itexture_comp() {} virtual const char* get_ext() const = 0; virtual bool compress_init(const crn_comp_params& params) = 0; virtual bool compress_pass(const crn_comp_params& params, float* pEffective_bitrate) = 0; virtual void compress_deinit() = 0; virtual const crnlib::vector& get_comp_data() const = 0; virtual crnlib::vector& get_comp_data() = 0; }; bool create_compressed_texture(const crn_comp_params& params, crnlib::vector& comp_data, uint32* pActual_quality_level, float* pActual_bitrate); bool create_texture_mipmaps(mipmapped_texture& work_tex, const crn_comp_params& params, const crn_mipmap_params& mipmap_params, bool generate_mipmaps); bool create_compressed_texture(const crn_comp_params& params, const crn_mipmap_params& mipmap_params, crnlib::vector& comp_data, uint32* pActual_quality_level, float* pActual_bitrate); } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_texture_conversion.cpp000066400000000000000000000702131503722002600246060ustar00rootroot00000000000000// File: crn_texture_conversion.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_texture_conversion.h" #include "crn_console.h" #include "crn_file_utils.h" #include "crn_cfile_stream.h" #include "crn_image_utils.h" #include "crn_texture_comp.h" #include "crn_strutils.h" namespace crnlib { namespace texture_conversion { struct progress_params { convert_params* m_pParams; uint m_start_percentage; bool m_canceled; }; convert_stats::convert_stats() { clear(); } bool convert_stats::init( const char* pSrc_filename, const char* pDst_filename, mipmapped_texture& src_tex, texture_file_types::format dst_file_type, bool lzma_stats) { m_src_filename = pSrc_filename; m_dst_filename = pDst_filename; m_dst_file_type = dst_file_type; m_pInput_tex = &src_tex; file_utils::get_file_size(pSrc_filename, 
m_input_file_size); file_utils::get_file_size(pDst_filename, m_output_file_size); m_total_input_pixels = 0; for (uint i = 0; i < src_tex.get_num_levels(); i++) { uint width = math::maximum(1, src_tex.get_width() >> i); uint height = math::maximum(1, src_tex.get_height() >> i); m_total_input_pixels += width * height * src_tex.get_num_faces(); } m_output_comp_file_size = 0; m_total_output_pixels = 0; if (lzma_stats) { vector dst_tex_bytes; if (!cfile_stream::read_file_into_array(pDst_filename, dst_tex_bytes)) { console::error("Failed loading output file: %s", pDst_filename); return false; } if (!dst_tex_bytes.size()) { console::error("Output file is empty: %s", pDst_filename); return false; } vector cmp_tex_bytes; lzma_codec lossless_codec; if (lossless_codec.pack(dst_tex_bytes.get_ptr(), dst_tex_bytes.size(), cmp_tex_bytes)) { m_output_comp_file_size = cmp_tex_bytes.size(); } } if (!m_output_tex.read_from_file(pDst_filename, m_dst_file_type)) { console::error("Failed loading output file: %s", pDst_filename); return false; } for (uint i = 0; i < m_output_tex.get_num_levels(); i++) { uint width = math::maximum(1, m_output_tex.get_width() >> i); uint height = math::maximum(1, m_output_tex.get_height() >> i); m_total_output_pixels += width * height * m_output_tex.get_num_faces(); } CRNLIB_ASSERT(m_total_output_pixels == m_output_tex.get_total_pixels_in_all_faces_and_mips()); return true; } bool convert_stats::print(bool psnr_metrics, bool mip_stats, bool grayscale_sampling, const char* pCSVStatsFile) const { if (!m_pInput_tex) return false; console::info("Input texture: %ux%u, Levels: %u, Faces: %u, Format: %s", m_pInput_tex->get_width(), m_pInput_tex->get_height(), m_pInput_tex->get_num_levels(), m_pInput_tex->get_num_faces(), pixel_format_helpers::get_pixel_format_string(m_pInput_tex->get_format())); // Just casting the uint64's filesizes to uint32 here to work around gcc issues - it's not even possible to have files that large anyway. 
console::info("Input pixels: %u, Input file size: %u, Input bits/pixel: %1.3f", m_total_input_pixels, (uint32)m_input_file_size, (m_input_file_size * 8.0f) / m_total_input_pixels); console::info("Output texture: %ux%u, Levels: %u, Faces: %u, Format: %s", m_output_tex.get_width(), m_output_tex.get_height(), m_output_tex.get_num_levels(), m_output_tex.get_num_faces(), pixel_format_helpers::get_pixel_format_string(m_output_tex.get_format())); console::info("Output pixels: %u, Output file size: %u, Output bits/pixel: %1.3f", m_total_output_pixels, (uint32)m_output_file_size, (m_output_file_size * 8.0f) / m_total_output_pixels); if (m_output_comp_file_size) { console::info("LZMA compressed output file size: %u bytes, %1.3f bits/pixel", (uint32)m_output_comp_file_size, (m_output_comp_file_size * 8.0f) / m_total_output_pixels); } if (psnr_metrics) { if ((m_pInput_tex->get_width() != m_output_tex.get_width()) || (m_pInput_tex->get_height() != m_output_tex.get_height()) || (m_pInput_tex->get_num_faces() != m_output_tex.get_num_faces())) { console::warning("Unable to compute image statistics - input/output texture dimensions are different."); } else { uint num_faces = math::minimum(m_pInput_tex->get_num_faces(), m_output_tex.get_num_faces()); uint num_levels = math::minimum(m_pInput_tex->get_num_levels(), m_output_tex.get_num_levels()); if (!mip_stats) num_levels = 1; for (uint face = 0; face < num_faces; face++) { for (uint level = 0; level < num_levels; level++) { image_u8 a, b; image_u8* pA = m_pInput_tex->get_level_image(face, level, a); image_u8* pB = m_output_tex.get_level_image(face, level, b); if (pA && pB) { image_u8 grayscale_a, grayscale_b; if (grayscale_sampling) { grayscale_a = *pA; grayscale_a.convert_to_grayscale(); pA = &grayscale_a; grayscale_b = *pB; grayscale_b.convert_to_grayscale(); pB = &grayscale_b; } console::info("Face %u Mipmap level %u statistics:", face, level); image_utils::print_image_metrics(*pA, *pB); if ((pA->has_rgb()) || (pB->has_rgb())) 
image_utils::print_ssim(*pA, *pB); } } } if (pCSVStatsFile) { // FIXME: This is kind of a hack, and should be combined with the code above. image_u8 a, b; image_u8* pA = m_pInput_tex->get_level_image(0, 0, a); image_u8* pB = m_output_tex.get_level_image(0, 0, b); if (pA && pB) { image_u8 grayscale_a, grayscale_b; if (grayscale_sampling) { grayscale_a = *pA; grayscale_a.convert_to_grayscale(); pA = &grayscale_a; grayscale_b = *pB; grayscale_b.convert_to_grayscale(); pB = &grayscale_b; } image_utils::error_metrics rgb_error; image_utils::error_metrics luma_error; if (rgb_error.compute(*pA, *pB, 0, 3, false) && luma_error.compute(*pA, *pB, 0, 0, true)) { bool bCSVStatsFileExists = file_utils::does_file_exist(pCSVStatsFile); FILE* pFile; crn_fopen(&pFile, pCSVStatsFile, "a"); if (!pFile) console::warning("Unable to append to CSV stats file: %s\n", pCSVStatsFile); else { if (!bCSVStatsFileExists) fprintf(pFile, "name,width,height,miplevels,rgb_rms,luma_rms,effective_output_size,effective_bitrate\n"); dynamic_string filename; file_utils::split_path(m_src_filename.get_ptr(), NULL, NULL, &filename, NULL); uint64 effective_output_size = m_output_comp_file_size ? 
m_output_comp_file_size : m_output_file_size; float bitrate = (effective_output_size * 8.0f) / m_total_output_pixels; fprintf(pFile, "%s,%u,%u,%u,%f,%f,%u,%f\n", filename.get_ptr(), pB->get_width(), pB->get_height(), m_output_tex.get_num_levels(), rgb_error.mRootMeanSquared, luma_error.mRootMeanSquared, (uint32)effective_output_size, bitrate); fclose(pFile); } } } } } } return true; } void convert_stats::clear() { m_src_filename.clear(); m_dst_filename.clear(); m_dst_file_type = texture_file_types::cFormatInvalid; m_pInput_tex = NULL; m_output_tex.clear(); m_input_file_size = 0; m_total_input_pixels = 0; m_output_file_size = 0; m_total_output_pixels = 0; m_output_comp_file_size = 0; } //----------------------------------------------------------------------- static crn_bool crn_progress_callback(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr) { progress_params& params = *static_cast(pUser_data_ptr); if (params.m_canceled) return false; if (!params.m_pParams->m_pProgress_func) return true; int percentage_complete = params.m_start_percentage + (int)(.5f + (phase_index + float(subphase_index) / total_subphases) * (100.0f - params.m_start_percentage) / total_phases); percentage_complete = math::clamp(percentage_complete, 0, 100); if (!params.m_pParams->m_pProgress_func(percentage_complete, params.m_pParams->m_pProgress_user_data)) { params.m_canceled = true; return false; } return true; } static bool dxt_progress_callback_func(uint percentage_complete, void* pUser_data_ptr) { progress_params& params = *static_cast(pUser_data_ptr); if (params.m_canceled) return false; if (!params.m_pParams->m_pProgress_func) return true; int scaled_percentage_complete = params.m_start_percentage + (percentage_complete * (100 - params.m_start_percentage)) / 100; scaled_percentage_complete = math::clamp(scaled_percentage_complete, 0, 100); if (!params.m_pParams->m_pProgress_func(scaled_percentage_complete, 
params.m_pParams->m_pProgress_user_data)) { params.m_canceled = true; return false; } return true; } static bool convert_error(const convert_params& params, const char* pError_msg) { params.m_status = false; params.m_error_message = pError_msg; remove(params.m_dst_filename.get_ptr()); return false; } static pixel_format choose_pixel_format(convert_params& params, const crn_comp_params& comp_params, const mipmapped_texture& src_tex, texture_type tex_type) { const pixel_format src_fmt = src_tex.get_format(); const texture_file_types::format src_file_type = src_tex.get_source_file_type(); const bool is_normal_map = (tex_type == cTextureTypeNormalMap); if (params.m_always_use_source_pixel_format) return src_fmt; // Attempt to choose a reasonable/sane output pixel format. if (params.m_dst_file_type == texture_file_types::cFormatCRN) { if (is_normal_map) { if (pixel_format_helpers::is_dxt(src_fmt)) return src_fmt; else return PIXEL_FMT_DXT5_AGBR; } } else if (params.m_dst_file_type == texture_file_types::cFormatKTX) { if ((src_file_type != texture_file_types::cFormatCRN) && (src_file_type != texture_file_types::cFormatKTX) && (src_file_type != texture_file_types::cFormatDDS)) { if (is_normal_map) { return pixel_format_helpers::has_alpha(src_fmt) ? 
PIXEL_FMT_A8R8G8B8 : PIXEL_FMT_R8G8B8; } else if (pixel_format_helpers::is_grayscale(src_fmt)) { if (pixel_format_helpers::has_alpha(src_fmt)) return PIXEL_FMT_ETC2A; else return PIXEL_FMT_ETC1; } else if (pixel_format_helpers::has_alpha(src_fmt)) return PIXEL_FMT_ETC2A; else return PIXEL_FMT_ETC1; } } else if (params.m_dst_file_type == texture_file_types::cFormatDDS) { if ((src_file_type != texture_file_types::cFormatCRN) && (src_file_type != texture_file_types::cFormatKTX) && (src_file_type != texture_file_types::cFormatDDS)) { if (is_normal_map) { return PIXEL_FMT_DXT5_AGBR; } else if (pixel_format_helpers::is_grayscale(src_fmt)) { if (pixel_format_helpers::has_alpha(src_fmt)) return comp_params.get_flag(cCRNCompFlagDXT1AForTransparency) ? PIXEL_FMT_DXT1A : PIXEL_FMT_DXT5; else return PIXEL_FMT_DXT1; } else if (pixel_format_helpers::has_alpha(src_fmt)) return comp_params.get_flag(cCRNCompFlagDXT1AForTransparency) ? PIXEL_FMT_DXT1A : PIXEL_FMT_DXT5; else return PIXEL_FMT_DXT1; } } else { // Destination is a regular image format. 
if (pixel_format_helpers::is_grayscale(src_fmt)) { if (pixel_format_helpers::has_alpha(src_fmt)) return PIXEL_FMT_A8L8; else return PIXEL_FMT_L8; } else if (pixel_format_helpers::has_alpha(src_fmt)) return PIXEL_FMT_A8R8G8B8; else return PIXEL_FMT_R8G8B8; } return src_fmt; } static void print_comp_params(const crn_comp_params& comp_params) { console::debug("\nTexture conversion compression parameters:"); console::debug(" Desired bitrate: %3.3f", comp_params.m_target_bitrate); console::debug(" CRN Quality: %i", comp_params.m_quality_level); console::debug("CRN C endpoints/selectors: %u %u", comp_params.m_crn_color_endpoint_palette_size, comp_params.m_crn_color_selector_palette_size); console::debug("CRN A endpoints/selectors: %u %u", comp_params.m_crn_alpha_endpoint_palette_size, comp_params.m_crn_alpha_selector_palette_size); console::debug(" DXT both block types: %u, Alpha threshold: %u", comp_params.get_flag(cCRNCompFlagUseBothBlockTypes), comp_params.m_dxt1a_alpha_threshold); console::debug(" DXT compression quality: %s", crn_get_dxt_quality_string(comp_params.m_dxt_quality)); console::debug(" Perceptual: %u, Large Blocks: %u", comp_params.get_flag(cCRNCompFlagPerceptual), comp_params.get_flag(cCRNCompFlagHierarchical)); console::debug(" Compressor: %s", get_dxt_compressor_name(comp_params.m_dxt_compressor_type)); console::debug(" Disable endpoint caching: %u", comp_params.get_flag(cCRNCompFlagDisableEndpointCaching)); console::debug(" Grayscale sampling: %u", comp_params.get_flag(cCRNCompFlagGrayscaleSampling)); console::debug(" Max helper threads: %u", comp_params.m_num_helper_threads); console::debug(""); } static void print_mipmap_params(const crn_mipmap_params& mipmap_params) { console::debug("\nTexture conversion MIP-map parameters:"); console::debug(" Mode: %s", crn_get_mip_mode_name(mipmap_params.m_mode)); console::debug(" Filter: %s", crn_get_mip_filter_name(mipmap_params.m_filter)); console::debug("Gamma filtering: %u, Gamma: %2.2f", 
mipmap_params.m_gamma_filtering, mipmap_params.m_gamma); console::debug(" Blurriness: %2.2f", mipmap_params.m_blurriness); console::debug(" Renormalize: %u", mipmap_params.m_renormalize); console::debug("Renorm. top mip: %u", mipmap_params.m_rtopmip); console::debug(" Tiled: %u", mipmap_params.m_tiled); console::debug(" Max Levels: %u", mipmap_params.m_max_levels); console::debug(" Min level size: %u", mipmap_params.m_min_mip_size); console::debug(" window: %u %u %u %u", mipmap_params.m_window_left, mipmap_params.m_window_top, mipmap_params.m_window_right, mipmap_params.m_window_bottom); console::debug(" scale mode: %s", crn_get_scale_mode_desc(mipmap_params.m_scale_mode)); console::debug(" scale: %f %f", mipmap_params.m_scale_x, mipmap_params.m_scale_y); console::debug(" clamp: %u %u, clamp_scale: %u", mipmap_params.m_clamp_width, mipmap_params.m_clamp_height, mipmap_params.m_clamp_scale); console::debug(""); } void convert_params::print() { console::debug("\nTexture conversion parameters:"); console::debug(" Resolution: %ux%u, Faces: %u, Levels: %u, Format: %s, X Flipped: %u, Y Flipped: %u", m_pInput_texture->get_width(), m_pInput_texture->get_height(), m_pInput_texture->get_num_faces(), m_pInput_texture->get_num_levels(), pixel_format_helpers::get_pixel_format_string(m_pInput_texture->get_format()), m_pInput_texture->is_x_flipped(), m_pInput_texture->is_y_flipped()); console::debug(" texture_type: %s", get_texture_type_desc(m_texture_type)); console::debug(" dst_filename: %s", m_dst_filename.get_ptr()); console::debug(" dst_file_type: %s", texture_file_types::get_extension(m_dst_file_type)); console::debug(" dst_format: %s", pixel_format_helpers::get_pixel_format_string(m_dst_format)); console::debug(" quick: %u", m_quick); console::debug(" use_source_format: %u", m_always_use_source_pixel_format); console::debug(" Y Flip: %u", m_y_flip); console::debug(" Unflip: %u", m_unflip); } static bool write_compressed_texture( mipmapped_texture& work_tex, convert_params& 
params, crn_comp_params& comp_params, pixel_format dst_format, progress_params& progress_state, bool perceptual, convert_stats& stats) { comp_params.m_file_type = (params.m_dst_file_type == texture_file_types::cFormatCRN) ? cCRNFileTypeCRN : cCRNFileTypeDDS; comp_params.m_pProgress_func = crn_progress_callback; comp_params.m_pProgress_func_data = &progress_state; comp_params.set_flag(cCRNCompFlagPerceptual, perceptual); crn_format crn_fmt = pixel_format_helpers::convert_pixel_format_to_best_crn_format(dst_format); comp_params.m_format = crn_fmt; console::message("Writing %s texture to file: \"%s\"", crn_get_format_string(crn_fmt), params.m_dst_filename.get_ptr()); uint32 actual_quality_level; float actual_bitrate; bool status = work_tex.write_to_file(params.m_dst_filename.get_ptr(), params.m_dst_file_type, &comp_params, &actual_quality_level, &actual_bitrate); if (!status) return convert_error(params, "Failed writing output file!"); if (!params.m_no_stats) { if (!stats.init(params.m_pInput_texture->get_source_filename().get_ptr(), params.m_dst_filename.get_ptr(), *params.m_pIntermediate_texture, params.m_dst_file_type, params.m_lzma_stats)) { console::warning("Unable to compute output statistics for file: %s", params.m_pInput_texture->get_source_filename().get_ptr()); } } return true; } static bool convert_and_write_normal_texture(mipmapped_texture& work_tex, convert_params& params, const crn_comp_params& comp_params, pixel_format dst_format, progress_params& progress_state, bool formats_differ, bool perceptual, convert_stats& stats) { if (formats_differ) { dxt_image::pack_params pack_params; pack_params.m_perceptual = perceptual; pack_params.m_compressor = comp_params.m_dxt_compressor_type; pack_params.m_pProgress_callback = dxt_progress_callback_func; pack_params.m_pProgress_callback_user_data_ptr = &progress_state; pack_params.m_dxt1a_alpha_threshold = comp_params.m_dxt1a_alpha_threshold; pack_params.m_quality = comp_params.m_dxt_quality; 
pack_params.m_endpoint_caching = !comp_params.get_flag(cCRNCompFlagDisableEndpointCaching); pack_params.m_grayscale_sampling = comp_params.get_flag(cCRNCompFlagGrayscaleSampling); if ((!comp_params.get_flag(cCRNCompFlagUseBothBlockTypes)) && (!comp_params.get_flag(cCRNCompFlagDXT1AForTransparency))) pack_params.m_use_both_block_types = false; pack_params.m_num_helper_threads = comp_params.m_num_helper_threads; pack_params.m_use_transparent_indices_for_black = comp_params.get_flag(cCRNCompFlagUseTransparentIndicesForBlack); console::info("Converting texture format from %s to %s", pixel_format_helpers::get_pixel_format_string(work_tex.get_format()), pixel_format_helpers::get_pixel_format_string(dst_format)); timer tm; tm.start(); bool status = work_tex.convert(dst_format, pack_params); double t = tm.get_elapsed_secs(); console::info(""); if (!status) { if (progress_state.m_canceled) { params.m_canceled = true; return false; } else { return convert_error(params, "Failed converting texture to output format!"); } } console::info("Texture format conversion took %3.3fs", t); } if (params.m_write_mipmaps_to_multiple_files) { for (uint f = 0; f < work_tex.get_num_faces(); f++) { for (uint l = 0; l < work_tex.get_num_levels(); l++) { dynamic_string filename(params.m_dst_filename.get_ptr()); dynamic_string drv, dir, fn, ext; if (!file_utils::split_path(params.m_dst_filename.get_ptr(), &drv, &dir, &fn, &ext)) return false; fn += dynamic_string(cVarArg, "_face%u_mip%u", f, l).get_ptr(); filename = drv + dir + fn + ext; mip_level* pLevel = work_tex.get_level(f, l); face_vec face(1); face[0].push_back(crnlib_new(*pLevel)); mipmapped_texture new_tex; new_tex.assign(face); console::info("Writing texture face %u mip level %u to file %s", f, l, filename.get_ptr()); if (!new_tex.write_to_file(filename.get_ptr(), params.m_dst_file_type, NULL, NULL, NULL)) return convert_error(params, "Failed writing output file!"); } } } else { console::message("Writing texture to file: \"%s\"", 
params.m_dst_filename.get_ptr()); if (!work_tex.write_to_file(params.m_dst_filename.get_ptr(), params.m_dst_file_type, NULL, NULL, NULL)) return convert_error(params, "Failed writing output file!"); if (!params.m_no_stats) { if (!stats.init(params.m_pInput_texture->get_source_filename().get_ptr(), params.m_dst_filename.get_ptr(), *params.m_pIntermediate_texture, params.m_dst_file_type, params.m_lzma_stats)) { console::warning("Unable to compute output statistics for file: %s", params.m_pInput_texture->get_source_filename().get_ptr()); } } } return true; } bool process(convert_params& params, convert_stats& stats) { texture_type tex_type = params.m_texture_type; crn_comp_params comp_params(params.m_comp_params); crn_mipmap_params mipmap_params(params.m_mipmap_params); progress_params progress_state; progress_state.m_pParams = ¶ms; progress_state.m_canceled = false; progress_state.m_start_percentage = 0; params.m_status = false; params.m_error_message.clear(); if (params.m_pIntermediate_texture) { crnlib_delete(params.m_pIntermediate_texture); params.m_pIntermediate_texture = NULL; } params.m_pIntermediate_texture = crnlib_new(*params.m_pInput_texture); mipmapped_texture& work_tex = *params.m_pInput_texture; if ((params.m_unflip) && (work_tex.is_flipped())) { console::info("Unflipping texture"); work_tex.unflip(true, true); } if (params.m_y_flip) { console::info("Flipping texture on Y axis"); // This is awkward - if we're writing to KTX, then go ahead and properly update the work texture's orientation flags. // Otherwise, don't bother updating the orientation flags because the writer may then attempt to unflip the texture before writing to formats // that don't support flipped textures (ugh). 
const bool bOutputFormatSupportsFlippedTextures = params.m_dst_file_type == texture_file_types::cFormatKTX; if (!work_tex.flip_y(bOutputFormatSupportsFlippedTextures)) { console::warning("Failed flipping texture on Y axis"); } } if ((params.m_dst_format != PIXEL_FMT_INVALID) && (pixel_format_helpers::is_alpha_only(params.m_dst_format))) { if ((work_tex.get_comp_flags() & pixel_format_helpers::cCompFlagAValid) == 0) { console::warning("Output format is alpha-only, but input doesn't have alpha, so setting alpha to luminance."); work_tex.convert(PIXEL_FMT_A8, crnlib::dxt_image::pack_params()); if (tex_type == cTextureTypeNormalMap) tex_type = cTextureTypeRegularMap; } } pixel_format dst_format = params.m_dst_format; if (pixel_format_helpers::is_dxt(dst_format)) { if ((params.m_dst_file_type != texture_file_types::cFormatCRN) && (params.m_dst_file_type != texture_file_types::cFormatDDS) && (params.m_dst_file_type != texture_file_types::cFormatKTX)) { console::warning("Output file format does not support DXTc - automatically choosing a non-DXT pixel format."); dst_format = PIXEL_FMT_INVALID; } } if (dst_format == PIXEL_FMT_INVALID) { // Caller didn't specify a format to use, so try to pick something reasonable. // This is actually much trickier than it seems, and the current approach kind of sucks. 
dst_format = choose_pixel_format(params, comp_params, work_tex, tex_type); } if ((dst_format == PIXEL_FMT_DXT1) && (comp_params.get_flag(cCRNCompFlagDXT1AForTransparency))) dst_format = PIXEL_FMT_DXT1A; else if (dst_format == PIXEL_FMT_DXT1A) comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, true); if ((dst_format == PIXEL_FMT_DXT1A) && (params.m_dst_file_type == texture_file_types::cFormatCRN)) { console::warning("CRN file format does not support DXT1A compressed textures - converting to DXT5 instead."); dst_format = PIXEL_FMT_DXT5; } const bool is_normal_map = (tex_type == cTextureTypeNormalMap); bool perceptual = comp_params.get_flag(cCRNCompFlagPerceptual); if (is_normal_map) { perceptual = false; mipmap_params.m_gamma_filtering = false; } if (pixel_format_helpers::is_pixel_format_non_srgb(dst_format)) { if (perceptual) { console::message("Output pixel format is swizzled or not RGB, disabling perceptual color metrics"); perceptual = false; } } if (pixel_format_helpers::is_normal_map(dst_format)) { if (perceptual) console::message("Output pixel format is intended for normal maps, disabling perceptual color metrics"); perceptual = false; } bool generate_mipmaps = texture_file_types::supports_mipmaps(params.m_dst_file_type); if ((params.m_write_mipmaps_to_multiple_files) && ((params.m_dst_file_type != texture_file_types::cFormatCRN) && (params.m_dst_file_type != texture_file_types::cFormatDDS) && (params.m_dst_file_type != texture_file_types::cFormatKTX))) { generate_mipmaps = true; } if (params.m_param_debugging) { params.print(); print_comp_params(comp_params); print_mipmap_params(mipmap_params); } if (!create_texture_mipmaps(work_tex, comp_params, mipmap_params, generate_mipmaps)) return convert_error(params, "Failed creating texture mipmaps!"); bool formats_differ = work_tex.get_format() != dst_format; if (formats_differ) { if (pixel_format_helpers::is_dxt1(work_tex.get_format()) && pixel_format_helpers::is_dxt1(dst_format)) formats_differ = false; } bool 
status = false; timer t; t.start(); if ((params.m_dst_file_type == texture_file_types::cFormatCRN) || ((params.m_dst_file_type == texture_file_types::cFormatDDS) && (pixel_format_helpers::is_dxt(dst_format)) && //((formats_differ) || (comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)) ((comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)))) { status = write_compressed_texture(work_tex, params, comp_params, dst_format, progress_state, perceptual, stats); } else { if ((comp_params.m_target_bitrate > 0.0f) || (comp_params.m_quality_level < cCRNMaxQualityLevel)) { console::warning("Target bitrate/quality level is not supported for this output file format.\n"); } status = convert_and_write_normal_texture(work_tex, params, comp_params, dst_format, progress_state, formats_differ, perceptual, stats); } console::progress(""); if (progress_state.m_canceled) { params.m_canceled = true; return false; } double total_write_time = t.get_elapsed_secs(); if (status) { if (params.m_param_debugging) console::info("Work texture format: %s, desired destination format: %s", pixel_format_helpers::get_pixel_format_string(work_tex.get_format()), pixel_format_helpers::get_pixel_format_string(dst_format)); console::message("Texture successfully written in %3.3fs", total_write_time); } else { dynamic_string str; if (work_tex.get_last_error().is_empty()) str.format("Failed writing texture to file \"%s\"", params.m_dst_filename.get_ptr()); else str.format("Failed writing texture to file \"%s\", Reason: %s", params.m_dst_filename.get_ptr(), work_tex.get_last_error().get_ptr()); return convert_error(params, str.get_ptr()); } if (params.m_debugging) { crnlib_print_mem_stats(); } params.m_status = true; return true; } } // namespace texture_conversion } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_texture_conversion.h000066400000000000000000000054041503722002600242530ustar00rootroot00000000000000// File: 
// Statistics about one conversion (input vs. output file), filled in by
// init() and reported by print().
class convert_stats {
 public:
  convert_stats();

  // Gathers statistics for the given source/destination file pair.
  // Returns false on failure (the caller in this file only logs a warning).
  bool init(
      const char* pSrc_filename,
      const char* pDst_filename,
      mipmapped_texture& src_tex,
      texture_file_types::format dst_file_type,
      bool lzma_stats);

  bool print(bool psnr_metrics, bool mip_stats, bool grayscale_sampling, const char* pCSVStatsFile = NULL) const;

  void clear();

  dynamic_string m_src_filename;
  dynamic_string m_dst_filename;
  texture_file_types::format m_dst_file_type;

  mipmapped_texture* m_pInput_tex;
  mipmapped_texture m_output_tex;

  uint64 m_input_file_size;
  uint m_total_input_pixels;

  uint64 m_output_file_size;
  uint m_total_output_pixels;

  uint64 m_output_comp_file_size;
};

// Inputs to, and results of, texture_conversion::process().
class convert_params {
 public:
  convert_params()
      : m_pInput_texture(NULL),
        m_texture_type(cTextureTypeUnknown),
        m_dst_file_type(texture_file_types::cFormatInvalid),
        m_dst_format(PIXEL_FMT_INVALID),
        m_pProgress_func(NULL),
        m_pProgress_user_data(NULL),
        m_pIntermediate_texture(NULL),
        m_y_flip(false),
        m_unflip(false),
        m_always_use_source_pixel_format(false),
        m_write_mipmaps_to_multiple_files(false),
        m_quick(false),
        m_debugging(false),
        m_param_debugging(false),
        m_no_stats(false),
        m_lzma_stats(false),
        m_status(false),
        m_canceled(false) {
  }

  // Owns m_pIntermediate_texture (allocated by process()); the input texture
  // is not deleted here.
  ~convert_params() {
    crnlib_delete(m_pIntermediate_texture);
  }

  void print();

  // Input parameters
  // Texture to convert. process() modifies it in place (flip, mipmaps, ...).
  mipmapped_texture* m_pInput_texture;

  texture_type m_texture_type;

  dynamic_string m_dst_filename;
  texture_file_types::format m_dst_file_type;
  // PIXEL_FMT_INVALID lets process() choose a destination format itself.
  pixel_format m_dst_format;

  crn_comp_params m_comp_params;
  crn_mipmap_params m_mipmap_params;

  typedef bool (*progress_callback_func_ptr)(uint percentage_complete, void* pUser_data_ptr);
  progress_callback_func_ptr m_pProgress_func;
  void* m_pProgress_user_data;

  // Return parameters
  // Copy of the input texture made by process() before it starts modifying it.
  mipmapped_texture* m_pIntermediate_texture;
  // Cleared at the start of process().
  mutable dynamic_string m_error_message;

  // Flip the texture on the Y axis before conversion.
  bool m_y_flip;
  // Unflip the input first if it is flagged as flipped.
  bool m_unflip;
  bool m_always_use_source_pixel_format;
  // Write every face/mip level to its own "_face%u_mip%u" file.
  bool m_write_mipmaps_to_multiple_files;
  bool m_quick;
  // Print memory statistics after a successful conversion.
  bool m_debugging;
  // Print the effective parameters before converting.
  bool m_param_debugging;
  // Skip computing output statistics.
  bool m_no_stats;
  bool m_lzma_stats;

  // Set to true by process() only when the conversion succeeded.
  mutable bool m_status;
  // Set to true by process() when the operation was canceled.
  mutable bool m_canceled;
};
m_pIntermediate_texture; mutable dynamic_string m_error_message; bool m_y_flip; bool m_unflip; bool m_always_use_source_pixel_format; bool m_write_mipmaps_to_multiple_files; bool m_quick; bool m_debugging; bool m_param_debugging; bool m_no_stats; bool m_lzma_stats; mutable bool m_status; mutable bool m_canceled; }; bool process(convert_params& params, convert_stats& stats); } // namespace texture_conversion } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_texture_file_types.cpp000066400000000000000000000042501503722002600245620ustar00rootroot00000000000000// File: crn_texture_file_types.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_texture_file_types.h" #include "crn_file_utils.h" namespace crnlib { const char* texture_file_types::get_extension(format fmt) { CRNLIB_ASSERT(fmt < cNumFileFormats); if (fmt >= cNumFileFormats) return NULL; static const char* extensions[cNumFileFormats] = { "dds", "crn", "ktx", "tga", "png", "jpg", "jpeg", "bmp", "gif", "tif", "tiff", "ppm", "pgm", "psd", "jp2", "", ""}; return extensions[fmt]; } texture_file_types::format texture_file_types::determine_file_format(const char* pFilename) { dynamic_string ext; if (!file_utils::split_path(pFilename, NULL, NULL, NULL, &ext)) return cFormatInvalid; if (ext.is_empty()) return cFormatInvalid; if (ext[0] == '.') ext.right(1); for (uint i = 0; i < cNumFileFormats; i++) if (ext == get_extension(static_cast(i))) return static_cast(i); return cFormatInvalid; } bool texture_file_types::supports_mipmaps(format fmt) { switch (fmt) { case cFormatCRN: case cFormatDDS: case cFormatKTX: return true; default: break; } return false; } bool texture_file_types::supports_alpha(format fmt) { switch (fmt) { case cFormatJPG: case cFormatJPEG: case cFormatGIF: case cFormatJP2: return false; default: break; } return true; } const char* get_texture_type_desc(texture_type t) { switch (t) { case cTextureTypeUnknown: return "Unknown"; case 
// Enumerates the texture/image file formats and provides static helpers for
// mapping between formats, file extensions and capabilities.
struct texture_file_types {
  enum format {
    cFormatInvalid = -1,

    // Formats for which supports_mipmaps() returns true.
    cFormatDDS,
    cFormatCRN,
    cFormatKTX,

    cNumMipmappedFileFormats,

    // Regular image formats start right after the mipmapped ones.
    cFormatTGA = cNumMipmappedFileFormats,
    cFormatPNG,
    cFormatJPG,
    cFormatJPEG,
    cFormatBMP,
    cFormatGIF,
    cFormatTIF,
    cFormatTIFF,
    cFormatPPM,
    cFormatPGM,
    cFormatPSD,
    cFormatJP2,

    cNumRegularFileFormats,

    // Count of the non-mipmapped image formats only (TGA .. JP2).
    cNumImageFileFormats = cNumRegularFileFormats - cNumMipmappedFileFormats,

    // Not really a file format
    cFormatClipboard = cNumRegularFileFormats,
    cFormatDragDrop,

    // Total number of enumerators, including the two pseudo formats above.
    cNumFileFormats
  };

  // Extension string (no leading dot) for fmt.
  static const char* get_extension(format fmt);

  // Format deduced from the filename's extension, or cFormatInvalid.
  static format determine_file_format(const char* pFilename);

  static bool supports_mipmaps(format fmt);
  static bool supports_alpha(format fmt);
};
threaded_clusterizer(task_pool& tp) : m_pTask_pool(&tp), m_pProgress_callback(NULL), m_pProgress_callback_data(NULL), m_canceled(false) { } void clear() { for (uint i = 0; i < cMaxClusterizers; i++) m_clusterizers[i].clear(); } struct weighted_vec { weighted_vec() {} weighted_vec(const VectorType& v, uint w) : m_vec(v), m_weight(w) {} VectorType m_vec; uint m_weight; }; typedef crnlib::vector weighted_vec_array; typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data); bool create_clusters( const weighted_vec_array& weighted_vecs, uint max_clusters, crnlib::vector >& cluster_indices, progress_callback_func pProgress_callback, void* pProgress_callback_data) { m_main_thread_id = crn_get_current_thread_id(); m_canceled = false; m_pProgress_callback = pProgress_callback; m_pProgress_callback_data = pProgress_callback_data; if (max_clusters >= 128) { crnlib::vector primary_indices(weighted_vecs.size()); for (uint i = 0; i < weighted_vecs.size(); i++) primary_indices[i] = i; CRNLIB_ASSUME(cMaxClusterizers == 4); crnlib::vector indices[6]; compute_split(weighted_vecs, primary_indices, indices[0], indices[1]); compute_split(weighted_vecs, indices[0], indices[2], indices[3]); compute_split(weighted_vecs, indices[1], indices[4], indices[5]); create_clusters_task_state task_state[4]; m_cluster_task_displayed_progress = false; uint total_partitions = 0; for (uint i = 0; i < 4; i++) { const uint num_indices = indices[2 + i].size(); if (num_indices) total_partitions++; } for (uint i = 0; i < 4; i++) { const uint num_indices = indices[2 + i].size(); if (!num_indices) continue; task_state[i].m_pWeighted_vecs = &weighted_vecs; task_state[i].m_pIndices = &indices[2 + i]; task_state[i].m_max_clusters = (max_clusters + (total_partitions / 2)) / total_partitions; m_pTask_pool->queue_object_task(this, &threaded_clusterizer::create_clusters_task, i, &task_state[i]); } m_pTask_pool->join(); if (m_canceled) return false; uint total_clusters = 0; for (uint i = 
0; i < 4; i++) total_clusters += task_state[i].m_cluster_indices.size(); cluster_indices.reserve(total_clusters); cluster_indices.resize(0); for (uint i = 0; i < 4; i++) { const uint ofs = cluster_indices.size(); cluster_indices.resize(ofs + task_state[i].m_cluster_indices.size()); for (uint j = 0; j < task_state[i].m_cluster_indices.size(); j++) { cluster_indices[ofs + j].swap(task_state[i].m_cluster_indices[j]); } } } else { m_clusterizers[0].clear(); m_clusterizers[0].get_training_vecs().reserve(weighted_vecs.size()); for (uint i = 0; i < weighted_vecs.size(); i++) { const weighted_vec& v = weighted_vecs[i]; m_clusterizers[0].add_training_vec(v.m_vec, v.m_weight); } m_clusterizers[0].generate_codebook(max_clusters, generate_codebook_progress_callback, this, false); //m_params.m_dxt_quality <= cCRNDXTQualityFast); const uint num_clusters = m_clusterizers[0].get_codebook_size(); m_clusterizers[0].retrieve_clusters(num_clusters, cluster_indices); } return !m_canceled; } private: task_pool* m_pTask_pool; crn_thread_id_t m_main_thread_id; struct create_clusters_task_state { create_clusters_task_state() : m_pWeighted_vecs(NULL), m_pIndices(NULL), m_max_clusters(0) { } const weighted_vec_array* m_pWeighted_vecs; crnlib::vector* m_pIndices; crnlib::vector > m_cluster_indices; uint m_max_clusters; }; typedef clusterizer vector_clusterizer; enum { cMaxClusterizers = 4 }; vector_clusterizer m_clusterizers[cMaxClusterizers]; bool m_cluster_task_displayed_progress; progress_callback_func m_pProgress_callback; void* m_pProgress_callback_data; bool m_canceled; static bool generate_codebook_progress_callback(uint percentage_completed, void* pData) { threaded_clusterizer* pClusterizer = static_cast(pData); if (!pClusterizer->m_pProgress_callback) return true; if (!pClusterizer->m_pProgress_callback(percentage_completed, pClusterizer->m_pProgress_callback_data)) { pClusterizer->m_canceled = true; return false; } return true; } void compute_pca(VectorType& axis_res, VectorType& 
// Computes the weighted centroid of the selected vectors and an estimate of
// the principal axis of their weighted covariance.
//
// axis_res     - receives the normalized axis (cleared if total weight is 0).
// centroid_res - receives the weighted centroid.
// vecs         - all weighted vectors.
// indices      - indices into vecs selecting the vectors to analyze.
void compute_pca(VectorType& axis_res, VectorType& centroid_res, const weighted_vec_array& vecs, const vector& indices) {
  const uint N = VectorType::num_elements;

  // Weighted sum of the vectors; divided by the total weight below.
  VectorType centroid(0.0f);
  double total_weight = 0.0f;
  for (uint i = 0; i < indices.size(); i++) {
    const weighted_vec& v = vecs[indices[i]];
    centroid += v.m_vec * static_cast(v.m_weight);
    total_weight += v.m_weight;
  }

  // Nothing to analyze (no indices, or all weights zero).
  if (total_weight == 0.0f) {
    axis_res.clear();
    centroid_res = centroid;
    return;
  }

  double one_over_total_weight = 1.0f / total_weight;
  for (uint i = 0; i < N; i++)
    centroid[i] = static_cast(centroid[i] * one_over_total_weight);

  // Accumulate the upper triangle of the weighted covariance matrix.
  matrix covar;
  covar.clear();

  for (uint i = 0; i < indices.size(); i++) {
    const weighted_vec& weighted_vec = vecs[indices[i]];

    const VectorType v(weighted_vec.m_vec - centroid);
    const VectorType w(v * static_cast(weighted_vec.m_weight));

    for (uint x = 0; x < N; x++)
      for (uint y = x; y < N; y++)
        covar[x][y] = covar[x][y] + v[x] * w[y];
  }

  for (uint x = 0; x < N; x++)
    for (uint y = x; y < N; y++)
      covar[x][y] = static_cast(covar[x][y] * one_over_total_weight);

  // Mirror the upper triangle into the lower one (the matrix is symmetric).
  for (uint x = 0; x < (N - 1); x++)
    for (uint y = x + 1; y < N; y++)
      covar[y][x] = covar[x][y];

  // Initial guess: components ramp from .75 to 1.25.
  // NOTE(review): divides by (N - 1); looks like this assumes N >= 2 - confirm.
  VectorType axis;
  for (uint i = 0; i < N; i++)
    axis[i] = math::lerp(.75f, 1.25f, i * (1.0f / (N - 1)));

  VectorType prev_axis(axis);

  // Repeatedly multiply the axis by the covariance matrix, rescaling by the
  // largest absolute component, for at most cMaxIterations rounds.
  const uint cMaxIterations = 10;
  for (uint iter = 0; iter < cMaxIterations; iter++) {
    VectorType x;

    double max_sum = 0;

    for (uint i = 0; i < N; i++) {
      double sum = 0;

      for (uint j = 0; j < N; j++)
        sum += axis[j] * covar[i][j];

      x[i] = static_cast(sum);

      max_sum = math::maximum(max_sum, fabs(sum));
    }

    if (max_sum != 0.0f)
      x *= static_cast(1.0f / max_sum);

    // NOTE(review): prev_axis is assigned the old `axis` (not `x`), so from the
    // second iteration on this delta compares iterates two steps apart -
    // confirm this is intended before changing it (results depend on it).
    VectorType delta_axis(prev_axis - x);

    prev_axis = axis;
    axis = x;

    if (delta_axis.norm() < .0025f)
      break;
  }

  axis.normalize();

  axis_res = axis;
  centroid_res = centroid;
}
i = 0; i < indices.size(); i++) { const uint vec_index = indices[i]; const VectorType v(vecs[vec_index].m_vec - centroid); float t = v * axis; if (t < 0.0f) left_indices.push_back(vec_index); else right_indices.push_back(vec_index); } } void compute_split( const weighted_vec_array& vecs, const vector& indices, vector& left_indices, vector& right_indices) { VectorType axis, centroid; compute_pca(axis, centroid, vecs, indices); compute_division(axis, centroid, vecs, indices, left_indices, right_indices); } static bool generate_codebook_dummy_progress_callback(uint, void* pData) { if (static_cast(pData)->m_canceled) return false; return true; } void create_clusters_task(uint64 data, void* pData_ptr) { if (m_canceled) return; const uint partition_index = static_cast(data); create_clusters_task_state& state = *static_cast(pData_ptr); m_clusterizers[partition_index].clear(); for (uint i = 0; i < state.m_pIndices->size(); i++) { const uint index = (*state.m_pIndices)[i]; const weighted_vec& v = (*state.m_pWeighted_vecs)[index]; m_clusterizers[partition_index].add_training_vec(v.m_vec, v.m_weight); } if (m_canceled) return; const bool is_main_thread = (crn_get_current_thread_id() == m_main_thread_id); const bool quick = false; m_clusterizers[partition_index].generate_codebook( state.m_max_clusters, (is_main_thread && !m_cluster_task_displayed_progress) ? 
generate_codebook_progress_callback : generate_codebook_dummy_progress_callback, this, quick); if (is_main_thread) m_cluster_task_displayed_progress = true; if (m_canceled) return; const uint num_clusters = m_clusterizers[partition_index].get_codebook_size(); m_clusterizers[partition_index].retrieve_clusters(num_clusters, state.m_cluster_indices); for (uint i = 0; i < state.m_cluster_indices.size(); i++) { crnlib::vector& indices = state.m_cluster_indices[i]; for (uint j = 0; j < indices.size(); j++) indices[j] = (*state.m_pIndices)[indices[j]]; } } }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_threaded_resampler.cpp000066400000000000000000000201561503722002600244740ustar00rootroot00000000000000// File: crn_threaded_resampler.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_threaded_resampler.h" #include "crn_resample_filters.h" #include "crn_threading.h" namespace crnlib { threaded_resampler::threaded_resampler(task_pool& tp) : m_pTask_pool(&tp), m_pParams(NULL), m_pX_contribs(NULL), m_pY_contribs(NULL), m_bytes_per_pixel(0) { } threaded_resampler::~threaded_resampler() { free_contrib_lists(); } void threaded_resampler::free_contrib_lists() { if (m_pX_contribs) { crnlib_free(m_pX_contribs->p); m_pX_contribs->p = NULL; crnlib_free(m_pX_contribs); m_pX_contribs = NULL; } if (m_pY_contribs) { crnlib_free(m_pY_contribs->p); m_pY_contribs->p = NULL; crnlib_free(m_pY_contribs); m_pY_contribs = NULL; } } void threaded_resampler::resample_x_task(uint64 data, void*) { const uint thread_index = (uint)data; for (uint src_y = 0; src_y < m_pParams->m_src_height; src_y++) { if (m_pTask_pool->get_num_threads()) { if ((src_y % (m_pTask_pool->get_num_threads() + 1)) != thread_index) continue; } const Resampler::Contrib_List* pContribs = m_pX_contribs; const Resampler::Contrib_List* pContribs_end = m_pX_contribs + m_pParams->m_dst_width; switch (m_pParams->m_fmt) { case cPF_Y_F32: { const float* pSrc = 
reinterpret_cast(static_cast(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y); vec4F* pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y; do { const Resampler::Contrib* p = pContribs->p; const Resampler::Contrib* p_end = pContribs->p + pContribs->n; vec4F s(0.0f); while (p != p_end) { const uint src_pixel = p->pixel; const float src_weight = p->weight; s[0] += pSrc[src_pixel] * src_weight; p++; } *pDst++ = s; pContribs++; } while (pContribs != pContribs_end); break; } case cPF_RGBX_F32: { const vec4F* pSrc = reinterpret_cast(static_cast(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y); vec4F* pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y; do { const Resampler::Contrib* p = pContribs->p; const Resampler::Contrib* p_end = pContribs->p + pContribs->n; vec4F s(0.0f); while (p != p_end) { const float src_weight = p->weight; const vec4F& src_pixel = pSrc[p->pixel]; s[0] += src_pixel[0] * src_weight; s[1] += src_pixel[1] * src_weight; s[2] += src_pixel[2] * src_weight; p++; } *pDst++ = s; pContribs++; } while (pContribs != pContribs_end); break; } case cPF_RGBA_F32: { const vec4F* pSrc = reinterpret_cast(static_cast(m_pParams->m_pSrc_pixels) + m_pParams->m_src_pitch * src_y); vec4F* pDst = m_tmp_img.get_ptr() + m_pParams->m_dst_width * src_y; do { Resampler::Contrib* p = pContribs->p; Resampler::Contrib* p_end = pContribs->p + pContribs->n; vec4F s(0.0f); while (p != p_end) { const float src_weight = p->weight; const vec4F& src_pixel = pSrc[p->pixel]; s[0] += src_pixel[0] * src_weight; s[1] += src_pixel[1] * src_weight; s[2] += src_pixel[2] * src_weight; s[3] += src_pixel[3] * src_weight; p++; } *pDst++ = s; pContribs++; } while (pContribs != pContribs_end); break; } default: break; } } } void threaded_resampler::resample_y_task(uint64 data, void*) { const uint thread_index = (uint)data; crnlib::vector tmp(m_pParams->m_dst_width); for (uint dst_y = 0; dst_y < m_pParams->m_dst_height; dst_y++) { if 
(m_pTask_pool->get_num_threads()) { if ((dst_y % (m_pTask_pool->get_num_threads() + 1)) != thread_index) continue; } const Resampler::Contrib_List& contribs = m_pY_contribs[dst_y]; const vec4F* pSrc; if (contribs.n == 1) { pSrc = m_tmp_img.get_ptr() + m_pParams->m_dst_width * contribs.p[0].pixel; } else { for (uint src_y_iter = 0; src_y_iter < contribs.n; src_y_iter++) { const vec4F* p = m_tmp_img.get_ptr() + m_pParams->m_dst_width * contribs.p[src_y_iter].pixel; const float weight = contribs.p[src_y_iter].weight; if (!src_y_iter) { for (uint i = 0; i < m_pParams->m_dst_width; i++) tmp[i] = p[i] * weight; } else { for (uint i = 0; i < m_pParams->m_dst_width; i++) tmp[i] += p[i] * weight; } } pSrc = tmp.get_ptr(); } const vec4F* pSrc_end = pSrc + m_pParams->m_dst_width; const float l = m_pParams->m_sample_low; const float h = m_pParams->m_sample_high; switch (m_pParams->m_fmt) { case cPF_Y_F32: { float* pDst = reinterpret_cast(static_cast(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y); do { *pDst++ = math::clamp((*pSrc)[0], l, h); pSrc++; } while (pSrc != pSrc_end); break; } case cPF_RGBX_F32: { vec4F* pDst = reinterpret_cast(static_cast(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y); do { (*pDst)[0] = math::clamp((*pSrc)[0], l, h); (*pDst)[1] = math::clamp((*pSrc)[1], l, h); (*pDst)[2] = math::clamp((*pSrc)[2], l, h); (*pDst)[3] = h; pSrc++; pDst++; } while (pSrc != pSrc_end); break; } case cPF_RGBA_F32: { vec4F* pDst = reinterpret_cast(static_cast(m_pParams->m_pDst_pixels) + m_pParams->m_dst_pitch * dst_y); do { (*pDst)[0] = math::clamp((*pSrc)[0], l, h); (*pDst)[1] = math::clamp((*pSrc)[1], l, h); (*pDst)[2] = math::clamp((*pSrc)[2], l, h); (*pDst)[3] = math::clamp((*pSrc)[3], l, h); pSrc++; pDst++; } while (pSrc != pSrc_end); break; } default: break; } } } bool threaded_resampler::resample(const params& p) { free_contrib_lists(); m_pParams = &p; CRNLIB_ASSERT(m_pParams->m_src_width && m_pParams->m_src_height); 
CRNLIB_ASSERT(m_pParams->m_dst_width && m_pParams->m_dst_height); switch (p.m_fmt) { case cPF_Y_F32: m_bytes_per_pixel = 4; break; case cPF_RGBX_F32: case cPF_RGBA_F32: m_bytes_per_pixel = 16; break; default: CRNLIB_ASSERT(false); return false; } int filter_index = find_resample_filter(p.m_Pfilter_name); if (filter_index < 0) return false; const resample_filter& filter = g_resample_filters[filter_index]; m_pX_contribs = Resampler::make_clist(m_pParams->m_src_width, m_pParams->m_dst_width, m_pParams->m_boundary_op, filter.func, filter.support, p.m_filter_x_scale, 0.0f); if (!m_pX_contribs) return false; m_pY_contribs = Resampler::make_clist(m_pParams->m_src_height, m_pParams->m_dst_height, m_pParams->m_boundary_op, filter.func, filter.support, p.m_filter_y_scale, 0.0f); if (!m_pY_contribs) return false; if (!m_tmp_img.try_resize(m_pParams->m_dst_width * m_pParams->m_src_height)) return false; for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_x_task, i, NULL); m_pTask_pool->join(); for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) m_pTask_pool->queue_object_task(this, &threaded_resampler::resample_y_task, i, NULL); m_pTask_pool->join(); m_tmp_img.clear(); free_contrib_lists(); return true; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_threaded_resampler.h000066400000000000000000000032211503722002600241330ustar00rootroot00000000000000// File: crn_threaded_resampler.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_resampler.h" #include "crn_vec.h" namespace crnlib { class task_pool; class threaded_resampler { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(threaded_resampler); public: threaded_resampler(task_pool& tp); ~threaded_resampler(); enum pixel_format { cPF_Y_F32, cPF_RGBX_F32, cPF_RGBA_F32, cPF_Total }; struct params { params() { clear(); } void clear() { utils::zero_object(*this); m_boundary_op = Resampler::BOUNDARY_CLAMP; 
m_sample_low = 0.0f; m_sample_high = 255.0f; m_Pfilter_name = CRNLIB_RESAMPLER_DEFAULT_FILTER; m_filter_x_scale = 1.0f; m_filter_y_scale = 1.0f; } pixel_format m_fmt; const void* m_pSrc_pixels; uint m_src_width; uint m_src_height; uint m_src_pitch; void* m_pDst_pixels; uint m_dst_width; uint m_dst_height; uint m_dst_pitch; Resampler::Boundary_Op m_boundary_op; float m_sample_low; float m_sample_high; const char* m_Pfilter_name; float m_filter_x_scale; float m_filter_y_scale; }; bool resample(const params& p); private: task_pool* m_pTask_pool; const params* m_pParams; Resampler::Contrib_List* m_pX_contribs; Resampler::Contrib_List* m_pY_contribs; uint m_bytes_per_pixel; crnlib::vector m_tmp_img; void free_contrib_lists(); void resample_x_task(uint64 data, void* pData_ptr); void resample_y_task(uint64 data, void* pData_ptr); }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_threading.h000066400000000000000000000004141503722002600222470ustar00rootroot00000000000000// File: crn_threading.h // See Copyright Notice and license at the end of inc/crnlib.h #if CRNLIB_USE_WIN32_API #include "crn_threading_win32.h" #elif CRNLIB_USE_PTHREADS_API #include "crn_threading_pthreads.h" #else #include "crn_threading_null.h" #endif DaemonEngine-crunch-ef4d32f/crnlib/crn_threading_null.h000066400000000000000000000074201503722002600233050ustar00rootroot00000000000000// File: crn_threading_null.h // See Copyright Notice and license at the end of include/crnlib.h #pragma once #include "crn_atomics.h" namespace crnlib { const uint g_number_of_processors = 1; inline void crn_threading_init() { } typedef uint64 crn_thread_id_t; inline crn_thread_id_t crn_get_current_thread_id() { return 0; } inline void crn_sleep(unsigned int milliseconds) { milliseconds; } inline uint crn_get_max_helper_threads() { return 0; } class mutex { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(mutex); public: inline mutex(unsigned int spin_count = 0) { spin_count; } inline ~mutex() { } inline void lock() { } 
inline void unlock() { } inline void set_spin_count(unsigned int count) { count; } }; class scoped_mutex { scoped_mutex(const scoped_mutex&); scoped_mutex& operator=(const scoped_mutex&); public: inline scoped_mutex(mutex& lock) : m_lock(lock) { m_lock.lock(); } inline ~scoped_mutex() { m_lock.unlock(); } private: mutex& m_lock; }; // Simple non-recursive spinlock. class spinlock { public: inline spinlock() { } inline void lock(uint32 max_spins = 4096, bool yielding = true, bool memoryBarrier = true) { max_spins, yielding, memoryBarrier; } inline void lock_no_barrier(uint32 max_spins = 4096, bool yielding = true) { max_spins, yielding; } inline void unlock() { } inline void unlock_no_barrier() { } }; class scoped_spinlock { scoped_spinlock(const scoped_spinlock&); scoped_spinlock& operator=(const scoped_spinlock&); public: inline scoped_spinlock(spinlock& lock) : m_lock(lock) { m_lock.lock(); } inline ~scoped_spinlock() { m_lock.unlock(); } private: spinlock& m_lock; }; class semaphore { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); public: inline semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = NULL) { initialCount, maximumCount, pName; } inline ~semaphore() { } inline void release(long releaseCount = 1, long* pPreviousCount = NULL) { releaseCount, pPreviousCount; } inline bool wait(uint32 milliseconds = cUINT32_MAX) { milliseconds; return true; } }; class task_pool { public: inline task_pool() {} inline task_pool(uint num_threads) { num_threads; } inline ~task_pool() {} inline bool init(uint num_threads) { num_threads; return true; } inline void deinit() {} inline uint get_num_threads() const { return 0; } inline uint get_num_outstanding_tasks() const { return 0; } // C-style task callback typedef void (*task_callback_func)(uint64 data, void* pData_ptr); inline bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL) { pFunc(data, pData_ptr); return true; } class executable_task { public: virtual void 
execute_task(uint64 data, void* pData_ptr) = 0; }; // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! inline bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL) { pObj->execute_task(data, pData_ptr); return true; } template inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL) { (pObject->*pObject_method)(data, pData_ptr); return true; } template inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL) { for (uint i = 0; i < num_tasks; i++) { (pObject->*pObject_method)(first_data + i, pData_ptr); } return true; } inline void join() {} }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_threading_pthreads.cpp000066400000000000000000000232571503722002600245060ustar00rootroot00000000000000// File: crn_threading_pthreads.cpp // See Copyright Notice and license at the end of include/crnlib.h #include "crn_core.h" #include "crn_threading_pthreads.h" #include "crn_timer.h" #if CRNLIB_USE_PTHREADS_API #ifdef WIN32 #pragma comment(lib, "../ext/libpthread/lib/pthreadVC2.lib") #include "crn_winhdr.h" #endif #if defined(__FreeBSD__) || defined(__APPLE__) #include #elif defined(__GNUC__) #include #endif #ifdef WIN32 #include #endif namespace crnlib { uint g_number_of_processors = 1; void crn_threading_init() { #ifdef WIN32 SYSTEM_INFO g_system_info; GetSystemInfo(&g_system_info); g_number_of_processors = math::maximum(1U, g_system_info.dwNumberOfProcessors); #elif defined(__FreeBSD__) || defined(__APPLE__) g_number_of_processors = math::maximum(1, sysconf(_SC_NPROCESSORS_ONLN)); #elif defined(__GNUC__) g_number_of_processors = math::maximum(1, get_nprocs()); #endif g_number_of_processors = math::minimum(g_number_of_processors, task_pool::cMaxThreads); } crn_thread_id_t crn_get_current_thread_id() { // FIXME: Not portable return (crn_thread_id_t)(pthread_self()); } void 
crn_sleep(unsigned int milliseconds) { #ifdef WIN32 struct timespec interval; interval.tv_sec = milliseconds / 1000; interval.tv_nsec = (milliseconds % 1000) * 1000000L; pthread_delay_np(&interval); #else while (milliseconds) { int msecs_to_sleep = CRNLIB_MIN(milliseconds, 1000); usleep(msecs_to_sleep * 1000); milliseconds -= msecs_to_sleep; } #endif } mutex::mutex(unsigned int /* spin_count */) { if (pthread_mutex_init(&m_mutex, NULL)) crnlib_fail("mutex::mutex: pthread_mutex_init() failed", __FILE__, __LINE__); #ifdef CRNLIB_BUILD_DEBUG m_lock_count = 0; #endif } mutex::~mutex() { #ifdef CRNLIB_BUILD_DEBUG if (m_lock_count) crnlib_assert("mutex::~mutex: mutex is still locked", __FILE__, __LINE__); #endif if (pthread_mutex_destroy(&m_mutex)) crnlib_assert("mutex::~mutex: pthread_mutex_destroy() failed", __FILE__, __LINE__); } void mutex::lock() { pthread_mutex_lock(&m_mutex); #ifdef CRNLIB_BUILD_DEBUG m_lock_count++; #endif } void mutex::unlock() { #ifdef CRNLIB_BUILD_DEBUG if (!m_lock_count) crnlib_assert("mutex::unlock: mutex is not locked", __FILE__, __LINE__); m_lock_count--; #endif pthread_mutex_unlock(&m_mutex); } void mutex::set_spin_count(unsigned int /* count */) { } semaphore::semaphore(long initialCount, long maximumCount, const char* pName) { CRNLIB_ASSERT(maximumCount >= initialCount); #if !defined(__APPLE__) (void)maximumCount; (void)pName; m_sem = new sem_t(); if (sem_init(m_sem, 0, initialCount)) { CRNLIB_FAIL("semaphore: sem_init() failed"); } #else m_name = pName ? 
pName : "semaphore"; for(int i = 0; i < 256; i++) { m_sem = sem_open(m_name, O_CREAT | O_EXCL, 0644, initialCount); if (m_sem != SEM_FAILED) { break; } sem_unlink(m_name); } if (m_sem == SEM_FAILED) { CRNLIB_FAIL("semaphore: sem_open() failed"); } #endif } semaphore::~semaphore() { #if !defined(__APPLE__) sem_destroy(m_sem); #else sem_unlink(m_name); #endif } void semaphore::release(long releaseCount) { CRNLIB_ASSERT(releaseCount >= 1); int status = 0; #ifdef WIN32 if (1 == releaseCount) status = sem_post(m_sem); else status = sem_post_multiple(m_sem, releaseCount); #else while (releaseCount > 0) { status = sem_post(m_sem); if (status) break; releaseCount--; } #endif if (status) { CRNLIB_FAIL("semaphore: sem_post() or sem_post_multiple() failed"); } } void semaphore::try_release(long releaseCount) { CRNLIB_ASSERT(releaseCount >= 1); #ifdef WIN32 if (1 == releaseCount) sem_post(m_sem); else sem_post_multiple(m_sem, releaseCount); #else while (releaseCount > 0) { sem_post(m_sem); releaseCount--; } #endif } bool semaphore::wait(uint32 milliseconds) { int status; if (milliseconds == cUINT32_MAX) { status = sem_wait(m_sem); } else { #if !defined(__APPLE__) struct timespec interval; interval.tv_sec = milliseconds / 1000; interval.tv_nsec = (milliseconds % 1000) * 1000000L; status = sem_timedwait(m_sem, &interval); #else status = sem_wait(m_sem); #endif } if (status) { if (errno != ETIMEDOUT) { CRNLIB_FAIL("semaphore: sem_wait() or sem_timedwait() failed"); } return false; } return true; } spinlock::spinlock() { #if !defined(__APPLE__) if (pthread_spin_init(&m_spinlock, 0)) { CRNLIB_FAIL("spinlock: pthread_spin_init() failed"); } #else m_lock = new os_unfair_lock(); *m_lock = OS_UNFAIR_LOCK_INIT; #endif } spinlock::~spinlock() { #if !defined(__APPLE__) pthread_spin_destroy(&m_spinlock); #else delete m_lock; #endif } void spinlock::lock() { #if !defined(__APPLE__) if (pthread_spin_lock(&m_spinlock)) { CRNLIB_FAIL("spinlock: pthread_spin_lock() failed"); } #else 
os_unfair_lock_lock(m_lock); #endif } void spinlock::unlock() { #if !defined(__APPLE__) if (pthread_spin_unlock(&m_spinlock)) { CRNLIB_FAIL("spinlock: pthread_spin_unlock() failed"); } #else os_unfair_lock_unlock(m_lock); #endif } task_pool::task_pool() : m_num_threads(0), m_tasks_available(0, 32767), m_all_tasks_completed(0, 1), m_total_submitted_tasks(0), m_total_completed_tasks(0), m_exit_flag(false) { utils::zero_object(m_threads); } task_pool::task_pool(uint num_threads) : m_num_threads(0), m_tasks_available(0, 32767), m_all_tasks_completed(0, 1), m_total_submitted_tasks(0), m_total_completed_tasks(0), m_exit_flag(false) { utils::zero_object(m_threads); bool status = init(num_threads); CRNLIB_VERIFY(status); } task_pool::~task_pool() { deinit(); } bool task_pool::init(uint num_threads) { CRNLIB_ASSERT(num_threads <= cMaxThreads); num_threads = math::minimum(num_threads, cMaxThreads); deinit(); bool succeeded = true; m_num_threads = 0; while (m_num_threads < num_threads) { int status = pthread_create(&m_threads[m_num_threads], NULL, thread_func, this); if (status) { succeeded = false; break; } m_num_threads++; } if (!succeeded) { deinit(); return false; } return true; } void task_pool::deinit() { if (m_num_threads) { join(); atomic_exchange32(&m_exit_flag, true); m_tasks_available.release(m_num_threads); for (uint i = 0; i < m_num_threads; i++) pthread_join(m_threads[i], NULL); m_num_threads = 0; atomic_exchange32(&m_exit_flag, false); } m_task_stack.clear(); m_total_submitted_tasks = 0; m_total_completed_tasks = 0; } bool task_pool::queue_task(task_callback_func pFunc, uint64 data, void* pData_ptr) { CRNLIB_ASSERT(pFunc); task tsk; tsk.m_callback = pFunc; tsk.m_data = data; tsk.m_pData_ptr = pData_ptr; tsk.m_flags = 0; atomic_increment32(&m_total_submitted_tasks); if (!m_task_stack.try_push(tsk)) { atomic_increment32(&m_total_completed_tasks); return false; } m_tasks_available.release(1); return true; } // It's the object's responsibility to delete pObj within 
the execute_task() method, if needed! bool task_pool::queue_task(executable_task* pObj, uint64 data, void* pData_ptr) { CRNLIB_ASSERT(pObj); task tsk; tsk.m_pObj = pObj; tsk.m_data = data; tsk.m_pData_ptr = pData_ptr; tsk.m_flags = cTaskFlagObject; atomic_increment32(&m_total_submitted_tasks); if (!m_task_stack.try_push(tsk)) { atomic_increment32(&m_total_completed_tasks); return false; } m_tasks_available.release(1); return true; } void task_pool::process_task(task& tsk) { if (tsk.m_flags & cTaskFlagObject) tsk.m_pObj->execute_task(tsk.m_data, tsk.m_pData_ptr); else tsk.m_callback(tsk.m_data, tsk.m_pData_ptr); if (atomic_increment32(&m_total_completed_tasks) == m_total_submitted_tasks) { // Try to signal the semaphore (the max count is 1 so this may actually fail). m_all_tasks_completed.try_release(); } } void task_pool::join() { // Try to steal any outstanding tasks. This could cause one or more worker threads to wake up and immediately go back to sleep, which is wasteful but should be harmless. task tsk; while (m_task_stack.pop(tsk)) process_task(tsk); // At this point the task stack is empty. // Now wait for all concurrent tasks to complete. The m_all_tasks_completed semaphore has a max count of 1, so it's possible it could have saturated to 1 as the tasks // where issued and asynchronously completed, so this loop may iterate a few times. const int total_submitted_tasks = atomic_add32(&m_total_submitted_tasks, 0); while (m_total_completed_tasks != total_submitted_tasks) { // If the previous (m_total_completed_tasks != total_submitted_tasks) check failed the semaphore MUST be eventually signalled once the last task completes. // So I think this can actually be an INFINITE delay, but it shouldn't really matter if it's 1ms. 
m_all_tasks_completed.wait(1); } } void* task_pool::thread_func(void* pContext) { task_pool* pPool = static_cast(pContext); task tsk; for (;;) { if (!pPool->m_tasks_available.wait()) break; if (pPool->m_exit_flag) break; if (pPool->m_task_stack.pop(tsk)) { pPool->process_task(tsk); } } return NULL; } } // namespace crnlib #endif // CRNLIB_USE_PTHREADS_API DaemonEngine-crunch-ef4d32f/crnlib/crn_threading_pthreads.h000066400000000000000000000170401503722002600241440ustar00rootroot00000000000000// File: crn_threading_pthreads.h // See Copyright Notice and license at the end of include/crnlib.h #pragma once #if CRNLIB_USE_PTHREADS_API #include "crn_atomics.h" #if CRNLIB_NO_ATOMICS #error No atomic operations defined in crn_platform.h! #endif #include #include #include #if defined(__APPLE__) #include #endif namespace crnlib { // g_number_of_processors defaults to 1. Will be higher on multicore machines. extern uint g_number_of_processors; void crn_threading_init(); typedef uint64 crn_thread_id_t; crn_thread_id_t crn_get_current_thread_id(); void crn_sleep(unsigned int milliseconds); uint crn_get_max_helper_threads(); class mutex { mutex(const mutex&); mutex& operator=(const mutex&); public: mutex(unsigned int spin_count = 0); ~mutex(); void lock(); void unlock(); void set_spin_count(unsigned int count); private: pthread_mutex_t m_mutex; #ifdef CRNLIB_BUILD_DEBUG unsigned int m_lock_count; #endif }; class scoped_mutex { scoped_mutex(const scoped_mutex&); scoped_mutex& operator=(const scoped_mutex&); public: inline scoped_mutex(mutex& m) : m_mutex(m) { m_mutex.lock(); } inline ~scoped_mutex() { m_mutex.unlock(); } private: mutex& m_mutex; }; class semaphore { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); public: semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = NULL); ~semaphore(); void release(long releaseCount = 1); void try_release(long releaseCount = 1); bool wait(uint32 milliseconds = cUINT32_MAX); private: sem_t* m_sem; #if defined(__APPLE__) 
const char* m_name; #endif }; class spinlock { public: spinlock(); ~spinlock(); void lock(); void unlock(); private: #if !defined(__APPLE__) pthread_spinlock_t m_spinlock; #else os_unfair_lock_t m_lock; #endif }; class scoped_spinlock { scoped_spinlock(const scoped_spinlock&); scoped_spinlock& operator=(const scoped_spinlock&); public: inline scoped_spinlock(spinlock& lock) : m_lock(lock) { m_lock.lock(); } inline ~scoped_spinlock() { m_lock.unlock(); } private: spinlock& m_lock; }; template class tsstack { public: inline tsstack() : m_top(0) { } inline ~tsstack() { } inline void clear() { m_spinlock.lock(); m_top = 0; m_spinlock.unlock(); } inline bool try_push(const T& obj) { bool result = false; m_spinlock.lock(); if (m_top < (int)cMaxSize) { m_stack[m_top++] = obj; result = true; } m_spinlock.unlock(); return result; } inline bool pop(T& obj) { bool result = false; m_spinlock.lock(); if (m_top > 0) { obj = m_stack[--m_top]; result = true; } m_spinlock.unlock(); return result; } private: spinlock m_spinlock; T m_stack[cMaxSize]; int m_top; }; class task_pool { public: task_pool(); task_pool(uint num_threads); ~task_pool(); enum { cMaxThreads = 16 }; bool init(uint num_threads); void deinit(); inline uint get_num_threads() const { return m_num_threads; } inline uint32 get_num_outstanding_tasks() const { return m_total_submitted_tasks - m_total_completed_tasks; } // C-style task callback typedef void (*task_callback_func)(uint64 data, void* pData_ptr); bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL); class executable_task { public: virtual void execute_task(uint64 data, void* pData_ptr) = 0; virtual ~executable_task( void ) = default; }; // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! 
bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL); template inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL); template inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL); void join(); private: struct task { inline task() : m_data(0), m_pData_ptr(NULL), m_pObj(NULL), m_flags(0) {} uint64 m_data; void* m_pData_ptr; union { task_callback_func m_callback; executable_task* m_pObj; }; uint m_flags; }; tsstack m_task_stack; uint m_num_threads; pthread_t m_threads[cMaxThreads]; // Signalled whenever a task is queued up. semaphore m_tasks_available; // Signalled when all outstanding tasks are completed. semaphore m_all_tasks_completed; enum task_flags { cTaskFlagObject = 1 }; volatile atomic32_t m_total_submitted_tasks; volatile atomic32_t m_total_completed_tasks; volatile atomic32_t m_exit_flag; void process_task(task& tsk); static void* thread_func(void* pContext); }; enum object_task_flags { cObjectTaskFlagDefault = 0, cObjectTaskFlagDeleteAfterExecution = 1 }; template class object_task : public task_pool::executable_task { public: object_task(uint flags = cObjectTaskFlagDefault) : m_pObject(NULL), m_pMethod(NULL), m_flags(flags) { } typedef void (T::*object_method_ptr)(uint64 data, void* pData_ptr); object_task(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) : m_pObject(pObject), m_pMethod(pMethod), m_flags(flags) { CRNLIB_ASSERT(pObject && pMethod); } virtual ~object_task( void ) = default; void init(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) { CRNLIB_ASSERT(pObject && pMethod); m_pObject = pObject; m_pMethod = pMethod; m_flags = flags; } T* get_object() const { return m_pObject; } object_method_ptr get_method() const { return m_pMethod; } virtual void execute_task(uint64 data, void* pData_ptr) { (m_pObject->*m_pMethod)(data, pData_ptr); if (m_flags & 
cObjectTaskFlagDeleteAfterExecution) crnlib_delete(this); } protected: T* m_pObject; object_method_ptr m_pMethod; uint m_flags; }; template inline bool task_pool::queue_object_task(S* pObject, T pObject_method, uint64 data, void* pData_ptr) { object_task* pTask = crnlib_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); if (!pTask) return false; return queue_task(pTask, data, pData_ptr); } template inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr) { CRNLIB_ASSERT(pObject); CRNLIB_ASSERT(num_tasks); if (!num_tasks) return true; bool status = true; uint i; for (i = 0; i < num_tasks; i++) { task tsk; tsk.m_pObj = crnlib_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); if (!tsk.m_pObj) { status = false; break; } tsk.m_data = first_data + i; tsk.m_pData_ptr = pData_ptr; tsk.m_flags = cTaskFlagObject; atomic_increment32(&m_total_submitted_tasks); if (!m_task_stack.try_push(tsk)) { atomic_increment32(&m_total_completed_tasks); status = false; break; } } if (i) { m_tasks_available.release(i); } return status; } } // namespace crnlib #endif // CRNLIB_USE_PTHREADS_API DaemonEngine-crunch-ef4d32f/crnlib/crn_threading_win32.cpp000066400000000000000000000233311503722002600236270ustar00rootroot00000000000000// File: crn_win32_threading.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_threading_win32.h" #include "crn_winhdr.h" #include namespace crnlib { uint g_number_of_processors = 1; void crn_threading_init() { SYSTEM_INFO g_system_info; GetSystemInfo(&g_system_info); g_number_of_processors = math::maximum(1U, g_system_info.dwNumberOfProcessors); g_number_of_processors = math::minimum(g_number_of_processors, task_pool::cMaxThreads); } crn_thread_id_t crn_get_current_thread_id() { return static_cast(GetCurrentThreadId()); } void crn_sleep(unsigned int milliseconds) { Sleep(milliseconds); } uint 
crn_get_max_helper_threads() { if (g_number_of_processors > 1) { // use all CPU's return CRNLIB_MIN((int)task_pool::cMaxThreads, (int)g_number_of_processors - 1); } return 0; } mutex::mutex(unsigned int spin_count) { CRNLIB_ASSUME(sizeof(mutex) >= sizeof(CRITICAL_SECTION)); void* p = m_buf; CRITICAL_SECTION& m_cs = *static_cast(p); BOOL status = true; status = InitializeCriticalSectionAndSpinCount(&m_cs, spin_count); if (!status) crnlib_fail("mutex::mutex: InitializeCriticalSectionAndSpinCount failed", __FILE__, __LINE__); #ifdef CRNLIB_BUILD_DEBUG m_lock_count = 0; #endif } mutex::~mutex() { void* p = m_buf; CRITICAL_SECTION& m_cs = *static_cast(p); #ifdef CRNLIB_BUILD_DEBUG if (m_lock_count) crnlib_assert("mutex::~mutex: mutex is still locked", __FILE__, __LINE__); #endif DeleteCriticalSection(&m_cs); } void mutex::lock() { void* p = m_buf; CRITICAL_SECTION& m_cs = *static_cast(p); EnterCriticalSection(&m_cs); #ifdef CRNLIB_BUILD_DEBUG m_lock_count++; #endif } void mutex::unlock() { void* p = m_buf; CRITICAL_SECTION& m_cs = *static_cast(p); #ifdef CRNLIB_BUILD_DEBUG if (!m_lock_count) crnlib_assert("mutex::unlock: mutex is not locked", __FILE__, __LINE__); m_lock_count--; #endif LeaveCriticalSection(&m_cs); } void mutex::set_spin_count(unsigned int count) { void* p = m_buf; CRITICAL_SECTION& m_cs = *static_cast(p); SetCriticalSectionSpinCount(&m_cs, count); } void spinlock::lock(uint32 max_spins, bool yielding) { if (g_number_of_processors <= 1) max_spins = 1; uint32 spinCount = 0; uint32 yieldCount = 0; for (;;) { CRNLIB_ASSUME(sizeof(long) == sizeof(int32)); if (!InterlockedExchange((volatile long*)&m_flag, TRUE)) break; YieldProcessor(); YieldProcessor(); YieldProcessor(); YieldProcessor(); YieldProcessor(); YieldProcessor(); YieldProcessor(); YieldProcessor(); spinCount++; if ((yielding) && (spinCount >= max_spins)) { switch (yieldCount) { case 0: { spinCount = 0; Sleep(0); yieldCount++; break; } case 1: { if (g_number_of_processors <= 1) spinCount = 0; else 
spinCount = max_spins / 2; Sleep(1); yieldCount++; break; } case 2: { if (g_number_of_processors <= 1) spinCount = 0; else spinCount = max_spins; Sleep(2); break; } } } } CRNLIB_MEMORY_IMPORT_BARRIER } void spinlock::unlock() { CRNLIB_MEMORY_EXPORT_BARRIER InterlockedExchange((volatile long*)&m_flag, FALSE); } semaphore::semaphore(int32 initialCount, int32 maximumCount, const char* pName) { m_handle = CreateSemaphoreA(NULL, initialCount, maximumCount, pName); if (NULL == m_handle) { CRNLIB_FAIL("semaphore: CreateSemaphore() failed"); } } semaphore::~semaphore() { if (m_handle) { CloseHandle(m_handle); m_handle = NULL; } } void semaphore::release(int32 releaseCount, int32* pPreviousCount) { CRNLIB_ASSUME(sizeof(LONG) == sizeof(int32)); if (0 == ReleaseSemaphore(m_handle, releaseCount, (LPLONG)pPreviousCount)) { CRNLIB_FAIL("semaphore: ReleaseSemaphore() failed"); } } bool semaphore::try_release(int32 releaseCount, int32* pPreviousCount) { CRNLIB_ASSUME(sizeof(LONG) == sizeof(int32)); return ReleaseSemaphore(m_handle, releaseCount, (LPLONG)pPreviousCount) != 0; } bool semaphore::wait(uint32 milliseconds) { uint32 result = WaitForSingleObject(m_handle, milliseconds); if (WAIT_FAILED == result) { CRNLIB_FAIL("semaphore: WaitForSingleObject() failed"); } return WAIT_OBJECT_0 == result; } task_pool::task_pool() : m_pTask_stack(crnlib_new()), m_num_threads(0), m_tasks_available(0, 32767), m_all_tasks_completed(0, 1), m_total_submitted_tasks(0), m_total_completed_tasks(0), m_exit_flag(false) { utils::zero_object(m_threads); } task_pool::task_pool(uint num_threads) : m_pTask_stack(crnlib_new()), m_num_threads(0), m_tasks_available(0, 32767), m_all_tasks_completed(0, 1), m_total_submitted_tasks(0), m_total_completed_tasks(0), m_exit_flag(false) { utils::zero_object(m_threads); bool status = init(num_threads); CRNLIB_VERIFY(status); } task_pool::~task_pool() { deinit(); crnlib_delete(m_pTask_stack); } bool task_pool::init(uint num_threads) { CRNLIB_ASSERT(num_threads <= 
cMaxThreads); num_threads = math::minimum(num_threads, cMaxThreads); deinit(); bool succeeded = true; m_num_threads = 0; while (m_num_threads < num_threads) { m_threads[m_num_threads] = (HANDLE)_beginthreadex(NULL, 32768, thread_func, this, 0, NULL); CRNLIB_ASSERT(m_threads[m_num_threads] != 0); if (!m_threads[m_num_threads]) { succeeded = false; break; } m_num_threads++; } if (!succeeded) { deinit(); return false; } return true; } void task_pool::deinit() { if (m_num_threads) { join(); // Set exit flag, then release all threads. Each should wakeup and exit. atomic_exchange32(&m_exit_flag, true); m_tasks_available.release(m_num_threads); // Now wait for each thread to exit. for (uint i = 0; i < m_num_threads; i++) { if (m_threads[i]) { for (;;) { // Can be an INFINITE delay, but set at 30 seconds so this function always provably exits. DWORD result = WaitForSingleObject(m_threads[i], 30000); if ((result == WAIT_OBJECT_0) || (result == WAIT_ABANDONED)) break; } CloseHandle(m_threads[i]); m_threads[i] = NULL; } } m_num_threads = 0; atomic_exchange32(&m_exit_flag, false); } if (m_pTask_stack) m_pTask_stack->clear(); m_total_submitted_tasks = 0; m_total_completed_tasks = 0; } bool task_pool::queue_task(task_callback_func pFunc, uint64 data, void* pData_ptr) { CRNLIB_ASSERT(pFunc); task tsk; tsk.m_callback = pFunc; tsk.m_data = data; tsk.m_pData_ptr = pData_ptr; tsk.m_flags = 0; atomic_increment32(&m_total_submitted_tasks); if (!m_pTask_stack->try_push(tsk)) { atomic_increment32(&m_total_completed_tasks); return false; } m_tasks_available.release(1); return true; } // It's the object's responsibility to delete pObj within the execute_task() method, if needed! 
bool task_pool::queue_task(executable_task* pObj, uint64 data, void* pData_ptr) { CRNLIB_ASSERT(pObj); task tsk; tsk.m_pObj = pObj; tsk.m_data = data; tsk.m_pData_ptr = pData_ptr; tsk.m_flags = cTaskFlagObject; atomic_increment32(&m_total_submitted_tasks); if (!m_pTask_stack->try_push(tsk)) { atomic_increment32(&m_total_completed_tasks); return false; } m_tasks_available.release(1); return true; } void task_pool::process_task(task& tsk) { if (tsk.m_flags & cTaskFlagObject) tsk.m_pObj->execute_task(tsk.m_data, tsk.m_pData_ptr); else tsk.m_callback(tsk.m_data, tsk.m_pData_ptr); if (atomic_increment32(&m_total_completed_tasks) == m_total_submitted_tasks) { // Try to signal the semaphore (the max count is 1 so this may actually fail). m_all_tasks_completed.try_release(); } } void task_pool::join() { // Try to steal any outstanding tasks. This could cause one or more worker threads to wake up and immediately go back to sleep, which is wasteful but should be harmless. task tsk; while (m_pTask_stack->pop(tsk)) process_task(tsk); // At this point the task stack is empty. // Now wait for all concurrent tasks to complete. The m_all_tasks_completed semaphore has a max count of 1, so it's possible it could have saturated to 1 as the tasks // where issued and asynchronously completed, so this loop may iterate a few times. const int total_submitted_tasks = atomic_add32(&m_total_submitted_tasks, 0); while (m_total_completed_tasks != total_submitted_tasks) { // If the previous (m_total_completed_tasks != total_submitted_tasks) check failed the semaphore MUST be eventually signalled once the last task completes. // So I think this can actually be an INFINITE delay, but it shouldn't really matter if it's 1ms. 
m_all_tasks_completed.wait(1); } } unsigned __stdcall task_pool::thread_func(void* pContext) { task_pool* pPool = static_cast(pContext); for (;;) { if (!pPool->m_tasks_available.wait()) break; if (pPool->m_exit_flag) break; task tsk; if (pPool->m_pTask_stack->pop(tsk)) pPool->process_task(tsk); } _endthreadex(0); return 0; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_threading_win32.h000066400000000000000000000223341503722002600232760ustar00rootroot00000000000000// File: crn_win32_threading.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_atomics.h" #if CRNLIB_NO_ATOMICS #error No atomic operations defined in crn_platform.h! #endif namespace crnlib { // g_number_of_processors defaults to 1. Will be higher on multicore machines. extern uint g_number_of_processors; void crn_threading_init(); typedef uint64 crn_thread_id_t; crn_thread_id_t crn_get_current_thread_id(); void crn_sleep(unsigned int milliseconds); uint crn_get_max_helper_threads(); class mutex { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(mutex); public: mutex(unsigned int spin_count = 0); ~mutex(); void lock(); void unlock(); void set_spin_count(unsigned int count); private: int m_buf[12]; #ifdef CRNLIB_BUILD_DEBUG unsigned int m_lock_count; #endif }; class scoped_mutex { scoped_mutex(const scoped_mutex&); scoped_mutex& operator=(const scoped_mutex&); public: inline scoped_mutex(mutex& m) : m_mutex(m) { m_mutex.lock(); } inline ~scoped_mutex() { m_mutex.unlock(); } private: mutex& m_mutex; }; // Simple non-recursive spinlock. 
class spinlock { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(spinlock); public: inline spinlock() : m_flag(0) {} void lock(uint32 max_spins = 4096, bool yielding = true); inline void lock_no_barrier(uint32 max_spins = 4096, bool yielding = true) { lock(max_spins, yielding); } void unlock(); inline void unlock_no_barrier() { m_flag = CRNLIB_FALSE; } private: volatile int32 m_flag; }; class scoped_spinlock { scoped_spinlock(const scoped_spinlock&); scoped_spinlock& operator=(const scoped_spinlock&); public: inline scoped_spinlock(spinlock& lock) : m_lock(lock) { m_lock.lock(); } inline ~scoped_spinlock() { m_lock.unlock(); } private: spinlock& m_lock; }; class semaphore { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(semaphore); public: semaphore(int32 initialCount = 0, int32 maximumCount = 1, const char* pName = NULL); ~semaphore(); inline HANDLE get_handle(void) const { return m_handle; } void release(int32 releaseCount = 1, int32* pPreviousCount = NULL); bool try_release(int32 releaseCount = 1, int32* pPreviousCount = NULL); bool wait(uint32 milliseconds = cUINT32_MAX); private: HANDLE m_handle; }; template class tsstack { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(tsstack); public: inline tsstack(bool use_freelist = true) : m_use_freelist(use_freelist) { CRNLIB_VERIFY(((ptr_bits_t)this & (CRNLIB_GET_ALIGNMENT(tsstack) - 1)) == 0); InitializeSListHead(&m_stack_head); InitializeSListHead(&m_freelist_head); } inline ~tsstack() { clear(); } inline void clear() { for (;;) { node* pNode = (node*)InterlockedPopEntrySList(&m_stack_head); if (!pNode) break; CRNLIB_MEMORY_IMPORT_BARRIER helpers::destruct(&pNode->m_obj); crnlib_free(pNode); } flush_freelist(); } inline void flush_freelist() { if (!m_use_freelist) return; for (;;) { node* pNode = (node*)InterlockedPopEntrySList(&m_freelist_head); if (!pNode) break; CRNLIB_MEMORY_IMPORT_BARRIER crnlib_free(pNode); } } inline bool try_push(const T& obj) { node* pNode = alloc_node(); if (!pNode) return false; helpers::construct(&pNode->m_obj, obj); 
CRNLIB_MEMORY_EXPORT_BARRIER InterlockedPushEntrySList(&m_stack_head, &pNode->m_slist_entry); return true; } inline bool pop(T& obj) { node* pNode = (node*)InterlockedPopEntrySList(&m_stack_head); if (!pNode) return false; CRNLIB_MEMORY_IMPORT_BARRIER obj = pNode->m_obj; helpers::destruct(&pNode->m_obj); free_node(pNode); return true; } private: SLIST_HEADER m_stack_head; SLIST_HEADER m_freelist_head; struct node { SLIST_ENTRY m_slist_entry; T m_obj; }; bool m_use_freelist; inline node* alloc_node() { node* pNode = m_use_freelist ? (node*)InterlockedPopEntrySList(&m_freelist_head) : NULL; if (!pNode) pNode = (node*)crnlib_malloc(sizeof(node)); return pNode; } inline void free_node(node* pNode) { if (m_use_freelist) InterlockedPushEntrySList(&m_freelist_head, &pNode->m_slist_entry); else crnlib_free(pNode); } }; // Simple multithreaded task pool. This class assumes a single global thread will be issuing tasks and joining. class task_pool { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(task_pool); public: task_pool(); task_pool(uint num_threads); ~task_pool(); enum { cMaxThreads = 16 }; bool init(uint num_threads); void deinit(); inline uint get_num_threads() const { return m_num_threads; } inline uint32 get_num_outstanding_tasks() const { return m_total_submitted_tasks - m_total_completed_tasks; } // C-style task callback typedef void (*task_callback_func)(uint64 data, void* pData_ptr); bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL); class executable_task { public: virtual void execute_task(uint64 data, void* pData_ptr) = 0; }; // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! 
bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL); template inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL); template inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL); // Waits for all outstanding tasks (if any) to complete. // The calling thread will steal any outstanding tasks from worker threads, if possible. void join(); private: struct task { //inline task() : m_data(0), m_pData_ptr(NULL), m_pObj(NULL), m_flags(0) { } uint64 m_data; void* m_pData_ptr; union { task_callback_func m_callback; executable_task* m_pObj; }; uint m_flags; }; typedef tsstack ts_task_stack_t; ts_task_stack_t* m_pTask_stack; uint m_num_threads; HANDLE m_threads[cMaxThreads]; // Signalled whenever a task is queued up. semaphore m_tasks_available; // Signalled when all outstanding tasks are completed. semaphore m_all_tasks_completed; enum task_flags { cTaskFlagObject = 1 }; volatile atomic32_t m_total_submitted_tasks; volatile atomic32_t m_total_completed_tasks; volatile atomic32_t m_exit_flag; void process_task(task& tsk); static unsigned __stdcall thread_func(void* pContext); }; enum object_task_flags { cObjectTaskFlagDefault = 0, cObjectTaskFlagDeleteAfterExecution = 1 }; template class object_task : public task_pool::executable_task { public: object_task(uint flags = cObjectTaskFlagDefault) : m_pObject(NULL), m_pMethod(NULL), m_flags(flags) { } typedef void (T::*object_method_ptr)(uint64 data, void* pData_ptr); object_task(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) : m_pObject(pObject), m_pMethod(pMethod), m_flags(flags) { CRNLIB_ASSERT(pObject && pMethod); } void init(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) { CRNLIB_ASSERT(pObject && pMethod); m_pObject = pObject; m_pMethod = pMethod; m_flags = flags; } T* get_object() const { return m_pObject; } 
object_method_ptr get_method() const { return m_pMethod; } virtual void execute_task(uint64 data, void* pData_ptr) { (m_pObject->*m_pMethod)(data, pData_ptr); if (m_flags & cObjectTaskFlagDeleteAfterExecution) crnlib_delete(this); } protected: T* m_pObject; object_method_ptr m_pMethod; uint m_flags; }; template inline bool task_pool::queue_object_task(S* pObject, T pObject_method, uint64 data, void* pData_ptr) { object_task* pTask = crnlib_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); if (!pTask) return false; return queue_task(pTask, data, pData_ptr); } template inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr) { CRNLIB_ASSERT(pObject); CRNLIB_ASSERT(num_tasks); if (!num_tasks) return true; bool status = true; uint i; for (i = 0; i < num_tasks; i++) { task tsk; tsk.m_pObj = crnlib_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); if (!tsk.m_pObj) { status = false; break; } tsk.m_data = first_data + i; tsk.m_pData_ptr = pData_ptr; tsk.m_flags = cTaskFlagObject; atomic_increment32(&m_total_submitted_tasks); if (!m_pTask_stack->try_push(tsk)) { atomic_increment32(&m_total_completed_tasks); status = false; break; } } if (i) { m_tasks_available.release(i); } return status; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_timer.cpp000066400000000000000000000056201503722002600217610ustar00rootroot00000000000000// File: crn_win32_timer.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_timer.h" #include #if defined(__FreeBSD__) #include "sys/time.h" #endif #include "crn_timer.h" #if CRNLIB_USE_WIN32_API #include "crn_winhdr.h" #endif namespace crnlib { unsigned long long timer::g_init_ticks; unsigned long long timer::g_freq; double timer::g_inv_freq; #if defined(CRNLIB_USE_WIN32_API) inline void query_counter(timer_ticks* pTicks) { QueryPerformanceCounter(reinterpret_cast(pTicks)); } 
inline void query_counter_frequency(timer_ticks* pTicks) { QueryPerformanceFrequency(reinterpret_cast(pTicks)); } #elif defined(__GNUC__) #include inline void query_counter(timer_ticks* pTicks) { struct timeval cur_time; gettimeofday(&cur_time, NULL); *pTicks = static_cast(cur_time.tv_sec) * 1000000ULL + static_cast(cur_time.tv_usec); } inline void query_counter_frequency(timer_ticks* pTicks) { *pTicks = 1000000; } #else #error Unimplemented #endif timer::timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false) { if (!g_inv_freq) init(); } timer::timer(timer_ticks start_ticks) { if (!g_inv_freq) init(); m_start_time = start_ticks; m_started = true; m_stopped = false; } void timer::start(timer_ticks start_ticks) { m_start_time = start_ticks; m_started = true; m_stopped = false; } void timer::start() { query_counter(&m_start_time); m_started = true; m_stopped = false; } void timer::stop() { CRNLIB_ASSERT(m_started); query_counter(&m_stop_time); m_stopped = true; } double timer::get_elapsed_secs() const { CRNLIB_ASSERT(m_started); if (!m_started) return 0; timer_ticks stop_time = m_stop_time; if (!m_stopped) query_counter(&stop_time); timer_ticks delta = stop_time - m_start_time; return delta * g_inv_freq; } timer_ticks timer::get_elapsed_us() const { CRNLIB_ASSERT(m_started); if (!m_started) return 0; timer_ticks stop_time = m_stop_time; if (!m_stopped) query_counter(&stop_time); timer_ticks delta = stop_time - m_start_time; return (delta * 1000000ULL + (g_freq >> 1U)) / g_freq; } void timer::init() { if (!g_inv_freq) { query_counter_frequency(&g_freq); g_inv_freq = 1.0f / g_freq; query_counter(&g_init_ticks); } } timer_ticks timer::get_init_ticks() { if (!g_inv_freq) init(); return g_init_ticks; } timer_ticks timer::get_ticks() { if (!g_inv_freq) init(); timer_ticks ticks; query_counter(&ticks); return ticks - g_init_ticks; } double timer::ticks_to_secs(timer_ticks ticks) { if (!g_inv_freq) init(); return ticks * g_inv_freq; } } // namespace 
crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_timer.h000066400000000000000000000034001503722002600214200ustar00rootroot00000000000000// File: crn_win32_timer.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { typedef unsigned long long timer_ticks; class timer { public: timer(); timer(timer_ticks start_ticks); void start(); void start(timer_ticks start_ticks); void stop(); double get_elapsed_secs() const; inline double get_elapsed_ms() const { return get_elapsed_secs() * 1000.0f; } timer_ticks get_elapsed_us() const; static void init(); static inline timer_ticks get_ticks_per_sec() { return g_freq; } static timer_ticks get_init_ticks(); static timer_ticks get_ticks(); static double ticks_to_secs(timer_ticks ticks); static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; } static inline double get_secs() { return ticks_to_secs(get_ticks()); } static inline double get_ms() { return ticks_to_ms(get_ticks()); } private: static timer_ticks g_init_ticks; static timer_ticks g_freq; static double g_inv_freq; timer_ticks m_start_time; timer_ticks m_stop_time; bool m_started : 1; bool m_stopped : 1; }; // Prints object's lifetime to stdout class timed_scope { const char* m_pName; timer m_tm; public: inline timed_scope(const char* pName = "timed_scope") : m_pName(pName) { m_tm.start(); } inline double get_elapsed_secs() const { return m_tm.get_elapsed_secs(); } inline double get_elapsed_ms() const { return m_tm.get_elapsed_ms(); } const timer& get_timer() const { return m_tm; } timer& get_timer() { return m_tm; } inline ~timed_scope() { double secs = m_tm.get_elapsed_secs(); printf("%s: %f secs, %f ms\n", m_pName, secs, secs * 1000.0f); } }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_traits.h000066400000000000000000000131471503722002600216170ustar00rootroot00000000000000// File: crn_traits.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace 
crnlib { template struct int_traits { enum { cMin = crnlib::cINT32_MIN, cMax = crnlib::cINT32_MAX, cSigned = true }; }; template <> struct int_traits { enum { cMin = crnlib::cINT8_MIN, cMax = crnlib::cINT8_MAX, cSigned = true }; }; template <> struct int_traits { enum { cMin = crnlib::cINT16_MIN, cMax = crnlib::cINT16_MAX, cSigned = true }; }; template <> struct int_traits { enum { cMin = crnlib::cINT32_MIN, cMax = crnlib::cINT32_MAX, cSigned = true }; }; template <> struct int_traits { enum { cMin = 0, cMax = crnlib::cUINT8_MAX, cSigned = false }; }; template <> struct int_traits { enum { cMin = 0, cMax = crnlib::cUINT16_MAX, cSigned = false }; }; template <> struct int_traits { enum { cMin = 0, cMax = crnlib::cUINT32_MAX, cSigned = false }; }; template struct scalar_type { enum { cFlag = false }; static inline void construct(T* p) { helpers::construct(p); } static inline void construct(T* p, const T& init) { helpers::construct(p, init); } static inline void construct_array(T* p, uint n) { helpers::construct_array(p, n); } static inline void destruct(T* p) { helpers::destruct(p); } static inline void destruct_array(T* p, uint n) { helpers::destruct_array(p, n); } }; template struct scalar_type { enum { cFlag = true }; static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } static inline void construct(T** p, T* init) { *p = init; } static inline void construct_array(T** p, uint n) { memset(p, 0, sizeof(T*) * n); } static inline void destruct(T**) {} static inline void destruct_array(T**, uint) {} }; #define CRNLIB_DEFINE_BUILT_IN_TYPE(X) \ template <> \ struct scalar_type { \ enum { cFlag = true }; \ static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ static inline void construct_array(X* p, uint n) { memset(p, 0, sizeof(X) * n); } \ static inline void destruct(X*) {} \ static inline void destruct_array(X*, uint) {} \ }; CRNLIB_DEFINE_BUILT_IN_TYPE(bool) 
CRNLIB_DEFINE_BUILT_IN_TYPE(char) CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned char) CRNLIB_DEFINE_BUILT_IN_TYPE(short) CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned short) CRNLIB_DEFINE_BUILT_IN_TYPE(int) CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned int) CRNLIB_DEFINE_BUILT_IN_TYPE(long) CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned long) #ifdef __GNUC__ CRNLIB_DEFINE_BUILT_IN_TYPE(long long) CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned long long) #else CRNLIB_DEFINE_BUILT_IN_TYPE(__int64) CRNLIB_DEFINE_BUILT_IN_TYPE(unsigned __int64) #endif CRNLIB_DEFINE_BUILT_IN_TYPE(float) CRNLIB_DEFINE_BUILT_IN_TYPE(double) CRNLIB_DEFINE_BUILT_IN_TYPE(long double) #undef CRNLIB_DEFINE_BUILT_IN_TYPE // See: http://erdani.org/publications/cuj-2004-06.pdf template struct bitwise_movable { enum { cFlag = false }; }; // Defines type Q as bitwise movable. // Bitwise movable: type T may be safely moved to a new location via memcpy, without requiring the old copy to be destructed. // However, the final version of the object (wherever it winds up in memory) must be eventually destructed (a single time, of course). // Bitwise movable is a superset of bitwise copyable (all bitwise copyable types are also bitwise movable). #define CRNLIB_DEFINE_BITWISE_MOVABLE(Q) \ template <> \ struct bitwise_movable { \ enum { cFlag = true }; \ }; template struct bitwise_copyable { enum { cFlag = false }; }; // Defines type Q as bitwise copyable. // Bitwise copyable: type T may be safely and freely copied (duplicated) via memcpy, and *does not* require destruction. 
#define CRNLIB_DEFINE_BITWISE_COPYABLE(Q) \ template <> \ struct bitwise_copyable { \ enum { cFlag = true }; \ }; #define CRNLIB_IS_POD(T) __is_pod(T) #define CRNLIB_IS_SCALAR_TYPE(T) (scalar_type::cFlag) #define CRNLIB_IS_BITWISE_COPYABLE(T) (CRNLIB_IS_SCALAR_TYPE(T) || CRNLIB_IS_POD(T) || (bitwise_copyable::cFlag)) #define CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T) (CRNLIB_IS_BITWISE_COPYABLE(T) || (bitwise_movable::cFlag)) #define CRNLIB_HAS_DESTRUCTOR(T) ((!scalar_type::cFlag) && (!__is_pod(T))) // From yasli_traits.h: // Credit goes to Boost; // also found in the C++ Templates book by Vandevoorde and Josuttis typedef char (&yes_t)[1]; typedef char (&no_t)[2]; template yes_t class_test(int U::*); template no_t class_test(...); template struct is_class { enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; }; template struct is_pointer { enum { value = false }; }; template struct is_pointer { enum { value = true }; }; CRNLIB_DEFINE_BITWISE_COPYABLE(empty_type); CRNLIB_DEFINE_BITWISE_MOVABLE(empty_type); } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_tree_clusterizer.h000066400000000000000000000510751503722002600237050ustar00rootroot00000000000000// File: crn_tree_clusterizer.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_matrix.h" #include "crn_threading.h" #include namespace crnlib { template class tree_clusterizer { public: tree_clusterizer() {} struct VectorInfo { uint index; uint weight; }; struct NodeInfo { uint m_index; float m_variance; NodeInfo (uint index, float variance) : m_index(index), m_variance(variance) {} bool operator<(const NodeInfo& other) const { return m_index < other.m_index ? 
m_variance < other.m_variance : !(other.m_variance < m_variance); } }; struct split_alternative_node_task_params { uint main_node; uint alternative_node; uint max_splits; }; void split_alternative_node_task(uint64, void* pData_ptr) { split_alternative_node_task_params* pParams = (split_alternative_node_task_params*)pData_ptr; std::priority_queue node_queue; uint begin_node = pParams->alternative_node, end_node = begin_node, splits = 0; m_nodes[end_node] = m_nodes[pParams->main_node]; node_queue.push(NodeInfo(end_node, m_nodes[end_node].m_variance)); end_node++; splits++; while (splits < pParams->max_splits && split_node(node_queue, end_node)) splits++; m_nodes[pParams->main_node] = m_nodes[pParams->alternative_node]; m_nodes[pParams->main_node].m_alternative = true; } void generate_codebook(VectorType* vectors, uint* weights, uint size, uint max_splits, bool generate_node_index_map = false, task_pool* pTask_pool = 0) { m_vectors = vectors; m_vectorsInfo.resize(size); m_weightedVectors.resize(size); m_weightedDotProducts.resize(size); m_vectorsInfoLeft.resize(size); m_vectorsInfoRight.resize(size); m_vectorComparison.resize(size); m_nodes.resize(max_splits << 2); m_codebook.clear(); uint num_tasks = pTask_pool ? 
pTask_pool->get_num_threads() + 1 : 1; vq_node root; root.m_begin = 0; root.m_end = size; double ttsum = 0.0f; for (uint i = 0; i < m_vectorsInfo.size(); i++) { const VectorType& v = vectors[i]; m_vectorsInfo[i].index = i; const uint weight = m_vectorsInfo[i].weight = weights[i]; m_weightedVectors[i] = v * (float)weight; root.m_centroid += m_weightedVectors[i]; root.m_total_weight += weight; m_weightedDotProducts[i] = v.dot(v) * weight; ttsum += m_weightedDotProducts[i]; } root.m_variance = (float)(ttsum - (root.m_centroid.dot(root.m_centroid) / root.m_total_weight)); root.m_centroid *= (1.0f / root.m_total_weight); std::priority_queue node_queue; uint begin_node = 0, end_node = begin_node, splits = 0; m_nodes[end_node] = root; node_queue.push(NodeInfo(end_node, root.m_variance)); end_node++; splits++; if (num_tasks > 1) { while (splits < max_splits && node_queue.size() != num_tasks && split_node(node_queue, end_node, pTask_pool)) splits++; if (node_queue.size() == num_tasks) { std::priority_queue alternative_node_queue = node_queue; uint alternative_node = max_splits << 1, alternative_max_splits = max_splits / num_tasks; crnlib::vector params(num_tasks); for (uint task = 0; !alternative_node_queue.empty(); alternative_node_queue.pop(), alternative_node += alternative_max_splits << 1, task++) { params[task].main_node = alternative_node_queue.top().m_index; params[task].alternative_node = alternative_node; params[task].max_splits = alternative_max_splits; pTask_pool->queue_object_task(this, &tree_clusterizer::split_alternative_node_task, task, ¶ms[task]); } pTask_pool->join(); } } while (splits < max_splits && split_node(node_queue, end_node, pTask_pool)) splits++; for (uint i = begin_node; i < end_node; i++) { vq_node& node = m_nodes[i]; if (!node.m_alternative && node.m_left != -1) continue; node.m_codebook_index = m_codebook.size(); m_codebook.push_back(node.m_centroid); if (generate_node_index_map) { for (uint j = node.m_begin; j < node.m_end; j++) 
m_node_index_map.insert(std::make_pair(m_vectors[m_vectorsInfo[j].index], node.m_codebook_index)); } } } inline uint get_node_index(const VectorType& v) { return m_node_index_map.find(v)->second; } inline uint get_codebook_size() const { return m_codebook.size(); } inline const VectorType& get_codebook_entry(uint index) const { return m_codebook[index]; } typedef crnlib::vector vector_vec_type; inline const vector_vec_type& get_codebook() const { return m_codebook; } private: VectorType* m_vectors; crnlib::vector m_weightedVectors; crnlib::vector m_weightedDotProducts; crnlib::vector m_vectorsInfo, m_vectorsInfoLeft, m_vectorsInfoRight; crnlib::vector m_vectorComparison; crnlib::hash_map m_node_index_map; struct vq_node { vq_node() : m_centroid(cClear), m_total_weight(0), m_left(-1), m_right(-1), m_codebook_index(-1), m_unsplittable(false), m_alternative(false), m_processed(false) {} VectorType m_centroid; uint64 m_total_weight; float m_variance; uint m_begin; uint m_end; int m_left; int m_right; int m_codebook_index; bool m_unsplittable; bool m_alternative; bool m_processed; }; typedef crnlib::vector node_vec_type; node_vec_type m_nodes; vector_vec_type m_codebook; struct distance_comparison_task_params { VectorType* left_child; VectorType* right_child; uint begin; uint end; uint num_tasks; }; void distance_comparison_task(uint64 data, void* pData_ptr) { distance_comparison_task_params* pParams = (distance_comparison_task_params*)pData_ptr; const VectorType& left_child = *pParams->left_child; const VectorType& right_child = *pParams->right_child; uint begin = pParams->begin + (pParams->end - pParams->begin) * data / pParams->num_tasks; uint end = pParams->begin + (pParams->end - pParams->begin) * (data + 1) / pParams->num_tasks; for (uint i = begin; i < end; i++) { const VectorType& v = m_vectors[m_vectorsInfo[i].index]; double left_dist2 = left_child.squared_distance(v); double right_dist2 = right_child.squared_distance(v); m_vectorComparison[i] = left_dist2 < 
right_dist2; } } bool split_node(std::priority_queue& node_queue, uint& end_node, task_pool* pTask_pool = 0) { if (node_queue.empty()) return false; vq_node& parent_node = m_nodes[node_queue.top().m_index]; if (parent_node.m_alternative) parent_node.m_alternative = false; if (parent_node.m_variance <= 0.0f || parent_node.m_begin + 1 == parent_node.m_end) return false; node_queue.pop(); if (parent_node.m_processed) { if (!parent_node.m_unsplittable) { m_nodes[end_node] = m_nodes[parent_node.m_left]; m_nodes[end_node].m_alternative = true; node_queue.push(NodeInfo(end_node, m_nodes[end_node].m_variance)); parent_node.m_left = end_node++; m_nodes[end_node] = m_nodes[parent_node.m_right]; m_nodes[end_node].m_alternative = true; node_queue.push(NodeInfo(end_node, m_nodes[end_node].m_variance)); parent_node.m_right = end_node++; } return true; } parent_node.m_processed = true; uint num_blocks = (parent_node.m_end - parent_node.m_begin) >> 9; uint num_tasks = num_blocks > 1 && pTask_pool ? math::minimum(num_blocks, pTask_pool->get_num_threads() + 1) : 1; VectorType furthest(0); double furthest_dist = -1.0f; for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) { const VectorType& v = m_vectors[m_vectorsInfo[i].index]; double dist = v.squared_distance(parent_node.m_centroid); if (dist > furthest_dist) { furthest_dist = dist; furthest = v; } } VectorType opposite; double opposite_dist = -1.0f; for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) { const VectorType& v = m_vectors[m_vectorsInfo[i].index]; double dist = v.squared_distance(furthest); if (dist > opposite_dist) { opposite_dist = dist; opposite = v; } } VectorType left_child((furthest + parent_node.m_centroid) * .5f); VectorType right_child((opposite + parent_node.m_centroid) * .5f); if (parent_node.m_begin + 2 < parent_node.m_end) { const uint N = VectorType::num_elements; matrix covar; covar.clear(); for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) { const VectorType& v = 
m_vectors[m_vectorsInfo[i].index] - parent_node.m_centroid; const VectorType w = v * (float)m_vectorsInfo[i].weight; for (uint x = 0; x < N; x++) { for (uint y = x; y < N; y++) covar[x][y] = covar[x][y] + v[x] * w[y]; } } float divider = (float)parent_node.m_total_weight; for (uint x = 0; x < N; x++) { for (uint y = x; y < N; y++) { covar[x][y] /= divider; covar[y][x] = covar[x][y]; } } VectorType axis(1.0f); // Starting with an estimate of the principle axis should work better, but doesn't in practice? //left_child - right_child); //axis.normalize(); for (uint iter = 0; iter < 10; iter++) { VectorType x; double max_sum = 0; for (uint i = 0; i < N; i++) { double sum = 0; for (uint j = 0; j < N; j++) sum += axis[j] * covar[i][j]; x[i] = (float)sum; max_sum = i ? math::maximum(max_sum, sum) : sum; } if (max_sum != 0.0f) x *= (float)(1.0f / max_sum); axis = x; } axis.normalize(); VectorType new_left_child(0.0f); VectorType new_right_child(0.0f); double left_weight = 0.0f; double right_weight = 0.0f; for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) { const VectorInfo& vectorInfo = m_vectorsInfo[i]; const float weight = (float)vectorInfo.weight; double t = (m_vectors[vectorInfo.index] - parent_node.m_centroid) * axis; if (t < 0.0f) { new_left_child += m_weightedVectors[vectorInfo.index]; left_weight += weight; } else { new_right_child += m_weightedVectors[vectorInfo.index]; right_weight += weight; } } if ((left_weight > 0.0f) && (right_weight > 0.0f)) { left_child = new_left_child * (float)(1.0f / left_weight); right_child = new_right_child * (float)(1.0f / right_weight); } } uint64 left_weight = 0; uint64 right_weight = 0; uint left_info_index = 0; uint right_info_index = 0; float prev_total_variance = 1e+10f; float left_variance = 0.0f; float right_variance = 0.0f; // FIXME: Excessive upper limit const uint cMaxLoops = 1024; for (uint total_loops = 0; total_loops < cMaxLoops; total_loops++) { left_info_index = right_info_index = parent_node.m_begin; 
VectorType new_left_child(cClear); VectorType new_right_child(cClear); double left_ttsum = 0.0f; double right_ttsum = 0.0f; left_weight = 0; right_weight = 0; if (num_tasks > 1) { distance_comparison_task_params params; params.left_child = &left_child; params.right_child = &right_child; params.begin = parent_node.m_begin; params.end = parent_node.m_end; params.num_tasks = num_tasks; for (uint task = 0; task < params.num_tasks; task++) pTask_pool->queue_object_task(this, &tree_clusterizer::distance_comparison_task, task, ¶ms); pTask_pool->join(); for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) { const VectorInfo& vectorInfo = m_vectorsInfo[i]; if (m_vectorComparison[i]) { new_left_child += m_weightedVectors[vectorInfo.index]; left_ttsum += m_weightedDotProducts[vectorInfo.index]; left_weight += vectorInfo.weight; m_vectorsInfoLeft[left_info_index++] = vectorInfo; } else { new_right_child += m_weightedVectors[vectorInfo.index]; right_ttsum += m_weightedDotProducts[vectorInfo.index]; right_weight += vectorInfo.weight; m_vectorsInfoRight[right_info_index++] = vectorInfo; } } } else { for (uint i = parent_node.m_begin; i < parent_node.m_end; i++) { const VectorInfo& vectorInfo = m_vectorsInfo[i]; double left_dist2 = left_child.squared_distance(m_vectors[vectorInfo.index]); double right_dist2 = right_child.squared_distance(m_vectors[vectorInfo.index]); if (left_dist2 < right_dist2) { new_left_child += m_weightedVectors[vectorInfo.index]; left_ttsum += m_weightedDotProducts[vectorInfo.index]; left_weight += vectorInfo.weight; m_vectorsInfoLeft[left_info_index++] = vectorInfo; } else { new_right_child += m_weightedVectors[vectorInfo.index]; right_ttsum += m_weightedDotProducts[vectorInfo.index]; right_weight += vectorInfo.weight; m_vectorsInfoRight[right_info_index++] = vectorInfo; } } } if ((!left_weight) || (!right_weight)) { parent_node.m_unsplittable = true; return true; } left_variance = (float)(left_ttsum - (new_left_child.dot(new_left_child) / 
left_weight)); right_variance = (float)(right_ttsum - (new_right_child.dot(new_right_child) / right_weight)); new_left_child *= (1.0f / left_weight); new_right_child *= (1.0f / right_weight); left_child = new_left_child; right_child = new_right_child; float total_variance = left_variance + right_variance; if (total_variance < .00001f) break; if (((prev_total_variance - total_variance) / total_variance) < .00001f) break; prev_total_variance = total_variance; } parent_node.m_left = end_node++; parent_node.m_right = end_node++; node_queue.push(NodeInfo(parent_node.m_left, left_variance)); node_queue.push(NodeInfo(parent_node.m_right, right_variance)); vq_node& left_child_node = m_nodes[parent_node.m_left]; vq_node& right_child_node = m_nodes[parent_node.m_right]; left_child_node.m_begin = parent_node.m_begin; left_child_node.m_end = right_child_node.m_begin = left_info_index; right_child_node.m_end = parent_node.m_end; memcpy(&m_vectorsInfo[left_child_node.m_begin], &m_vectorsInfoLeft[parent_node.m_begin], (left_child_node.m_end - left_child_node.m_begin) * sizeof(VectorInfo)); memcpy(&m_vectorsInfo[right_child_node.m_begin], &m_vectorsInfoRight[parent_node.m_begin], (right_child_node.m_end - right_child_node.m_begin) * sizeof(VectorInfo)); left_child_node.m_centroid = left_child; left_child_node.m_total_weight = left_weight; left_child_node.m_variance = left_variance; right_child_node.m_centroid = right_child; right_child_node.m_total_weight = right_weight; right_child_node.m_variance = right_variance; return true; } }; template void split_vectors(VectorType (&vectors)[64], uint (&weights)[64], uint size, VectorType (&result)[2]) { VectorType weightedVectors[64]; double weightedDotProducts[64]; VectorType centroid(cClear); uint64 total_weight = 0; double ttsum = 0.0f; for (uint i = 0; i < size; i++) { const VectorType& v = vectors[i]; const uint weight = weights[i]; weightedVectors[i] = v * (float)weight; centroid += weightedVectors[i]; total_weight += weight; 
weightedDotProducts[i] = v.dot(v) * weight; ttsum += weightedDotProducts[i]; } float variance = (float)(ttsum - (centroid.dot(centroid) / total_weight)); centroid *= (1.0f / total_weight); result[0] = result[1] = centroid; if (variance <= 0.0f || size == 1) return; VectorType furthest; double furthest_dist = -1.0f; for (uint i = 0; i < size; i++) { const VectorType& v = vectors[i]; double dist = v.squared_distance(centroid); if (dist > furthest_dist) { furthest_dist = dist; furthest = v; } } VectorType opposite; double opposite_dist = -1.0f; for (uint i = 0; i < size; i++) { const VectorType& v = vectors[i]; double dist = v.squared_distance(furthest); if (dist > opposite_dist) { opposite_dist = dist; opposite = v; } } VectorType left_child((furthest + centroid) * .5f); VectorType right_child((opposite + centroid) * .5f); if (size > 2) { const uint N = VectorType::num_elements; matrix covar; covar.clear(); for (uint i = 0; i < size; i++) { const VectorType& v = vectors[i] - centroid; const VectorType w = v * (float)weights[i]; for (uint x = 0; x < N; x++) { for (uint y = x; y < N; y++) covar[x][y] = covar[x][y] + v[x] * w[y]; } } float divider = (float)total_weight; for (uint x = 0; x < N; x++) { for (uint y = x; y < N; y++) { covar[x][y] /= divider; covar[y][x] = covar[x][y]; } } VectorType axis(1.0f); for (uint iter = 0; iter < 10; iter++) { VectorType x; double max_sum = 0; for (uint i = 0; i < N; i++) { double sum = 0; for (uint j = 0; j < N; j++) sum += axis[j] * covar[i][j]; x[i] = (float)sum; max_sum = i ? 
math::maximum(max_sum, sum) : sum; } if (max_sum != 0.0f) x *= (float)(1.0f / max_sum); axis = x; } axis.normalize(); VectorType new_left_child(0.0f); VectorType new_right_child(0.0f); double left_weight = 0.0f; double right_weight = 0.0f; for (uint i = 0; i < size; i++) { const VectorType& v = vectors[i]; const float weight = (float)weights[i]; double t = (v - centroid) * axis; if (t < 0.0f) { new_left_child += weightedVectors[i]; left_weight += weight; } else { new_right_child += weightedVectors[i]; right_weight += weight; } } if ((left_weight > 0.0f) && (right_weight > 0.0f)) { left_child = new_left_child * (float)(1.0f / left_weight); right_child = new_right_child * (float)(1.0f / right_weight); } } uint64 left_weight = 0; uint64 right_weight = 0; float prev_total_variance = 1e+10f; float left_variance = 0.0f; float right_variance = 0.0f; const uint cMaxLoops = 1024; for (uint total_loops = 0; total_loops < cMaxLoops; total_loops++) { VectorType new_left_child(cClear); VectorType new_right_child(cClear); double left_ttsum = 0.0f; double right_ttsum = 0.0f; left_weight = 0; right_weight = 0; for (uint i = 0; i < size; i++) { const VectorType& v = vectors[i]; double left_dist2 = left_child.squared_distance(v); double right_dist2 = right_child.squared_distance(v); if (left_dist2 < right_dist2) { new_left_child += weightedVectors[i]; left_ttsum += weightedDotProducts[i]; left_weight += weights[i]; } else { new_right_child += weightedVectors[i]; right_ttsum += weightedDotProducts[i]; right_weight += weights[i]; } } if ((!left_weight) || (!right_weight)) return; left_variance = (float)(left_ttsum - (new_left_child.dot(new_left_child) / left_weight)); right_variance = (float)(right_ttsum - (new_right_child.dot(new_right_child) / right_weight)); new_left_child *= (1.0f / left_weight); new_right_child *= (1.0f / right_weight); left_child = new_left_child; right_child = new_right_child; float total_variance = left_variance + right_variance; if (total_variance < .00001f) 
break; if (((prev_total_variance - total_variance) / total_variance) < .00001f) break; prev_total_variance = total_variance; } result[0] = left_child; result[1] = right_child; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_types.h000066400000000000000000000031701503722002600214500ustar00rootroot00000000000000// File: crn_types.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once namespace crnlib { typedef unsigned char uint8; typedef signed char int8; typedef unsigned short uint16; typedef signed short int16; typedef unsigned int uint32; typedef uint32 uint; typedef signed int int32; #ifdef __GNUC__ typedef unsigned long long uint64; typedef long long int64; #else typedef unsigned __int64 uint64; typedef signed __int64 int64; #endif const uint8 cUINT8_MIN = 0; const uint8 cUINT8_MAX = 0xFFU; const uint16 cUINT16_MIN = 0; const uint16 cUINT16_MAX = 0xFFFFU; const uint32 cUINT32_MIN = 0; const uint32 cUINT32_MAX = 0xFFFFFFFFU; const uint64 cUINT64_MIN = 0; const uint64 cUINT64_MAX = 0xFFFFFFFFFFFFFFFFULL; //0xFFFFFFFFFFFFFFFFui64; const int8 cINT8_MIN = -128; const int8 cINT8_MAX = 127; const int16 cINT16_MIN = -32768; const int16 cINT16_MAX = 32767; const int32 cINT32_MIN = (-2147483647 - 1); const int32 cINT32_MAX = 2147483647; const int64 cINT64_MIN = (int64)0x8000000000000000ULL; //(-9223372036854775807i64 - 1); const int64 cINT64_MAX = (int64)0x7FFFFFFFFFFFFFFFULL; // 9223372036854775807i64; #if CRNLIB_64BIT_POINTERS typedef uint64 uint_ptr; typedef uint64 uint32_ptr; typedef int64 signed_size_t; typedef uint64 ptr_bits_t; #else typedef unsigned int uint_ptr; typedef unsigned int uint32_ptr; typedef signed int signed_size_t; typedef uint32 ptr_bits_t; #endif enum eVarArg { cVarArg }; enum eClear { cClear }; enum eNoClamp { cNoClamp }; enum { cInvalidIndex = -1 }; const uint cIntBits = 32; struct empty_type {}; } // namespace crnlib 
DaemonEngine-crunch-ef4d32f/crnlib/crn_utils.cpp000066400000000000000000000023371503722002600220030ustar00rootroot00000000000000// File: crn_utils.cpp #include "crn_core.h" #include "crn_utils.h" namespace crnlib { namespace utils { void endian_switch_words(uint16* p, uint num) { uint16* p_end = p + num; while (p != p_end) { uint16 k = *p; *p++ = swap16(k); } } void endian_switch_dwords(uint32* p, uint num) { uint32* p_end = p + num; while (p != p_end) { uint32 k = *p; *p++ = swap32(k); } } void copy_words(uint16* pDst, const uint16* pSrc, uint num, bool endian_switch) { if (!endian_switch) memcpy(pDst, pSrc, num << 1U); else { uint16* pDst_end = pDst + num; while (pDst != pDst_end) *pDst++ = swap16(*pSrc++); } } void copy_dwords(uint32* pDst, const uint32* pSrc, uint num, bool endian_switch) { if (!endian_switch) memcpy(pDst, pSrc, num << 2U); else { uint32* pDst_end = pDst + num; while (pDst != pDst_end) *pDst++ = swap32(*pSrc++); } } uint compute_max_mips(uint width, uint height) { if ((width | height) == 0) return 0; uint num_mips = 1; while ((width > 1U) || (height > 1U)) { width >>= 1U; height >>= 1U; num_mips++; } return num_mips; } } // namespace utils } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_utils.h000066400000000000000000000236411503722002600214510ustar00rootroot00000000000000// File: crn_utils.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #define CRNLIB_MIN(a, b) (((a) < (b)) ? (a) : (b)) #define CRNLIB_MAX(a, b) (((a) < (b)) ? (b) : (a)) #define CRNLIB_ARRAYSIZE(x) (sizeof(x) / sizeof(x[0])) #ifdef _MSC_VER // Need to explictly extern these with MSVC, but not MinGW. 
extern "C" unsigned long __cdecl _lrotl(unsigned long, int); #pragma intrinsic(_lrotl) extern "C" unsigned long __cdecl _lrotr(unsigned long, int); #pragma intrinsic(_lrotr) #endif #ifdef WIN32 #define CRNLIB_ROTATE_LEFT(x, k) _lrotl(x, k) #define CRNLIB_ROTATE_RIGHT(x, k) _lrotr(x, k) #else #define CRNLIB_ROTATE_LEFT(x, k) (((x) << (k)) | ((x) >> (32 - (k)))) #define CRNLIB_ROTATE_RIGHT(x, k) (((x) >> (k)) | ((x) << (32 - (k)))) #endif template T decay_array_to_subtype(T (&a)[N]); #define CRNLIB_ARRAY_SIZE(X) (sizeof(X) / sizeof(decay_array_to_subtype(X))) #define CRNLIB_SIZEOF_U32(x) static_cast(sizeof(x)) namespace crnlib { namespace utils { template inline void swap(T& l, T& r) { T temp(l); l = r; r = temp; } template inline void zero_object(T& obj) { memset((void*)&obj, 0, sizeof(obj)); } template inline void zero_this(T* pObj) { memset((void*)pObj, 0, sizeof(*pObj)); } inline bool is_bit_set(uint bits, uint mask) { return (bits & mask) != 0; } inline void set_bit(uint& bits, uint mask, bool state) { if (state) bits |= mask; else bits &= ~mask; } inline bool is_flag_set(uint bits, uint flag) { CRNLIB_ASSERT(flag < 32U); return is_bit_set(bits, 1U << flag); } inline void set_flag(uint& bits, uint flag, bool state) { CRNLIB_ASSERT(flag < 32U); set_bit(bits, 1U << flag, state); } inline void invert_buf(void* pBuf, uint size) { uint8* p = static_cast(pBuf); const uint half_size = size >> 1; for (uint i = 0; i < half_size; i++) utils::swap(p[i], p[size - 1U - i]); } // buffer_is_little_endian is the endianness of the buffer's data template inline void write_obj(const T& obj, void* pBuf, bool buffer_is_little_endian) { const uint8* pSrc = reinterpret_cast(&obj); uint8* pDst = static_cast(pBuf); if (c_crnlib_little_endian_platform == buffer_is_little_endian) memcpy(pDst, pSrc, sizeof(T)); else { for (uint i = 0; i < sizeof(T); i++) pDst[i] = pSrc[sizeof(T) - 1 - i]; } } // buffer_is_little_endian is the endianness of the buffer's data template inline void read_obj(T& 
obj, const void* pBuf, bool buffer_is_little_endian) { const uint8* pSrc = reinterpret_cast(pBuf); uint8* pDst = reinterpret_cast(&obj); if (c_crnlib_little_endian_platform == buffer_is_little_endian) memcpy(pDst, pSrc, sizeof(T)); else { for (uint i = 0; i < sizeof(T); i++) pDst[i] = pSrc[sizeof(T) - 1 - i]; } } template inline bool write_obj(const T& obj, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { if (buf_size < sizeof(T)) return false; utils::write_obj(obj, pBuf, buffer_is_little_endian); pBuf = static_cast(pBuf) + sizeof(T); buf_size -= sizeof(T); return true; } inline bool write_val(uint8 val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } inline bool write_val(uint16 val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } inline bool write_val(uint val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } inline bool write_val(int val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } inline bool write_val(uint64 val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } inline bool write_val(float val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } inline bool write_val(double val, void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { return write_obj(val, pBuf, buf_size, buffer_is_little_endian); } template inline bool read_obj(T& obj, const void*& pBuf, uint& buf_size, bool buffer_is_little_endian) { if (buf_size < sizeof(T)) { zero_object(obj); return false; } utils::read_obj(obj, pBuf, buffer_is_little_endian); pBuf = static_cast(pBuf) + sizeof(T); buf_size -= sizeof(T); return 
true; } #if defined(_MSC_VER) extern CRNLIB_FORCE_INLINE uint16 swap16(uint16 x) { return _byteswap_ushort(x); } extern CRNLIB_FORCE_INLINE uint32 swap32(uint32 x) { return _byteswap_ulong(x); } extern CRNLIB_FORCE_INLINE uint64 swap64(uint64 x) { return _byteswap_uint64(x); } #elif defined(__GNUC__) extern CRNLIB_FORCE_INLINE uint16 swap16(uint16 x) { return static_cast((x << 8U) | (x >> 8U)); } extern CRNLIB_FORCE_INLINE uint32 swap32(uint32 x) { return __builtin_bswap32(x); } extern CRNLIB_FORCE_INLINE uint64 swap64(uint64 x) { return __builtin_bswap64(x); } #else extern CRNLIB_FORCE_INLINE uint16 swap16(uint16 x) { return static_cast((x << 8U) | (x >> 8U)); } extern CRNLIB_FORCE_INLINE uint32 swap32(uint32 x) { return ((x << 24U) | ((x << 8U) & 0x00FF0000U) | ((x >> 8U) & 0x0000FF00U) | (x >> 24U)); } extern CRNLIB_FORCE_INLINE uint64 swap64(uint64 x) { return (static_cast(swap32(static_cast(x))) << 32ULL) | swap32(static_cast(x >> 32U)); } #endif // Assumes x has been read from memory as a little endian value, converts to native endianness for manipulation. extern CRNLIB_FORCE_INLINE uint16 swap_le16_to_native(uint16 x) { return c_crnlib_little_endian_platform ? x : swap16(x); } extern CRNLIB_FORCE_INLINE uint32 swap_le32_to_native(uint32 x) { return c_crnlib_little_endian_platform ? x : swap32(x); } extern CRNLIB_FORCE_INLINE uint64 swap_le64_to_native(uint64 x) { return c_crnlib_little_endian_platform ? x : swap64(x); } // Assumes x has been read from memory as a big endian value, converts to native endianness for manipulation. extern CRNLIB_FORCE_INLINE uint16 swap_be16_to_native(uint16 x) { return c_crnlib_big_endian_platform ? x : swap16(x); } extern CRNLIB_FORCE_INLINE uint32 swap_be32_to_native(uint32 x) { return c_crnlib_big_endian_platform ? x : swap32(x); } extern CRNLIB_FORCE_INLINE uint64 swap_be64_to_native(uint64 x) { return c_crnlib_big_endian_platform ? 
x : swap64(x); } extern CRNLIB_FORCE_INLINE uint32 read_le32(const void* p) { return swap_le32_to_native(*static_cast(p)); } extern CRNLIB_FORCE_INLINE void write_le32(void* p, uint32 x) { *static_cast(p) = swap_le32_to_native(x); } extern CRNLIB_FORCE_INLINE uint64 read_le64(const void* p) { return swap_le64_to_native(*static_cast(p)); } extern CRNLIB_FORCE_INLINE void write_le64(void* p, uint64 x) { *static_cast(p) = swap_le64_to_native(x); } extern CRNLIB_FORCE_INLINE uint32 read_be32(const void* p) { return swap_be32_to_native(*static_cast(p)); } extern CRNLIB_FORCE_INLINE void write_be32(void* p, uint32 x) { *static_cast(p) = swap_be32_to_native(x); } extern CRNLIB_FORCE_INLINE uint64 read_be64(const void* p) { return swap_be64_to_native(*static_cast(p)); } extern CRNLIB_FORCE_INLINE void write_be64(void* p, uint64 x) { *static_cast(p) = swap_be64_to_native(x); } inline void endian_swap_mem16(uint16* p, uint n) { while (n--) { *p = swap16(*p); ++p; } } inline void endian_swap_mem32(uint32* p, uint n) { while (n--) { *p = swap32(*p); ++p; } } inline void endian_swap_mem64(uint64* p, uint n) { while (n--) { *p = swap64(*p); ++p; } } inline void endian_swap_mem(void* p, uint size_in_bytes, uint type_size) { switch (type_size) { case sizeof(uint16): endian_swap_mem16(static_cast(p), size_in_bytes / type_size); break; case sizeof(uint32): endian_swap_mem32(static_cast(p), size_in_bytes / type_size); break; case sizeof(uint64): endian_swap_mem64(static_cast(p), size_in_bytes / type_size); break; } } inline void fast_memset(void* pDst, int val, size_t size) { memset(pDst, val, size); } inline void fast_memcpy(void* pDst, const void* pSrc, size_t size) { memcpy(pDst, pSrc, size); } inline uint count_leading_zeros(uint v) { uint temp; uint n = 32; temp = v >> 16; if (temp) { n -= 16; v = temp; } temp = v >> 8; if (temp) { n -= 8; v = temp; } temp = v >> 4; if (temp) { n -= 4; v = temp; } temp = v >> 2; if (temp) { n -= 2; v = temp; } temp = v >> 1; if (temp) { n -= 1; 
v = temp; } if (v & 1) n--; return n; } inline uint count_leading_zeros16(uint v) { CRNLIB_ASSERT(v < 0x10000); uint temp; uint n = 16; temp = v >> 8; if (temp) { n -= 8; v = temp; } temp = v >> 4; if (temp) { n -= 4; v = temp; } temp = v >> 2; if (temp) { n -= 2; v = temp; } temp = v >> 1; if (temp) { n -= 1; v = temp; } if (v & 1) n--; return n; } void endian_switch_words(uint16* p, uint num); void endian_switch_dwords(uint32* p, uint num); void copy_words(uint16* pDst, const uint16* pSrc, uint num, bool endian_switch); void copy_dwords(uint32* pDst, const uint32* pSrc, uint num, bool endian_switch); uint compute_max_mips(uint width, uint height); } // namespace utils } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_value.cpp000066400000000000000000000006251503722002600217550ustar00rootroot00000000000000// File: crn_value.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_value.h" namespace crnlib { const char* gValueDataTypeStrings[cDTTotal + 1] = { "invalid", "string", "bool", "int", "uint", "float", "vec3f", "vec3i", NULL, }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_value.h000066400000000000000000000460051503722002600214240ustar00rootroot00000000000000// File: crn_value.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_strutils.h" #include "crn_dynamic_string.h" #include "crn_vec.h" namespace crnlib { enum value_data_type { cDTInvalid, cDTString, cDTBool, cDTInt, cDTUInt, cDTFloat, cDTVec3F, cDTVec3I, cDTTotal }; extern const char* gValueDataTypeStrings[cDTTotal + 1]; class value { public: value() : m_type(cDTInvalid) { } value(const char* pStr) : m_pStr(crnlib_new(pStr)), m_type(cDTString) { } value(const dynamic_string& str) : m_pStr(crnlib_new(str)), m_type(cDTString) { } explicit value(bool v) : m_bool(v), m_type(cDTBool) { } value(int v) : m_int(v), m_type(cDTInt) { } value(uint v) : m_uint(v), m_type(cDTUInt) { } value(float v) : 
m_float(v), m_type(cDTFloat) { } value(const vec3F& v) : m_pVec3F(crnlib_new(v)), m_type(cDTVec3F) { } value(const vec3I& v) : m_pVec3I(crnlib_new(v)), m_type(cDTVec3I) { } ~value() { switch (m_type) { case cDTString: crnlib_delete(m_pStr); break; case cDTVec3F: crnlib_delete(m_pVec3F); break; case cDTVec3I: crnlib_delete(m_pVec3I); break; default: break; } } value(const value& other) : m_type(cDTInvalid) { *this = other; } value& operator=(const value& other) { if (this == &other) return *this; change_type(other.m_type); switch (other.m_type) { case cDTString: m_pStr->set(*other.m_pStr); break; case cDTBool: m_bool = other.m_bool; break; case cDTInt: m_int = other.m_int; break; case cDTUInt: m_uint = other.m_uint; break; case cDTFloat: m_float = other.m_float; break; case cDTVec3F: m_pVec3F->set(*other.m_pVec3F); break; case cDTVec3I: m_pVec3I->set(*other.m_pVec3I); break; default: break; } return *this; } inline value_data_type get_data_type() const { return m_type; } void clear() { clear_dynamic(); m_type = cDTInvalid; } void set_string(const char* pStr) { set_str(pStr); } void set_int(int v) { clear_dynamic(); m_type = cDTInt; m_int = v; } void set_uint(uint v) { clear_dynamic(); m_type = cDTUInt; m_uint = v; } void set_bool(bool v) { clear_dynamic(); m_type = cDTBool; m_bool = v; } void set_float(float v) { clear_dynamic(); m_type = cDTFloat; m_float = v; } void set_vec(const vec3F& v) { change_type(cDTVec3F); m_pVec3F->set(v); } void set_vec(const vec3I& v) { change_type(cDTVec3I); m_pVec3I->set(v); } bool parse(const char* p) { if ((!p) || (!p[0])) { clear(); return false; } if (crnlib_stricmp(p, "false") == 0) { set_bool(false); return true; } else if (crnlib_stricmp(p, "true") == 0) { set_bool(true); return true; } if (p[0] == '\"') { dynamic_string str; str = p + 1; if (!str.is_empty()) { if (str[str.get_len() - 1] == '\"') { str.left(str.get_len() - 1); set_str(str); return true; } } } if (strchr(p, ',') != NULL) { float fx = 0, fy = 0, fz = 0; #ifdef 
_MSC_VER if (sscanf_s(p, "%f,%f,%f", &fx, &fy, &fz) == 3) #else if (sscanf(p, "%f,%f,%f", &fx, &fy, &fz) == 3) #endif { bool as_float = true; int ix = 0, iy = 0, iz = 0; #ifdef _MSC_VER if (sscanf_s(p, "%i,%i,%i", &ix, &iy, &iz) == 3) #else if (sscanf(p, "%i,%i,%i", &ix, &iy, &iz) == 3) #endif { if ((ix == fx) && (iy == fy) && (iz == fz)) as_float = false; } if (as_float) set_vec(vec3F(fx, fy, fz)); else set_vec(vec3I(ix, iy, iz)); return true; } } const char* q = p; bool success = string_to_uint(q, m_uint); if ((success) && (*q == 0)) { set_uint(m_uint); return true; } q = p; success = string_to_int(q, m_int); if ((success) && (*q == 0)) { set_int(m_int); return true; } q = p; success = string_to_float(q, m_float); if ((success) && (*q == 0)) { set_float(m_float); return true; } set_string(p); return true; } dynamic_string& get_as_string(dynamic_string& dst) const { switch (m_type) { case cDTInvalid: dst.clear(); break; case cDTString: dst = *m_pStr; break; case cDTBool: dst = m_bool ? 
"TRUE" : "FALSE"; break; case cDTInt: dst.format("%i", m_int); break; case cDTUInt: dst.format("%u", m_uint); break; case cDTFloat: dst.format("%f", m_float); break; case cDTVec3F: dst.format("%f,%f,%f", (*m_pVec3F)[0], (*m_pVec3F)[1], (*m_pVec3F)[2]); break; case cDTVec3I: dst.format("%i,%i,%i", (*m_pVec3I)[0], (*m_pVec3I)[1], (*m_pVec3I)[2]); break; default: break; } return dst; } bool get_as_int(int& val, uint component = 0) const { switch (m_type) { case cDTInvalid: { val = 0; return false; } case cDTString: { const char* p = m_pStr->get_ptr(); return string_to_int(p, val); } case cDTBool: val = m_bool; break; case cDTInt: val = m_int; break; case cDTUInt: { if (m_uint > INT_MAX) { val = 0; return false; } val = m_uint; break; } case cDTFloat: { if ((m_float < (float)INT_MIN) || (m_float > (float)INT_MAX)) { val = 0; return false; } val = (int)m_float; break; } case cDTVec3F: { if (component > 2) { val = 0; return false; } if (((*m_pVec3F)[component] < (float)INT_MIN) || ((*m_pVec3F)[component] > (float)INT_MAX)) { val = 0; return false; } val = (int)(*m_pVec3F)[component]; break; } case cDTVec3I: { if (component > 2) { val = 0; return false; } val = (int)(*m_pVec3I)[component]; break; } default: break; } return true; } bool get_as_uint(uint& val, uint component = 0) const { switch (m_type) { case cDTInvalid: { val = 0; return false; } case cDTString: { const char* p = m_pStr->get_ptr(); return string_to_uint(p, val); } case cDTBool: { val = m_bool; break; } case cDTInt: { if (m_int < 0) { val = 0; return false; } val = (uint)m_int; break; } case cDTUInt: { val = m_uint; break; } case cDTFloat: { if ((m_float < 0) || (m_float > (float)UINT_MAX)) { val = 0; return false; } val = (uint)m_float; break; } case cDTVec3F: { if (component > 2) { val = 0; return false; } if (((*m_pVec3F)[component] < 0) || ((*m_pVec3F)[component] > (float)UINT_MAX)) { val = 0; return false; } val = (uint)(*m_pVec3F)[component]; break; } case cDTVec3I: { if (component > 2) { val = 0; 
return false; } if ((*m_pVec3I)[component] < 0) { val = 0; return false; } val = (uint)(*m_pVec3I)[component]; break; } default: break; } return true; } bool get_as_bool(bool& val, uint component = 0) const { switch (m_type) { case cDTInvalid: { val = false; return false; } case cDTString: { const char* p = m_pStr->get_ptr(); return string_to_bool(p, val); } case cDTBool: { val = m_bool; break; } case cDTInt: { val = (m_int != 0); break; } case cDTUInt: { val = (m_uint != 0); break; } case cDTFloat: { val = (m_float != 0); break; } case cDTVec3F: { if (component > 2) { val = false; return false; } val = ((*m_pVec3F)[component] != 0); break; } case cDTVec3I: { if (component > 2) { val = false; return false; } val = ((*m_pVec3I)[component] != 0); break; } default: break; } return true; } bool get_as_float(float& val, uint component = 0) const { switch (m_type) { case cDTInvalid: { val = 0; return false; } case cDTString: { const char* p = m_pStr->get_ptr(); return string_to_float(p, val); } case cDTBool: { val = m_bool; break; } case cDTInt: { val = (float)m_int; break; } case cDTUInt: { val = (float)m_uint; break; } case cDTFloat: { val = m_float; break; } case cDTVec3F: { if (component > 2) { val = 0; return false; } val = (*m_pVec3F)[component]; break; } case cDTVec3I: { if (component > 2) { val = 0; return false; } val = (float)(*m_pVec3I)[component]; break; } default: break; } return true; } bool get_as_vec(vec3F& val) const { switch (m_type) { case cDTInvalid: { val.clear(); return false; } case cDTString: { const char* p = m_pStr->get_ptr(); float x = 0, y = 0, z = 0; #ifdef _MSC_VER if (sscanf_s(p, "%f,%f,%f", &x, &y, &z) == 3) #else if (sscanf(p, "%f,%f,%f", &x, &y, &z) == 3) #endif { val.set(x, y, z); return true; } else { val.clear(); return false; } } case cDTBool: { val.set(m_bool); break; } case cDTInt: { val.set(static_cast(m_int)); break; } case cDTUInt: { val.set(static_cast(m_uint)); break; } case cDTFloat: { val.set(m_float); break; } case 
cDTVec3F: { val = *m_pVec3F; break; } case cDTVec3I: { val.set((float)(*m_pVec3I)[0], (float)(*m_pVec3I)[1], (float)(*m_pVec3I)[2]); break; } default: break; } return true; } bool get_as_vec(vec3I& val) const { switch (m_type) { case cDTInvalid: { val.clear(); return false; } case cDTString: { const char* p = m_pStr->get_ptr(); float x = 0, y = 0, z = 0; #ifdef _MSC_VER if (sscanf_s(p, "%f,%f,%f", &x, &y, &z) == 3) #else if (sscanf(p, "%f,%f,%f", &x, &y, &z) == 3) #endif { if ((x < (float)INT_MIN) || (x > (float)INT_MAX) || (y < (float)INT_MIN) || (y > (float)INT_MAX) || (z < (float)INT_MIN) || (z > (float)INT_MAX)) { val.clear(); return false; } val.set((int)x, (int)y, (int)z); return true; } else { val.clear(); return false; } break; } case cDTBool: { val.set(m_bool); break; } case cDTInt: { val.set(m_int); break; } case cDTUInt: { val.set(m_uint); break; } case cDTFloat: { val.set((int)m_float); break; } case cDTVec3F: { val.set((int)(*m_pVec3F)[0], (int)(*m_pVec3F)[1], (int)(*m_pVec3F)[2]); break; } case cDTVec3I: { val = *m_pVec3I; break; } default: break; } return true; } bool set_zero() { switch (m_type) { case cDTInvalid: { return false; } case cDTString: { m_pStr->empty(); break; } case cDTBool: { m_bool = false; break; } case cDTInt: { m_int = 0; break; } case cDTUInt: { m_uint = 0; break; } case cDTFloat: { m_float = 0; break; } case cDTVec3F: { m_pVec3F->clear(); break; } case cDTVec3I: { m_pVec3I->clear(); break; } default: break; } return true; } bool is_vector() const { switch (m_type) { case cDTVec3F: case cDTVec3I: return true; default: break; } return false; } uint get_num_components() const { switch (m_type) { case cDTVec3F: case cDTVec3I: return 3; default: break; } return 1; } bool is_numeric() const { switch (m_type) { case cDTInt: case cDTUInt: case cDTFloat: case cDTVec3F: case cDTVec3I: return true; default: break; } return false; } bool is_float() const { switch (m_type) { case cDTFloat: case cDTVec3F: return true; default: break; } return 
false; } bool is_integer() const { switch (m_type) { case cDTInt: case cDTUInt: case cDTVec3I: return true; default: break; } return false; } bool is_signed() const { switch (m_type) { case cDTInt: case cDTFloat: case cDTVec3F: case cDTVec3I: return true; default: break; } return false; } bool is_string() const { return m_type == cDTString; } int serialize(void* pBuf, uint buf_size, bool little_endian) const { uint buf_left = buf_size; uint8 t = (uint8)m_type; if (!utils::write_obj(t, pBuf, buf_left, little_endian)) return -1; switch (m_type) { case cDTString: { int bytes_written = m_pStr->serialize(pBuf, buf_left, little_endian); if (bytes_written < 0) return -1; pBuf = static_cast(pBuf) + bytes_written; buf_left -= bytes_written; break; } case cDTBool: { if (!utils::write_obj(m_bool, pBuf, buf_left, little_endian)) return -1; break; } case cDTInt: case cDTUInt: case cDTFloat: { if (!utils::write_obj(m_float, pBuf, buf_left, little_endian)) return -1; break; } case cDTVec3F: { for (uint i = 0; i < 3; i++) if (!utils::write_obj((*m_pVec3F)[i], pBuf, buf_left, little_endian)) return -1; break; } case cDTVec3I: { for (uint i = 0; i < 3; i++) if (!utils::write_obj((*m_pVec3I)[i], pBuf, buf_left, little_endian)) return -1; break; } default: break; } return buf_size - buf_left; } int deserialize(const void* pBuf, uint buf_size, bool little_endian) { uint buf_left = buf_size; uint8 t; if (!utils::read_obj(t, pBuf, buf_left, little_endian)) return -1; if (t >= cDTTotal) return -1; m_type = static_cast(t); switch (m_type) { case cDTString: { change_type(cDTString); int bytes_read = m_pStr->deserialize(pBuf, buf_left, little_endian); if (bytes_read < 0) return -1; pBuf = static_cast(pBuf) + bytes_read; buf_left -= bytes_read; break; } case cDTBool: { if (!utils::read_obj(m_bool, pBuf, buf_left, little_endian)) return -1; break; } case cDTInt: case cDTUInt: case cDTFloat: { if (!utils::read_obj(m_float, pBuf, buf_left, little_endian)) return -1; break; } case cDTVec3F: { 
change_type(cDTVec3F); for (uint i = 0; i < 3; i++) if (!utils::read_obj((*m_pVec3F)[i], pBuf, buf_left, little_endian)) return -1; break; } case cDTVec3I: { change_type(cDTVec3I); for (uint i = 0; i < 3; i++) if (!utils::read_obj((*m_pVec3I)[i], pBuf, buf_left, little_endian)) return -1; break; } default: break; } return buf_size - buf_left; } void swap(value& other) { for (uint i = 0; i < cUnionSize; i++) std::swap(m_union[i], other.m_union[i]); std::swap(m_type, other.m_type); } private: void clear_dynamic() { if (m_type == cDTVec3F) { crnlib_delete(m_pVec3F); m_pVec3F = NULL; m_type = cDTInvalid; } else if (m_type == cDTVec3I) { crnlib_delete(m_pVec3I); m_pVec3I = NULL; m_type = cDTInvalid; } else if (m_type == cDTString) { crnlib_delete(m_pStr); m_pStr = NULL; m_type = cDTInvalid; } } void change_type(value_data_type type) { if (type != m_type) { clear_dynamic(); m_type = type; switch (m_type) { case cDTString: m_pStr = crnlib_new(); break; case cDTVec3F: m_pVec3F = crnlib_new(); break; case cDTVec3I: m_pVec3I = crnlib_new(); break; default: break; } } } void set_str(const dynamic_string& s) { if (m_type == cDTString) m_pStr->set(s); else { clear_dynamic(); m_type = cDTString; m_pStr = crnlib_new(s); } } void set_str(const char* p) { if (m_type == cDTString) m_pStr->set(p); else { clear_dynamic(); m_type = cDTString; m_pStr = crnlib_new(p); } } enum { cUnionSize = 1 }; union { bool m_bool; int m_int; uint m_uint; float m_float; vec3F* m_pVec3F; vec3I* m_pVec3I; dynamic_string* m_pStr; uint m_union[cUnionSize]; }; value_data_type m_type; }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_vec.h000066400000000000000000000426221503722002600210660ustar00rootroot00000000000000// File: crn_vec.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_core.h" #include "crn_rand.h" namespace crnlib { template class vec : public helpers::rel_ops > { public: typedef T scalar_type; enum { num_elements = N }; inline vec() {} 
inline vec(eClear) { clear(); } inline vec(const vec& other) { for (uint i = 0; i < N; i++) m_s[i] = other.m_s[i]; } template inline vec(const vec& other) { set(other); } template inline vec(const vec& other, T w) { *this = other; m_s[N - 1] = w; } explicit inline vec(T val) { set(val); } inline vec(T val0, T val1) { set(val0, val1); } inline vec(T val0, T val1, T val2) { set(val0, val1, val2); } inline vec(T val0, T val1, T val2, T val3) { set(val0, val1, val2, val3); } inline void clear() { if (N > 4) memset(m_s, 0, sizeof(m_s)); else { for (uint i = 0; i < N; i++) m_s[i] = 0; } } template inline vec& set(const vec& other) { if ((void*)this == (void*)&other) return *this; const uint m = math::minimum(N, ON); uint i; for (i = 0; i < m; i++) m_s[i] = static_cast(other[i]); for (; i < N; i++) m_s[i] = 0; return *this; } inline vec& set_component(uint index, T val) { CRNLIB_ASSERT(index < N); m_s[index] = val; return *this; } inline vec& set(T val) { for (uint i = 0; i < N; i++) m_s[i] = val; return *this; } inline vec& set(T val0, T val1) { m_s[0] = val0; if (N >= 2) { m_s[1] = val1; for (uint i = 2; i < N; i++) m_s[i] = 0; } return *this; } inline vec& set(T val0, T val1, T val2) { m_s[0] = val0; if (N >= 2) { m_s[1] = val1; if (N >= 3) { m_s[2] = val2; for (uint i = 3; i < N; i++) m_s[i] = 0; } } return *this; } inline vec& set(T val0, T val1, T val2, T val3) { m_s[0] = val0; if (N >= 2) { m_s[1] = val1; if (N >= 3) { m_s[2] = val2; if (N >= 4) { m_s[3] = val3; for (uint i = 4; i < N; i++) m_s[i] = 0; } } } return *this; } inline vec& set(const T* pValues) { for (uint i = 0; i < N; i++) m_s[i] = pValues[i]; return *this; } template inline vec& swizzle_set(const vec& other, uint i) { return set(static_cast(other[i])); } template inline vec& swizzle_set(const vec& other, uint i, uint j) { return set(static_cast(other[i]), static_cast(other[j])); } template inline vec& swizzle_set(const vec& other, uint i, uint j, uint k) { return set(static_cast(other[i]), 
static_cast(other[j]), static_cast(other[k])); } template inline vec& swizzle_set(const vec& other, uint i, uint j, uint k, uint l) { return set(static_cast(other[i]), static_cast(other[j]), static_cast(other[k]), static_cast(other[l])); } inline vec& operator=(const vec& rhs) { if (this != &rhs) { for (uint i = 0; i < N; i++) m_s[i] = rhs.m_s[i]; } return *this; } template inline vec& operator=(const vec& other) { if ((void*)this == (void*)&other) return *this; uint s = math::minimum(N, O); uint i; for (i = 0; i < s; i++) m_s[i] = static_cast(other[i]); for (; i < N; i++) m_s[i] = 0; return *this; } inline bool operator==(const vec& rhs) const { for (uint i = 0; i < N; i++) if (!(m_s[i] == rhs.m_s[i])) return false; return true; } inline bool operator<(const vec& rhs) const { for (uint i = 0; i < N; i++) { if (m_s[i] < rhs.m_s[i]) return true; else if (!(m_s[i] == rhs.m_s[i])) return false; } return false; } inline T operator[](uint i) const { CRNLIB_ASSERT(i < N); return m_s[i]; } inline T& operator[](uint i) { CRNLIB_ASSERT(i < N); return m_s[i]; } inline operator size_t() const { return (size_t)fast_hash(this, sizeof(*this)); } inline T get_x(void) const { return m_s[0]; } inline T get_y(void) const { CRNLIB_ASSUME(N >= 2); return m_s[1]; } inline T get_z(void) const { CRNLIB_ASSUME(N >= 3); return m_s[2]; } inline T get_w(void) const { CRNLIB_ASSUME(N >= 4); return m_s[3]; } inline vec& set_x(T v) { m_s[0] = v; return *this; } inline vec& set_y(T v) { CRNLIB_ASSUME(N >= 2); m_s[1] = v; return *this; } inline vec& set_z(T v) { CRNLIB_ASSUME(N >= 3); m_s[2] = v; return *this; } inline vec& set_w(T v) { CRNLIB_ASSUME(N >= 4); m_s[3] = v; return *this; } inline vec as_point() const { vec result(*this); result[N - 1] = 1; return result; } inline vec as_dir() const { vec result(*this); result[N - 1] = 0; return result; } inline vec<2, T> select2(uint i, uint j) const { CRNLIB_ASSERT((i < N) && (j < N)); return vec<2, T>(m_s[i], m_s[j]); } inline vec<3, T> 
select3(uint i, uint j, uint k) const { CRNLIB_ASSERT((i < N) && (j < N) && (k < N)); return vec<3, T>(m_s[i], m_s[j], m_s[k]); } inline vec<4, T> select4(uint i, uint j, uint k, uint l) const { CRNLIB_ASSERT((i < N) && (j < N) && (k < N) && (l < N)); return vec<4, T>(m_s[i], m_s[j], m_s[k], m_s[l]); } inline bool is_dir() const { return m_s[N - 1] == 0; } inline bool is_vector() const { return is_dir(); } inline bool is_point() const { return m_s[N - 1] == 1; } inline vec project() const { vec result(*this); if (result[N - 1]) result /= result[N - 1]; return result; } inline vec broadcast(unsigned i) const { return vec((*this)[i]); } inline vec swizzle(uint i, uint j) const { return vec((*this)[i], (*this)[j]); } inline vec swizzle(uint i, uint j, uint k) const { return vec((*this)[i], (*this)[j], (*this)[k]); } inline vec swizzle(uint i, uint j, uint k, uint l) const { return vec((*this)[i], (*this)[j], (*this)[k], (*this)[l]); } inline vec operator-() const { vec result; for (uint i = 0; i < N; i++) result.m_s[i] = -m_s[i]; return result; } inline vec operator+() const { return *this; } inline vec& operator+=(const vec& other) { for (uint i = 0; i < N; i++) m_s[i] += other.m_s[i]; return *this; } inline vec& operator-=(const vec& other) { for (uint i = 0; i < N; i++) m_s[i] -= other.m_s[i]; return *this; } inline vec& operator*=(const vec& other) { for (uint i = 0; i < N; i++) m_s[i] *= other.m_s[i]; return *this; } inline vec& operator/=(const vec& other) { for (uint i = 0; i < N; i++) m_s[i] /= other.m_s[i]; return *this; } inline vec& operator*=(T s) { for (uint i = 0; i < N; i++) m_s[i] *= s; return *this; } inline vec& operator/=(T s) { for (uint i = 0; i < N; i++) m_s[i] /= s; return *this; } friend inline T operator*(const vec& lhs, const vec& rhs) { T result = lhs.m_s[0] * rhs.m_s[0]; for (uint i = 1; i < N; i++) result += lhs.m_s[i] * rhs.m_s[i]; return result; } friend inline vec operator*(const vec& lhs, T val) { vec result; for (uint i = 0; i < N; 
i++) result.m_s[i] = lhs.m_s[i] * val; return result; } friend inline vec operator*(T val, const vec& lhs) { vec result; for (uint i = 0; i < N; i++) result.m_s[i] = lhs.m_s[i] * val; return result; } friend inline vec operator/(const vec& lhs, const vec& rhs) { vec result; for (uint i = 0; i < N; i++) result.m_s[i] = lhs.m_s[i] / rhs.m_s[i]; return result; } friend inline vec operator/(const vec& lhs, T val) { vec result; for (uint i = 0; i < N; i++) result.m_s[i] = lhs.m_s[i] / val; return result; } friend inline vec operator+(const vec& lhs, const vec& rhs) { vec result; for (uint i = 0; i < N; i++) result.m_s[i] = lhs.m_s[i] + rhs.m_s[i]; return result; } friend inline vec operator-(const vec& lhs, const vec& rhs) { vec result; for (uint i = 0; i < N; i++) result.m_s[i] = lhs.m_s[i] - rhs.m_s[i]; return result; } static inline vec<3, T> cross2(const vec& a, const vec& b) { CRNLIB_ASSUME(N >= 2); return vec<3, T>(0, 0, a[0] * b[1] - a[1] * b[0]); } static inline vec<3, T> cross3(const vec& a, const vec& b) { CRNLIB_ASSUME(N >= 3); return vec<3, T>(a[1] * b[2] - a[2] * b[1], a[2] * b[0] - a[0] * b[2], a[0] * b[1] - a[1] * b[0]); } static inline vec<3, T> cross(const vec& a, const vec& b) { CRNLIB_ASSUME(N >= 2); if (N == 2) return cross2(a, b); else return cross3(a, b); } inline T dot(const vec& rhs) const { return *this * rhs; } inline T dot2(const vec& rhs) const { CRNLIB_ASSUME(N >= 2); return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1]; } inline T dot3(const vec& rhs) const { CRNLIB_ASSUME(N >= 3); return m_s[0] * rhs.m_s[0] + m_s[1] * rhs.m_s[1] + m_s[2] * rhs.m_s[2]; } inline T norm(void) const { T sum = m_s[0] * m_s[0]; for (uint i = 1; i < N; i++) sum += m_s[i] * m_s[i]; return sum; } inline T length(void) const { return sqrt(norm()); } inline T squared_distance(const vec& rhs) const { T dist2 = 0; for (uint i = 0; i < N; i++) { T d = m_s[i] - rhs.m_s[i]; dist2 += d * d; } return dist2; } inline T squared_distance(const vec& rhs, T early_out) const { T 
dist2 = 0; for (uint i = 0; i < N; i++) { T d = m_s[i] - rhs.m_s[i]; dist2 += d * d; if (dist2 > early_out) break; } return dist2; } inline T distance(const vec& rhs) const { T dist2 = 0; for (uint i = 0; i < N; i++) { T d = m_s[i] - rhs.m_s[i]; dist2 += d * d; } return sqrt(dist2); } inline vec inverse() const { vec result; for (uint i = 0; i < N; i++) result[i] = m_s[i] ? (1.0f / m_s[i]) : 0; return result; } inline double normalize(const vec* pDefaultVec = NULL) { double n = m_s[0] * m_s[0]; for (uint i = 1; i < N; i++) n += m_s[i] * m_s[i]; if (n != 0) *this *= static_cast((1.0f / sqrt(n))); else if (pDefaultVec) *this = *pDefaultVec; return n; } inline double normalize3(const vec* pDefaultVec = NULL) { CRNLIB_ASSUME(N >= 3); double n = m_s[0] * m_s[0] + m_s[1] * m_s[1] + m_s[2] * m_s[2]; if (n != 0) *this *= static_cast((1.0f / sqrt(n))); else if (pDefaultVec) *this = *pDefaultVec; return n; } inline vec& normalize_in_place(const vec* pDefaultVec = NULL) { normalize(pDefaultVec); return *this; } inline vec& normalize3_in_place(const vec* pDefaultVec = NULL) { normalize3(pDefaultVec); return *this; } inline vec get_normalized(const vec* pDefaultVec = NULL) const { vec result(*this); result.normalize(pDefaultVec); return result; } inline vec get_normalized3(const vec* pDefaultVec = NULL) const { vec result(*this); result.normalize3(pDefaultVec); return result; } inline vec& clamp(T l, T h) { for (uint i = 0; i < N; i++) m_s[i] = static_cast(math::clamp(m_s[i], l, h)); return *this; } inline vec& clamp(const vec& l, const vec& h) { for (uint i = 0; i < N; i++) m_s[i] = static_cast(math::clamp(m_s[i], l[i], h[i])); return *this; } inline bool is_within_bounds(const vec& l, const vec& h) const { for (uint i = 0; i < N; i++) if ((m_s[i] < l[i]) || (m_s[i] > h[i])) return false; return true; } inline bool is_within_bounds(T l, T h) const { for (uint i = 0; i < N; i++) if ((m_s[i] < l) || (m_s[i] > h)) return false; return true; } inline uint get_major_axis(void) 
const { T m = fabs(m_s[0]); uint r = 0; for (uint i = 1; i < N; i++) { const T c = fabs(m_s[i]); if (c > m) { m = c; r = i; } } return r; } inline uint get_minor_axis(void) const { T m = fabs(m_s[0]); uint r = 0; for (uint i = 1; i < N; i++) { const T c = fabs(m_s[i]); if (c < m) { m = c; r = i; } } return r; } inline T get_absolute_minimum(void) const { T result = fabs(m_s[0]); for (uint i = 1; i < N; i++) result = math::minimum(result, fabs(m_s[i])); return result; } inline T get_absolute_maximum(void) const { T result = fabs(m_s[0]); for (uint i = 1; i < N; i++) result = math::maximum(result, fabs(m_s[i])); return result; } inline T get_minimum(void) const { T result = m_s[0]; for (uint i = 1; i < N; i++) result = math::minimum(result, m_s[i]); return result; } inline T get_maximum(void) const { T result = m_s[0]; for (uint i = 1; i < N; i++) result = math::maximum(result, m_s[i]); return result; } inline vec& remove_unit_direction(const vec& dir) { T p = *this * dir; *this -= (p * dir); return *this; } inline bool all_less(const vec& b) const { for (uint i = 0; i < N; i++) if (m_s[i] >= b.m_s[i]) return false; return true; } inline bool all_less_equal(const vec& b) const { for (uint i = 0; i < N; i++) if (m_s[i] > b.m_s[i]) return false; return true; } inline bool all_greater(const vec& b) const { for (uint i = 0; i < N; i++) if (m_s[i] <= b.m_s[i]) return false; return true; } inline bool all_greater_equal(const vec& b) const { for (uint i = 0; i < N; i++) if (m_s[i] < b.m_s[i]) return false; return true; } inline vec get_negate_xyz() const { vec ret; ret[0] = -m_s[0]; if (N >= 2) ret[1] = -m_s[1]; if (N >= 3) ret[2] = -m_s[2]; for (uint i = 3; i < N; i++) ret[i] = m_s[i]; return ret; } inline vec& invert() { for (uint i = 0; i < N; i++) if (m_s[i] != 0.0f) m_s[i] = 1.0f / m_s[i]; return *this; } static inline vec mul_components(const vec& lhs, const vec& rhs) { vec result; for (uint i = 0; i < N; i++) result[i] = lhs.m_s[i] * rhs.m_s[i]; return result; } 
// Returns a vector that has been clear()'ed and then had component i set to 1.
static inline vec make_axis(uint i) {
  vec result;
  result.clear();
  result[i] = 1;
  return result;
}

// Per-component math::maximum of a and b.
static inline vec component_max(const vec& a, const vec& b) {
  vec ret;
  for (uint i = 0; i < N; i++)
    ret.m_s[i] = math::maximum(a.m_s[i], b.m_s[i]);
  return ret;
}

// Per-component math::minimum of a and b.
static inline vec component_min(const vec& a, const vec& b) {
  vec ret;
  for (uint i = 0; i < N; i++)
    ret.m_s[i] = math::minimum(a.m_s[i], b.m_s[i]);
  return ret;
}

// Per-component linear interpolation: a + (b - a) * t (t == 0 gives a, t == 1 gives b).
static inline vec lerp(const vec& a, const vec& b, float t) {
  vec ret;
  for (uint i = 0; i < N; i++)
    ret.m_s[i] = a.m_s[i] + (b.m_s[i] - a.m_s[i]) * t;
  return ret;
}

// make_random(): every component is drawn independently with r.frand().
// The scalar overloads pass the same (l, h) pair for each component; the vec overloads pass l[i], h[i].
static inline vec make_random(random& r, float l, float h) {
  vec result;
  for (uint i = 0; i < N; i++)
    result[i] = r.frand(l, h);
  return result;
}

static inline vec make_random(fast_random& r, float l, float h) {
  vec result;
  for (uint i = 0; i < N; i++)
    result[i] = r.frand(l, h);
  return result;
}

static inline vec make_random(random& r, const vec& l, const vec& h) {
  vec result;
  for (uint i = 0; i < N; i++)
    result[i] = r.frand(l[i], h[i]);
  return result;
}

static inline vec make_random(fast_random& r, const vec& l, const vec& h) {
  vec result;
  for (uint i = 0; i < N; i++)
    result[i] = r.frand(l[i], h[i]);
  return result;
}

private:
// The N components of the vector.
T m_s[N];
};

// Convenience typedefs for common dimension/scalar combinations.
// NOTE(review): the template argument lists of the "crnlib::vector" typedefs below (and of the
// scalar_type specialization) appear to be missing from this copy of the source - confirm against upstream.
typedef vec<1, double> vec1D;
typedef vec<2, double> vec2D;
typedef vec<3, double> vec3D;
typedef vec<4, double> vec4D;

typedef vec<1, float> vec1F;

typedef vec<2, float> vec2F;
typedef crnlib::vector vec2F_array;

typedef vec<3, float> vec3F;
typedef crnlib::vector vec3F_array;

typedef vec<4, float> vec4F;
typedef crnlib::vector vec4F_array;

typedef vec<2, int> vec2I;
typedef vec<3, int> vec3I;

typedef vec<2, int16> vec2I16;
typedef vec<3, int16> vec3I16;

// scalar_type specialization for vec: sets cFlag to true, makes default construction and
// destruction no-ops, and implements copy construction as a raw memcpy of the object.
template struct scalar_type > {
  enum { cFlag = true };
  static inline void construct(vec* p) {}
  static inline void construct(vec* p, const vec& init) { memcpy(p, &init, sizeof(vec)); }
  static inline void construct_array(vec*, uint) {}
  static inline void destruct(vec*) {}
  static inline void destruct_array(vec*, uint) {}
};

} // namespace crnlib
DaemonEngine-crunch-ef4d32f/crnlib/crn_vec_interval.h000066400000000000000000000020601503722002600227620ustar00rootroot00000000000000
// File: crn_vec_interval.h
// See Copyright Notice and license at the end of inc/crnlib.h
#pragma once
#include "crn_vec.h"

namespace crnlib {
// A pair of T values: m_bounds[0] is the low bound and m_bounds[1] is the high bound.
// T must provide num_elements, scalar_type and clear().
template class vec_interval {
 public:
  enum { N = T::num_elements };
  typedef typename T::scalar_type scalar_type;

  // Degenerate interval: both bounds are set to v.
  inline vec_interval(const T& v) {
    m_bounds[0] = v;
    m_bounds[1] = v;
  }
  inline vec_interval(const T& low, const T& high) {
    m_bounds[0] = low;
    m_bounds[1] = high;
  }

  // Calls clear() on both bounds.
  inline void clear() {
    m_bounds[0].clear();
    m_bounds[1].clear();
  }

  // Index 0 is the low bound, index 1 the high bound; any other index asserts.
  inline const T& operator[](uint i) const {
    CRNLIB_ASSERT(i < 2);
    return m_bounds[i];
  }
  inline T& operator[](uint i) {
    CRNLIB_ASSERT(i < 2);
    return m_bounds[i];
  }

 private:
  T m_bounds[2];
};

typedef vec_interval vec_interval1F;
typedef vec_interval vec_interval2F;
typedef vec_interval vec_interval3F;
typedef vec_interval vec_interval4F;

typedef vec_interval2F aabb2F;
typedef vec_interval3F aabb3F;

} // namespace crnlib
DaemonEngine-crunch-ef4d32f/crnlib/crn_vector.cpp000066400000000000000000000042471503722002600221470ustar00rootroot00000000000000
// File: crn_vector.cpp
// See Copyright Notice and license at the end of inc/crnlib.h
#include "crn_core.h"
#include "crn_vector.h"
#include "crn_rand.h"
#include "crn_color.h"
#include "crn_vec.h"

namespace crnlib {
// Type-erased capacity growth shared by every vector instantiation.
//   min_new_capacity - required capacity, in elements. Nothing happens if m_capacity already covers it.
//   grow_hint        - when set, a non power-of-2 request is rounded with math::next_pow2().
//   element_size     - size of one element in bytes.
//   pMover           - NULL: the block is resized with crnlib_realloc() (its "movable" argument is true).
//                      non-NULL: a new block is crnlib_malloc()'ed, pMover relocates the m_size live
//                      elements into it, and the old block is freed.
//   nofail           - when set, an allocation failure returns false instead of calling CRNLIB_FAIL.
// Returns true on success (including the "already large enough" case).
bool elemental_vector::increase_capacity(uint min_new_capacity, bool grow_hint, uint element_size, object_mover pMover, bool nofail) {
  CRNLIB_ASSERT(m_size <= m_capacity);
  // Debug-only sanity limit on the request.
#ifdef CRNLIB_64BIT_POINTERS
  CRNLIB_ASSERT(min_new_capacity < (0x400000000ULL / element_size));
#else
  CRNLIB_ASSERT(min_new_capacity < (0x7FFF0000U / element_size));
#endif

  if (m_capacity >= min_new_capacity)
    return true;

  ptr_bits_t new_capacity = min_new_capacity;
  if ((grow_hint) && (!math::is_power_of_2((uint64)new_capacity)))
    new_capacity = math::next_pow2((uint64)new_capacity);

  CRNLIB_ASSERT(new_capacity && (new_capacity > m_capacity));

  const size_t desired_size = element_size * new_capacity;
  size_t actual_size;
  if (!pMover) {
    void* new_p = crnlib_realloc(m_p, desired_size, &actual_size, true);
    if (!new_p) {
      if (nofail)
        return false;

      // The byte count is narrowed to uint for the message only.
      char buf[256];
#if defined(_WIN32)
      sprintf_s(buf, sizeof(buf), "vector: crnlib_realloc() failed allocating %u bytes", (uint)desired_size);
#else
      snprintf(buf, sizeof(buf), "vector: crnlib_realloc() failed allocating %u bytes", (uint)desired_size);
#endif
      // NOTE(review): if CRNLIB_FAIL can return, m_p is overwritten with NULL just below - confirm it never returns.
      CRNLIB_FAIL(buf);
    }
    m_p = new_p;
  } else {
    void* new_p = crnlib_malloc(desired_size, &actual_size);
    if (!new_p) {
      if (nofail)
        return false;

      char buf[256];
#if defined(_WIN32)
      sprintf_s(buf, sizeof(buf), "vector: crnlib_malloc() failed allocating %u bytes", (uint)desired_size);
#else
      snprintf(buf, sizeof(buf), "vector: crnlib_malloc() failed allocating %u bytes", (uint)desired_size);
#endif
      // NOTE(review): same concern as above - the mover would be invoked with a NULL destination if this returns.
      CRNLIB_FAIL(buf);
    }

    // Relocate the live elements into the new block, then release the old one.
    (*pMover)(new_p, m_p, m_size);

    if (m_p)
      crnlib_free(m_p);
    m_p = new_p;
  }

  // If the allocator reported more bytes than were requested, expose the surplus as capacity.
  if (actual_size > desired_size)
    m_capacity = static_cast(actual_size / element_size);
  else
    m_capacity = static_cast(new_capacity);

  return true;
}

} // namespace crnlib
DaemonEngine-crunch-ef4d32f/crnlib/crn_vector.h000066400000000000000000000410341503722002600216070ustar00rootroot00000000000000
// File: crn_vector.h
// See Copyright Notice and license at the end of inc/crnlib.h
#pragma once

namespace crnlib {
// Untyped view of a vector's storage: data pointer, element count and capacity.
// increase_capacity() is implemented once, out of line, in terms of element_size.
struct elemental_vector {
  void* m_p;
  uint m_size;
  uint m_capacity;

  // Callback that relocates num elements from pSrc to pDst.
  typedef void (*object_mover)(void* pDst, void* pSrc, uint num);

  // Note: the definition names the fourth parameter pMover.
  bool increase_capacity(uint min_new_capacity, bool grow_hint, uint element_size, object_mover pRelocate, bool nofail);
};

// Growable array of T described by m_p / m_size / m_capacity.
template class vector : public helpers::rel_ops > {
 public:
  typedef T* iterator;
  typedef const T* const_iterator;
  typedef T value_type;
  typedef T& reference;
  typedef const T& const_reference;
  typedef T* pointer;
  typedef const T* const_pointer;

  // Empty vector; nothing is allocated.
  inline vector()
      : m_p(NULL), m_size(0), m_capacity(0) {
  }

  // n elements, each constructed from init.
  inline vector(uint n, const T& init)
      : m_p(NULL), m_size(0), m_capacity(0) {
    increase_capacity(n, false);
    helpers::construct_array(m_p, n, init);
    m_size = n;
  }

  // Copy constructor: memcpy for bitwise-copyable T, otherwise element-by-element construction.
  inline vector(const vector& other)
      : m_p(NULL), m_size(0), m_capacity(0) {
    increase_capacity(other.m_size, false);

    m_size = other.m_size;

    if (CRNLIB_IS_BITWISE_COPYABLE(T))
      memcpy(m_p, other.m_p, m_size * sizeof(T));
    else {
      T* pDst = m_p;
      const T* pSrc = other.m_p;
      for (uint i = m_size; i > 0; i--)
        helpers::construct(pDst++, *pSrc++);
    }
  }

  // Creates a vector of the given size by calling resize(size).
  inline explicit vector(uint size)
      : m_p(NULL), m_size(0), m_capacity(0) {
    resize(size);
  }

  // Destroys the live elements and frees the block, if there is one.
  inline ~vector() {
    if (m_p) {
      scalar_type::destruct_array(m_p, m_size);
      crnlib_free(m_p);
    }
  }

  // Copy assignment. The existing block is reused when its capacity is sufficient
  // (the old elements are dropped with resize(0)); otherwise it is freed and a new one allocated.
  inline vector& operator=(const vector& other) {
    if (this == &other)
      return *this;

    if (m_capacity >= other.m_size)
      resize(0);
    else {
      clear();
      increase_capacity(other.m_size, false);
    }

    if (CRNLIB_IS_BITWISE_COPYABLE(T))
      memcpy(m_p, other.m_p, other.m_size * sizeof(T));
    else {
      T* pDst = m_p;
      const T* pSrc = other.m_p;
      for (uint i = other.m_size; i > 0; i--)
        helpers::construct(pDst++, *pSrc++);
    }

    m_size = other.m_size;

    return *this;
  }

  // Raw-pointer iterators over [m_p, m_p + m_size).
  inline const T* begin() const { return m_p; }
  T* begin() { return m_p; }

  inline const T* end() const { return m_p + m_size; }
  T* end() { return m_p + m_size; }

  inline bool empty() const { return !m_size; }
  inline uint size() const { return m_size; }
  inline uint size_in_bytes() const { return m_size * sizeof(T); }
  inline uint capacity() const { return m_capacity; }

  // operator[] will assert on out of range indices, but in final builds there is not (and never will be) any range checking on this method.
  inline const T& operator[](uint i) const {
    CRNLIB_ASSERT(i < m_size);
    return m_p[i];
  }
  inline T& operator[](uint i) {
    CRNLIB_ASSERT(i < m_size);
    return m_p[i];
  }

  // at() always includes range checking, even in final builds, unlike operator [].
  // The first element is returned if the index is out of range.
  // NOTE(review): the fallback still reads m_p[0] when the vector is empty - confirm callers never do that.
  inline const T& at(uint i) const {
    CRNLIB_ASSERT(i < m_size);
    return (i >= m_size) ? m_p[0] : m_p[i];
  }
  inline T& at(uint i) {
    CRNLIB_ASSERT(i < m_size);
    return (i >= m_size) ? m_p[0] : m_p[i];
  }

  // front()/back() assert that the vector is non-empty; there is no check beyond the assert.
  inline const T& front() const {
    CRNLIB_ASSERT(m_size);
    return m_p[0];
  }
  inline T& front() {
    CRNLIB_ASSERT(m_size);
    return m_p[0];
  }

  inline const T& back() const {
    CRNLIB_ASSERT(m_size);
    return m_p[m_size - 1];
  }
  inline T& back() {
    CRNLIB_ASSERT(m_size);
    return m_p[m_size - 1];
  }

  inline const T* get_ptr() const { return m_p; }
  inline T* get_ptr() { return m_p; }

  // clear() sets the container to empty, then frees the allocated block.
  inline void clear() {
    if (m_p) {
      scalar_type::destruct_array(m_p, m_size);
      crnlib_free(m_p);
      m_p = NULL;
      m_size = 0;
      m_capacity = 0;
    }
  }

  // Same as clear(), except that the elements are not destructed before the block is freed.
  inline void clear_no_destruction() {
    if (m_p) {
      crnlib_free(m_p);
      m_p = NULL;
      m_size = 0;
      m_capacity = 0;
    }
  }

  // Grows the capacity to new_capacity or, when new_capacity is smaller than the current capacity,
  // rebuilds the vector in a block sized for max(m_size, new_capacity) elements.
  inline void reserve(uint new_capacity) {
    if (new_capacity > m_capacity)
      increase_capacity(new_capacity, false);
    else if (new_capacity < m_capacity) {
      // Must work around the lack of a "decrease_capacity()" method.
      // This case is rare enough in practice that it's probably not worth implementing an optimized in-place resize.
      vector tmp;
      tmp.increase_capacity(math::maximum(m_size, new_capacity), false);
      tmp = *this;
      swap(tmp);
    }
  }

  // Non-fatal variant: returns false if the allocation fails. Note that grow_hint is passed as true here.
  inline bool try_reserve(uint new_capacity) {
    return increase_capacity(new_capacity, true, true);
  }

  // resize(0) sets the container to empty, but does not free the allocated block.
// Changes the element count to new_size. Shrinking destructs the trailing elements and keeps the
// block; growing enlarges the capacity if needed and default-constructs the new tail.
// The grow hint is forced on when growing by exactly one element.
inline void resize(uint new_size, bool grow_hint = false) {
  if (m_size != new_size) {
    if (new_size < m_size)
      scalar_type::destruct_array(m_p + new_size, m_size - new_size);
    else {
      if (new_size > m_capacity)
        increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint);

      scalar_type::construct_array(m_p + m_size, new_size - m_size);
    }

    m_size = new_size;
  }
}

// Same as resize(), but returns false (leaving the size unchanged) if the capacity cannot be increased.
inline bool try_resize(uint new_size, bool grow_hint = false) {
  if (m_size != new_size) {
    if (new_size < m_size)
      scalar_type::destruct_array(m_p + new_size, m_size - new_size);
    else {
      if (new_size > m_capacity) {
        if (!increase_capacity(new_size, (new_size == (m_size + 1)) || grow_hint, true))
          return false;
      }

      scalar_type::construct_array(m_p + m_size, new_size - m_size);
    }

    m_size = new_size;
  }

  return true;
}

// If size >= capacity/2, reset() sets the container's size to 0 but doesn't free the allocated block (because the container may be similarly loaded in the future).
// Otherwise it blows away the allocated block. See http://www.codercorner.com/blog/?p=494
inline void reset() {
  if (m_size >= (m_capacity >> 1))
    resize(0);
  else
    clear();
}

// Appends i default-constructed elements and returns a pointer to the first of them.
inline T* enlarge(uint i) {
  uint cur_size = m_size;
  resize(cur_size + i, true);
  return get_ptr() + cur_size;
}

// Same as enlarge(), but returns NULL if the resize fails.
inline T* try_enlarge(uint i) {
  uint cur_size = m_size;
  if (!try_resize(cur_size + i, true))
    return NULL;
  return get_ptr() + cur_size;
}

// Appends a copy of obj. The assert rejects an obj that lives inside this vector's own
// [m_p, m_p + m_size) range, since growing the block could invalidate the reference.
inline void push_back(const T& obj) {
  CRNLIB_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size)));

  if (m_size >= m_capacity)
    increase_capacity(m_size + 1, true);

  scalar_type::construct(m_p + m_size, obj);
  m_size++;
}

// Same as push_back(), but returns false if the capacity cannot be increased.
inline bool try_push_back(const T& obj) {
  CRNLIB_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size)));

  if (m_size >= m_capacity) {
    if (!increase_capacity(m_size + 1, true, true))
      return false;
  }

  scalar_type::construct(m_p + m_size, obj);
  m_size++;

  return true;
}

// Takes obj by value, so (unlike push_back) the argument may be an element of this vector.
inline void push_back_value(T obj) {
  if (m_size >= m_capacity)
    increase_capacity(m_size + 1, true);

  scalar_type::construct(m_p + m_size, obj);
  m_size++;
}

// Removes and destructs the last element. Asserts on an empty vector, and is a no-op when asserts are compiled out.
inline void pop_back() {
  CRNLIB_ASSERT(m_size);

  if (m_size) {
    m_size--;
    scalar_type::destruct(&m_p[m_size]);
  }
}

// Inserts the n elements at p before position index (index == m_size appends).
// The vector is first grown by n, the tail [index, orig_size) is shifted up by n, then the gap is filled.
inline void insert(uint index, const T* p, uint n) {
  CRNLIB_ASSERT(index <= m_size);
  if (!n)
    return;

  const uint orig_size = m_size;
  resize(m_size + n, true);

  const uint num_to_move = orig_size - index;

  if (CRNLIB_IS_BITWISE_COPYABLE(T)) {
    // This overwrites the destination object bits, but bitwise copyable means we don't need to worry about destruction.
    memmove(m_p + index + n, m_p + index, sizeof(T) * num_to_move);
  } else {
    // Walk backwards from the last original element so overlapping source/destination ranges are safe.
    const T* pSrc = m_p + orig_size - 1;
    T* pDst = const_cast(pSrc) + n;

    for (uint i = 0; i < num_to_move; i++) {
      CRNLIB_ASSERT((pDst - m_p) < (int)m_size);
      *pDst-- = *pSrc--;
    }
  }

  T* pDst = m_p + index;

  if (CRNLIB_IS_BITWISE_COPYABLE(T)) {
    // This copies in the new bits, overwriting the existing objects, which is OK for copyable types that don't need destruction.
    memcpy(pDst, p, sizeof(T) * n);
  } else {
    for (uint i = 0; i < n; i++) {
      CRNLIB_ASSERT((pDst - m_p) < (int)m_size);
      *pDst++ = *p++;
    }
  }
}

// push_front() isn't going to be very fast - it's only here for usability.
inline void push_front(const T& obj) {
  insert(0, &obj, 1);
}

// Appends all of other's elements; returns *this.
vector& append(const vector& other) {
  if (other.m_size)
    insert(m_size, &other[0], other.m_size);
  return *this;
}

// Appends the n elements at p; returns *this.
vector& append(const T* p, uint n) {
  if (n)
    insert(m_size, p, n);
  return *this;
}

// Removes the n elements starting at start, preserving the order of the rest.
// An out-of-range request asserts and is otherwise ignored.
inline void erase(uint start, uint n) {
  CRNLIB_ASSERT((start + n) <= m_size);
  if ((start + n) > m_size)
    return;

  if (!n)
    return;

  const uint num_to_move = m_size - (start + n);

  T* pDst = m_p + start;

  const T* pSrc = m_p + start + n;

  if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T)) {
    // This test is overly cautious.
    if ((!CRNLIB_IS_BITWISE_COPYABLE(T)) || (CRNLIB_HAS_DESTRUCTOR(T))) {
      // Type has been marked explicitly as bitwise movable, which means we can move them around but they may need to be destructed.
      // First destroy the erased objects.
      scalar_type::destruct_array(pDst, n);
    }

    // Copy "down" the objects to preserve, filling in the empty slots.
    memmove(pDst, pSrc, num_to_move * sizeof(T));
  } else {
    // Type is not bitwise copyable or movable.
    // Move them down one at a time by using the equals operator, and destroying anything that's left over at the end.
    T* pDst_end = pDst + num_to_move;

    while (pDst != pDst_end)
      *pDst++ = *pSrc++;

    scalar_type::destruct_array(pDst_end, n);
  }

  m_size -= n;
}

// Removes the single element at index.
inline void erase(uint index) {
  erase(index, 1);
}

// Removes the element p points at; p must point into this vector.
inline void erase(T* p) {
  CRNLIB_ASSERT((p >= m_p) && (p < (m_p + m_size)));
  erase(static_cast(p - m_p));
}

// Removes the element at index by overwriting it with the last element and popping the back.
// Element order is not preserved.
void erase_unordered(uint index) {
  CRNLIB_ASSERT(index < m_size);

  if ((index + 1) < m_size)
    (*this)[index] = back();

  pop_back();
}

// Equal when the sizes match and every element matches. When scalar_type's cFlag is set the
// comparison is a raw memcmp of the storage; otherwise T's operator== is used.
inline bool operator==(const vector& rhs) const {
  if (m_size != rhs.m_size)
    return false;
  else if (m_size) {
    if (scalar_type::cFlag)
      return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0;
    else {
      const T* pSrc = m_p;
      const T* pDst = rhs.m_p;
      for (uint i = m_size; i; i--)
        if (!(*pSrc++ == *pDst++))
          return false;
    }
  }

  return true;
}

// Lexicographic ordering: the first differing element decides; if one vector is a prefix
// of the other, the shorter one is the lesser.
inline bool operator<(const vector& rhs) const {
  const uint min_size = math::minimum(m_size, rhs.m_size);

  const T* pSrc = m_p;
  const T* pSrc_end = m_p + min_size;
  const T* pDst = rhs.m_p;

  while ((pSrc < pSrc_end) && (*pSrc == *pDst)) {
    pSrc++;
    pDst++;
  }

  if (pSrc < pSrc_end)
    return *pSrc < *pDst;

  return m_size < rhs.m_size;
}

// Exchanges pointer, size and capacity with other; no elements are copied.
inline void swap(vector& other) {
  utils::swap(m_p, other.m_p);
  utils::swap(m_size, other.m_size);
  utils::swap(m_capacity, other.m_capacity);
}

inline void sort() {
  std::sort(begin(), end());
}

// Sorts the vector, then removes duplicate elements (so the original order is not preserved).
inline void unique() {
  if (!empty()) {
    sort();

    resize(std::unique(begin(), end()) - begin());
  }
}

// Reverses the element order in place.
inline void reverse() {
  uint j = m_size >> 1;
  for (uint i = 0; i < j; i++)
    utils::swap(m_p[i], m_p[m_size - 1 - i]);
}

// Linear search. Returns the index of the first element equal to key, or cInvalidIndex.
inline int find(const T& key) const {
  const T* p = m_p;
  const T* p_end = m_p + m_size;

  uint index = 0;

  while (p != p_end) {
    if (key == *p)
      return index;

    p++;
    index++;
  }

  return cInvalidIndex;
}

// Binary search using T's operator< and operator==. The vector must already be sorted.
// Returns the index of a matching element, or cInvalidIndex.
inline int find_sorted(const T& key) const {
  if (m_size) {
    // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice.
    int i = ((m_size + 1) >> 1) - 1;
    int m = m_size;

    for (;;) {
      CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size);
      const T* pKey_i = m_p + i;
      int cmp = key < *pKey_i;
      if ((!cmp) && (key == *pKey_i))
        return i;
      m >>= 1;
      if (!m)
        break;
      // cmp becomes 0 or -1; (x ^ cmp) - cmp is x when cmp == 0 and -x when cmp == -1,
      // so the step is subtracted when key < *pKey_i and added otherwise, without a branch.
      cmp = -cmp;
      i += (((m + 1) >> 1) ^ cmp) - cmp;

      CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size);
      pKey_i = m_p + i;
      cmp = key < *pKey_i;
      if ((!cmp) && (key == *pKey_i))
        return i;
      m >>= 1;
      if (!m)
        break;
      cmp = -cmp;
      i += (((m + 1) >> 1) ^ cmp) - cmp;
    }
  }

  return cInvalidIndex;
}

// Same search with a caller-supplied ordering: a match is an element for which neither
// less_than(key, elem) nor less_than(elem, key) holds.
template inline int find_sorted(const T& key, Q less_than) const {
  if (m_size) {
    // Uniform binary search - Knuth Algorithm 6.2.1 U, unrolled twice.
    int i = ((m_size + 1) >> 1) - 1;
    int m = m_size;

    for (;;) {
      CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size);
      const T* pKey_i = m_p + i;
      int cmp = less_than(key, *pKey_i);
      if ((!cmp) && (!less_than(*pKey_i, key)))
        return i;
      m >>= 1;
      if (!m)
        break;
      cmp = -cmp;
      i += (((m + 1) >> 1) ^ cmp) - cmp;

      CRNLIB_ASSERT_OPEN_RANGE(i, 0, (int)m_size);
      pKey_i = m_p + i;
      cmp = less_than(key, *pKey_i);
      if ((!cmp) && (!less_than(*pKey_i, key)))
        return i;
      m >>= 1;
      if (!m)
        break;
      cmp = -cmp;
      i += (((m + 1) >> 1) ^ cmp) - cmp;
    }
  }

  return cInvalidIndex;
}

// Number of elements that compare equal to key.
inline uint count_occurences(const T& key) const {
  uint c = 0;

  const T* p = m_p;
  const T* p_end = m_p + m_size;

  while (p != p_end) {
    if (key == *p)
      c++;

    p++;
  }

  return c;
}

// Assigns o to every element; one-byte types with scalar_type's cFlag set are filled with memset.
inline void set_all(const T& o) {
  if ((sizeof(T) == 1) && (scalar_type::cFlag))
    memset(m_p, *reinterpret_cast(&o), m_size);
  else {
    T* pDst = m_p;
    T* pDst_end = pDst + m_size;
    while (pDst != pDst_end)
      *pDst++ = o;
  }
}

// Caller assumes ownership of the heap block associated with the container. Container is cleared.
inline void* assume_ownership() {
  T* p = m_p;
  m_p = NULL;
  m_size = 0;
  m_capacity = 0;
  return p;
}

// Caller is granting ownership of the indicated heap block.
// Block must have size constructed elements, and have enough room for capacity elements. inline bool grant_ownership(T* p, uint size, uint capacity) { // To to prevent the caller from obviously shooting themselves in the foot. if (((p + capacity) > m_p) && (p < (m_p + m_capacity))) { // Can grant ownership of a block inside the container itself! CRNLIB_ASSERT(0); return false; } if (size > capacity) { CRNLIB_ASSERT(0); return false; } if (!p) { if (capacity) { CRNLIB_ASSERT(0); return false; } } else if (!capacity) { CRNLIB_ASSERT(0); return false; } clear(); m_p = p; m_size = size; m_capacity = capacity; return true; } private: T* m_p; uint m_size; uint m_capacity; template struct is_vector { enum { cFlag = false }; }; template struct is_vector > { enum { cFlag = true }; }; static void object_mover(void* pDst_void, void* pSrc_void, uint num) { T* pSrc = static_cast(pSrc_void); T* const pSrc_end = pSrc + num; T* pDst = static_cast(pDst_void); while (pSrc != pSrc_end) { // placement new new (static_cast(pDst)) T(*pSrc); pSrc->~T(); ++pSrc; ++pDst; } } inline bool increase_capacity(uint min_new_capacity, bool grow_hint, bool nofail = false) { return reinterpret_cast(this)->increase_capacity( min_new_capacity, grow_hint, sizeof(T), (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T) || (is_vector::cFlag)) ? 
NULL : object_mover, nofail); } }; typedef crnlib::vector uint8_vec; template struct bitwise_movable > { enum { cFlag = true }; }; extern void vector_test(); template inline void swap(vector& a, vector& b) { a.swap(b); } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crnlib/crn_vector2d.h000066400000000000000000000065701503722002600220430ustar00rootroot00000000000000// File: crn_vector2d.h #pragma once namespace crnlib { template class vector2D { public: typedef crnlib::vector vector_type; typedef T value_type; typedef T& reference; typedef const T& const_reference; typedef T* pointer; typedef const T* const_pointer; inline vector2D(uint width = 0, uint height = 0, const T& def = T()) : m_width(width), m_height(height), m_vec(width * height), m_def(def) { } inline vector2D(const vector2D& other) : m_width(other.m_width), m_height(other.m_height), m_vec(other.m_vec), m_def(other.m_def) { } inline vector2D& operator=(const vector2D& rhs) { if (this == &rhs) return *this; m_width = rhs.m_width; m_height = rhs.m_height; m_vec = rhs.m_vec; return *this; } bool try_resize(uint width, uint height, bool preserve = true) { if ((width == m_width) && (height == m_height)) return true; vector_type new_vec; if (!new_vec.try_resize(width * height)) return false; if (preserve) { const uint nx = math::minimum(width, m_width); const uint ny = math::minimum(height, m_height); for (uint y = 0; y < ny; y++) { const T* pSrc = &m_vec[y * m_width]; T* pDst = &new_vec[y * width]; if (CRNLIB_IS_BITWISE_COPYABLE_OR_MOVABLE(T)) memcpy(pDst, pSrc, nx * sizeof(T)); else { for (uint x = 0; x < nx; x++) *pDst++ = *pSrc++; } } } m_width = width; m_height = height; m_vec.swap(new_vec); return true; } void resize(uint width, uint height, bool preserve = true) { if (!try_resize(width, height, preserve)) { CRNLIB_FAIL("vector2D::resize: Out of memory"); } } inline void clear() { m_vec.clear(); m_width = 0; m_height = 0; } inline uint width() const { return m_width; } inline uint height() const { 
return m_height; } inline uint size() const { return m_vec.size(); } inline uint size_in_bytes() const { return m_vec.size() * sizeof(T); } const vector_type& get_vec() const { return m_vec; } vector_type& get_vec() { return m_vec; } inline const T* get_ptr() const { return m_vec.get_ptr(); } inline T* get_ptr() { return m_vec.get_ptr(); } inline const T& operator[](uint i) const { return m_vec[i]; } inline T& operator[](uint i) { return m_vec[i]; } inline const T& operator()(uint x, uint y) const { CRNLIB_ASSERT((x < m_width) && (y < m_height)); return m_vec[x + y * m_width]; } inline T& operator()(uint x, uint y) { CRNLIB_ASSERT((x < m_width) && (y < m_height)); return m_vec[x + y * m_width]; } inline const T& at(uint x, uint y) const { if ((x >= m_width) || (y >= m_height)) return m_def; return m_vec[x + y * m_width]; } inline T& at(uint x, uint y) { if ((x >= m_width) || (y >= m_height)) return m_def; return m_vec[x + y * m_width]; } inline void swap(vector2D& other) { m_vec.swap(other.m_vec); anvil::swap(m_width, other.m_width); anvil::swap(m_height, other.m_height); } inline void set_all(const T& x) { m_vec.set_all(x); } private: vector_type m_vec; uint m_width; uint m_height; T m_def; }; } // namespace anvil DaemonEngine-crunch-ef4d32f/crnlib/crn_winhdr.h000066400000000000000000000004111503722002600215720ustar00rootroot00000000000000#pragma once #ifndef WIN32 #error Should not get here #endif #ifndef _WIN32_WINNT #define _WIN32_WINNT 0x500 #endif #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #ifndef NOMINMAX #define NOMINMAX #endif #include "windows.h" DaemonEngine-crunch-ef4d32f/crnlib/crnlib.cpp000066400000000000000000000324021503722002600212460ustar00rootroot00000000000000// File: crnlib.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "../inc/crnlib.h" #include "crn_comp.h" #include "crn_dds_comp.h" #include "crn_dynamic_stream.h" #include "crn_buffer_stream.h" #include "crn_ryg_dxt.hpp" 
#include "crn_etc.h" #include "../inc/crn_defs.h" #include "crn_rg_etc1.h" namespace crnlib { static void* realloc_func(void* p, size_t size, size_t* pActual_size, bool movable, void*) { return crnlib_realloc(p, size, pActual_size, movable); } static size_t msize_func(void* p, void*) { return crnlib_msize(p); } class crnlib_global_initializer { public: crnlib_global_initializer() { crn_threading_init(); crnlib_enable_fail_exceptions(true); // Redirect crn_decomp.h's memory allocations into crnlib, which may be further redirected by the outside caller. crnd::crnd_set_memory_callbacks(realloc_func, msize_func, NULL); ryg_dxt::sInitDXT(); pack_etc1_block_init(); rg_etc1::pack_etc1_block_init(); } }; crnlib_global_initializer g_crnlib_initializer; } // namespace crnlib using namespace crnlib; const char* crn_get_format_string(crn_format fmt) { return pixel_format_helpers::get_crn_format_string(fmt); } crn_uint32 crn_get_format_fourcc(crn_format fmt) { return crnd::crnd_crn_format_to_fourcc(fmt); } crn_uint32 crn_get_format_bits_per_texel(crn_format fmt) { return crnd::crnd_get_crn_format_bits_per_texel(fmt); } crn_uint32 crn_get_bytes_per_dxt_block(crn_format fmt) { return crnd::crnd_get_bytes_per_dxt_block(fmt); } crn_format crn_get_fundamental_dxt_format(crn_format fmt) { return crnd::crnd_get_fundamental_dxt_format(fmt); } const char* crn_get_file_type_ext(crn_file_type file_type) { switch (file_type) { case cCRNFileTypeDDS: return "dds"; case cCRNFileTypeCRN: return "crn"; default: break; } return "?"; } const char* crn_get_mip_mode_desc(crn_mip_mode m) { switch (m) { case cCRNMipModeUseSourceOrGenerateMips: return "Use source/generate if none"; case cCRNMipModeUseSourceMips: return "Only use source MIP maps (if any)"; case cCRNMipModeGenerateMips: return "Always generate new MIP maps"; case cCRNMipModeNoMips: return "No MIP maps"; default: break; } return "?"; } const char* crn_get_mip_mode_name(crn_mip_mode m) { switch (m) { case 
cCRNMipModeUseSourceOrGenerateMips: return "UseSourceOrGenerate"; case cCRNMipModeUseSourceMips: return "UseSource"; case cCRNMipModeGenerateMips: return "Generate"; case cCRNMipModeNoMips: return "None"; default: break; } return "?"; } const char* crn_get_mip_filter_name(crn_mip_filter f) { switch (f) { case cCRNMipFilterBox: return "box"; case cCRNMipFilterTent: return "tent"; case cCRNMipFilterLanczos4: return "lanczos4"; case cCRNMipFilterMitchell: return "mitchell"; case cCRNMipFilterKaiser: return "kaiser"; default: break; } return "?"; } const char* crn_get_scale_mode_desc(crn_scale_mode sm) { switch (sm) { case cCRNSMDisabled: return "disabled"; case cCRNSMAbsolute: return "absolute"; case cCRNSMRelative: return "relative"; case cCRNSMLowerPow2: return "lowerpow2"; case cCRNSMNearestPow2: return "nearestpow2"; case cCRNSMNextPow2: return "nextpow2"; default: break; } return "?"; } const char* crn_get_dxt_quality_string(crn_dxt_quality q) { switch (q) { case cCRNDXTQualitySuperFast: return "SuperFast"; case cCRNDXTQualityFast: return "Fast"; case cCRNDXTQualityNormal: return "Normal"; case cCRNDXTQualityBetter: return "Better"; case cCRNDXTQualityUber: return "Uber"; default: break; } CRNLIB_ASSERT(false); return "?"; } void crn_free_block(void* pBlock) { crnlib_free(pBlock); } void* crn_compress(const crn_comp_params& comp_params, crn_uint32& compressed_size, crn_uint32* pActual_quality_level, float* pActual_bitrate) { compressed_size = 0; if (pActual_quality_level) *pActual_quality_level = 0; if (pActual_bitrate) *pActual_bitrate = 0.0f; if (!comp_params.check()) return NULL; crnlib::vector crn_file_data; if (!create_compressed_texture(comp_params, crn_file_data, pActual_quality_level, pActual_bitrate)) return NULL; compressed_size = crn_file_data.size(); return crn_file_data.assume_ownership(); } void* crn_compress(const crn_comp_params& comp_params, const crn_mipmap_params& mip_params, crn_uint32& compressed_size, crn_uint32* pActual_quality_level, 
float* pActual_bitrate) { compressed_size = 0; if (pActual_quality_level) *pActual_quality_level = 0; if (pActual_bitrate) *pActual_bitrate = 0.0f; if ((!comp_params.check()) || (!mip_params.check())) return NULL; crnlib::vector crn_file_data; if (!create_compressed_texture(comp_params, mip_params, crn_file_data, pActual_quality_level, pActual_bitrate)) return NULL; compressed_size = crn_file_data.size(); return crn_file_data.assume_ownership(); } void* crn_decompress_crn_to_dds(const void* pCRN_file_data, crn_uint32& file_size) { mipmapped_texture tex; if (!tex.read_crn_from_memory(pCRN_file_data, file_size, "from_memory.crn")) { file_size = 0; return NULL; } file_size = 0; dynamic_stream dds_file_data; dds_file_data.reserve(128 * 1024); data_stream_serializer serializer(dds_file_data); if (!tex.write_dds(serializer)) return NULL; dds_file_data.reserve(0); file_size = static_cast(dds_file_data.get_size()); return dds_file_data.get_buf().assume_ownership(); } bool crn_decompress_dds_to_images(const void* pDDS_file_data, crn_uint32 dds_file_size, crn_uint32** ppImages, crn_texture_desc& tex_desc) { memset(&tex_desc, 0, sizeof(tex_desc)); mipmapped_texture tex; buffer_stream in_stream(pDDS_file_data, dds_file_size); data_stream_serializer in_serializer(in_stream); if (!tex.read_dds(in_serializer)) return false; if (tex.is_packed()) { // TODO: Allow the user to disable uncooking of swizzled DXT5 formats? 
bool uncook = true;
    if (!tex.unpack_from_dxt(uncook))
      return false;
  }

  tex_desc.m_faces = tex.get_num_faces();
  tex_desc.m_width = tex.get_width();
  tex_desc.m_height = tex.get_height();
  tex_desc.m_levels = tex.get_num_levels();
  tex_desc.m_fmt_fourcc = (crn_uint32)tex.get_format();

  // ppImages is filled face-major: slot index = level + num_levels * face.
  // The caller must supply at least num_faces * num_levels slots.
  for (uint32 f = 0; f < tex.get_num_faces(); f++) {
    for (uint32 l = 0; l < tex.get_num_levels(); l++) {
      mip_level* pLevel = tex.get_level(f, l);
      image_u8* pImg = pLevel->get_image();
      // Detach the pixel storage from the image so it survives tex's destruction.
      ppImages[l + tex.get_num_levels() * f] = static_cast(pImg->get_pixel_buf().assume_ownership());
    }
  }

  return true;
}

// Frees every image slot, using the same
// "level + m_levels * face" indexing that crn_decompress_dds_to_images() fills.
void crn_free_all_images(crn_uint32** ppImages, const crn_texture_desc& desc) {
  for (uint32 f = 0; f < desc.m_faces; f++)
    for (uint32 l = 0; l < desc.m_levels; l++)
      crn_free_block(ppImages[l + desc.m_levels * f]);
}

// Simple low-level DXTn 4x4 block compressor API.
// Basically just a basic wrapper over the crnlib::dxt_image class.
namespace crnlib {
class crn_block_compressor {
  CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crn_block_compressor);

 public:
  crn_block_compressor() {
  }

  // Stores the parameters and sets up a one-block dxt_image
  // (cDXTBlockSize x cDXTBlockSize) in the fundamental DXT format of
  // params.m_format. Returns false if the image cannot be initialized.
  bool init(const crn_comp_params& params) {
    m_comp_params = params;

    m_pack_params.init(params);

    crn_format basic_crn_fmt = crnd::crnd_get_fundamental_dxt_format(params.m_format);
    pixel_format basic_pixel_fmt = pixel_format_helpers::convert_crn_format_to_pixel_format(basic_crn_fmt);

    // DXT1 is promoted to DXT1A when the caller asked for DXT1A transparency.
    if ((params.get_flag(cCRNCompFlagDXT1AForTransparency)) && (basic_pixel_fmt == PIXEL_FMT_DXT1))
      basic_pixel_fmt = PIXEL_FMT_DXT1A;

    if (!m_image.init(pixel_format_helpers::get_dxt_format(basic_pixel_fmt), cDXTBlockSize, cDXTBlockSize, false))
      return false;

    return true;
  }

  // Packs the pixels at pPixels into the single block held by m_image and
  // copies that block's bytes (get_bytes_per_block()) to pDst_block.
  // Does nothing - pDst_block is left untouched - if init() has not succeeded.
  void compress_block(const crn_uint32* pPixels, void* pDst_block) {
    if (m_image.is_valid()) {
      m_image.set_block_pixels(0, 0, reinterpret_cast(pPixels), m_pack_params, m_set_block_pixels_context);
      memcpy(pDst_block, &m_image.get_element(0, 0, 0), m_image.get_bytes_per_block());
    }
  }

 private:
  dxt_image m_image;
  crn_comp_params m_comp_params;
  dxt_image::pack_params m_pack_params;
dxt_image::set_block_pixels_context m_set_block_pixels_context; }; } crn_block_compressor_context_t crn_create_block_compressor(const crn_comp_params& params) { crn_block_compressor* pComp = crnlib_new(); if (!pComp->init(params)) { crnlib_delete(pComp); return NULL; } return pComp; } void crn_compress_block(crn_block_compressor_context_t pContext, const crn_uint32* pPixels, void* pDst_block) { crn_block_compressor* pComp = static_cast(pContext); pComp->compress_block(pPixels, pDst_block); } void crn_free_block_compressor(crn_block_compressor_context_t pContext) { crnlib_delete(static_cast(pContext)); } bool crn_decompress_block(const void* pSrc_block, crn_uint32* pDst_pixels_u32, crn_format crn_fmt) { color_quad_u8* pDst_pixels = reinterpret_cast(pDst_pixels_u32); switch (crn_get_fundamental_dxt_format(crn_fmt)) { case cCRNFmtETC1: { const etc1_block& block = *reinterpret_cast(pSrc_block); unpack_etc1(block, pDst_pixels, false); break; } case cCRNFmtDXT1: { const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block); color_quad_u8 colors[cDXT1SelectorValues]; pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { const uint s = pDXT1_block->get_selector(i & 3, i >> 2); pDst_pixels[i] = colors[s]; } break; } case cCRNFmtDXT3: { const dxt3_block* pDXT3_block = reinterpret_cast(pSrc_block); const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block) + 1; color_quad_u8 colors[cDXT1SelectorValues]; pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { const uint s = pDXT1_block->get_selector(i & 3, i >> 2); const uint a = pDXT3_block->get_alpha(i & 3, i >> 2, true); pDst_pixels[i] = colors[s]; pDst_pixels[i].a = static_cast(a); } break; } case cCRNFmtDXT5: { const dxt5_block* pDXT5_block = 
reinterpret_cast(pSrc_block); const dxt1_block* pDXT1_block = reinterpret_cast(pSrc_block) + 1; color_quad_u8 colors[cDXT1SelectorValues]; pDXT1_block->get_block_colors(colors, static_cast(pDXT1_block->get_low_color()), static_cast(pDXT1_block->get_high_color())); uint values[cDXT5SelectorValues]; dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { const uint s = pDXT1_block->get_selector(i & 3, i >> 2); const uint a = pDXT5_block->get_selector(i & 3, i >> 2); pDst_pixels[i] = colors[s]; pDst_pixels[i].a = static_cast(values[a]); } } case cCRNFmtDXN_XY: case cCRNFmtDXN_YX: { const dxt5_block* pDXT5_block0 = reinterpret_cast(pSrc_block); const dxt5_block* pDXT5_block1 = reinterpret_cast(pSrc_block) + 1; uint values0[cDXT5SelectorValues]; dxt5_block::get_block_values(values0, pDXT5_block0->get_low_alpha(), pDXT5_block0->get_high_alpha()); uint values1[cDXT5SelectorValues]; dxt5_block::get_block_values(values1, pDXT5_block1->get_low_alpha(), pDXT5_block1->get_high_alpha()); for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { const uint s0 = pDXT5_block0->get_selector(i & 3, i >> 2); const uint s1 = pDXT5_block1->get_selector(i & 3, i >> 2); if (crn_fmt == cCRNFmtDXN_XY) pDst_pixels[i].set_noclamp_rgba(values0[s0], values1[s1], 255, 255); else pDst_pixels[i].set_noclamp_rgba(values1[s1], values0[s0], 255, 255); } break; } case cCRNFmtDXT5A: { const dxt5_block* pDXT5_block = reinterpret_cast(pSrc_block); uint values[cDXT5SelectorValues]; dxt5_block::get_block_values(values, pDXT5_block->get_low_alpha(), pDXT5_block->get_high_alpha()); for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { const uint s = pDXT5_block->get_selector(i & 3, i >> 2); pDst_pixels[i].set_noclamp_rgba(255, 255, 255, values[s]); } break; } default: { return false; } } return true; } 
DaemonEngine-crunch-ef4d32f/crnlib/lzham_timer.cpp000066400000000000000000000052171503722002600223140ustar00rootroot00000000000000// File: lzham_timer.cpp // See Copyright Notice and license at the end of include/lzham.h #include "lzham_core.h" #include "lzham_timer.h" #ifndef LZHAM_USE_WIN32_API #include #endif namespace lzham { unsigned long long lzham_timer::g_init_ticks; unsigned long long lzham_timer::g_freq; double lzham_timer::g_inv_freq; #if LZHAM_USE_WIN32_API inline void query_counter(timer_ticks* pTicks) { QueryPerformanceCounter(reinterpret_cast(pTicks)); } inline void query_counter_frequency(timer_ticks* pTicks) { QueryPerformanceFrequency(reinterpret_cast(pTicks)); } #else inline void query_counter(timer_ticks* pTicks) { *pTicks = clock(); } inline void query_counter_frequency(timer_ticks* pTicks) { *pTicks = CLOCKS_PER_SEC; } #endif lzham_timer::lzham_timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false) { if (!g_inv_freq) init(); } lzham_timer::lzham_timer(timer_ticks start_ticks) { if (!g_inv_freq) init(); m_start_time = start_ticks; m_started = true; m_stopped = false; } void lzham_timer::start(timer_ticks start_ticks) { m_start_time = start_ticks; m_started = true; m_stopped = false; } void lzham_timer::start() { query_counter(&m_start_time); m_started = true; m_stopped = false; } void lzham_timer::stop() { LZHAM_ASSERT(m_started); query_counter(&m_stop_time); m_stopped = true; } double lzham_timer::get_elapsed_secs() const { LZHAM_ASSERT(m_started); if (!m_started) return 0; timer_ticks stop_time = m_stop_time; if (!m_stopped) query_counter(&stop_time); timer_ticks delta = stop_time - m_start_time; return delta * g_inv_freq; } timer_ticks lzham_timer::get_elapsed_us() const { LZHAM_ASSERT(m_started); if (!m_started) return 0; timer_ticks stop_time = m_stop_time; if (!m_stopped) query_counter(&stop_time); timer_ticks delta = stop_time - m_start_time; return (delta * 1000000ULL + (g_freq >> 1U)) / g_freq; } void 
lzham_timer::init() { if (!g_inv_freq) { query_counter_frequency(&g_freq); g_inv_freq = 1.0f / g_freq; query_counter(&g_init_ticks); } } timer_ticks lzham_timer::get_init_ticks() { if (!g_inv_freq) init(); return g_init_ticks; } timer_ticks lzham_timer::get_ticks() { if (!g_inv_freq) init(); timer_ticks ticks; query_counter(&ticks); return ticks - g_init_ticks; } double lzham_timer::ticks_to_secs(timer_ticks ticks) { if (!g_inv_freq) init(); return ticks * g_inv_freq; } } // namespace lzhamDaemonEngine-crunch-ef4d32f/crnlib/lzham_timer.h000066400000000000000000000053161503722002600217610ustar00rootroot00000000000000// File: lzham_timer.h // See Copyright Notice and license at the end of include/lzham.h #pragma once namespace lzham { typedef unsigned long long timer_ticks; class lzham_timer { public: lzham_timer(); lzham_timer(timer_ticks start_ticks); void start(); void start(timer_ticks start_ticks); void stop(); double get_elapsed_secs() const; inline double get_elapsed_ms() const { return get_elapsed_secs() * 1000.0f; } timer_ticks get_elapsed_us() const; static void init(); static inline timer_ticks get_ticks_per_sec() { return g_freq; } static timer_ticks get_init_ticks(); static timer_ticks get_ticks(); static double ticks_to_secs(timer_ticks ticks); static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; } static inline double get_secs() { return ticks_to_secs(get_ticks()); } static inline double get_ms() { return ticks_to_ms(get_ticks()); } private: static timer_ticks g_init_ticks; static timer_ticks g_freq; static double g_inv_freq; timer_ticks m_start_time; timer_ticks m_stop_time; bool m_started : 1; bool m_stopped : 1; }; enum var_args_t { cVarArgs }; #if LZHAM_PERF_SECTIONS class scoped_perf_section { public: inline scoped_perf_section() : m_start_ticks(lzham_timer::get_ticks()) { m_name[0] = '?'; m_name[1] = '\0'; } inline scoped_perf_section(const char* pName) : m_start_ticks(lzham_timer::get_ticks()) { 
strcpy_s(m_name, pName); lzham_buffered_printf("Thread: 0x%08X, BEGIN Time: %3.3fms, Section: %s\n", GetCurrentThreadId(), lzham_timer::ticks_to_ms(m_start_ticks), m_name); } inline scoped_perf_section(var_args_t, const char* pName, ...) : m_start_ticks(lzham_timer::get_ticks()) { va_list args; va_start(args, pName); crnlib_vsnprintf(m_name, sizeof(m_name), pName, args); va_end(args); lzham_buffered_printf("Thread: 0x%08X, BEGIN Time: %3.3fms, Section: %s\n", GetCurrentThreadId(), lzham_timer::ticks_to_ms(m_start_ticks), m_name); } inline ~scoped_perf_section() { double end_ms = lzham_timer::get_ms(); double start_ms = lzham_timer::ticks_to_ms(m_start_ticks); lzham_buffered_printf("Thread: 0x%08X, END Time: %3.3fms, Total: %3.3fms, Section: %s\n", GetCurrentThreadId(), end_ms, end_ms - start_ms, m_name); } private: char m_name[64]; timer_ticks m_start_ticks; }; #else class scoped_perf_section { public: inline scoped_perf_section() {} inline scoped_perf_section(const char* pName) { (void)pName; } inline scoped_perf_section(var_args_t, const char* pName, ...) 
{ (void)pName; } }; #endif // LZHAM_PERF_SECTIONS } // namespace lzham DaemonEngine-crunch-ef4d32f/crnlib/lzham_win32_threading.cpp000066400000000000000000000102321503722002600241540ustar00rootroot00000000000000// File: lzham_task_pool_win32.cpp // See Copyright Notice and license at the end of include/lzham.h #include "lzham_core.h" #include "lzham_win32_threading.h" #include "lzham_timer.h" #include #if LZHAM_USE_WIN32_API namespace lzham { task_pool::task_pool() : m_num_threads(0), m_tasks_available(0, 32767), m_num_outstanding_tasks(0), m_exit_flag(false) { utils::zero_object(m_threads); } task_pool::task_pool(uint num_threads) : m_num_threads(0), m_tasks_available(0, 32767), m_num_outstanding_tasks(0), m_exit_flag(false) { utils::zero_object(m_threads); bool status = init(num_threads); LZHAM_VERIFY(status); } task_pool::~task_pool() { deinit(); } bool task_pool::init(uint num_threads) { LZHAM_ASSERT(num_threads <= cMaxThreads); num_threads = math::minimum(num_threads, cMaxThreads); deinit(); bool succeeded = true; m_num_threads = 0; while (m_num_threads < num_threads) { m_threads[m_num_threads] = (HANDLE)_beginthreadex(NULL, 32768, thread_func, this, 0, NULL); LZHAM_ASSERT(m_threads[m_num_threads] != 0); if (!m_threads[m_num_threads]) { succeeded = false; break; } m_num_threads++; } if (!succeeded) { deinit(); return false; } return true; } void task_pool::deinit() { if (m_num_threads) { join(); atomic_exchange32(&m_exit_flag, true); m_tasks_available.release(m_num_threads); for (uint i = 0; i < m_num_threads; i++) { if (m_threads[i]) { for (;;) { DWORD result = WaitForSingleObject(m_threads[i], 30000); if ((result == WAIT_OBJECT_0) || (result == WAIT_ABANDONED)) break; } CloseHandle(m_threads[i]); m_threads[i] = NULL; } } m_num_threads = 0; atomic_exchange32(&m_exit_flag, false); } m_task_stack.clear(); m_num_outstanding_tasks = 0; } bool task_pool::queue_task(task_callback_func pFunc, uint64 data, void* pData_ptr) { LZHAM_ASSERT(m_num_threads); 
LZHAM_ASSERT(pFunc); task tsk; tsk.m_callback = pFunc; tsk.m_data = data; tsk.m_pData_ptr = pData_ptr; tsk.m_flags = 0; if (!m_task_stack.try_push(tsk)) return false; atomic_increment32(&m_num_outstanding_tasks); m_tasks_available.release(1); return true; } // It's the object's responsibility to delete pObj within the execute_task() method, if needed! bool task_pool::queue_task(executable_task* pObj, uint64 data, void* pData_ptr) { LZHAM_ASSERT(m_num_threads); LZHAM_ASSERT(pObj); task tsk; tsk.m_pObj = pObj; tsk.m_data = data; tsk.m_pData_ptr = pData_ptr; tsk.m_flags = cTaskFlagObject; if (!m_task_stack.try_push(tsk)) return false; atomic_increment32(&m_num_outstanding_tasks); m_tasks_available.release(1); return true; } void task_pool::process_task(task& tsk) { if (tsk.m_flags & cTaskFlagObject) tsk.m_pObj->execute_task(tsk.m_data, tsk.m_pData_ptr); else tsk.m_callback(tsk.m_data, tsk.m_pData_ptr); atomic_decrement32(&m_num_outstanding_tasks); } void task_pool::join() { while (atomic_add32(&m_num_outstanding_tasks, 0) > 0) { task tsk; if (m_task_stack.pop(tsk)) { process_task(tsk); } else { lzham_sleep(1); } } } unsigned __stdcall task_pool::thread_func(void* pContext) { task_pool* pPool = static_cast(pContext); for (;;) { if (!pPool->m_tasks_available.wait()) break; if (pPool->m_exit_flag) break; task tsk; if (pPool->m_task_stack.pop(tsk)) { pPool->process_task(tsk); } } _endthreadex(0); return 0; } static uint g_num_processors; uint lzham_get_max_helper_threads() { if (!g_num_processors) { SYSTEM_INFO system_info; GetSystemInfo(&system_info); g_num_processors = system_info.dwNumberOfProcessors; } if (g_num_processors > 1) { // use all CPU's return LZHAM_MIN(task_pool::cMaxThreads, g_num_processors - 1); } return 0; } } // namespace lzham #endif // LZHAM_USE_WIN32_API DaemonEngine-crunch-ef4d32f/crnlib/lzham_win32_threading.h000066400000000000000000000172051503722002600236300ustar00rootroot00000000000000// File: lzham_task_pool_win32.h // See Copyright Notice and 
license at the end of include/lzham.h #pragma once #if LZHAM_USE_WIN32_API #if LZHAM_NO_ATOMICS #error No atomic operations defined in lzham_platform.h! #endif namespace lzham { class semaphore { LZHAM_NO_COPY_OR_ASSIGNMENT_OP(semaphore); public: semaphore(long initialCount = 0, long maximumCount = 1, const char* pName = NULL) { m_handle = CreateSemaphoreA(NULL, initialCount, maximumCount, pName); if (NULL == m_handle) { LZHAM_FAIL("semaphore: CreateSemaphore() failed"); } } ~semaphore() { if (m_handle) { CloseHandle(m_handle); m_handle = NULL; } } inline HANDLE get_handle(void) const { return m_handle; } void release(long releaseCount = 1) { if (0 == ReleaseSemaphore(m_handle, releaseCount, NULL)) { LZHAM_FAIL("semaphore: ReleaseSemaphore() failed"); } } bool wait(uint32 milliseconds = UINT32_MAX) { LZHAM_ASSUME(INFINITE == UINT32_MAX); DWORD result = WaitForSingleObject(m_handle, milliseconds); if (WAIT_FAILED == result) { LZHAM_FAIL("semaphore: WaitForSingleObject() failed"); } return WAIT_OBJECT_0 == result; } private: HANDLE m_handle; }; template class tsstack { public: inline tsstack(bool use_freelist = true) : m_use_freelist(use_freelist) { LZHAM_VERIFY(((ptr_bits_t)this & (LZHAM_GET_ALIGNMENT(tsstack) - 1)) == 0); InitializeSListHead(&m_stack_head); InitializeSListHead(&m_freelist_head); } inline ~tsstack() { clear(); } inline void clear() { for (;;) { node* pNode = (node*)InterlockedPopEntrySList(&m_stack_head); if (!pNode) break; LZHAM_MEMORY_IMPORT_BARRIER helpers::destruct(&pNode->m_obj); lzham_free(pNode); } flush_freelist(); } inline void flush_freelist() { if (!m_use_freelist) return; for (;;) { node* pNode = (node*)InterlockedPopEntrySList(&m_freelist_head); if (!pNode) break; LZHAM_MEMORY_IMPORT_BARRIER lzham_free(pNode); } } inline bool try_push(const T& obj) { node* pNode = alloc_node(); if (!pNode) return false; helpers::construct(&pNode->m_obj, obj); LZHAM_MEMORY_EXPORT_BARRIER InterlockedPushEntrySList(&m_stack_head, &pNode->m_slist_entry); 
return true; } inline bool pop(T& obj) { node* pNode = (node*)InterlockedPopEntrySList(&m_stack_head); if (!pNode) return false; LZHAM_MEMORY_IMPORT_BARRIER obj = pNode->m_obj; helpers::destruct(&pNode->m_obj); free_node(pNode); return true; } private: SLIST_HEADER m_stack_head; SLIST_HEADER m_freelist_head; struct node { SLIST_ENTRY m_slist_entry; T m_obj; }; bool m_use_freelist; inline node* alloc_node() { node* pNode = m_use_freelist ? (node*)InterlockedPopEntrySList(&m_freelist_head) : NULL; if (!pNode) pNode = (node*)lzham_malloc(sizeof(node)); return pNode; } inline void free_node(node* pNode) { if (m_use_freelist) InterlockedPushEntrySList(&m_freelist_head, &pNode->m_slist_entry); else lzham_free(pNode); } }; class task_pool { public: task_pool(); task_pool(uint num_threads); ~task_pool(); enum { cMaxThreads = 16 }; bool init(uint num_threads); void deinit(); inline uint get_num_threads() const { return m_num_threads; } inline uint get_num_outstanding_tasks() const { return m_num_outstanding_tasks; } // C-style task callback typedef void (*task_callback_func)(uint64 data, void* pData_ptr); bool queue_task(task_callback_func pFunc, uint64 data = 0, void* pData_ptr = NULL); class executable_task { public: virtual void execute_task(uint64 data, void* pData_ptr) = 0; }; // It's the caller's responsibility to delete pObj within the execute_task() method, if needed! 
bool queue_task(executable_task* pObj, uint64 data = 0, void* pData_ptr = NULL); template inline bool queue_object_task(S* pObject, T pObject_method, uint64 data = 0, void* pData_ptr = NULL); template inline bool queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr = NULL); void join(); private: struct task { //inline task() : m_data(0), m_pData_ptr(NULL), m_pObj(NULL), m_flags(0) { } uint64 m_data; void* m_pData_ptr; union { task_callback_func m_callback; executable_task* m_pObj; }; uint m_flags; }; tsstack m_task_stack; uint m_num_threads; HANDLE m_threads[cMaxThreads]; semaphore m_tasks_available; enum task_flags { cTaskFlagObject = 1 }; volatile atomic32_t m_num_outstanding_tasks; volatile atomic32_t m_exit_flag; void process_task(task& tsk); static unsigned __stdcall thread_func(void* pContext); }; enum object_task_flags { cObjectTaskFlagDefault = 0, cObjectTaskFlagDeleteAfterExecution = 1 }; template class object_task : public task_pool::executable_task { public: object_task(uint flags = cObjectTaskFlagDefault) : m_pObject(NULL), m_pMethod(NULL), m_flags(flags) { } typedef void (T::*object_method_ptr)(uint64 data, void* pData_ptr); object_task(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) : m_pObject(pObject), m_pMethod(pMethod), m_flags(flags) { LZHAM_ASSERT(pObject && pMethod); } void init(T* pObject, object_method_ptr pMethod, uint flags = cObjectTaskFlagDefault) { LZHAM_ASSERT(pObject && pMethod); m_pObject = pObject; m_pMethod = pMethod; m_flags = flags; } T* get_object() const { return m_pObject; } object_method_ptr get_method() const { return m_pMethod; } virtual void execute_task(uint64 data, void* pData_ptr) { (m_pObject->*m_pMethod)(data, pData_ptr); if (m_flags & cObjectTaskFlagDeleteAfterExecution) lzham_delete(this); } protected: T* m_pObject; object_method_ptr m_pMethod; uint m_flags; }; template inline bool task_pool::queue_object_task(S* pObject, T 
pObject_method, uint64 data, void* pData_ptr) { object_task* pTask = lzham_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); if (!pTask) return false; return queue_task(pTask, data, pData_ptr); } template inline bool task_pool::queue_multiple_object_tasks(S* pObject, T pObject_method, uint64 first_data, uint num_tasks, void* pData_ptr) { LZHAM_ASSERT(m_num_threads); LZHAM_ASSERT(pObject); LZHAM_ASSERT(num_tasks); if (!num_tasks) return true; bool status = true; uint i; for (i = 0; i < num_tasks; i++) { task tsk; tsk.m_pObj = lzham_new >(pObject, pObject_method, cObjectTaskFlagDeleteAfterExecution); if (!tsk.m_pObj) { status = false; break; } tsk.m_data = first_data + i; tsk.m_pData_ptr = pData_ptr; tsk.m_flags = cTaskFlagObject; if (!m_task_stack.try_push(tsk)) { status = false; break; } } if (i) { atomic_add32(&m_num_outstanding_tasks, i); m_tasks_available.release(i); } return status; } inline void lzham_sleep(unsigned int milliseconds) { Sleep(milliseconds); } uint lzham_get_max_helper_threads(); } // namespace lzham #endif // LZHAM_USE_WIN32_API DaemonEngine-crunch-ef4d32f/crnlib/lzma_7zBuf.cpp000066400000000000000000000011041503722002600220100ustar00rootroot00000000000000/* 7zBuf.c -- Byte Buffer 2008-03-28 Igor Pavlov Public domain */ #include "crn_core.h" #include "lzma_7zBuf.h" namespace crnlib { void Buf_Init(CBuf* p) { p->data = 0; p->size = 0; } int Buf_Create(CBuf* p, size_t size, ISzAlloc* alloc) { p->size = 0; if (size == 0) { p->data = 0; return 1; } p->data = (Byte*)alloc->Alloc(alloc, size); if (p->data != 0) { p->size = size; return 1; } return 0; } void Buf_Free(CBuf* p, ISzAlloc* alloc) { alloc->Free(alloc, p->data); p->data = 0; p->size = 0; } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_7zBuf.h000066400000000000000000000012011503722002600214530ustar00rootroot00000000000000/* 7zBuf.h -- Byte Buffer 2008-10-04 : Igor Pavlov : Public domain */ #ifndef __7Z_BUF_H #define __7Z_BUF_H #include "lzma_Types.h" namespace crnlib { typedef 
struct { Byte* data; size_t size; } CBuf; void Buf_Init(CBuf* p); int Buf_Create(CBuf* p, size_t size, ISzAlloc* alloc); void Buf_Free(CBuf* p, ISzAlloc* alloc); typedef struct { Byte* data; size_t size; size_t pos; } CDynBuf; void DynBuf_Construct(CDynBuf* p); void DynBuf_SeekToBeg(CDynBuf* p); int DynBuf_Write(CDynBuf* p, const Byte* buf, size_t size, ISzAlloc* alloc); void DynBuf_Free(CDynBuf* p, ISzAlloc* alloc); } #endif DaemonEngine-crunch-ef4d32f/crnlib/lzma_7zBuf2.cpp000066400000000000000000000016371503722002600221050ustar00rootroot00000000000000/* 7zBuf2.c -- Byte Buffer 2008-10-04 : Igor Pavlov : Public domain */ #include "crn_core.h" #include #include "lzma_7zBuf.h" namespace crnlib { void DynBuf_Construct(CDynBuf* p) { p->data = 0; p->size = 0; p->pos = 0; } void DynBuf_SeekToBeg(CDynBuf* p) { p->pos = 0; } int DynBuf_Write(CDynBuf* p, const Byte* buf, size_t size, ISzAlloc* alloc) { if (size > p->size - p->pos) { size_t newSize = p->pos + size; Byte* data; newSize += newSize / 4; data = (Byte*)alloc->Alloc(alloc, newSize); if (data == 0) return 0; p->size = newSize; memcpy(data, p->data, p->pos); alloc->Free(alloc, p->data); p->data = data; } memcpy(p->data + p->pos, buf, size); p->pos += size; return 1; } void DynBuf_Free(CDynBuf* p, ISzAlloc* alloc) { alloc->Free(alloc, p->data); p->data = 0; p->size = 0; p->pos = 0; } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_7zCrc.cpp000066400000000000000000000013731503722002600220130ustar00rootroot00000000000000/* 7zCrc.c -- CRC32 calculation 2008-08-05 Igor Pavlov Public domain */ #include "crn_core.h" #include "lzma_7zCrc.h" namespace crnlib { #define kCrcPoly 0xEDB88320 UInt32 g_CrcTable[256]; void MY_FAST_CALL CrcGenerateTable(void) { UInt32 i; for (i = 0; i < 256; i++) { UInt32 r = i; int j; for (j = 0; j < 8; j++) r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); g_CrcTable[i] = r; } } UInt32 MY_FAST_CALL CrcUpdate(UInt32 v, const void* data, size_t size) { const Byte* p = (const Byte*)data; for (; size > 0; 
size--, p++) v = CRC_UPDATE_BYTE(v, *p); return v; } UInt32 MY_FAST_CALL CrcCalc(const void* data, size_t size) { return CrcUpdate(CRC_INIT_VAL, data, size) ^ 0xFFFFFFFF; } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_7zCrc.h000066400000000000000000000011111503722002600214460ustar00rootroot00000000000000/* 7zCrc.h -- CRC32 calculation 2008-03-13 Igor Pavlov Public domain */ #ifndef __7Z_CRC_H #define __7Z_CRC_H #include #include "lzma_Types.h" namespace crnlib { extern UInt32 g_CrcTable[]; void MY_FAST_CALL CrcGenerateTable(void); #define CRC_INIT_VAL 0xFFFFFFFF #define CRC_GET_DIGEST(crc) ((crc) ^ 0xFFFFFFFF) #define CRC_UPDATE_BYTE(crc, b) (g_CrcTable[((crc) ^ (b)) & 0xFF] ^ ((crc) >> 8)) UInt32 MY_FAST_CALL CrcUpdate(UInt32 crc, const void* data, size_t size); UInt32 MY_FAST_CALL CrcCalc(const void* data, size_t size); } #endif DaemonEngine-crunch-ef4d32f/crnlib/lzma_7zFile.cpp000066400000000000000000000143541503722002600221660ustar00rootroot00000000000000/* 7zFile.c -- File IO 2008-11-22 : Igor Pavlov : Public domain */ #include "crn_core.h" #include "lzma_7zFile.h" #ifndef USE_WINDOWS_FILE #include #endif #ifdef USE_WINDOWS_FILE /* ReadFile and WriteFile functions in Windows have BUG: If you Read or Write 64MB or more (probably min_failure_size = 64MB - 32KB + 1) from/to Network file, it returns ERROR_NO_SYSTEM_RESOURCES (Insufficient system resources exist to complete the requested service). Probably in some version of Windows there are problems with other sizes: for 32 MB (maybe also for 16 MB). And message can be "Network connection was lost" */ #define kChunkSizeMax (1 << 22) #endif namespace crnlib { void File_Construct(CSzFile* p) { #ifdef USE_WINDOWS_FILE p->handle = INVALID_HANDLE_VALUE; #else p->file = NULL; #endif } static WRes File_Open(CSzFile* p, const char* name, int writeMode) { #ifdef USE_WINDOWS_FILE p->handle = CreateFileA(name, writeMode ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ, NULL, writeMode ? 
CREATE_ALWAYS : OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); return (p->handle != INVALID_HANDLE_VALUE) ? 0 : GetLastError(); #else p->file = fopen(name, writeMode ? "wb+" : "rb"); return (p->file != 0) ? 0 : errno; #endif } WRes InFile_Open(CSzFile* p, const char* name) { return File_Open(p, name, 0); } WRes OutFile_Open(CSzFile* p, const char* name) { return File_Open(p, name, 1); } WRes File_Close(CSzFile* p) { #ifdef USE_WINDOWS_FILE if (p->handle != INVALID_HANDLE_VALUE) { if (!CloseHandle(p->handle)) return GetLastError(); p->handle = INVALID_HANDLE_VALUE; } #else if (p->file != NULL) { int res = fclose(p->file); if (res != 0) return res; p->file = NULL; } #endif return 0; } WRes File_Read(CSzFile* p, void* data, size_t* size) { size_t originalSize = *size; if (originalSize == 0) return 0; #ifdef USE_WINDOWS_FILE *size = 0; do { DWORD curSize = (originalSize > kChunkSizeMax) ? kChunkSizeMax : (DWORD)originalSize; DWORD processed = 0; BOOL res = ReadFile(p->handle, data, curSize, &processed, NULL); data = (void*)((Byte*)data + processed); originalSize -= processed; *size += processed; if (!res) return GetLastError(); if (processed == 0) break; } while (originalSize > 0); return 0; #else *size = fread(data, 1, originalSize, p->file); if (*size == originalSize) return 0; return ferror(p->file); #endif } WRes File_Write(CSzFile* p, const void* data, size_t* size) { size_t originalSize = *size; if (originalSize == 0) return 0; #ifdef USE_WINDOWS_FILE *size = 0; do { DWORD curSize = (originalSize > kChunkSizeMax) ? 
kChunkSizeMax : (DWORD)originalSize; DWORD processed = 0; BOOL res = WriteFile(p->handle, data, curSize, &processed, NULL); data = (void*)((Byte*)data + processed); originalSize -= processed; *size += processed; if (!res) return GetLastError(); if (processed == 0) break; } while (originalSize > 0); return 0; #else *size = fwrite(data, 1, originalSize, p->file); if (*size == originalSize) return 0; return ferror(p->file); #endif } WRes File_Seek(CSzFile* p, Int64* pos, ESzSeek origin) { #ifdef USE_WINDOWS_FILE LARGE_INTEGER value; DWORD moveMethod; value.LowPart = (DWORD)*pos; value.HighPart = (LONG)((UInt64)*pos >> 16 >> 16); /* for case when UInt64 is 32-bit only */ switch (origin) { case SZ_SEEK_SET: moveMethod = FILE_BEGIN; break; case SZ_SEEK_CUR: moveMethod = FILE_CURRENT; break; case SZ_SEEK_END: moveMethod = FILE_END; break; default: return ERROR_INVALID_PARAMETER; } value.LowPart = SetFilePointer(p->handle, value.LowPart, &value.HighPart, moveMethod); if (value.LowPart == 0xFFFFFFFF) { WRes res = GetLastError(); if (res != NO_ERROR) return res; } *pos = ((Int64)value.HighPart << 32) | value.LowPart; return 0; #else int moveMethod; int res; switch (origin) { case SZ_SEEK_SET: moveMethod = SEEK_SET; break; case SZ_SEEK_CUR: moveMethod = SEEK_CUR; break; case SZ_SEEK_END: moveMethod = SEEK_END; break; default: return 1; } res = fseek(p->file, (long)*pos, moveMethod); *pos = ftell(p->file); return res; #endif } WRes File_GetLength(CSzFile* p, UInt64* length) { #ifdef USE_WINDOWS_FILE DWORD sizeHigh; DWORD sizeLow = GetFileSize(p->handle, &sizeHigh); if (sizeLow == 0xFFFFFFFF) { DWORD res = GetLastError(); if (res != NO_ERROR) return res; } *length = (((UInt64)sizeHigh) << 32) + sizeLow; return 0; #else long pos = ftell(p->file); int res = fseek(p->file, 0, SEEK_END); *length = ftell(p->file); fseek(p->file, pos, SEEK_SET); return res; #endif } /* ---------- FileSeqInStream ---------- */ static SRes FileSeqInStream_Read(void* pp, void* buf, size_t* size) { 
CFileSeqInStream* p = (CFileSeqInStream*)pp; return File_Read(&p->file, buf, size) == 0 ? SZ_OK : SZ_ERROR_READ; } void FileSeqInStream_CreateVTable(CFileSeqInStream* p) { p->s.Read = FileSeqInStream_Read; } /* ---------- FileInStream ---------- */ static SRes FileInStream_Read(void* pp, void* buf, size_t* size) { CFileInStream* p = (CFileInStream*)pp; return (File_Read(&p->file, buf, size) == 0) ? SZ_OK : SZ_ERROR_READ; } static SRes FileInStream_Seek(void* pp, Int64* pos, ESzSeek origin) { CFileInStream* p = (CFileInStream*)pp; return File_Seek(&p->file, pos, origin); } void FileInStream_CreateVTable(CFileInStream* p) { p->s.Read = FileInStream_Read; p->s.Seek = FileInStream_Seek; } /* ---------- FileOutStream ---------- */ static size_t FileOutStream_Write(void* pp, const void* data, size_t size) { CFileOutStream* p = (CFileOutStream*)pp; File_Write(&p->file, data, &size); return size; } void FileOutStream_CreateVTable(CFileOutStream* p) { p->s.Write = FileOutStream_Write; } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_7zFile.h000066400000000000000000000025251503722002600216300ustar00rootroot00000000000000/* 7zFile.h -- File IO 2008-11-22 : Igor Pavlov : Public domain */ #ifndef __7Z_FILE_H #define __7Z_FILE_H #ifdef _WIN32 #define USE_WINDOWS_FILE #endif #ifdef USE_WINDOWS_FILE #include #else #include #endif #include "lzma_Types.h" namespace crnlib { /* ---------- File ---------- */ typedef struct { #ifdef USE_WINDOWS_FILE HANDLE handle; #else FILE* file; #endif } CSzFile; void File_Construct(CSzFile* p); WRes InFile_Open(CSzFile* p, const char* name); WRes OutFile_Open(CSzFile* p, const char* name); WRes File_Close(CSzFile* p); /* reads max(*size, remain file's size) bytes */ WRes File_Read(CSzFile* p, void* data, size_t* size); /* writes *size bytes */ WRes File_Write(CSzFile* p, const void* data, size_t* size); WRes File_Seek(CSzFile* p, Int64* pos, ESzSeek origin); WRes File_GetLength(CSzFile* p, UInt64* length); /* ---------- FileInStream ---------- */ 
typedef struct { ISeqInStream s; CSzFile file; } CFileSeqInStream; void FileSeqInStream_CreateVTable(CFileSeqInStream* p); typedef struct { ISeekInStream s; CSzFile file; } CFileInStream; void FileInStream_CreateVTable(CFileInStream* p); typedef struct { ISeqOutStream s; CSzFile file; } CFileOutStream; void FileOutStream_CreateVTable(CFileOutStream* p); } #endif DaemonEngine-crunch-ef4d32f/crnlib/lzma_7zStream.cpp000066400000000000000000000100541503722002600225330ustar00rootroot00000000000000/* 7zStream.c -- 7z Stream functions 2008-11-23 : Igor Pavlov : Public domain */ #include "crn_core.h" #include #include "lzma_Types.h" namespace crnlib { SRes SeqInStream_Read2(ISeqInStream* stream, void* buf, size_t size, SRes errorType) { while (size != 0) { size_t processed = size; RINOK(stream->Read(stream, buf, &processed)); if (processed == 0) return errorType; buf = (void*)((Byte*)buf + processed); size -= processed; } return SZ_OK; } SRes SeqInStream_Read(ISeqInStream* stream, void* buf, size_t size) { return SeqInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); } SRes SeqInStream_ReadByte(ISeqInStream* stream, Byte* buf) { size_t processed = 1; RINOK(stream->Read(stream, buf, &processed)); return (processed == 1) ? 
SZ_OK : SZ_ERROR_INPUT_EOF; } SRes LookInStream_SeekTo(ILookInStream* stream, UInt64 offset) { Int64 t = offset; return stream->Seek(stream, &t, SZ_SEEK_SET); } SRes LookInStream_LookRead(ILookInStream* stream, void* buf, size_t* size) { void* lookBuf; if (*size == 0) return SZ_OK; RINOK(stream->Look(stream, &lookBuf, size)); memcpy(buf, lookBuf, *size); return stream->Skip(stream, *size); } SRes LookInStream_Read2(ILookInStream* stream, void* buf, size_t size, SRes errorType) { while (size != 0) { size_t processed = size; RINOK(stream->Read(stream, buf, &processed)); if (processed == 0) return errorType; buf = (void*)((Byte*)buf + processed); size -= processed; } return SZ_OK; } SRes LookInStream_Read(ILookInStream* stream, void* buf, size_t size) { return LookInStream_Read2(stream, buf, size, SZ_ERROR_INPUT_EOF); } static SRes LookToRead_Look_Lookahead(void* pp, void** buf, size_t* size) { SRes res = SZ_OK; CLookToRead* p = (CLookToRead*)pp; size_t size2 = p->size - p->pos; if (size2 == 0 && *size > 0) { p->pos = 0; size2 = LookToRead_BUF_SIZE; res = p->realStream->Read(p->realStream, p->buf, &size2); p->size = size2; } if (size2 < *size) *size = size2; *buf = p->buf + p->pos; return res; } static SRes LookToRead_Look_Exact(void* pp, void** buf, size_t* size) { SRes res = SZ_OK; CLookToRead* p = (CLookToRead*)pp; size_t size2 = p->size - p->pos; if (size2 == 0 && *size > 0) { p->pos = 0; if (*size > LookToRead_BUF_SIZE) *size = LookToRead_BUF_SIZE; res = p->realStream->Read(p->realStream, p->buf, size); size2 = p->size = *size; } if (size2 < *size) *size = size2; *buf = p->buf + p->pos; return res; } static SRes LookToRead_Skip(void* pp, size_t offset) { CLookToRead* p = (CLookToRead*)pp; p->pos += offset; return SZ_OK; } static SRes LookToRead_Read(void* pp, void* buf, size_t* size) { CLookToRead* p = (CLookToRead*)pp; size_t rem = p->size - p->pos; if (rem == 0) return p->realStream->Read(p->realStream, buf, size); if (rem > *size) rem = *size; memcpy(buf, 
p->buf + p->pos, rem); p->pos += rem; *size = rem; return SZ_OK; } static SRes LookToRead_Seek(void* pp, Int64* pos, ESzSeek origin) { CLookToRead* p = (CLookToRead*)pp; p->pos = p->size = 0; return p->realStream->Seek(p->realStream, pos, origin); } void LookToRead_CreateVTable(CLookToRead* p, int lookahead) { p->s.Look = lookahead ? LookToRead_Look_Lookahead : LookToRead_Look_Exact; p->s.Skip = LookToRead_Skip; p->s.Read = LookToRead_Read; p->s.Seek = LookToRead_Seek; } void LookToRead_Init(CLookToRead* p) { p->pos = p->size = 0; } static SRes SecToLook_Read(void* pp, void* buf, size_t* size) { CSecToLook* p = (CSecToLook*)pp; return LookInStream_LookRead(p->realStream, buf, size); } void SecToLook_CreateVTable(CSecToLook* p) { p->s.Read = SecToLook_Read; } static SRes SecToRead_Read(void* pp, void* buf, size_t* size) { CSecToRead* p = (CSecToRead*)pp; return p->realStream->Read(p->realStream, buf, size); } void SecToRead_CreateVTable(CSecToRead* p) { p->s.Read = SecToRead_Read; } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_7zVersion.h000066400000000000000000000004051503722002600223710ustar00rootroot00000000000000#define MY_VER_MAJOR 4 #define MY_VER_MINOR 63 #define MY_VER_BUILD 0 #define MY_VERSION "4.63" #define MY_DATE "2008-12-31" #define MY_COPYRIGHT ": Igor Pavlov : Public domain" #define MY_VERSION_COPYRIGHT_DATE MY_VERSION " " MY_COPYRIGHT " : " MY_DATE DaemonEngine-crunch-ef4d32f/crnlib/lzma_Alloc.cpp000066400000000000000000000054731503722002600220620ustar00rootroot00000000000000/* Alloc.c -- Memory allocation functions 2008-09-24 Igor Pavlov Public domain */ #include "crn_core.h" #ifdef _WIN32 #include #endif #include #include "lzma_Alloc.h" namespace crnlib { /* #define _SZ_ALLOC_DEBUG */ /* use _SZ_ALLOC_DEBUG to debug alloc/free operations */ #ifdef _SZ_ALLOC_DEBUG #include int g_allocCount = 0; int g_allocCountMid = 0; int g_allocCountBig = 0; #endif void* MyAlloc(size_t size) { if (size == 0) return 0; #ifdef _SZ_ALLOC_DEBUG { void* p = 
crnlib::crnlib_malloc(size); fprintf(stderr, "\nAlloc %10d bytes, count = %10d, addr = %8X", size, g_allocCount++, (unsigned)p); return p; } #else return crnlib::crnlib_malloc(size); #endif } void MyFree(void* address) { #ifdef _SZ_ALLOC_DEBUG if (address != 0) fprintf(stderr, "\nFree; count = %10d, addr = %8X", --g_allocCount, (unsigned)address); #endif crnlib::crnlib_free(address); } #ifdef _WIN32 void* MidAlloc(size_t size) { if (size == 0) return 0; #ifdef _SZ_ALLOC_DEBUG fprintf(stderr, "\nAlloc_Mid %10d bytes; count = %10d", size, g_allocCountMid++); #endif return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); } void MidFree(void* address) { #ifdef _SZ_ALLOC_DEBUG if (address != 0) fprintf(stderr, "\nFree_Mid; count = %10d", --g_allocCountMid); #endif if (address == 0) return; VirtualFree(address, 0, MEM_RELEASE); } #ifndef MEM_LARGE_PAGES #undef _7ZIP_LARGE_PAGES #endif #ifdef _7ZIP_LARGE_PAGES SIZE_T g_LargePageSize = 0; typedef SIZE_T(WINAPI* GetLargePageMinimumP)(); #endif void SetLargePageSize() { #ifdef _7ZIP_LARGE_PAGES SIZE_T size = 0; GetLargePageMinimumP largePageMinimum = (GetLargePageMinimumP) GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")), "GetLargePageMinimum"); if (largePageMinimum == 0) return; size = largePageMinimum(); if (size == 0 || (size & (size - 1)) != 0) return; g_LargePageSize = size; #endif } void* BigAlloc(size_t size) { if (size == 0) return 0; #ifdef _SZ_ALLOC_DEBUG fprintf(stderr, "\nAlloc_Big %10d bytes; count = %10d", size, g_allocCountBig++); #endif #ifdef _7ZIP_LARGE_PAGES if (g_LargePageSize != 0 && g_LargePageSize <= (1 << 30) && size >= (1 << 18)) { void* res = VirtualAlloc(0, (size + g_LargePageSize - 1) & (~(g_LargePageSize - 1)), MEM_COMMIT | MEM_LARGE_PAGES, PAGE_READWRITE); if (res != 0) return res; } #endif return VirtualAlloc(0, size, MEM_COMMIT, PAGE_READWRITE); } void BigFree(void* address) { #ifdef _SZ_ALLOC_DEBUG if (address != 0) fprintf(stderr, "\nFree_Big; count = %10d", --g_allocCountBig); #endif 
if (address == 0) return; VirtualFree(address, 0, MEM_RELEASE); } #endif } DaemonEngine-crunch-ef4d32f/crnlib/lzma_Alloc.h000066400000000000000000000011361503722002600215170ustar00rootroot00000000000000/* Alloc.h -- Memory allocation functions 2008-03-13 Igor Pavlov Public domain */ #ifndef __COMMON_ALLOC_H #define __COMMON_ALLOC_H #include namespace crnlib { void* MyAlloc(size_t size); void MyFree(void* address); #ifdef _WIN32 void SetLargePageSize(); void* MidAlloc(size_t size); void MidFree(void* address); void* BigAlloc(size_t size); void BigFree(void* address); #else #define MidAlloc(size) MyAlloc(size) #define MidFree(address) MyFree(address) #define BigAlloc(size) MyAlloc(size) #define BigFree(address) MyFree(address) #endif } #endif DaemonEngine-crunch-ef4d32f/crnlib/lzma_Bcj2.cpp000066400000000000000000000077451503722002600216140ustar00rootroot00000000000000/* Bcj2.c -- Converter for x86 code (BCJ2) 2008-10-04 : Igor Pavlov : Public domain */ #include "crn_core.h" #include "lzma_Bcj2.h" namespace crnlib { #ifdef _LZMA_PROB32 #define CProb UInt32 #else #define CProb UInt16 #endif #define IsJcc(b0, b1) ((b0) == 0x0F && ((b1)&0xF0) == 0x80) #define IsJ(b0, b1) ((b1 & 0xFE) == 0xE8 || IsJcc(b0, b1)) #define kNumTopBits 24 #define kTopValue ((UInt32)1 << kNumTopBits) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) #define kNumMoveBits 5 #define RC_READ_BYTE (*buffer++) #define RC_TEST \ { \ if (buffer == bufferLim) \ return SZ_ERROR_DATA; \ } #define RC_INIT2 \ code = 0; \ range = 0xFFFFFFFF; \ { \ int i; \ for (i = 0; i < 5; i++) { \ RC_TEST; \ code = (code << 8) | RC_READ_BYTE; \ } \ } #define NORMALIZE \ if (range < kTopValue) { \ RC_TEST; \ range <<= 8; \ code = (code << 8) | RC_READ_BYTE; \ } #define IF_BIT_0(p) \ ttt = *(p); \ bound = (range >> kNumBitModelTotalBits) * ttt; \ if (code < bound) #define UPDATE_0(p) \ range = bound; \ *(p) = (CProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); \ NORMALIZE; #define 
UPDATE_1(p) \ range -= bound; \ code -= bound; \ *(p) = (CProb)(ttt - (ttt >> kNumMoveBits)); \ NORMALIZE; int Bcj2_Decode( const Byte* buf0, SizeT size0, const Byte* buf1, SizeT size1, const Byte* buf2, SizeT size2, const Byte* buf3, SizeT size3, Byte* outBuf, SizeT outSize) { CProb p[256 + 2]; SizeT inPos = 0, outPos = 0; const Byte *buffer, *bufferLim; UInt32 range, code; Byte prevByte = 0; unsigned int i; for (i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = kBitModelTotal >> 1; buffer = buf3; bufferLim = buffer + size3; RC_INIT2 if (outSize == 0) return SZ_OK; for (;;) { Byte b; CProb* prob; UInt32 bound; UInt32 ttt; SizeT limit = size0 - inPos; if (outSize - outPos < limit) limit = outSize - outPos; while (limit != 0) { Byte b = buf0[inPos]; outBuf[outPos++] = b; if (IsJ(prevByte, b)) break; inPos++; prevByte = b; limit--; } if (limit == 0 || outPos == outSize) break; b = buf0[inPos++]; if (b == 0xE8) prob = p + prevByte; else if (b == 0xE9) prob = p + 256; else prob = p + 257; IF_BIT_0(prob) { UPDATE_0(prob) prevByte = b; } else { UInt32 dest; const Byte* v; UPDATE_1(prob) if (b == 0xE8) { v = buf1; if (size1 < 4) return SZ_ERROR_DATA; buf1 += 4; size1 -= 4; } else { v = buf2; if (size2 < 4) return SZ_ERROR_DATA; buf2 += 4; size2 -= 4; } dest = (((UInt32)v[0] << 24) | ((UInt32)v[1] << 16) | ((UInt32)v[2] << 8) | ((UInt32)v[3])) - ((UInt32)outPos + 4); outBuf[outPos++] = (Byte)dest; if (outPos == outSize) break; outBuf[outPos++] = (Byte)(dest >> 8); if (outPos == outSize) break; outBuf[outPos++] = (Byte)(dest >> 16); if (outPos == outSize) break; outBuf[outPos++] = prevByte = (Byte)(dest >> 24); } } return (outPos == outSize) ? 
SZ_OK : SZ_ERROR_DATA; } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_Bcj2.h000066400000000000000000000012721503722002600212460ustar00rootroot00000000000000/* Bcj2.h -- Converter for x86 code (BCJ2) 2008-10-04 : Igor Pavlov : Public domain */ #ifndef __BCJ2_H #define __BCJ2_H #include "lzma_Types.h" namespace crnlib { /* Conditions: outSize <= FullOutputSize, where FullOutputSize is full size of output stream of x86_2 filter. If buf0 overlaps outBuf, there are two required conditions: 1) (buf0 >= outBuf) 2) (buf0 + size0 >= outBuf + FullOutputSize). Returns: SZ_OK SZ_ERROR_DATA - Data error */ int Bcj2_Decode( const Byte* buf0, SizeT size0, const Byte* buf1, SizeT size1, const Byte* buf2, SizeT size2, const Byte* buf3, SizeT size3, Byte* outBuf, SizeT outSize); } #endif DaemonEngine-crunch-ef4d32f/crnlib/lzma_Bra.cpp000066400000000000000000000063661503722002600215360ustar00rootroot00000000000000/* Bra.c -- Converters for RISC code 2008-10-04 : Igor Pavlov : Public domain */ #include "crn_core.h" #include "lzma_Bra.h" namespace crnlib { SizeT ARM_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { SizeT i; if (size < 4) return 0; size -= 4; ip += 8; for (i = 0; i <= size; i += 4) { if (data[i + 3] == 0xEB) { UInt32 dest; UInt32 src = ((UInt32)data[i + 2] << 16) | ((UInt32)data[i + 1] << 8) | (data[i + 0]); src <<= 2; if (encoding) dest = ip + (UInt32)i + src; else dest = src - (ip + (UInt32)i); dest >>= 2; data[i + 2] = (Byte)(dest >> 16); data[i + 1] = (Byte)(dest >> 8); data[i + 0] = (Byte)dest; } } return i; } SizeT ARMT_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { SizeT i; if (size < 4) return 0; size -= 4; ip += 4; for (i = 0; i <= size; i += 2) { if ((data[i + 1] & 0xF8) == 0xF0 && (data[i + 3] & 0xF8) == 0xF8) { UInt32 dest; UInt32 src = (((UInt32)data[i + 1] & 0x7) << 19) | ((UInt32)data[i + 0] << 11) | (((UInt32)data[i + 3] & 0x7) << 8) | (data[i + 2]); src <<= 1; if (encoding) dest = ip + (UInt32)i + src; else dest = src - (ip + 
(UInt32)i); dest >>= 1; data[i + 1] = (Byte)(0xF0 | ((dest >> 19) & 0x7)); data[i + 0] = (Byte)(dest >> 11); data[i + 3] = (Byte)(0xF8 | ((dest >> 8) & 0x7)); data[i + 2] = (Byte)dest; i += 2; } } return i; } SizeT PPC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { SizeT i; if (size < 4) return 0; size -= 4; for (i = 0; i <= size; i += 4) { if ((data[i] >> 2) == 0x12 && (data[i + 3] & 3) == 1) { UInt32 src = ((UInt32)(data[i + 0] & 3) << 24) | ((UInt32)data[i + 1] << 16) | ((UInt32)data[i + 2] << 8) | ((UInt32)data[i + 3] & (~3)); UInt32 dest; if (encoding) dest = ip + (UInt32)i + src; else dest = src - (ip + (UInt32)i); data[i + 0] = (Byte)(0x48 | ((dest >> 24) & 0x3)); data[i + 1] = (Byte)(dest >> 16); data[i + 2] = (Byte)(dest >> 8); data[i + 3] &= 0x3; data[i + 3] |= dest; } } return i; } SizeT SPARC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { UInt32 i; if (size < 4) return 0; size -= 4; for (i = 0; i <= size; i += 4) { if (((data[i] == 0x40) && ((data[i + 1] & 0xC0) == 0x00)) || ((data[i] == 0x7F) && ((data[i + 1] & 0xC0) == 0xC0))) { UInt32 src = ((UInt32)data[i + 0] << 24) | ((UInt32)data[i + 1] << 16) | ((UInt32)data[i + 2] << 8) | ((UInt32)data[i + 3]); UInt32 dest; src <<= 2; if (encoding) dest = ip + i + src; else dest = src - (ip + i); dest >>= 2; dest = (((0 - ((dest >> 22) & 1)) << 22) & 0x3FFFFFFF) | (dest & 0x3FFFFF) | 0x40000000; data[i + 0] = (Byte)(dest >> 24); data[i + 1] = (Byte)(dest >> 16); data[i + 2] = (Byte)(dest >> 8); data[i + 3] = (Byte)dest; } } return i; } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_Bra.h000066400000000000000000000036051503722002600211740ustar00rootroot00000000000000/* Bra.h -- Branch converters for executables 2008-10-04 : Igor Pavlov : Public domain */ #ifndef __BRA_H #define __BRA_H #include "lzma_Types.h" namespace crnlib { /* These functions convert relative addresses to absolute addresses in CALL instructions to increase the compression ratio. 
In: data - data buffer size - size of data ip - current virtual Instruction Pinter (IP) value state - state variable for x86 converter encoding - 0 (for decoding), 1 (for encoding) Out: state - state variable for x86 converter Returns: The number of processed bytes. If you call these functions with multiple calls, you must start next call with first byte after block of processed bytes. Type Endian Alignment LookAhead x86 little 1 4 ARMT little 2 2 ARM little 4 0 PPC big 4 0 SPARC big 4 0 IA64 little 16 0 size must be >= Alignment + LookAhead, if it's not last block. If (size < Alignment + LookAhead), converter returns 0. Example: UInt32 ip = 0; for () { ; size must be >= Alignment + LookAhead, if it's not last block SizeT processed = Convert(data, size, ip, 1); data += processed; size -= processed; ip += processed; } */ #define x86_Convert_Init(state) \ { state = 0; } SizeT x86_Convert(Byte* data, SizeT size, UInt32 ip, UInt32* state, int encoding); SizeT ARM_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); SizeT ARMT_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); SizeT PPC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); SizeT SPARC_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); SizeT IA64_Convert(Byte* data, SizeT size, UInt32 ip, int encoding); } #endif DaemonEngine-crunch-ef4d32f/crnlib/lzma_Bra86.cpp000066400000000000000000000043411503722002600217030ustar00rootroot00000000000000/* Bra86.c -- Converter for x86 code (BCJ) 2008-10-04 : Igor Pavlov : Public domain */ #include "crn_core.h" #include "lzma_Bra.h" namespace crnlib { #define Test86MSByte(b) ((b) == 0 || (b) == 0xFF) const Byte kMaskToAllowedStatus[8] = {1, 1, 1, 0, 1, 0, 0, 0}; const Byte kMaskToBitNumber[8] = {0, 1, 2, 2, 3, 3, 3, 3}; SizeT x86_Convert(Byte* data, SizeT size, UInt32 ip, UInt32* state, int encoding) { SizeT bufferPos = 0, prevPosT; UInt32 prevMask = *state & 0x7; if (size < 5) return 0; ip += 5; prevPosT = (SizeT)0 - 1; for (;;) { Byte* p = data 
+ bufferPos; Byte* limit = data + size - 4; for (; p < limit; p++) if ((*p & 0xFE) == 0xE8) break; bufferPos = (SizeT)(p - data); if (p >= limit) break; prevPosT = bufferPos - prevPosT; if (prevPosT > 3) prevMask = 0; else { prevMask = (prevMask << ((int)prevPosT - 1)) & 0x7; if (prevMask != 0) { Byte b = p[4 - kMaskToBitNumber[prevMask]]; if (!kMaskToAllowedStatus[prevMask] || Test86MSByte(b)) { prevPosT = bufferPos; prevMask = ((prevMask << 1) & 0x7) | 1; bufferPos++; continue; } } } prevPosT = bufferPos; if (Test86MSByte(p[4])) { UInt32 src = ((UInt32)p[4] << 24) | ((UInt32)p[3] << 16) | ((UInt32)p[2] << 8) | ((UInt32)p[1]); UInt32 dest; for (;;) { Byte b; int index; if (encoding) dest = (ip + (UInt32)bufferPos) + src; else dest = src - (ip + (UInt32)bufferPos); if (prevMask == 0) break; index = kMaskToBitNumber[prevMask] * 8; b = (Byte)(dest >> (24 - index)); if (!Test86MSByte(b)) break; src = dest ^ ((1 << (32 - index)) - 1); } p[4] = (Byte)(~(((dest >> 24) & 1) - 1)); p[3] = (Byte)(dest >> 16); p[2] = (Byte)(dest >> 8); p[1] = (Byte)dest; bufferPos += 5; } else { prevMask = ((prevMask << 1) & 0x7) | 1; bufferPos++; } } prevPosT = bufferPos - prevPosT; *state = ((prevPosT > 3) ? 
0 : ((prevMask << ((int)prevPosT - 1)) & 0x7)); return bufferPos; } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_BraIA64.cpp000066400000000000000000000034541503722002600221150ustar00rootroot00000000000000/* BraIA64.c -- Converter for IA-64 code 2008-10-04 : Igor Pavlov : Public domain */ #include "crn_core.h" #include "lzma_Bra.h" namespace crnlib { static const Byte kBranchTable[32] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 6, 6, 0, 0, 7, 7, 4, 4, 0, 0, 4, 4, 0, 0}; SizeT IA64_Convert(Byte* data, SizeT size, UInt32 ip, int encoding) { SizeT i; if (size < 16) return 0; size -= 16; for (i = 0; i <= size; i += 16) { UInt32 instrTemplate = data[i] & 0x1F; UInt32 mask = kBranchTable[instrTemplate]; UInt32 bitPos = 5; int slot; for (slot = 0; slot < 3; slot++, bitPos += 41) { UInt32 bytePos, bitRes; UInt64 instruction, instNorm; int j; if (((mask >> slot) & 1) == 0) continue; bytePos = (bitPos >> 3); bitRes = bitPos & 0x7; instruction = 0; for (j = 0; j < 6; j++) instruction += (UInt64)data[i + j + bytePos] << (8 * j); instNorm = instruction >> bitRes; if (((instNorm >> 37) & 0xF) == 0x5 && ((instNorm >> 9) & 0x7) == 0) { UInt32 src = (UInt32)((instNorm >> 13) & 0xFFFFF); UInt32 dest; src |= ((UInt32)(instNorm >> 36) & 1) << 20; src <<= 4; if (encoding) dest = ip + (UInt32)i + src; else dest = src - (ip + (UInt32)i); dest >>= 4; instNorm &= ~((UInt64)(0x8FFFFF) << 13); instNorm |= ((UInt64)(dest & 0xFFFFF) << 13); instNorm |= ((UInt64)(dest & 0x100000) << (36 - 20)); instruction &= (1 << bitRes) - 1; instruction |= (instNorm << bitRes); for (j = 0; j < 6; j++) data[i + j + bytePos] = (Byte)(instruction >> (8 * j)); } } } return i; } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_CpuArch.h000066400000000000000000000041211503722002600220070ustar00rootroot00000000000000/* CpuArch.h 2008-08-05 Igor Pavlov Public domain */ #ifndef __CPUARCH_H #define __CPUARCH_H /* LITTLE_ENDIAN_UNALIGN means: 1) CPU is LITTLE_ENDIAN 2) it's allowed to make unaligned memory accesses 
if LITTLE_ENDIAN_UNALIGN is not defined, it means that we don't know about these properties of platform. */ #if defined(_M_IX86) || defined(_M_X64) || defined(_M_AMD64) || defined(__i386__) || defined(__x86_64__) #define LITTLE_ENDIAN_UNALIGN #endif #ifdef LITTLE_ENDIAN_UNALIGN #define GetUi16(p) (*(const UInt16*)(p)) #define GetUi32(p) (*(const UInt32*)(p)) #define GetUi64(p) (*(const UInt64*)(p)) #define SetUi32(p, d) *(UInt32*)(p) = (d); #else #define GetUi16(p) (((const Byte*)(p))[0] | ((UInt16)((const Byte*)(p))[1] << 8)) #define GetUi32(p) ( \ ((const Byte*)(p))[0] | \ ((UInt32)((const Byte*)(p))[1] << 8) | \ ((UInt32)((const Byte*)(p))[2] << 16) | \ ((UInt32)((const Byte*)(p))[3] << 24)) #define GetUi64(p) (GetUi32(p) | ((UInt64)GetUi32(((const Byte*)(p)) + 4) << 32)) #define SetUi32(p, d) \ { \ UInt32 _x_ = (d); \ ((Byte*)(p))[0] = (Byte)_x_; \ ((Byte*)(p))[1] = (Byte)(_x_ >> 8); \ ((Byte*)(p))[2] = (Byte)(_x_ >> 16); \ ((Byte*)(p))[3] = (Byte)(_x_ >> 24); \ } #endif #if defined(LITTLE_ENDIAN_UNALIGN) && defined(_WIN64) && (_MSC_VER >= 1300) #pragma intrinsic(_byteswap_ulong) #pragma intrinsic(_byteswap_uint64) #define GetBe32(p) _byteswap_ulong(*(const UInt32*)(const Byte*)(p)) #define GetBe64(p) _byteswap_uint64(*(const UInt64*)(const Byte*)(p)) #else #define GetBe32(p) ( \ ((UInt32)((const Byte*)(p))[0] << 24) | \ ((UInt32)((const Byte*)(p))[1] << 16) | \ ((UInt32)((const Byte*)(p))[2] << 8) | \ ((const Byte*)(p))[3]) #define GetBe64(p) (((UInt64)GetBe32(p) << 32) | GetBe32(((const Byte*)(p)) + 4)) #endif #define GetBe16(p) (((UInt16)((const Byte*)(p))[0] << 8) | ((const Byte*)(p))[1]) #endif DaemonEngine-crunch-ef4d32f/crnlib/lzma_LzFind.cpp000066400000000000000000000505201503722002600222070ustar00rootroot00000000000000/* LzFind.c -- Match finder for LZ algorithms 2008-10-04 : Igor Pavlov : Public domain */ #include "crn_core.h" #include #include "lzma_LzFind.h" #include "lzma_LzHash.h" namespace crnlib { #define kEmptyHashValue 0 #define 
kMaxValForNormalize ((UInt32)0xFFFFFFFF) #define kNormalizeStepMin (1 << 10) /* it must be power of 2 */ #define kNormalizeMask (~(kNormalizeStepMin - 1)) #define kMaxHistorySize ((UInt32)3 << 30) #define kStartMaxLen 3 static void LzInWindow_Free(CMatchFinder* p, ISzAlloc* alloc) { if (!p->directInput) { alloc->Free(alloc, p->bufferBase); p->bufferBase = 0; } } /* keepSizeBefore + keepSizeAfter + keepSizeReserv must be < 4G) */ static int LzInWindow_Create(CMatchFinder* p, UInt32 keepSizeReserv, ISzAlloc* alloc) { UInt32 blockSize = p->keepSizeBefore + p->keepSizeAfter + keepSizeReserv; if (p->directInput) { p->blockSize = blockSize; return 1; } if (p->bufferBase == 0 || p->blockSize != blockSize) { LzInWindow_Free(p, alloc); p->blockSize = blockSize; p->bufferBase = (Byte*)alloc->Alloc(alloc, (size_t)blockSize); } return (p->bufferBase != 0); } Byte* MatchFinder_GetPointerToCurrentPos(CMatchFinder* p) { return p->buffer; } Byte MatchFinder_GetIndexByte(CMatchFinder* p, Int32 index) { return p->buffer[index]; } UInt32 MatchFinder_GetNumAvailableBytes(CMatchFinder* p) { return p->streamPos - p->pos; } void MatchFinder_ReduceOffsets(CMatchFinder* p, UInt32 subValue) { p->posLimit -= subValue; p->pos -= subValue; p->streamPos -= subValue; } static void MatchFinder_ReadBlock(CMatchFinder* p) { if (p->streamEndWasReached || p->result != SZ_OK) return; for (;;) { Byte* dest = p->buffer + (p->streamPos - p->pos); size_t size = (p->bufferBase + p->blockSize - dest); if (size == 0) return; p->result = p->stream->Read(p->stream, dest, &size); if (p->result != SZ_OK) return; if (size == 0) { p->streamEndWasReached = 1; return; } p->streamPos += (UInt32)size; if (p->streamPos - p->pos > p->keepSizeAfter) return; } } void MatchFinder_MoveBlock(CMatchFinder* p) { memmove(p->bufferBase, p->buffer - p->keepSizeBefore, (size_t)(p->streamPos - p->pos + p->keepSizeBefore)); p->buffer = p->bufferBase + p->keepSizeBefore; } int MatchFinder_NeedMove(CMatchFinder* p) { /* if 
(p->streamEndWasReached) return 0; */ return ((size_t)(p->bufferBase + p->blockSize - p->buffer) <= p->keepSizeAfter); } void MatchFinder_ReadIfRequired(CMatchFinder* p) { if (p->streamEndWasReached) return; if (p->keepSizeAfter >= p->streamPos - p->pos) MatchFinder_ReadBlock(p); } static void MatchFinder_CheckAndMoveAndRead(CMatchFinder* p) { if (MatchFinder_NeedMove(p)) MatchFinder_MoveBlock(p); MatchFinder_ReadBlock(p); } static void MatchFinder_SetDefaultSettings(CMatchFinder* p) { p->cutValue = 32; p->btMode = 1; p->numHashBytes = 4; /* p->skipModeBits = 0; */ p->directInput = 0; p->bigHash = 0; } #define kCrcPoly 0xEDB88320 void MatchFinder_Construct(CMatchFinder* p) { UInt32 i; p->bufferBase = 0; p->directInput = 0; p->hash = 0; MatchFinder_SetDefaultSettings(p); for (i = 0; i < 256; i++) { UInt32 r = i; int j; for (j = 0; j < 8; j++) r = (r >> 1) ^ (kCrcPoly & ~((r & 1) - 1)); p->crc[i] = r; } } static void MatchFinder_FreeThisClassMemory(CMatchFinder* p, ISzAlloc* alloc) { alloc->Free(alloc, p->hash); p->hash = 0; } void MatchFinder_Free(CMatchFinder* p, ISzAlloc* alloc) { MatchFinder_FreeThisClassMemory(p, alloc); LzInWindow_Free(p, alloc); } static CLzRef* AllocRefs(UInt32 num, ISzAlloc* alloc) { size_t sizeInBytes = (size_t)num * sizeof(CLzRef); if (sizeInBytes / sizeof(CLzRef) != num) return 0; return (CLzRef*)alloc->Alloc(alloc, sizeInBytes); } int MatchFinder_Create(CMatchFinder* p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc* alloc) { UInt32 sizeReserv; if (historySize > kMaxHistorySize) { MatchFinder_Free(p, alloc); return 0; } sizeReserv = historySize >> 1; if (historySize > ((UInt32)2 << 30)) sizeReserv = historySize >> 2; sizeReserv += (keepAddBufferBefore + matchMaxLen + keepAddBufferAfter) / 2 + (1 << 19); p->keepSizeBefore = historySize + keepAddBufferBefore + 1; p->keepSizeAfter = matchMaxLen + keepAddBufferAfter; /* we need one additional byte, since we use MoveBlock after pos++ 
and before dictionary using */ if (LzInWindow_Create(p, sizeReserv, alloc)) { UInt32 newCyclicBufferSize = (historySize /* >> p->skipModeBits */) + 1; UInt32 hs; p->matchMaxLen = matchMaxLen; { p->fixedHashSize = 0; if (p->numHashBytes == 2) hs = (1 << 16) - 1; else { hs = historySize - 1; hs |= (hs >> 1); hs |= (hs >> 2); hs |= (hs >> 4); hs |= (hs >> 8); hs >>= 1; /* hs >>= p->skipModeBits; */ hs |= 0xFFFF; /* don't change it! It's required for Deflate */ if (hs > (1 << 24)) { if (p->numHashBytes == 3) hs = (1 << 24) - 1; else hs >>= 1; } } p->hashMask = hs; hs++; if (p->numHashBytes > 2) p->fixedHashSize += kHash2Size; if (p->numHashBytes > 3) p->fixedHashSize += kHash3Size; if (p->numHashBytes > 4) p->fixedHashSize += kHash4Size; hs += p->fixedHashSize; } { UInt32 prevSize = p->hashSizeSum + p->numSons; UInt32 newSize; p->historySize = historySize; p->hashSizeSum = hs; p->cyclicBufferSize = newCyclicBufferSize; p->numSons = (p->btMode ? newCyclicBufferSize * 2 : newCyclicBufferSize); newSize = p->hashSizeSum + p->numSons; if (p->hash != 0 && prevSize == newSize) return 1; MatchFinder_FreeThisClassMemory(p, alloc); p->hash = AllocRefs(newSize, alloc); if (p->hash != 0) { p->son = p->hash + p->hashSizeSum; return 1; } } } MatchFinder_Free(p, alloc); return 0; } static void MatchFinder_SetLimits(CMatchFinder* p) { UInt32 limit = kMaxValForNormalize - p->pos; UInt32 limit2 = p->cyclicBufferSize - p->cyclicBufferPos; if (limit2 < limit) limit = limit2; limit2 = p->streamPos - p->pos; if (limit2 <= p->keepSizeAfter) { if (limit2 > 0) limit2 = 1; } else limit2 -= p->keepSizeAfter; if (limit2 < limit) limit = limit2; { UInt32 lenLimit = p->streamPos - p->pos; if (lenLimit > p->matchMaxLen) lenLimit = p->matchMaxLen; p->lenLimit = lenLimit; } p->posLimit = p->pos + limit; } void MatchFinder_Init(CMatchFinder* p) { UInt32 i; for (i = 0; i < p->hashSizeSum; i++) p->hash[i] = kEmptyHashValue; p->cyclicBufferPos = 0; p->buffer = p->bufferBase; p->pos = p->streamPos = 
p->cyclicBufferSize; p->result = SZ_OK; p->streamEndWasReached = 0; MatchFinder_ReadBlock(p); MatchFinder_SetLimits(p); } static UInt32 MatchFinder_GetSubValue(CMatchFinder* p) { return (p->pos - p->historySize - 1) & kNormalizeMask; } void MatchFinder_Normalize3(UInt32 subValue, CLzRef* items, UInt32 numItems) { UInt32 i; for (i = 0; i < numItems; i++) { UInt32 value = items[i]; if (value <= subValue) value = kEmptyHashValue; else value -= subValue; items[i] = value; } } static void MatchFinder_Normalize(CMatchFinder* p) { UInt32 subValue = MatchFinder_GetSubValue(p); MatchFinder_Normalize3(subValue, p->hash, p->hashSizeSum + p->numSons); MatchFinder_ReduceOffsets(p, subValue); } static void MatchFinder_CheckLimits(CMatchFinder* p) { if (p->pos == kMaxValForNormalize) MatchFinder_Normalize(p); if (!p->streamEndWasReached && p->keepSizeAfter == p->streamPos - p->pos) MatchFinder_CheckAndMoveAndRead(p); if (p->cyclicBufferPos == p->cyclicBufferSize) p->cyclicBufferPos = 0; MatchFinder_SetLimits(p); } static UInt32* Hc_GetMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* cur, CLzRef* son, UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, UInt32* distances, UInt32 maxLen) { son[_cyclicBufferPos] = curMatch; for (;;) { UInt32 delta = pos - curMatch; if (cutValue-- == 0 || delta >= _cyclicBufferSize) return distances; { const Byte* pb = cur - delta; curMatch = son[_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? 
_cyclicBufferSize : 0)]; if (pb[maxLen] == cur[maxLen] && *pb == *cur) { UInt32 len = 0; while (++len != lenLimit) if (pb[len] != cur[len]) break; if (maxLen < len) { *distances++ = maxLen = len; *distances++ = delta - 1; if (len == lenLimit) return distances; } } } } } UInt32* GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* cur, CLzRef* son, UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue, UInt32* distances, UInt32 maxLen) { CLzRef* ptr0 = son + (_cyclicBufferPos << 1) + 1; CLzRef* ptr1 = son + (_cyclicBufferPos << 1); UInt32 len0 = 0, len1 = 0; for (;;) { UInt32 delta = pos - curMatch; if (cutValue-- == 0 || delta >= _cyclicBufferSize) { *ptr0 = *ptr1 = kEmptyHashValue; return distances; } { CLzRef* pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); const Byte* pb = cur - delta; UInt32 len = (len0 < len1 ? len0 : len1); if (pb[len] == cur[len]) { if (++len != lenLimit && pb[len] == cur[len]) while (++len != lenLimit) if (pb[len] != cur[len]) break; if (maxLen < len) { *distances++ = maxLen = len; *distances++ = delta - 1; if (len == lenLimit) { *ptr1 = pair[0]; *ptr0 = pair[1]; return distances; } } } if (pb[len] < cur[len]) { *ptr1 = curMatch; ptr1 = pair + 1; curMatch = *ptr1; len1 = len; } else { *ptr0 = curMatch; ptr0 = pair; curMatch = *ptr0; len0 = len; } } } } static void SkipMatchesSpec(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* cur, CLzRef* son, UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 cutValue) { CLzRef* ptr0 = son + (_cyclicBufferPos << 1) + 1; CLzRef* ptr1 = son + (_cyclicBufferPos << 1); UInt32 len0 = 0, len1 = 0; for (;;) { UInt32 delta = pos - curMatch; if (cutValue-- == 0 || delta >= _cyclicBufferSize) { *ptr0 = *ptr1 = kEmptyHashValue; return; } { CLzRef* pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? _cyclicBufferSize : 0)) << 1); const Byte* pb = cur - delta; UInt32 len = (len0 < len1 ? 
len0 : len1); if (pb[len] == cur[len]) { while (++len != lenLimit) if (pb[len] != cur[len]) break; { if (len == lenLimit) { *ptr1 = pair[0]; *ptr0 = pair[1]; return; } } } if (pb[len] < cur[len]) { *ptr1 = curMatch; ptr1 = pair + 1; curMatch = *ptr1; len1 = len; } else { *ptr0 = curMatch; ptr0 = pair; curMatch = *ptr0; len0 = len; } } } } #define MOVE_POS \ ++p->cyclicBufferPos; \ p->buffer++; \ if (++p->pos == p->posLimit) \ MatchFinder_CheckLimits(p); #define MOVE_POS_RET MOVE_POS return offset; static void MatchFinder_MovePos(CMatchFinder* p) { MOVE_POS; } #define GET_MATCHES_HEADER2(minLen, ret_op) \ UInt32 lenLimit; \ UInt32 hashValue; \ const Byte* cur; \ UInt32 curMatch; \ lenLimit = p->lenLimit; \ { \ if (lenLimit < minLen) { \ MatchFinder_MovePos(p); \ ret_op; \ } \ } \ cur = p->buffer; #define GET_MATCHES_HEADER(minLen) GET_MATCHES_HEADER2(minLen, return 0) #define SKIP_HEADER(minLen) GET_MATCHES_HEADER2(minLen, continue) #define MF_PARAMS(p) p->pos, p->buffer, p->son, p->cyclicBufferPos, p->cyclicBufferSize, p->cutValue #define GET_MATCHES_FOOTER(offset, maxLen) \ offset = (UInt32)(GetMatchesSpec1(lenLimit, curMatch, MF_PARAMS(p), \ distances + offset, maxLen) - \ distances); \ MOVE_POS_RET; #define SKIP_FOOTER \ SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); \ MOVE_POS; static UInt32 Bt2_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { UInt32 offset; GET_MATCHES_HEADER(2) HASH2_CALC; curMatch = p->hash[hashValue]; p->hash[hashValue] = p->pos; offset = 0; GET_MATCHES_FOOTER(offset, 1) } UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { UInt32 offset; GET_MATCHES_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hashValue]; p->hash[hashValue] = p->pos; offset = 0; GET_MATCHES_FOOTER(offset, 2) } static UInt32 Bt3_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { UInt32 hash2Value, delta2, maxLen, offset; GET_MATCHES_HEADER(3) HASH3_CALC; delta2 = p->pos - p->hash[hash2Value]; curMatch = p->hash[kFix3HashSize + 
hashValue]; p->hash[hash2Value] = p->hash[kFix3HashSize + hashValue] = p->pos; maxLen = 2; offset = 0; if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { for (; maxLen != lenLimit; maxLen++) if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) break; distances[0] = maxLen; distances[1] = delta2 - 1; offset = 2; if (maxLen == lenLimit) { SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS_RET; } } GET_MATCHES_FOOTER(offset, maxLen) } static UInt32 Bt4_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; GET_MATCHES_HEADER(4) HASH4_CALC; delta2 = p->pos - p->hash[hash2Value]; delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; curMatch = p->hash[kFix4HashSize + hashValue]; p->hash[hash2Value] = p->hash[kFix3HashSize + hash3Value] = p->hash[kFix4HashSize + hashValue] = p->pos; maxLen = 1; offset = 0; if (delta2 < p->cyclicBufferSize && *(cur - delta2) == *cur) { distances[0] = maxLen = 2; distances[1] = delta2 - 1; offset = 2; } if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) { maxLen = 3; distances[offset + 1] = delta3 - 1; offset += 2; delta2 = delta3; } if (offset != 0) { for (; maxLen != lenLimit; maxLen++) if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) break; distances[offset - 2] = maxLen; if (maxLen == lenLimit) { SkipMatchesSpec(lenLimit, curMatch, MF_PARAMS(p)); MOVE_POS_RET; } } if (maxLen < 3) maxLen = 3; GET_MATCHES_FOOTER(offset, maxLen) } static UInt32 Hc4_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { UInt32 hash2Value, hash3Value, delta2, delta3, maxLen, offset; GET_MATCHES_HEADER(4) HASH4_CALC; delta2 = p->pos - p->hash[hash2Value]; delta3 = p->pos - p->hash[kFix3HashSize + hash3Value]; curMatch = p->hash[kFix4HashSize + hashValue]; p->hash[hash2Value] = p->hash[kFix3HashSize + hash3Value] = p->hash[kFix4HashSize + hashValue] = p->pos; maxLen = 1; offset = 0; if (delta2 < p->cyclicBufferSize && *(cur - 
delta2) == *cur) { distances[0] = maxLen = 2; distances[1] = delta2 - 1; offset = 2; } if (delta2 != delta3 && delta3 < p->cyclicBufferSize && *(cur - delta3) == *cur) { maxLen = 3; distances[offset + 1] = delta3 - 1; offset += 2; delta2 = delta3; } if (offset != 0) { for (; maxLen != lenLimit; maxLen++) if (cur[(ptrdiff_t)maxLen - delta2] != cur[maxLen]) break; distances[offset - 2] = maxLen; if (maxLen == lenLimit) { p->son[p->cyclicBufferPos] = curMatch; MOVE_POS_RET; } } if (maxLen < 3) maxLen = 3; offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), distances + offset, maxLen) - (distances)); MOVE_POS_RET } UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances) { UInt32 offset; GET_MATCHES_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hashValue]; p->hash[hashValue] = p->pos; offset = (UInt32)(Hc_GetMatchesSpec(lenLimit, curMatch, MF_PARAMS(p), distances, 2) - (distances)); MOVE_POS_RET } static void Bt2_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { do { SKIP_HEADER(2) HASH2_CALC; curMatch = p->hash[hashValue]; p->hash[hashValue] = p->pos; SKIP_FOOTER } while (--num != 0); } void Bt3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { do { SKIP_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hashValue]; p->hash[hashValue] = p->pos; SKIP_FOOTER } while (--num != 0); } static void Bt3_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { do { UInt32 hash2Value; SKIP_HEADER(3) HASH3_CALC; curMatch = p->hash[kFix3HashSize + hashValue]; p->hash[hash2Value] = p->hash[kFix3HashSize + hashValue] = p->pos; SKIP_FOOTER } while (--num != 0); } static void Bt4_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { do { UInt32 hash2Value, hash3Value; SKIP_HEADER(4) HASH4_CALC; curMatch = p->hash[kFix4HashSize + hashValue]; p->hash[hash2Value] = p->hash[kFix3HashSize + hash3Value] = p->pos; p->hash[kFix4HashSize + hashValue] = p->pos; SKIP_FOOTER } while (--num != 0); } static void Hc4_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { do { UInt32 hash2Value, 
hash3Value; SKIP_HEADER(4) HASH4_CALC; curMatch = p->hash[kFix4HashSize + hashValue]; p->hash[hash2Value] = p->hash[kFix3HashSize + hash3Value] = p->hash[kFix4HashSize + hashValue] = p->pos; p->son[p->cyclicBufferPos] = curMatch; MOVE_POS } while (--num != 0); } void Hc3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num) { do { SKIP_HEADER(3) HASH_ZIP_CALC; curMatch = p->hash[hashValue]; p->hash[hashValue] = p->pos; p->son[p->cyclicBufferPos] = curMatch; MOVE_POS } while (--num != 0); } void MatchFinder_CreateVTable(CMatchFinder* p, IMatchFinder* vTable) { vTable->Init = (Mf_Init_Func)MatchFinder_Init; vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinder_GetIndexByte; vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinder_GetNumAvailableBytes; vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinder_GetPointerToCurrentPos; if (!p->btMode) { vTable->GetMatches = (Mf_GetMatches_Func)Hc4_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Hc4_MatchFinder_Skip; } else if (p->numHashBytes == 2) { vTable->GetMatches = (Mf_GetMatches_Func)Bt2_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt2_MatchFinder_Skip; } else if (p->numHashBytes == 3) { vTable->GetMatches = (Mf_GetMatches_Func)Bt3_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt3_MatchFinder_Skip; } else { vTable->GetMatches = (Mf_GetMatches_Func)Bt4_MatchFinder_GetMatches; vTable->Skip = (Mf_Skip_Func)Bt4_MatchFinder_Skip; } } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_LzFind.h000066400000000000000000000065471503722002600216660ustar00rootroot00000000000000/* LzFind.h -- Match finder for LZ algorithms 2008-10-04 : Igor Pavlov : Public domain */ #ifndef __LZFIND_H #define __LZFIND_H #include "lzma_Types.h" namespace crnlib { typedef UInt32 CLzRef; typedef struct _CMatchFinder { Byte* buffer; UInt32 pos; UInt32 posLimit; UInt32 streamPos; UInt32 lenLimit; UInt32 cyclicBufferPos; UInt32 cyclicBufferSize; /* it must be = (historySize + 1) */ UInt32 matchMaxLen; 
CLzRef* hash; CLzRef* son; UInt32 hashMask; UInt32 cutValue; Byte* bufferBase; ISeqInStream* stream; int streamEndWasReached; UInt32 blockSize; UInt32 keepSizeBefore; UInt32 keepSizeAfter; UInt32 numHashBytes; int directInput; int btMode; /* int skipModeBits; */ int bigHash; UInt32 historySize; UInt32 fixedHashSize; UInt32 hashSizeSum; UInt32 numSons; SRes result; UInt32 crc[256]; } CMatchFinder; #define Inline_MatchFinder_GetPointerToCurrentPos(p) ((p)->buffer) #define Inline_MatchFinder_GetIndexByte(p, index) ((p)->buffer[(Int32)(index)]) #define Inline_MatchFinder_GetNumAvailableBytes(p) ((p)->streamPos - (p)->pos) int MatchFinder_NeedMove(CMatchFinder* p); Byte* MatchFinder_GetPointerToCurrentPos(CMatchFinder* p); void MatchFinder_MoveBlock(CMatchFinder* p); void MatchFinder_ReadIfRequired(CMatchFinder* p); void MatchFinder_Construct(CMatchFinder* p); /* Conditions: historySize <= 3 GB keepAddBufferBefore + matchMaxLen + keepAddBufferAfter < 511MB */ int MatchFinder_Create(CMatchFinder* p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc* alloc); void MatchFinder_Free(CMatchFinder* p, ISzAlloc* alloc); void MatchFinder_Normalize3(UInt32 subValue, CLzRef* items, UInt32 numItems); void MatchFinder_ReduceOffsets(CMatchFinder* p, UInt32 subValue); UInt32* GetMatchesSpec1(UInt32 lenLimit, UInt32 curMatch, UInt32 pos, const Byte* buffer, CLzRef* son, UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, UInt32* distances, UInt32 maxLen); /* Conditions: Mf_GetNumAvailableBytes_Func must be called before each Mf_GetMatchLen_Func. 
Mf_GetPointerToCurrentPos_Func's result must be used only before any other function */ typedef void (*Mf_Init_Func)(void* object); typedef Byte (*Mf_GetIndexByte_Func)(void* object, Int32 index); typedef UInt32 (*Mf_GetNumAvailableBytes_Func)(void* object); typedef const Byte* (*Mf_GetPointerToCurrentPos_Func)(void* object); typedef UInt32 (*Mf_GetMatches_Func)(void* object, UInt32* distances); typedef void (*Mf_Skip_Func)(void* object, UInt32); typedef struct _IMatchFinder { Mf_Init_Func Init; Mf_GetIndexByte_Func GetIndexByte; Mf_GetNumAvailableBytes_Func GetNumAvailableBytes; Mf_GetPointerToCurrentPos_Func GetPointerToCurrentPos; Mf_GetMatches_Func GetMatches; Mf_Skip_Func Skip; } IMatchFinder; void MatchFinder_CreateVTable(CMatchFinder* p, IMatchFinder* vTable); void MatchFinder_Init(CMatchFinder* p); UInt32 Bt3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances); UInt32 Hc3Zip_MatchFinder_GetMatches(CMatchFinder* p, UInt32* distances); void Bt3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num); void Hc3Zip_MatchFinder_Skip(CMatchFinder* p, UInt32 num); } #endif DaemonEngine-crunch-ef4d32f/crnlib/lzma_LzFindMt.cpp000066400000000000000000000573501503722002600225200ustar00rootroot00000000000000/* LzFindMt.c -- multithreaded Match finder for LZ algorithms 2008-10-04 : Igor Pavlov : Public domain */ #include "crn_core.h" #include "lzma_LzHash.h" #include "lzma_LzFindMt.h" namespace crnlib { void MtSync_Construct(CMtSync* p) { p->wasCreated = False; p->csWasInitialized = False; p->csWasEntered = False; Thread_Construct(&p->thread); Event_Construct(&p->canStart); Event_Construct(&p->wasStarted); Event_Construct(&p->wasStopped); Semaphore_Construct(&p->freeSemaphore); Semaphore_Construct(&p->filledSemaphore); } void MtSync_GetNextBlock(CMtSync* p) { if (p->needStart) { p->numProcessedBlocks = 1; p->needStart = False; p->stopWriting = False; p->exit = False; Event_Reset(&p->wasStarted); Event_Reset(&p->wasStopped); Event_Set(&p->canStart); 
Event_Wait(&p->wasStarted); } else { CriticalSection_Leave(&p->cs); p->csWasEntered = False; p->numProcessedBlocks++; Semaphore_Release1(&p->freeSemaphore); } Semaphore_Wait(&p->filledSemaphore); CriticalSection_Enter(&p->cs); p->csWasEntered = True; } /* MtSync_StopWriting must be called if Writing was started */ void MtSync_StopWriting(CMtSync* p) { UInt32 myNumBlocks = p->numProcessedBlocks; if (!Thread_WasCreated(&p->thread) || p->needStart) return; p->stopWriting = True; if (p->csWasEntered) { CriticalSection_Leave(&p->cs); p->csWasEntered = False; } Semaphore_Release1(&p->freeSemaphore); Event_Wait(&p->wasStopped); while (myNumBlocks++ != p->numProcessedBlocks) { Semaphore_Wait(&p->filledSemaphore); Semaphore_Release1(&p->freeSemaphore); } p->needStart = True; } void MtSync_Destruct(CMtSync* p) { if (Thread_WasCreated(&p->thread)) { MtSync_StopWriting(p); p->exit = True; if (p->needStart) Event_Set(&p->canStart); Thread_Wait(&p->thread); Thread_Close(&p->thread); } if (p->csWasInitialized) { CriticalSection_Delete(&p->cs); p->csWasInitialized = False; } Event_Close(&p->canStart); Event_Close(&p->wasStarted); Event_Close(&p->wasStopped); Semaphore_Close(&p->freeSemaphore); Semaphore_Close(&p->filledSemaphore); p->wasCreated = False; } #define RINOK_THREAD(x) \ { \ if ((x) != 0) \ return SZ_ERROR_THREAD; \ } static SRes MtSync_Create2(CMtSync* p, unsigned(MY_STD_CALL* startAddress)(void*), void* obj, UInt32 numBlocks) { if (p->wasCreated) return SZ_OK; RINOK_THREAD(CriticalSection_Init(&p->cs)); p->csWasInitialized = True; RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->canStart)); RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStarted)); RINOK_THREAD(AutoResetEvent_CreateNotSignaled(&p->wasStopped)); RINOK_THREAD(Semaphore_Create(&p->freeSemaphore, numBlocks, numBlocks)); RINOK_THREAD(Semaphore_Create(&p->filledSemaphore, 0, numBlocks)); p->needStart = True; RINOK_THREAD(Thread_Create(&p->thread, startAddress, obj)); p->wasCreated = True; return SZ_OK; 
} static SRes MtSync_Create(CMtSync* p, unsigned(MY_STD_CALL* startAddress)(void*), void* obj, UInt32 numBlocks) { SRes res = MtSync_Create2(p, startAddress, obj, numBlocks); if (res != SZ_OK) MtSync_Destruct(p); return res; } void MtSync_Init(CMtSync* p) { p->needStart = True; } #define kMtMaxValForNormalize 0xFFFFFFFF #define DEF_GetHeads2(name, v, action) \ \ static void GetHeads##name(const Byte* p, UInt32 pos, \ UInt32* hash, \ UInt32 hashMask, UInt32* heads, UInt32 numHeads, const UInt32* crc) \ { \ action; \ for (; numHeads != 0; numHeads--) { \ \ const UInt32 value = (v); \ p++; \ *heads++ = pos - hash[value]; \ hash[value] = pos++; \ } \ } #define DEF_GetHeads(name, v) DEF_GetHeads2(name, v, ;) DEF_GetHeads2(2, (p[0] | ((UInt32)p[1] << 8)), hashMask = hashMask; crc = crc;) DEF_GetHeads(3, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8)) & hashMask) DEF_GetHeads(4, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5)) & hashMask) DEF_GetHeads(4b, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ ((UInt32)p[3] << 16)) & hashMask) //DEF_GetHeads(5, (crc[p[0]] ^ p[1] ^ ((UInt32)p[2] << 8) ^ (crc[p[3]] << 5) ^ (crc[p[4]] << 3)) & hashMask) void HashThreadFunc(CMatchFinderMt* mt) { CMtSync* p = &mt->hashSync; for (;;) { UInt32 numProcessedBlocks = 0; Event_Wait(&p->canStart); Event_Set(&p->wasStarted); for (;;) { if (p->exit) return; if (p->stopWriting) { p->numProcessedBlocks = numProcessedBlocks; Event_Set(&p->wasStopped); break; } { CMatchFinder* mf = mt->MatchFinder; if (MatchFinder_NeedMove(mf)) { CriticalSection_Enter(&mt->btSync.cs); CriticalSection_Enter(&mt->hashSync.cs); { const Byte* beforePtr = MatchFinder_GetPointerToCurrentPos(mf); const Byte* afterPtr; MatchFinder_MoveBlock(mf); afterPtr = MatchFinder_GetPointerToCurrentPos(mf); mt->pointerToCurPos -= beforePtr - afterPtr; mt->buffer -= beforePtr - afterPtr; } CriticalSection_Leave(&mt->btSync.cs); CriticalSection_Leave(&mt->hashSync.cs); continue; } Semaphore_Wait(&p->freeSemaphore); 
MatchFinder_ReadIfRequired(mf); if (mf->pos > (kMtMaxValForNormalize - kMtHashBlockSize)) { UInt32 subValue = (mf->pos - mf->historySize - 1); MatchFinder_ReduceOffsets(mf, subValue); MatchFinder_Normalize3(subValue, mf->hash + mf->fixedHashSize, mf->hashMask + 1); } { UInt32* heads = mt->hashBuf + ((numProcessedBlocks++) & kMtHashNumBlocksMask) * kMtHashBlockSize; UInt32 num = mf->streamPos - mf->pos; heads[0] = 2; heads[1] = num; if (num >= mf->numHashBytes) { num = num - mf->numHashBytes + 1; if (num > kMtHashBlockSize - 2) num = kMtHashBlockSize - 2; mt->GetHeadsFunc(mf->buffer, mf->pos, mf->hash + mf->fixedHashSize, mf->hashMask, heads + 2, num, mf->crc); heads[0] += num; } mf->pos += num; mf->buffer += num; } } Semaphore_Release1(&p->filledSemaphore); } } } void MatchFinderMt_GetNextBlock_Hash(CMatchFinderMt* p) { MtSync_GetNextBlock(&p->hashSync); p->hashBufPosLimit = p->hashBufPos = ((p->hashSync.numProcessedBlocks - 1) & kMtHashNumBlocksMask) * kMtHashBlockSize; p->hashBufPosLimit += p->hashBuf[p->hashBufPos++]; p->hashNumAvail = p->hashBuf[p->hashBufPos++]; } #define kEmptyHashValue 0 /* #define MFMT_GM_INLINE */ #ifdef MFMT_GM_INLINE #define NO_INLINE MY_FAST_CALL Int32 NO_INLINE GetMatchesSpecN(UInt32 lenLimit, UInt32 pos, const Byte* cur, CLzRef* son, UInt32 _cyclicBufferPos, UInt32 _cyclicBufferSize, UInt32 _cutValue, UInt32* _distances, UInt32 _maxLen, const UInt32* hash, Int32 limit, UInt32 size, UInt32* posRes) { do { UInt32* distances = _distances + 1; UInt32 curMatch = pos - *hash++; CLzRef* ptr0 = son + (_cyclicBufferPos << 1) + 1; CLzRef* ptr1 = son + (_cyclicBufferPos << 1); UInt32 len0 = 0, len1 = 0; UInt32 cutValue = _cutValue; UInt32 maxLen = _maxLen; for (;;) { UInt32 delta = pos - curMatch; if (cutValue-- == 0 || delta >= _cyclicBufferSize) { *ptr0 = *ptr1 = kEmptyHashValue; break; } { CLzRef* pair = son + ((_cyclicBufferPos - delta + ((delta > _cyclicBufferPos) ? 
_cyclicBufferSize : 0)) << 1); const Byte* pb = cur - delta; UInt32 len = (len0 < len1 ? len0 : len1); if (pb[len] == cur[len]) { if (++len != lenLimit && pb[len] == cur[len]) while (++len != lenLimit) if (pb[len] != cur[len]) break; if (maxLen < len) { *distances++ = maxLen = len; *distances++ = delta - 1; if (len == lenLimit) { *ptr1 = pair[0]; *ptr0 = pair[1]; break; } } } if (pb[len] < cur[len]) { *ptr1 = curMatch; ptr1 = pair + 1; curMatch = *ptr1; len1 = len; } else { *ptr0 = curMatch; ptr0 = pair; curMatch = *ptr0; len0 = len; } } } pos++; _cyclicBufferPos++; cur++; { UInt32 num = (UInt32)(distances - _distances); *_distances = num - 1; _distances += num; limit -= num; } } while (limit > 0 && --size != 0); *posRes = pos; return limit; } #endif void BtGetMatches(CMatchFinderMt* p, UInt32* distances) { UInt32 numProcessed = 0; UInt32 curPos = 2; UInt32 limit = kMtBtBlockSize - (p->matchMaxLen * 2); distances[1] = p->hashNumAvail; while (curPos < limit) { if (p->hashBufPos == p->hashBufPosLimit) { MatchFinderMt_GetNextBlock_Hash(p); distances[1] = numProcessed + p->hashNumAvail; if (p->hashNumAvail >= p->numHashBytes) continue; for (; p->hashNumAvail != 0; p->hashNumAvail--) distances[curPos++] = 0; break; } { UInt32 size = p->hashBufPosLimit - p->hashBufPos; UInt32 lenLimit = p->matchMaxLen; UInt32 pos = p->pos; UInt32 cyclicBufferPos = p->cyclicBufferPos; if (lenLimit >= p->hashNumAvail) lenLimit = p->hashNumAvail; { UInt32 size2 = p->hashNumAvail - lenLimit + 1; if (size2 < size) size = size2; size2 = p->cyclicBufferSize - cyclicBufferPos; if (size2 < size) size = size2; } #ifndef MFMT_GM_INLINE while (curPos < limit && size-- != 0) { UInt32* startDistances = distances + curPos; UInt32 num = (UInt32)(GetMatchesSpec1(lenLimit, pos - p->hashBuf[p->hashBufPos++], pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, startDistances + 1, p->numHashBytes - 1) - startDistances); *startDistances = num - 1; curPos += num; cyclicBufferPos++; 
pos++; p->buffer++; } #else { UInt32 posRes; curPos = limit - GetMatchesSpecN(lenLimit, pos, p->buffer, p->son, cyclicBufferPos, p->cyclicBufferSize, p->cutValue, distances + curPos, p->numHashBytes - 1, p->hashBuf + p->hashBufPos, (Int32)(limit - curPos), size, &posRes); p->hashBufPos += posRes - pos; cyclicBufferPos += posRes - pos; p->buffer += posRes - pos; pos = posRes; } #endif numProcessed += pos - p->pos; p->hashNumAvail -= pos - p->pos; p->pos = pos; if (cyclicBufferPos == p->cyclicBufferSize) cyclicBufferPos = 0; p->cyclicBufferPos = cyclicBufferPos; } } distances[0] = curPos; } void BtFillBlock(CMatchFinderMt* p, UInt32 globalBlockIndex) { CMtSync* sync = &p->hashSync; if (!sync->needStart) { CriticalSection_Enter(&sync->cs); sync->csWasEntered = True; } BtGetMatches(p, p->btBuf + (globalBlockIndex & kMtBtNumBlocksMask) * kMtBtBlockSize); if (p->pos > kMtMaxValForNormalize - kMtBtBlockSize) { UInt32 subValue = p->pos - p->cyclicBufferSize; MatchFinder_Normalize3(subValue, p->son, p->cyclicBufferSize * 2); p->pos -= subValue; } if (!sync->needStart) { CriticalSection_Leave(&sync->cs); sync->csWasEntered = False; } } void BtThreadFunc(CMatchFinderMt* mt) { CMtSync* p = &mt->btSync; for (;;) { UInt32 blockIndex = 0; Event_Wait(&p->canStart); Event_Set(&p->wasStarted); for (;;) { if (p->exit) return; if (p->stopWriting) { p->numProcessedBlocks = blockIndex; MtSync_StopWriting(&mt->hashSync); Event_Set(&p->wasStopped); break; } Semaphore_Wait(&p->freeSemaphore); BtFillBlock(mt, blockIndex++); Semaphore_Release1(&p->filledSemaphore); } } } void MatchFinderMt_Construct(CMatchFinderMt* p) { p->hashBuf = 0; MtSync_Construct(&p->hashSync); MtSync_Construct(&p->btSync); } void MatchFinderMt_FreeMem(CMatchFinderMt* p, ISzAlloc* alloc) { alloc->Free(alloc, p->hashBuf); p->hashBuf = 0; } void MatchFinderMt_Destruct(CMatchFinderMt* p, ISzAlloc* alloc) { MtSync_Destruct(&p->hashSync); MtSync_Destruct(&p->btSync); MatchFinderMt_FreeMem(p, alloc); } #define 
kHashBufferSize (kMtHashBlockSize * kMtHashNumBlocks) #define kBtBufferSize (kMtBtBlockSize * kMtBtNumBlocks) static unsigned MY_STD_CALL HashThreadFunc2(void* p) { HashThreadFunc((CMatchFinderMt*)p); return 0; } static unsigned MY_STD_CALL BtThreadFunc2(void* p) { Byte allocaDummy[0x180]; (void)allocaDummy; int i = 0; for (i = 0; i < 16; i++) allocaDummy[i] = (Byte)i; BtThreadFunc((CMatchFinderMt*)p); return 0; } SRes MatchFinderMt_Create(CMatchFinderMt* p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc* alloc) { CMatchFinder* mf = p->MatchFinder; p->historySize = historySize; if (kMtBtBlockSize <= matchMaxLen * 4) return SZ_ERROR_PARAM; if (p->hashBuf == 0) { p->hashBuf = (UInt32*)alloc->Alloc(alloc, (kHashBufferSize + kBtBufferSize) * sizeof(UInt32)); if (p->hashBuf == 0) return SZ_ERROR_MEM; p->btBuf = p->hashBuf + kHashBufferSize; } keepAddBufferBefore += (kHashBufferSize + kBtBufferSize); keepAddBufferAfter += kMtHashBlockSize; if (!MatchFinder_Create(mf, historySize, keepAddBufferBefore, matchMaxLen, keepAddBufferAfter, alloc)) return SZ_ERROR_MEM; RINOK(MtSync_Create(&p->hashSync, HashThreadFunc2, p, kMtHashNumBlocks)); RINOK(MtSync_Create(&p->btSync, BtThreadFunc2, p, kMtBtNumBlocks)); return SZ_OK; } /* Call it after ReleaseStream / SetStream */ void MatchFinderMt_Init(CMatchFinderMt* p) { CMatchFinder* mf = p->MatchFinder; p->btBufPos = p->btBufPosLimit = 0; p->hashBufPos = p->hashBufPosLimit = 0; MatchFinder_Init(mf); p->pointerToCurPos = MatchFinder_GetPointerToCurrentPos(mf); p->btNumAvailBytes = 0; p->lzPos = p->historySize + 1; p->hash = mf->hash; p->fixedHashSize = mf->fixedHashSize; p->crc = mf->crc; p->son = mf->son; p->matchMaxLen = mf->matchMaxLen; p->numHashBytes = mf->numHashBytes; p->pos = mf->pos; p->buffer = mf->buffer; p->cyclicBufferPos = mf->cyclicBufferPos; p->cyclicBufferSize = mf->cyclicBufferSize; p->cutValue = mf->cutValue; } /* ReleaseStream is required to finish 
multithreading */ void MatchFinderMt_ReleaseStream(CMatchFinderMt* p) { MtSync_StopWriting(&p->btSync); /* p->MatchFinder->ReleaseStream(); */ } void MatchFinderMt_Normalize(CMatchFinderMt* p) { MatchFinder_Normalize3(p->lzPos - p->historySize - 1, p->hash, p->fixedHashSize); p->lzPos = p->historySize + 1; } void MatchFinderMt_GetNextBlock_Bt(CMatchFinderMt* p) { UInt32 blockIndex; MtSync_GetNextBlock(&p->btSync); blockIndex = ((p->btSync.numProcessedBlocks - 1) & kMtBtNumBlocksMask); p->btBufPosLimit = p->btBufPos = blockIndex * kMtBtBlockSize; p->btBufPosLimit += p->btBuf[p->btBufPos++]; p->btNumAvailBytes = p->btBuf[p->btBufPos++]; if (p->lzPos >= kMtMaxValForNormalize - kMtBtBlockSize) MatchFinderMt_Normalize(p); } const Byte* MatchFinderMt_GetPointerToCurrentPos(CMatchFinderMt* p) { return p->pointerToCurPos; } #define GET_NEXT_BLOCK_IF_REQUIRED \ if (p->btBufPos == p->btBufPosLimit) \ MatchFinderMt_GetNextBlock_Bt(p); UInt32 MatchFinderMt_GetNumAvailableBytes(CMatchFinderMt* p) { GET_NEXT_BLOCK_IF_REQUIRED; return p->btNumAvailBytes; } Byte MatchFinderMt_GetIndexByte(CMatchFinderMt* p, Int32 index) { return p->pointerToCurPos[index]; } UInt32* MixMatches2(CMatchFinderMt* p, UInt32 matchMinPos, UInt32* distances) { UInt32 hash2Value, curMatch2; UInt32* hash = p->hash; const Byte* cur = p->pointerToCurPos; UInt32 lzPos = p->lzPos; MT_HASH2_CALC curMatch2 = hash[hash2Value]; hash[hash2Value] = lzPos; if (curMatch2 >= matchMinPos) if (cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { *distances++ = 2; *distances++ = lzPos - curMatch2 - 1; } return distances; } UInt32* MixMatches3(CMatchFinderMt* p, UInt32 matchMinPos, UInt32* distances) { UInt32 hash2Value, hash3Value, curMatch2, curMatch3; UInt32* hash = p->hash; const Byte* cur = p->pointerToCurPos; UInt32 lzPos = p->lzPos; MT_HASH3_CALC curMatch2 = hash[hash2Value]; curMatch3 = hash[kFix3HashSize + hash3Value]; hash[hash2Value] = hash[kFix3HashSize + hash3Value] = lzPos; if (curMatch2 >= matchMinPos && 
cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { distances[1] = lzPos - curMatch2 - 1; if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) { distances[0] = 3; return distances + 2; } distances[0] = 2; distances += 2; } if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) { *distances++ = 3; *distances++ = lzPos - curMatch3 - 1; } return distances; } /* UInt32 *MixMatches4(CMatchFinderMt *p, UInt32 matchMinPos, UInt32 *distances) { UInt32 hash2Value, hash3Value, hash4Value, curMatch2, curMatch3, curMatch4; UInt32 *hash = p->hash; const Byte *cur = p->pointerToCurPos; UInt32 lzPos = p->lzPos; MT_HASH4_CALC curMatch2 = hash[ hash2Value]; curMatch3 = hash[kFix3HashSize + hash3Value]; curMatch4 = hash[kFix4HashSize + hash4Value]; hash[ hash2Value] = hash[kFix3HashSize + hash3Value] = hash[kFix4HashSize + hash4Value] = lzPos; if (curMatch2 >= matchMinPos && cur[(ptrdiff_t)curMatch2 - lzPos] == cur[0]) { distances[1] = lzPos - curMatch2 - 1; if (cur[(ptrdiff_t)curMatch2 - lzPos + 2] == cur[2]) { distances[0] = (cur[(ptrdiff_t)curMatch2 - lzPos + 3] == cur[3]) ? 
4 : 3; return distances + 2; } distances[0] = 2; distances += 2; } if (curMatch3 >= matchMinPos && cur[(ptrdiff_t)curMatch3 - lzPos] == cur[0]) { distances[1] = lzPos - curMatch3 - 1; if (cur[(ptrdiff_t)curMatch3 - lzPos + 3] == cur[3]) { distances[0] = 4; return distances + 2; } distances[0] = 3; distances += 2; } if (curMatch4 >= matchMinPos) if ( cur[(ptrdiff_t)curMatch4 - lzPos] == cur[0] && cur[(ptrdiff_t)curMatch4 - lzPos + 3] == cur[3] ) { *distances++ = 4; *distances++ = lzPos - curMatch4 - 1; } return distances; } */ #define INCREASE_LZ_POS \ p->lzPos++; \ p->pointerToCurPos++; UInt32 MatchFinderMt2_GetMatches(CMatchFinderMt* p, UInt32* distances) { const UInt32* btBuf = p->btBuf + p->btBufPos; UInt32 len = *btBuf++; p->btBufPos += 1 + len; p->btNumAvailBytes--; { UInt32 i; for (i = 0; i < len; i += 2) { *distances++ = *btBuf++; *distances++ = *btBuf++; } } INCREASE_LZ_POS return len; } UInt32 MatchFinderMt_GetMatches(CMatchFinderMt* p, UInt32* distances) { const UInt32* btBuf = p->btBuf + p->btBufPos; UInt32 len = *btBuf++; p->btBufPos += 1 + len; if (len == 0) { if (p->btNumAvailBytes-- >= 4) len = (UInt32)(p->MixMatchesFunc(p, p->lzPos - p->historySize, distances) - (distances)); } else { /* Condition: there are matches in btBuf with length < p->numHashBytes */ UInt32* distances2; p->btNumAvailBytes--; distances2 = p->MixMatchesFunc(p, p->lzPos - btBuf[1], distances); do { *distances2++ = *btBuf++; *distances2++ = *btBuf++; } while ((len -= 2) != 0); len = (UInt32)(distances2 - (distances)); } INCREASE_LZ_POS return len; } #define SKIP_HEADER2 \ do { \ GET_NEXT_BLOCK_IF_REQUIRED #define SKIP_HEADER(n) \ SKIP_HEADER2 if (p->btNumAvailBytes-- >= (n)) { \ const Byte* cur = p->pointerToCurPos; \ UInt32* hash = p->hash; #define SKIP_FOOTER \ } \ INCREASE_LZ_POS p->btBufPos += p->btBuf[p->btBufPos] + 1; \ } \ while (--num != 0) \ ; void MatchFinderMt0_Skip(CMatchFinderMt* p, UInt32 num) { SKIP_HEADER2 { p->btNumAvailBytes--; SKIP_FOOTER } void 
MatchFinderMt2_Skip(CMatchFinderMt * p, UInt32 num) { SKIP_HEADER(2) UInt32 hash2Value; MT_HASH2_CALC hash[hash2Value] = p->lzPos; SKIP_FOOTER } void MatchFinderMt3_Skip(CMatchFinderMt * p, UInt32 num) { SKIP_HEADER(3) UInt32 hash2Value, hash3Value; MT_HASH3_CALC hash[kFix3HashSize + hash3Value] = hash[hash2Value] = p->lzPos; SKIP_FOOTER } /* void MatchFinderMt4_Skip(CMatchFinderMt *p, UInt32 num) { SKIP_HEADER(4) UInt32 hash2Value, hash3Value, hash4Value; MT_HASH4_CALC hash[kFix4HashSize + hash4Value] = hash[kFix3HashSize + hash3Value] = hash[ hash2Value] = p->lzPos; SKIP_FOOTER } */ void MatchFinderMt_CreateVTable(CMatchFinderMt * p, IMatchFinder * vTable) { vTable->Init = (Mf_Init_Func)MatchFinderMt_Init; vTable->GetIndexByte = (Mf_GetIndexByte_Func)MatchFinderMt_GetIndexByte; vTable->GetNumAvailableBytes = (Mf_GetNumAvailableBytes_Func)MatchFinderMt_GetNumAvailableBytes; vTable->GetPointerToCurrentPos = (Mf_GetPointerToCurrentPos_Func)MatchFinderMt_GetPointerToCurrentPos; vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt_GetMatches; switch (p->MatchFinder->numHashBytes) { case 2: p->GetHeadsFunc = GetHeads2; p->MixMatchesFunc = (Mf_Mix_Matches)0; vTable->Skip = (Mf_Skip_Func)MatchFinderMt0_Skip; vTable->GetMatches = (Mf_GetMatches_Func)MatchFinderMt2_GetMatches; break; case 3: p->GetHeadsFunc = GetHeads3; p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches2; vTable->Skip = (Mf_Skip_Func)MatchFinderMt2_Skip; break; default: /* case 4: */ p->GetHeadsFunc = p->MatchFinder->bigHash ? 
GetHeads4b : GetHeads4; /* p->GetHeadsFunc = GetHeads4; */ p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches3; vTable->Skip = (Mf_Skip_Func)MatchFinderMt3_Skip; break; /* default: p->GetHeadsFunc = GetHeads5; p->MixMatchesFunc = (Mf_Mix_Matches)MixMatches4; vTable->Skip = (Mf_Skip_Func)MatchFinderMt4_Skip; break; */ } } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_LzFindMt.h000066400000000000000000000047551503722002600221660ustar00rootroot00000000000000/* LzFindMt.h -- multithreaded Match finder for LZ algorithms 2008-10-04 : Igor Pavlov : Public domain */ #ifndef __LZFINDMT_H #define __LZFINDMT_H #include "lzma_Threads.h" #include "lzma_LzFind.h" namespace crnlib { #define kMtHashBlockSize (1 << 13) #define kMtHashNumBlocks (1 << 3) #define kMtHashNumBlocksMask (kMtHashNumBlocks - 1) #define kMtBtBlockSize (1 << 14) #define kMtBtNumBlocks (1 << 6) #define kMtBtNumBlocksMask (kMtBtNumBlocks - 1) typedef struct _CMtSync { Bool wasCreated; Bool needStart; Bool exit; Bool stopWriting; CThread thread; CAutoResetEvent canStart; CAutoResetEvent wasStarted; CAutoResetEvent wasStopped; CSemaphore freeSemaphore; CSemaphore filledSemaphore; Bool csWasInitialized; Bool csWasEntered; CCriticalSection cs; UInt32 numProcessedBlocks; } CMtSync; typedef UInt32* (*Mf_Mix_Matches)(void* p, UInt32 matchMinPos, UInt32* distances); /* kMtCacheLineDummy must be >= size_of_CPU_cache_line */ #define kMtCacheLineDummy 128 typedef void (*Mf_GetHeads)(const Byte* buffer, UInt32 pos, UInt32* hash, UInt32 hashMask, UInt32* heads, UInt32 numHeads, const UInt32* crc); typedef struct _CMatchFinderMt { /* LZ */ const Byte* pointerToCurPos; UInt32* btBuf; UInt32 btBufPos; UInt32 btBufPosLimit; UInt32 lzPos; UInt32 btNumAvailBytes; UInt32* hash; UInt32 fixedHashSize; UInt32 historySize; const UInt32* crc; Mf_Mix_Matches MixMatchesFunc; /* LZ + BT */ CMtSync btSync; Byte btDummy[kMtCacheLineDummy]; /* BT */ UInt32* hashBuf; UInt32 hashBufPos; UInt32 hashBufPosLimit; UInt32 hashNumAvail; CLzRef* son; UInt32 
matchMaxLen; UInt32 numHashBytes; UInt32 pos; Byte* buffer; UInt32 cyclicBufferPos; UInt32 cyclicBufferSize; /* it must be historySize + 1 */ UInt32 cutValue; /* BT + Hash */ CMtSync hashSync; /* Byte hashDummy[kMtCacheLineDummy]; */ /* Hash */ Mf_GetHeads GetHeadsFunc; CMatchFinder* MatchFinder; } CMatchFinderMt; void MatchFinderMt_Construct(CMatchFinderMt* p); void MatchFinderMt_Destruct(CMatchFinderMt* p, ISzAlloc* alloc); SRes MatchFinderMt_Create(CMatchFinderMt* p, UInt32 historySize, UInt32 keepAddBufferBefore, UInt32 matchMaxLen, UInt32 keepAddBufferAfter, ISzAlloc* alloc); void MatchFinderMt_CreateVTable(CMatchFinderMt* p, IMatchFinder* vTable); void MatchFinderMt_ReleaseStream(CMatchFinderMt* p); } #endif DaemonEngine-crunch-ef4d32f/crnlib/lzma_LzHash.h000066400000000000000000000061431503722002600216610ustar00rootroot00000000000000/* LzHash.h -- HASH functions for LZ algorithms 2008-10-04 : Igor Pavlov : Public domain */ #ifndef __LZHASH_H #define __LZHASH_H #define kHash2Size (1 << 10) #define kHash3Size (1 << 16) #define kHash4Size (1 << 20) #define kFix3HashSize (kHash2Size) #define kFix4HashSize (kHash2Size + kHash3Size) #define kFix5HashSize (kHash2Size + kHash3Size + kHash4Size) #define HASH2_CALC hashValue = cur[0] | ((UInt32)cur[1] << 8); #define HASH3_CALC \ { \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ hash2Value = temp & (kHash2Size - 1); \ hashValue = (temp ^ ((UInt32)cur[2] << 8)) & p->hashMask; \ } #define HASH4_CALC \ { \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ hash2Value = temp & (kHash2Size - 1); \ hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ hashValue = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & p->hashMask; \ } #define HASH5_CALC \ { \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ hash2Value = temp & (kHash2Size - 1); \ hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)); \ hashValue = (hash4Value ^ (p->crc[cur[4]] << 3)) & 
p->hashMask; \ hash4Value &= (kHash4Size - 1); \ } /* #define HASH_ZIP_CALC hashValue = ((cur[0] | ((UInt32)cur[1] << 8)) ^ p->crc[cur[2]]) & 0xFFFF; */ #define HASH_ZIP_CALC hashValue = ((cur[2] | ((UInt32)cur[0] << 8)) ^ p->crc[cur[1]]) & 0xFFFF; #define MT_HASH2_CALC \ hash2Value = (p->crc[cur[0]] ^ cur[1]) & (kHash2Size - 1); #define MT_HASH3_CALC \ { \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ hash2Value = temp & (kHash2Size - 1); \ hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ } #define MT_HASH4_CALC \ { \ UInt32 temp = p->crc[cur[0]] ^ cur[1]; \ hash2Value = temp & (kHash2Size - 1); \ hash3Value = (temp ^ ((UInt32)cur[2] << 8)) & (kHash3Size - 1); \ hash4Value = (temp ^ ((UInt32)cur[2] << 8) ^ (p->crc[cur[3]] << 5)) & (kHash4Size - 1); \ } #endif DaemonEngine-crunch-ef4d32f/crnlib/lzma_LzmaDec.cpp000066400000000000000000000704341503722002600223460ustar00rootroot00000000000000/* LzmaDec.c -- LZMA Decoder 2008-11-06 : Igor Pavlov : Public domain */ #include "crn_core.h" #include "lzma_LzmaDec.h" #include namespace crnlib { #define kNumTopBits 24 #define kTopValue ((UInt32)1 << kNumTopBits) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) #define kNumMoveBits 5 #define RC_INIT_SIZE 5 #define NORMALIZE \ if (range < kTopValue) { \ range <<= 8; \ code = (code << 8) | (*buf++); \ } #define IF_BIT_0(p) \ ttt = *(p); \ NORMALIZE; \ bound = (range >> kNumBitModelTotalBits) * ttt; \ if (code < bound) #define UPDATE_0(p) \ range = bound; \ *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); #define UPDATE_1(p) \ range -= bound; \ code -= bound; \ *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); #define GET_BIT2(p, i, A0, A1) \ IF_BIT_0(p) { \ UPDATE_0(p); \ i = (i + i); \ A0; \ } \ else { \ UPDATE_1(p); \ i = (i + i) + 1; \ A1; \ } #define GET_BIT(p, i) GET_BIT2(p, i, ;, ;) #define TREE_GET_BIT(probs, i) \ { GET_BIT((probs + i), i); } #define TREE_DECODE(probs, limit, i) \ { \ i = 1; \ do { \ 
TREE_GET_BIT(probs, i); \ } while (i < limit); \ i -= limit; \ } /* #define _LZMA_SIZE_OPT */ #ifdef _LZMA_SIZE_OPT #define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) #else #define TREE_6_DECODE(probs, i) \ { \ i = 1; \ TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i); \ TREE_GET_BIT(probs, i); \ i -= 0x40; \ } #endif #define NORMALIZE_CHECK \ if (range < kTopValue) { \ if (buf >= bufLimit) \ return DUMMY_ERROR; \ range <<= 8; \ code = (code << 8) | (*buf++); \ } #define IF_BIT_0_CHECK(p) \ ttt = *(p); \ NORMALIZE_CHECK; \ bound = (range >> kNumBitModelTotalBits) * ttt; \ if (code < bound) #define UPDATE_0_CHECK range = bound; #define UPDATE_1_CHECK \ range -= bound; \ code -= bound; #define GET_BIT2_CHECK(p, i, A0, A1) \ IF_BIT_0_CHECK(p) { \ UPDATE_0_CHECK; \ i = (i + i); \ A0; \ } \ else { \ UPDATE_1_CHECK; \ i = (i + i) + 1; \ A1; \ } #define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ;, ;) #define TREE_DECODE_CHECK(probs, limit, i) \ { \ i = 1; \ do { \ GET_BIT_CHECK(probs + i, i) \ } while (i < limit); \ i -= limit; \ } #define kNumPosBitsMax 4 #define kNumPosStatesMax (1 << kNumPosBitsMax) #define kLenNumLowBits 3 #define kLenNumLowSymbols (1 << kLenNumLowBits) #define kLenNumMidBits 3 #define kLenNumMidSymbols (1 << kLenNumMidBits) #define kLenNumHighBits 8 #define kLenNumHighSymbols (1 << kLenNumHighBits) #define LenChoice 0 #define LenChoice2 (LenChoice + 1) #define LenLow (LenChoice2 + 1) #define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) #define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) #define kNumLenProbs (LenHigh + kLenNumHighSymbols) #define kNumStates 12 #define kNumLitStates 7 #define kStartPosModelIndex 4 #define kEndPosModelIndex 14 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) #define kNumPosSlotBits 6 #define kNumLenToPosStates 4 #define kNumAlignBits 4 #define kAlignTableSize (1 << kNumAlignBits) #define kMatchMinLen 
2 #define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) #define IsMatch 0 #define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) #define IsRepG0 (IsRep + kNumStates) #define IsRepG1 (IsRepG0 + kNumStates) #define IsRepG2 (IsRepG1 + kNumStates) #define IsRep0Long (IsRepG2 + kNumStates) #define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) #define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) #define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) #define LenCoder (Align + kAlignTableSize) #define RepLenCoder (LenCoder + kNumLenProbs) #define Literal (RepLenCoder + kNumLenProbs) #define LZMA_BASE_SIZE 1846 #define LZMA_LIT_SIZE 768 #define LzmaProps_GetNumProbs(p) ((UInt32)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) #if Literal != LZMA_BASE_SIZE StopCompilingDueBUG #endif static const Byte kLiteralNextStates[kNumStates * 2] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; #define LZMA_DIC_MIN (1 << 12) /* First LZMA-symbol is always decoded. 
And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization
Out:
  Result:
    SZ_OK - OK
    SZ_ERROR_DATA - Error
  p->remainLen:
    < kMatchSpecLenStart : normal remain
    = kMatchSpecLenStart : finished
    = kMatchSpecLenStart + 1 : Flush marker
    = kMatchSpecLenStart + 2 : State Init Marker
*/

/* Core decode loop.
   Works on local copies of the decoder state (range/code/buf, rep0..rep3,
   state, dicPos, processedPos) and stores them back into *p only on the normal
   exit at the bottom. The early `return SZ_ERROR_DATA` paths skip that
   write-back, although bytes already stored into p->dic stay there.
     limit    - dictionary position at which output must stop
     bufLimit - the loop ends once buf has reached this pointer (tested after
                each symbol, so reads can run past it) */
static int MY_FAST_CALL LzmaDec_DecodeReal(CLzmaDec* p, SizeT limit, const Byte* bufLimit) {
  CLzmaProb* probs = p->probs;

  unsigned state = p->state;
  UInt32 rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
  unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
  unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1;
  unsigned lc = p->prop.lc;

  Byte* dic = p->dic;
  SizeT dicBufSize = p->dicBufSize;
  SizeT dicPos = p->dicPos;

  UInt32 processedPos = p->processedPos;
  UInt32 checkDicSize = p->checkDicSize;
  unsigned len = 0;

  const Byte* buf = p->buf;
  UInt32 range = p->range;
  UInt32 code = p->code;

  do {
    CLzmaProb* prob;
    UInt32 bound;
    unsigned ttt;
    unsigned posState = processedPos & pbMask;

    prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
    IF_BIT_0(prob) {
      /* IsMatch bit is 0: decode one literal byte. */
      unsigned symbol;
      UPDATE_0(prob);
      prob = probs + Literal;
      /* Select the literal sub-table from the position bits and the previous
         byte, which wraps to the end of the buffer when dicPos is 0. */
      if (checkDicSize != 0 || processedPos != 0)
        prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) +
                                  (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));

      if (state < kNumLitStates) {
        symbol = 1;
        do {
          GET_BIT(prob + symbol, symbol)
        } while (symbol < 0x100);
      } else {
        /* Previous symbol was not a literal: the byte at distance rep0 steers
           the choice of probability cell for each bit. */
        unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
        unsigned offs = 0x100;
        symbol = 1;
        do {
          unsigned bit;
          CLzmaProb* probLit;
          matchByte <<= 1;
          bit = (matchByte & offs);
          probLit = prob + offs + bit + symbol;
          GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
        } while (symbol < 0x100);
      }
      dic[dicPos++] = (Byte)symbol;
      processedPos++;

      state = kLiteralNextStates[state];
      /* if (state < 4) state = 0; else if (state < 10) state -= 3; else state -= 6; */
      continue;
    } else {
      UPDATE_1(prob);
      prob = probs + IsRep + state;
      IF_BIT_0(prob) {
        /* New match: the kNumStates bias marks "distance still to be decoded"
           for the `state >= kNumStates` test further down. */
        UPDATE_0(prob);
        state += kNumStates;
        prob = probs + LenCoder;
      } else {
        UPDATE_1(prob);
        /* A repeat needs earlier output to refer to. */
        if (checkDicSize == 0 && processedPos == 0)
          return SZ_ERROR_DATA;
        prob = probs + IsRepG0 + state;
        IF_BIT_0(prob) {
          UPDATE_0(prob);
          prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
          IF_BIT_0(prob) {
            /* Single byte copied from distance rep0. */
            UPDATE_0(prob);
            dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
            dicPos++;
            processedPos++;
            state = state < kNumLitStates ? 9 : 11;
            continue;
          }
          UPDATE_1(prob);
        } else {
          /* Pick rep1, rep2 or rep3 and move it to the front of the list. */
          UInt32 distance;
          UPDATE_1(prob);
          prob = probs + IsRepG1 + state;
          IF_BIT_0(prob) {
            UPDATE_0(prob);
            distance = rep1;
          } else {
            UPDATE_1(prob);
            prob = probs + IsRepG2 + state;
            IF_BIT_0(prob) {
              UPDATE_0(prob);
              distance = rep2;
            } else {
              UPDATE_1(prob);
              distance = rep3;
              rep3 = rep2;
            }
            rep2 = rep1;
          }
          rep1 = rep0;
          rep0 = distance;
        }
        state = state < kNumLitStates ? 8 : 11;
        prob = probs + RepLenCoder;
      }
      {
        /* Length: low / mid / high sub-tree chosen by up to two choice bits. */
        unsigned limit, offset;
        CLzmaProb* probLen = prob + LenChoice;
        IF_BIT_0(probLen) {
          UPDATE_0(probLen);
          probLen = prob + LenLow + (posState << kLenNumLowBits);
          offset = 0;
          limit = (1 << kLenNumLowBits);
        } else {
          UPDATE_1(probLen);
          probLen = prob + LenChoice2;
          IF_BIT_0(probLen) {
            UPDATE_0(probLen);
            probLen = prob + LenMid + (posState << kLenNumMidBits);
            offset = kLenNumLowSymbols;
            limit = (1 << kLenNumMidBits);
          } else {
            UPDATE_1(probLen);
            probLen = prob + LenHigh;
            offset = kLenNumLowSymbols + kLenNumMidSymbols;
            limit = (1 << kLenNumHighBits);
          }
        }
        TREE_DECODE(probLen, limit, len);
        len += offset;
      }

      if (state >= kNumStates) {
        /* New match only: decode the distance (slot, then extra bits). */
        UInt32 distance;
        prob = probs + PosSlot +
               ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
        TREE_6_DECODE(prob, distance);
        if (distance >= kStartPosModelIndex) {
          unsigned posSlot = (unsigned)distance;
          int numDirectBits = (int)(((distance >> 1) - 1));
          distance = (2 | (distance & 1));
          if (posSlot < kEndPosModelIndex) {
            distance <<= numDirectBits;
            prob = probs + SpecPos + distance - posSlot - 1;
            {
              UInt32 mask = 1;
              unsigned i = 1;
              do {
                GET_BIT2(prob + i, i, ;, distance |= mask);
                mask <<= 1;
              } while (--numDirectBits != 0);
            }
          } else {
            /* High bits are read without a probability model (range halved per
               bit); the low kNumAlignBits bits use the Align table. */
            numDirectBits -= kNumAlignBits;
            do {
              NORMALIZE
              range >>= 1;

              {
                UInt32 t;
                code -= range;
                t = (0 - ((UInt32)code >> 31)); /* (UInt32)((Int32)code >> 31) */
                distance = (distance << 1) + (t + 1);
                code += range & t;
              }
              /*
              distance <<= 1;
              if (code >= range)
              {
                code -= range;
                distance |= 1;
              }
              */
            } while (--numDirectBits != 0);
            prob = probs + Align;
            distance <<= kNumAlignBits;
            {
              unsigned i = 1;
              GET_BIT2(prob + i, i, ;, distance |= 1);
              GET_BIT2(prob + i, i, ;, distance |= 2);
              GET_BIT2(prob + i, i, ;, distance |= 4);
              GET_BIT2(prob + i, i, ;, distance |= 8);
            }
            /* All-ones distance: leave the loop with len biased by
               kMatchSpecLenStart so that p->remainLen ends up at or above
               that value (see the table in the header comment). */
            if (distance == (UInt32)0xFFFFFFFF) {
              len += kMatchSpecLenStart;
              state -= kNumStates;
              break;
            }
          }
        }
        rep3 = rep2;
        rep2 = rep1;
        rep1 = rep0;
        rep0 = distance + 1;
        /* The distance must point at data that has already been produced. */
        if (checkDicSize == 0) {
          if (distance >= processedPos)
            return SZ_ERROR_DATA;
        } else if (distance >= checkDicSize)
          return SZ_ERROR_DATA;
        state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
        /* state = kLiteralNextStates[state]; */
      }

      len += kMatchMinLen;

      if (limit == dicPos)
        return SZ_ERROR_DATA;
      {
        /* Copy as much of the match as fits before `limit`; whatever is left
           stays in `len` and is stored to p->remainLen on exit. */
        SizeT rem = limit - dicPos;
        unsigned curLen = ((rem < len) ? (unsigned)rem : len);
        SizeT pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0);

        processedPos += curLen;

        len -= curLen;
        if (pos + curLen <= dicBufSize) {
          /* Source does not wrap: byte-by-byte forward copy via a fixed offset. */
          Byte* dest = dic + dicPos;
          ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
          const Byte* lim = dest + curLen;
          dicPos += curLen;
          do
            *(dest) = (Byte) * (dest + src);
          while (++dest != lim);
        } else {
          /* Source wraps around the end of the circular dictionary. */
          do {
            dic[dicPos++] = dic[pos];
            if (++pos == dicBufSize)
              pos = 0;
          } while (--curLen != 0);
        }
      }
    }
  } while (dicPos < limit && buf < bufLimit);
  NORMALIZE;
  p->buf = buf;
  p->range = range;
  p->code = code;
  p->remainLen = len;
  p->dicPos = dicPos;
  p->processedPos = processedPos;
  p->reps[0] = rep0;
  p->reps[1] = rep1;
  p->reps[2] = rep2;
  p->reps[3] = rep3;
  p->state = state;

  return SZ_OK;
}

/* Writes the pending tail of a match (p->remainLen bytes at distance
   p->reps[0]) into the dictionary, stopping at `limit`. Does nothing when
   remainLen is 0 or is one of the marker values >= kMatchSpecLenStart. */
static void MY_FAST_CALL LzmaDec_WriteRem(CLzmaDec* p, SizeT limit) {
  if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) {
    Byte* dic = p->dic;
    SizeT dicPos = p->dicPos;
    SizeT dicBufSize = p->dicBufSize;
    unsigned len = p->remainLen;
    UInt32 rep0 = p->reps[0];
    if (limit - dicPos < len)
      len = (unsigned)(limit - dicPos);

    if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
      p->checkDicSize = p->prop.dicSize;

    p->processedPos += len;
    p->remainLen -= len;
    while (len-- != 0) {
      dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ?
dicBufSize : 0)];
      dicPos++;
    }
    p->dicPos = dicPos;
  }
}

/* Drives LzmaDec_DecodeReal in slices. While checkDicSize is still 0 each
   slice is clipped to at most (prop.dicSize - processedPos) output bytes, and
   checkDicSize is set to prop.dicSize once processedPos reaches it.
   A remainLen above kMatchSpecLenStart is clamped to kMatchSpecLenStart on exit.
   Returns 0; errors from LzmaDec_DecodeReal leave through RINOK
   (NOTE(review): RINOK is defined elsewhere -- presumably "return on non-zero"). */
static int MY_FAST_CALL LzmaDec_DecodeReal2(CLzmaDec* p, SizeT limit, const Byte* bufLimit) {
  do {
    SizeT limit2 = limit;
    if (p->checkDicSize == 0) {
      UInt32 rem = p->prop.dicSize - p->processedPos;
      if (limit - p->dicPos > rem)
        limit2 = p->dicPos + rem;
    }
    RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit));
    if (p->processedPos >= p->prop.dicSize)
      p->checkDicSize = p->prop.dicSize;
    LzmaDec_WriteRem(p, limit);
  } while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);

  if (p->remainLen > kMatchSpecLenStart) {
    p->remainLen = kMatchSpecLenStart;
  }
  return 0;
}

/* Result of LzmaDec_TryDummy: the kind of the next symbol, or DUMMY_ERROR. */
typedef enum {
  DUMMY_ERROR, /* unexpected end of input stream */
  DUMMY_LIT,
  DUMMY_MATCH,
  DUMMY_REP
} ELzmaDummy;

/* Dry run of one symbol over buf[0..inSize).
   Uses local copies of range/code and only the *_CHECK macros, so neither *p
   nor the probability table is modified. Returns DUMMY_ERROR as soon as a
   normalisation would need a byte at or beyond buf + inSize; otherwise returns
   which kind of symbol comes next. */
static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec* p, const Byte* buf, SizeT inSize) {
  UInt32 range = p->range;
  UInt32 code = p->code;
  const Byte* bufLimit = buf + inSize;
  CLzmaProb* probs = p->probs;
  unsigned state = p->state;
  ELzmaDummy res;

  {
    CLzmaProb* prob;
    UInt32 bound;
    unsigned ttt;
    unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1);

    prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
    IF_BIT_0_CHECK(prob) {
      UPDATE_0_CHECK

      /* if (bufLimit - buf >= 7) return DUMMY_LIT; */

      prob = probs + Literal;
      if (p->checkDicSize != 0 || p->processedPos != 0)
        prob += (LZMA_LIT_SIZE *
                 ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
                  (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));

      if (state < kNumLitStates) {
        unsigned symbol = 1;
        do {
          GET_BIT_CHECK(prob + symbol, symbol)
        } while (symbol < 0x100);
      } else {
        unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
                                    ((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)];
        unsigned offs = 0x100;
        unsigned symbol = 1;
        do {
          unsigned bit;
          CLzmaProb* probLit;
          matchByte <<= 1;
          bit = (matchByte & offs);
          probLit = prob + offs + bit + symbol;
          GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
        } while (symbol < 0x100);
      }
      res = DUMMY_LIT;
    } else {
      unsigned len;
      UPDATE_1_CHECK;

      prob = probs + IsRep + state;
      IF_BIT_0_CHECK(prob) {
        UPDATE_0_CHECK;
        /* state is reused here as a flag: 0 means "a distance follows"
           (see the `state < 4` test below), kNumStates means "repeat". */
        state = 0;
        prob = probs + LenCoder;
        res = DUMMY_MATCH;
      } else {
        UPDATE_1_CHECK;
        res = DUMMY_REP;
        prob = probs + IsRepG0 + state;
        IF_BIT_0_CHECK(prob) {
          UPDATE_0_CHECK;
          prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
          IF_BIT_0_CHECK(prob) {
            UPDATE_0_CHECK;
            NORMALIZE_CHECK;
            return DUMMY_REP;
          } else {
            UPDATE_1_CHECK;
          }
        } else {
          UPDATE_1_CHECK;
          prob = probs + IsRepG1 + state;
          IF_BIT_0_CHECK(prob) {
            UPDATE_0_CHECK;
          } else {
            UPDATE_1_CHECK;
            prob = probs + IsRepG2 + state;
            IF_BIT_0_CHECK(prob) {
              UPDATE_0_CHECK;
            } else {
              UPDATE_1_CHECK;
            }
          }
        }
        state = kNumStates;
        prob = probs + RepLenCoder;
      }
      {
        unsigned limit, offset;
        CLzmaProb* probLen = prob + LenChoice;
        IF_BIT_0_CHECK(probLen) {
          UPDATE_0_CHECK;
          probLen = prob + LenLow + (posState << kLenNumLowBits);
          offset = 0;
          limit = 1 << kLenNumLowBits;
        } else {
          UPDATE_1_CHECK;
          probLen = prob + LenChoice2;
          IF_BIT_0_CHECK(probLen) {
            UPDATE_0_CHECK;
            probLen = prob + LenMid + (posState << kLenNumMidBits);
            offset = kLenNumLowSymbols;
            limit = 1 << kLenNumMidBits;
          } else {
            UPDATE_1_CHECK;
            probLen = prob + LenHigh;
            offset = kLenNumLowSymbols + kLenNumMidSymbols;
            limit = 1 << kLenNumHighBits;
          }
        }
        TREE_DECODE_CHECK(probLen, limit, len);
        len += offset;
      }

      if (state < 4) {
        unsigned posSlot;
        prob = probs + PosSlot +
               ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
        TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
        if (posSlot >= kStartPosModelIndex) {
          int numDirectBits = ((posSlot >> 1) - 1);

          /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */

          if (posSlot < kEndPosModelIndex) {
            prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1;
          } else {
            numDirectBits -= kNumAlignBits;
            do {
              NORMALIZE_CHECK
              range >>= 1;
              code -= range & (((code - range) >> 31) - 1);
              /* if (code >= range) code -= range; */
            } while (--numDirectBits != 0);
            prob = probs + Align;
            numDirectBits = kNumAlignBits;
          }
          {
            unsigned i = 1;
            do {
              GET_BIT_CHECK(prob + i, i);
            } while (--numDirectBits != 0);
          }
        }
      }
    }
  }
  NORMALIZE_CHECK;
  return res;
}

/* Loads the range coder from a 5-byte header: data[1..4] form `code`
   (most significant byte first); data[0] is not read here. Clears needFlush. */
static void LzmaDec_InitRc(CLzmaDec* p, const Byte* data) {
  p->code = ((UInt32)data[1] << 24) | ((UInt32)data[2] << 16) | ((UInt32)data[3] << 8) | ((UInt32)data[4]);
  p->range = 0xFFFFFFFF;
  p->needFlush = 0;
}

/* Requests a range-coder reload (needFlush = 1) and drops any pending match
   and buffered input. initDic additionally resets processedPos/checkDicSize;
   either flag schedules a probability-table reset via needInitState. */
void LzmaDec_InitDicAndState(CLzmaDec* p, Bool initDic, Bool initState) {
  p->needFlush = 1;
  p->remainLen = 0;
  p->tempBufSize = 0;

  if (initDic) {
    p->processedPos = 0;
    p->checkDicSize = 0;
    p->needInitState = 1;
  }
  if (initState)
    p->needInitState = 1;
}

/* Full reset for a new stream: dictionary position back to 0, then
   LzmaDec_InitDicAndState with both flags set. */
void LzmaDec_Init(CLzmaDec* p) {
  p->dicPos = 0;
  LzmaDec_InitDicAndState(p, True, True);
}

/* Sets every probability cell to kBitModelTotal / 2, all four reps to 1 and
   state to 0, then clears needInitState. */
static void LzmaDec_InitStateReal(CLzmaDec* p) {
  UInt32 numProbs = Literal + ((UInt32)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp));
  UInt32 i;
  CLzmaProb* probs = p->probs;
  for (i = 0; i < numProbs; i++)
    probs[i] = kBitModelTotal >> 1;
  p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1;
  p->state = 0;
  p->needInitState = 0;
}

/* Decodes from src into p->dic up to position dicLimit.
   On entry *srcLen is the number of input bytes available; on return it is the
   number consumed. *status is always written (initially NOT_SPECIFIED). */
SRes LzmaDec_DecodeToDic(CLzmaDec* p, SizeT dicLimit, const Byte* src, SizeT* srcLen,
                         ELzmaFinishMode finishMode, ELzmaStatus* status) {
  SizeT inSize = *srcLen;
  (*srcLen) = 0;
  LzmaDec_WriteRem(p, dicLimit);

  *status = LZMA_STATUS_NOT_SPECIFIED;

  while (p->remainLen != kMatchSpecLenStart) {
    int checkEndMarkNow;

    if (p->needFlush != 0) {
      /* Collect the RC_INIT_SIZE header bytes, possibly across several calls. */
      for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++,
inSize--)
        p->tempBuf[p->tempBufSize++] = *src++;
      if (p->tempBufSize < RC_INIT_SIZE) {
        *status = LZMA_STATUS_NEEDS_MORE_INPUT;
        return SZ_OK;
      }
      /* The first header byte must be zero. */
      if (p->tempBuf[0] != 0)
        return SZ_ERROR_DATA;

      LzmaDec_InitRc(p, p->tempBuf);
      p->tempBufSize = 0;
    }

    checkEndMarkNow = 0;
    if (p->dicPos >= dicLimit) {
      /* Output limit reached: decide what to report. */
      if (p->remainLen == 0 && p->code == 0) {
        *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
        return SZ_OK;
      }
      if (finishMode == LZMA_FINISH_ANY) {
        *status = LZMA_STATUS_NOT_FINISHED;
        return SZ_OK;
      }
      if (p->remainLen != 0) {
        *status = LZMA_STATUS_NOT_FINISHED;
        return SZ_ERROR_DATA;
      }
      /* LZMA_FINISH_END with nothing pending: the next symbol must be a match
         (checked through LzmaDec_TryDummy below). */
      checkEndMarkNow = 1;
    }

    if (p->needInitState)
      LzmaDec_InitStateReal(p);

    if (p->tempBufSize == 0) {
      /* Decode straight from the caller's buffer. */
      SizeT processed;
      const Byte* bufLimit;
      if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) {
        /* Too little input to decode blindly: probe one symbol first. */
        int dummyRes = LzmaDec_TryDummy(p, src, inSize);
        if (dummyRes == DUMMY_ERROR) {
          /* Symbol is incomplete: stash the bytes for the next call. */
          memcpy(p->tempBuf, src, inSize);
          p->tempBufSize = (unsigned)inSize;
          (*srcLen) += inSize;
          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
          return SZ_OK;
        }
        if (checkEndMarkNow && dummyRes != DUMMY_MATCH) {
          *status = LZMA_STATUS_NOT_FINISHED;
          return SZ_ERROR_DATA;
        }
        /* bufLimit == src makes the decode loop stop after one symbol. */
        bufLimit = src;
      } else
        bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX;
      p->buf = src;
      if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
        return SZ_ERROR_DATA;
      processed = (SizeT)(p->buf - src);
      (*srcLen) += processed;
      src += processed;
      inSize -= processed;
    } else {
      /* Bytes left over from an earlier call: top tempBuf up from src and
         decode one symbol out of it. */
      unsigned rem = p->tempBufSize, lookAhead = 0;
      while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
        p->tempBuf[rem++] = src[lookAhead++];
      p->tempBufSize = rem;
      if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) {
        int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem);
        if (dummyRes == DUMMY_ERROR) {
          (*srcLen) += lookAhead;
          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
          return SZ_OK;
        }
        if (checkEndMarkNow && dummyRes != DUMMY_MATCH) {
          *status = LZMA_STATUS_NOT_FINISHED;
          return SZ_ERROR_DATA;
        }
      }
      p->buf = p->tempBuf;
      if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
        return SZ_ERROR_DATA;
      /* Give back the look-ahead bytes the decoder did not consume. */
      lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf));
      (*srcLen) += lookAhead;
      src += lookAhead;
      inSize -= lookAhead;
      p->tempBufSize = 0;
    }
  }
  /* remainLen == kMatchSpecLenStart: the stream is valid only if code is 0. */
  if (p->code == 0)
    *status = LZMA_STATUS_FINISHED_WITH_MARK;
  return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA;
}

/* Buffer interface: repeatedly decodes into the internal dictionary and copies
   each newly produced run to dest. On entry *destLen / *srcLen are capacities;
   on return they are the byte counts written / consumed. dicPos is wrapped to
   0 whenever it has reached dicBufSize. */
SRes LzmaDec_DecodeToBuf(CLzmaDec* p, Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status) {
  SizeT outSize = *destLen;
  SizeT inSize = *srcLen;
  *srcLen = *destLen = 0;
  for (;;) {
    SizeT inSizeCur = inSize, outSizeCur, dicPos;
    ELzmaFinishMode curFinishMode;
    SRes res;
    if (p->dicPos == p->dicBufSize)
      p->dicPos = 0;
    dicPos = p->dicPos;
    if (outSize > p->dicBufSize - dicPos) {
      /* More output wanted than fits before the wrap point: fill to the end
         of the dictionary and never insist on finishing in this round. */
      outSizeCur = p->dicBufSize;
      curFinishMode = LZMA_FINISH_ANY;
    } else {
      outSizeCur = dicPos + outSize;
      curFinishMode = finishMode;
    }

    res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status);
    src += inSizeCur;
    inSize -= inSizeCur;
    *srcLen += inSizeCur;
    outSizeCur = p->dicPos - dicPos;
    memcpy(dest, p->dic + dicPos, outSizeCur);
    dest += outSizeCur;
    outSize -= outSizeCur;
    *destLen += outSizeCur;
    if (res != 0)
      return res;
    if (outSizeCur == 0 || outSize == 0)
      return SZ_OK;
  }
}

/* Releases the probability table through alloc and clears the pointer. */
void LzmaDec_FreeProbs(CLzmaDec* p, ISzAlloc* alloc) {
  alloc->Free(alloc, p->probs);
  p->probs = 0;
}

/* Releases the dictionary buffer through alloc and clears the pointer. */
static void LzmaDec_FreeDict(CLzmaDec* p, ISzAlloc* alloc) {
  alloc->Free(alloc, p->dic);
  p->dic = 0;
}

/* Releases both the probability table and the dictionary. */
void LzmaDec_Free(CLzmaDec* p, ISzAlloc* alloc) {
  LzmaDec_FreeProbs(p, alloc);
  LzmaDec_FreeDict(p, alloc);
}

/* Parses the LZMA_PROPS_SIZE-byte property header.
   data[0] packs (pb * 5 + lp) * 9 + lc and must be below 9 * 5 * 5;
   data[1..4] is the dictionary size, least significant byte first, raised to
   LZMA_DIC_MIN when smaller.
   Returns SZ_ERROR_UNSUPPORTED for a short header or an out-of-range data[0].
   Note that p->dicSize is already written when data[0] is rejected. */
SRes LzmaProps_Decode(CLzmaProps* p, const Byte* data, unsigned size) {
  UInt32 dicSize;
  Byte d;

  if (size < LZMA_PROPS_SIZE)
    return SZ_ERROR_UNSUPPORTED;
  else
    dicSize = data[1] | ((UInt32)data[2] << 8) | ((UInt32)data[3] << 16) | ((UInt32)data[4] << 24);

  if (dicSize < LZMA_DIC_MIN)
    dicSize = LZMA_DIC_MIN;
  p->dicSize = dicSize;

  d = data[0];
  if (d >= (9 * 5 * 5))
    return SZ_ERROR_UNSUPPORTED;

  p->lc = d % 9;
  d /= 9;
  p->pb = d / 5;
  p->lp = d % 5;

  return SZ_OK;
}

/* (Re)allocates p->probs when it is missing or sized for other properties. */
static SRes LzmaDec_AllocateProbs2(CLzmaDec* p, const CLzmaProps*
propNew, ISzAlloc* alloc) { UInt32 numProbs = LzmaProps_GetNumProbs(propNew); if (p->probs == 0 || numProbs != p->numProbs) { LzmaDec_FreeProbs(p, alloc); p->probs = (CLzmaProb*)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); p->numProbs = numProbs; if (p->probs == 0) return SZ_ERROR_MEM; } return SZ_OK; } SRes LzmaDec_AllocateProbs(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc) { CLzmaProps propNew; RINOK(LzmaProps_Decode(&propNew, props, propsSize)); RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); p->prop = propNew; return SZ_OK; } SRes LzmaDec_Allocate(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc) { CLzmaProps propNew; SizeT dicBufSize; RINOK(LzmaProps_Decode(&propNew, props, propsSize)); RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); dicBufSize = propNew.dicSize; if (p->dic == 0 || dicBufSize != p->dicBufSize) { LzmaDec_FreeDict(p, alloc); p->dic = (Byte*)alloc->Alloc(alloc, dicBufSize); if (p->dic == 0) { LzmaDec_FreeProbs(p, alloc); return SZ_ERROR_MEM; } } p->dicBufSize = dicBufSize; p->prop = propNew; return SZ_OK; } SRes LzmaDecode(Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, const Byte* propData, unsigned propSize, ELzmaFinishMode finishMode, ELzmaStatus* status, ISzAlloc* alloc) { CLzmaDec p; SRes res; SizeT inSize = *srcLen; SizeT outSize = *destLen; *srcLen = *destLen = 0; if (inSize < RC_INIT_SIZE) return SZ_ERROR_INPUT_EOF; LzmaDec_Construct(&p); res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc); if (res != 0) return res; p.dic = dest; p.dicBufSize = outSize; LzmaDec_Init(&p); *srcLen = inSize; res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) res = SZ_ERROR_INPUT_EOF; (*destLen) = p.dicPos; LzmaDec_FreeProbs(&p, alloc); return res; } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_LzmaDec.h000066400000000000000000000157631503722002600220170ustar00rootroot00000000000000/* LzmaDec.h -- LZMA 
Decoder
2008-10-04 : Igor Pavlov : Public domain */

#ifndef __LZMADEC_H
#define __LZMADEC_H

#include "lzma_Types.h"

namespace crnlib {

/* #define _LZMA_PROB32 */
/* _LZMA_PROB32 can increase the speed on some CPUs,
   but memory usage for CLzmaDec::probs will be doubled in that case */

#ifdef _LZMA_PROB32
#define CLzmaProb UInt32
#else
#define CLzmaProb UInt16
#endif

/* ---------- LZMA Properties ---------- */

#define LZMA_PROPS_SIZE 5

typedef struct _CLzmaProps {
  unsigned lc, lp, pb;
  UInt32 dicSize;
} CLzmaProps;

/* LzmaProps_Decode - decodes properties
Returns:
  SZ_OK
  SZ_ERROR_UNSUPPORTED - Unsupported properties
*/

SRes LzmaProps_Decode(CLzmaProps* p, const Byte* data, unsigned size);

/* ---------- LZMA Decoder state ---------- */

/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case.
   Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */

#define LZMA_REQUIRED_INPUT_MAX 20

typedef struct {
  CLzmaProps prop;
  CLzmaProb* probs;      /* probability table; released by LzmaDec_FreeProbs */
  Byte* dic;             /* dictionary buffer; decoded bytes are written here */
  const Byte* buf;       /* current input position */
  UInt32 range, code;    /* range-coder registers */
  SizeT dicPos;          /* next write position in dic */
  SizeT dicBufSize;      /* size of dic in bytes */
  UInt32 processedPos;
  UInt32 checkDicSize;
  unsigned state;
  UInt32 reps[4];
  unsigned remainLen;
  int needFlush;
  int needInitState;
  UInt32 numProbs;       /* number of entries allocated in probs */
  unsigned tempBufSize;  /* number of valid bytes in tempBuf */
  Byte tempBuf[LZMA_REQUIRED_INPUT_MAX];
} CLzmaDec;

/* Clears the two owned pointers so that the Allocate / Free functions can be
   called on a fresh object. No other field is initialised here. */
#define LzmaDec_Construct(p) \
  {                          \
    (p)->dic = 0;            \
    (p)->probs = 0;          \
  }

void LzmaDec_Init(CLzmaDec* p);

/* There are two types of LZMA streams:
     0) Stream with end mark. That end mark adds about 6 bytes to compressed size.
     1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */

typedef enum {
  LZMA_FINISH_ANY, /* finish at any point */
  LZMA_FINISH_END  /* block must be finished at the end */
} ELzmaFinishMode;

/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!!

   You must use LZMA_FINISH_END, when you know that current output buffer
   covers last bytes of block. In other cases you must use LZMA_FINISH_ANY.

   If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK,
   and output value of destLen will be less than output buffer size limit.
   You can check status result also.

   You can use multiple checks to test data integrity after full decompression:
     1) Check Result and "status" variable.
     2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize.
     3) Check that output(srcLen) = compressedSize, if you know real compressedSize.
        You must use correct finish mode in that case. */

typedef enum {
  LZMA_STATUS_NOT_SPECIFIED,              /* use main error code instead */
  LZMA_STATUS_FINISHED_WITH_MARK,         /* stream was finished with end mark. */
  LZMA_STATUS_NOT_FINISHED,               /* stream was not finished */
  LZMA_STATUS_NEEDS_MORE_INPUT,           /* you must provide more input bytes */
  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* the stream may have finished without an end mark */
} ELzmaStatus;

/* ELzmaStatus is used only as output value for function call */

/* ---------- Interfaces ---------- */

/* There are 3 levels of interfaces:
     1) Dictionary Interface
     2) Buffer Interface
     3) One Call Interface
   You can select any of these interfaces, but don't mix functions from different
   groups for same object. */

/* There are two variants to allocate state for Dictionary Interface:
     1) LzmaDec_Allocate / LzmaDec_Free
     2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs
   You can use variant 2, if you set dictionary buffer manually.
   For Buffer Interface you must always use variant 1.

LzmaDec_Allocate* can return:
  SZ_OK
  SZ_ERROR_MEM         - Memory allocation error
  SZ_ERROR_UNSUPPORTED - Unsupported properties
*/

SRes LzmaDec_AllocateProbs(CLzmaDec* p, const Byte* props, unsigned propsSize, ISzAlloc* alloc);
void LzmaDec_FreeProbs(CLzmaDec* p, ISzAlloc* alloc);

SRes LzmaDec_Allocate(CLzmaDec* state, const Byte* prop, unsigned propsSize, ISzAlloc* alloc);
void LzmaDec_Free(CLzmaDec* state, ISzAlloc* alloc);

/* ---------- Dictionary Interface ---------- */

/* You can use it, if you want to eliminate the overhead for data copying from
   dictionary to some other external buffer.
   You must work with CLzmaDec variables directly in this interface.

   STEPS:
     LzmaDec_Construct()
     LzmaDec_Allocate()
     for (each new stream)
     {
       LzmaDec_Init()
       while (it needs more decompression)
       {
         LzmaDec_DecodeToDic()
         use data from CLzmaDec::dic and update CLzmaDec::dicPos
       }
     }
     LzmaDec_Free()
*/

/* LzmaDec_DecodeToDic

   The decoding to internal dictionary buffer (CLzmaDec::dic).
   You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!!

finishMode:
  It has meaning only if the decoding reaches output limit (dicLimit).
  LZMA_FINISH_ANY - Decode just dicLimit bytes.
  LZMA_FINISH_END - Stream must be finished after dicLimit.

Returns:
  SZ_OK
    status:
      LZMA_STATUS_FINISHED_WITH_MARK
      LZMA_STATUS_NOT_FINISHED
      LZMA_STATUS_NEEDS_MORE_INPUT
      LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK
  SZ_ERROR_DATA - Data error
*/

SRes LzmaDec_DecodeToDic(CLzmaDec* p, SizeT dicLimit,
                         const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status);

/* ---------- Buffer Interface ---------- */

/* It's zlib-like interface.
   See LzmaDec_DecodeToDic description for information about STEPS and return results,
   but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need
   to work with CLzmaDec variables manually.

finishMode:
  It has meaning only if the decoding reaches output limit (*destLen).
  LZMA_FINISH_ANY - Decode just destLen bytes.
LZMA_FINISH_END - Stream must be finished after (*destLen). */ SRes LzmaDec_DecodeToBuf(CLzmaDec* p, Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, ELzmaFinishMode finishMode, ELzmaStatus* status); /* ---------- One Call Interface ---------- */ /* LzmaDecode finishMode: It has meaning only if the decoding reaches output limit (*destLen). LZMA_FINISH_ANY - Decode just destLen bytes. LZMA_FINISH_END - Stream must be finished after (*destLen). Returns: SZ_OK status: LZMA_STATUS_FINISHED_WITH_MARK LZMA_STATUS_NOT_FINISHED LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK SZ_ERROR_DATA - Data error SZ_ERROR_MEM - Memory allocation error SZ_ERROR_UNSUPPORTED - Unsupported properties SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). */ SRes LzmaDecode(Byte* dest, SizeT* destLen, const Byte* src, SizeT* srcLen, const Byte* propData, unsigned propSize, ELzmaFinishMode finishMode, ELzmaStatus* status, ISzAlloc* alloc); } #endif DaemonEngine-crunch-ef4d32f/crnlib/lzma_LzmaEnc.cpp000066400000000000000000002002041503722002600223460ustar00rootroot00000000000000/* LzmaEnc.c -- LZMA Encoder 2008-10-04 : Igor Pavlov : Public domain */ #include "crn_core.h" #include /* #define SHOW_STAT */ /* #define SHOW_STAT2 */ #if defined(SHOW_STAT) || defined(SHOW_STAT2) #include #endif #include "lzma_LzmaEnc.h" #include "lzma_LzFind.h" #ifdef COMPRESS_MF_MT #include "lzma_LzFindMt.h" #endif namespace crnlib { #ifdef SHOW_STAT static int ttt = 0; #endif #define kBlockSizeMax ((1 << LZMA_NUM_BLOCK_SIZE_BITS) - 1) #define kBlockSize (9 << 10) #define kUnpackBlockSize (1 << 18) #define kMatchArraySize (1 << 21) #define kMatchRecordMaxSize ((LZMA_MATCH_LEN_MAX * 2 + 3) * LZMA_MATCH_LEN_MAX) #define kNumMaxDirectBits (31) #define kNumTopBits 24 #define kTopValue ((UInt32)1 << kNumTopBits) #define kNumBitModelTotalBits 11 #define kBitModelTotal (1 << kNumBitModelTotalBits) #define kNumMoveBits 5 #define kProbInitValue (kBitModelTotal >> 1) #define kNumMoveReducingBits 4 #define 
kNumBitPriceShiftBits 4
#define kBitPrice (1 << kNumBitPriceShiftBits)

/*
 * Puts *p into the "nothing chosen yet" state: level 5, dictSize and mc 0,
 * every other tunable -1, writeEndMark 0. LzmaEncProps_Normalize() below
 * replaces the 0 / negative placeholders with concrete values.
 */
void LzmaEncProps_Init(CLzmaEncProps* p) {
  p->level = 5;
  p->dictSize = p->mc = 0;
  p->lc = p->lp = p->pb = p->algo = p->fb = p->btMode = p->numHashBytes = p->numThreads = -1;
  p->writeEndMark = 0;
}

/*
 * Fills every field of *p that still holds its placeholder (0 for dictSize
 * and mc, negative for the rest) with a default derived from p->level.
 * Fields the caller already set are left untouched. A negative level is
 * treated as 5.
 */
void LzmaEncProps_Normalize(CLzmaEncProps* p) {
  int level = p->level;
  if (level < 0)
    level = 5;
  p->level = level;
  /* Levels 0..5 give 2^14 .. 2^24, level 6 gives 2^25, anything above 2^26. */
  if (p->dictSize == 0)
    p->dictSize = (level <= 5 ? (1 << (level * 2 + 14)) : (level == 6 ? (1 << 25) : (1 << 26)));
  if (p->lc < 0)
    p->lc = 3;
  if (p->lp < 0)
    p->lp = 0;
  if (p->pb < 0)
    p->pb = 2;
  if (p->algo < 0)
    p->algo = (level < 5 ? 0 : 1);
  if (p->fb < 0)
    p->fb = (level < 7 ? 32 : 64);
  /* btMode follows algo unless it was set explicitly. */
  if (p->btMode < 0)
    p->btMode = (p->algo == 0 ? 0 : 1);
  if (p->numHashBytes < 0)
    p->numHashBytes = 4;
  /* Default cut value is 16 + fb/2, halved when btMode is 0. */
  if (p->mc == 0)
    p->mc = (16 + (p->fb >> 1)) >> (p->btMode ? 0 : 1);
  if (p->numThreads < 0)
    p->numThreads = ((p->btMode && p->algo) ? 2 : 1);
}

/*
 * Returns the dictionary size that *props2 would have after normalization.
 * Works on a local copy, so *props2 itself is not modified.
 */
UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps* props2) {
  CLzmaEncProps props = *props2;
  LzmaEncProps_Normalize(&props);
  return props.dictSize;
}

/* #define LZMA_LOG_BSR */
/* Define it for Intel's CPU */

#ifdef LZMA_LOG_BSR

#define kDicLogSizeMaxCompress 30

/* Position-slot computation through _BitScanReverse; no lookup table. */
#define BSR2_RET(pos, res) \
  { \
    unsigned long i; \
    _BitScanReverse(&i, (pos)); \
    res = (i + i) + ((pos >> (i - 1)) & 1); \
  }

UInt32 GetPosSlot1(UInt32 pos) {
  UInt32 res;
  BSR2_RET(pos, res);
  return res;
}
#define GetPosSlot2(pos, res) \
  { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) \
  { \
    if (pos < 2) \
      res = pos; \
    else \
      BSR2_RET(pos, res); \
  }

#else

#define kNumLogBits (9 + (int)sizeof(size_t) / 2)
#define kDicLogSizeMaxCompress ((kNumLogBits - 1) * 2 + 7)

/*
 * Builds the value -> slot lookup table. Slots 0 and 1 cover the values 0
 * and 1; every later slot s covers 2^((s >> 1) - 1) consecutive values, so
 * the loop writes exactly (1 << kNumLogBits) entries in total.
 */
void LzmaEnc_FastPosInit(Byte* g_FastPos) {
  int c = 2, slotFast;
  g_FastPos[0] = 0;
  g_FastPos[1] = 1;

  for (slotFast = 2; slotFast < kNumLogBits * 2; slotFast++) {
    UInt32 k = (1 << ((slotFast >> 1) - 1));
    UInt32 j;
    for (j = 0; j < k; j++, c++)
      g_FastPos[c] = (Byte)slotFast;
  }
}

/*
 * Table-based slot lookup; expects a CLzmaEnc* named `p` in scope.
 * The subtraction wraps (top bit set) when pos >= 2^(kNumLogBits + 6), so the
 * mask selects a shift of 6 for small pos and 6 + kNumLogBits - 1 otherwise.
 * The commented-out macro below is the same computation written with ?:.
 */
#define BSR2_RET(pos, res) \
  { \
    UInt32 i = 6 + ((kNumLogBits - 1) & \
                    (0 - (((((UInt32)1 << (kNumLogBits + 6)) - 1) - pos) >> 31))); \
    res = p->g_FastPos[pos >> i] + (i * 2); \
  }
/*
#define BSR2_RET(pos, res) { res = (pos < (1 << (kNumLogBits + 6))) ? \
  p->g_FastPos[pos >> 6] + 12 : \
  p->g_FastPos[pos >> (6 + kNumLogBits - 1)] + (6 + (kNumLogBits - 1)) * 2; }
*/

#define GetPosSlot1(pos) p->g_FastPos[pos]
#define GetPosSlot2(pos, res) \
  { BSR2_RET(pos, res); }
#define GetPosSlot(pos, res) \
  { \
    if (pos < kNumFullDistances) \
      res = p->g_FastPos[pos]; \
    else \
      BSR2_RET(pos, res); \
  }

#endif

#define LZMA_NUM_REPS 4

typedef unsigned CState;

/*
 * One node of the price table CLzmaEnc::opt[]. posPrev/backPrev link a node
 * to its predecessor; prev1IsChar/prev2 with posPrev2/backPrev2 describe a
 * compound step (see Backward() and GetOptimum()).
 */
typedef struct _COptimal {
  UInt32 price;

  CState state;
  int prev1IsChar;
  int prev2;

  UInt32 posPrev2;
  UInt32 backPrev2;

  UInt32 posPrev;
  UInt32 backPrev;
  UInt32 backs[LZMA_NUM_REPS];
} COptimal;

#define kNumOpts (1 << 12)

#define kNumLenToPosStates 4
#define kNumPosSlotBits 6
#define kDicLogSizeMin 0
#define kDicLogSizeMax 32
#define kDistTableSizeMax (kDicLogSizeMax * 2)

#define kNumAlignBits 4
#define kAlignTableSize (1 << kNumAlignBits)
#define kAlignMask (kAlignTableSize - 1)

#define kStartPosModelIndex 4
#define kEndPosModelIndex 14
#define kNumPosModels (kEndPosModelIndex - kStartPosModelIndex)

#define kNumFullDistances (1 << (kEndPosModelIndex / 2))

/* Width of one adaptive probability cell. */
#ifdef _LZMA_PROB32
#define CLzmaProb UInt32
#else
#define CLzmaProb UInt16
#endif

#define LZMA_PB_MAX 4
#define LZMA_LC_MAX 8
#define LZMA_LP_MAX 4

#define LZMA_NUM_PB_STATES_MAX (1 << LZMA_PB_MAX)

#define kLenNumLowBits 3
#define kLenNumLowSymbols (1 << kLenNumLowBits)
#define kLenNumMidBits 3
#define kLenNumMidSymbols (1 << kLenNumMidBits)
#define kLenNumHighBits 8
#define kLenNumHighSymbols (1 << kLenNumHighBits)

#define kLenNumSymbolsTotal (kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)

#define LZMA_MATCH_LEN_MIN 2
#define LZMA_MATCH_LEN_MAX (LZMA_MATCH_LEN_MIN + kLenNumSymbolsTotal - 1)

#define kNumStates 12

/* Probability cells of the length coder: two choice bits plus low/mid/high trees. */
typedef struct {
  CLzmaProb choice;
  CLzmaProb choice2;
  CLzmaProb low[LZMA_NUM_PB_STATES_MAX << kLenNumLowBits];
  CLzmaProb mid[LZMA_NUM_PB_STATES_MAX << kLenNumMidBits];
  CLzmaProb high[kLenNumHighSymbols];
} CLenEnc;

/* A length coder together with its per-posState price rows and refresh counters. */
typedef struct {
  CLenEnc p;
  UInt32 prices[LZMA_NUM_PB_STATES_MAX][kLenNumSymbolsTotal];
  UInt32 tableSize;
  UInt32 counters[LZMA_NUM_PB_STATES_MAX];
} CLenPriceEnc;

/* Range coder state plus its output buffer [bufBase, bufLim) and sink. */
typedef struct _CRangeEnc {
  UInt32 range;
  Byte cache;
  UInt64 low;
  UInt64 cacheSize;
  Byte* buf;
  Byte* bufLim;
  Byte* bufBase;
  ISeqOutStream* outStream;
  UInt64 processed;
  SRes res;
} CRangeEnc;

/* In-memory input stream: `rem` bytes remain starting at `data`. */
typedef struct _CSeqInStreamBuf {
  ISeqInStream funcTable;
  const Byte* data;
  SizeT rem;
} CSeqInStreamBuf;

/*
 * Read callback for CSeqInStreamBuf. Copies min(*size, remaining) bytes into
 * `data`, advances the stream, stores the copied count in *size and always
 * returns SZ_OK (a count of 0 is how exhaustion shows up).
 */
static SRes MyRead(void* pp, void* data, size_t* size) {
  size_t curSize = *size;
  CSeqInStreamBuf* p = (CSeqInStreamBuf*)pp;
  if (p->rem < curSize)
    curSize = p->rem;
  memcpy(data, p->data, curSize);
  p->rem -= curSize;
  p->data += curSize;
  *size = curSize;
  return SZ_OK;
}

/* Snapshot of the adaptive model; mirrors the same-named CLzmaEnc fields. */
typedef struct {
  CLzmaProb* litProbs;

  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
  CLzmaProb isRep[kNumStates];
  CLzmaProb isRepG0[kNumStates];
  CLzmaProb isRepG1[kNumStates];
  CLzmaProb isRepG2[kNumStates];
  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];

  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
  CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
  CLzmaProb posAlignEncoder[1 << kNumAlignBits];

  CLenPriceEnc lenEnc;
  CLenPriceEnc repLenEnc;

  UInt32 reps[LZMA_NUM_REPS];
  UInt32 state;
} CSaveState;

/* Complete encoder object: match finder, price tables, model, range coder. */
typedef struct _CLzmaEnc {
  IMatchFinder matchFinder;
  void* matchFinderObj;

#ifdef COMPRESS_MF_MT
  Bool mtMode;
  CMatchFinderMt matchFinderMt;
#endif

  CMatchFinder matchFinderBase;

#ifdef COMPRESS_MF_MT
  Byte pad[128];
#endif

  UInt32 optimumEndIndex;
  UInt32 optimumCurrentIndex;

  UInt32 longestMatchLength;
  UInt32 numPairs;
  UInt32 numAvail;
  COptimal opt[kNumOpts];

#ifndef LZMA_LOG_BSR
  Byte g_FastPos[1 << kNumLogBits];
#endif

  UInt32 ProbPrices[kBitModelTotal >> kNumMoveReducingBits];
  UInt32 matches[LZMA_MATCH_LEN_MAX * 2 + 2 + 1];
  UInt32 numFastBytes;
  UInt32 additionalOffset;
  UInt32 reps[LZMA_NUM_REPS];
  UInt32 state;

  UInt32 posSlotPrices[kNumLenToPosStates][kDistTableSizeMax];
  UInt32 distancesPrices[kNumLenToPosStates][kNumFullDistances];
  UInt32 alignPrices[kAlignTableSize];
  UInt32 alignPriceCount;

  UInt32 distTableSize;

  unsigned lc, lp, pb;
  unsigned lpMask, pbMask;

  CLzmaProb* litProbs;

  CLzmaProb isMatch[kNumStates][LZMA_NUM_PB_STATES_MAX];
  CLzmaProb isRep[kNumStates];
  CLzmaProb isRepG0[kNumStates];
  CLzmaProb isRepG1[kNumStates];
  CLzmaProb isRepG2[kNumStates];
  CLzmaProb isRep0Long[kNumStates][LZMA_NUM_PB_STATES_MAX];

  CLzmaProb posSlotEncoder[kNumLenToPosStates][1 << kNumPosSlotBits];
  CLzmaProb posEncoders[kNumFullDistances - kEndPosModelIndex];
  CLzmaProb posAlignEncoder[1 << kNumAlignBits];

  CLenPriceEnc lenEnc;
  CLenPriceEnc repLenEnc;

  unsigned lclp;

  Bool fastMode;

  CRangeEnc rc;

  Bool writeEndMark;
  UInt64 nowPos64;
  UInt32 matchPriceCount;
  Bool finished;
  Bool multiThread;

  SRes result;
  UInt32 dictSize;
  UInt32 matchFinderCycles;

  ISeqInStream* inStream;
  CSeqInStreamBuf seqBufInStream;

  CSaveState saveState;
} CLzmaEnc;

/*
 * Copies the encoder's adaptive model (probabilities, length coders, rep
 * distances, state) into p->saveState.
 * NOTE(review): saveState.litProbs must already point at a buffer of at
 * least (0x300 << lclp) cells; it is not allocated here - confirm at the
 * allocation site.
 */
void LzmaEnc_SaveState(CLzmaEncHandle pp) {
  CLzmaEnc* p = (CLzmaEnc*)pp;
  CSaveState* dest = &p->saveState;
  int i;
  dest->lenEnc = p->lenEnc;
  dest->repLenEnc = p->repLenEnc;
  dest->state = p->state;

  for (i = 0; i < kNumStates; i++) {
    memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i]));
    memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i]));
  }
  for (i = 0; i < kNumLenToPosStates; i++)
    memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i]));
  memcpy(dest->isRep, p->isRep, sizeof(p->isRep));
  memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0));
  memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1));
  memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2));
  memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders));
  memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder));
  memcpy(dest->reps, p->reps, sizeof(p->reps));
  memcpy(dest->litProbs, p->litProbs, (0x300 << p->lclp) * sizeof(CLzmaProb));
}

void
LzmaEnc_RestoreState(CLzmaEncHandle pp) { CLzmaEnc* dest = (CLzmaEnc*)pp; const CSaveState* p = &dest->saveState; int i; dest->lenEnc = p->lenEnc; dest->repLenEnc = p->repLenEnc; dest->state = p->state; for (i = 0; i < kNumStates; i++) { memcpy(dest->isMatch[i], p->isMatch[i], sizeof(p->isMatch[i])); memcpy(dest->isRep0Long[i], p->isRep0Long[i], sizeof(p->isRep0Long[i])); } for (i = 0; i < kNumLenToPosStates; i++) memcpy(dest->posSlotEncoder[i], p->posSlotEncoder[i], sizeof(p->posSlotEncoder[i])); memcpy(dest->isRep, p->isRep, sizeof(p->isRep)); memcpy(dest->isRepG0, p->isRepG0, sizeof(p->isRepG0)); memcpy(dest->isRepG1, p->isRepG1, sizeof(p->isRepG1)); memcpy(dest->isRepG2, p->isRepG2, sizeof(p->isRepG2)); memcpy(dest->posEncoders, p->posEncoders, sizeof(p->posEncoders)); memcpy(dest->posAlignEncoder, p->posAlignEncoder, sizeof(p->posAlignEncoder)); memcpy(dest->reps, p->reps, sizeof(p->reps)); memcpy(dest->litProbs, p->litProbs, (0x300 << dest->lclp) * sizeof(CLzmaProb)); } SRes LzmaEnc_SetProps(CLzmaEncHandle pp, const CLzmaEncProps* props2) { CLzmaEnc* p = (CLzmaEnc*)pp; CLzmaEncProps props = *props2; LzmaEncProps_Normalize(&props); if (props.lc > LZMA_LC_MAX || props.lp > LZMA_LP_MAX || props.pb > LZMA_PB_MAX || props.dictSize > (1U << kDicLogSizeMaxCompress) || props.dictSize > (1 << 30)) return SZ_ERROR_PARAM; p->dictSize = props.dictSize; p->matchFinderCycles = props.mc; { unsigned fb = props.fb; if (fb < 5) fb = 5; if (fb > LZMA_MATCH_LEN_MAX) fb = LZMA_MATCH_LEN_MAX; p->numFastBytes = fb; } p->lc = props.lc; p->lp = props.lp; p->pb = props.pb; p->fastMode = (props.algo == 0); p->matchFinderBase.btMode = props.btMode; { UInt32 numHashBytes = 4; if (props.btMode) { if (props.numHashBytes < 2) numHashBytes = 2; else if (props.numHashBytes < 4) numHashBytes = props.numHashBytes; } p->matchFinderBase.numHashBytes = numHashBytes; } p->matchFinderBase.cutValue = props.mc; p->writeEndMark = props.writeEndMark; #ifdef COMPRESS_MF_MT /* if (newMultiThread != 
_multiThread) { ReleaseMatchFinder(); _multiThread = newMultiThread; } */ p->multiThread = (props.numThreads > 1); #endif return SZ_OK; } static const int kLiteralNextStates[kNumStates] = {0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5}; static const int kMatchNextStates[kNumStates] = {7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10}; static const int kRepNextStates[kNumStates] = {8, 8, 8, 8, 8, 8, 8, 11, 11, 11, 11, 11}; static const int kShortRepNextStates[kNumStates] = {9, 9, 9, 9, 9, 9, 9, 11, 11, 11, 11, 11}; #define IsCharState(s) ((s) < 7) #define GetLenToPosState(len) (((len) < kNumLenToPosStates + 1) ? (len)-2 : kNumLenToPosStates - 1) #define kInfinityPrice (1 << 30) static void RangeEnc_Construct(CRangeEnc* p) { p->outStream = 0; p->bufBase = 0; } #define RangeEnc_GetProcessed(p) ((p)->processed + ((p)->buf - (p)->bufBase) + (p)->cacheSize) #define RC_BUF_SIZE (1 << 16) static int RangeEnc_Alloc(CRangeEnc* p, ISzAlloc* alloc) { if (p->bufBase == 0) { p->bufBase = (Byte*)alloc->Alloc(alloc, RC_BUF_SIZE); if (p->bufBase == 0) return 0; p->bufLim = p->bufBase + RC_BUF_SIZE; } return 1; } static void RangeEnc_Free(CRangeEnc* p, ISzAlloc* alloc) { alloc->Free(alloc, p->bufBase); p->bufBase = 0; } static void RangeEnc_Init(CRangeEnc* p) { /* Stream.Init(); */ p->low = 0; p->range = 0xFFFFFFFF; p->cacheSize = 1; p->cache = 0; p->buf = p->bufBase; p->processed = 0; p->res = SZ_OK; } static void RangeEnc_FlushStream(CRangeEnc* p) { size_t num; if (p->res != SZ_OK) return; num = p->buf - p->bufBase; if (num != p->outStream->Write(p->outStream, p->bufBase, num)) p->res = SZ_ERROR_WRITE; p->processed += num; p->buf = p->bufBase; } static void MY_FAST_CALL RangeEnc_ShiftLow(CRangeEnc* p) { if ((UInt32)p->low < (UInt32)0xFF000000 || (int)(p->low >> 32) != 0) { Byte temp = p->cache; do { Byte* buf = p->buf; *buf++ = (Byte)(temp + (Byte)(p->low >> 32)); p->buf = buf; if (buf == p->bufLim) RangeEnc_FlushStream(p); temp = 0xFF; } while (--p->cacheSize != 0); p->cache = (Byte)((UInt32)p->low 
>> 24); } p->cacheSize++; p->low = (UInt32)p->low << 8; } static void RangeEnc_FlushData(CRangeEnc* p) { int i; for (i = 0; i < 5; i++) RangeEnc_ShiftLow(p); } static void RangeEnc_EncodeDirectBits(CRangeEnc* p, UInt32 value, int numBits) { do { p->range >>= 1; p->low += p->range & (0 - ((value >> --numBits) & 1)); if (p->range < kTopValue) { p->range <<= 8; RangeEnc_ShiftLow(p); } } while (numBits != 0); } static void RangeEnc_EncodeBit(CRangeEnc* p, CLzmaProb* prob, UInt32 symbol) { UInt32 ttt = *prob; UInt32 newBound = (p->range >> kNumBitModelTotalBits) * ttt; if (symbol == 0) { p->range = newBound; ttt += (kBitModelTotal - ttt) >> kNumMoveBits; } else { p->low += newBound; p->range -= newBound; ttt -= ttt >> kNumMoveBits; } *prob = (CLzmaProb)ttt; if (p->range < kTopValue) { p->range <<= 8; RangeEnc_ShiftLow(p); } } static void LitEnc_Encode(CRangeEnc* p, CLzmaProb* probs, UInt32 symbol) { symbol |= 0x100; do { RangeEnc_EncodeBit(p, probs + (symbol >> 8), (symbol >> 7) & 1); symbol <<= 1; } while (symbol < 0x10000); } static void LitEnc_EncodeMatched(CRangeEnc* p, CLzmaProb* probs, UInt32 symbol, UInt32 matchByte) { UInt32 offs = 0x100; symbol |= 0x100; do { matchByte <<= 1; RangeEnc_EncodeBit(p, probs + (offs + (matchByte & offs) + (symbol >> 8)), (symbol >> 7) & 1); symbol <<= 1; offs &= ~(matchByte ^ symbol); } while (symbol < 0x10000); } void LzmaEnc_InitPriceTables(UInt32* ProbPrices) { UInt32 i; for (i = (1 << kNumMoveReducingBits) / 2; i < kBitModelTotal; i += (1 << kNumMoveReducingBits)) { const int kCyclesBits = kNumBitPriceShiftBits; UInt32 w = i; UInt32 bitCount = 0; int j; for (j = 0; j < kCyclesBits; j++) { w = w * w; bitCount <<= 1; while (w >= ((UInt32)1 << 16)) { w >>= 1; bitCount++; } } ProbPrices[i >> kNumMoveReducingBits] = ((kNumBitModelTotalBits << kCyclesBits) - 15 - bitCount); } } #define GET_PRICE(prob, symbol) \ p->ProbPrices[((prob) ^ (((-(int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; #define GET_PRICEa(prob, 
symbol) \ ProbPrices[((prob) ^ ((-((int)(symbol))) & (kBitModelTotal - 1))) >> kNumMoveReducingBits]; #define GET_PRICE_0(prob) p->ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICE_1(prob) p->ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] #define GET_PRICE_0a(prob) ProbPrices[(prob) >> kNumMoveReducingBits] #define GET_PRICE_1a(prob) ProbPrices[((prob) ^ (kBitModelTotal - 1)) >> kNumMoveReducingBits] static UInt32 LitEnc_GetPrice(const CLzmaProb* probs, UInt32 symbol, UInt32* ProbPrices) { UInt32 price = 0; symbol |= 0x100; do { price += GET_PRICEa(probs[symbol >> 8], (symbol >> 7) & 1); symbol <<= 1; } while (symbol < 0x10000); return price; } static UInt32 LitEnc_GetPriceMatched(const CLzmaProb* probs, UInt32 symbol, UInt32 matchByte, UInt32* ProbPrices) { UInt32 price = 0; UInt32 offs = 0x100; symbol |= 0x100; do { matchByte <<= 1; price += GET_PRICEa(probs[offs + (matchByte & offs) + (symbol >> 8)], (symbol >> 7) & 1); symbol <<= 1; offs &= ~(matchByte ^ symbol); } while (symbol < 0x10000); return price; } static void RcTree_Encode(CRangeEnc* rc, CLzmaProb* probs, int numBitLevels, UInt32 symbol) { UInt32 m = 1; int i; for (i = numBitLevels; i != 0;) { UInt32 bit; i--; bit = (symbol >> i) & 1; RangeEnc_EncodeBit(rc, probs + m, bit); m = (m << 1) | bit; } } static void RcTree_ReverseEncode(CRangeEnc* rc, CLzmaProb* probs, int numBitLevels, UInt32 symbol) { UInt32 m = 1; int i; for (i = 0; i < numBitLevels; i++) { UInt32 bit = symbol & 1; RangeEnc_EncodeBit(rc, probs + m, bit); m = (m << 1) | bit; symbol >>= 1; } } static UInt32 RcTree_GetPrice(const CLzmaProb* probs, int numBitLevels, UInt32 symbol, UInt32* ProbPrices) { UInt32 price = 0; symbol |= (1 << numBitLevels); while (symbol != 1) { price += GET_PRICEa(probs[symbol >> 1], symbol & 1); symbol >>= 1; } return price; } static UInt32 RcTree_ReverseGetPrice(const CLzmaProb* probs, int numBitLevels, UInt32 symbol, UInt32* ProbPrices) { UInt32 price = 0; UInt32 m = 1; int i; for 
(i = numBitLevels; i != 0; i--) { UInt32 bit = symbol & 1; symbol >>= 1; price += GET_PRICEa(probs[m], bit); m = (m << 1) | bit; } return price; } static void LenEnc_Init(CLenEnc* p) { unsigned i; p->choice = p->choice2 = kProbInitValue; for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumLowBits); i++) p->low[i] = kProbInitValue; for (i = 0; i < (LZMA_NUM_PB_STATES_MAX << kLenNumMidBits); i++) p->mid[i] = kProbInitValue; for (i = 0; i < kLenNumHighSymbols; i++) p->high[i] = kProbInitValue; } static void LenEnc_Encode(CLenEnc* p, CRangeEnc* rc, UInt32 symbol, UInt32 posState) { if (symbol < kLenNumLowSymbols) { RangeEnc_EncodeBit(rc, &p->choice, 0); RcTree_Encode(rc, p->low + (posState << kLenNumLowBits), kLenNumLowBits, symbol); } else { RangeEnc_EncodeBit(rc, &p->choice, 1); if (symbol < kLenNumLowSymbols + kLenNumMidSymbols) { RangeEnc_EncodeBit(rc, &p->choice2, 0); RcTree_Encode(rc, p->mid + (posState << kLenNumMidBits), kLenNumMidBits, symbol - kLenNumLowSymbols); } else { RangeEnc_EncodeBit(rc, &p->choice2, 1); RcTree_Encode(rc, p->high, kLenNumHighBits, symbol - kLenNumLowSymbols - kLenNumMidSymbols); } } } static void LenEnc_SetPrices(CLenEnc* p, UInt32 posState, UInt32 numSymbols, UInt32* prices, UInt32* ProbPrices) { UInt32 a0 = GET_PRICE_0a(p->choice); UInt32 a1 = GET_PRICE_1a(p->choice); UInt32 b0 = a1 + GET_PRICE_0a(p->choice2); UInt32 b1 = a1 + GET_PRICE_1a(p->choice2); UInt32 i = 0; for (i = 0; i < kLenNumLowSymbols; i++) { if (i >= numSymbols) return; prices[i] = a0 + RcTree_GetPrice(p->low + (posState << kLenNumLowBits), kLenNumLowBits, i, ProbPrices); } for (; i < kLenNumLowSymbols + kLenNumMidSymbols; i++) { if (i >= numSymbols) return; prices[i] = b0 + RcTree_GetPrice(p->mid + (posState << kLenNumMidBits), kLenNumMidBits, i - kLenNumLowSymbols, ProbPrices); } for (; i < numSymbols; i++) prices[i] = b1 + RcTree_GetPrice(p->high, kLenNumHighBits, i - kLenNumLowSymbols - kLenNumMidSymbols, ProbPrices); } static void MY_FAST_CALL 
LenPriceEnc_UpdateTable(CLenPriceEnc* p, UInt32 posState, UInt32* ProbPrices) { LenEnc_SetPrices(&p->p, posState, p->tableSize, p->prices[posState], ProbPrices); p->counters[posState] = p->tableSize; } static void LenPriceEnc_UpdateTables(CLenPriceEnc* p, UInt32 numPosStates, UInt32* ProbPrices) { UInt32 posState; for (posState = 0; posState < numPosStates; posState++) LenPriceEnc_UpdateTable(p, posState, ProbPrices); } static void LenEnc_Encode2(CLenPriceEnc* p, CRangeEnc* rc, UInt32 symbol, UInt32 posState, Bool updatePrice, UInt32* ProbPrices) { LenEnc_Encode(&p->p, rc, symbol, posState); if (updatePrice) if (--p->counters[posState] == 0) LenPriceEnc_UpdateTable(p, posState, ProbPrices); } static void MovePos(CLzmaEnc* p, UInt32 num) { #ifdef SHOW_STAT ttt += num; printf("\n MovePos %d", num); #endif if (num != 0) { p->additionalOffset += num; p->matchFinder.Skip(p->matchFinderObj, num); } } static UInt32 ReadMatchDistances(CLzmaEnc* p, UInt32* numDistancePairsRes) { UInt32 lenRes = 0, numPairs; p->numAvail = p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); numPairs = p->matchFinder.GetMatches(p->matchFinderObj, p->matches); #ifdef SHOW_STAT printf("\n i = %d numPairs = %d ", ttt, numPairs / 2); ttt++; { UInt32 i; for (i = 0; i < numPairs; i += 2) printf("%2d %6d | ", p->matches[i], p->matches[i + 1]); } #endif if (numPairs > 0) { lenRes = p->matches[numPairs - 2]; if (lenRes == p->numFastBytes) { const Byte* pby = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; UInt32 distance = p->matches[numPairs - 1] + 1; UInt32 numAvail = p->numAvail; if (numAvail > LZMA_MATCH_LEN_MAX) numAvail = LZMA_MATCH_LEN_MAX; { const Byte* pby2 = pby - distance; for (; lenRes < numAvail && pby[lenRes] == pby2[lenRes]; lenRes++) ; } } } p->additionalOffset++; *numDistancePairsRes = numPairs; return lenRes; } #define MakeAsChar(p) \ (p)->backPrev = (UInt32)(-1); \ (p)->prev1IsChar = False; #define MakeAsShortRep(p) \ (p)->backPrev = 0; \ (p)->prev1IsChar = False; 
#define IsShortRep(p) ((p)->backPrev == 0)

/* Price of the isRepG0 = 0, isRep0Long = 0 bit pair (what GetOptimum adds for a "short rep"). */
static UInt32 GetRepLen1Price(CLzmaEnc* p, UInt32 state, UInt32 posState) {
  return GET_PRICE_0(p->isRepG0[state]) + GET_PRICE_0(p->isRep0Long[state][posState]);
}

/*
 * Price of selecting rep distance `repIndex` (0..3), excluding the length.
 * Index 0 costs isRepG0 = 0 plus isRep0Long = 1; index 1 costs isRepG0 = 1,
 * isRepG1 = 0; indices 2 and 3 cost isRepG0 = 1, isRepG1 = 1 and an isRepG2
 * bit equal to repIndex - 2.
 */
static UInt32 GetPureRepPrice(CLzmaEnc* p, UInt32 repIndex, UInt32 state, UInt32 posState) {
  UInt32 price;
  if (repIndex == 0) {
    price = GET_PRICE_0(p->isRepG0[state]);
    price += GET_PRICE_1(p->isRep0Long[state][posState]);
  } else {
    price = GET_PRICE_1(p->isRepG0[state]);
    if (repIndex == 1)
      price += GET_PRICE_0(p->isRepG1[state]);
    else {
      price += GET_PRICE_1(p->isRepG1[state]);
      price += GET_PRICE(p->isRepG2[state], repIndex - 2);
    }
  }
  return price;
}

/* Rep selection price plus the cached rep-length price for `len` (len >= LZMA_MATCH_LEN_MIN). */
static UInt32 GetRepPrice(CLzmaEnc* p, UInt32 repIndex, UInt32 len, UInt32 state, UInt32 posState) {
  return p->repLenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN] +
         GetPureRepPrice(p, repIndex, state, posState);
}

/*
 * Walks the posPrev/backPrev chain from opt[cur] back to opt[0], reversing
 * the links in place so that afterwards each node's posPrev/backPrev point
 * forward to the next step. Compound steps (prev1IsChar, optionally prev2)
 * are expanded into their individual nodes on the way.
 * Sets optimumEndIndex to `cur`, stores the first step's back value in
 * *backRes, sets optimumCurrentIndex to the first step's end position and
 * returns that position.
 */
static UInt32 Backward(CLzmaEnc* p, UInt32* backRes, UInt32 cur) {
  UInt32 posMem = p->opt[cur].posPrev;
  UInt32 backMem = p->opt[cur].backPrev;
  p->optimumEndIndex = cur;
  do {
    if (p->opt[cur].prev1IsChar) {
      /* MakeAsChar expands to complete statements, hence no ';' after it. */
      MakeAsChar(&p->opt[posMem])
      p->opt[posMem].posPrev = posMem - 1;
      if (p->opt[cur].prev2) {
        p->opt[posMem - 1].prev1IsChar = False;
        p->opt[posMem - 1].posPrev = p->opt[cur].posPrev2;
        p->opt[posMem - 1].backPrev = p->opt[cur].backPrev2;
      }
    }
    {
      /* Reverse one link: the predecessor now points forward to `cur`. */
      UInt32 posPrev = posMem;
      UInt32 backCur = backMem;

      backMem = p->opt[posPrev].backPrev;
      posMem = p->opt[posPrev].posPrev;

      p->opt[posPrev].backPrev = backCur;
      p->opt[posPrev].posPrev = cur;
      cur = posPrev;
    }
  } while (cur != 0);
  *backRes = p->opt[0].backPrev;
  p->optimumCurrentIndex = p->opt[0].posPrev;
  return p->optimumCurrentIndex;
}

/* Start of the 0x300-cell literal probability set for (pos, prevByte); needs `p` in scope. */
#define LIT_PROBS(pos, prevByte) (p->litProbs + ((((pos)&p->lpMask) << p->lc) + ((prevByte) >> (8 - p->lc))) * 0x300)

/*
 * Chooses the next step to encode by price-based dynamic programming over
 * p->opt[].
 *
 * Returns the step length and stores its kind in *backRes: (UInt32)-1 is
 * paired with a return of 1 (a single byte), values below LZMA_NUM_REPS are
 * rep indices, larger values are distance + LZMA_NUM_REPS.
 * When a previously computed path still has unconsumed steps
 * (optimumEndIndex != optimumCurrentIndex) the next one is returned
 * without any new analysis.
 */
static UInt32 GetOptimum(CLzmaEnc* p, UInt32 position, UInt32* backRes) {
  UInt32 numAvail, mainLen, numPairs, repMaxIndex, i, posState, lenEnd, len, cur;
  UInt32 matchPrice, repMatchPrice, normalMatchPrice;
  UInt32 reps[LZMA_NUM_REPS], repLens[LZMA_NUM_REPS];
  UInt32* matches;
  const Byte* data;
  Byte curByte, matchByte;

  /* Replay the next step of the path produced by an earlier Backward(). */
  if (p->optimumEndIndex != p->optimumCurrentIndex) {
    const COptimal* opt = &p->opt[p->optimumCurrentIndex];
    UInt32 lenRes = opt->posPrev - p->optimumCurrentIndex;
    *backRes = opt->backPrev;
    p->optimumCurrentIndex = opt->posPrev;
    return lenRes;
  }
  p->optimumCurrentIndex = p->optimumEndIndex = 0;

  /* Matches for this position may already have been read by the previous call. */
  if (p->additionalOffset == 0)
    mainLen = ReadMatchDistances(p, &numPairs);
  else {
    mainLen = p->longestMatchLength;
    numPairs = p->numPairs;
  }

  numAvail = p->numAvail;
  if (numAvail < 2) {
    *backRes = (UInt32)(-1);
    return 1;
  }
  if (numAvail > LZMA_MATCH_LEN_MAX)
    numAvail = LZMA_MATCH_LEN_MAX;

  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
  repMaxIndex = 0;
  /* Measure how far each of the four rep distances matches (0 if under 2 bytes). */
  for (i = 0; i < LZMA_NUM_REPS; i++) {
    UInt32 lenTest;
    const Byte* data2;
    reps[i] = p->reps[i];
    data2 = data - (reps[i] + 1);
    if (data[0] != data2[0] || data[1] != data2[1]) {
      repLens[i] = 0;
      continue;
    }
    for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++)
      ;
    repLens[i] = lenTest;
    if (lenTest > repLens[repMaxIndex])
      repMaxIndex = i;
  }
  /* Shortcut: a rep match of at least numFastBytes is taken immediately. */
  if (repLens[repMaxIndex] >= p->numFastBytes) {
    UInt32 lenRes;
    *backRes = repMaxIndex;
    lenRes = repLens[repMaxIndex];
    MovePos(p, lenRes - 1);
    return lenRes;
  }

  matches = p->matches;
  /* Shortcut: same for a normal match of at least numFastBytes. */
  if (mainLen >= p->numFastBytes) {
    *backRes = matches[numPairs - 1] + LZMA_NUM_REPS;
    MovePos(p, mainLen - 1);
    return mainLen;
  }
  curByte = *data;
  matchByte = *(data - (reps[0] + 1));

  /* No match, no rep and no short rep possible: emit a single byte. */
  if (mainLen < 2 && curByte != matchByte && repLens[repMaxIndex] < 2) {
    *backRes = (UInt32)-1;
    return 1;
  }

  p->opt[0].state = (CState)p->state;

  posState = (position & p->pbMask);

  /* opt[1] starts out as "one literal". */
  {
    const CLzmaProb* probs = LIT_PROBS(position, *(data - 1));
    p->opt[1].price = GET_PRICE_0(p->isMatch[p->state][posState]) +
                      (!IsCharState(p->state) ? LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : LitEnc_GetPrice(probs, curByte, p->ProbPrices));
  }

  MakeAsChar(&p->opt[1]);

  matchPrice = GET_PRICE_1(p->isMatch[p->state][posState]);
  repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[p->state]);

  /* ... unless a short rep is cheaper. */
  if (matchByte == curByte) {
    UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, p->state, posState);
    if (shortRepPrice < p->opt[1].price) {
      p->opt[1].price = shortRepPrice;
      MakeAsShortRep(&p->opt[1]);
    }
  }
  lenEnd = ((mainLen >= repLens[repMaxIndex]) ? mainLen : repLens[repMaxIndex]);

  if (lenEnd < 2) {
    *backRes = p->opt[1].backPrev;
    return 1;
  }

  p->opt[1].posPrev = 0;
  for (i = 0; i < LZMA_NUM_REPS; i++)
    p->opt[0].backs[i] = reps[i];

  /* Nodes 2..lenEnd start at "infinite" price. */
  len = lenEnd;
  do
    p->opt[len--].price = kInfinityPrice;
  while (len >= 2);

  /* Seed the table with every rep match of every length >= 2 from position 0. */
  for (i = 0; i < LZMA_NUM_REPS; i++) {
    UInt32 repLen = repLens[i];
    UInt32 price;
    if (repLen < 2)
      continue;
    price = repMatchPrice + GetPureRepPrice(p, i, p->state, posState);
    do {
      UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][repLen - 2];
      COptimal* opt = &p->opt[repLen];
      if (curAndLenPrice < opt->price) {
        opt->price = curAndLenPrice;
        opt->posPrev = 0;
        opt->backPrev = i;
        opt->prev1IsChar = False;
      }
    } while (--repLen >= 2);
  }

  normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[p->state]);

  /* Seed with normal matches, starting just past the rep0 length. */
  len = ((repLens[0] >= 2) ? repLens[0] + 1 : 2);
  if (len <= mainLen) {
    UInt32 offs = 0;
    while (len > matches[offs])
      offs += 2;
    for (;; len++) {
      COptimal* opt;
      UInt32 distance = matches[offs + 1];

      UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][len - LZMA_MATCH_LEN_MIN];
      UInt32 lenToPosState = GetLenToPosState(len);
      if (distance < kNumFullDistances)
        curAndLenPrice += p->distancesPrices[lenToPosState][distance];
      else {
        UInt32 slot;
        GetPosSlot2(distance, slot);
        curAndLenPrice += p->alignPrices[distance & kAlignMask] + p->posSlotPrices[lenToPosState][slot];
      }
      opt = &p->opt[len];
      if (curAndLenPrice < opt->price) {
        opt->price = curAndLenPrice;
        opt->posPrev = 0;
        opt->backPrev = distance + LZMA_NUM_REPS;
        opt->prev1IsChar = False;
      }
      /* Move to the next (longer) pair once this pair's length is reached. */
      if (len == matches[offs]) {
        offs += 2;
        if (offs == numPairs)
          break;
      }
    }
  }

  cur = 0;

#ifdef SHOW_STAT2
  if (position >= 0) {
    unsigned i;
    printf("\n pos = %4X", position);
    for (i = cur; i <= lenEnd; i++)
      printf("\nprice[%4X] = %d", position - cur + i, p->opt[i].price);
  }
#endif

  /* Main loop: advance one position at a time and relax every reachable node. */
  for (;;) {
    /* These locals intentionally shadow the function-level ones of the same name. */
    UInt32 numAvailFull, newLen, numPairs, posPrev, state, posState, startLen;
    UInt32 curPrice, curAnd1Price, matchPrice, repMatchPrice;
    Bool nextIsChar;
    Byte curByte, matchByte;
    const Byte* data;
    COptimal* curOpt;
    COptimal* nextOpt;

    cur++;
    if (cur == lenEnd)
      return Backward(p, backRes, cur);

    newLen = ReadMatchDistances(p, &numPairs);
    /* A long match ends the search; remember it for the next call. */
    if (newLen >= p->numFastBytes) {
      p->numPairs = numPairs;
      p->longestMatchLength = newLen;
      return Backward(p, backRes, cur);
    }
    position++;

    /* Reconstruct the coder state at `cur` from the step that reached it. */
    curOpt = &p->opt[cur];
    posPrev = curOpt->posPrev;
    if (curOpt->prev1IsChar) {
      posPrev--;
      if (curOpt->prev2) {
        state = p->opt[curOpt->posPrev2].state;
        if (curOpt->backPrev2 < LZMA_NUM_REPS)
          state = kRepNextStates[state];
        else
          state = kMatchNextStates[state];
      } else
        state = p->opt[posPrev].state;
      state = kLiteralNextStates[state];
    } else
      state = p->opt[posPrev].state;
    if (posPrev == cur - 1) {
      if (IsShortRep(curOpt))
        state = kShortRepNextStates[state];
      else
        state = kLiteralNextStates[state];
    } else {
      UInt32 pos;
      const COptimal* prevOpt;
      if (curOpt->prev1IsChar && curOpt->prev2) {
        posPrev = curOpt->posPrev2;
        pos = curOpt->backPrev2;
        state = kRepNextStates[state];
      } else {
        pos = curOpt->backPrev;
        if (pos < LZMA_NUM_REPS)
          state = kRepNextStates[state];
        else
          state = kMatchNextStates[state];
      }
      prevOpt = &p->opt[posPrev];
      /* Rebuild the rep list: the distance just used moves to the front. */
      if (pos < LZMA_NUM_REPS) {
        UInt32 i;
        reps[0] = prevOpt->backs[pos];
        for (i = 1; i <= pos; i++)
          reps[i] = prevOpt->backs[i - 1];
        for (; i < LZMA_NUM_REPS; i++)
          reps[i] = prevOpt->backs[i];
      } else {
        UInt32 i;
        reps[0] = (pos - LZMA_NUM_REPS);
        for (i = 1; i < LZMA_NUM_REPS; i++)
          reps[i] = prevOpt->backs[i - 1];
      }
    }
    curOpt->state = (CState)state;

    curOpt->backs[0] = reps[0];
    curOpt->backs[1] = reps[1];
    curOpt->backs[2] = reps[2];
    curOpt->backs[3] = reps[3];

    curPrice = curOpt->price;
    nextIsChar = False;
    data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;
    curByte = *data;
    matchByte = *(data - (reps[0] + 1));

    posState = (position & p->pbMask);

    /* Candidate: literal from cur to cur + 1. */
    curAnd1Price = curPrice + GET_PRICE_0(p->isMatch[state][posState]);
    {
      const CLzmaProb* probs = LIT_PROBS(position, *(data - 1));
      curAnd1Price +=
          (!IsCharState(state) ? LitEnc_GetPriceMatched(probs, curByte, matchByte, p->ProbPrices) : LitEnc_GetPrice(probs, curByte, p->ProbPrices));
    }

    nextOpt = &p->opt[cur + 1];

    if (curAnd1Price < nextOpt->price) {
      nextOpt->price = curAnd1Price;
      nextOpt->posPrev = cur;
      MakeAsChar(nextOpt);
      nextIsChar = True;
    }

    matchPrice = curPrice + GET_PRICE_1(p->isMatch[state][posState]);
    repMatchPrice = matchPrice + GET_PRICE_1(p->isRep[state]);

    /* Candidate: short rep from cur to cur + 1. */
    if (matchByte == curByte && !(nextOpt->posPrev < cur && nextOpt->backPrev == 0)) {
      UInt32 shortRepPrice = repMatchPrice + GetRepLen1Price(p, state, posState);
      if (shortRepPrice <= nextOpt->price) {
        nextOpt->price = shortRepPrice;
        nextOpt->posPrev = cur;
        MakeAsShortRep(nextOpt);
        nextIsChar = True;
      }
    }
    /* Never look further than the end of the opt[] table. */
    numAvailFull = p->numAvail;
    {
      UInt32 temp = kNumOpts - 1 - cur;
      if (temp < numAvailFull)
        numAvailFull = temp;
    }

    if (numAvailFull < 2)
      continue;
    numAvail = (numAvailFull <= p->numFastBytes ? numAvailFull : p->numFastBytes);

    if (!nextIsChar && matchByte != curByte) /* speed optimization */
    {
      /* try Literal + rep0 */
      UInt32 temp;
      UInt32 lenTest2;
      const Byte* data2 = data - (reps[0] + 1);
      UInt32 limit = p->numFastBytes + 1;
      if (limit > numAvailFull)
        limit = numAvailFull;

      for (temp = 1; temp < limit && data[temp] == data2[temp]; temp++)
        ;
      lenTest2 = temp - 1;
      if (lenTest2 >= 2) {
        UInt32 state2 = kLiteralNextStates[state];
        UInt32 posStateNext = (position + 1) & p->pbMask;
        UInt32 nextRepMatchPrice = curAnd1Price +
                                   GET_PRICE_1(p->isMatch[state2][posStateNext]) +
                                   GET_PRICE_1(p->isRep[state2]);
        /* for (; lenTest2 >= 2; lenTest2--) */
        {
          UInt32 curAndLenPrice;
          COptimal* opt;
          UInt32 offset = cur + 1 + lenTest2;
          /* Grow the table, initializing new nodes to "infinite" price. */
          while (lenEnd < offset)
            p->opt[++lenEnd].price = kInfinityPrice;
          curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
          opt = &p->opt[offset];
          if (curAndLenPrice < opt->price) {
            opt->price = curAndLenPrice;
            opt->posPrev = cur + 1;
            opt->backPrev = 0;
            opt->prev1IsChar = True;
            opt->prev2 = False;
          }
        }
      }
    }

    startLen = 2; /* speed optimization */
    {
      /* Candidates: rep matches from cur, and rep + literal + rep0. */
      UInt32 repIndex;
      for (repIndex = 0; repIndex < LZMA_NUM_REPS; repIndex++) {
        UInt32 lenTest;
        UInt32 lenTestTemp;
        UInt32 price;
        const Byte* data2 = data - (reps[repIndex] + 1);
        if (data[0] != data2[0] || data[1] != data2[1])
          continue;
        for (lenTest = 2; lenTest < numAvail && data[lenTest] == data2[lenTest]; lenTest++)
          ;
        while (lenEnd < cur + lenTest)
          p->opt[++lenEnd].price = kInfinityPrice;
        lenTestTemp = lenTest;
        price = repMatchPrice + GetPureRepPrice(p, repIndex, state, posState);
        do {
          UInt32 curAndLenPrice = price + p->repLenEnc.prices[posState][lenTest - 2];
          COptimal* opt = &p->opt[cur + lenTest];
          if (curAndLenPrice < opt->price) {
            opt->price = curAndLenPrice;
            opt->posPrev = cur;
            opt->backPrev = repIndex;
            opt->prev1IsChar = False;
          }
        } while (--lenTest >= 2);
        lenTest = lenTestTemp;

        /* Normal matches no longer than the rep0 match are not tried below. */
        if (repIndex == 0)
          startLen = lenTest + 1;

        /* if (_maxMode) */
        {
          UInt32 lenTest2 = lenTest + 1;
          UInt32 limit = lenTest2 + p->numFastBytes;
          UInt32 nextRepMatchPrice;
          if (limit > numAvailFull)
            limit = numAvailFull;
          for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++)
            ;
          lenTest2 -= lenTest + 1;
          if (lenTest2 >= 2) {
            UInt32 state2 = kRepNextStates[state];
            UInt32 posStateNext = (position + lenTest) & p->pbMask;
            UInt32 curAndLenCharPrice =
                price + p->repLenEnc.prices[posState][lenTest - 2] +
                GET_PRICE_0(p->isMatch[state2][posStateNext]) +
                LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
                                       data[lenTest], data2[lenTest], p->ProbPrices);
            state2 = kLiteralNextStates[state2];
            posStateNext = (position + lenTest + 1) & p->pbMask;
            nextRepMatchPrice = curAndLenCharPrice +
                                GET_PRICE_1(p->isMatch[state2][posStateNext]) +
                                GET_PRICE_1(p->isRep[state2]);

            /* for (; lenTest2 >= 2; lenTest2--) */
            {
              UInt32 curAndLenPrice;
              COptimal* opt;
              UInt32 offset = cur + lenTest + 1 + lenTest2;
              while (lenEnd < offset)
                p->opt[++lenEnd].price = kInfinityPrice;
              curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
              opt = &p->opt[offset];
              if (curAndLenPrice < opt->price) {
                opt->price = curAndLenPrice;
                opt->posPrev = cur + lenTest + 1;
                opt->backPrev = 0;
                opt->prev1IsChar = True;
                opt->prev2 = True;
                opt->posPrev2 = cur;
                opt->backPrev2 = repIndex;
              }
            }
          }
        }
      }
    }
    /* for (UInt32 lenTest = 2; lenTest <= newLen; lenTest++) */
    /* Truncate the match list so that no entry is longer than numAvail. */
    if (newLen > numAvail) {
      newLen = numAvail;
      for (numPairs = 0; newLen > matches[numPairs]; numPairs += 2)
        ;
      matches[numPairs] = newLen;
      numPairs += 2;
    }
    /* Candidates: normal matches from cur, and match + literal + rep0. */
    if (newLen >= startLen) {
      UInt32 normalMatchPrice = matchPrice + GET_PRICE_0(p->isRep[state]);
      UInt32 offs, curBack, posSlot;
      UInt32 lenTest;
      while (lenEnd < cur + newLen)
        p->opt[++lenEnd].price = kInfinityPrice;

      offs = 0;
      while (startLen > matches[offs])
        offs += 2;
      curBack = matches[offs + 1];
      GetPosSlot2(curBack, posSlot);
      for (lenTest = /*2*/ startLen;; lenTest++) {
        UInt32 curAndLenPrice = normalMatchPrice + p->lenEnc.prices[posState][lenTest - LZMA_MATCH_LEN_MIN];
        UInt32 lenToPosState = GetLenToPosState(lenTest);
        COptimal* opt;
        if (curBack < kNumFullDistances)
          curAndLenPrice += p->distancesPrices[lenToPosState][curBack];
        else
          curAndLenPrice += p->posSlotPrices[lenToPosState][posSlot] + p->alignPrices[curBack & kAlignMask];

        opt = &p->opt[cur + lenTest];
        if (curAndLenPrice < opt->price) {
          opt->price = curAndLenPrice;
          opt->posPrev = cur;
          opt->backPrev = curBack + LZMA_NUM_REPS;
          opt->prev1IsChar = False;
        }

        if (/*_maxMode && */ lenTest == matches[offs]) {
          /* Try Match + Literal + Rep0 */
          const Byte* data2 = data - (curBack + 1);
          UInt32 lenTest2 = lenTest + 1;
          UInt32 limit = lenTest2 + p->numFastBytes;
          UInt32 nextRepMatchPrice;
          if (limit > numAvailFull)
            limit = numAvailFull;
          for (; lenTest2 < limit && data[lenTest2] == data2[lenTest2]; lenTest2++)
            ;
          lenTest2 -= lenTest + 1;
          if (lenTest2 >= 2) {
            UInt32 state2 = kMatchNextStates[state];
            UInt32 posStateNext = (position + lenTest) & p->pbMask;
            UInt32 curAndLenCharPrice = curAndLenPrice +
                                        GET_PRICE_0(p->isMatch[state2][posStateNext]) +
                                        LitEnc_GetPriceMatched(LIT_PROBS(position + lenTest, data[lenTest - 1]),
                                                               data[lenTest], data2[lenTest], p->ProbPrices);
            state2 = kLiteralNextStates[state2];
            posStateNext = (posStateNext + 1) & p->pbMask;
            nextRepMatchPrice = curAndLenCharPrice +
                                GET_PRICE_1(p->isMatch[state2][posStateNext]) +
                                GET_PRICE_1(p->isRep[state2]);

            /* for (; lenTest2 >= 2; lenTest2--) */
            {
              UInt32 offset = cur + lenTest + 1 + lenTest2;
              UInt32 curAndLenPrice;
              COptimal* opt;
              while (lenEnd < offset)
                p->opt[++lenEnd].price = kInfinityPrice;
              curAndLenPrice = nextRepMatchPrice + GetRepPrice(p, 0, lenTest2, state2, posStateNext);
              opt = &p->opt[offset];
              if (curAndLenPrice < opt->price) {
                opt->price = curAndLenPrice;
                opt->posPrev = cur + lenTest + 1;
                opt->backPrev = 0;
                opt->prev1IsChar = True;
                opt->prev2 = True;
                opt->posPrev2 = cur;
                opt->backPrev2 = curBack + LZMA_NUM_REPS;
              }
            }
          }
          /* Advance to the next, longer pair; stop after the last one. */
          offs += 2;
          if (offs == numPairs)
            break;
          curBack = matches[offs + 1];
          if (curBack >= kNumFullDistances)
            GetPosSlot2(curBack, posSlot);
        }
      }
    }
  }
}

#define ChangePair(smallDist, bigDist) (((bigDist) >> 7) > (smallDist))

static UInt32 GetOptimumFast(CLzmaEnc* p, UInt32* backRes) {
  UInt32 numAvail, mainLen, mainDist, numPairs, repIndex, repLen, i;
  const Byte* data;
  const UInt32* matches;

  if (p->additionalOffset == 0)
    mainLen = ReadMatchDistances(p, &numPairs);
  else {
    mainLen = p->longestMatchLength;
    numPairs = p->numPairs;
  }

  numAvail = p->numAvail;
  *backRes = (UInt32)-1;
  if (numAvail < 2)
    return 1;
  if (numAvail > LZMA_MATCH_LEN_MAX)
    numAvail = LZMA_MATCH_LEN_MAX;
  data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1;

  repLen = repIndex = 0;
  for (i = 0; i < LZMA_NUM_REPS; i++) {
    UInt32 len;
    const Byte* data2 = data - (p->reps[i] + 1);
    if (data[0] != data2[0] || data[1] != data2[1])
      continue;
    for (len = 2; len < numAvail && data[len] == data2[len]; len++)
      ;
    if (len >= p->numFastBytes) {
      *backRes = i;
      MovePos(p, len - 1);
      return len;
    }
    if (len > repLen) {
      repIndex = i;
      repLen = len;
    }
  }

  matches = p->matches;
  if (mainLen >= p->numFastBytes) {
    *backRes = matches[numPairs - 1]
+ LZMA_NUM_REPS; MovePos(p, mainLen - 1); return mainLen; } mainDist = 0; /* for GCC */ if (mainLen >= 2) { mainDist = matches[numPairs - 1]; while (numPairs > 2 && mainLen == matches[numPairs - 4] + 1) { if (!ChangePair(matches[numPairs - 3], mainDist)) break; numPairs -= 2; mainLen = matches[numPairs - 2]; mainDist = matches[numPairs - 1]; } if (mainLen == 2 && mainDist >= 0x80) mainLen = 1; } if (repLen >= 2 && ((repLen + 1 >= mainLen) || (repLen + 2 >= mainLen && mainDist >= (1 << 9)) || (repLen + 3 >= mainLen && mainDist >= (1 << 15)))) { *backRes = repIndex; MovePos(p, repLen - 1); return repLen; } if (mainLen < 2 || numAvail <= 2) return 1; p->longestMatchLength = ReadMatchDistances(p, &p->numPairs); if (p->longestMatchLength >= 2) { UInt32 newDistance = matches[p->numPairs - 1]; if ((p->longestMatchLength >= mainLen && newDistance < mainDist) || (p->longestMatchLength == mainLen + 1 && !ChangePair(mainDist, newDistance)) || (p->longestMatchLength > mainLen + 1) || (p->longestMatchLength + 1 >= mainLen && mainLen >= 3 && ChangePair(newDistance, mainDist))) return 1; } data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - 1; for (i = 0; i < LZMA_NUM_REPS; i++) { UInt32 len, limit; const Byte* data2 = data - (p->reps[i] + 1); if (data[0] != data2[0] || data[1] != data2[1]) continue; limit = mainLen - 1; for (len = 2; len < limit && data[len] == data2[len]; len++) ; if (len >= limit) return 1; } *backRes = mainDist + LZMA_NUM_REPS; MovePos(p, mainLen - 2); return mainLen; } static void WriteEndMarker(CLzmaEnc* p, UInt32 posState) { UInt32 len; RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); p->state = kMatchNextStates[p->state]; len = LZMA_MATCH_LEN_MIN; LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, (1 << kNumPosSlotBits) - 1); 
RangeEnc_EncodeDirectBits(&p->rc, (((UInt32)1 << 30) - 1) >> kNumAlignBits, 30 - kNumAlignBits); RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, kAlignMask); } static SRes CheckErrors(CLzmaEnc* p) { if (p->result != SZ_OK) return p->result; if (p->rc.res != SZ_OK) p->result = SZ_ERROR_WRITE; if (p->matchFinderBase.result != SZ_OK) p->result = SZ_ERROR_READ; if (p->result != SZ_OK) p->finished = True; return p->result; } static SRes Flush(CLzmaEnc* p, UInt32 nowPos) { /* ReleaseMFStream(); */ p->finished = True; if (p->writeEndMark) WriteEndMarker(p, nowPos & p->pbMask); RangeEnc_FlushData(&p->rc); RangeEnc_FlushStream(&p->rc); return CheckErrors(p); } static void FillAlignPrices(CLzmaEnc* p) { UInt32 i; for (i = 0; i < kAlignTableSize; i++) p->alignPrices[i] = RcTree_ReverseGetPrice(p->posAlignEncoder, kNumAlignBits, i, p->ProbPrices); p->alignPriceCount = 0; } static void FillDistancesPrices(CLzmaEnc* p) { UInt32 tempPrices[kNumFullDistances]; UInt32 i, lenToPosState; for (i = kStartPosModelIndex; i < kNumFullDistances; i++) { UInt32 posSlot = GetPosSlot1(i); UInt32 footerBits = ((posSlot >> 1) - 1); UInt32 base = ((2 | (posSlot & 1)) << footerBits); tempPrices[i] = RcTree_ReverseGetPrice(p->posEncoders + base - posSlot - 1, footerBits, i - base, p->ProbPrices); } for (lenToPosState = 0; lenToPosState < kNumLenToPosStates; lenToPosState++) { UInt32 posSlot; const CLzmaProb* encoder = p->posSlotEncoder[lenToPosState]; UInt32* posSlotPrices = p->posSlotPrices[lenToPosState]; for (posSlot = 0; posSlot < p->distTableSize; posSlot++) posSlotPrices[posSlot] = RcTree_GetPrice(encoder, kNumPosSlotBits, posSlot, p->ProbPrices); for (posSlot = kEndPosModelIndex; posSlot < p->distTableSize; posSlot++) posSlotPrices[posSlot] += ((((posSlot >> 1) - 1) - kNumAlignBits) << kNumBitPriceShiftBits); { UInt32* distancesPrices = p->distancesPrices[lenToPosState]; UInt32 i; for (i = 0; i < kStartPosModelIndex; i++) distancesPrices[i] = posSlotPrices[i]; for (; i < 
kNumFullDistances; i++) distancesPrices[i] = posSlotPrices[GetPosSlot1(i)] + tempPrices[i]; } } p->matchPriceCount = 0; } void LzmaEnc_Construct(CLzmaEnc* p) { RangeEnc_Construct(&p->rc); MatchFinder_Construct(&p->matchFinderBase); #ifdef COMPRESS_MF_MT MatchFinderMt_Construct(&p->matchFinderMt); p->matchFinderMt.MatchFinder = &p->matchFinderBase; #endif { CLzmaEncProps props; LzmaEncProps_Init(&props); LzmaEnc_SetProps(p, &props); } #ifndef LZMA_LOG_BSR LzmaEnc_FastPosInit(p->g_FastPos); #endif LzmaEnc_InitPriceTables(p->ProbPrices); p->litProbs = 0; p->saveState.litProbs = 0; } CLzmaEncHandle LzmaEnc_Create(ISzAlloc* alloc) { void* p; p = alloc->Alloc(alloc, sizeof(CLzmaEnc)); if (p != 0) LzmaEnc_Construct((CLzmaEnc*)p); return p; } void LzmaEnc_FreeLits(CLzmaEnc* p, ISzAlloc* alloc) { alloc->Free(alloc, p->litProbs); alloc->Free(alloc, p->saveState.litProbs); p->litProbs = 0; p->saveState.litProbs = 0; } void LzmaEnc_Destruct(CLzmaEnc* p, ISzAlloc* alloc, ISzAlloc* allocBig) { #ifdef COMPRESS_MF_MT MatchFinderMt_Destruct(&p->matchFinderMt, allocBig); #endif MatchFinder_Free(&p->matchFinderBase, allocBig); LzmaEnc_FreeLits(p, alloc); RangeEnc_Free(&p->rc, alloc); } void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc* alloc, ISzAlloc* allocBig) { LzmaEnc_Destruct((CLzmaEnc*)p, alloc, allocBig); alloc->Free(alloc, p); } static SRes LzmaEnc_CodeOneBlock(CLzmaEnc* p, Bool useLimits, UInt32 maxPackSize, UInt32 maxUnpackSize) { UInt32 nowPos32, startPos32; if (p->inStream != 0) { p->matchFinderBase.stream = p->inStream; p->matchFinder.Init(p->matchFinderObj); p->inStream = 0; } if (p->finished) return p->result; RINOK(CheckErrors(p)); nowPos32 = (UInt32)p->nowPos64; startPos32 = nowPos32; if (p->nowPos64 == 0) { UInt32 numPairs; Byte curByte; if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) return Flush(p, nowPos32); ReadMatchDistances(p, &numPairs); RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][0], 0); p->state = kLiteralNextStates[p->state]; curByte = 
p->matchFinder.GetIndexByte(p->matchFinderObj, 0 - p->additionalOffset); LitEnc_Encode(&p->rc, p->litProbs, curByte); p->additionalOffset--; nowPos32++; } if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) != 0) for (;;) { UInt32 pos, len, posState; if (p->fastMode) len = GetOptimumFast(p, &pos); else len = GetOptimum(p, nowPos32, &pos); #ifdef SHOW_STAT2 printf("\n pos = %4X, len = %d pos = %d", nowPos32, len, pos); #endif posState = nowPos32 & p->pbMask; if (len == 1 && pos == (UInt32)-1) { Byte curByte; CLzmaProb* probs; const Byte* data; RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 0); data = p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; curByte = *data; probs = LIT_PROBS(nowPos32, *(data - 1)); if (IsCharState(p->state)) LitEnc_Encode(&p->rc, probs, curByte); else LitEnc_EncodeMatched(&p->rc, probs, curByte, *(data - p->reps[0] - 1)); p->state = kLiteralNextStates[p->state]; } else { RangeEnc_EncodeBit(&p->rc, &p->isMatch[p->state][posState], 1); if (pos < LZMA_NUM_REPS) { RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 1); if (pos == 0) { RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 0); RangeEnc_EncodeBit(&p->rc, &p->isRep0Long[p->state][posState], ((len == 1) ? 
0 : 1)); } else { UInt32 distance = p->reps[pos]; RangeEnc_EncodeBit(&p->rc, &p->isRepG0[p->state], 1); if (pos == 1) RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 0); else { RangeEnc_EncodeBit(&p->rc, &p->isRepG1[p->state], 1); RangeEnc_EncodeBit(&p->rc, &p->isRepG2[p->state], pos - 2); if (pos == 3) p->reps[3] = p->reps[2]; p->reps[2] = p->reps[1]; } p->reps[1] = p->reps[0]; p->reps[0] = distance; } if (len == 1) p->state = kShortRepNextStates[p->state]; else { LenEnc_Encode2(&p->repLenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); p->state = kRepNextStates[p->state]; } } else { UInt32 posSlot; RangeEnc_EncodeBit(&p->rc, &p->isRep[p->state], 0); p->state = kMatchNextStates[p->state]; LenEnc_Encode2(&p->lenEnc, &p->rc, len - LZMA_MATCH_LEN_MIN, posState, !p->fastMode, p->ProbPrices); pos -= LZMA_NUM_REPS; GetPosSlot(pos, posSlot); RcTree_Encode(&p->rc, p->posSlotEncoder[GetLenToPosState(len)], kNumPosSlotBits, posSlot); if (posSlot >= kStartPosModelIndex) { UInt32 footerBits = ((posSlot >> 1) - 1); UInt32 base = ((2 | (posSlot & 1)) << footerBits); UInt32 posReduced = pos - base; if (posSlot < kEndPosModelIndex) RcTree_ReverseEncode(&p->rc, p->posEncoders + base - posSlot - 1, footerBits, posReduced); else { RangeEnc_EncodeDirectBits(&p->rc, posReduced >> kNumAlignBits, footerBits - kNumAlignBits); RcTree_ReverseEncode(&p->rc, p->posAlignEncoder, kNumAlignBits, posReduced & kAlignMask); p->alignPriceCount++; } } p->reps[3] = p->reps[2]; p->reps[2] = p->reps[1]; p->reps[1] = p->reps[0]; p->reps[0] = pos; p->matchPriceCount++; } } p->additionalOffset -= len; nowPos32 += len; if (p->additionalOffset == 0) { UInt32 processed; if (!p->fastMode) { if (p->matchPriceCount >= (1 << 7)) FillDistancesPrices(p); if (p->alignPriceCount >= kAlignTableSize) FillAlignPrices(p); } if (p->matchFinder.GetNumAvailableBytes(p->matchFinderObj) == 0) break; processed = nowPos32 - startPos32; if (useLimits) { if (processed + kNumOpts + 300 >= 
maxUnpackSize || RangeEnc_GetProcessed(&p->rc) + kNumOpts * 2 >= maxPackSize) break; } else if (processed >= (1 << 15)) { p->nowPos64 += nowPos32 - startPos32; return CheckErrors(p); } } } p->nowPos64 += nowPos32 - startPos32; return Flush(p, nowPos32); } #define kBigHashDicLimit ((UInt32)1 << 24) static SRes LzmaEnc_Alloc(CLzmaEnc* p, UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { UInt32 beforeSize = kNumOpts; if (!RangeEnc_Alloc(&p->rc, alloc)) return SZ_ERROR_MEM; #ifdef COMPRESS_MF_MT Bool btMode = (p->matchFinderBase.btMode != 0); p->mtMode = (p->multiThread && !p->fastMode && btMode); #endif { unsigned lclp = p->lc + p->lp; if (p->litProbs == 0 || p->saveState.litProbs == 0 || p->lclp != lclp) { LzmaEnc_FreeLits(p, alloc); p->litProbs = (CLzmaProb*)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb)); p->saveState.litProbs = (CLzmaProb*)alloc->Alloc(alloc, (0x300 << lclp) * sizeof(CLzmaProb)); if (p->litProbs == 0 || p->saveState.litProbs == 0) { LzmaEnc_FreeLits(p, alloc); return SZ_ERROR_MEM; } p->lclp = lclp; } } p->matchFinderBase.bigHash = (p->dictSize > kBigHashDicLimit); if (beforeSize + p->dictSize < keepWindowSize) beforeSize = keepWindowSize - p->dictSize; #ifdef COMPRESS_MF_MT if (p->mtMode) { RINOK(MatchFinderMt_Create(&p->matchFinderMt, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)); p->matchFinderObj = &p->matchFinderMt; MatchFinderMt_CreateVTable(&p->matchFinderMt, &p->matchFinder); } else #endif { if (!MatchFinder_Create(&p->matchFinderBase, p->dictSize, beforeSize, p->numFastBytes, LZMA_MATCH_LEN_MAX, allocBig)) return SZ_ERROR_MEM; p->matchFinderObj = &p->matchFinderBase; MatchFinder_CreateVTable(&p->matchFinderBase, &p->matchFinder); } return SZ_OK; } void LzmaEnc_Init(CLzmaEnc* p) { UInt32 i; p->state = 0; for (i = 0; i < LZMA_NUM_REPS; i++) p->reps[i] = 0; RangeEnc_Init(&p->rc); for (i = 0; i < kNumStates; i++) { UInt32 j; for (j = 0; j < LZMA_NUM_PB_STATES_MAX; j++) { p->isMatch[i][j] = 
kProbInitValue; p->isRep0Long[i][j] = kProbInitValue; } p->isRep[i] = kProbInitValue; p->isRepG0[i] = kProbInitValue; p->isRepG1[i] = kProbInitValue; p->isRepG2[i] = kProbInitValue; } { UInt32 num = 0x300 << (p->lp + p->lc); for (i = 0; i < num; i++) p->litProbs[i] = kProbInitValue; } { for (i = 0; i < kNumLenToPosStates; i++) { CLzmaProb* probs = p->posSlotEncoder[i]; UInt32 j; for (j = 0; j < (1 << kNumPosSlotBits); j++) probs[j] = kProbInitValue; } } { for (i = 0; i < kNumFullDistances - kEndPosModelIndex; i++) p->posEncoders[i] = kProbInitValue; } LenEnc_Init(&p->lenEnc.p); LenEnc_Init(&p->repLenEnc.p); for (i = 0; i < (1 << kNumAlignBits); i++) p->posAlignEncoder[i] = kProbInitValue; p->optimumEndIndex = 0; p->optimumCurrentIndex = 0; p->additionalOffset = 0; p->pbMask = (1 << p->pb) - 1; p->lpMask = (1 << p->lp) - 1; } void LzmaEnc_InitPrices(CLzmaEnc* p) { if (!p->fastMode) { FillDistancesPrices(p); FillAlignPrices(p); } p->lenEnc.tableSize = p->repLenEnc.tableSize = p->numFastBytes + 1 - LZMA_MATCH_LEN_MIN; LenPriceEnc_UpdateTables(&p->lenEnc, 1 << p->pb, p->ProbPrices); LenPriceEnc_UpdateTables(&p->repLenEnc, 1 << p->pb, p->ProbPrices); } static SRes LzmaEnc_AllocAndInit(CLzmaEnc* p, UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { UInt32 i; for (i = 0; i < (UInt32)kDicLogSizeMaxCompress; i++) if (p->dictSize <= ((UInt32)1 << i)) break; p->distTableSize = i * 2; p->finished = False; p->result = SZ_OK; RINOK(LzmaEnc_Alloc(p, keepWindowSize, alloc, allocBig)); LzmaEnc_Init(p); LzmaEnc_InitPrices(p); p->nowPos64 = 0; return SZ_OK; } static SRes LzmaEnc_Prepare(CLzmaEncHandle pp, ISeqInStream* inStream, ISeqOutStream* outStream, ISzAlloc* alloc, ISzAlloc* allocBig) { CLzmaEnc* p = (CLzmaEnc*)pp; p->inStream = inStream; p->rc.outStream = outStream; return LzmaEnc_AllocAndInit(p, 0, alloc, allocBig); } SRes LzmaEnc_PrepareForLzma2(CLzmaEncHandle pp, ISeqInStream* inStream, UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { CLzmaEnc* p = 
(CLzmaEnc*)pp; p->inStream = inStream; return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } static void LzmaEnc_SetInputBuf(CLzmaEnc* p, const Byte* src, SizeT srcLen) { p->seqBufInStream.funcTable.Read = MyRead; p->seqBufInStream.data = src; p->seqBufInStream.rem = srcLen; } SRes LzmaEnc_MemPrepare(CLzmaEncHandle pp, const Byte* src, SizeT srcLen, UInt32 keepWindowSize, ISzAlloc* alloc, ISzAlloc* allocBig) { CLzmaEnc* p = (CLzmaEnc*)pp; LzmaEnc_SetInputBuf(p, src, srcLen); p->inStream = &p->seqBufInStream.funcTable; return LzmaEnc_AllocAndInit(p, keepWindowSize, alloc, allocBig); } void LzmaEnc_Finish(CLzmaEncHandle pp) { #ifdef COMPRESS_MF_MT CLzmaEnc* p = (CLzmaEnc*)pp; if (p->mtMode) MatchFinderMt_ReleaseStream(&p->matchFinderMt); #else (void)pp; #endif } typedef struct _CSeqOutStreamBuf { ISeqOutStream funcTable; Byte* data; SizeT rem; Bool overflow; } CSeqOutStreamBuf; static size_t MyWrite(void* pp, const void* data, size_t size) { CSeqOutStreamBuf* p = (CSeqOutStreamBuf*)pp; if (p->rem < size) { size = p->rem; p->overflow = True; } memcpy(p->data, data, size); p->rem -= size; p->data += size; return size; } UInt32 LzmaEnc_GetNumAvailableBytes(CLzmaEncHandle pp) { const CLzmaEnc* p = (CLzmaEnc*)pp; return p->matchFinder.GetNumAvailableBytes(p->matchFinderObj); } const Byte* LzmaEnc_GetCurBuf(CLzmaEncHandle pp) { const CLzmaEnc* p = (CLzmaEnc*)pp; return p->matchFinder.GetPointerToCurrentPos(p->matchFinderObj) - p->additionalOffset; } #if 0 // Unused function producing a dangling-pointer warning. 
SRes LzmaEnc_CodeOneMemBlock(CLzmaEncHandle pp, Bool reInit, Byte* dest, size_t* destLen, UInt32 desiredPackSize, UInt32* unpackSize) { CLzmaEnc* p = (CLzmaEnc*)pp; UInt64 nowPos64; SRes res; CSeqOutStreamBuf outStream; outStream.funcTable.Write = MyWrite; outStream.data = dest; outStream.rem = *destLen; outStream.overflow = False; p->writeEndMark = False; p->finished = False; p->result = SZ_OK; if (reInit) LzmaEnc_Init(p); LzmaEnc_InitPrices(p); nowPos64 = p->nowPos64; RangeEnc_Init(&p->rc); p->rc.outStream = &outStream.funcTable; res = LzmaEnc_CodeOneBlock(p, True, desiredPackSize, *unpackSize); *unpackSize = (UInt32)(p->nowPos64 - nowPos64); *destLen -= outStream.rem; if (outStream.overflow) return SZ_ERROR_OUTPUT_EOF; return res; } #endif SRes LzmaEnc_Encode(CLzmaEncHandle pp, ISeqOutStream* outStream, ISeqInStream* inStream, ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig) { CLzmaEnc* p = (CLzmaEnc*)pp; SRes res = SZ_OK; #ifdef COMPRESS_MF_MT Byte allocaDummy[0x300]; (void)allocaDummy; int i = 0; for (i = 0; i < 16; i++) allocaDummy[i] = (Byte)i; #endif RINOK(LzmaEnc_Prepare(pp, inStream, outStream, alloc, allocBig)); for (;;) { res = LzmaEnc_CodeOneBlock(p, False, 0, 0); if (res != SZ_OK || p->finished != 0) break; if (progress != 0) { res = progress->Progress(progress, p->nowPos64, RangeEnc_GetProcessed(&p->rc)); if (res != SZ_OK) { res = SZ_ERROR_PROGRESS; break; } } } LzmaEnc_Finish(pp); return res; } SRes LzmaEnc_WriteProperties(CLzmaEncHandle pp, Byte* props, SizeT* size) { CLzmaEnc* p = (CLzmaEnc*)pp; int i; UInt32 dictSize = p->dictSize; if (*size < LZMA_PROPS_SIZE) return SZ_ERROR_PARAM; *size = LZMA_PROPS_SIZE; props[0] = (Byte)((p->pb * 5 + p->lp) * 9 + p->lc); for (i = 11; i <= 30; i++) { if (dictSize <= ((UInt32)2 << i)) { dictSize = (2 << i); break; } if (dictSize <= ((UInt32)3 << i)) { dictSize = (3 << i); break; } } for (i = 0; i < 4; i++) props[1 + i] = (Byte)(dictSize >> (8 * i)); return SZ_OK; } SRes 
LzmaEnc_MemEncode(CLzmaEncHandle pp, Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, int writeEndMark, ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig) { SRes res; CLzmaEnc* p = (CLzmaEnc*)pp; CSeqOutStreamBuf outStream; LzmaEnc_SetInputBuf(p, src, srcLen); outStream.funcTable.Write = MyWrite; outStream.data = dest; outStream.rem = *destLen; outStream.overflow = False; p->writeEndMark = writeEndMark; res = LzmaEnc_Encode(pp, &outStream.funcTable, &p->seqBufInStream.funcTable, progress, alloc, allocBig); *destLen -= outStream.rem; if (outStream.overflow) return SZ_ERROR_OUTPUT_EOF; return res; } SRes LzmaEncode(Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, const CLzmaEncProps* props, Byte* propsEncoded, SizeT* propsSize, int writeEndMark, ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig) { CLzmaEnc* p = (CLzmaEnc*)LzmaEnc_Create(alloc); SRes res; if (p == 0) return SZ_ERROR_MEM; res = LzmaEnc_SetProps(p, props); if (res == SZ_OK) { res = LzmaEnc_WriteProperties(p, propsEncoded, propsSize); if (res == SZ_OK) res = LzmaEnc_MemEncode(p, dest, destLen, src, srcLen, writeEndMark, progress, alloc, allocBig); } LzmaEnc_Destroy(p, alloc, allocBig); return res; } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_LzmaEnc.h000066400000000000000000000057561503722002600220320ustar00rootroot00000000000000/* LzmaEnc.h -- LZMA Encoder 2008-10-04 : Igor Pavlov : Public domain */ #ifndef __LZMAENC_H #define __LZMAENC_H #include "lzma_Types.h" namespace crnlib { #define LZMA_PROPS_SIZE 5 typedef struct _CLzmaEncProps { int level; /* 0 <= level <= 9 */ UInt32 dictSize; /* (1 << 12) <= dictSize <= (1 << 27) for 32-bit version (1 << 12) <= dictSize <= (1 << 30) for 64-bit version default = (1 << 24) */ int lc; /* 0 <= lc <= 8, default = 3 */ int lp; /* 0 <= lp <= 4, default = 0 */ int pb; /* 0 <= pb <= 4, default = 2 */ int algo; /* 0 - fast, 1 - normal, default = 1 */ int fb; /* 5 <= fb <= 273, default = 32 */ int btMode; /* 0 - hashChain 
Mode, 1 - binTree mode - normal, default = 1 */ int numHashBytes; /* 2, 3 or 4, default = 4 */ UInt32 mc; /* 1 <= mc <= (1 << 30), default = 32 */ unsigned writeEndMark; /* 0 - do not write EOPM, 1 - write EOPM, default = 0 */ int numThreads; /* 1 or 2, default = 2 */ } CLzmaEncProps; void LzmaEncProps_Init(CLzmaEncProps* p); void LzmaEncProps_Normalize(CLzmaEncProps* p); UInt32 LzmaEncProps_GetDictSize(const CLzmaEncProps* props2); /* ---------- CLzmaEncHandle Interface ---------- */ /* LzmaEnc_* functions can return the following exit codes: Returns: SZ_OK - OK SZ_ERROR_MEM - Memory allocation error SZ_ERROR_PARAM - Incorrect paramater in props SZ_ERROR_WRITE - Write callback error. SZ_ERROR_PROGRESS - some break from progress callback SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) */ typedef void* CLzmaEncHandle; CLzmaEncHandle LzmaEnc_Create(ISzAlloc* alloc); void LzmaEnc_Destroy(CLzmaEncHandle p, ISzAlloc* alloc, ISzAlloc* allocBig); SRes LzmaEnc_SetProps(CLzmaEncHandle p, const CLzmaEncProps* props); SRes LzmaEnc_WriteProperties(CLzmaEncHandle p, Byte* properties, SizeT* size); SRes LzmaEnc_Encode(CLzmaEncHandle p, ISeqOutStream* outStream, ISeqInStream* inStream, ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig); SRes LzmaEnc_MemEncode(CLzmaEncHandle p, Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, int writeEndMark, ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig); /* ---------- One Call Interface ---------- */ /* LzmaEncode Return code: SZ_OK - OK SZ_ERROR_MEM - Memory allocation error SZ_ERROR_PARAM - Incorrect paramater SZ_ERROR_OUTPUT_EOF - output buffer overflow SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) */ SRes LzmaEncode(Byte* dest, SizeT* destLen, const Byte* src, SizeT srcLen, const CLzmaEncProps* props, Byte* propsEncoded, SizeT* propsSize, int writeEndMark, ICompressProgress* progress, ISzAlloc* alloc, ISzAlloc* allocBig); } #endif 
DaemonEngine-crunch-ef4d32f/crnlib/lzma_LzmaLib.cpp000066400000000000000000000035151503722002600223550ustar00rootroot00000000000000/* LzmaLib.c -- LZMA library wrapper 2008-08-05 Igor Pavlov Public domain */ #include "crn_core.h" #include "lzma_LzmaEnc.h" #include "lzma_LzmaDec.h" #include "lzma_Alloc.h" #include "lzma_LzmaLib.h" namespace crnlib { static void* SzAlloc(void* /* p */, size_t size) { return MyAlloc(size); } static void SzFree(void* /* p */, void* address) { MyFree(address); } static ISzAlloc g_Alloc = {SzAlloc, SzFree}; MY_STDAPI LzmaCompress(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t srcLen, unsigned char* outProps, size_t* outPropsSize, int level, /* 0 <= level <= 9, default = 5 */ unsigned dictSize, /* use (1 << N) or (3 << N). 4 KB < dictSize <= 128 MB */ int lc, /* 0 <= lc <= 8, default = 3 */ int lp, /* 0 <= lp <= 4, default = 0 */ int pb, /* 0 <= pb <= 4, default = 2 */ int fb, /* 5 <= fb <= 273, default = 32 */ int numThreads /* 1 or 2, default = 2 */ ) { CLzmaEncProps props; LzmaEncProps_Init(&props); props.level = level; props.dictSize = dictSize; props.lc = lc; props.lp = lp; props.pb = pb; props.fb = fb; props.numThreads = numThreads; return LzmaEncode(dest, destLen, src, srcLen, &props, outProps, outPropsSize, 0, NULL, &g_Alloc, &g_Alloc); } MY_STDAPI LzmaUncompress(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t* srcLen, const unsigned char* props, size_t propsSize) { ELzmaStatus status; return LzmaDecode(dest, destLen, src, srcLen, props, (unsigned)propsSize, LZMA_FINISH_ANY, &status, &g_Alloc); } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_LzmaLib.h000066400000000000000000000120121503722002600220120ustar00rootroot00000000000000/* LzmaLib.h -- LZMA library interface 2008-08-05 Igor Pavlov Public domain */ #ifndef __LZMALIB_H #define __LZMALIB_H #include "lzma_Types.h" namespace crnlib { #if 0 #ifdef __cplusplus #define MY_EXTERN_C extern "C" #else #define MY_EXTERN_C extern #endif 
#define MY_STDAPI MY_EXTERN_C int MY_STD_CALL #else #define MY_STDAPI int MY_STD_CALL #endif #define LZMA_PROPS_SIZE 5 /* RAM requirements for LZMA: for compression: (dictSize * 11.5 + 6 MB) + state_size for decompression: dictSize + state_size state_size = (4 + (1.5 << (lc + lp))) KB by default (lc=3, lp=0), state_size = 16 KB. LZMA properties (5 bytes) format Offset Size Description 0 1 lc, lp and pb in encoded form. 1 4 dictSize (little endian). */ /* LzmaCompress ------------ outPropsSize - In: the pointer to the size of outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. Out: the pointer to the size of written properties in outProps buffer; *outPropsSize = LZMA_PROPS_SIZE = 5. LZMA Encoder will use defult values for any parameter, if it is -1 for any from: level, loc, lp, pb, fb, numThreads 0 for dictSize level - compression level: 0 <= level <= 9; level dictSize algo fb 0: 16 KB 0 32 1: 64 KB 0 32 2: 256 KB 0 32 3: 1 MB 0 32 4: 4 MB 0 32 5: 16 MB 1 32 6: 32 MB 1 32 7+: 64 MB 1 64 The default value for "level" is 5. algo = 0 means fast method algo = 1 means normal method dictSize - The dictionary size in bytes. The maximum value is 128 MB = (1 << 27) bytes for 32-bit version 1 GB = (1 << 30) bytes for 64-bit version The default value is 16 MB = (1 << 24) bytes. It's recommended to use the dictionary that is larger than 4 KB and that can be calculated as (1 << N) or (3 << N) sizes. lc - The number of literal context bits (high bits of previous literal). It can be in the range from 0 to 8. The default value is 3. Sometimes lc=4 gives the gain for big files. lp - The number of literal pos bits (low bits of current position for literals). It can be in the range from 0 to 4. The default value is 0. The lp switch is intended for periodical data when the period is equal to 2^lp. For example, for 32-bit (4 bytes) periodical data you can use lp=2. Often it's better to set lc=0, if you change lp switch. pb - The number of pos bits (low bits of current position). 
It can be in the range from 0 to 4. The default value is 2. The pb switch is intended for periodical data when the period is equal 2^pb. fb - Word size (the number of fast bytes). It can be in the range from 5 to 273. The default value is 32. Usually, a big number gives a little bit better compression ratio and slower compression process. numThreads - The number of thereads. 1 or 2. The default value is 2. Fast mode (algo = 0) can use only 1 thread. Out: destLen - processed output size Returns: SZ_OK - OK SZ_ERROR_MEM - Memory allocation error SZ_ERROR_PARAM - Incorrect paramater SZ_ERROR_OUTPUT_EOF - output buffer overflow SZ_ERROR_THREAD - errors in multithreading functions (only for Mt version) */ MY_STDAPI LzmaCompress(unsigned char* dest, size_t* destLen, const unsigned char* src, size_t srcLen, unsigned char* outProps, size_t* outPropsSize, /* *outPropsSize must be = 5 */ int level, /* 0 <= level <= 9, default = 5 */ unsigned dictSize, /* default = (1 << 24) */ int lc, /* 0 <= lc <= 8, default = 3 */ int lp, /* 0 <= lp <= 4, default = 0 */ int pb, /* 0 <= pb <= 4, default = 2 */ int fb, /* 5 <= fb <= 273, default = 32 */ int numThreads /* 1 or 2, default = 2 */ ); /* LzmaUncompress -------------- In: dest - output data destLen - output data size src - input data srcLen - input data size Out: destLen - processed output size srcLen - processed input size Returns: SZ_OK - OK SZ_ERROR_DATA - Data error SZ_ERROR_MEM - Memory allocation arror SZ_ERROR_UNSUPPORTED - Unsupported properties SZ_ERROR_INPUT_EOF - it needs more bytes in input buffer (src) */ MY_STDAPI LzmaUncompress(unsigned char* dest, size_t* destLen, const unsigned char* src, SizeT* srcLen, const unsigned char* props, size_t propsSize); #define LZMA_COMPRESS_FUNC_EXPORT "LzmaCompress" #define LZMA_UNCOMPRESS_FUNC_EXPORT "LzmaUncompress" } #endif DaemonEngine-crunch-ef4d32f/crnlib/lzma_MyVersion.h000066400000000000000000000004611503722002600224200ustar00rootroot00000000000000#define MY_VER_MAJOR 4 
#define MY_VER_MINOR 63 #define MY_VER_BUILD 0 #define MY_VERSION "4.63" #define MY_7ZIP_VERSION "7-Zip 4.63" #define MY_DATE "2008-12-31" #define MY_COPYRIGHT "Copyright (c) 1999-2008 Igor Pavlov" #define MY_VERSION_COPYRIGHT_DATE MY_VERSION " " MY_COPYRIGHT " " MY_DATE DaemonEngine-crunch-ef4d32f/crnlib/lzma_Threads.cpp000066400000000000000000000065751503722002600224260ustar00rootroot00000000000000/* Threads.c -- multithreading library 2008-08-05 Igor Pavlov Public domain */ #include "crn_core.h" #include "lzma_Threads.h" #include namespace crnlib { /* Returns GetLastError(), substituting 1 when it reports 0, so the result is never the success value 0. */ static WRes GetError() { DWORD res = GetLastError(); return (res) ? (WRes)(res) : 1; } /* Maps a handle to a WRes: 0 for a non-zero handle, GetError() otherwise. */ WRes HandleToWRes(HANDLE h) { return (h != 0) ? 0 : GetError(); } /* Maps a BOOL to a WRes: 0 when v is non-zero, GetError() otherwise. */ WRes BOOLToWRes(BOOL v) { return v ? 0 : GetError(); } /* Closes *h when it is non-NULL and then resets it to NULL; if CloseHandle fails, returns GetError() and leaves *h unchanged. Returns 0 otherwise (including when *h was already NULL). */ static WRes MyCloseHandle(HANDLE* h) { if (*h != NULL) if (!CloseHandle(*h)) return GetError(); *h = NULL; return 0; } /* Starts a thread running startAddress(parameter) through _beginthreadex, stores the resulting handle in thread->handle and returns 0 on success or an error code from HandleToWRes. */ WRes Thread_Create(CThread* thread, THREAD_FUNC_RET_TYPE(THREAD_FUNC_CALL_TYPE* startAddress)(void*), LPVOID parameter) { unsigned threadId; /* Windows Me/98/95: threadId parameter may not be NULL in _beginthreadex/CreateThread functions */ thread->handle = /* CreateThread(0, 0, startAddress, parameter, 0, &threadId); */ (HANDLE)_beginthreadex(NULL, 0, startAddress, parameter, 0, &threadId); /* maybe we must use errno here, but probably GetLastError() is also OK. */ return HandleToWRes(thread->handle); } /* Waits on h with an INFINITE timeout and returns the WaitForSingleObject result cast to WRes. */ WRes WaitObject(HANDLE h) { return (WRes)WaitForSingleObject(h, INFINITE); } /* Waits for the thread; returns 1 without waiting when the handle is NULL. */ WRes Thread_Wait(CThread* thread) { if (thread->handle == NULL) return 1; return WaitObject(thread->handle); } /* Closes the thread handle via MyCloseHandle. */ WRes Thread_Close(CThread* thread) { return MyCloseHandle(&thread->handle); } /* Creates the event with CreateEvent (any non-zero initialSignaled is passed as TRUE), stores the handle in p->handle and reports the outcome through HandleToWRes. */ WRes Event_Create(CEvent* p, BOOL manualReset, int initialSignaled) { p->handle = CreateEvent(NULL, manualReset, (initialSignaled ? 
TRUE : FALSE), NULL); return HandleToWRes(p->handle); } /* The four creators below forward to Event_Create with manualReset fixed to TRUE or FALSE; the *NotSignaled variants pass initialSignaled = 0. */ WRes ManualResetEvent_Create(CManualResetEvent* p, int initialSignaled) { return Event_Create(p, TRUE, initialSignaled); } WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent* p) { return ManualResetEvent_Create(p, 0); } WRes AutoResetEvent_Create(CAutoResetEvent* p, int initialSignaled) { return Event_Create(p, FALSE, initialSignaled); } WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent* p) { return AutoResetEvent_Create(p, 0); } /* Event operations: SetEvent / ResetEvent results are converted with BOOLToWRes, waiting uses WaitObject, closing uses MyCloseHandle. */ WRes Event_Set(CEvent* p) { return BOOLToWRes(SetEvent(p->handle)); } WRes Event_Reset(CEvent* p) { return BOOLToWRes(ResetEvent(p->handle)); } WRes Event_Wait(CEvent* p) { return WaitObject(p->handle); } WRes Event_Close(CEvent* p) { return MyCloseHandle(&p->handle); } /* Creates the semaphore with CreateSemaphore (counts are cast to LONG), stores the handle in p->handle and reports the outcome through HandleToWRes. */ WRes Semaphore_Create(CSemaphore* p, UInt32 initiallyCount, UInt32 maxCount) { p->handle = CreateSemaphore(NULL, (LONG)initiallyCount, (LONG)maxCount, NULL); return HandleToWRes(p->handle); } /* Calls ReleaseSemaphore with releaseCount and previousCount and converts the result with BOOLToWRes. */ WRes Semaphore_Release(CSemaphore* p, LONG releaseCount, LONG* previousCount) { return BOOLToWRes(ReleaseSemaphore(p->handle, releaseCount, previousCount)); } /* Releases releaseCount units; the previous count is not requested (NULL). */ WRes Semaphore_ReleaseN(CSemaphore* p, UInt32 releaseCount) { return Semaphore_Release(p, (LONG)releaseCount, NULL); } /* Releases a single unit. */ WRes Semaphore_Release1(CSemaphore* p) { return Semaphore_ReleaseN(p, 1); } WRes Semaphore_Wait(CSemaphore* p) { return WaitObject(p->handle); } WRes Semaphore_Close(CSemaphore* p) { return MyCloseHandle(&p->handle); } /* Initializes the critical section and returns 0. Under _MSC_VER the call is wrapped in __try/__except and 1 is returned if any exception is raised; other compilers call InitializeCriticalSection directly. */ WRes CriticalSection_Init(CCriticalSection* p) { #ifdef _MSC_VER /* InitializeCriticalSection can raise only STATUS_NO_MEMORY exception */ __try { InitializeCriticalSection(p); /* InitializeCriticalSectionAndSpinCount(p, 0); */ } __except (EXCEPTION_EXECUTE_HANDLER) { return 1; } #else InitializeCriticalSection(p); #endif return 0; } } DaemonEngine-crunch-ef4d32f/crnlib/lzma_Threads.h000066400000000000000000000037571503722002600220720ustar00rootroot00000000000000/* Threads.h -- multithreading library 2008-11-22 : Igor Pavlov : 
Public domain */ /* NOTE(review): the include guard is spelled "THRESDS" (sic); it is left unchanged here because renaming it would be a code change. */ #ifndef __7Z_THRESDS_H #define __7Z_THRESDS_H #include "lzma_Types.h" namespace crnlib { /* Holds a thread HANDLE; a NULL handle means "not created" (see Thread_Construct / Thread_WasCreated). */ typedef struct _CThread { HANDLE handle; } CThread; #define Thread_Construct(thread) (thread)->handle = NULL #define Thread_WasCreated(thread) ((thread)->handle != NULL) /* Thread entry points return unsigned and use the MY_STD_CALL calling convention. */ typedef unsigned THREAD_FUNC_RET_TYPE; #define THREAD_FUNC_CALL_TYPE MY_STD_CALL #define THREAD_FUNC_DECL THREAD_FUNC_RET_TYPE THREAD_FUNC_CALL_TYPE WRes Thread_Create(CThread* thread, THREAD_FUNC_RET_TYPE(THREAD_FUNC_CALL_TYPE* startAddress)(void*), LPVOID parameter); WRes Thread_Wait(CThread* thread); WRes Thread_Close(CThread* thread); /* Holds an event HANDLE; CAutoResetEvent and CManualResetEvent are aliases of the same struct. */ typedef struct _CEvent { HANDLE handle; } CEvent; typedef CEvent CAutoResetEvent; typedef CEvent CManualResetEvent; #define Event_Construct(event) (event)->handle = NULL #define Event_IsCreated(event) ((event)->handle != NULL) WRes ManualResetEvent_Create(CManualResetEvent* event, int initialSignaled); WRes ManualResetEvent_CreateNotSignaled(CManualResetEvent* event); WRes AutoResetEvent_Create(CAutoResetEvent* event, int initialSignaled); WRes AutoResetEvent_CreateNotSignaled(CAutoResetEvent* event); WRes Event_Set(CEvent* event); WRes Event_Reset(CEvent* event); WRes Event_Wait(CEvent* event); WRes Event_Close(CEvent* event); /* Holds a semaphore HANDLE; Semaphore_Construct resets it to NULL. */ typedef struct _CSemaphore { HANDLE handle; } CSemaphore; #define Semaphore_Construct(p) (p)->handle = NULL WRes Semaphore_Create(CSemaphore* p, UInt32 initiallyCount, UInt32 maxCount); WRes Semaphore_ReleaseN(CSemaphore* p, UInt32 num); WRes Semaphore_Release1(CSemaphore* p); WRes Semaphore_Wait(CSemaphore* p); WRes Semaphore_Close(CSemaphore* p); /* CCriticalSection is the Win32 CRITICAL_SECTION itself; delete/enter/leave are direct macro aliases of the Win32 calls, only initialization goes through a function. */ typedef CRITICAL_SECTION CCriticalSection; WRes CriticalSection_Init(CCriticalSection* p); #define CriticalSection_Delete(p) DeleteCriticalSection(p) #define CriticalSection_Enter(p) EnterCriticalSection(p) #define CriticalSection_Leave(p) LeaveCriticalSection(p) } #endif 
DaemonEngine-crunch-ef4d32f/crnlib/lzma_Types.h000066400000000000000000000116111503722002600215700ustar00rootroot00000000000000/* Types.h -- Basic types 2008-11-23 : Igor Pavlov : Public domain */ #ifndef __7Z_TYPES_H #define __7Z_TYPES_H #include #if defined(_WIN32) #include #define COMPRESS_MF_MT #endif namespace crnlib { /* Result codes; SZ_OK (0) is success, every SZ_ERROR_* value is non-zero. */ #define SZ_OK 0 #define SZ_ERROR_DATA 1 #define SZ_ERROR_MEM 2 #define SZ_ERROR_CRC 3 #define SZ_ERROR_UNSUPPORTED 4 #define SZ_ERROR_PARAM 5 #define SZ_ERROR_INPUT_EOF 6 #define SZ_ERROR_OUTPUT_EOF 7 #define SZ_ERROR_READ 8 #define SZ_ERROR_WRITE 9 #define SZ_ERROR_PROGRESS 10 #define SZ_ERROR_FAIL 11 #define SZ_ERROR_THREAD 12 #define SZ_ERROR_ARCHIVE 16 #define SZ_ERROR_NO_ARCHIVE 17 typedef int SRes; /* WRes is DWORD on Windows and int elsewhere. */ #ifdef _WIN32 typedef DWORD WRes; #else typedef int WRes; #endif /* RINOK(x): evaluates x once and, if the result is non-zero, returns it from the enclosing function. Only defined here when the includer has not already defined RINOK. */ #ifndef RINOK #define RINOK(x) \ { \ int __result__ = (x); \ if (__result__ != 0) \ return __result__; \ } #endif typedef unsigned char Byte; typedef short Int16; typedef unsigned short UInt16; #ifdef _LZMA_UINT32_IS_ULONG typedef long Int32; typedef unsigned long UInt32; #else typedef int Int32; typedef unsigned int UInt32; #endif #ifdef _SZ_NO_INT_64 /* define _SZ_NO_INT_64, if your compiler doesn't support 64-bit integers. NOTES: Some code will work incorrectly in that case! 
*/ typedef long Int64; typedef unsigned long UInt64; #else #if defined(_MSC_VER) || defined(__BORLANDC__) typedef __int64 Int64; typedef unsigned __int64 UInt64; #else typedef long long int Int64; typedef unsigned long long int UInt64; #endif #endif /* SizeT is size_t unless _LZMA_NO_SYSTEM_SIZE_T is defined, in which case it is UInt32. */ #ifdef _LZMA_NO_SYSTEM_SIZE_T typedef UInt32 SizeT; #else typedef size_t SizeT; #endif typedef int Bool; #define True 1 #define False 0 /* Calling-convention macros: MY_CDECL, MY_STD_CALL and MY_FAST_CALL expand to nothing on non-Windows targets; MY_NO_INLINE is only defined on Windows. */ #if defined(_WIN32) #if defined(_MSC_VER) && _MSC_VER >= 1300 #define MY_NO_INLINE __declspec(noinline) #else #define MY_NO_INLINE #endif #define MY_CDECL __cdecl #define MY_STD_CALL __stdcall #define MY_FAST_CALL MY_NO_INLINE __fastcall #else #define MY_CDECL #define MY_STD_CALL #define MY_FAST_CALL #endif /* The following interfaces use first parameter as pointer to structure */ typedef struct { SRes (*Read)(void* p, void* buf, size_t* size); /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. (output(*size) < input(*size)) is allowed */ } ISeqInStream; /* it can return SZ_ERROR_INPUT_EOF */ SRes SeqInStream_Read(ISeqInStream* stream, void* buf, size_t size); SRes SeqInStream_Read2(ISeqInStream* stream, void* buf, size_t size, SRes errorType); SRes SeqInStream_ReadByte(ISeqInStream* stream, Byte* buf); typedef struct { size_t (*Write)(void* p, const void* buf, size_t size); /* Returns: result - the number of actually written bytes. (result < size) means error */ } ISeqOutStream; typedef enum { SZ_SEEK_SET = 0, SZ_SEEK_CUR = 1, SZ_SEEK_END = 2 } ESzSeek; typedef struct { SRes (*Read)(void* p, void* buf, size_t* size); /* same as ISeqInStream::Read */ SRes (*Seek)(void* p, Int64* pos, ESzSeek origin); } ISeekInStream; typedef struct { SRes (*Look)(void* p, void** buf, size_t* size); /* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. 
(output(*size) > input(*size)) is not allowed (output(*size) < input(*size)) is allowed */ SRes (*Skip)(void* p, size_t offset); /* offset must be <= output(*size) of Look */ SRes (*Read)(void* p, void* buf, size_t* size); /* reads directly (without buffer). It's same as ISeqInStream::Read */ SRes (*Seek)(void* p, Int64* pos, ESzSeek origin); } ILookInStream; SRes LookInStream_LookRead(ILookInStream* stream, void* buf, size_t* size); SRes LookInStream_SeekTo(ILookInStream* stream, UInt64 offset); /* reads via ILookInStream::Read */ SRes LookInStream_Read2(ILookInStream* stream, void* buf, size_t size, SRes errorType); SRes LookInStream_Read(ILookInStream* stream, void* buf, size_t size); /* Size in bytes (1 << 14 = 16384) of the buf array embedded in CLookToRead. */ #define LookToRead_BUF_SIZE (1 << 14) typedef struct { ILookInStream s; ISeekInStream* realStream; size_t pos; size_t size; Byte buf[LookToRead_BUF_SIZE]; } CLookToRead; void LookToRead_CreateVTable(CLookToRead* p, int lookahead); void LookToRead_Init(CLookToRead* p); /* NOTE(review): CSecToLook and CSecToRead have identical layouts (an ISeqInStream plus a pointer to an ILookInStream); how they differ is decided by their CreateVTable functions, which are not visible here. */ typedef struct { ISeqInStream s; ILookInStream* realStream; } CSecToLook; void SecToLook_CreateVTable(CSecToLook* p); typedef struct { ISeqInStream s; ILookInStream* realStream; } CSecToRead; void SecToRead_CreateVTable(CSecToRead* p); typedef struct { SRes (*Progress)(void* p, UInt64 inSize, UInt64 outSize); /* Returns: result. (result != SZ_OK) means break. Value (UInt64)(Int64)-1 for size means unknown value. 
*/ } ICompressProgress; typedef struct { void* (*Alloc)(void* p, size_t size); void (*Free)(void* p, void* address); /* address can be 0 */ } ISzAlloc; /* Convenience wrappers that pass the allocator object itself as the first argument of its own callback. */ #define IAlloc_Alloc(p, size) (p)->Alloc((p), size) #define IAlloc_Free(p, a) (p)->Free((p), a) } #endif DaemonEngine-crunch-ef4d32f/crnlib/stb_image.h000066400000000000000000010523151503722002600214020ustar00rootroot00000000000000/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb no warranty implied; use at your own risk Do this: #define STB_IMAGE_IMPLEMENTATION before you include this file in *one* C or C++ file to create the implementation. // i.e. it should look like this: #include ... #include ... #include ... #define STB_IMAGE_IMPLEMENTATION #include "stb_image.h" You can #define STBI_ASSERT(x) before the #include to avoid using assert.h. And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free QUICK NOTES: Primarily of interest to game developers and other people who can avoid problematic images and only need the trivial interface JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib) PNG 1/2/4/8/16-bit-per-channel TGA (not sure what subset, if a subset) BMP non-1bpp, non-RLE PSD (composited view only, no extra channels, 8/16 bit-per-channel) GIF (*comp always reports as 4-channel) HDR (radiance rgbE format) PIC (Softimage PIC) PNM (PPM and PGM binary only) Animated GIF still needs a proper API, but here's one way to do it: http://gist.github.com/urraka/685d9a6340b26b830d49 - decode from memory or through FILE (define STBI_NO_STDIO to remove code) - decode from arbitrary I/O callbacks - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON) Full documentation under "DOCUMENTATION" below. LICENSE See end of file for license information. 
RECENT REVISION HISTORY: 2.30 (2024-05-31) avoid erroneous gcc warning 2.29 (2023-05-xx) optimizations 2.28 (2023-01-29) many error fixes, security errors, just tons of stuff 2.27 (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes 2.26 (2020-07-13) many minor fixes 2.25 (2020-02-02) fix warnings 2.24 (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically 2.23 (2019-08-11) fix clang static analysis warning 2.22 (2019-03-04) gif fixes, fix warnings 2.21 (2019-02-25) fix typo in comment 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs 2.19 (2018-02-11) fix warning 2.18 (2018-01-30) fix warnings 2.17 (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings 2.16 (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes 2.15 (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs 2.13 (2016-12-04) experimental 16-bit API, only for PNG so far; fixes 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes 2.11 (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64 RGB-format JPEG; remove white matting in PSD; allocate large structures on the stack; correct channel count for PNG & BMP 2.10 (2016-01-22) avoid warning introduced in 2.09 2.09 (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED See end of file for full revision history. 
============================ Contributors ========================= Image formats Extensions, features Sean Barrett (jpeg, png, bmp) Jetro Lauha (stbi_info) Nicolas Schulz (hdr, psd) Martin "SpartanJ" Golini (stbi_info) Jonathan Dummer (tga) James "moose2000" Brown (iPhone PNG) Jean-Marc Lienher (gif) Ben "Disch" Wenger (io callbacks) Tom Seddon (pic) Omar Cornut (1/2/4-bit PNG) Thatcher Ulrich (psd) Nicolas Guillemot (vertical flip) Ken Miller (pgm, ppm) Richard Mitton (16-bit PSD) github:urraka (animated gif) Junggon Kim (PNM comments) Christopher Forseth (animated gif) Daniel Gibson (16-bit TGA) socks-the-fox (16-bit PNG) Jeremy Sawicki (handle all ImageNet JPGs) Optimizations & bugfixes Mikhail Morozov (1-bit BMP) Fabian "ryg" Giesen Anael Seghezzi (is-16-bit query) Arseny Kapoulkine Simon Breuss (16-bit PNM) John-Mark Allen Carmelo J Fdez-Aguera Bug & warning fixes Marc LeBlanc David Woo Guillaume George Martins Mozeiko Christpher Lloyd Jerry Jansson Joseph Thomson Blazej Dariusz Roszkowski Phil Jordan Dave Moore Roy Eltham Hayaki Saito Nathan Reed Won Chun Luke Graham Johan Duparc Nick Verigakis the Horde3D community Thomas Ruf Ronny Chevalier github:rlyeh Janez Zemva John Bartholomew Michal Cichon github:romigrou Jonathan Blow Ken Hamada Tero Hanninen github:svdijk Eugene Golushkov Laurent Gomila Cort Stratton github:snagar Aruelien Pocheville Sergio Gonzalez Thibault Reuille github:Zelex Cass Everitt Ryamond Barbiero github:grim210 Paul Du Bois Engin Manap Aldo Culquicondor github:sammyhw Philipp Wiesemann Dale Weiler Oriol Ferrer Mesia github:phprus Josh Tobin Neil Bickford Matthew Gregan github:poppolopoppo Julian Raschke Gregory Mullen Christian Floisand github:darealshinji Baldur Karlsson Kevin Schmidt JR Smith github:Michaelangel007 Brad Weinberger Matvey Cherevko github:mosra Luca Sas Alexander Veselov Zack Middleton [reserved] Ryan C. 
Gordon [reserved] [reserved] DO NOT ADD YOUR NAME HERE Jacko Dirks To add your name to the credits, pick a random blank space in the middle and fill it. 80% of merge conflicts on stb PRs are due to people adding their name at the end of the credits. */ #ifndef STBI_INCLUDE_STB_IMAGE_H #define STBI_INCLUDE_STB_IMAGE_H // DOCUMENTATION // // Limitations: // - no 12-bit-per-channel JPEG // - no JPEGs with arithmetic coding // - GIF always returns *comp=4 // // Basic usage (see HDR discussion below for HDR usage): // int x,y,n; // unsigned char *data = stbi_load(filename, &x, &y, &n, 0); // // ... process data if not NULL ... // // ... x = width, y = height, n = # 8-bit components per pixel ... // // ... replace '0' with '1'..'4' to force that many components per pixel // // ... but 'n' will always be the number that it would have been if you said 0 // stbi_image_free(data); // // Standard parameters: // int *x -- outputs image width in pixels // int *y -- outputs image height in pixels // int *channels_in_file -- outputs # of image components in image file // int desired_channels -- if non-zero, # of image components requested in result // // The return value from an image loader is an 'unsigned char *' which points // to the pixel data, or NULL on an allocation failure or if the image is // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels, // with each pixel consisting of N interleaved 8-bit components; the first // pixel pointed to is top-left-most in the image. There is no padding between // image scanlines or between pixels, regardless of format. The number of // components N is 'desired_channels' if desired_channels is non-zero, or // *channels_in_file otherwise. If desired_channels is non-zero, // *channels_in_file has the number of components that _would_ have been // output otherwise. E.g. 
if you set desired_channels to 4, you will always // get RGBA output, but you can check *channels_in_file to see if it's trivially // opaque because e.g. there were only 3 channels in the source image. // // An output image with N components has the following components interleaved // in this order in each pixel: // // N=#comp components // 1 grey // 2 grey, alpha // 3 red, green, blue // 4 red, green, blue, alpha // // If image loading fails for any reason, the return value will be NULL, // and *x, *y, *channels_in_file will be unchanged. The function // stbi_failure_reason() can be queried for an extremely brief, end-user // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly // more user-friendly ones. // // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. // // To query the width, height and component count of an image without having to // decode the full file, you can use the stbi_info family of functions: // // int x,y,n,ok; // ok = stbi_info(filename, &x, &y, &n); // // returns ok=1 and sets x, y, n if image is a supported format, // // 0 otherwise. // // Note that stb_image pervasively uses ints in its public API for sizes, // including sizes of memory buffers. This is now part of the API and thus // hard to change without causing breakage. As a result, the various image // loaders all have certain limits on image size; these differ somewhat // by format but generally boil down to either just under 2GB or just under // 1GB. When the decoded image would be larger than this, stb_image decoding // will fail. // // Additionally, stb_image will reject image files that have any of their // dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS, // which defaults to 2**24 = 16777216 pixels. 
Due to the above memory limit, // the only way to have an image with such dimensions load correctly // is for it to have a rather extreme aspect ratio. Either way, the // assumption here is that such larger images are likely to be malformed // or malicious. If you do need to load an image with individual dimensions // larger than that, and it still fits in the overall size limit, you can // #define STBI_MAX_DIMENSIONS on your own to be something larger. // // =========================================================================== // // UNICODE: // // If compiling for Windows and you wish to use Unicode filenames, compile // with // #define STBI_WINDOWS_UTF8 // and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert // Windows wchar_t filenames to utf8. // // =========================================================================== // // Philosophy // // stb libraries are designed with the following priorities: // // 1. easy to use // 2. easy to maintain // 3. good performance // // Sometimes I let "good performance" creep up in priority over "easy to maintain", // and for best performance I may provide less-easy-to-use APIs that give higher // performance, in addition to the easy-to-use ones. Nevertheless, it's important // to keep in mind that from the standpoint of you, a client of this library, // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all. // // Some secondary priorities arise directly from the first two, some of which // provide more explicit reasons why performance can't be emphasized. // // - Portable ("ease of use") // - Small source code footprint ("easy to maintain") // - No dependencies ("ease of use") // // =========================================================================== // // I/O callbacks // // I/O callbacks allow you to read from arbitrary sources, like packaged // files or some other source. 
Data read from callbacks are processed // through a small internal buffer (currently 128 bytes) to try to reduce // overhead. // // The three functions you must define are "read" (reads some bytes of data), // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end). // // =========================================================================== // // SIMD support // // The JPEG decoder will try to automatically use SIMD kernels on x86 when // supported by the compiler. For ARM Neon support, you must explicitly // request it. // // (The old do-it-yourself SIMD API is no longer supported in the current // code.) // // On x86, SSE2 will automatically be used when available based on a run-time // test; if not, the generic C versions are used as a fall-back. On ARM targets, // the typical path is to have separate builds for NEON and non-NEON devices // (at least this is true for iOS and Android). Therefore, the NEON support is // toggled by a build flag: define STBI_NEON to get NEON loops. // // If for some reason you do not want to use any of the SIMD code, or if // you have issues compiling it, you can disable it entirely by // defining STBI_NO_SIMD. // // =========================================================================== // // HDR image support (disable by defining STBI_NO_HDR) // // stb_image supports loading HDR images in general, and currently the Radiance // .HDR file format specifically. You can still load any file through the existing // interface; if you attempt to load an HDR file, it will be automatically remapped // to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; // both of these constants can be reconfigured through this interface: // // stbi_hdr_to_ldr_gamma(2.2f); // stbi_hdr_to_ldr_scale(1.0f); // // (note, do not use _inverse_ constants; stb_image will invert them // appropriately). 
// // Additionally, there is a new, parallel interface for loading files as // (linear) floats to preserve the full dynamic range: // // float *data = stbi_loadf(filename, &x, &y, &n, 0); // // If you load LDR images through this interface, those images will // be promoted to floating point values, run through the inverse of // constants corresponding to the above: // // stbi_ldr_to_hdr_scale(1.0f); // stbi_ldr_to_hdr_gamma(2.2f); // // Finally, given a filename (or an open file or memory block--see header // file for details) containing image data, you can query for the "most // appropriate" interface to use (that is, whether the image is HDR or // not), using: // // stbi_is_hdr(char *filename); // // =========================================================================== // // iPhone PNG support: // // We optionally support converting iPhone-formatted PNGs (which store // premultiplied BGRA) back to RGB, even though they're internally encoded // differently. To enable this conversion, call // stbi_convert_iphone_png_to_rgb(1). // // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per // pixel to remove any premultiplied alpha *only* if the image file explicitly // says there's premultiplied data (currently only happens in iPhone images, // and only if iPhone convert-to-rgb processing is on). // // =========================================================================== // // ADDITIONAL CONFIGURATION // // - You can suppress implementation of any of the decoders to reduce // your code footprint by #defining one or more of the following // symbols before creating the implementation. 
// // STBI_NO_JPEG // STBI_NO_PNG // STBI_NO_BMP // STBI_NO_PSD // STBI_NO_TGA // STBI_NO_GIF // STBI_NO_HDR // STBI_NO_PIC // STBI_NO_PNM (.ppm and .pgm) // // - You can request *only* certain decoders and suppress all other ones // (this will be more forward-compatible, as addition of new decoders // doesn't require you to disable them explicitly): // // STBI_ONLY_JPEG // STBI_ONLY_PNG // STBI_ONLY_BMP // STBI_ONLY_PSD // STBI_ONLY_TGA // STBI_ONLY_GIF // STBI_ONLY_HDR // STBI_ONLY_PIC // STBI_ONLY_PNM (.ppm and .pgm) // // - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still // want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB // // - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater // than that size (in either width or height) without further processing. // This is to let programs in the wild set an upper bound to prevent // denial-of-service attacks on untrusted data, as one could generate a // valid image of gigantic dimensions and force stb_image to allocate a // huge block of memory and spend disproportionate time decoding it. By // default this is set to (1 << 24), which is 16777216, but that's still // very big. #ifndef STBI_NO_STDIO #include #endif // STBI_NO_STDIO #define STBI_VERSION 1 enum { STBI_default = 0, // only used for desired_channels STBI_grey = 1, STBI_grey_alpha = 2, STBI_rgb = 3, STBI_rgb_alpha = 4 }; #include typedef unsigned char stbi_uc; typedef unsigned short stbi_us; #ifdef __cplusplus extern "C" { #endif #ifndef STBIDEF #ifdef STB_IMAGE_STATIC #define STBIDEF static #else #define STBIDEF extern #endif #endif ////////////////////////////////////////////////////////////////////////////// // // PRIMARY API - works on images of any type // // // load image by filename, open file, or memory buffer // typedef struct { int (*read) (void *user,char *data,int size); // fill 'data' with 'size' bytes. 
return number of bytes actually read void (*skip) (void *user,int n); // skip the next 'n' bytes, or 'unget' the last -n bytes if negative int (*eof) (void *user); // returns nonzero if we are at end of file/data } stbi_io_callbacks; //////////////////////////////////// // // 8-bits-per-channel interface // STBIDEF stbi_uc *stbi_load_from_memory (stbi_uc const *buffer, int len , int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk , void *user, int *x, int *y, int *channels_in_file, int desired_channels); #ifndef STBI_NO_STDIO STBIDEF stbi_uc *stbi_load (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); // for stbi_load_from_file, file pointer is left pointing immediately after image #endif #ifndef STBI_NO_GIF STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp); #endif #ifdef STBI_WINDOWS_UTF8 STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); #endif //////////////////////////////////// // // 16-bits-per-channel interface // STBIDEF stbi_us *stbi_load_16_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); #ifndef STBI_NO_STDIO STBIDEF stbi_us *stbi_load_16 (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); #endif //////////////////////////////////// // // float-per-channel interface // #ifndef STBI_NO_LINEAR STBIDEF float *stbi_loadf_from_memory (stbi_uc const *buffer, int 
len, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF float *stbi_loadf_from_callbacks (stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels); #ifndef STBI_NO_STDIO STBIDEF float *stbi_loadf (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels); STBIDEF float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels); #endif #endif #ifndef STBI_NO_HDR STBIDEF void stbi_hdr_to_ldr_gamma(float gamma); STBIDEF void stbi_hdr_to_ldr_scale(float scale); #endif // STBI_NO_HDR #ifndef STBI_NO_LINEAR STBIDEF void stbi_ldr_to_hdr_gamma(float gamma); STBIDEF void stbi_ldr_to_hdr_scale(float scale); #endif // STBI_NO_LINEAR // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user); STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); #ifndef STBI_NO_STDIO STBIDEF int stbi_is_hdr (char const *filename); STBIDEF int stbi_is_hdr_from_file(FILE *f); #endif // STBI_NO_STDIO // get a VERY brief reason for failure // on most compilers (and ALL modern mainstream compilers) this is threadsafe STBIDEF const char *stbi_failure_reason (void); // free the loaded image -- this is just free() STBIDEF void stbi_image_free (void *retval_from_stbi_load); // get image dimensions & components without fully decoding STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp); STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len); STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user); #ifndef STBI_NO_STDIO STBIDEF int stbi_info (char const *filename, int *x, int *y, int *comp); STBIDEF int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); STBIDEF int 
stbi_is_16_bit (char const *filename); STBIDEF int stbi_is_16_bit_from_file(FILE *f); #endif // for image formats that explicitly notate that they have premultiplied alpha, // we just return the colors as stored in the file. set this flag to force // unpremultiplication. results are undefined if the unpremultiply overflow. STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); // indicate whether we should process iphone images back to canonical format, // or just pass them through "as-is" STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); // flip the image vertically, so the first pixel in the output array is the bottom left STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip); // as above, but only applies to images loaded on the thread that calls the function // this function is only available if your compiler supports thread-local variables; // calling it will fail to link if your compiler doesn't STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply); STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert); STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip); // ZLIB client - used by PNG, available for other purposes STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header); STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); #ifdef __cplusplus } #endif // // //// end header file 
///////////////////////////////////////////////////// #endif // STBI_INCLUDE_STB_IMAGE_H #ifdef STB_IMAGE_IMPLEMENTATION #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \ || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \ || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \ || defined(STBI_ONLY_ZLIB) #ifndef STBI_ONLY_JPEG #define STBI_NO_JPEG #endif #ifndef STBI_ONLY_PNG #define STBI_NO_PNG #endif #ifndef STBI_ONLY_BMP #define STBI_NO_BMP #endif #ifndef STBI_ONLY_PSD #define STBI_NO_PSD #endif #ifndef STBI_ONLY_TGA #define STBI_NO_TGA #endif #ifndef STBI_ONLY_GIF #define STBI_NO_GIF #endif #ifndef STBI_ONLY_HDR #define STBI_NO_HDR #endif #ifndef STBI_ONLY_PIC #define STBI_NO_PIC #endif #ifndef STBI_ONLY_PNM #define STBI_NO_PNM #endif #endif #if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB) #define STBI_NO_ZLIB #endif #include #include // ptrdiff_t on osx #include #include #include #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) #include // ldexp, pow #endif #ifndef STBI_NO_STDIO #include #endif #ifndef STBI_ASSERT #include #define STBI_ASSERT(x) assert(x) #endif #ifdef __cplusplus #define STBI_EXTERN extern "C" #else #define STBI_EXTERN extern #endif #ifndef _MSC_VER #ifdef __cplusplus #define stbi_inline inline #else #define stbi_inline #endif #else #define stbi_inline __forceinline #endif #ifndef STBI_NO_THREAD_LOCALS #if defined(__cplusplus) && __cplusplus >= 201103L #define STBI_THREAD_LOCAL thread_local #elif defined(__GNUC__) && __GNUC__ < 5 #define STBI_THREAD_LOCAL __thread #elif defined(_MSC_VER) #define STBI_THREAD_LOCAL __declspec(thread) #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__) #define STBI_THREAD_LOCAL _Thread_local #endif #ifndef STBI_THREAD_LOCAL #if defined(__GNUC__) #define STBI_THREAD_LOCAL __thread #endif #endif #endif #if defined(_MSC_VER) || defined(__SYMBIAN32__) 
typedef unsigned short stbi__uint16; typedef signed short stbi__int16; typedef unsigned int stbi__uint32; typedef signed int stbi__int32; #else #include typedef uint16_t stbi__uint16; typedef int16_t stbi__int16; typedef uint32_t stbi__uint32; typedef int32_t stbi__int32; #endif // should produce compiler error if size is wrong typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1]; #ifdef _MSC_VER #define STBI_NOTUSED(v) (void)(v) #else #define STBI_NOTUSED(v) (void)sizeof(v) #endif #ifdef _MSC_VER #define STBI_HAS_LROTL #endif #ifdef STBI_HAS_LROTL #define stbi_lrot(x,y) _lrotl(x,y) #else #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (-(y) & 31))) #endif #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED)) // ok #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED) // ok #else #error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)." #endif #ifndef STBI_MALLOC #define STBI_MALLOC(sz) malloc(sz) #define STBI_REALLOC(p,newsz) realloc(p,newsz) #define STBI_FREE(p) free(p) #endif #ifndef STBI_REALLOC_SIZED #define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz) #endif // x86/x64 detection #if defined(__x86_64__) || defined(_M_X64) #define STBI__X64_TARGET #elif defined(__i386) || defined(_M_IX86) #define STBI__X86_TARGET #endif #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD) // gcc doesn't support sse2 intrinsics unless you compile with -msse2, // which in turn means it gets to use SSE2 everywhere. This is unfortunate, // but previous attempts to provide the SSE2 functions with runtime // detection caused numerous issues. The way architecture extensions are // exposed in GCC/Clang is, sadly, not really suited for one-file libs. // New behavior: if compiled with -msse2, we use SSE2 without any // detection; if not, we don't use it at all. 
#define STBI_NO_SIMD #endif #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD) // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET // // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant. // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not // simultaneously enabling "-mstackrealign". // // See https://github.com/nothings/stb/issues/81 for more information. // // So default to no SSE2 on 32-bit MinGW. If you've read this far and added // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2. #define STBI_NO_SIMD #endif #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET)) #define STBI_SSE2 #include #ifdef _MSC_VER #if _MSC_VER >= 1400 // not VC6 #include // __cpuid static int stbi__cpuid3(void) { int info[4]; __cpuid(info,1); return info[3]; } #else static int stbi__cpuid3(void) { int res; __asm { mov eax,1 cpuid mov res,edx } return res; } #endif #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name #if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) static int stbi__sse2_available(void) { int info3 = stbi__cpuid3(); return ((info3 >> 26) & 1) != 0; } #endif #else // assume GCC-style if not VC++ #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) #if !defined(STBI_NO_JPEG) && defined(STBI_SSE2) static int stbi__sse2_available(void) { // If we're even attempting to compile this on GCC/Clang, that means // -msse2 is on, which means the compiler is allowed to use SSE2 // instructions at will, and so are we. 
return 1; } #endif #endif #endif // ARM NEON #if defined(STBI_NO_SIMD) && defined(STBI_NEON) #undef STBI_NEON #endif #ifdef STBI_NEON #include #ifdef _MSC_VER #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name #else #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16))) #endif #endif #ifndef STBI_SIMD_ALIGN #define STBI_SIMD_ALIGN(type, name) type name #endif #ifndef STBI_MAX_DIMENSIONS #define STBI_MAX_DIMENSIONS (1 << 24) #endif /////////////////////////////////////////////// // // stbi__context struct and start_xxx functions // stbi__context structure is our basic context used by all images, so it // contains all the IO context, plus some basic image information typedef struct { stbi__uint32 img_x, img_y; int img_n, img_out_n; stbi_io_callbacks io; void *io_user_data; int read_from_callbacks; int buflen; stbi_uc buffer_start[128]; int callback_already_read; stbi_uc *img_buffer, *img_buffer_end; stbi_uc *img_buffer_original, *img_buffer_original_end; } stbi__context; static void stbi__refill_buffer(stbi__context *s); // initialize a memory-decode context static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len) { s->io.read = NULL; s->read_from_callbacks = 0; s->callback_already_read = 0; s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer; s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len; } // initialize a callback-based context static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user) { s->io = *c; s->io_user_data = user; s->buflen = sizeof(s->buffer_start); s->read_from_callbacks = 1; s->callback_already_read = 0; s->img_buffer = s->img_buffer_original = s->buffer_start; stbi__refill_buffer(s); s->img_buffer_original_end = s->img_buffer_end; } #ifndef STBI_NO_STDIO static int stbi__stdio_read(void *user, char *data, int size) { return (int) fread(data,1,size,(FILE*) user); } static void stbi__stdio_skip(void *user, int n) { int ch; fseek((FILE*) 
user, n, SEEK_CUR); ch = fgetc((FILE*) user); /* have to read a byte to reset feof()'s flag */ if (ch != EOF) { ungetc(ch, (FILE *) user); /* push byte back onto stream if valid. */ } } static int stbi__stdio_eof(void *user) { return feof((FILE*) user) || ferror((FILE *) user); } static stbi_io_callbacks stbi__stdio_callbacks = { stbi__stdio_read, stbi__stdio_skip, stbi__stdio_eof, }; static void stbi__start_file(stbi__context *s, FILE *f) { stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f); } //static void stop_file(stbi__context *s) { } #endif // !STBI_NO_STDIO static void stbi__rewind(stbi__context *s) { // conceptually rewind SHOULD rewind to the beginning of the stream, // but we just rewind to the beginning of the initial buffer, because // we only use it after doing 'test', which only ever looks at at most 92 bytes s->img_buffer = s->img_buffer_original; s->img_buffer_end = s->img_buffer_original_end; } enum { STBI_ORDER_RGB, STBI_ORDER_BGR }; typedef struct { int bits_per_channel; int num_channels; int channel_order; } stbi__result_info; #ifndef STBI_NO_JPEG static int stbi__jpeg_test(stbi__context *s); static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PNG static int stbi__png_test(stbi__context *s); static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp); static int stbi__png_is16(stbi__context *s); #endif #ifndef STBI_NO_BMP static int stbi__bmp_test(stbi__context *s); static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_TGA static int stbi__tga_test(stbi__context *s); static void *stbi__tga_load(stbi__context *s, 
int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PSD static int stbi__psd_test(stbi__context *s); static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc); static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp); static int stbi__psd_is16(stbi__context *s); #endif #ifndef STBI_NO_HDR static int stbi__hdr_test(stbi__context *s); static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PIC static int stbi__pic_test(stbi__context *s); static void *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_GIF static int stbi__gif_test(stbi__context *s); static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp); static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp); #endif #ifndef STBI_NO_PNM static int stbi__pnm_test(stbi__context *s); static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri); static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp); static int stbi__pnm_is16(stbi__context *s); #endif static #ifdef STBI_THREAD_LOCAL STBI_THREAD_LOCAL #endif const char *stbi__g_failure_reason; STBIDEF const char *stbi_failure_reason(void) { return stbi__g_failure_reason; } #ifndef STBI_NO_FAILURE_STRINGS static int stbi__err(const char *str) { stbi__g_failure_reason = str; return 0; } #endif static void *stbi__malloc(size_t size) { return STBI_MALLOC(size); } 
// stb_image uses ints pervasively, including for offset calculations.
// therefore the largest decoded image size we can support with the
// current code, even on 64-bit targets, is INT_MAX. this is not a
// significant limitation for the intended use case.
//
// we do, however, need to make sure our size calculations don't
// overflow. hence a few helper functions for size calculations that
// multiply integers together, making sure that they're non-negative
// and no overflow occurs.

// return 1 if the sum is valid, 0 on overflow.
// negative terms are considered invalid.
static int stbi__addsizes_valid(int a, int b)
{
   // reject negative terms on BOTH sides, as documented above. (the helpers
   // below only ever pass an already-validated non-negative product as 'a',
   // so this does not change their results.)
   if (a < 0 || b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence also
   // 0 <= INT_MAX - b <= INT_MAX.
   // And "a + b <= INT_MAX" (which might overflow) is the
   // same as a <= INT_MAX - b (no overflow)
   return a <= INT_MAX - b;
}

// returns 1 if the product is valid, 0 on overflow.
// negative factors are considered invalid.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a < 0 || b < 0) return 0;
   if (b == 0) return 1; // mul-by-0 is always safe
   // portable way to check for no overflows in a*b
   return a <= INT_MAX/b;
}

#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   // a*b is only evaluated once stbi__mul2sizes_valid has proven it cannot overflow
   return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add);
}
#endif

// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // each partial product is validated before it is used in the next step
   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
      stbi__addsizes_valid(a*b*c, add);
}

// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
      stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add);
}
#endif

#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
// mallocs with size overflow checking
// allocates a*b + add bytes; returns NULL if the size is invalid or the
// allocation fails
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (!stbi__mad2sizes_valid(a, b, add)) return NULL;
   return stbi__malloc(a*b + add);
}
#endif

// allocates a*b*c + add bytes; returns NULL if the size is invalid or the
// allocation fails
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL;
   return stbi__malloc(a*b*c + add);
}

#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
// allocates a*b*c*d + add bytes; returns NULL if the size is invalid or the
// allocation fails
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
   return stbi__malloc(a*b*c*d + add);
}
#endif

// returns 1 if the sum of two signed ints is valid (between -2^31 and 2^31-1 inclusive), 0 on overflow.
static int stbi__addints_valid(int a, int b)
{
   if ((a >= 0) != (b >= 0)) return 1; // a and b have different signs, so no overflow
   if (a < 0 && b < 0) return a >= INT_MIN - b; // same as a + b >= INT_MIN; INT_MIN - b cannot overflow since b < 0.
   return a <= INT_MAX - b;
}

// returns 1 if the product of two ints fits in a signed short, 0 on overflow.
static int stbi__mul2shorts_valid(int a, int b) { if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid if (b < 0) return a <= SHRT_MIN / b; // same as a * b >= SHRT_MIN return a >= SHRT_MIN / b; } // stbi__err - error // stbi__errpf - error returning pointer to float // stbi__errpuc - error returning pointer to unsigned char #ifdef STBI_NO_FAILURE_STRINGS #define stbi__err(x,y) 0 #elif defined(STBI_FAILURE_USERMSG) #define stbi__err(x,y) stbi__err(y) #else #define stbi__err(x,y) stbi__err(x) #endif #define stbi__errpf(x,y) ((float *)(size_t) (stbi__err(x,y)?NULL:NULL)) #define stbi__errpuc(x,y) ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL)) STBIDEF void stbi_image_free(void *retval_from_stbi_load) { STBI_FREE(retval_from_stbi_load); } #ifndef STBI_NO_LINEAR static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp); #endif #ifndef STBI_NO_HDR static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp); #endif static int stbi__vertically_flip_on_load_global = 0; STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip) { stbi__vertically_flip_on_load_global = flag_true_if_should_flip; } #ifndef STBI_THREAD_LOCAL #define stbi__vertically_flip_on_load stbi__vertically_flip_on_load_global #else static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set; STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip) { stbi__vertically_flip_on_load_local = flag_true_if_should_flip; stbi__vertically_flip_on_load_set = 1; } #define stbi__vertically_flip_on_load (stbi__vertically_flip_on_load_set \ ? 
stbi__vertically_flip_on_load_local \ : stbi__vertically_flip_on_load_global) #endif // STBI_THREAD_LOCAL static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) { memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order ri->num_channels = 0; // test the formats with a very explicit header first (at least a FOURCC // or distinctive magic number first) #ifndef STBI_NO_PNG if (stbi__png_test(s)) return stbi__png_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_BMP if (stbi__bmp_test(s)) return stbi__bmp_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_GIF if (stbi__gif_test(s)) return stbi__gif_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_PSD if (stbi__psd_test(s)) return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc); #else STBI_NOTUSED(bpc); #endif #ifndef STBI_NO_PIC if (stbi__pic_test(s)) return stbi__pic_load(s,x,y,comp,req_comp, ri); #endif // then the formats that can end up attempting to load with just 1 or 2 // bytes matching expectations; these are prone to false positives, so // try them later #ifndef STBI_NO_JPEG if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_PNM if (stbi__pnm_test(s)) return stbi__pnm_load(s,x,y,comp,req_comp, ri); #endif #ifndef STBI_NO_HDR if (stbi__hdr_test(s)) { float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri); return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); } #endif #ifndef STBI_NO_TGA // test tga last because it's a crappy test! 
if (stbi__tga_test(s)) return stbi__tga_load(s,x,y,comp,req_comp, ri); #endif return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt"); } static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels) { int i; int img_len = w * h * channels; stbi_uc *reduced; reduced = (stbi_uc *) stbi__malloc(img_len); if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory"); for (i = 0; i < img_len; ++i) reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling STBI_FREE(orig); return reduced; } static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels) { int i; int img_len = w * h * channels; stbi__uint16 *enlarged; enlarged = (stbi__uint16 *) stbi__malloc(img_len*2); if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); for (i = 0; i < img_len; ++i) enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff STBI_FREE(orig); return enlarged; } static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel) { int row; size_t bytes_per_row = (size_t)w * bytes_per_pixel; stbi_uc temp[2048]; stbi_uc *bytes = (stbi_uc *)image; for (row = 0; row < (h>>1); row++) { stbi_uc *row0 = bytes + row*bytes_per_row; stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row; // swap row0 with row1 size_t bytes_left = bytes_per_row; while (bytes_left) { size_t bytes_copy = (bytes_left < sizeof(temp)) ? 
bytes_left : sizeof(temp); memcpy(temp, row0, bytes_copy); memcpy(row0, row1, bytes_copy); memcpy(row1, temp, bytes_copy); row0 += bytes_copy; row1 += bytes_copy; bytes_left -= bytes_copy; } } } #ifndef STBI_NO_GIF static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel) { int slice; int slice_size = w * h * bytes_per_pixel; stbi_uc *bytes = (stbi_uc *)image; for (slice = 0; slice < z; ++slice) { stbi__vertical_flip(bytes, w, h, bytes_per_pixel); bytes += slice_size; } } #endif static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) { stbi__result_info ri; void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8); if (result == NULL) return NULL; // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); if (ri.bits_per_channel != 8) { result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp); ri.bits_per_channel = 8; } // @TODO: move stbi__convert_format to here if (stbi__vertically_flip_on_load) { int channels = req_comp ? req_comp : *comp; stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc)); } return (unsigned char *) result; } static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp) { stbi__result_info ri; void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16); if (result == NULL) return NULL; // it is the responsibility of the loaders to make sure we get either 8 or 16 bit. STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16); if (ri.bits_per_channel != 16) { result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? 
*comp : req_comp); ri.bits_per_channel = 16; } // @TODO: move stbi__convert_format16 to here // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision if (stbi__vertically_flip_on_load) { int channels = req_comp ? req_comp : *comp; stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16)); } return (stbi__uint16 *) result; } #if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR) static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp) { if (stbi__vertically_flip_on_load && result != NULL) { int channels = req_comp ? req_comp : *comp; stbi__vertical_flip(result, *x, *y, channels * sizeof(float)); } } #endif #ifndef STBI_NO_STDIO #if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); #endif #if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) { return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); } #endif static FILE *stbi__fopen(char const *filename, char const *mode) { FILE *f; #if defined(_WIN32) && defined(STBI_WINDOWS_UTF8) wchar_t wMode[64]; wchar_t wFilename[1024]; if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) return 0; if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) return 0; #if defined(_MSC_VER) && _MSC_VER >= 1400 if (0 != _wfopen_s(&f, wFilename, wMode)) f = 0; #else f = _wfopen(wFilename, wMode); #endif #elif defined(_MSC_VER) && _MSC_VER >= 1400 if (0 != fopen_s(&f, 
filename, mode)) f=0; #else f = fopen(filename, mode); #endif return f; } STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) { FILE *f = stbi__fopen(filename, "rb"); unsigned char *result; if (!f) return stbi__errpuc("can't fopen", "Unable to open file"); result = stbi_load_from_file(f,x,y,comp,req_comp); fclose(f); return result; } STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) { unsigned char *result; stbi__context s; stbi__start_file(&s,f); result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); if (result) { // need to 'unget' all the characters in the IO buffer fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); } return result; } STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp) { stbi__uint16 *result; stbi__context s; stbi__start_file(&s,f); result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp); if (result) { // need to 'unget' all the characters in the IO buffer fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR); } return result; } STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp) { FILE *f = stbi__fopen(filename, "rb"); stbi__uint16 *result; if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file"); result = stbi_load_from_file_16(f,x,y,comp,req_comp); fclose(f); return result; } #endif //!STBI_NO_STDIO STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels) { stbi__context s; stbi__start_mem(&s,buffer,len); return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); } STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels) { stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user); return 
stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels); } STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_mem(&s,buffer,len); return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); } STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp); } #ifndef STBI_NO_GIF STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp) { unsigned char *result; stbi__context s; stbi__start_mem(&s,buffer,len); result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp); if (stbi__vertically_flip_on_load) { stbi__vertical_flip_slices( result, *x, *y, *z, *comp ); } return result; } #endif #ifndef STBI_NO_LINEAR static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp) { unsigned char *data; #ifndef STBI_NO_HDR if (stbi__hdr_test(s)) { stbi__result_info ri; float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri); if (hdr_data) stbi__float_postprocess(hdr_data,x,y,comp,req_comp); return hdr_data; } #endif data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp); if (data) return stbi__ldr_to_hdr(data, *x, *y, req_comp ? 
req_comp : *comp); return stbi__errpf("unknown image type", "Image not of any known type, or corrupt"); } STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_mem(&s,buffer,len); return stbi__loadf_main(&s,x,y,comp,req_comp); } STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); return stbi__loadf_main(&s,x,y,comp,req_comp); } #ifndef STBI_NO_STDIO STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) { float *result; FILE *f = stbi__fopen(filename, "rb"); if (!f) return stbi__errpf("can't fopen", "Unable to open file"); result = stbi_loadf_from_file(f,x,y,comp,req_comp); fclose(f); return result; } STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) { stbi__context s; stbi__start_file(&s,f); return stbi__loadf_main(&s,x,y,comp,req_comp); } #endif // !STBI_NO_STDIO #endif // !STBI_NO_LINEAR // these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is // defined, for API simplicity; if STBI_NO_LINEAR is defined, it always // reports false! 
STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) { #ifndef STBI_NO_HDR stbi__context s; stbi__start_mem(&s,buffer,len); return stbi__hdr_test(&s); #else STBI_NOTUSED(buffer); STBI_NOTUSED(len); return 0; #endif } #ifndef STBI_NO_STDIO STBIDEF int stbi_is_hdr (char const *filename) { FILE *f = stbi__fopen(filename, "rb"); int result=0; if (f) { result = stbi_is_hdr_from_file(f); fclose(f); } return result; } STBIDEF int stbi_is_hdr_from_file(FILE *f) { #ifndef STBI_NO_HDR long pos = ftell(f); int res; stbi__context s; stbi__start_file(&s,f); res = stbi__hdr_test(&s); fseek(f, pos, SEEK_SET); return res; #else STBI_NOTUSED(f); return 0; #endif } #endif // !STBI_NO_STDIO STBIDEF int stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user) { #ifndef STBI_NO_HDR stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user); return stbi__hdr_test(&s); #else STBI_NOTUSED(clbk); STBI_NOTUSED(user); return 0; #endif } #ifndef STBI_NO_LINEAR static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f; STBIDEF void stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; } STBIDEF void stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; } #endif static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f; STBIDEF void stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; } STBIDEF void stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; } ////////////////////////////////////////////////////////////////////////////// // // Common code used by all image loaders // enum { STBI__SCAN_load=0, STBI__SCAN_type, STBI__SCAN_header }; static void stbi__refill_buffer(stbi__context *s) { int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen); s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original); if (n == 0) { // at end of file, treat same as if from memory, but need to handle case // where s->img_buffer isn't pointing to safe memory, e.g. 
0-byte file s->read_from_callbacks = 0; s->img_buffer = s->buffer_start; s->img_buffer_end = s->buffer_start+1; *s->img_buffer = 0; } else { s->img_buffer = s->buffer_start; s->img_buffer_end = s->buffer_start + n; } } stbi_inline static stbi_uc stbi__get8(stbi__context *s) { if (s->img_buffer < s->img_buffer_end) return *s->img_buffer++; if (s->read_from_callbacks) { stbi__refill_buffer(s); return *s->img_buffer++; } return 0; } #if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) // nothing #else stbi_inline static int stbi__at_eof(stbi__context *s) { if (s->io.read) { if (!(s->io.eof)(s->io_user_data)) return 0; // if feof() is true, check if buffer = end // special case: we've only got the special 0 character at the end if (s->read_from_callbacks == 0) return 1; } return s->img_buffer >= s->img_buffer_end; } #endif #if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) // nothing #else static void stbi__skip(stbi__context *s, int n) { if (n == 0) return; // already there! 
if (n < 0) { s->img_buffer = s->img_buffer_end; return; } if (s->io.read) { int blen = (int) (s->img_buffer_end - s->img_buffer); if (blen < n) { s->img_buffer = s->img_buffer_end; (s->io.skip)(s->io_user_data, n - blen); return; } } s->img_buffer += n; } #endif #if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM) // nothing #else static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n) { if (s->io.read) { int blen = (int) (s->img_buffer_end - s->img_buffer); if (blen < n) { int res, count; memcpy(buffer, s->img_buffer, blen); count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen); res = (count == (n-blen)); s->img_buffer = s->img_buffer_end; return res; } } if (s->img_buffer+n <= s->img_buffer_end) { memcpy(buffer, s->img_buffer, n); s->img_buffer += n; return 1; } else return 0; } #endif #if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) // nothing #else static int stbi__get16be(stbi__context *s) { int z = stbi__get8(s); return (z << 8) + stbi__get8(s); } #endif #if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC) // nothing #else static stbi__uint32 stbi__get32be(stbi__context *s) { stbi__uint32 z = stbi__get16be(s); return (z << 16) + stbi__get16be(s); } #endif #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) // nothing #else static int stbi__get16le(stbi__context *s) { int z = stbi__get8(s); return z + (stbi__get8(s) << 8); } #endif #ifndef STBI_NO_BMP static stbi__uint32 stbi__get32le(stbi__context *s) { stbi__uint32 z = stbi__get16le(s); z += (stbi__uint32)stbi__get16le(s) << 16; return z; } #endif #define STBI__BYTECAST(x) ((stbi_uc) ((x) & 255)) // truncate int to byte without warnings #if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) // nothing #else 
////////////////////////////////////////////////////////////////////////////// // // generic converter from built-in img_n to req_comp // individual types do this automatically as much as possible (e.g. jpeg // does all cases internally since it needs to colorspace convert anyway, // and it never has alpha, so very few cases ). png can automatically // interleave an alpha=255 channel, but falls back to this for other cases // // assume data buffer is malloced, so malloc a new one and free that one // only failure mode is malloc failing static stbi_uc stbi__compute_y(int r, int g, int b) { return (stbi_uc) (((r*77) + (g*150) + (29*b)) >> 8); } #endif #if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM) // nothing #else static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y) { int i,j; unsigned char *good; if (req_comp == img_n) return data; STBI_ASSERT(req_comp >= 1 && req_comp <= 4); good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0); if (good == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); } for (j=0; j < (int) y; ++j) { unsigned char *src = data + j * x * img_n ; unsigned char *dest = good + j * x * req_comp; #define STBI__COMBO(a,b) ((a)*8+(b)) #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) // convert source image with img_n components to one with req_comp components; // avoid switch per pixel, so use switch per scanline and massive macros switch (STBI__COMBO(img_n, req_comp)) { STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255; } break; STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255; } break; STBI__CASE(2,1) { dest[0]=src[0]; } break; STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; STBI__CASE(2,4) { 
dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255; } break; STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255; } break; STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); } break; STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break; STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion"); } #undef STBI__CASE } STBI_FREE(data); return good; } #endif #if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) // nothing #else static stbi__uint16 stbi__compute_y_16(int r, int g, int b) { return (stbi__uint16) (((r*77) + (g*150) + (29*b)) >> 8); } #endif #if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) // nothing #else static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y) { int i,j; stbi__uint16 *good; if (req_comp == img_n) return data; STBI_ASSERT(req_comp >= 1 && req_comp <= 4); good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2); if (good == NULL) { STBI_FREE(data); return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory"); } for (j=0; j < (int) y; ++j) { stbi__uint16 *src = data + j * x * img_n ; stbi__uint16 *dest = good + j * x * req_comp; #define STBI__COMBO(a,b) ((a)*8+(b)) #define STBI__CASE(a,b) case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) // convert source image with img_n components to one with req_comp components; // avoid switch per pixel, so use switch per scanline and massive macros switch (STBI__COMBO(img_n, req_comp)) { STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff; } break; STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; STBI__CASE(1,4) { 
dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff; } break; STBI__CASE(2,1) { dest[0]=src[0]; } break; STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0]; } break; STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1]; } break; STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff; } break; STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break; STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); } break; STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break; STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2]; } break; default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion"); } #undef STBI__CASE } STBI_FREE(data); return good; } #endif #ifndef STBI_NO_LINEAR static float *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp) { int i,k,n; float *output; if (!data) return NULL; output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0); if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); } // compute number of non-alpha components if (comp & 1) n = comp; else n = comp-1; for (i=0; i < x*y; ++i) { for (k=0; k < n; ++k) { output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale); } } if (n < comp) { for (i=0; i < x*y; ++i) { output[i*comp + n] = data[i*comp + n]/255.0f; } } STBI_FREE(data); return output; } #endif #ifndef STBI_NO_HDR #define stbi__float2int(x) ((int) (x)) static stbi_uc *stbi__hdr_to_ldr(float *data, int x, int y, int comp) { int i,k,n; stbi_uc *output; if (!data) return NULL; output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0); if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); } // compute number of non-alpha components if (comp & 1) n = 
comp; else n = comp-1; for (i=0; i < x*y; ++i) { for (k=0; k < n; ++k) { float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f; if (z < 0) z = 0; if (z > 255) z = 255; output[i*comp + k] = (stbi_uc) stbi__float2int(z); } if (k < comp) { float z = data[i*comp+k] * 255 + 0.5f; if (z < 0) z = 0; if (z > 255) z = 255; output[i*comp + k] = (stbi_uc) stbi__float2int(z); } } STBI_FREE(data); return output; } #endif ////////////////////////////////////////////////////////////////////////////// // // "baseline" JPEG/JFIF decoder // // simple implementation // - doesn't support delayed output of y-dimension // - simple interface (only one output format: 8-bit interleaved RGB) // - doesn't try to recover corrupt jpegs // - doesn't allow partial loading, loading multiple at once // - still fast on x86 (copying globals into locals doesn't help x86) // - allocates lots of intermediate memory (full size of all components) // - non-interleaved case requires this anyway // - allows good upsampling (see next) // high-quality // - upsampled channels are bilinearly interpolated, even across blocks // - quality integer IDCT derived from IJG's 'slow' // performance // - fast huffman; reasonable integer IDCT // - some SIMD kernels for common paths on targets with SSE2/NEON // - uses a lot of intermediate memory, could cache poorly #ifndef STBI_NO_JPEG // huffman decoding acceleration #define FAST_BITS 9 // larger handles more cases; smaller stomps less cache typedef struct { stbi_uc fast[1 << FAST_BITS]; // weirdly, repacking this into AoS is a 10% speed loss, instead of a win stbi__uint16 code[256]; stbi_uc values[256]; stbi_uc size[257]; unsigned int maxcode[18]; int delta[17]; // old 'firstsymbol' - old 'firstcode' } stbi__huffman; typedef struct { stbi__context *s; stbi__huffman huff_dc[4]; stbi__huffman huff_ac[4]; stbi__uint16 dequant[4][64]; stbi__int16 fast_ac[4][1 << FAST_BITS]; // sizes for components, interleaved MCUs int img_h_max, img_v_max; 
int img_mcu_x, img_mcu_y; int img_mcu_w, img_mcu_h; // definition of jpeg image component struct { int id; int h,v; int tq; int hd,ha; int dc_pred; int x,y,w2,h2; stbi_uc *data; void *raw_data, *raw_coeff; stbi_uc *linebuf; short *coeff; // progressive only int coeff_w, coeff_h; // number of 8x8 coefficient blocks } img_comp[4]; stbi__uint32 code_buffer; // jpeg entropy-coded buffer int code_bits; // number of valid bits unsigned char marker; // marker seen while filling entropy buffer int nomore; // flag if we saw a marker so must stop int progressive; int spec_start; int spec_end; int succ_high; int succ_low; int eob_run; int jfif; int app14_color_transform; // Adobe APP14 tag int rgb; int scan_n, order[4]; int restart_interval, todo; // kernels void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]); void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step); stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs); } stbi__jpeg; static int stbi__build_huffman(stbi__huffman *h, int *count) { int i,j,k=0; unsigned int code; // build size list for each symbol (from JPEG spec) for (i=0; i < 16; ++i) { for (j=0; j < count[i]; ++j) { h->size[k++] = (stbi_uc) (i+1); if(k >= 257) return stbi__err("bad size list","Corrupt JPEG"); } } h->size[k] = 0; // compute actual symbols (from jpeg spec) code = 0; k = 0; for(j=1; j <= 16; ++j) { // compute delta to add to code to compute symbol id h->delta[j] = k - code; if (h->size[k] == j) { while (h->size[k] == j) h->code[k++] = (stbi__uint16) (code++); if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG"); } // compute largest code + 1 for this size, preshifted as needed later h->maxcode[j] = code << (16-j); code <<= 1; } h->maxcode[j] = 0xffffffff; // build non-spec acceleration table; 255 is flag for not-accelerated memset(h->fast, 255, 1 << FAST_BITS); for (i=0; i < k; ++i) { int s = 
h->size[i]; if (s <= FAST_BITS) { int c = h->code[i] << (FAST_BITS-s); int m = 1 << (FAST_BITS-s); for (j=0; j < m; ++j) { h->fast[c+j] = (stbi_uc) i; } } } return 1; } // build a table that decodes both magnitude and value of small ACs in // one go. static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h) { int i; for (i=0; i < (1 << FAST_BITS); ++i) { stbi_uc fast = h->fast[i]; fast_ac[i] = 0; if (fast < 255) { int rs = h->values[fast]; int run = (rs >> 4) & 15; int magbits = rs & 15; int len = h->size[fast]; if (magbits && len + magbits <= FAST_BITS) { // magnitude code followed by receive_extend code int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits); int m = 1 << (magbits - 1); if (k < m) k += (~0U << magbits) + 1; // if the result is small enough, we can fit it in fast_ac table if (k >= -128 && k <= 127) fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits)); } } } } static void stbi__grow_buffer_unsafe(stbi__jpeg *j) { do { unsigned int b = j->nomore ? 
0 : stbi__get8(j->s); if (b == 0xff) { int c = stbi__get8(j->s); while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes if (c != 0) { j->marker = (unsigned char) c; j->nomore = 1; return; } } j->code_buffer |= b << (24 - j->code_bits); j->code_bits += 8; } while (j->code_bits <= 24); } // (1 << n) - 1 static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; // decode a jpeg huffman value from the bitstream stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h) { unsigned int temp; int c,k; if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); // look at the top FAST_BITS and determine what symbol ID it is, // if the code is <= FAST_BITS c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); k = h->fast[c]; if (k < 255) { int s = h->size[k]; if (s > j->code_bits) return -1; j->code_buffer <<= s; j->code_bits -= s; return h->values[k]; } // naive test is to shift the code_buffer down so k bits are // valid, then test against maxcode. To speed this up, we've // preshifted maxcode left so that it has (16-k) 0s at the // end; in other words, regardless of the number of bits, it // wants to be compared against something shifted to have 16; // that way we don't need to shift inside the loop. temp = j->code_buffer >> 16; for (k=FAST_BITS+1 ; ; ++k) if (temp < h->maxcode[k]) break; if (k == 17) { // error! code not found j->code_bits -= 16; return -1; } if (k > j->code_bits) return -1; // convert the huffman code to the symbol id c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k]; if(c < 0 || c >= 256) // symbol id out of bounds! 
return -1; STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]); // convert the id to a symbol j->code_bits -= k; j->code_buffer <<= k; return h->values[c]; } // bias[n] = (-1<code_bits < n) stbi__grow_buffer_unsafe(j); if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative) k = stbi_lrot(j->code_buffer, n); j->code_buffer = k & ~stbi__bmask[n]; k &= stbi__bmask[n]; j->code_bits -= n; return k + (stbi__jbias[n] & (sgn - 1)); } // get some unsigned bits stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n) { unsigned int k; if (j->code_bits < n) stbi__grow_buffer_unsafe(j); if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing k = stbi_lrot(j->code_buffer, n); j->code_buffer = k & ~stbi__bmask[n]; k &= stbi__bmask[n]; j->code_bits -= n; return k; } stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j) { unsigned int k; if (j->code_bits < 1) stbi__grow_buffer_unsafe(j); if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s intead of continuing k = j->code_buffer; j->code_buffer <<= 1; --j->code_bits; return k & 0x80000000; } // given a value that's at position X in the zigzag stream, // where does it appear in the 8x8 matrix coded as row-major? 
static const stbi_uc stbi__jpeg_dezigzag[64+15] = { 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, // let corrupt input sample past end 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63, 63 }; // decode one 64-entry block-- static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant) { int diff,dc,k; int t; if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); t = stbi__jpeg_huff_decode(j, hdc); if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG"); // 0 all the ac values now so we can do it 32-bits at a time memset(data,0,64*sizeof(data[0])); diff = t ? stbi__extend_receive(j, t) : 0; if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG"); dc = j->img_comp[b].dc_pred + diff; j->img_comp[b].dc_pred = dc; if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); data[0] = (short) (dc * dequant[0]); // decode AC components, see JPEG spec k = 1; do { unsigned int zig; int c,r,s; if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); r = fac[c]; if (r) { // fast-AC path k += (r >> 4) & 15; // run s = r & 15; // combined length if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); j->code_buffer <<= s; j->code_bits -= s; // decode into unzigzag'd location zig = stbi__jpeg_dezigzag[k++]; data[zig] = (short) ((r >> 8) * dequant[zig]); } else { int rs = stbi__jpeg_huff_decode(j, hac); if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); s = rs & 15; r = rs >> 4; if (s == 0) { if (rs != 0xf0) break; // end block k += 16; } else { k += r; // decode 
into unzigzag'd location zig = stbi__jpeg_dezigzag[k++]; data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]); } } } while (k < 64); return 1; } static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b) { int diff,dc; int t; if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); if (j->succ_high == 0) { // first scan for DC coefficient, must be first memset(data,0,64*sizeof(data[0])); // 0 all the ac values now t = stbi__jpeg_huff_decode(j, hdc); if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); diff = t ? stbi__extend_receive(j, t) : 0; if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG"); dc = j->img_comp[b].dc_pred + diff; j->img_comp[b].dc_pred = dc; if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); data[0] = (short) (dc * (1 << j->succ_low)); } else { // refinement scan for DC coefficient if (stbi__jpeg_get_bit(j)) data[0] += (short) (1 << j->succ_low); } return 1; } // @OPTIMIZE: store non-zigzagged during the decode passes, // and only de-zigzag when dequantizing static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac) { int k; if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG"); if (j->succ_high == 0) { int shift = j->succ_low; if (j->eob_run) { --j->eob_run; return 1; } k = j->spec_start; do { unsigned int zig; int c,r,s; if (j->code_bits < 16) stbi__grow_buffer_unsafe(j); c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); r = fac[c]; if (r) { // fast-AC path k += (r >> 4) & 15; // run s = r & 15; // combined length if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available"); j->code_buffer <<= s; j->code_bits -= s; zig = 
stbi__jpeg_dezigzag[k++]; data[zig] = (short) ((r >> 8) * (1 << shift)); } else { int rs = stbi__jpeg_huff_decode(j, hac); if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); s = rs & 15; r = rs >> 4; if (s == 0) { if (r < 15) { j->eob_run = (1 << r); if (r) j->eob_run += stbi__jpeg_get_bits(j, r); --j->eob_run; break; } k += 16; } else { k += r; zig = stbi__jpeg_dezigzag[k++]; data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift)); } } } while (k <= j->spec_end); } else { // refinement scan for these AC coefficients short bit = (short) (1 << j->succ_low); if (j->eob_run) { --j->eob_run; for (k = j->spec_start; k <= j->spec_end; ++k) { short *p = &data[stbi__jpeg_dezigzag[k]]; if (*p != 0) if (stbi__jpeg_get_bit(j)) if ((*p & bit)==0) { if (*p > 0) *p += bit; else *p -= bit; } } } else { k = j->spec_start; do { int r,s; int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG"); s = rs & 15; r = rs >> 4; if (s == 0) { if (r < 15) { j->eob_run = (1 << r) - 1; if (r) j->eob_run += stbi__jpeg_get_bits(j, r); r = 64; // force end of block } else { // r=15 s=0 should write 16 0s, so we just do // a run of 15 0s and then write s (which is 0), // so we don't have to do anything special here } } else { if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG"); // sign bit if (stbi__jpeg_get_bit(j)) s = bit; else s = -bit; } // advance by r while (k <= j->spec_end) { short *p = &data[stbi__jpeg_dezigzag[k++]]; if (*p != 0) { if (stbi__jpeg_get_bit(j)) if ((*p & bit)==0) { if (*p > 0) *p += bit; else *p -= bit; } } else { if (r == 0) { *p = (short) s; break; } --r; } } } while (k <= j->spec_end); } } return 1; } // take a -128..127 value and stbi__clamp it and convert to 0..255 stbi_inline static stbi_uc stbi__clamp(int x) { // trick to use a single test to catch both cases if ((unsigned int) x > 255) { if (x < 0) return 0; 
if (x > 255) return 255; } return (stbi_uc) x; } #define stbi__f2f(x) ((int) (((x) * 4096 + 0.5))) #define stbi__fsh(x) ((x) * 4096) // derived from jidctint -- DCT_ISLOW #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ p2 = s2; \ p3 = s6; \ p1 = (p2+p3) * stbi__f2f(0.5411961f); \ t2 = p1 + p3*stbi__f2f(-1.847759065f); \ t3 = p1 + p2*stbi__f2f( 0.765366865f); \ p2 = s0; \ p3 = s4; \ t0 = stbi__fsh(p2+p3); \ t1 = stbi__fsh(p2-p3); \ x0 = t0+t3; \ x3 = t0-t3; \ x1 = t1+t2; \ x2 = t1-t2; \ t0 = s7; \ t1 = s5; \ t2 = s3; \ t3 = s1; \ p3 = t0+t2; \ p4 = t1+t3; \ p1 = t0+t3; \ p2 = t1+t2; \ p5 = (p3+p4)*stbi__f2f( 1.175875602f); \ t0 = t0*stbi__f2f( 0.298631336f); \ t1 = t1*stbi__f2f( 2.053119869f); \ t2 = t2*stbi__f2f( 3.072711026f); \ t3 = t3*stbi__f2f( 1.501321110f); \ p1 = p5 + p1*stbi__f2f(-0.899976223f); \ p2 = p5 + p2*stbi__f2f(-2.562915447f); \ p3 = p3*stbi__f2f(-1.961570560f); \ p4 = p4*stbi__f2f(-0.390180644f); \ t3 += p1+p4; \ t2 += p2+p3; \ t1 += p2+p4; \ t0 += p1+p3; static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64]) { int i,val[64],*v=val; stbi_uc *o; short *d = data; // columns for (i=0; i < 8; ++i,++d, ++v) { // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 && d[40]==0 && d[48]==0 && d[56]==0) { // no shortcut 0 seconds // (1|2|3|4|5|6|7)==0 0 seconds // all separate -0.047 seconds // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds int dcterm = d[0]*4; v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; } else { STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56]) // constants scaled things up by 1<<12; let's bring them back // down, but keep 2 extra bits of precision x0 += 512; x1 += 512; x2 += 512; x3 += 512; v[ 0] = (x0+t3) >> 10; v[56] = (x0-t3) >> 10; v[ 8] = (x1+t2) >> 10; v[48] = (x1-t2) >> 10; v[16] = (x2+t1) >> 10; v[40] = (x2-t1) >> 10; v[24] = (x3+t0) >> 10; v[32] = (x3-t0) >> 10; } } for 
(i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { // no fast case since the first 1D IDCT spread components out STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) // constants scaled things up by 1<<12, plus we had 1<<2 from first // loop, plus horizontal and vertical each scale by sqrt(8) so together // we've got an extra 1<<3, so 1<<17 total we need to remove. // so we want to round that, which means adding 0.5 * 1<<17, // aka 65536. Also, we'll end up with -128 to 127 that we want // to encode as 0..255 by adding 128, so we'll add that before the shift x0 += 65536 + (128<<17); x1 += 65536 + (128<<17); x2 += 65536 + (128<<17); x3 += 65536 + (128<<17); // tried computing the shifts into temps, or'ing the temps to see // if any were out of range, but that was slower o[0] = stbi__clamp((x0+t3) >> 17); o[7] = stbi__clamp((x0-t3) >> 17); o[1] = stbi__clamp((x1+t2) >> 17); o[6] = stbi__clamp((x1-t2) >> 17); o[2] = stbi__clamp((x2+t1) >> 17); o[5] = stbi__clamp((x2-t1) >> 17); o[3] = stbi__clamp((x3+t0) >> 17); o[4] = stbi__clamp((x3-t0) >> 17); } } #ifdef STBI_SSE2 // sse2 integer IDCT. not the fastest possible implementation but it // produces bit-identical results to the generic C version so it's // fully "transparent". static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) { // This is constructed to match our regular (generic) integer IDCT exactly. 
__m128i row0, row1, row2, row3, row4, row5, row6, row7; __m128i tmp; // dot product constant: even elems=x, odd elems=y #define dct_const(x,y) _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y)) // out(0) = c0[even]*x + c0[odd]*y (c0, x, y 16-bit, out 32-bit) // out(1) = c1[even]*x + c1[odd]*y #define dct_rot(out0,out1, x,y,c0,c1) \ __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \ __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \ __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \ __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \ __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \ __m128i out1##_h = _mm_madd_epi16(c0##hi, c1) // out = in << 12 (in 16-bit, out 32-bit) #define dct_widen(out, in) \ __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \ __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4) // wide add #define dct_wadd(out, a, b) \ __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \ __m128i out##_h = _mm_add_epi32(a##_h, b##_h) // wide sub #define dct_wsub(out, a, b) \ __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \ __m128i out##_h = _mm_sub_epi32(a##_h, b##_h) // butterfly a/b, add bias, then shift by "s" and pack #define dct_bfly32o(out0, out1, a,b,bias,s) \ { \ __m128i abiased_l = _mm_add_epi32(a##_l, bias); \ __m128i abiased_h = _mm_add_epi32(a##_h, bias); \ dct_wadd(sum, abiased, b); \ dct_wsub(dif, abiased, b); \ out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \ out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \ } // 8-bit interleave step (for transposes) #define dct_interleave8(a, b) \ tmp = a; \ a = _mm_unpacklo_epi8(a, b); \ b = _mm_unpackhi_epi8(tmp, b) // 16-bit interleave step (for transposes) #define dct_interleave16(a, b) \ tmp = a; \ a = _mm_unpacklo_epi16(a, b); \ b = _mm_unpackhi_epi16(tmp, b) #define dct_pass(bias,shift) \ { \ /* even part */ \ dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \ __m128i sum04 = _mm_add_epi16(row0, row4); \ 
__m128i dif04 = _mm_sub_epi16(row0, row4); \ dct_widen(t0e, sum04); \ dct_widen(t1e, dif04); \ dct_wadd(x0, t0e, t3e); \ dct_wsub(x3, t0e, t3e); \ dct_wadd(x1, t1e, t2e); \ dct_wsub(x2, t1e, t2e); \ /* odd part */ \ dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \ dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \ __m128i sum17 = _mm_add_epi16(row1, row7); \ __m128i sum35 = _mm_add_epi16(row3, row5); \ dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \ dct_wadd(x4, y0o, y4o); \ dct_wadd(x5, y1o, y5o); \ dct_wadd(x6, y2o, y5o); \ dct_wadd(x7, y3o, y4o); \ dct_bfly32o(row0,row7, x0,x7,bias,shift); \ dct_bfly32o(row1,row6, x1,x6,bias,shift); \ dct_bfly32o(row2,row5, x2,x5,bias,shift); \ dct_bfly32o(row3,row4, x3,x4,bias,shift); \ } __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); // rounding biases in column/row passes, see stbi__idct_block for explanation. 
__m128i bias_0 = _mm_set1_epi32(512); __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17)); // load row0 = _mm_load_si128((const __m128i *) (data + 0*8)); row1 = _mm_load_si128((const __m128i *) (data + 1*8)); row2 = _mm_load_si128((const __m128i *) (data + 2*8)); row3 = _mm_load_si128((const __m128i *) (data + 3*8)); row4 = _mm_load_si128((const __m128i *) (data + 4*8)); row5 = _mm_load_si128((const __m128i *) (data + 5*8)); row6 = _mm_load_si128((const __m128i *) (data + 6*8)); row7 = _mm_load_si128((const __m128i *) (data + 7*8)); // column pass dct_pass(bias_0, 10); { // 16bit 8x8 transpose pass 1 dct_interleave16(row0, row4); dct_interleave16(row1, row5); dct_interleave16(row2, row6); dct_interleave16(row3, row7); // transpose pass 2 dct_interleave16(row0, row2); dct_interleave16(row1, row3); dct_interleave16(row4, row6); dct_interleave16(row5, row7); // transpose pass 3 dct_interleave16(row0, row1); dct_interleave16(row2, row3); dct_interleave16(row4, row5); dct_interleave16(row6, row7); } // row pass dct_pass(bias_1, 17); { // pack __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7 __m128i p1 = _mm_packus_epi16(row2, row3); __m128i p2 = _mm_packus_epi16(row4, row5); __m128i p3 = _mm_packus_epi16(row6, row7); // 8bit 8x8 transpose pass 1 dct_interleave8(p0, p2); // a0e0a1e1... dct_interleave8(p1, p3); // c0g0c1g1... // transpose pass 2 dct_interleave8(p0, p1); // a0c0e0g0... dct_interleave8(p2, p3); // b0d0f0h0... // transpose pass 3 dct_interleave8(p0, p2); // a0b0c0d0... dct_interleave8(p1, p3); // a4b4c4d4... 
// store _mm_storel_epi64((__m128i *) out, p0); out += out_stride; _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride; _mm_storel_epi64((__m128i *) out, p2); out += out_stride; _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride; _mm_storel_epi64((__m128i *) out, p1); out += out_stride; _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride; _mm_storel_epi64((__m128i *) out, p3); out += out_stride; _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e)); } #undef dct_const #undef dct_rot #undef dct_widen #undef dct_wadd #undef dct_wsub #undef dct_bfly32o #undef dct_interleave8 #undef dct_interleave16 #undef dct_pass } #endif // STBI_SSE2 #ifdef STBI_NEON // NEON integer IDCT. should produce bit-identical // results to the generic C version. static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64]) { int16x8_t row0, row1, row2, row3, row4, row5, row6, row7; int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f)); int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f)); int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f)); int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f)); int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f)); int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f)); int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f)); int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f)); int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f)); int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f)); int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f)); int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f)); #define dct_long_mul(out, inq, coeff) \ int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \ int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff) #define dct_long_mac(out, acc, inq, coeff) \ int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \ int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff) 
#define dct_widen(out, inq) \ int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \ int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12) // wide add #define dct_wadd(out, a, b) \ int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \ int32x4_t out##_h = vaddq_s32(a##_h, b##_h) // wide sub #define dct_wsub(out, a, b) \ int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \ int32x4_t out##_h = vsubq_s32(a##_h, b##_h) // butterfly a/b, then shift using "shiftop" by "s" and pack #define dct_bfly32o(out0,out1, a,b,shiftop,s) \ { \ dct_wadd(sum, a, b); \ dct_wsub(dif, a, b); \ out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \ out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \ } #define dct_pass(shiftop, shift) \ { \ /* even part */ \ int16x8_t sum26 = vaddq_s16(row2, row6); \ dct_long_mul(p1e, sum26, rot0_0); \ dct_long_mac(t2e, p1e, row6, rot0_1); \ dct_long_mac(t3e, p1e, row2, rot0_2); \ int16x8_t sum04 = vaddq_s16(row0, row4); \ int16x8_t dif04 = vsubq_s16(row0, row4); \ dct_widen(t0e, sum04); \ dct_widen(t1e, dif04); \ dct_wadd(x0, t0e, t3e); \ dct_wsub(x3, t0e, t3e); \ dct_wadd(x1, t1e, t2e); \ dct_wsub(x2, t1e, t2e); \ /* odd part */ \ int16x8_t sum15 = vaddq_s16(row1, row5); \ int16x8_t sum17 = vaddq_s16(row1, row7); \ int16x8_t sum35 = vaddq_s16(row3, row5); \ int16x8_t sum37 = vaddq_s16(row3, row7); \ int16x8_t sumodd = vaddq_s16(sum17, sum35); \ dct_long_mul(p5o, sumodd, rot1_0); \ dct_long_mac(p1o, p5o, sum17, rot1_1); \ dct_long_mac(p2o, p5o, sum35, rot1_2); \ dct_long_mul(p3o, sum37, rot2_0); \ dct_long_mul(p4o, sum15, rot2_1); \ dct_wadd(sump13o, p1o, p3o); \ dct_wadd(sump24o, p2o, p4o); \ dct_wadd(sump23o, p2o, p3o); \ dct_wadd(sump14o, p1o, p4o); \ dct_long_mac(x4, sump13o, row7, rot3_0); \ dct_long_mac(x5, sump24o, row5, rot3_1); \ dct_long_mac(x6, sump23o, row3, rot3_2); \ dct_long_mac(x7, sump14o, row1, rot3_3); \ dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \ dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \ dct_bfly32o(row2,row5, 
x2,x5,shiftop,shift); \ dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \ } // load row0 = vld1q_s16(data + 0*8); row1 = vld1q_s16(data + 1*8); row2 = vld1q_s16(data + 2*8); row3 = vld1q_s16(data + 3*8); row4 = vld1q_s16(data + 4*8); row5 = vld1q_s16(data + 5*8); row6 = vld1q_s16(data + 6*8); row7 = vld1q_s16(data + 7*8); // add DC bias row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0)); // column pass dct_pass(vrshrn_n_s32, 10); // 16bit 8x8 transpose { // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively. // whether compilers actually get this is another story, sadly. #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; } #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); } #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); } // pass 1 dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6 dct_trn16(row2, row3); dct_trn16(row4, row5); dct_trn16(row6, row7); // pass 2 dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4 dct_trn32(row1, row3); dct_trn32(row4, row6); dct_trn32(row5, row7); // pass 3 dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0 dct_trn64(row1, row5); dct_trn64(row2, row6); dct_trn64(row3, row7); #undef dct_trn16 #undef dct_trn32 #undef dct_trn64 } // row pass // vrshrn_n_s32 only supports shifts up to 16, we need // 17. so do a non-rounding shift of 16 first then follow // up with a rounding shift by 1. 
dct_pass(vshrn_n_s32, 16); { // pack and round uint8x8_t p0 = vqrshrun_n_s16(row0, 1); uint8x8_t p1 = vqrshrun_n_s16(row1, 1); uint8x8_t p2 = vqrshrun_n_s16(row2, 1); uint8x8_t p3 = vqrshrun_n_s16(row3, 1); uint8x8_t p4 = vqrshrun_n_s16(row4, 1); uint8x8_t p5 = vqrshrun_n_s16(row5, 1); uint8x8_t p6 = vqrshrun_n_s16(row6, 1); uint8x8_t p7 = vqrshrun_n_s16(row7, 1); // again, these can translate into one instruction, but often don't. #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; } #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); } #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); } // sadly can't use interleaved stores here since we only write // 8 bytes to each scan line! // 8x8 8-bit transpose pass 1 dct_trn8_8(p0, p1); dct_trn8_8(p2, p3); dct_trn8_8(p4, p5); dct_trn8_8(p6, p7); // pass 2 dct_trn8_16(p0, p2); dct_trn8_16(p1, p3); dct_trn8_16(p4, p6); dct_trn8_16(p5, p7); // pass 3 dct_trn8_32(p0, p4); dct_trn8_32(p1, p5); dct_trn8_32(p2, p6); dct_trn8_32(p3, p7); // store vst1_u8(out, p0); out += out_stride; vst1_u8(out, p1); out += out_stride; vst1_u8(out, p2); out += out_stride; vst1_u8(out, p3); out += out_stride; vst1_u8(out, p4); out += out_stride; vst1_u8(out, p5); out += out_stride; vst1_u8(out, p6); out += out_stride; vst1_u8(out, p7); #undef dct_trn8_8 #undef dct_trn8_16 #undef dct_trn8_32 } #undef dct_long_mul #undef dct_long_mac #undef dct_widen #undef dct_wadd #undef dct_wsub #undef dct_bfly32o #undef dct_pass } #endif // STBI_NEON #define STBI__MARKER_none 0xff // if there's a pending marker from the entropy stream, return that // otherwise, fetch from the stream and get a marker. 
if there's no // marker, return 0xff, which is never a valid marker value static stbi_uc stbi__get_marker(stbi__jpeg *j) { stbi_uc x; if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; } x = stbi__get8(j->s); if (x != 0xff) return STBI__MARKER_none; while (x == 0xff) x = stbi__get8(j->s); // consume repeated 0xff fill bytes return x; } // in each scan, we'll have scan_n components, and the order // of the components is specified by order[] #define STBI__RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) // after a restart interval, stbi__jpeg_reset the entropy decoder and // the dc prediction static void stbi__jpeg_reset(stbi__jpeg *j) { j->code_bits = 0; j->code_buffer = 0; j->nomore = 0; j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0; j->marker = STBI__MARKER_none; j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; j->eob_run = 0; // no more than 1<<31 MCUs if no restart_interal? that's plenty safe, // since we don't even allow 1<<30 pixels } static int stbi__parse_entropy_coded_data(stbi__jpeg *z) { stbi__jpeg_reset(z); if (!z->progressive) { if (z->scan_n == 1) { int i,j; STBI_SIMD_ALIGN(short, data[64]); int n = z->order[0]; // non-interleaved data, we just need to process one block at a time, // in trivial scanline order // number of blocks to do just depends on how many actual "pixels" this // component has, independent of interleaved MCU blocking and such int w = (z->img_comp[n].x+7) >> 3; int h = (z->img_comp[n].y+7) >> 3; for (j=0; j < h; ++j) { for (i=0; i < w; ++i) { int ha = z->img_comp[n].ha; if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); // every data block is an MCU, so countdown the restart interval if (--z->todo <= 0) { if (z->code_bits < 24) 
stbi__grow_buffer_unsafe(z); // if it's NOT a restart, then just bail, so we get corrupt data // rather than no data if (!STBI__RESTART(z->marker)) return 1; stbi__jpeg_reset(z); } } } return 1; } else { // interleaved int i,j,k,x,y; STBI_SIMD_ALIGN(short, data[64]); for (j=0; j < z->img_mcu_y; ++j) { for (i=0; i < z->img_mcu_x; ++i) { // scan an interleaved mcu... process scan_n components in order for (k=0; k < z->scan_n; ++k) { int n = z->order[k]; // scan out an mcu's worth of this component; that's just determined // by the basic H and V specified for the component for (y=0; y < z->img_comp[n].v; ++y) { for (x=0; x < z->img_comp[n].h; ++x) { int x2 = (i*z->img_comp[n].h + x)*8; int y2 = (j*z->img_comp[n].v + y)*8; int ha = z->img_comp[n].ha; if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0; z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data); } } } // after all interleaved components, that's an interleaved MCU, // so now count down the restart interval if (--z->todo <= 0) { if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); if (!STBI__RESTART(z->marker)) return 1; stbi__jpeg_reset(z); } } } return 1; } } else { if (z->scan_n == 1) { int i,j; int n = z->order[0]; // non-interleaved data, we just need to process one block at a time, // in trivial scanline order // number of blocks to do just depends on how many actual "pixels" this // component has, independent of interleaved MCU blocking and such int w = (z->img_comp[n].x+7) >> 3; int h = (z->img_comp[n].y+7) >> 3; for (j=0; j < h; ++j) { for (i=0; i < w; ++i) { short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); if (z->spec_start == 0) { if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) return 0; } else { int ha = z->img_comp[n].ha; if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha])) return 0; } 
// every data block is an MCU, so countdown the restart interval if (--z->todo <= 0) { if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); if (!STBI__RESTART(z->marker)) return 1; stbi__jpeg_reset(z); } } } return 1; } else { // interleaved int i,j,k,x,y; for (j=0; j < z->img_mcu_y; ++j) { for (i=0; i < z->img_mcu_x; ++i) { // scan an interleaved mcu... process scan_n components in order for (k=0; k < z->scan_n; ++k) { int n = z->order[k]; // scan out an mcu's worth of this component; that's just determined // by the basic H and V specified for the component for (y=0; y < z->img_comp[n].v; ++y) { for (x=0; x < z->img_comp[n].h; ++x) { int x2 = (i*z->img_comp[n].h + x); int y2 = (j*z->img_comp[n].v + y); short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w); if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n)) return 0; } } } // after all interleaved components, that's an interleaved MCU, // so now count down the restart interval if (--z->todo <= 0) { if (z->code_bits < 24) stbi__grow_buffer_unsafe(z); if (!STBI__RESTART(z->marker)) return 1; stbi__jpeg_reset(z); } } } return 1; } } } static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant) { int i; for (i=0; i < 64; ++i) data[i] *= dequant[i]; } static void stbi__jpeg_finish(stbi__jpeg *z) { if (z->progressive) { // dequantize and idct the data int i,j,n; for (n=0; n < z->s->img_n; ++n) { int w = (z->img_comp[n].x+7) >> 3; int h = (z->img_comp[n].y+7) >> 3; for (j=0; j < h; ++j) { for (i=0; i < w; ++i) { short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w); stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]); z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data); } } } } } static int stbi__process_marker(stbi__jpeg *z, int m) { int L; switch (m) { case STBI__MARKER_none: // no marker found return stbi__err("expected marker","Corrupt JPEG"); case 0xDD: // DRI - specify restart interval 
if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG"); z->restart_interval = stbi__get16be(z->s); return 1; case 0xDB: // DQT - define quantization table L = stbi__get16be(z->s)-2; while (L > 0) { int q = stbi__get8(z->s); int p = q >> 4, sixteen = (p != 0); int t = q & 15,i; if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG"); if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG"); for (i=0; i < 64; ++i) z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s)); L -= (sixteen ? 129 : 65); } return L==0; case 0xC4: // DHT - define huffman table L = stbi__get16be(z->s)-2; while (L > 0) { stbi_uc *v; int sizes[16],i,n=0; int q = stbi__get8(z->s); int tc = q >> 4; int th = q & 15; if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG"); for (i=0; i < 16; ++i) { sizes[i] = stbi__get8(z->s); n += sizes[i]; } if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values! 
L -= 17; if (tc == 0) { if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0; v = z->huff_dc[th].values; } else { if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0; v = z->huff_ac[th].values; } for (i=0; i < n; ++i) v[i] = stbi__get8(z->s); if (tc != 0) stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th); L -= n; } return L==0; } // check for comment block or APP blocks if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { L = stbi__get16be(z->s); if (L < 2) { if (m == 0xFE) return stbi__err("bad COM len","Corrupt JPEG"); else return stbi__err("bad APP len","Corrupt JPEG"); } L -= 2; if (m == 0xE0 && L >= 5) { // JFIF APP0 segment static const unsigned char tag[5] = {'J','F','I','F','\0'}; int ok = 1; int i; for (i=0; i < 5; ++i) if (stbi__get8(z->s) != tag[i]) ok = 0; L -= 5; if (ok) z->jfif = 1; } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment static const unsigned char tag[6] = {'A','d','o','b','e','\0'}; int ok = 1; int i; for (i=0; i < 6; ++i) if (stbi__get8(z->s) != tag[i]) ok = 0; L -= 6; if (ok) { stbi__get8(z->s); // version stbi__get16be(z->s); // flags0 stbi__get16be(z->s); // flags1 z->app14_color_transform = stbi__get8(z->s); // color transform L -= 6; } } stbi__skip(z->s, L); return 1; } return stbi__err("unknown marker","Corrupt JPEG"); } // after we see SOS static int stbi__process_scan_header(stbi__jpeg *z) { int i; int Ls = stbi__get16be(z->s); z->scan_n = stbi__get8(z->s); if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG"); if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG"); for (i=0; i < z->scan_n; ++i) { int id = stbi__get8(z->s), which; int q = stbi__get8(z->s); for (which = 0; which < z->s->img_n; ++which) if (z->img_comp[which].id == id) break; if (which == z->s->img_n) return 0; // no match z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG"); z->img_comp[which].ha = q & 15; 
if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG"); z->order[i] = which; } { int aa; z->spec_start = stbi__get8(z->s); z->spec_end = stbi__get8(z->s); // should be 63, but might be 0 aa = stbi__get8(z->s); z->succ_high = (aa >> 4); z->succ_low = (aa & 15); if (z->progressive) { if (z->spec_start > 63 || z->spec_end > 63 || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13) return stbi__err("bad SOS", "Corrupt JPEG"); } else { if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG"); if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG"); z->spec_end = 63; } } return 1; } static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why) { int i; for (i=0; i < ncomp; ++i) { if (z->img_comp[i].raw_data) { STBI_FREE(z->img_comp[i].raw_data); z->img_comp[i].raw_data = NULL; z->img_comp[i].data = NULL; } if (z->img_comp[i].raw_coeff) { STBI_FREE(z->img_comp[i].raw_coeff); z->img_comp[i].raw_coeff = 0; z->img_comp[i].coeff = 0; } if (z->img_comp[i].linebuf) { STBI_FREE(z->img_comp[i].linebuf); z->img_comp[i].linebuf = NULL; } } return why; } static int stbi__process_frame_header(stbi__jpeg *z, int scan) { stbi__context *s = z->s; int Lf,p,i,q, h_max=1,v_max=1,c; Lf = stbi__get16be(s); if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG p = stbi__get8(s); if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline s->img_y = stbi__get16be(s); if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG s->img_x = stbi__get16be(s); if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); c = stbi__get8(s); if (c != 3 
&& c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG"); s->img_n = c; for (i=0; i < c; ++i) { z->img_comp[i].data = NULL; z->img_comp[i].linebuf = NULL; } if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG"); z->rgb = 0; for (i=0; i < s->img_n; ++i) { static const unsigned char rgb[3] = { 'R', 'G', 'B' }; z->img_comp[i].id = stbi__get8(s); if (s->img_n == 3 && z->img_comp[i].id == rgb[i]) ++z->rgb; q = stbi__get8(s); z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG"); z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG"); z->img_comp[i].tq = stbi__get8(s); if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG"); } if (scan != STBI__SCAN_load) return 1; if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode"); for (i=0; i < s->img_n; ++i) { if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; } // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios // and I've never seen a non-corrupted JPEG file actually use them for (i=0; i < s->img_n; ++i) { if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG"); if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG"); } // compute interleaved mcu info z->img_h_max = h_max; z->img_v_max = v_max; z->img_mcu_w = h_max * 8; z->img_mcu_h = v_max * 8; // these sizes can't be more than 17 bits z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; for (i=0; i < s->img_n; ++i) { // number of effective pixels (e.g. 
for non-interleaved MCU) z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; // to simplify generation, we'll allocate enough memory to decode // the bogus oversized data from using interleaved MCUs and their // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't // discard the extra data until colorspace conversion // // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier) // so these muls can't overflow with 32-bit ints (which we require) z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; z->img_comp[i].coeff = 0; z->img_comp[i].raw_coeff = 0; z->img_comp[i].linebuf = NULL; z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15); if (z->img_comp[i].raw_data == NULL) return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); // align blocks for idct using mmx/sse z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); if (z->progressive) { // w2, h2 are multiples of 8 (see above) z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8; z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8; z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15); if (z->img_comp[i].raw_coeff == NULL) return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory")); z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15); } } return 1; } // use comparisons since in some cases we handle more than one case (e.g. 
SOF) #define stbi__DNL(x) ((x) == 0xdc) #define stbi__SOI(x) ((x) == 0xd8) #define stbi__EOI(x) ((x) == 0xd9) #define stbi__SOF(x) ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2) #define stbi__SOS(x) ((x) == 0xda) #define stbi__SOF_progressive(x) ((x) == 0xc2) static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan) { int m; z->jfif = 0; z->app14_color_transform = -1; // valid values are 0,1,2 z->marker = STBI__MARKER_none; // initialize cached marker to empty m = stbi__get_marker(z); if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG"); if (scan == STBI__SCAN_type) return 1; m = stbi__get_marker(z); while (!stbi__SOF(m)) { if (!stbi__process_marker(z,m)) return 0; m = stbi__get_marker(z); while (m == STBI__MARKER_none) { // some files have extra padding after their blocks, so ok, we'll scan if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG"); m = stbi__get_marker(z); } } z->progressive = stbi__SOF_progressive(m); if (!stbi__process_frame_header(z, scan)) return 0; return 1; } static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j) { // some JPEGs have junk at end, skip over it but if we find what looks // like a valid marker, resume there while (!stbi__at_eof(j->s)) { stbi_uc x = stbi__get8(j->s); while (x == 0xff) { // might be a marker if (stbi__at_eof(j->s)) return STBI__MARKER_none; x = stbi__get8(j->s); if (x != 0x00 && x != 0xff) { // not a stuffed zero or lead-in to another marker, looks // like an actual marker, return it return x; } // stuffed zero has x=0 now which ends the loop, meaning we go // back to regular scan loop. // repeated 0xff keeps trying to read the next byte of the marker. 
} } return STBI__MARKER_none; } // decode image to YCbCr format static int stbi__decode_jpeg_image(stbi__jpeg *j) { int m; for (m = 0; m < 4; m++) { j->img_comp[m].raw_data = NULL; j->img_comp[m].raw_coeff = NULL; } j->restart_interval = 0; if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0; m = stbi__get_marker(j); while (!stbi__EOI(m)) { if (stbi__SOS(m)) { if (!stbi__process_scan_header(j)) return 0; if (!stbi__parse_entropy_coded_data(j)) return 0; if (j->marker == STBI__MARKER_none ) { j->marker = stbi__skip_jpeg_junk_at_end(j); // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0 } m = stbi__get_marker(j); if (STBI__RESTART(m)) m = stbi__get_marker(j); } else if (stbi__DNL(m)) { int Ld = stbi__get16be(j->s); stbi__uint32 NL = stbi__get16be(j->s); if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG"); if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG"); m = stbi__get_marker(j); } else { if (!stbi__process_marker(j, m)) return 1; m = stbi__get_marker(j); } } if (j->progressive) stbi__jpeg_finish(j); return 1; } // static jfif-centered resampling (across block boundaries) typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1, int w, int hs); #define stbi__div4(x) ((stbi_uc) ((x) >> 2)) static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { STBI_NOTUSED(out); STBI_NOTUSED(in_far); STBI_NOTUSED(w); STBI_NOTUSED(hs); return in_near; } static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { // need to generate two samples vertically for every one in input int i; STBI_NOTUSED(hs); for (i=0; i < w; ++i) out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2); return out; } static stbi_uc* stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { // need to generate two samples horizontally for every one in input int i; stbi_uc *input = 
in_near; if (w == 1) { // if only one sample, can't do any interpolation out[0] = out[1] = input[0]; return out; } out[0] = input[0]; out[1] = stbi__div4(input[0]*3 + input[1] + 2); for (i=1; i < w-1; ++i) { int n = 3*input[i]+2; out[i*2+0] = stbi__div4(n+input[i-1]); out[i*2+1] = stbi__div4(n+input[i+1]); } out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2); out[i*2+1] = input[w-1]; STBI_NOTUSED(in_far); STBI_NOTUSED(hs); return out; } #define stbi__div16(x) ((stbi_uc) ((x) >> 4)) static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { // need to generate 2x2 samples for every one in input int i,t0,t1; if (w == 1) { out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); return out; } t1 = 3*in_near[0] + in_far[0]; out[0] = stbi__div4(t1+2); for (i=1; i < w; ++i) { t0 = t1; t1 = 3*in_near[i]+in_far[i]; out[i*2-1] = stbi__div16(3*t0 + t1 + 8); out[i*2 ] = stbi__div16(3*t1 + t0 + 8); } out[w*2-1] = stbi__div4(t1+2); STBI_NOTUSED(hs); return out; } #if defined(STBI_SSE2) || defined(STBI_NEON) static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { // need to generate 2x2 samples for every one in input int i=0,t0,t1; if (w == 1) { out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2); return out; } t1 = 3*in_near[0] + in_far[0]; // process groups of 8 pixels for as long as we can. // note we can't handle the last pixel in a row in this loop // because we need to handle the filter boundary conditions. 
for (; i < ((w-1) & ~7); i += 8) { #if defined(STBI_SSE2) // load and perform the vertical filtering pass // this uses 3*x + y = 4*x + (y - x) __m128i zero = _mm_setzero_si128(); __m128i farb = _mm_loadl_epi64((__m128i *) (in_far + i)); __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i)); __m128i farw = _mm_unpacklo_epi8(farb, zero); __m128i nearw = _mm_unpacklo_epi8(nearb, zero); __m128i diff = _mm_sub_epi16(farw, nearw); __m128i nears = _mm_slli_epi16(nearw, 2); __m128i curr = _mm_add_epi16(nears, diff); // current row // horizontal filter works the same based on shifted vers of current // row. "prev" is current row shifted right by 1 pixel; we need to // insert the previous pixel value (from t1). // "next" is current row shifted left by 1 pixel, with first pixel // of next block of 8 pixels added in. __m128i prv0 = _mm_slli_si128(curr, 2); __m128i nxt0 = _mm_srli_si128(curr, 2); __m128i prev = _mm_insert_epi16(prv0, t1, 0); __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7); // horizontal filter, polyphase implementation since it's convenient: // even pixels = 3*cur + prev = cur*4 + (prev - cur) // odd pixels = 3*cur + next = cur*4 + (next - cur) // note the shared term. __m128i bias = _mm_set1_epi16(8); __m128i curs = _mm_slli_epi16(curr, 2); __m128i prvd = _mm_sub_epi16(prev, curr); __m128i nxtd = _mm_sub_epi16(next, curr); __m128i curb = _mm_add_epi16(curs, bias); __m128i even = _mm_add_epi16(prvd, curb); __m128i odd = _mm_add_epi16(nxtd, curb); // interleave even and odd pixels, then undo scaling. 
__m128i int0 = _mm_unpacklo_epi16(even, odd); __m128i int1 = _mm_unpackhi_epi16(even, odd); __m128i de0 = _mm_srli_epi16(int0, 4); __m128i de1 = _mm_srli_epi16(int1, 4); // pack and write output __m128i outv = _mm_packus_epi16(de0, de1); _mm_storeu_si128((__m128i *) (out + i*2), outv); #elif defined(STBI_NEON) // load and perform the vertical filtering pass // this uses 3*x + y = 4*x + (y - x) uint8x8_t farb = vld1_u8(in_far + i); uint8x8_t nearb = vld1_u8(in_near + i); int16x8_t diff = vreinterpretq_s16_u16(vsubl_u8(farb, nearb)); int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2)); int16x8_t curr = vaddq_s16(nears, diff); // current row // horizontal filter works the same based on shifted vers of current // row. "prev" is current row shifted right by 1 pixel; we need to // insert the previous pixel value (from t1). // "next" is current row shifted left by 1 pixel, with first pixel // of next block of 8 pixels added in. int16x8_t prv0 = vextq_s16(curr, curr, 7); int16x8_t nxt0 = vextq_s16(curr, curr, 1); int16x8_t prev = vsetq_lane_s16(t1, prv0, 0); int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7); // horizontal filter, polyphase implementation since it's convenient: // even pixels = 3*cur + prev = cur*4 + (prev - cur) // odd pixels = 3*cur + next = cur*4 + (next - cur) // note the shared term. 
int16x8_t curs = vshlq_n_s16(curr, 2); int16x8_t prvd = vsubq_s16(prev, curr); int16x8_t nxtd = vsubq_s16(next, curr); int16x8_t even = vaddq_s16(curs, prvd); int16x8_t odd = vaddq_s16(curs, nxtd); // undo scaling and round, then store with even/odd phases interleaved uint8x8x2_t o; o.val[0] = vqrshrun_n_s16(even, 4); o.val[1] = vqrshrun_n_s16(odd, 4); vst2_u8(out + i*2, o); #endif // "previous" value for next iter t1 = 3*in_near[i+7] + in_far[i+7]; } t0 = t1; t1 = 3*in_near[i] + in_far[i]; out[i*2] = stbi__div16(3*t1 + t0 + 8); for (++i; i < w; ++i) { t0 = t1; t1 = 3*in_near[i]+in_far[i]; out[i*2-1] = stbi__div16(3*t0 + t1 + 8); out[i*2 ] = stbi__div16(3*t1 + t0 + 8); } out[w*2-1] = stbi__div4(t1+2); STBI_NOTUSED(hs); return out; } #endif static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs) { // resample with nearest-neighbor int i,j; STBI_NOTUSED(in_far); for (i=0; i < w; ++i) for (j=0; j < hs; ++j) out[i*hs+j] = in_near[i]; return out; } // this is a reduced-precision calculation of YCbCr-to-RGB introduced // to make sure the code produces the same results in both SIMD and scalar #define stbi__float2fixed(x) (((int) ((x) * 4096.0f + 0.5f)) << 8) static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step) { int i; for (i=0; i < count; ++i) { int y_fixed = (y[i] << 20) + (1<<19); // rounding int r,g,b; int cr = pcr[i] - 128; int cb = pcb[i] - 128; r = y_fixed + cr* stbi__float2fixed(1.40200f); g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000); b = y_fixed + cb* stbi__float2fixed(1.77200f); r >>= 20; g >>= 20; b >>= 20; if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } out[0] = (stbi_uc)r; out[1] = (stbi_uc)g; out[2] = (stbi_uc)b; out[3] = 255; out += step; } } #if 
defined(STBI_SSE2) || defined(STBI_NEON) static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step) { int i = 0; #ifdef STBI_SSE2 // step == 3 is pretty ugly on the final interleave, and i'm not convinced // it's useful in practice (you wouldn't use it for textures, for example). // so just accelerate step == 4 case. if (step == 4) { // this is a fairly straightforward implementation and not super-optimized. __m128i signflip = _mm_set1_epi8(-0x80); __m128i cr_const0 = _mm_set1_epi16( (short) ( 1.40200f*4096.0f+0.5f)); __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); __m128i cb_const1 = _mm_set1_epi16( (short) ( 1.77200f*4096.0f+0.5f)); __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128); __m128i xw = _mm_set1_epi16(255); // alpha channel for (; i+7 < count; i += 8) { // load __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i)); __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i)); __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i)); __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128 __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128 // unpack to short (and left-shift cr, cb by 8) __m128i yw = _mm_unpacklo_epi8(y_bias, y_bytes); __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased); __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased); // color transform __m128i yws = _mm_srli_epi16(yw, 4); __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw); __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw); __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1); __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1); __m128i rws = _mm_add_epi16(cr0, yws); __m128i gwt = _mm_add_epi16(cb0, yws); __m128i bws = _mm_add_epi16(yws, cb1); __m128i gws = _mm_add_epi16(gwt, cr1); // descale __m128i rw = _mm_srai_epi16(rws, 4); __m128i bw = _mm_srai_epi16(bws, 4); __m128i gw = _mm_srai_epi16(gws, 
4);

         // back to byte, set up for transpose
         __m128i brb = _mm_packus_epi16(rw, bw);
         __m128i gxb = _mm_packus_epi16(gw, xw);

         // transpose to interleave channels
         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
         __m128i o1 = _mm_unpackhi_epi16(t0, t1);

         // store: two 16-byte writes = 8 pixels of 4 bytes each
         _mm_storeu_si128((__m128i *) (out + 0), o0);
         _mm_storeu_si128((__m128i *) (out + 16), o1);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      uint8x8_t signflip = vdup_n_u8(0x80);
      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      // 8 pixels per iteration
      for (; i+7 < count; i += 8) {
         // load
         uint8x8_t y_bytes  = vld1_u8(y + i);
         uint8x8_t cr_bytes = vld1_u8(pcr + i);
         uint8x8_t cb_bytes = vld1_u8(pcb + i);
         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

         // expand to s16
         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
         int16x8_t crw = vshll_n_s8(cr_biased, 7);
         int16x8_t cbw = vshll_n_s8(cb_biased, 7);

         // color transform
         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
         int16x8_t rws = vaddq_s16(yws, cr0);
         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
         int16x8_t bws = vaddq_s16(yws, cb1);

         // undo scaling, round, convert to byte
         uint8x8x4_t o;
         o.val[0] = vqrshrun_n_s16(rws, 4);
         o.val[1] = vqrshrun_n_s16(gws, 4);
         o.val[2] = vqrshrun_n_s16(bws, 4);
         o.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, o);
         out += 8*4;
      }
   }
#endif
for (; i < count; ++i) {
      // scalar path: handles whatever the vector loops above left over
      int y_fixed = (y[i] << 20) + (1<<19); // rounding
      int r,g,b;
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      r = y_fixed + cr* stbi__float2fixed(1.40200f);
      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      b = y_fixed + cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      // clamp to 0..255; the unsigned compare catches both negative and >255 in one test
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}
#endif

// set up the kernels
// Installs the portable C kernels first, then overrides them with the SIMD
// versions when compiled in (SSE2 additionally requires the runtime check).
static void stbi__setup_jpeg(stbi__jpeg *j)
{
   j->idct_block_kernel = stbi__idct_block;
   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;

#ifdef STBI_SSE2
   if (stbi__sse2_available()) {
      j->idct_block_kernel = stbi__idct_simd;
      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
   }
#endif

#ifdef STBI_NEON
   j->idct_block_kernel = stbi__idct_simd;
   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
#endif
}

// clean up the temporary component buffers
static void stbi__cleanup_jpeg(stbi__jpeg *j)
{
   stbi__free_jpeg_components(j, j->s->img_n, 0);
}

// per-component state used while upsampling rows in load_jpeg_image
typedef struct
{
   resample_row_func resample;
   stbi_uc *line0,*line1;
   int hs,vs;   // expansion factor in each axis
   int w_lores; // horizontal pixels pre-expansion
   int ystep;   // how far through vertical expansion we are
   int ypos;    // which pre-expansion row we're on
} stbi__resample;

// fast 0..255 * 0..255 => 0..255 rounded multiplication
static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
{
   unsigned int t = x*y + 128;
   return (stbi_uc) ((t + (t >>8)) >> 8);
}

// Decodes the JPEG held by z and returns a newly allocated pixel buffer,
// or NULL on failure. req_comp selects the output channel count (0 = keep).
static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
   int n, decode_n, is_rgb;
   z->s->img_n = 0; // make stbi__cleanup_jpeg
// safe

   // validate req_comp
   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   // load a jpeg image from whichever source, but leave in YCbCr format
   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }

   // determine actual number of components to generate
   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;

   // three components are treated as plain RGB (no color transform) when the
   // decoder flagged it (z->rgb == 3) or there is no JFIF marker and the
   // Adobe APP14 transform is 0
   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));

   // a YCbCr image reduced to grey only needs its first (luma) component
   if (z->s->img_n == 3 && n < 3 && !is_rgb)
      decode_n = 1;
   else
      decode_n = z->s->img_n;

   // nothing to do if no components requested; check this now to avoid
   // accessing uninitialized coutput[0] later
   if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }

   // resample and color-convert
   {
      int k;
      unsigned int i,j;
      stbi_uc *output;
      stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };

      stbi__resample res_comp[4];

      for (k=0; k < decode_n; ++k) {
         stbi__resample *r = &res_comp[k];

         // allocate line buffer big enough for upsampling off the edges
         // with upsample factor of 4
         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

         r->hs      = z->img_h_max / z->img_comp[k].h;
         r->vs      = z->img_v_max / z->img_comp[k].v;
         r->ystep   = r->vs >> 1;
         r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
         r->ypos    = 0;
         r->line0   = r->line1 = z->img_comp[k].data;

         // pick the cheapest resampler that matches this component's expansion factors
         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
         else                               r->resample = stbi__resample_row_generic;
      }

      // can't error after this so, this is safe
      // (the final argument requests one byte beyond n*img_x*img_y)
      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

      // now go ahead and resample
      for (j=0; j < z->s->img_y;
++j) {
         stbi_uc *out = output + n * z->s->img_x * j;
         // produce one full-resolution row per decoded component
         for (k=0; k < decode_n; ++k) {
            stbi__resample *r = &res_comp[k];
            // y_bot selects which of line0/line1 is passed as the first ("near") input row
            int y_bot = r->ystep >= (r->vs >> 1);
            coutput[k] = r->resample(z->img_comp[k].linebuf,
                                     y_bot ? r->line1 : r->line0,
                                     y_bot ? r->line0 : r->line1,
                                     r->w_lores, r->hs);
            if (++r->ystep >= r->vs) {
               r->ystep = 0;
               r->line0 = r->line1;
               // only advance line1 while there is another source row
               if (++r->ypos < z->img_comp[k].y)
                  r->line1 += z->img_comp[k].w2;
            }
         }
         if (n >= 3) {
            stbi_uc *y = coutput[0];
            if (z->s->img_n == 3) {
               if (is_rgb) {
                  for (i=0; i < z->s->img_x; ++i) {
                     out[0] = y[i];
                     out[1] = coutput[1][i];
                     out[2] = coutput[2][i];
                     out[3] = 255;
                     out += n;
                  }
               } else {
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else if (z->s->img_n == 4) {
               if (z->app14_color_transform == 0) { // CMYK
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(coutput[0][i], m);
                     out[1] = stbi__blinn_8x8(coutput[1][i], m);
                     out[2] = stbi__blinn_8x8(coutput[2][i], m);
                     out[3] = 255;
                     out += n;
                  }
               } else if (z->app14_color_transform == 2) { // YCCK
                  // convert to RGB first, then invert and scale by the fourth channel in place
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(255 - out[0], m);
                     out[1] = stbi__blinn_8x8(255 - out[1], m);
                     out[2] = stbi__blinn_8x8(255 - out[2], m);
                     out += n;
                  }
               } else { // YCbCr + alpha?
// Ignore the fourth channel for now
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else
               // single-component source: replicate grey into r,g,b
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = out[1] = out[2] = y[i];
                  out[3] = 255; // not used if n==3
                  out += n;
               }
         } else {
            // n is 1 or 2 here: grey, or grey + alpha
            if (is_rgb) {
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i)
                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
               else {
                  for (i=0; i < z->s->img_x; ++i, out += 2) {
                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
                     out[1] = 255;
                  }
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
               for (i=0; i < z->s->img_x; ++i) {
                  stbi_uc m = coutput[3][i];
                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
                  out[0] = stbi__compute_y(r, g, b);
                  // NOTE(review): when n == 1 this lands on the next pixel's byte (overwritten
                  // on the next iteration); for the last pixel it presumably relies on the
                  // extra byte requested when output was allocated - confirm
                  out[1] = 255;
                  out += n;
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
                  out[1] = 255;
                  out += n;
               }
            } else {
               stbi_uc *y = coutput[0];
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
               else
                  for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
            }
         }
      }
      stbi__cleanup_jpeg(z);
      *out_x = z->s->img_x;
      *out_y = z->s->img_y;
      if (comp) *comp = z->s->img_n >= 3 ?
3 : 1; // report original components, not output return output; } } static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { unsigned char* result; stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg)); if (!j) return stbi__errpuc("outofmem", "Out of memory"); memset(j, 0, sizeof(stbi__jpeg)); STBI_NOTUSED(ri); j->s = s; stbi__setup_jpeg(j); result = load_jpeg_image(j, x,y,comp,req_comp); STBI_FREE(j); return result; } static int stbi__jpeg_test(stbi__context *s) { int r; stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg)); if (!j) return stbi__err("outofmem", "Out of memory"); memset(j, 0, sizeof(stbi__jpeg)); j->s = s; stbi__setup_jpeg(j); r = stbi__decode_jpeg_header(j, STBI__SCAN_type); stbi__rewind(s); STBI_FREE(j); return r; } static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp) { if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) { stbi__rewind( j->s ); return 0; } if (x) *x = j->s->img_x; if (y) *y = j->s->img_y; if (comp) *comp = j->s->img_n >= 3 ? 
3 : 1; return 1; } static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp) { int result; stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg))); if (!j) return stbi__err("outofmem", "Out of memory"); memset(j, 0, sizeof(stbi__jpeg)); j->s = s; result = stbi__jpeg_info_raw(j, x, y, comp); STBI_FREE(j); return result; } #endif // public domain zlib decode v0.2 Sean Barrett 2006-11-18 // simple implementation // - all input must be provided in an upfront buffer // - all output is written to a single output buffer (can malloc/realloc) // performance // - fast huffman #ifndef STBI_NO_ZLIB // fast-way is faster to check than jpeg huffman, but slow way is slower #define STBI__ZFAST_BITS 9 // accelerate all cases in default tables #define STBI__ZFAST_MASK ((1 << STBI__ZFAST_BITS) - 1) #define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet // zlib-style huffman encoding // (jpegs packs from left, zlib from right, so can't share code) typedef struct { stbi__uint16 fast[1 << STBI__ZFAST_BITS]; stbi__uint16 firstcode[16]; int maxcode[17]; stbi__uint16 firstsymbol[16]; stbi_uc size[STBI__ZNSYMS]; stbi__uint16 value[STBI__ZNSYMS]; } stbi__zhuffman; stbi_inline static int stbi__bitreverse16(int n) { n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); return n; } stbi_inline static int stbi__bit_reverse(int v, int bits) { STBI_ASSERT(bits <= 16); // to bit reverse n bits, reverse 16 and shift // e.g. 
// 11 bits, bit reverse and shift away 5
   return stbi__bitreverse16(v) >> (16-bits);
}

// Builds the decode tables in z from num code lengths in sizelist
// (sizelist[i] is the bit length of symbol i, 0 meaning "unused").
// Returns 1 on success, or the stbi__err result when the lengths do not
// describe a valid code.
static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
{
   int i,k=0;
   int code, next_code[16], sizes[17];

   // DEFLATE spec for generating codes
   memset(sizes, 0, sizeof(sizes));
   memset(z->fast, 0, sizeof(z->fast));
   // histogram of code lengths
   for (i=0; i < num; ++i)
      ++sizes[sizelist[i]];
   sizes[0] = 0;
   for (i=1; i < 16; ++i)
      if (sizes[i] > (1 << i))
         return stbi__err("bad sizes", "Corrupt PNG");
   code = 0;
   // assign the first code and first symbol index for every length
   for (i=1; i < 16; ++i) {
      next_code[i] = code;
      z->firstcode[i] = (stbi__uint16) code;
      z->firstsymbol[i] = (stbi__uint16) k;
      code = (code + sizes[i]);
      if (sizes[i])
         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[i] = code << (16-i); // preshift for inner loop
      code <<= 1;
      k += sizes[i];
   }
   z->maxcode[16] = 0x10000; // sentinel
   for (i=0; i < num; ++i) {
      int s = sizelist[i];
      if (s) {
         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
         // fast-table entry packs the code length above the 9-bit symbol
         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
         z->size [c] = (stbi_uc     ) s;
         z->value[c] = (stbi__uint16) i;
         if (s <= STBI__ZFAST_BITS) {
            // fill every fast-table slot whose low s bits equal the reversed code
            int j = stbi__bit_reverse(next_code[s],s);
            while (j < (1 << STBI__ZFAST_BITS)) {
               z->fast[j] = fastv;
               j += (1 << s);
            }
         }
         ++next_code[s];
      }
   }
   return 1;
}

// zlib-from-memory implementation for PNG reading
//    because PNG allows splitting the zlib stream arbitrarily,
//    and it's annoying structurally to have PNG call ZLIB call PNG,
//    we require PNG read all the IDATs and combine them into a single
//    memory buffer

typedef struct
{
   stbi_uc *zbuffer, *zbuffer_end; // current read position / end of compressed input
   int num_bits;                   // number of valid bits in code_buffer
   int hit_zeof_once;              // set once 16 padding bits were added at end of input
   stbi__uint32 code_buffer;

   char *zout;                     // current write position in the output
   char *zout_start;
   char *zout_end;
   int   z_expandable;             // non-zero if stbi__zexpand may grow the output

   stbi__zhuffman z_length, z_distance;
} stbi__zbuf;

// true once all input bytes have been consumed
stbi_inline static int stbi__zeof(stbi__zbuf *z)
{
   return (z->zbuffer >= z->zbuffer_end);
}

// next input byte, or 0 when the input is exhausted
stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
{
   return stbi__zeof(z) ?
0 : *z->zbuffer++;
}

// Tops up code_buffer one input byte at a time until it holds more than 24 bits.
static void stbi__fill_bits(stbi__zbuf *z)
{
   do {
      // bits above num_bits must be zero; anything else means the state is inconsistent
      if (z->code_buffer >= (1U << z->num_bits)) {
        z->zbuffer = z->zbuffer_end;  /* treat this as EOF so we fail. */
        return;
      }
      z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
      z->num_bits += 8;
   } while (z->num_bits <= 24);
}

// Removes and returns the next n bits (least significant first) from the bit buffer.
stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
{
   unsigned int k;
   if (z->num_bits < n) stbi__fill_bits(z);
   k = z->code_buffer & ((1 << n) - 1);
   z->code_buffer >>= n;
   z->num_bits -= n;
   return k;
}

// Decodes one symbol whose code is longer than STBI__ZFAST_BITS.
// Returns the symbol value, or -1 when the bits do not form a valid code.
static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s,k;
   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = stbi__bit_reverse(a->code_buffer, 16);
   // the loop always terminates: maxcode[16] is a sentinel larger than any 16-bit k
   for (s=STBI__ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s >= 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere!
   if (z->size[b] != s) return -1;  // was originally an assert, but report failure instead.
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}

// Decodes the next Huffman symbol from a using table z.
// Returns the symbol, or -1 on invalid or truncated input.
stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s;
   if (a->num_bits < 16) {
      if (stbi__zeof(a)) {
         if (!a->hit_zeof_once) {
            // This is the first time we hit eof, insert 16 extra padding bits
            // to allow us to keep going; if we actually consume any of them
            // though, that is invalid data. This is caught later.
            a->hit_zeof_once = 1;
            a->num_bits += 16; // add 16 implicit zero bits
         } else {
            // We already inserted our extra 16 padding bits and are again
            // out, this stream is actually prematurely terminated.
return -1; } } else { stbi__fill_bits(a); } } b = z->fast[a->code_buffer & STBI__ZFAST_MASK]; if (b) { s = b >> 9; a->code_buffer >>= s; a->num_bits -= s; return b & 511; } return stbi__zhuffman_decode_slowpath(a, z); } static int stbi__zexpand(stbi__zbuf *z, char *zout, int n) // need to make room for n bytes { char *q; unsigned int cur, limit, old_limit; z->zout = zout; if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG"); cur = (unsigned int) (z->zout - z->zout_start); limit = old_limit = (unsigned) (z->zout_end - z->zout_start); if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory"); while (cur + n > limit) { if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory"); limit *= 2; } q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit); STBI_NOTUSED(old_limit); if (q == NULL) return stbi__err("outofmem", "Out of memory"); z->zout_start = q; z->zout = q + cur; z->zout_end = q + limit; return 1; } static const int stbi__zlength_base[31] = { 3,4,5,6,7,8,9,10,11,13, 15,17,19,23,27,31,35,43,51,59, 67,83,99,115,131,163,195,227,258,0,0 }; static const int stbi__zlength_extra[31]= { 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, 257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; static const int stbi__zdist_extra[32] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; static int stbi__parse_huffman_block(stbi__zbuf *a) { char *zout = a->zout; for(;;) { int z = stbi__zhuffman_decode(a, &a->z_length); if (z < 256) { if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes if (zout >= a->zout_end) { if (!stbi__zexpand(a, zout, 1)) return 0; zout = a->zout; } *zout++ = (char) z; } else { stbi_uc *p; int len,dist; if (z == 256) { a->zout = zout; if (a->hit_zeof_once && a->num_bits < 16) { // The first time we hit zeof, we 
// inserted 16 extra zero bits into our bit
               // buffer so the decoder can just do its speculative decoding. But if we
               // actually consumed any of those bits (which is the case when num_bits < 16),
               // the stream actually read past the end so it is malformed.
               return stbi__err("unexpected end","Corrupt PNG");
            }
            return 1;
         }
         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data
         // length: table base plus any extra bits
         z -= 257;
         len = stbi__zlength_base[z];
         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
         // distance: decoded with the distance table, then base plus extra bits
         z = stbi__zhuffman_decode(a, &a->z_distance);
         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data
         dist = stbi__zdist_base[z];
         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
         // a back-reference may not reach before the start of the output
         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
         if (len > a->zout_end - zout) {
            if (!stbi__zexpand(a, zout, len)) return 0;
            zout = a->zout;
         }
         p = (stbi_uc *) (zout - dist);
         if (dist == 1) { // run of one byte; common in images.
stbi_uc v = *p;
            if (len) { do *zout++ = v; while (--len); }
         } else {
            // copy byte by byte: source and destination may overlap when dist < len
            if (len) { do *zout++ = *p++; while (--len); }
         }
      }
   }
}

// Reads the dynamic Huffman table description at the start of a block and
// builds a->z_length and a->z_distance from it.
// Returns 1 on success, 0 when the description is invalid.
static int stbi__compute_huffman_codes(stbi__zbuf *a)
{
   // order in which the code-length code lengths are stored in the stream
   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   stbi__zhuffman z_codelength;
   stbi_uc lencodes[286+32+137];//padding for maximum single op
   stbi_uc codelength_sizes[19];
   int i,n;

   int hlit  = stbi__zreceive(a,5) + 257;
   int hdist = stbi__zreceive(a,5) + 1;
   int hclen = stbi__zreceive(a,4) + 4;
   int ntot  = hlit + hdist;

   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (i=0; i < hclen; ++i) {
      int s = stbi__zreceive(a,3);
      codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
   }
   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   // decode the hlit + hdist code lengths; symbols 16..18 are run-length codes
   n = 0;
   while (n < ntot) {
      int c = stbi__zhuffman_decode(a, &z_codelength);
      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
      if (c < 16)
         lencodes[n++] = (stbi_uc) c;
      else {
         stbi_uc fill = 0;
         if (c == 16) {
            // repeat the previous length 3..6 times
            c = stbi__zreceive(a,2)+3;
            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
            fill = lencodes[n-1];
         } else if (c == 17) {
            // 3..10 zeros
            c = stbi__zreceive(a,3)+3;
         } else if (c == 18) {
            // 11..138 zeros
            c = stbi__zreceive(a,7)+11;
         } else {
            return stbi__err("bad codelengths", "Corrupt PNG");
         }
         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
         memset(lencodes+n, fill, c);
         n += c;
      }
   }
   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   return 1;
}

// Copies one stored (uncompressed) block from the input to the output.
// Returns 1 on success, 0 on a corrupt header or insufficient input/output space.
static int stbi__parse_uncompressed_block(stbi__zbuf *a)
{
   stbi_uc header[4];
   int len,nlen,k;
   if (a->num_bits & 7)
      stbi__zreceive(a, a->num_bits & 7); // discard
   // drain the bit-packed data into header
   k = 0;
   while (a->num_bits > 0) {
      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
      a->code_buffer >>= 8;
      a->num_bits -= 8;
   }
   if (a->num_bits < 0)
return stbi__err("zlib corrupt","Corrupt PNG"); // now fill header the normal way while (k < 4) header[k++] = stbi__zget8(a); len = header[1] * 256 + header[0]; nlen = header[3] * 256 + header[2]; if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG"); if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG"); if (a->zout + len > a->zout_end) if (!stbi__zexpand(a, a->zout, len)) return 0; memcpy(a->zout, a->zbuffer, len); a->zbuffer += len; a->zout += len; return 1; } static int stbi__parse_zlib_header(stbi__zbuf *a) { int cmf = stbi__zget8(a); int cm = cmf & 15; /* int cinfo = cmf >> 4; */ int flg = stbi__zget8(a); if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png // window = 1 << (8 + cinfo)... 
but who cares, we fully buffer output return 1; } static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] = { 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8 }; static const stbi_uc stbi__zdefault_distance[32] = { 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5 }; /* Init algorithm: { int i; // use <= to match clearly with spec for (i=0; i <= 143; ++i) stbi__zdefault_length[i] = 8; for ( ; i <= 255; ++i) stbi__zdefault_length[i] = 9; for ( ; i <= 279; ++i) stbi__zdefault_length[i] = 7; for ( ; i <= 287; ++i) stbi__zdefault_length[i] = 8; for (i=0; i <= 31; ++i) stbi__zdefault_distance[i] = 5; } */ static int stbi__parse_zlib(stbi__zbuf *a, int parse_header) { int final, type; if (parse_header) if (!stbi__parse_zlib_header(a)) return 0; a->num_bits = 0; a->code_buffer = 0; a->hit_zeof_once = 0; do { final = stbi__zreceive(a,1); type = stbi__zreceive(a,2); if (type == 0) { if (!stbi__parse_uncompressed_block(a)) return 0; } else if (type == 3) { return 0; } else { if (type == 1) { // use fixed code lengths if (!stbi__zbuild_huffman(&a->z_length , stbi__zdefault_length , STBI__ZNSYMS)) return 0; if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance, 32)) return 0; } else { if (!stbi__compute_huffman_codes(a)) return 0; } if (!stbi__parse_huffman_block(a)) return 0; } } while (!final); return 1; } static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header) { a->zout_start = 
obuf; a->zout = obuf; a->zout_end = obuf + olen; a->z_expandable = exp; return stbi__parse_zlib(a, parse_header); } STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) { stbi__zbuf a; char *p = (char *) stbi__malloc(initial_size); if (p == NULL) return NULL; a.zbuffer = (stbi_uc *) buffer; a.zbuffer_end = (stbi_uc *) buffer + len; if (stbi__do_zlib(&a, p, initial_size, 1, 1)) { if (outlen) *outlen = (int) (a.zout - a.zout_start); return a.zout_start; } else { STBI_FREE(a.zout_start); return NULL; } } STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) { return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); } STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) { stbi__zbuf a; char *p = (char *) stbi__malloc(initial_size); if (p == NULL) return NULL; a.zbuffer = (stbi_uc *) buffer; a.zbuffer_end = (stbi_uc *) buffer + len; if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) { if (outlen) *outlen = (int) (a.zout - a.zout_start); return a.zout_start; } else { STBI_FREE(a.zout_start); return NULL; } } STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) { stbi__zbuf a; a.zbuffer = (stbi_uc *) ibuffer; a.zbuffer_end = (stbi_uc *) ibuffer + ilen; if (stbi__do_zlib(&a, obuffer, olen, 0, 1)) return (int) (a.zout - a.zout_start); else return -1; } STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) { stbi__zbuf a; char *p = (char *) stbi__malloc(16384); if (p == NULL) return NULL; a.zbuffer = (stbi_uc *) buffer; a.zbuffer_end = (stbi_uc *) buffer+len; if (stbi__do_zlib(&a, p, 16384, 1, 0)) { if (outlen) *outlen = (int) (a.zout - a.zout_start); return a.zout_start; } else { STBI_FREE(a.zout_start); return NULL; } } STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) 
{ stbi__zbuf a; a.zbuffer = (stbi_uc *) ibuffer; a.zbuffer_end = (stbi_uc *) ibuffer + ilen; if (stbi__do_zlib(&a, obuffer, olen, 0, 0)) return (int) (a.zout - a.zout_start); else return -1; } #endif // public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 // simple implementation // - only 8-bit samples // - no CRC checking // - allocates lots of intermediate memory // - avoids problem of streaming data between subsystems // - avoids explicit window management // performance // - uses stb_zlib, a PD zlib implementation with fast huffman decoding #ifndef STBI_NO_PNG typedef struct { stbi__uint32 length; stbi__uint32 type; } stbi__pngchunk; static stbi__pngchunk stbi__get_chunk_header(stbi__context *s) { stbi__pngchunk c; c.length = stbi__get32be(s); c.type = stbi__get32be(s); return c; } static int stbi__check_png_header(stbi__context *s) { static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 }; int i; for (i=0; i < 8; ++i) if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG"); return 1; } typedef struct { stbi__context *s; stbi_uc *idata, *expanded, *out; int depth; } stbi__png; enum { STBI__F_none=0, STBI__F_sub=1, STBI__F_up=2, STBI__F_avg=3, STBI__F_paeth=4, // synthetic filter used for first scanline to avoid needing a dummy row of 0s STBI__F_avg_first }; static stbi_uc first_row_filter[5] = { STBI__F_none, STBI__F_sub, STBI__F_none, STBI__F_avg_first, STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub }; static int stbi__paeth(int a, int b, int c) { // This formulation looks very different from the reference in the PNG spec, but is // actually equivalent and has favorable data dependencies and admits straightforward // generation of branch-free code, which helps performance significantly. int thresh = c*3 - (a + b); int lo = a < b ? a : b; int hi = a < b ? b : a; int t0 = (hi <= thresh) ? lo : c; int t1 = (thresh <= lo) ? 
hi : t0;
   return t1;
}

// multiplier that scales a sample of the given bit depth (1, 2, 4 or 8) to 0..255
static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };

// adds an extra all-255 alpha channel
// dest == src is legal
// img_n must be 1 or 3
//   x is the number of pixels to convert
static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n)
{
   int i;
   // must process data backwards since we allow dest==src
   if (img_n == 1) {
      for (i=x-1; i >= 0; --i) {
         dest[i*2+1] = 255;
         dest[i*2+0] = src[i];
      }
   } else {
      STBI_ASSERT(img_n == 3);
      for (i=x-1; i >= 0; --i) {
         dest[i*4+3] = 255;
         dest[i*4+2] = src[i*3+2];
         dest[i*4+1] = src[i*3+1];
         dest[i*4+0] = src[i*3+0];
      }
   }
}

// create the png data from post-deflated data
//   raw/raw_len  filtered scanlines, each preceded by a filter-type byte
//   out_n        output channels: s->img_n, or s->img_n+1 to add an alpha channel
//   x, y         dimensions of the (sub)image being decoded
//   depth        bits per sample
//   color        0 selects scaling of low-bit-depth grey samples to 0..255
// The result is stored in a->out. Returns 1 on success, 0 on failure.
static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
{
   int bytes = (depth == 16 ? 2 : 1);
   stbi__context *s = a->s;
   stbi__uint32 i,j,stride = x*out_n*bytes;
   stbi__uint32 img_len, img_width_bytes;
   stbi_uc *filter_buf;
   int all_ok = 1;
   int k;
   int img_n = s->img_n; // copy it into a local for later

   int output_bytes = out_n*bytes;
   int filter_bytes = img_n*bytes;
   int width = x;

   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
   if (!a->out) return stbi__err("outofmem", "Out of memory");

   // note: error exits here don't need to clean up a->out individually,
   // stbi__do_png always does on error.
   if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
   // bytes per scanline, rounded up to a whole byte
   img_width_bytes = (((img_n * x * depth) + 7) >> 3);
   if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG");
   // +1 per row for the filter-type byte
   img_len = (img_width_bytes + 1) * y;

   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
   // so just check for raw_len < img_len always.
if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");

   // Allocate two scan lines worth of filter workspace buffer.
   filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0);
   if (!filter_buf) return stbi__err("outofmem", "Out of memory");

   // Filtering for low-bit-depth images
   // (filters then operate on whole packed bytes, one byte at a time)
   if (depth < 8) {
      filter_bytes = 1;
      width = img_width_bytes;
   }

   for (j=0; j < y; ++j) {
      // cur/prior filter buffers alternate
      stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes;
      stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes;
      stbi_uc *dest = a->out + stride*j;
      int nk = width * filter_bytes;
      int filter = *raw++;

      // check filter type
      if (filter > 4) {
         all_ok = stbi__err("invalid filter","Corrupt PNG");
         break;
      }

      // if first row, use special filter that doesn't sample previous row
      if (j == 0) filter = first_row_filter[filter];

      // perform actual filtering
      switch (filter) {
      case STBI__F_none:
         memcpy(cur, raw, nk);
         break;
      case STBI__F_sub:
         // the first pixel has no left neighbour, so it is copied unchanged
         memcpy(cur, raw, filter_bytes);
         for (k = filter_bytes; k < nk; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]);
         break;
      case STBI__F_up:
         for (k = 0; k < nk; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
         break;
      case STBI__F_avg:
         for (k = 0; k < filter_bytes; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1));
         for (k = filter_bytes; k < nk; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1));
         break;
      case STBI__F_paeth:
         for (k = 0; k < filter_bytes; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0)
         for (k = filter_bytes; k < nk; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes]));
         break;
      case STBI__F_avg_first:
         memcpy(cur, raw, filter_bytes);
         for (k = filter_bytes; k < nk; ++k)
            cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1));
         break;
      }

      raw += nk;

      // expand decoded bits in cur to dest, also adding an extra alpha channel if desired
      if (depth < 8) {
         stbi_uc scale = (color == 0) ?
stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
         stbi_uc *in = cur;
         stbi_uc *out = dest;
         stbi_uc inb = 0;
         stbi__uint32 nsmp = x*img_n;

         // expand bits to bytes first
         // (samples are taken from the most significant bits of each input byte first)
         if (depth == 4) {
            for (i=0; i < nsmp; ++i) {
               if ((i & 1) == 0) inb = *in++;
               *out++ = scale * (inb >> 4);
               inb <<= 4;
            }
         } else if (depth == 2) {
            for (i=0; i < nsmp; ++i) {
               if ((i & 3) == 0) inb = *in++;
               *out++ = scale * (inb >> 6);
               inb <<= 2;
            }
         } else {
            STBI_ASSERT(depth == 1);
            for (i=0; i < nsmp; ++i) {
               if ((i & 7) == 0) inb = *in++;
               *out++ = scale * (inb >> 7);
               inb <<= 1;
            }
         }

         // insert alpha=255 values if desired
         if (img_n != out_n)
            stbi__create_png_alpha_expand8(dest, dest, x, img_n);
      } else if (depth == 8) {
         if (img_n == out_n)
            memcpy(dest, cur, x*img_n);
         else
            stbi__create_png_alpha_expand8(dest, cur, x, img_n);
      } else if (depth == 16) {
         // convert the image data from big-endian to platform-native
         stbi__uint16 *dest16 = (stbi__uint16*)dest;
         stbi__uint32 nsmp = x*img_n;

         if (img_n == out_n) {
            for (i = 0; i < nsmp; ++i, ++dest16, cur += 2)
               *dest16 = (cur[0] << 8) | cur[1];
         } else {
            // adding an alpha channel: fully opaque is 0xffff at 16 bits
            STBI_ASSERT(img_n+1 == out_n);
            if (img_n == 1) {
               for (i = 0; i < x; ++i, dest16 += 2, cur += 2) {
                  dest16[0] = (cur[0] << 8) | cur[1];
                  dest16[1] = 0xffff;
               }
            } else {
               STBI_ASSERT(img_n == 3);
               for (i = 0; i < x; ++i, dest16 += 4, cur += 6) {
                  dest16[0] = (cur[0] << 8) | cur[1];
                  dest16[1] = (cur[2] << 8) | cur[3];
                  dest16[2] = (cur[4] << 8) | cur[5];
                  dest16[3] = 0xffff;
               }
            }
         }
      }
   }

   STBI_FREE(filter_buf);
   if (!all_ok) return 0;

   return 1;
}

// Builds the final image in a->out from decompressed PNG data, handling
// both non-interlaced images and images stored as seven interlace passes.
// Returns 1 on success, 0 on failure.
static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
{
   int bytes = (depth == 16 ?
2 : 1); int out_bytes = out_n * bytes; stbi_uc *final; int p; if (!interlaced) return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color); // de-interlacing final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0); if (!final) return stbi__err("outofmem", "Out of memory"); for (p=0; p < 7; ++p) { int xorig[] = { 0,4,0,2,0,1,0 }; int yorig[] = { 0,0,4,0,2,0,1 }; int xspc[] = { 8,8,4,4,2,2,1 }; int yspc[] = { 8,8,8,4,4,2,2 }; int i,j,x,y; // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p]; y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p]; if (x && y) { stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y; if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) { STBI_FREE(final); return 0; } for (j=0; j < y; ++j) { for (i=0; i < x; ++i) { int out_y = j*yspc[p]+yorig[p]; int out_x = i*xspc[p]+xorig[p]; memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes, a->out + (j*x+i)*out_bytes, out_bytes); } } STBI_FREE(a->out); image_data += img_len; image_data_len -= img_len; } } a->out = final; return 1; } static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n) { stbi__context *s = z->s; stbi__uint32 i, pixel_count = s->img_x * s->img_y; stbi_uc *p = z->out; // compute color-based transparency, assuming we've // already got 255 as the alpha value in the output STBI_ASSERT(out_n == 2 || out_n == 4); if (out_n == 2) { for (i=0; i < pixel_count; ++i) { p[1] = (p[0] == tc[0] ? 
0 : 255); p += 2; } } else { for (i=0; i < pixel_count; ++i) { if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) p[3] = 0; p += 4; } } return 1; } static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n) { stbi__context *s = z->s; stbi__uint32 i, pixel_count = s->img_x * s->img_y; stbi__uint16 *p = (stbi__uint16*) z->out; // compute color-based transparency, assuming we've // already got 65535 as the alpha value in the output STBI_ASSERT(out_n == 2 || out_n == 4); if (out_n == 2) { for (i = 0; i < pixel_count; ++i) { p[1] = (p[0] == tc[0] ? 0 : 65535); p += 2; } } else { for (i = 0; i < pixel_count; ++i) { if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) p[3] = 0; p += 4; } } return 1; } static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n) { stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y; stbi_uc *p, *temp_out, *orig = a->out; p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0); if (p == NULL) return stbi__err("outofmem", "Out of memory"); // between here and free(out) below, exitting would leak temp_out = p; if (pal_img_n == 3) { for (i=0; i < pixel_count; ++i) { int n = orig[i]*4; p[0] = palette[n ]; p[1] = palette[n+1]; p[2] = palette[n+2]; p += 3; } } else { for (i=0; i < pixel_count; ++i) { int n = orig[i]*4; p[0] = palette[n ]; p[1] = palette[n+1]; p[2] = palette[n+2]; p[3] = palette[n+3]; p += 4; } } STBI_FREE(a->out); a->out = temp_out; STBI_NOTUSED(len); return 1; } static int stbi__unpremultiply_on_load_global = 0; static int stbi__de_iphone_flag_global = 0; STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply) { stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply; } STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) { stbi__de_iphone_flag_global = flag_true_if_should_convert; } #ifndef STBI_THREAD_LOCAL #define stbi__unpremultiply_on_load stbi__unpremultiply_on_load_global #define 
stbi__de_iphone_flag stbi__de_iphone_flag_global #else static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set; static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set; STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply) { stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply; stbi__unpremultiply_on_load_set = 1; } STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert) { stbi__de_iphone_flag_local = flag_true_if_should_convert; stbi__de_iphone_flag_set = 1; } #define stbi__unpremultiply_on_load (stbi__unpremultiply_on_load_set \ ? stbi__unpremultiply_on_load_local \ : stbi__unpremultiply_on_load_global) #define stbi__de_iphone_flag (stbi__de_iphone_flag_set \ ? stbi__de_iphone_flag_local \ : stbi__de_iphone_flag_global) #endif // STBI_THREAD_LOCAL static void stbi__de_iphone(stbi__png *z) { stbi__context *s = z->s; stbi__uint32 i, pixel_count = s->img_x * s->img_y; stbi_uc *p = z->out; if (s->img_out_n == 3) { // convert bgr to rgb for (i=0; i < pixel_count; ++i) { stbi_uc t = p[0]; p[0] = p[2]; p[2] = t; p += 3; } } else { STBI_ASSERT(s->img_out_n == 4); if (stbi__unpremultiply_on_load) { // convert bgr to rgb and unpremultiply for (i=0; i < pixel_count; ++i) { stbi_uc a = p[3]; stbi_uc t = p[0]; if (a) { stbi_uc half = a / 2; p[0] = (p[2] * 255 + half) / a; p[1] = (p[1] * 255 + half) / a; p[2] = ( t * 255 + half) / a; } else { p[0] = p[2]; p[2] = t; } p += 4; } } else { // convert bgr to rgb for (i=0; i < pixel_count; ++i) { stbi_uc t = p[0]; p[0] = p[2]; p[2] = t; p += 4; } } } } #define STBI__PNG_TYPE(a,b,c,d) (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d)) static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp) { stbi_uc palette[1024], pal_img_n=0; stbi_uc has_trans=0, tc[3]={0}; stbi__uint16 tc16[3]; stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0; int 
first=1,k,interlace=0, color=0, is_iphone=0; stbi__context *s = z->s; z->expanded = NULL; z->idata = NULL; z->out = NULL; if (!stbi__check_png_header(s)) return 0; if (scan == STBI__SCAN_type) return 1; for (;;) { stbi__pngchunk c = stbi__get_chunk_header(s); switch (c.type) { case STBI__PNG_TYPE('C','g','B','I'): is_iphone = 1; stbi__skip(s, c.length); break; case STBI__PNG_TYPE('I','H','D','R'): { int comp,filter; if (!first) return stbi__err("multiple IHDR","Corrupt PNG"); first = 0; if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG"); s->img_x = stbi__get32be(s); s->img_y = stbi__get32be(s); if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)"); z->depth = stbi__get8(s); if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16) return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only"); color = stbi__get8(s); if (color > 6) return stbi__err("bad ctype","Corrupt PNG"); if (color == 3 && z->depth == 16) return stbi__err("bad ctype","Corrupt PNG"); if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG"); comp = stbi__get8(s); if (comp) return stbi__err("bad comp method","Corrupt PNG"); filter= stbi__get8(s); if (filter) return stbi__err("bad filter method","Corrupt PNG"); interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG"); if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG"); if (!pal_img_n) { s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode"); } else { // if paletted, then pal_n is our final components, and // img_n is # components to decompress/filter. 
s->img_n = 1; if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG"); } // even with SCAN_header, have to scan to see if we have a tRNS break; } case STBI__PNG_TYPE('P','L','T','E'): { if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG"); pal_len = c.length / 3; if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG"); for (i=0; i < pal_len; ++i) { palette[i*4+0] = stbi__get8(s); palette[i*4+1] = stbi__get8(s); palette[i*4+2] = stbi__get8(s); palette[i*4+3] = 255; } break; } case STBI__PNG_TYPE('t','R','N','S'): { if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG"); if (pal_img_n) { if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; } if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG"); if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG"); pal_img_n = 4; for (i=0; i < c.length; ++i) palette[i*4+3] = stbi__get8(s); } else { if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG"); if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG"); has_trans = 1; // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now. 
if (scan == STBI__SCAN_header) { ++s->img_n; return 1; } if (z->depth == 16) { for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is } else { for (k = 0; k < s->img_n && k < 3; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger } } break; } case STBI__PNG_TYPE('I','D','A','T'): { if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG"); if (scan == STBI__SCAN_header) { // header scan definitely stops at first IDAT if (pal_img_n) s->img_n = pal_img_n; return 1; } if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes"); if ((int)(ioff + c.length) < (int)ioff) return 0; if (ioff + c.length > idata_limit) { stbi__uint32 idata_limit_old = idata_limit; stbi_uc *p; if (idata_limit == 0) idata_limit = c.length > 4096 ? 
c.length : 4096; while (ioff + c.length > idata_limit) idata_limit *= 2; STBI_NOTUSED(idata_limit_old); p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory"); z->idata = p; } if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG"); ioff += c.length; break; } case STBI__PNG_TYPE('I','E','N','D'): { stbi__uint32 raw_len, bpl; if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if (scan != STBI__SCAN_load) return 1; if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG"); // initial guess for decoded data size to avoid unnecessary reallocs bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */; z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone); if (z->expanded == NULL) return 0; // zlib should set error STBI_FREE(z->idata); z->idata = NULL; if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) s->img_out_n = s->img_n+1; else s->img_out_n = s->img_n; if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0; if (has_trans) { if (z->depth == 16) { if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0; } else { if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0; } } if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2) stbi__de_iphone(z); if (pal_img_n) { // pal_img_n == 3 or 4 s->img_n = pal_img_n; // record the actual colors we had s->img_out_n = pal_img_n; if (req_comp >= 3) s->img_out_n = req_comp; if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n)) return 0; } else if (has_trans) { // non-paletted image with tRNS -> source image has (constant) alpha ++s->img_n; } STBI_FREE(z->expanded); z->expanded = NULL; // end of PNG chunk, read and skip CRC 
stbi__get32be(s); return 1; } default: // if critical, fail if (first) return stbi__err("first not IHDR", "Corrupt PNG"); if ((c.type & (1 << 29)) == 0) { #ifndef STBI_NO_FAILURE_STRINGS // not threadsafe static char invalid_chunk[] = "XXXX PNG chunk not known"; invalid_chunk[0] = STBI__BYTECAST(c.type >> 24); invalid_chunk[1] = STBI__BYTECAST(c.type >> 16); invalid_chunk[2] = STBI__BYTECAST(c.type >> 8); invalid_chunk[3] = STBI__BYTECAST(c.type >> 0); #endif return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type"); } stbi__skip(s, c.length); break; } // end of PNG chunk, read and skip CRC stbi__get32be(s); } } static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri) { void *result=NULL; if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error"); if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) { if (p->depth <= 8) ri->bits_per_channel = 8; else if (p->depth == 16) ri->bits_per_channel = 16; else return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth"); result = p->out; p->out = NULL; if (req_comp && req_comp != p->s->img_out_n) { if (ri->bits_per_channel == 8) result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); else result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y); p->s->img_out_n = req_comp; if (result == NULL) return result; } *x = p->s->img_x; *y = p->s->img_y; if (n) *n = p->s->img_n; } STBI_FREE(p->out); p->out = NULL; STBI_FREE(p->expanded); p->expanded = NULL; STBI_FREE(p->idata); p->idata = NULL; return result; } static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { stbi__png p; p.s = s; return stbi__do_png(&p, x,y,comp,req_comp, ri); } static int stbi__png_test(stbi__context *s) { int r; r = stbi__check_png_header(s); stbi__rewind(s); return r; } 
static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp) { if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) { stbi__rewind( p->s ); return 0; } if (x) *x = p->s->img_x; if (y) *y = p->s->img_y; if (comp) *comp = p->s->img_n; return 1; } static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp) { stbi__png p; p.s = s; return stbi__png_info_raw(&p, x, y, comp); } static int stbi__png_is16(stbi__context *s) { stbi__png p; p.s = s; if (!stbi__png_info_raw(&p, NULL, NULL, NULL)) return 0; if (p.depth != 16) { stbi__rewind(p.s); return 0; } return 1; } #endif // Microsoft/Windows BMP image #ifndef STBI_NO_BMP static int stbi__bmp_test_raw(stbi__context *s) { int r; int sz; if (stbi__get8(s) != 'B') return 0; if (stbi__get8(s) != 'M') return 0; stbi__get32le(s); // discard filesize stbi__get16le(s); // discard reserved stbi__get16le(s); // discard reserved stbi__get32le(s); // discard data offset sz = stbi__get32le(s); r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124); return r; } static int stbi__bmp_test(stbi__context *s) { int r = stbi__bmp_test_raw(s); stbi__rewind(s); return r; } // returns 0..31 for the highest set bit static int stbi__high_bit(unsigned int z) { int n=0; if (z == 0) return -1; if (z >= 0x10000) { n += 16; z >>= 16; } if (z >= 0x00100) { n += 8; z >>= 8; } if (z >= 0x00010) { n += 4; z >>= 4; } if (z >= 0x00004) { n += 2; z >>= 2; } if (z >= 0x00002) { n += 1;/* >>= 1;*/ } return n; } static int stbi__bitcount(unsigned int a) { a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits a = (a + (a >> 8)); // max 16 per 8 bits a = (a + (a >> 16)); // max 32 per 8 bits return a & 0xff; } // extract an arbitrarily-aligned N-bit value (N=bits) // from v, and then make it 8-bits long and fractionally // extend it to full full range. 
static int stbi__shiftsigned(unsigned int v, int shift, int bits) { static unsigned int mul_table[9] = { 0, 0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/, 0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/, }; static unsigned int shift_table[9] = { 0, 0,0,1,0,2,4,6,0, }; if (shift < 0) v <<= -shift; else v >>= shift; STBI_ASSERT(v < 256); v >>= (8-bits); STBI_ASSERT(bits >= 0 && bits <= 8); return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits]; } typedef struct { int bpp, offset, hsz; unsigned int mr,mg,mb,ma, all_a; int extra_read; } stbi__bmp_data; static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress) { // BI_BITFIELDS specifies masks explicitly, don't override if (compress == 3) return 1; if (compress == 0) { if (info->bpp == 16) { info->mr = 31u << 10; info->mg = 31u << 5; info->mb = 31u << 0; } else if (info->bpp == 32) { info->mr = 0xffu << 16; info->mg = 0xffu << 8; info->mb = 0xffu << 0; info->ma = 0xffu << 24; info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0 } else { // otherwise, use defaults, which is all-0 info->mr = info->mg = info->mb = info->ma = 0; } return 1; } return 0; // error } static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info) { int hsz; if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP"); stbi__get32le(s); // discard filesize stbi__get16le(s); // discard reserved stbi__get16le(s); // discard reserved info->offset = stbi__get32le(s); info->hsz = hsz = stbi__get32le(s); info->mr = info->mg = info->mb = info->ma = 0; info->extra_read = 14; if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP"); if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown"); if (hsz == 12) { s->img_x = stbi__get16le(s); s->img_y = stbi__get16le(s); } else { s->img_x = 
stbi__get32le(s); s->img_y = stbi__get32le(s); } if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP"); info->bpp = stbi__get16le(s); if (hsz != 12) { int compress = stbi__get32le(s); if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE"); if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel stbi__get32le(s); // discard sizeof stbi__get32le(s); // discard hres stbi__get32le(s); // discard vres stbi__get32le(s); // discard colorsused stbi__get32le(s); // discard max important if (hsz == 40 || hsz == 56) { if (hsz == 56) { stbi__get32le(s); stbi__get32le(s); stbi__get32le(s); stbi__get32le(s); } if (info->bpp == 16 || info->bpp == 32) { if (compress == 0) { stbi__bmp_set_mask_defaults(info, compress); } else if (compress == 3) { info->mr = stbi__get32le(s); info->mg = stbi__get32le(s); info->mb = stbi__get32le(s); info->extra_read += 12; // not documented, but generated by photoshop and handled by mspaint if (info->mr == info->mg && info->mg == info->mb) { // ?!?!? 
return stbi__errpuc("bad BMP", "bad BMP"); } } else return stbi__errpuc("bad BMP", "bad BMP"); } } else { // V4/V5 header int i; if (hsz != 108 && hsz != 124) return stbi__errpuc("bad BMP", "bad BMP"); info->mr = stbi__get32le(s); info->mg = stbi__get32le(s); info->mb = stbi__get32le(s); info->ma = stbi__get32le(s); if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs stbi__bmp_set_mask_defaults(info, compress); stbi__get32le(s); // discard color space for (i=0; i < 12; ++i) stbi__get32le(s); // discard color space parameters if (hsz == 124) { stbi__get32le(s); // discard rendering intent stbi__get32le(s); // discard offset of profile data stbi__get32le(s); // discard size of profile data stbi__get32le(s); // discard reserved } } } return (void *) 1; } static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { stbi_uc *out; unsigned int mr=0,mg=0,mb=0,ma=0, all_a; stbi_uc pal[256][4]; int psize=0,i,j,width; int flip_vertically, pad, target; stbi__bmp_data info; STBI_NOTUSED(ri); info.all_a = 255; if (stbi__bmp_parse_header(s, &info) == NULL) return NULL; // error code already set flip_vertically = ((int) s->img_y) > 0; s->img_y = abs((int) s->img_y); if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); mr = info.mr; mg = info.mg; mb = info.mb; ma = info.ma; all_a = info.all_a; if (info.hsz == 12) { if (info.bpp < 24) psize = (info.offset - info.extra_read - 24) / 3; } else { if (info.bpp < 16) psize = (info.offset - info.extra_read - info.hsz) >> 2; } if (psize == 0) { // accept some number of extra bytes after the header, but if the offset points either to before // the header ends or implies a large amount of extra data, reject the file as malformed int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - 
s->img_buffer_original); int header_limit = 1024; // max we actually read is below 256 bytes currently. int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size. if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) { return stbi__errpuc("bad header", "Corrupt BMP"); } // we established that bytes_read_so_far is positive and sensible. // the first half of this test rejects offsets that are either too small positives, or // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn // ensures the number computed in the second half of the test can't overflow. if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) { return stbi__errpuc("bad offset", "Corrupt BMP"); } else { stbi__skip(s, info.offset - bytes_read_so_far); } } if (info.bpp == 24 && ma == 0xff000000) s->img_n = 3; else s->img_n = ma ? 4 : 3; if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 target = req_comp; else target = s->img_n; // if they want monochrome, we'll post-convert // sanity-check size if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0)) return stbi__errpuc("too large", "Corrupt BMP"); out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0); if (!out) return stbi__errpuc("outofmem", "Out of memory"); if (info.bpp < 16) { int z=0; if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); } for (i=0; i < psize; ++i) { pal[i][2] = stbi__get8(s); pal[i][1] = stbi__get8(s); pal[i][0] = stbi__get8(s); if (info.hsz != 12) stbi__get8(s); pal[i][3] = 255; } stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 
3 : 4)); if (info.bpp == 1) width = (s->img_x + 7) >> 3; else if (info.bpp == 4) width = (s->img_x + 1) >> 1; else if (info.bpp == 8) width = s->img_x; else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); } pad = (-width)&3; if (info.bpp == 1) { for (j=0; j < (int) s->img_y; ++j) { int bit_offset = 7, v = stbi__get8(s); for (i=0; i < (int) s->img_x; ++i) { int color = (v>>bit_offset)&0x1; out[z++] = pal[color][0]; out[z++] = pal[color][1]; out[z++] = pal[color][2]; if (target == 4) out[z++] = 255; if (i+1 == (int) s->img_x) break; if((--bit_offset) < 0) { bit_offset = 7; v = stbi__get8(s); } } stbi__skip(s, pad); } } else { for (j=0; j < (int) s->img_y; ++j) { for (i=0; i < (int) s->img_x; i += 2) { int v=stbi__get8(s),v2=0; if (info.bpp == 4) { v2 = v & 15; v >>= 4; } out[z++] = pal[v][0]; out[z++] = pal[v][1]; out[z++] = pal[v][2]; if (target == 4) out[z++] = 255; if (i+1 == (int) s->img_x) break; v = (info.bpp == 8) ? stbi__get8(s) : v2; out[z++] = pal[v][0]; out[z++] = pal[v][1]; out[z++] = pal[v][2]; if (target == 4) out[z++] = 255; } stbi__skip(s, pad); } } } else { int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; int z = 0; int easy=0; stbi__skip(s, info.offset - info.extra_read - info.hsz); if (info.bpp == 24) width = 3 * s->img_x; else if (info.bpp == 16) width = 2*s->img_x; else /* bpp = 32 and pad = 0 */ width=0; pad = (-width) & 3; if (info.bpp == 24) { easy = 1; } else if (info.bpp == 32) { if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000) easy = 2; } if (!easy) { if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } // right shift amt to put high bit in position #7 rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr); gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg); bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb); ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma); if (rcount > 8 || gcount > 8 || bcount > 8 || 
acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); } } for (j=0; j < (int) s->img_y; ++j) { if (easy) { for (i=0; i < (int) s->img_x; ++i) { unsigned char a; out[z+2] = stbi__get8(s); out[z+1] = stbi__get8(s); out[z+0] = stbi__get8(s); z += 3; a = (easy == 2 ? stbi__get8(s) : 255); all_a |= a; if (target == 4) out[z++] = a; } } else { int bpp = info.bpp; for (i=0; i < (int) s->img_x; ++i) { stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s)); unsigned int a; out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount)); out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount)); out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount)); a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255); all_a |= a; if (target == 4) out[z++] = STBI__BYTECAST(a); } } stbi__skip(s, pad); } } // if alpha channel is all 0s, replace with all 255s if (target == 4 && all_a == 0) for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4) out[i] = 255; if (flip_vertically) { stbi_uc t; for (j=0; j < (int) s->img_y>>1; ++j) { stbi_uc *p1 = out + j *s->img_x*target; stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; for (i=0; i < (int) s->img_x*target; ++i) { t = p1[i]; p1[i] = p2[i]; p2[i] = t; } } } if (req_comp && req_comp != target) { out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y); if (out == NULL) return out; // stbi__convert_format frees input on failure } *x = s->img_x; *y = s->img_y; if (comp) *comp = s->img_n; return out; } #endif // Targa Truevision - TGA // by Jonathan Dummer #ifndef STBI_NO_TGA // returns STBI_rgb or whatever, 0 on error static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16) { // only RGB or RGBA (incl. 
16bit) or grey allowed if (is_rgb16) *is_rgb16 = 0; switch(bits_per_pixel) { case 8: return STBI_grey; case 16: if(is_grey) return STBI_grey_alpha; // fallthrough case 15: if(is_rgb16) *is_rgb16 = 1; return STBI_rgb; case 24: // fallthrough case 32: return bits_per_pixel/8; default: return 0; } } static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp) { int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp; int sz, tga_colormap_type; stbi__get8(s); // discard Offset tga_colormap_type = stbi__get8(s); // colormap type if( tga_colormap_type > 1 ) { stbi__rewind(s); return 0; // only RGB or indexed allowed } tga_image_type = stbi__get8(s); // image type if ( tga_colormap_type == 1 ) { // colormapped (paletted) image if (tga_image_type != 1 && tga_image_type != 9) { stbi__rewind(s); return 0; } stbi__skip(s,4); // skip index of first colormap entry and number of entries sz = stbi__get8(s); // check bits per palette color entry if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) { stbi__rewind(s); return 0; } stbi__skip(s,4); // skip image x and y origin tga_colormap_bpp = sz; } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) { stbi__rewind(s); return 0; // only RGB or grey allowed, +/- RLE } stbi__skip(s,9); // skip colormap specification and image x/y origin tga_colormap_bpp = 0; } tga_w = stbi__get16le(s); if( tga_w < 1 ) { stbi__rewind(s); return 0; // test width } tga_h = stbi__get16le(s); if( tga_h < 1 ) { stbi__rewind(s); return 0; // test height } tga_bits_per_pixel = stbi__get8(s); // bits per pixel stbi__get8(s); // ignore alpha bits if (tga_colormap_bpp != 0) { if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) { // when using a colormap, tga_bits_per_pixel is the size of the indexes // I don't think anything but 8 or 16bit indexes makes sense 
stbi__rewind(s); return 0; } tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL); } else { tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL); } if(!tga_comp) { stbi__rewind(s); return 0; } if (x) *x = tga_w; if (y) *y = tga_h; if (comp) *comp = tga_comp; return 1; // seems to have passed everything } static int stbi__tga_test(stbi__context *s) { int res = 0; int sz, tga_color_type; stbi__get8(s); // discard Offset tga_color_type = stbi__get8(s); // color type if ( tga_color_type > 1 ) goto errorEnd; // only RGB or indexed allowed sz = stbi__get8(s); // image type if ( tga_color_type == 1 ) { // colormapped (paletted) image if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9 stbi__skip(s,4); // skip index of first colormap entry and number of entries sz = stbi__get8(s); // check bits per palette color entry if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; stbi__skip(s,4); // skip image x and y origin } else { // "normal" image w/o colormap if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE stbi__skip(s,9); // skip colormap specification and image x/y origin } if ( stbi__get16le(s) < 1 ) goto errorEnd; // test width if ( stbi__get16le(s) < 1 ) goto errorEnd; // test height sz = stbi__get8(s); // bits per pixel if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd; res = 1; // if we got this far, everything's good and we can return 1 instead of 0 errorEnd: stbi__rewind(s); return res; } // read 16bit value and convert to 24bit RGB static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out) { stbi__uint16 px = (stbi__uint16)stbi__get16le(s); stbi__uint16 fiveBitMask = 31; // we have 3 channels with 5bits each int r = (px >> 10) & 
fiveBitMask; int g = (px >> 5) & fiveBitMask; int b = px & fiveBitMask; // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later out[0] = (stbi_uc)((r * 255)/31); out[1] = (stbi_uc)((g * 255)/31); out[2] = (stbi_uc)((b * 255)/31); // some people claim that the most significant bit might be used for alpha // (possibly if an alpha-bit is set in the "image descriptor byte") // but that only made 16bit test images completely translucent.. // so let's treat all 15 and 16bit TGAs as RGB with no alpha. } static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { // read in the TGA header stuff int tga_offset = stbi__get8(s); int tga_indexed = stbi__get8(s); int tga_image_type = stbi__get8(s); int tga_is_RLE = 0; int tga_palette_start = stbi__get16le(s); int tga_palette_len = stbi__get16le(s); int tga_palette_bits = stbi__get8(s); int tga_x_origin = stbi__get16le(s); int tga_y_origin = stbi__get16le(s); int tga_width = stbi__get16le(s); int tga_height = stbi__get16le(s); int tga_bits_per_pixel = stbi__get8(s); int tga_comp, tga_rgb16=0; int tga_descriptor = stbi__get8(s); // int tga_alpha_bits = tga_descriptor & 15; // the 4 lowest bits - unused (useless?) 
// image data unsigned char *tga_data; unsigned char *tga_palette = NULL; int i, j; unsigned char raw_data[4] = {0}; int RLE_count = 0; int RLE_repeating = 0; int read_next_pixel = 1; STBI_NOTUSED(ri); STBI_NOTUSED(tga_x_origin); // @TODO STBI_NOTUSED(tga_y_origin); // @TODO if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); // do a tiny bit of precessing if ( tga_image_type >= 8 ) { tga_image_type -= 8; tga_is_RLE = 1; } int tga_x_inverted = ((tga_descriptor >> 4) & 1); int tga_y_inverted = 1 - ((tga_descriptor >> 5) & 1); // If I'm paletted, then I'll use the number of bits from the palette if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16); else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16); if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency return stbi__errpuc("bad format", "Can't find out TGA pixelformat"); // tga info *x = tga_width; *y = tga_height; if (comp) *comp = tga_comp; if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0)) return stbi__errpuc("too large", "Corrupt TGA"); tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0); if (!tga_data) return stbi__errpuc("outofmem", "Out of memory"); // skip to the data's starting position (offset usually = 0) stbi__skip(s, tga_offset ); if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) { for (i=0; i < tga_height; ++i) { int row = tga_y_inverted ? tga_height - i - 1 : i; stbi_uc *tga_row = tga_data + row*tga_width*tga_comp; if (tga_x_inverted) { for (j = 0; j < tga_width; j++) { int index = (tga_width - j - 1) * tga_comp; stbi__getn(s, tga_row + index, tga_comp); } } else { stbi__getn(s, tga_row, tga_width * tga_comp); } } } else { // do I need to load a palette? 
if ( tga_indexed) { if (tga_palette_len == 0) { /* you have to have at least one entry! */ STBI_FREE(tga_data); return stbi__errpuc("bad palette", "Corrupt TGA"); } // any data to skip? (offset usually = 0) stbi__skip(s, tga_palette_start ); // load the palette tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0); if (!tga_palette) { STBI_FREE(tga_data); return stbi__errpuc("outofmem", "Out of memory"); } if (tga_rgb16) { stbi_uc *pal_entry = tga_palette; STBI_ASSERT(tga_comp == STBI_rgb); for (i=0; i < tga_palette_len; ++i) { stbi__tga_read_rgb16(s, pal_entry); pal_entry += tga_comp; } } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) { STBI_FREE(tga_data); STBI_FREE(tga_palette); return stbi__errpuc("bad palette", "Corrupt TGA"); } } // load the data for (i=0; i < tga_width * tga_height; ++i) { // if I'm in RLE mode, do I need to get a RLE stbi__pngchunk? if ( tga_is_RLE ) { if ( RLE_count == 0 ) { // yep, get the next byte as a RLE command int RLE_cmd = stbi__get8(s); RLE_count = 1 + (RLE_cmd & 127); RLE_repeating = RLE_cmd >> 7; read_next_pixel = 1; } else if ( !RLE_repeating ) { read_next_pixel = 1; } } else { read_next_pixel = 1; } // OK, if I need to read a pixel, do it now if ( read_next_pixel ) { // load however much data we did have if ( tga_indexed ) { // read in index, then perform the lookup int pal_idx = (tga_bits_per_pixel == 8) ? 
stbi__get8(s) : stbi__get16le(s); if ( pal_idx >= tga_palette_len ) { // invalid index pal_idx = 0; } pal_idx *= tga_comp; for (j = 0; j < tga_comp; ++j) { raw_data[j] = tga_palette[pal_idx+j]; } } else if(tga_rgb16) { STBI_ASSERT(tga_comp == STBI_rgb); stbi__tga_read_rgb16(s, raw_data); } else { // read in the data raw for (j = 0; j < tga_comp; ++j) { raw_data[j] = stbi__get8(s); } } // clear the reading flag for the next pixel read_next_pixel = 0; } // end of reading a pixel // copy data for (j = 0; j < tga_comp; ++j) tga_data[i*tga_comp+j] = raw_data[j]; // in case we're in RLE mode, keep counting down --RLE_count; } // do I need to invert the image? if (tga_x_inverted) { for (j = 0; j < tga_height; j++) { stbi_uc *row = tga_data + (j * tga_width * tga_comp); for (int s = 0, d = tga_width - 1; s * 2 < tga_width; s++, d--) { stbi_uc *src = row + (s * tga_comp); stbi_uc *dest = row + (d * tga_comp); for (i = 0; i < tga_comp; i++) { stbi_uc temp = src[i]; src[i] = dest[i]; dest[i] = temp; } } } } if (tga_y_inverted) { for (j = 0; j*2 < tga_height; ++j) { int index1 = j * tga_width * tga_comp; int index2 = (tga_height - 1 - j) * tga_width * tga_comp; for (i = tga_width * tga_comp; i > 0; --i) { unsigned char temp = tga_data[index1]; tga_data[index1] = tga_data[index2]; tga_data[index2] = temp; ++index1; ++index2; } } } // clear my palette, if I had one if ( tga_palette != NULL ) { STBI_FREE( tga_palette ); } } // swap RGB - if the source data was RGB16, it already is in the right order if (tga_comp >= 3 && !tga_rgb16) { unsigned char* tga_pixel = tga_data; for (i=0; i < tga_width * tga_height; ++i) { unsigned char temp = tga_pixel[0]; tga_pixel[0] = tga_pixel[2]; tga_pixel[2] = temp; tga_pixel += tga_comp; } } // convert to target component count if (req_comp && req_comp != tga_comp) tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height); // the things I do to get rid of an error message, and yet keep // Microsoft's C compilers happy... 
[8^( tga_palette_start = tga_palette_len = tga_palette_bits = tga_x_origin = tga_y_origin = 0; STBI_NOTUSED(tga_palette_start); // OK, done return tga_data; } #endif // ************************************************************************************************* // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB #ifndef STBI_NO_PSD static int stbi__psd_test(stbi__context *s) { int r = (stbi__get32be(s) == 0x38425053); stbi__rewind(s); return r; } static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount) { int count, nleft, len; count = 0; while ((nleft = pixelCount - count) > 0) { len = stbi__get8(s); if (len == 128) { // No-op. } else if (len < 128) { // Copy next len+1 bytes literally. len++; if (len > nleft) return 0; // corrupt data count += len; while (len) { *p = stbi__get8(s); p += 4; len--; } } else if (len > 128) { stbi_uc val; // Next -len+1 bytes in the dest are replicated from next source byte. // (Interpret len as a negative 8-bit int.) len = 257 - len; if (len > nleft) return 0; // corrupt data val = stbi__get8(s); count += len; while (len) { *p = val; p += 4; len--; } } } return 1; } static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc) { int pixelCount; int channelCount, compression; int channel, i; int bitdepth; int w,h; stbi_uc *out; STBI_NOTUSED(ri); // Check identifier if (stbi__get32be(s) != 0x38425053) // "8BPS" return stbi__errpuc("not PSD", "Corrupt PSD image"); // Check file type version. if (stbi__get16be(s) != 1) return stbi__errpuc("wrong version", "Unsupported version of PSD image"); // Skip 6 reserved bytes. stbi__skip(s, 6 ); // Read the number of channels (R, G, B, A, etc). channelCount = stbi__get16be(s); if (channelCount < 0 || channelCount > 16) return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image"); // Read the rows and columns of the image. 
h = stbi__get32be(s); w = stbi__get32be(s); if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); // Make sure the depth is 8 bits. bitdepth = stbi__get16be(s); if (bitdepth != 8 && bitdepth != 16) return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit"); // Make sure the color mode is RGB. // Valid options are: // 0: Bitmap // 1: Grayscale // 2: Indexed color // 3: RGB color // 4: CMYK color // 7: Multichannel // 8: Duotone // 9: Lab color if (stbi__get16be(s) != 3) return stbi__errpuc("wrong color format", "PSD is not in RGB color format"); // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) stbi__skip(s,stbi__get32be(s) ); // Skip the image resources. (resolution, pen tool paths, etc) stbi__skip(s, stbi__get32be(s) ); // Skip the reserved data. stbi__skip(s, stbi__get32be(s) ); // Find out if the data is compressed. // Known values: // 0: no compression // 1: RLE compressed compression = stbi__get16be(s); if (compression > 1) return stbi__errpuc("bad compression", "PSD has an unknown compression format"); // Check size if (!stbi__mad3sizes_valid(4, w, h, 0)) return stbi__errpuc("too large", "Corrupt PSD"); // Create the destination image. if (!compression && bitdepth == 16 && bpc == 16) { out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0); ri->bits_per_channel = 16; } else out = (stbi_uc *) stbi__malloc(4 * w*h); if (!out) return stbi__errpuc("outofmem", "Out of memory"); pixelCount = w*h; // Initialize the data to zero. //memset( out, 0, pixelCount * 4 ); // Finally, the image data. if (compression) { // RLE as used by .PSD and .TIFF // Loop until you get the number of unpacked bytes you are expecting: // Read the next source byte into n. // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. 
// Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. // Else if n is 128, noop. // Endloop // The RLE-compressed data is preceded by a 2-byte data count for each row in the data, // which we're going to just skip. stbi__skip(s, h * channelCount * 2 ); // Read the RLE data by channel. for (channel = 0; channel < 4; channel++) { stbi_uc *p; p = out+channel; if (channel >= channelCount) { // Fill this channel with default data. for (i = 0; i < pixelCount; i++, p += 4) *p = (channel == 3 ? 255 : 0); } else { // Read the RLE data. if (!stbi__psd_decode_rle(s, p, pixelCount)) { STBI_FREE(out); return stbi__errpuc("corrupt", "bad RLE data"); } } } } else { // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image. // Read the data by channel. for (channel = 0; channel < 4; channel++) { if (channel >= channelCount) { // Fill this channel with default data. if (bitdepth == 16 && bpc == 16) { stbi__uint16 *q = ((stbi__uint16 *) out) + channel; stbi__uint16 val = channel == 3 ? 65535 : 0; for (i = 0; i < pixelCount; i++, q += 4) *q = val; } else { stbi_uc *p = out+channel; stbi_uc val = channel == 3 ? 
255 : 0; for (i = 0; i < pixelCount; i++, p += 4) *p = val; } } else { if (ri->bits_per_channel == 16) { // output bpc stbi__uint16 *q = ((stbi__uint16 *) out) + channel; for (i = 0; i < pixelCount; i++, q += 4) *q = (stbi__uint16) stbi__get16be(s); } else { stbi_uc *p = out+channel; if (bitdepth == 16) { // input bpc for (i = 0; i < pixelCount; i++, p += 4) *p = (stbi_uc) (stbi__get16be(s) >> 8); } else { for (i = 0; i < pixelCount; i++, p += 4) *p = stbi__get8(s); } } } } } // remove weird white matte from PSD if (channelCount >= 4) { if (ri->bits_per_channel == 16) { for (i=0; i < w*h; ++i) { stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i; if (pixel[3] != 0 && pixel[3] != 65535) { float a = pixel[3] / 65535.0f; float ra = 1.0f / a; float inv_a = 65535.0f * (1 - ra); pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a); pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a); pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a); } } } else { for (i=0; i < w*h; ++i) { unsigned char *pixel = out + 4*i; if (pixel[3] != 0 && pixel[3] != 255) { float a = pixel[3] / 255.0f; float ra = 1.0f / a; float inv_a = 255.0f * (1 - ra); pixel[0] = (unsigned char) (pixel[0]*ra + inv_a); pixel[1] = (unsigned char) (pixel[1]*ra + inv_a); pixel[2] = (unsigned char) (pixel[2]*ra + inv_a); } } } } // convert to desired output format if (req_comp && req_comp != 4) { if (ri->bits_per_channel == 16) out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h); else out = stbi__convert_format(out, 4, req_comp, w, h); if (out == NULL) return out; // stbi__convert_format frees input on failure } if (comp) *comp = 4; *y = h; *x = w; return out; } #endif // ************************************************************************************************* // Softimage PIC loader // by Tom Seddon // // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ #ifndef STBI_NO_PIC static int 
stbi__pic_is4(stbi__context *s,const char *str) { int i; for (i=0; i<4; ++i) if (stbi__get8(s) != (stbi_uc)str[i]) return 0; return 1; } static int stbi__pic_test_core(stbi__context *s) { int i; if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) return 0; for(i=0;i<84;++i) stbi__get8(s); if (!stbi__pic_is4(s,"PICT")) return 0; return 1; } typedef struct { stbi_uc size,type,channel; } stbi__pic_packet; static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest) { int mask=0x80, i; for (i=0; i<4; ++i, mask>>=1) { if (channel & mask) { if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short"); dest[i]=stbi__get8(s); } } return dest; } static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src) { int mask=0x80,i; for (i=0;i<4; ++i, mask>>=1) if (channel&mask) dest[i]=src[i]; } static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result) { int act_comp=0,num_packets=0,y,chained; stbi__pic_packet packets[10]; // this will (should...) cater for even some bizarre stuff like having data // for the same channel in multiple packets. do { stbi__pic_packet *packet; if (num_packets==sizeof(packets)/sizeof(packets[0])) return stbi__errpuc("bad format","too many packets"); packet = &packets[num_packets++]; chained = stbi__get8(s); packet->size = stbi__get8(s); packet->type = stbi__get8(s); packet->channel = stbi__get8(s); act_comp |= packet->channel; if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (reading packets)"); if (packet->size != 8) return stbi__errpuc("bad format","packet isn't 8bpp"); } while (chained); *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? 
for(y=0; ytype) { default: return stbi__errpuc("bad format","packet has bad compression type"); case 0: {//uncompressed int x; for(x=0;xchannel,dest)) return 0; break; } case 1://Pure RLE { int left=width, i; while (left>0) { stbi_uc count,value[4]; count=stbi__get8(s); if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pure read count)"); if (count > left) count = (stbi_uc) left; if (!stbi__readval(s,packet->channel,value)) return 0; for(i=0; ichannel,dest,value); left -= count; } } break; case 2: {//Mixed RLE int left=width; while (left>0) { int count = stbi__get8(s), i; if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (mixed read count)"); if (count >= 128) { // Repeated stbi_uc value[4]; if (count==128) count = stbi__get16be(s); else count -= 127; if (count > left) return stbi__errpuc("bad file","scanline overrun"); if (!stbi__readval(s,packet->channel,value)) return 0; for(i=0;ichannel,dest,value); } else { // Raw ++count; if (count>left) return stbi__errpuc("bad file","scanline overrun"); for(i=0;ichannel,dest)) return 0; } left-=count; } break; } } } } return result; } static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri) { stbi_uc *result; int i, x,y, internal_comp; STBI_NOTUSED(ri); if (!comp) comp = &internal_comp; for (i=0; i<92; ++i) stbi__get8(s); x = stbi__get16be(s); y = stbi__get16be(s); if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); if (stbi__at_eof(s)) return stbi__errpuc("bad file","file too short (pic header)"); if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode"); stbi__get32be(s); //skip `ratio' stbi__get16be(s); //skip `fields' stbi__get16be(s); //skip `pad' // intermediate buffer is RGBA result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0); if (!result) return 
stbi__errpuc("outofmem", "Out of memory"); memset(result, 0xff, x*y*4); if (!stbi__pic_load_core(s,x,y,comp, result)) { STBI_FREE(result); result=0; } *px = x; *py = y; if (req_comp == 0) req_comp = *comp; result=stbi__convert_format(result,4,req_comp,x,y); return result; } static int stbi__pic_test(stbi__context *s) { int r = stbi__pic_test_core(s); stbi__rewind(s); return r; } #endif // ************************************************************************************************* // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb #ifndef STBI_NO_GIF typedef struct { stbi__int16 prefix; stbi_uc first; stbi_uc suffix; } stbi__gif_lzw; typedef struct { int w,h; stbi_uc *out; // output buffer (always 4 components) stbi_uc *background; // The current "background" as far as a gif is concerned stbi_uc *history; int flags, bgindex, ratio, transparent, eflags; stbi_uc pal[256][4]; stbi_uc lpal[256][4]; stbi__gif_lzw codes[8192]; stbi_uc *color_table; int parse, step; int lflags; int start_x, start_y; int max_x, max_y; int cur_x, cur_y; int line_size; int delay; } stbi__gif; static int stbi__gif_test_raw(stbi__context *s) { int sz; if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0; sz = stbi__get8(s); if (sz != '9' && sz != '7') return 0; if (stbi__get8(s) != 'a') return 0; return 1; } static int stbi__gif_test(stbi__context *s) { int r = stbi__gif_test_raw(s); stbi__rewind(s); return r; } static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp) { int i; for (i=0; i < num_entries; ++i) { pal[i][2] = stbi__get8(s); pal[i][1] = stbi__get8(s); pal[i][0] = stbi__get8(s); pal[i][3] = transp == i ? 
0 : 255; } }

// Parse the GIF signature and logical screen descriptor into `g`.
//   comp    - optional; set to 4 when non-NULL
//   is_info - when non-zero, return right after the dimension checks
//             without reading the global color table
// Returns 1 on success; on failure returns the result of stbi__err().
static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
{
   stbi_uc version;
   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
      return stbi__err("not GIF", "Corrupt GIF");

   version = stbi__get8(s);
   if (version != '7' && version != '9')    return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF");

   stbi__g_failure_reason = "";
   g->w = stbi__get16le(s);
   g->h = stbi__get16le(s);
   g->flags = stbi__get8(s);
   g->bgindex = stbi__get8(s);
   g->ratio = stbi__get8(s);
   g->transparent = -1;

   if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
   if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");

   if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments

   if (is_info) return 1;

   // bit 0x80 of flags: a global color table follows, with 2 << (flags & 7) entries
   if (g->flags & 0x80) stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);

   return 1;
}

// Report GIF dimensions without decoding any image data.
// x, y may be NULL. Uses a heap-allocated stbi__gif as scratch space.
// Returns 1 on success; on a header failure the stream is rewound and 0 is
// returned.
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   if (!g) return stbi__err("outofmem", "Out of memory");
   if (!stbi__gif_header(s, g, comp, 1)) {
      STBI_FREE(g);
      stbi__rewind( s );
      return 0;
   }
   if (x) *x = g->w;
   if (y) *y = g->h;
   STBI_FREE(g);
   return 1;
}

// Emit the pixel string for LZW `code` into g->out at the current cursor.
// cur_x / cur_y are byte offsets (see how idx is formed below), and the
// cursor is advanced as pixels are written, including the interlace pass
// stepping driven by g->parse / g->step.
static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
{
   stbi_uc *p, *c;
   int idx;

   // recurse to decode the prefixes, since the linked-list is backwards,
   // and working backwards through an interleaved image would be nasty
   if (g->codes[code].prefix >= 0)
      stbi__out_gif_code(g, g->codes[code].prefix);

   // past the bottom of the frame rectangle: drop the pixel
   if (g->cur_y >= g->max_y) return;

   idx = g->cur_x + g->cur_y;
   p = &g->out[idx];
   g->history[idx / 4] = 1;   // history holds one byte per 4-byte pixel

   c = &g->color_table[g->codes[code].suffix * 4];
   if (c[3] > 128) { // don't render transparent pixels;
      p[0] = c[2];
      p[1] = c[1];
      p[2] = c[0];
      p[3] = c[3];
   }
   g->cur_x += 4;

   // end of row: wrap to the start column and step down
   if (g->cur_x >= g->max_x) {
      g->cur_x = g->start_x;
      g->cur_y += g->step;

      // ran off the bottom with passes remaining: start the next pass,
      // offset by half of the new step
      while (g->cur_y >= g->max_y && g->parse > 0) {
         g->step = (1 << g->parse) * g->line_size;
         g->cur_y = g->start_y + (g->step >> 1);
         --g->parse;
      }
   }
}

// Decode the LZW-compressed raster data of one image into g->out.
// Returns g->out when the data ends (zero-length block or end-of-stream
// code), or NULL on corrupt input. Only the error paths that go through
// stbi__errpuc set a failure reason; the lzw_cs > 12 path does not.
static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
{
   stbi_uc lzw_cs;
   stbi__int32 len, init_code;
   stbi__uint32 first;
   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi__gif_lzw *p;

   lzw_cs = stbi__get8(s);
   if (lzw_cs > 12) return NULL;
   clear = 1 << lzw_cs;
   first = 1;
   codesize = lzw_cs + 1;
   codemask = (1 << codesize) - 1;
   bits = 0;
   valid_bits = 0;
   // seed the table with the single-symbol root codes
   for (init_code = 0; init_code < clear; init_code++) {
      g->codes[init_code].prefix = -1;
      g->codes[init_code].first = (stbi_uc) init_code;
      g->codes[init_code].suffix = (stbi_uc) init_code;
   }

   // support no starting clear code
   avail = clear+2;
   oldcode = -1;

   len = 0;
   for(;;) {
      if (valid_bits < codesize) {
         // not enough buffered bits for a code: pull in one more byte
         if (len == 0) {
            len = stbi__get8(s); // start new block
            if (len == 0)
               return g->out;
         }
         --len;
         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         stbi__int32 code = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code == clear) {  // clear code
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code == clear + 1) { // end of stream code
            // skip the rest of this block and any remaining blocks
            stbi__skip(s, len);
            while ((len = stbi__get8(s)) > 0)
               stbi__skip(s,len);
            return g->out;
         } else if (code <= avail) {
            if (first) {
               return stbi__errpuc("no clear code", "Corrupt GIF");
            }

            if (oldcode >= 0) {
               p = &g->codes[avail++];
               if (avail > 8192) {
                  return stbi__errpuc("too many codes", "Corrupt GIF");
               }

               p->prefix = (stbi__int16) oldcode;
               p->first = g->codes[oldcode].first;
               // code == avail here means the code refers to the entry being created
               p->suffix = (code == avail) ? p->first : g->codes[code].first;
            } else if (code == avail)
               return stbi__errpuc("illegal code in raster", "Corrupt GIF");

            stbi__out_gif_code(g, (stbi__uint16) code);

            // widen the code size once the table fills the current width
            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code;
         } else {
            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
         }
      }
   }
}

// this function is designed to support animated gifs, although stb_image doesn't support it // two back is the image from two frames ago, used for a very specific disposal format static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back) { int dispose; int first_frame; int pi; int pcount; STBI_NOTUSED(req_comp); // on first frame, any non-written pixels get the background colour (non-transparent) first_frame = 0; if (g->out == 0) { if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header if (!stbi__mad3sizes_valid(4, g->w, g->h, 0)) return stbi__errpuc("too large", "GIF image is too large"); pcount = g->w * g->h; g->out = (stbi_uc *) stbi__malloc(4 * pcount); g->background = (stbi_uc *) stbi__malloc(4 * pcount); g->history = (stbi_uc *) stbi__malloc(pcount); if (!g->out || !g->background || !g->history) return stbi__errpuc("outofmem", "Out of memory"); // image is treated as "transparent" at the start - ie, nothing overwrites the current background; // background colour is only used for pixels that are not rendered first frame, after that "background" // color refers to the color that was there the previous frame. memset(g->out, 0x00, 4 * pcount); memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent) memset(g->history, 0x00, pcount); // pixels that were affected previous frame first_frame = 1; } else { // second frame - how do we dispose of the previous one? 
dispose = (g->eflags & 0x1C) >> 2; pcount = g->w * g->h; if ((dispose == 3) && (two_back == 0)) { dispose = 2; // if I don't have an image to revert back to, default to the old background } if (dispose == 3) { // use previous graphic for (pi = 0; pi < pcount; ++pi) { if (g->history[pi]) { memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 ); } } } else if (dispose == 2) { // restore what was changed last frame to background before that frame; for (pi = 0; pi < pcount; ++pi) { if (g->history[pi]) { memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 ); } } } else { // This is a non-disposal case eithe way, so just // leave the pixels as is, and they will become the new background // 1: do not dispose // 0: not specified. } // background is what out is after the undoing of the previou frame; memcpy( g->background, g->out, 4 * g->w * g->h ); } // clear my history; memset( g->history, 0x00, g->w * g->h ); // pixels that were affected previous frame for (;;) { int tag = stbi__get8(s); switch (tag) { case 0x2C: /* Image Descriptor */ { stbi__int32 x, y, w, h; stbi_uc *o; x = stbi__get16le(s); y = stbi__get16le(s); w = stbi__get16le(s); h = stbi__get16le(s); if (((x + w) > (g->w)) || ((y + h) > (g->h))) return stbi__errpuc("bad Image Descriptor", "Corrupt GIF"); g->line_size = g->w * 4; g->start_x = x * 4; g->start_y = y * g->line_size; g->max_x = g->start_x + w * 4; g->max_y = g->start_y + h * g->line_size; g->cur_x = g->start_x; g->cur_y = g->start_y; // if the width of the specified rectangle is 0, that means // we may not see *any* pixels or the image is malformed; // to make sure this is caught, move the current y down to // max_y (which is what out_gif_code checks). if (w == 0) g->cur_y = g->max_y; g->lflags = stbi__get8(s); if (g->lflags & 0x40) { g->step = 8 * g->line_size; // first interlaced spacing g->parse = 3; } else { g->step = g->line_size; g->parse = 0; } if (g->lflags & 0x80) { stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? 
g->transparent : -1); g->color_table = (stbi_uc *) g->lpal; } else if (g->flags & 0x80) { g->color_table = (stbi_uc *) g->pal; } else return stbi__errpuc("missing color table", "Corrupt GIF"); o = stbi__process_gif_raster(s, g); if (!o) return NULL; // if this was the first frame, pcount = g->w * g->h; if (first_frame && (g->bgindex > 0)) { // if first frame, any pixel not drawn to gets the background color for (pi = 0; pi < pcount; ++pi) { if (g->history[pi] == 0) { g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be; memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 ); } } } return o; } case 0x21: // Comment Extension. { int len; int ext = stbi__get8(s); if (ext == 0xF9) { // Graphic Control Extension. len = stbi__get8(s); if (len == 4) { g->eflags = stbi__get8(s); g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths. // unset old transparent if (g->transparent >= 0) { g->pal[g->transparent][3] = 255; } if (g->eflags & 0x01) { g->transparent = stbi__get8(s); if (g->transparent >= 0) { g->pal[g->transparent][3] = 0; } } else { // don't need transparent stbi__skip(s, 1); g->transparent = -1; } } else { stbi__skip(s, len); break; } } while ((len = stbi__get8(s)) != 0) { stbi__skip(s, len); } break; } case 0x3B: // gif stream termination code return (stbi_uc *) s; // using '1' causes warning on some compilers default: return stbi__errpuc("unknown code", "Corrupt GIF"); } } }

// Out-of-memory cleanup for stbi__load_gif_main: frees the decoder's work
// buffers, the accumulated frame buffer and the delays array (when present),
// then returns NULL with the failure reason set to "outofmem".
static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
{
   STBI_FREE(g->out);
   STBI_FREE(g->history);
   STBI_FREE(g->background);

   if (out) STBI_FREE(out);
   if (delays && *delays) STBI_FREE(*delays);
   return stbi__errpuc("outofmem", "Out of memory");
}

// Decode every frame of a (possibly animated) GIF into one contiguous buffer.
//   delays   - optional; receives a malloc'd array with one delay per frame
//              (value taken from g.delay after each frame)
//   x, y     - receive the frame width and height
//   z        - receives the number of frames decoded
//   comp     - forwarded to stbi__gif_load_next
//   req_comp - requested component count; frames are decoded as 4 components
//              and converted once at the end if this is non-zero and not 4
// Returns the frame buffer (frames stored back to back), or NULL if the
// stream is not a GIF, memory runs out, or no frame could be decoded.
static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
{
   if (stbi__gif_test(s)) {
      int layers = 0;
      stbi_uc *u = 0;
      stbi_uc *out = 0;
      stbi_uc *two_back = 0;
      stbi__gif g;
      int stride;
      int out_size = 0;
      int delays_size = 0;

      STBI_NOTUSED(out_size);
      STBI_NOTUSED(delays_size);

      memset(&g, 0, sizeof(g));
      if (delays) {
         *delays = 0;
      }

      do {
         u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
         if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker

         if (u) {
            *x = g.w;
            *y = g.h;
            ++layers;
            stride = g.w * g.h * 4;

            if (out) {
               // grow the frame buffer by one frame
               void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
               if (!tmp)
                  return stbi__load_gif_main_outofmem(&g, out, delays);
               else {
                   out = (stbi_uc*) tmp;
                   out_size = layers * stride;
               }

               if (delays) {
                  int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
                  if (!new_delays)
                     return stbi__load_gif_main_outofmem(&g, out, delays);
                  *delays = new_delays;
                  delays_size = layers * sizeof(int);
               }
            } else {
               out = (stbi_uc*)stbi__malloc( layers * stride );
               if (!out)
                  return stbi__load_gif_main_outofmem(&g, out, delays);
               out_size = layers * stride;
               if (delays) {
                  *delays = (int*) stbi__malloc( layers * sizeof(int) );
                  if (!*delays)
                     return stbi__load_gif_main_outofmem(&g, out, delays);
                  delays_size = layers * sizeof(int);
               }
            }
            memcpy( out + ((layers - 1) * stride), u, stride );
            if (layers >= 2) {
               // The frame stored before the one just copied. It is what the
               // next stbi__gif_load_next call restores from for disposal
               // method 3. Recomputed every pass because realloc may move out.
               // (The previous expression, out - 2 * stride, pointed before
               // the start of the allocation.)
               two_back = out + (layers - 2) * stride;
            }

            if (delays) {
               (*delays)[layers - 1U] = g.delay;
            }
         }
      } while (u != 0);

      // free temp buffer;
      STBI_FREE(g.out);
      STBI_FREE(g.history);
      STBI_FREE(g.background);

      // do the final conversion after loading everything;
      // skipped when no frame was decoded, so a failed load still yields NULL
      if (out && req_comp && req_comp != 4)
         out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);

      *z = layers;
      return out;
   } else {
      return stbi__errpuc("not GIF", "Image was not as a gif type.");
   }
}

// Load the first frame of a GIF.
// Returns the pixel buffer (converted to req_comp components when that is
// non-zero and not 4), or NULL on failure or if the stream holds no image.
static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *u = 0;
   stbi__gif g;
   memset(&g, 0, sizeof(g));
   STBI_NOTUSED(ri);

   u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
   if (u) {
      *x = g.w;
      *y = g.h;

      // moved conversion to after successful load so that the same
      // can be done for multiple frames.
      if (req_comp && req_comp != 4)
         u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
   } else if (g.out) {
      // if there was an error and we allocated an image buffer, free it!
      STBI_FREE(g.out);
   }

   // free buffers needed for multiple frame loading;
   STBI_FREE(g.history);
   STBI_FREE(g.background);

   return u;
}

// Thin wrapper around stbi__gif_info_raw.
static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
{
   return stbi__gif_info_raw(s,x,y,comp);
}
#endif

// *************************************************************************************************
// Radiance RGBE HDR loader
// originally by Nicolas Schulz
#ifndef STBI_NO_HDR
// Returns 1 if the stream starts with `signature`. The stream is rewound
// only on a match; callers rewind again themselves.
static int stbi__hdr_test_core(stbi__context *s, const char *signature)
{
   int i;
   for (i=0; signature[i]; ++i)
      if (stbi__get8(s) != signature[i])
          return 0;
   stbi__rewind(s);
   return 1;
}

// Returns 1 if the stream starts with "#?RADIANCE\n" or "#?RGBE\n".
// Always leaves the stream rewound.
static int stbi__hdr_test(stbi__context* s)
{
   int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
   stbi__rewind(s);
   if(!r) {
       r = stbi__hdr_test_core(s, "#?RGBE\n");
       stbi__rewind(s);
   }
   return r;
}

#define STBI__HDR_BUFLEN  1024
// Read one '\n'-terminated line into `buffer` (STBI__HDR_BUFLEN bytes),
// without the newline, NUL-terminated. Lines that fill the buffer are
// truncated and the remainder of the line is discarded. Returns `buffer`.
static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
{
   int len=0;
   char c = '\0';

   c = (char) stbi__get8(z);

   while (!stbi__at_eof(z) && c != '\n') {
      buffer[len++] = c;
      if (len == STBI__HDR_BUFLEN-1) {
         // flush to end of line
         while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
            ;
         break;
      }
      c = (char) stbi__get8(z);
   }

   buffer[len] = 0;
   return buffer;
}

static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp) { if ( input[3] != 0 ) { float f1; // Exponent f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); if (req_comp <= 2) output[0] = (input[0] + input[1] + input[2]) * f1 / 3; else { output[0] = input[0] * f1; output[1] = input[1] * f1; output[2] = input[2] * f1; } if (req_comp == 2) output[1] = 1; if (req_comp == 4) output[3] = 1; } else { switch (req_comp) { case 4: output[3] = 1; /* fallthrough */ case 3: output[0] = output[1] = output[2] = 0; break; case 2: output[1] = 1; /* fallthrough */ case 1: 
output[0] = 0; break; } } }

// Load a Radiance RGBE (.hdr) image as floating-point pixels.
//   x, y     - receive the width and height parsed from the resolution line
//   comp     - optional; set to 3
//   req_comp - requested component count, 0 meaning 3
// Returns a malloc'd float buffer of width*height*req_comp values, or the
// result of stbi__errpf() on failure. Only the "-Y <h> +X <w>" data layout
// and the "FORMAT=32-bit_rle_rgbe" header are accepted.
static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   char buffer[STBI__HDR_BUFLEN];
   char *token;
   int valid = 0;
   int width, height;
   stbi_uc *scanline;
   float *hdr_data;
   int len;
   unsigned char count, value;
   int i, j, k, c1,c2, z;
   const char *headerToken;
   STBI_NOTUSED(ri);

   // Check identifier
   headerToken = stbi__hdr_gettoken(s,buffer);
   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
      return stbi__errpf("not HDR", "Corrupt HDR image");

   // Parse header: read lines until the first empty one
   for(;;) {
      token = stbi__hdr_gettoken(s,buffer);
      if (token[0] == 0) break;
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   }

   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");

   // Parse width and height
   // can't use sscanf() if we're not using stdio!
   token = stbi__hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   height = (int) strtol(token, &token, 10);
   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   width = (int) strtol(token, NULL, 10);

   if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
   if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");

   *x = width;
   *y = height;

   if (comp) *comp = 3;
   if (req_comp == 0) req_comp = 3;

   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
      return stbi__errpf("too large", "HDR image is too large");

   // Read data
   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
   if (!hdr_data)
      return stbi__errpf("outofmem", "Out of memory");

   // Load image data
   // image data is stored as some number of scanlines
   if ( width < 8 || width >= 32768) {
      // Read flat data
      for (j=0; j < height; ++j) {
         for (i=0; i < width; ++i) {
            stbi_uc rgbe[4];
           // entered by goto from the RLE branch below (with i = 1, j = 0)
           // when the data turns out not to be run-length encoded
           main_decode_loop:
            stbi__getn(s, rgbe, 4);
            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
         }
      }
   } else {
      // Read RLE-encoded data
      scanline = NULL;

      for (j = 0; j < height; ++j) {
         c1 = stbi__get8(s);
         c2 = stbi__get8(s);
         len = stbi__get8(s);
         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
            // not run-length encoded, so we have to actually use THIS data as a decoded
            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
            stbi_uc rgbe[4];
            rgbe[0] = (stbi_uc) c1;
            rgbe[1] = (stbi_uc) c2;
            rgbe[2] = (stbi_uc) len;
            rgbe[3] = (stbi_uc) stbi__get8(s);
            stbi__hdr_convert(hdr_data, rgbe, req_comp);
            // pixel 0 is done; resume flat decoding at pixel 1 of row 0
            i = 1;
            j = 0;
            STBI_FREE(scanline);
            goto main_decode_loop; // yes, this makes no sense
         }
         // the scanline length is a 16-bit big-endian value and must equal the width
         len <<= 8;
         len |= stbi__get8(s);
         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
         if (scanline == NULL) {
            // allocated lazily, once, and reused for every row
            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
            if (!scanline) {
               STBI_FREE(hdr_data);
               return stbi__errpf("outofmem", "Out of memory");
            }
         }

         // the four components are stored one after another, each separately
         // run-length encoded; scatter them into interleaved RGBE
         for (k = 0; k < 4; ++k) {
            int nleft;
            i = 0;
            while ((nleft = width - i) > 0) {
               count = stbi__get8(s);
               if (count > 128) {
                  // Run
                  value = stbi__get8(s);
                  count -= 128;
                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = value;
               } else {
                  // Dump
                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = stbi__get8(s);
               }
            }
         }
         for (i=0; i < width; ++i)
            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
      }
      if (scanline)
         STBI_FREE(scanline);
   }

   return hdr_data;
}

static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp) { char buffer[STBI__HDR_BUFLEN]; char *token; int valid = 0; int dummy; if (!x) x = &dummy; if (!y) y = &dummy; if (!comp) 
comp = &dummy; if (stbi__hdr_test(s) == 0) { stbi__rewind( s ); return 0; } for(;;) { token = stbi__hdr_gettoken(s,buffer); if (token[0] == 0) break; if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; } if (!valid) { stbi__rewind( s ); return 0; } token = stbi__hdr_gettoken(s,buffer); if (strncmp(token, "-Y ", 3)) { stbi__rewind( s ); return 0; } token += 3; *y = (int) strtol(token, &token, 10); while (*token == ' ') ++token; if (strncmp(token, "+X ", 3)) { stbi__rewind( s ); return 0; } token += 3; *x = (int) strtol(token, NULL, 10); *comp = 3; return 1; } #endif // STBI_NO_HDR #ifndef STBI_NO_BMP static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp) { void *p; stbi__bmp_data info; info.all_a = 255; p = stbi__bmp_parse_header(s, &info); if (p == NULL) { stbi__rewind( s ); return 0; } if (x) *x = s->img_x; if (y) *y = s->img_y; if (comp) { if (info.bpp == 24 && info.ma == 0xff000000) *comp = 3; else *comp = info.ma ? 4 : 3; } return 1; } #endif #ifndef STBI_NO_PSD static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp) { int channelCount, dummy, depth; if (!x) x = &dummy; if (!y) y = &dummy; if (!comp) comp = &dummy; if (stbi__get32be(s) != 0x38425053) { stbi__rewind( s ); return 0; } if (stbi__get16be(s) != 1) { stbi__rewind( s ); return 0; } stbi__skip(s, 6); channelCount = stbi__get16be(s); if (channelCount < 0 || channelCount > 16) { stbi__rewind( s ); return 0; } *y = stbi__get32be(s); *x = stbi__get32be(s); depth = stbi__get16be(s); if (depth != 8 && depth != 16) { stbi__rewind( s ); return 0; } if (stbi__get16be(s) != 3) { stbi__rewind( s ); return 0; } *comp = 4; return 1; } static int stbi__psd_is16(stbi__context *s) { int channelCount, depth; if (stbi__get32be(s) != 0x38425053) { stbi__rewind( s ); return 0; } if (stbi__get16be(s) != 1) { stbi__rewind( s ); return 0; } stbi__skip(s, 6); channelCount = stbi__get16be(s); if (channelCount < 0 || channelCount > 16) { stbi__rewind( s ); return 0; } 
STBI_NOTUSED(stbi__get32be(s)); STBI_NOTUSED(stbi__get32be(s)); depth = stbi__get16be(s); if (depth != 16) { stbi__rewind( s ); return 0; } return 1; } #endif #ifndef STBI_NO_PIC static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp) { int act_comp=0,num_packets=0,chained,dummy; stbi__pic_packet packets[10]; if (!x) x = &dummy; if (!y) y = &dummy; if (!comp) comp = &dummy; if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) { stbi__rewind(s); return 0; } stbi__skip(s, 88); *x = stbi__get16be(s); *y = stbi__get16be(s); if (stbi__at_eof(s)) { stbi__rewind( s); return 0; } if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) { stbi__rewind( s ); return 0; } stbi__skip(s, 8); do { stbi__pic_packet *packet; if (num_packets==sizeof(packets)/sizeof(packets[0])) return 0; packet = &packets[num_packets++]; chained = stbi__get8(s); packet->size = stbi__get8(s); packet->type = stbi__get8(s); packet->channel = stbi__get8(s); act_comp |= packet->channel; if (stbi__at_eof(s)) { stbi__rewind( s ); return 0; } if (packet->size != 8) { stbi__rewind( s ); return 0; } } while (chained); *comp = (act_comp & 0x10 ? 
4 : 3); return 1; } #endif // ************************************************************************************************* // Portable Gray Map and Portable Pixel Map loader // by Ken Miller // // PGM: http://netpbm.sourceforge.net/doc/pgm.html // PPM: http://netpbm.sourceforge.net/doc/ppm.html // // Known limitations: // Does not support comments in the header section // Does not support ASCII image data (formats P2 and P3) #ifndef STBI_NO_PNM static int stbi__pnm_test(stbi__context *s) { char p, t; p = (char) stbi__get8(s); t = (char) stbi__get8(s); if (p != 'P' || (t != '5' && t != '6')) { stbi__rewind( s ); return 0; } return 1; } static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri) { stbi_uc *out; STBI_NOTUSED(ri); ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n); if (ri->bits_per_channel == 0) return 0; if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)"); *x = s->img_x; *y = s->img_y; if (comp) *comp = s->img_n; if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0)) return stbi__errpuc("too large", "PNM too large"); out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0); if (!out) return stbi__errpuc("outofmem", "Out of memory"); if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) { STBI_FREE(out); return stbi__errpuc("bad PNM", "PNM file truncated"); } if (req_comp && req_comp != s->img_n) { if (ri->bits_per_channel == 16) { out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y); } else { out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y); } if (out == NULL) return out; // stbi__convert_format frees input on failure } return out; } 
static int stbi__pnm_isspace(char c) { return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r'; } static void stbi__pnm_skip_whitespace(stbi__context *s, char *c) { for (;;) { while (!stbi__at_eof(s) && stbi__pnm_isspace(*c)) *c = (char) stbi__get8(s); if (stbi__at_eof(s) || *c != '#') break; while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' ) *c = (char) stbi__get8(s); } } static int stbi__pnm_isdigit(char c) { return c >= '0' && c <= '9'; } static int stbi__pnm_getinteger(stbi__context *s, char *c) { int value = 0; while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) { value = value*10 + (*c - '0'); *c = (char) stbi__get8(s); if((value > 214748364) || (value == 214748364 && *c > '7')) return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int"); } return value; } static int stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp) { int maxv, dummy; char c, p, t; if (!x) x = &dummy; if (!y) y = &dummy; if (!comp) comp = &dummy; stbi__rewind(s); // Get identifier p = (char) stbi__get8(s); t = (char) stbi__get8(s); if (p != 'P' || (t != '5' && t != '6')) { stbi__rewind(s); return 0; } *comp = (t == '6') ? 
3 : 1; // '5' is 1-component .pgm; '6' is 3-component .ppm c = (char) stbi__get8(s); stbi__pnm_skip_whitespace(s, &c); *x = stbi__pnm_getinteger(s, &c); // read width if(*x == 0) return stbi__err("invalid width", "PPM image header had zero or overflowing width"); stbi__pnm_skip_whitespace(s, &c); *y = stbi__pnm_getinteger(s, &c); // read height if (*y == 0) return stbi__err("invalid width", "PPM image header had zero or overflowing width"); stbi__pnm_skip_whitespace(s, &c); maxv = stbi__pnm_getinteger(s, &c); // read max value if (maxv > 65535) return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images"); else if (maxv > 255) return 16; else return 8; } static int stbi__pnm_is16(stbi__context *s) { if (stbi__pnm_info(s, NULL, NULL, NULL) == 16) return 1; return 0; } #endif static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp) { #ifndef STBI_NO_JPEG if (stbi__jpeg_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_PNG if (stbi__png_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_GIF if (stbi__gif_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_BMP if (stbi__bmp_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_PSD if (stbi__psd_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_PIC if (stbi__pic_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_PNM if (stbi__pnm_info(s, x, y, comp)) return 1; #endif #ifndef STBI_NO_HDR if (stbi__hdr_info(s, x, y, comp)) return 1; #endif // test tga last because it's a crappy test! 
#ifndef STBI_NO_TGA if (stbi__tga_info(s, x, y, comp)) return 1; #endif return stbi__err("unknown image type", "Image not of any known type, or corrupt"); } static int stbi__is_16_main(stbi__context *s) { #ifndef STBI_NO_PNG if (stbi__png_is16(s)) return 1; #endif #ifndef STBI_NO_PSD if (stbi__psd_is16(s)) return 1; #endif #ifndef STBI_NO_PNM if (stbi__pnm_is16(s)) return 1; #endif return 0; } #ifndef STBI_NO_STDIO STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp) { FILE *f = stbi__fopen(filename, "rb"); int result; if (!f) return stbi__err("can't fopen", "Unable to open file"); result = stbi_info_from_file(f, x, y, comp); fclose(f); return result; } STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) { int r; stbi__context s; long pos = ftell(f); stbi__start_file(&s, f); r = stbi__info_main(&s,x,y,comp); fseek(f,pos,SEEK_SET); return r; } STBIDEF int stbi_is_16_bit(char const *filename) { FILE *f = stbi__fopen(filename, "rb"); int result; if (!f) return stbi__err("can't fopen", "Unable to open file"); result = stbi_is_16_bit_from_file(f); fclose(f); return result; } STBIDEF int stbi_is_16_bit_from_file(FILE *f) { int r; stbi__context s; long pos = ftell(f); stbi__start_file(&s, f); r = stbi__is_16_main(&s); fseek(f,pos,SEEK_SET); return r; } #endif // !STBI_NO_STDIO STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) { stbi__context s; stbi__start_mem(&s,buffer,len); return stbi__info_main(&s,x,y,comp); } STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp) { stbi__context s; stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); return stbi__info_main(&s,x,y,comp); } STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len) { stbi__context s; stbi__start_mem(&s,buffer,len); return stbi__is_16_main(&s); } STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user) { stbi__context s; 
stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user); return stbi__is_16_main(&s); } #endif // STB_IMAGE_IMPLEMENTATION /* revision history: 2.20 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs 2.19 (2018-02-11) fix warning 2.18 (2018-01-30) fix warnings 2.17 (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug 1-bit BMP *_is_16_bit api avoid warnings 2.16 (2017-07-23) all functions have 16-bit variants; STBI_NO_STDIO works again; compilation fixes; fix rounding in unpremultiply; optimize vertical flip; disable raw_len validation; documentation fixes 2.15 (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode; warning fixes; disable run-time SSE detection on gcc; uniform handling of optional "return" values; thread-safe initialization of zlib tables 2.14 (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs 2.13 (2016-11-29) add 16-bit API, only supported for PNG right now 2.12 (2016-04-02) fix typo in 2.11 PSD fix that caused crashes 2.11 (2016-04-02) allocate large structures on the stack remove white matting for transparent PSD fix reported channel count for PNG & BMP re-enable SSE2 in non-gcc 64-bit support RGB-formatted JPEG read 16-bit PNGs (only as 8-bit) 2.10 (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED 2.09 (2016-01-16) allow comments in PNM files 16-bit-per-pixel TGA (not bit-per-component) info() for TGA could break due to .hdr handling info() for BMP to shares code instead of sloppy parse can use STBI_REALLOC_SIZED if allocator doesn't support realloc code cleanup 2.08 (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA 2.07 (2015-09-13) fix compiler warnings partial animated GIF support limited 16-bpc PSD support #ifdef unused functions bug with < 92 byte PIC,PNM,HDR,TGA 2.06 (2015-04-19) fix bug where PSD returns wrong '*comp' value 2.05 (2015-04-19) fix bug in progressive JPEG handling, fix warning 2.04 (2015-04-15) try to re-enable SIMD on MinGW 64-bit 
2.03 (2015-04-12) extra corruption checking (mmozeiko) stbi_set_flip_vertically_on_load (nguillemot) fix NEON support; fix mingw support 2.02 (2015-01-19) fix incorrect assert, fix warning 2.01 (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2 2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG 2.00 (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg) progressive JPEG (stb) PGM/PPM support (Ken Miller) STBI_MALLOC,STBI_REALLOC,STBI_FREE GIF bugfix -- seemingly never worked STBI_NO_*, STBI_ONLY_* 1.48 (2014-12-14) fix incorrectly-named assert() 1.47 (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb) optimize PNG (ryg) fix bug in interlaced PNG with user-specified channel count (stb) 1.46 (2014-08-26) fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG 1.45 (2014-08-16) fix MSVC-ARM internal compiler error by wrapping malloc 1.44 (2014-08-07) various warning fixes from Ronny Chevalier 1.43 (2014-07-15) fix MSVC-only compiler problem in code changed in 1.42 1.42 (2014-07-09) don't define _CRT_SECURE_NO_WARNINGS (affects user code) fixes to stbi__cleanup_jpeg path added STBI_ASSERT to avoid requiring assert.h 1.41 (2014-06-25) fix search&replace from 1.36 that messed up comments/error messages 1.40 (2014-06-22) fix gcc struct-initialization warning 1.39 (2014-06-15) fix to TGA optimization when req_comp != number of components in TGA; fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite) add support for BMP version 5 (more ignored fields) 1.38 (2014-06-06) suppress MSVC warnings on integer casts truncating values fix accidental rename of 'skip' field of I/O 1.37 (2014-06-04) remove duplicate typedef 1.36 (2014-06-03) convert to header file single-file library if de-iphone isn't set, load iphone images color-swapped instead of returning NULL 1.35 (2014-05-27) various warnings fix broken STBI_SIMD path fix bug where stbi_load_from_file no longer left 
file pointer in correct place fix broken non-easy path for 32-bit BMP (possibly never used) TGA optimization by Arseny Kapoulkine 1.34 (unknown) use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case 1.33 (2011-07-14) make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements 1.32 (2011-07-13) support for "info" function for all supported filetypes (SpartanJ) 1.31 (2011-06-20) a few more leak fixes, bug in PNG handling (SpartanJ) 1.30 (2011-06-11) added ability to load files via callbacks to accomidate custom input streams (Ben Wenger) removed deprecated format-specific test/load functions removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha) fix inefficiency in decoding 32-bit BMP (David Woo) 1.29 (2010-08-16) various warning fixes from Aurelien Pocheville 1.28 (2010-08-01) fix bug in GIF palette transparency (SpartanJ) 1.27 (2010-08-01) cast-to-stbi_uc to fix warnings 1.26 (2010-07-24) fix bug in file buffering for PNG reported by SpartanJ 1.25 (2010-07-17) refix trans_data warning (Won Chun) 1.24 (2010-07-12) perf improvements reading from files on platforms with lock-heavy fgetc() minor perf improvements for jpeg deprecated type-specific functions so we'll get feedback if they're needed attempt to fix trans_data warning (Won Chun) 1.23 fixed bug in iPhone support 1.22 (2010-07-10) removed image *writing* support stbi_info support from Jetro Lauha GIF support from Jean-Marc Lienher iPhone PNG-extensions from James Brown warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. 
Janez (U+017D)emva) 1.21 fix use of 'stbi_uc' in header (reported by jon blow) 1.20 added support for Softimage PIC, by Tom Seddon 1.19 bug in interlaced PNG corruption check (found by ryg) 1.18 (2008-08-02) fix a threading bug (local mutable static) 1.17 support interlaced PNG 1.16 major bugfix - stbi__convert_format converted one too many pixels 1.15 initialize some fields for thread safety 1.14 fix threadsafe conversion bug header-file-only version (#define STBI_HEADER_FILE_ONLY before including) 1.13 threadsafe 1.12 const qualifiers in the API 1.11 Support installable IDCT, colorspace conversion routines 1.10 Fixes for 64-bit (don't use "unsigned long") optimized upsampling by Fabian "ryg" Giesen 1.09 Fix format-conversion for PSD code (bad global variables!) 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz 1.07 attempt to fix C++ warning/errors again 1.06 attempt to fix C++ warning/errors again 1.05 fix TGA loading to return correct *comp and use good luminance calc 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR 1.02 support for (subset of) HDR files, float interface for preferred access to them 1.01 fix bug: possible bug in handling right-side up bmps... 
not sure fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all 1.00 interface to zlib that skips zlib header 0.99 correct handling of alpha in palette 0.98 TGA loader by lonesock; dynamically add loaders (untested) 0.97 jpeg errors on too large a file; also catch another malloc failure 0.96 fix detection of invalid v value - particleman@mollyrocket forum 0.95 during header scan, seek to markers in case of padding 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same 0.93 handle jpegtran output; verbose errors 0.92 read 4,8,16,24,32-bit BMP files of several formats 0.91 output 24-bit Windows 3.0 BMP files 0.90 fix a few more warnings; bump version number to approach 1.0 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd 0.60 fix compiling as c++ 0.59 fix warnings: merge Dave Moore's -Wall fixes 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available 0.56 fix bug: zlib uncompressed mode len vs. nlen 0.55 fix bug: restart_interval not initialized to 0 0.54 allow NULL for 'int *comp' 0.53 fix bug in png 3->4; speedup png decoding 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments 0.51 obey req_comp requests, 1-component jpegs return as 1-component, on 'test' only check type, not whether we support this variant 0.50 (2006-11-19) first released version */ /* ------------------------------------------------------------------------------ This software is available under 2 licenses -- choose whichever you prefer. 
------------------------------------------------------------------------------ ALTERNATIVE A - MIT License Copyright (c) 2017 Sean Barrett Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------ ALTERNATIVE B - Public Domain (www.unlicense.org) This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------ */ DaemonEngine-crunch-ef4d32f/crnlib/stb_image_write.h000066400000000000000000002133461503722002600226160ustar00rootroot00000000000000/* stb_image_write - v1.16 - public domain - http://nothings.org/stb writes out PNG/BMP/TGA/JPEG/HDR images to C stdio - Sean Barrett 2010-2015 no warranty implied; use at your own risk Before #including, #define STB_IMAGE_WRITE_IMPLEMENTATION in the file that you want to have the implementation. Will probably not work correctly with strict-aliasing optimizations. ABOUT: This header file is a library for writing images to C stdio or a callback. The PNG output is not optimal; it is 20-50% larger than the file written by a decent optimizing implementation; though providing a custom zlib compress function (see STBIW_ZLIB_COMPRESS) can mitigate that. This library is designed for source code compactness and simplicity, not optimal image file size or run-time performance. BUILDING: You can #define STBIW_ASSERT(x) before the #include to avoid using assert.h. You can #define STBIW_MALLOC(), STBIW_REALLOC(), and STBIW_FREE() to replace malloc,realloc,free. 
You can #define STBIW_MEMMOVE() to replace memmove() You can #define STBIW_ZLIB_COMPRESS to use a custom zlib-style compress function for PNG compression (instead of the builtin one), it must have the following signature: unsigned char * my_compress(unsigned char *data, int data_len, int *out_len, int quality); The returned data will be freed with STBIW_FREE() (free() by default), so it must be heap allocated with STBIW_MALLOC() (malloc() by default), UNICODE: If compiling for Windows and you wish to use Unicode filenames, compile with #define STBIW_WINDOWS_UTF8 and pass utf8-encoded filenames. Call stbiw_convert_wchar_to_utf8 to convert Windows wchar_t filenames to utf8. USAGE: There are five functions, one for each image file format: int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); int stbi_write_jpg(char const *filename, int w, int h, int comp, const void *data, int quality); int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); void stbi_flip_vertically_on_write(int flag); // flag is non-zero to flip data vertically There are also five equivalent functions that use an arbitrary write function. 
You are expected to open/close your file-equivalent before and after calling these: int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); where the callback is: void stbi_write_func(void *context, void *data, int size); You can configure it with these global variables: int stbi_write_tga_with_rle; // defaults to true; set to 0 to disable RLE int stbi_write_png_compression_level; // defaults to 8; set to higher for more compression int stbi_write_force_png_filter; // defaults to -1; set to 0..5 to force a filter mode You can define STBI_WRITE_NO_STDIO to disable the file variant of these functions, so the library will not use stdio.h at all. However, this will also disable HDR writing, because it requires stdio for formatted output. Each function returns 0 on failure and non-0 on success. The functions create an image file defined by the parameters. The image is a rectangle of pixels stored from left-to-right, top-to-bottom. Each pixel contains 'comp' channels of data stored interleaved with 8-bits per channel, in the following order: 1=Y, 2=YA, 3=RGB, 4=RGBA. (Y is monochrome color.) The rectangle is 'w' pixels wide and 'h' pixels tall. The *data pointer points to the first byte of the top-left-most pixel. For PNG, "stride_in_bytes" is the distance in bytes from the first byte of a row of pixels to the first byte of the next row of pixels. PNG creates output files with the same number of components as the input. 
The BMP format expands Y to RGB in the file format and does not output alpha. PNG supports writing rectangles of data even when the bytes storing rows of data are not consecutive in memory (e.g. sub-rectangles of a larger image), by supplying the stride between the beginning of adjacent rows. The other formats do not. (Thus you cannot write a native-format BMP through the BMP writer, both because it is in BGR order and because it may have padding at the end of the line.) PNG allows you to set the deflate compression level by setting the global variable 'stbi_write_png_compression_level' (it defaults to 8). HDR expects linear float data. Since the format is always 32-bit rgb(e) data, alpha (if provided) is discarded, and for monochrome data it is replicated across all three channels. TGA supports RLE or non-RLE compressed data. To use non-RLE-compressed data, set the global variable 'stbi_write_tga_with_rle' to 0. JPEG does ignore alpha channels in input data; quality is between 1 and 100. Higher quality looks better but results in a bigger image. JPEG baseline (no JPEG progressive). CREDITS: Sean Barrett - PNG/BMP/TGA Baldur Karlsson - HDR Jean-Sebastien Guay - TGA monochrome Tim Kelsey - misc enhancements Alan Hickman - TGA RLE Emmanuel Julien - initial file IO callback implementation Jon Olick - original jo_jpeg.cpp code Daniel Gibson - integrate JPEG, allow external zlib Aarni Koskela - allow choosing PNG filter bugfixes: github:Chribba Guillaume Chereau github:jry2 github:romigrou Sergio Gonzalez Jonas Karlsson Filip Wasil Thatcher Ulrich github:poppolopoppo Patrick Boettcher github:xeekworx Cap Petschulat Simon Rodriguez Ivan Tikhonov github:ignotion Adam Schackart Andrew Kensler LICENSE See end of file for license information. 
*/ #ifndef INCLUDE_STB_IMAGE_WRITE_H #define INCLUDE_STB_IMAGE_WRITE_H #include // if STB_IMAGE_WRITE_STATIC causes problems, try defining STBIWDEF to 'inline' or 'static inline' #ifndef STBIWDEF #ifdef STB_IMAGE_WRITE_STATIC #define STBIWDEF static #else #ifdef __cplusplus #define STBIWDEF extern "C" #else #define STBIWDEF extern #endif #endif #endif #ifndef STB_IMAGE_WRITE_STATIC // C++ forbids static forward declarations STBIWDEF int stbi_write_tga_with_rle; STBIWDEF int stbi_write_png_compression_level; STBIWDEF int stbi_write_force_png_filter; #endif #ifndef STBI_WRITE_NO_STDIO STBIWDEF int stbi_write_png(char const *filename, int w, int h, int comp, const void *data, int stride_in_bytes); STBIWDEF int stbi_write_bmp(char const *filename, int w, int h, int comp, const void *data); STBIWDEF int stbi_write_tga(char const *filename, int w, int h, int comp, const void *data); STBIWDEF int stbi_write_hdr(char const *filename, int w, int h, int comp, const float *data); STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality); #ifdef STBIW_WINDOWS_UTF8 STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input); #endif #endif typedef void stbi_write_func(void *context, void *data, int size); STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data, int stride_in_bytes); STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const void *data); STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int w, int h, int comp, const float *data); STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality); STBIWDEF void stbi_flip_vertically_on_write(int flip_boolean); 
#endif//INCLUDE_STB_IMAGE_WRITE_H #ifdef STB_IMAGE_WRITE_IMPLEMENTATION #ifdef _WIN32 #ifndef _CRT_SECURE_NO_WARNINGS #define _CRT_SECURE_NO_WARNINGS #endif #ifndef _CRT_NONSTDC_NO_DEPRECATE #define _CRT_NONSTDC_NO_DEPRECATE #endif #endif #ifndef STBI_WRITE_NO_STDIO #include #endif // STBI_WRITE_NO_STDIO #include #include #include #include #if defined(STBIW_MALLOC) && defined(STBIW_FREE) && (defined(STBIW_REALLOC) || defined(STBIW_REALLOC_SIZED)) // ok #elif !defined(STBIW_MALLOC) && !defined(STBIW_FREE) && !defined(STBIW_REALLOC) && !defined(STBIW_REALLOC_SIZED) // ok #else #error "Must define all or none of STBIW_MALLOC, STBIW_FREE, and STBIW_REALLOC (or STBIW_REALLOC_SIZED)." #endif #ifndef STBIW_MALLOC #define STBIW_MALLOC(sz) malloc(sz) #define STBIW_REALLOC(p,newsz) realloc(p,newsz) #define STBIW_FREE(p) free(p) #endif #ifndef STBIW_REALLOC_SIZED #define STBIW_REALLOC_SIZED(p,oldsz,newsz) STBIW_REALLOC(p,newsz) #endif #ifndef STBIW_MEMMOVE #define STBIW_MEMMOVE(a,b,sz) memmove(a,b,sz) #endif #ifndef STBIW_ASSERT #include #define STBIW_ASSERT(x) assert(x) #endif #define STBIW_UCHAR(x) (unsigned char) ((x) & 0xff) #ifdef STB_IMAGE_WRITE_STATIC static int stbi_write_png_compression_level = 8; static int stbi_write_tga_with_rle = 1; static int stbi_write_force_png_filter = -1; #else int stbi_write_png_compression_level = 8; int stbi_write_tga_with_rle = 1; int stbi_write_force_png_filter = -1; #endif static int stbi__flip_vertically_on_write = 0; STBIWDEF void stbi_flip_vertically_on_write(int flag) { stbi__flip_vertically_on_write = flag; } typedef struct { stbi_write_func *func; void *context; unsigned char buffer[64]; int buf_used; } stbi__write_context; // initialize a callback-based context static void stbi__start_write_callbacks(stbi__write_context *s, stbi_write_func *c, void *context) { s->func = c; s->context = context; } #ifndef STBI_WRITE_NO_STDIO static void stbi__stdio_write(void *context, void *data, int size) { fwrite(data,1,size,(FILE*) context); 
} #if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8) #ifdef __cplusplus #define STBIW_EXTERN extern "C" #else #define STBIW_EXTERN extern #endif STBIW_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide); STBIW_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default); STBIWDEF int stbiw_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input) { return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL); } #endif static FILE *stbiw__fopen(char const *filename, char const *mode) { FILE *f; #if defined(_WIN32) && defined(STBIW_WINDOWS_UTF8) wchar_t wMode[64]; wchar_t wFilename[1024]; if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename))) return 0; if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode))) return 0; #if defined(_MSC_VER) && _MSC_VER >= 1400 if (0 != _wfopen_s(&f, wFilename, wMode)) f = 0; #else f = _wfopen(wFilename, wMode); #endif #elif defined(_MSC_VER) && _MSC_VER >= 1400 if (0 != fopen_s(&f, filename, mode)) f=0; #else f = fopen(filename, mode); #endif return f; } static int stbi__start_write_file(stbi__write_context *s, const char *filename) { FILE *f = stbiw__fopen(filename, "wb"); stbi__start_write_callbacks(s, stbi__stdio_write, (void *) f); return f != NULL; } static void stbi__end_write_file(stbi__write_context *s) { fclose((FILE *)s->context); } #endif // !STBI_WRITE_NO_STDIO typedef unsigned int stbiw_uint32; typedef int stb_image_write_test[sizeof(stbiw_uint32)==4 ? 
1 : -1]; static void stbiw__writefv(stbi__write_context *s, const char *fmt, va_list v) { while (*fmt) { switch (*fmt++) { case ' ': break; case '1': { unsigned char x = STBIW_UCHAR(va_arg(v, int)); s->func(s->context,&x,1); break; } case '2': { int x = va_arg(v,int); unsigned char b[2]; b[0] = STBIW_UCHAR(x); b[1] = STBIW_UCHAR(x>>8); s->func(s->context,b,2); break; } case '4': { stbiw_uint32 x = va_arg(v,int); unsigned char b[4]; b[0]=STBIW_UCHAR(x); b[1]=STBIW_UCHAR(x>>8); b[2]=STBIW_UCHAR(x>>16); b[3]=STBIW_UCHAR(x>>24); s->func(s->context,b,4); break; } default: STBIW_ASSERT(0); return; } } } static void stbiw__writef(stbi__write_context *s, const char *fmt, ...) { va_list v; va_start(v, fmt); stbiw__writefv(s, fmt, v); va_end(v); } static void stbiw__write_flush(stbi__write_context *s) { if (s->buf_used) { s->func(s->context, &s->buffer, s->buf_used); s->buf_used = 0; } } static void stbiw__putc(stbi__write_context *s, unsigned char c) { s->func(s->context, &c, 1); } static void stbiw__write1(stbi__write_context *s, unsigned char a) { if ((size_t)s->buf_used + 1 > sizeof(s->buffer)) stbiw__write_flush(s); s->buffer[s->buf_used++] = a; } static void stbiw__write3(stbi__write_context *s, unsigned char a, unsigned char b, unsigned char c) { int n; if ((size_t)s->buf_used + 3 > sizeof(s->buffer)) stbiw__write_flush(s); n = s->buf_used; s->buf_used = n+3; s->buffer[n+0] = a; s->buffer[n+1] = b; s->buffer[n+2] = c; } static void stbiw__write_pixel(stbi__write_context *s, int rgb_dir, int comp, int write_alpha, int expand_mono, unsigned char *d) { unsigned char bg[3] = { 255, 0, 255}, px[3]; int k; if (write_alpha < 0) stbiw__write1(s, d[comp - 1]); switch (comp) { case 2: // 2 pixels = mono + alpha, alpha is written separately, so same as 1-channel case case 1: if (expand_mono) stbiw__write3(s, d[0], d[0], d[0]); // monochrome bmp else stbiw__write1(s, d[0]); // monochrome TGA break; case 4: if (!write_alpha) { // composite against pink background for (k = 0; k < 
3; ++k) px[k] = bg[k] + ((d[k] - bg[k]) * d[3]) / 255; stbiw__write3(s, px[1 - rgb_dir], px[1], px[1 + rgb_dir]); break; } /* FALLTHROUGH */ case 3: stbiw__write3(s, d[1 - rgb_dir], d[1], d[1 + rgb_dir]); break; } if (write_alpha > 0) stbiw__write1(s, d[comp - 1]); } static void stbiw__write_pixels(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad, int expand_mono) { stbiw_uint32 zero = 0; int i,j, j_end; if (y <= 0) return; if (stbi__flip_vertically_on_write) vdir *= -1; if (vdir < 0) { j_end = -1; j = y-1; } else { j_end = y; j = 0; } for (; j != j_end; j += vdir) { for (i=0; i < x; ++i) { unsigned char *d = (unsigned char *) data + (j*x+i)*comp; stbiw__write_pixel(s, rgb_dir, comp, write_alpha, expand_mono, d); } stbiw__write_flush(s); s->func(s->context, &zero, scanline_pad); } } static int stbiw__outfile(stbi__write_context *s, int rgb_dir, int vdir, int x, int y, int comp, int expand_mono, void *data, int alpha, int pad, const char *fmt, ...) 
{ if (y < 0 || x < 0) { return 0; } else { va_list v; va_start(v, fmt); stbiw__writefv(s, fmt, v); va_end(v); stbiw__write_pixels(s,rgb_dir,vdir,x,y,comp,data,alpha,pad, expand_mono); return 1; } } static int stbi_write_bmp_core(stbi__write_context *s, int x, int y, int comp, const void *data) { if (comp != 4) { // write RGB bitmap int pad = (-x*3) & 3; return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *) data,0,pad, "11 4 22 4" "4 44 22 444444", 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header } else { // RGBA bitmaps need a v4 header // use BI_BITFIELDS mode with 32bpp and alpha mask // (straight BI_RGB with alpha mask doesn't work in most readers) return stbiw__outfile(s,-1,-1,x,y,comp,1,(void *)data,1,0, "11 4 22 4" "4 44 22 444444 4444 4 444 444 444 444", 'B', 'M', 14+108+x*y*4, 0, 0, 14+108, // file header 108, x,y, 1,32, 3,0,0,0,0,0, 0xff0000,0xff00,0xff,0xff000000u, 0, 0,0,0, 0,0,0, 0,0,0, 0,0,0); // bitmap V4 header } } STBIWDEF int stbi_write_bmp_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) { stbi__write_context s = { 0 }; stbi__start_write_callbacks(&s, func, context); return stbi_write_bmp_core(&s, x, y, comp, data); } #ifndef STBI_WRITE_NO_STDIO STBIWDEF int stbi_write_bmp(char const *filename, int x, int y, int comp, const void *data) { stbi__write_context s = { 0 }; if (stbi__start_write_file(&s,filename)) { int r = stbi_write_bmp_core(&s, x, y, comp, data); stbi__end_write_file(&s); return r; } else return 0; } #endif //!STBI_WRITE_NO_STDIO static int stbi_write_tga_core(stbi__write_context *s, int x, int y, int comp, void *data) { int has_alpha = (comp == 2 || comp == 4); int colorbytes = has_alpha ? comp-1 : comp; int format = colorbytes < 2 ? 
3 : 2; // 3 color channels (RGB/RGBA) = 2, 1 color channel (Y/YA) = 3 if (y < 0 || x < 0) return 0; if (!stbi_write_tga_with_rle) { return stbiw__outfile(s, -1, -1, x, y, comp, 0, (void *) data, has_alpha, 0, "111 221 2222 11", 0, 0, format, 0, 0, 0, 0, 0, x, y, (colorbytes + has_alpha) * 8, has_alpha * 8); } else { int i,j,k; int jend, jdir; stbiw__writef(s, "111 221 2222 11", 0,0,format+8, 0,0,0, 0,0,x,y, (colorbytes + has_alpha) * 8, has_alpha * 8); if (stbi__flip_vertically_on_write) { j = 0; jend = y; jdir = 1; } else { j = y-1; jend = -1; jdir = -1; } for (; j != jend; j += jdir) { unsigned char *row = (unsigned char *) data + j * x * comp; int len; for (i = 0; i < x; i += len) { unsigned char *begin = row + i * comp; int diff = 1; len = 1; if (i < x - 1) { ++len; diff = memcmp(begin, row + (i + 1) * comp, comp); if (diff) { const unsigned char *prev = begin; for (k = i + 2; k < x && len < 128; ++k) { if (memcmp(prev, row + k * comp, comp)) { prev += comp; ++len; } else { --len; break; } } } else { for (k = i + 2; k < x && len < 128; ++k) { if (!memcmp(begin, row + k * comp, comp)) { ++len; } else { break; } } } } if (diff) { unsigned char header = STBIW_UCHAR(len - 1); stbiw__write1(s, header); for (k = 0; k < len; ++k) { stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin + k * comp); } } else { unsigned char header = STBIW_UCHAR(len - 129); stbiw__write1(s, header); stbiw__write_pixel(s, -1, comp, has_alpha, 0, begin); } } } stbiw__write_flush(s); } return 1; } STBIWDEF int stbi_write_tga_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data) { stbi__write_context s = { 0 }; stbi__start_write_callbacks(&s, func, context); return stbi_write_tga_core(&s, x, y, comp, (void *) data); } #ifndef STBI_WRITE_NO_STDIO STBIWDEF int stbi_write_tga(char const *filename, int x, int y, int comp, const void *data) { stbi__write_context s = { 0 }; if (stbi__start_write_file(&s,filename)) { int r = stbi_write_tga_core(&s, x, y, comp, (void 
*) data); stbi__end_write_file(&s); return r; } else return 0; } #endif // ************************************************************************************************* // Radiance RGBE HDR writer // by Baldur Karlsson #define stbiw__max(a, b) ((a) > (b) ? (a) : (b)) #ifndef STBI_WRITE_NO_STDIO static void stbiw__linear_to_rgbe(unsigned char *rgbe, float *linear) { int exponent; float maxcomp = stbiw__max(linear[0], stbiw__max(linear[1], linear[2])); if (maxcomp < 1e-32f) { rgbe[0] = rgbe[1] = rgbe[2] = rgbe[3] = 0; } else { float normalize = (float) frexp(maxcomp, &exponent) * 256.0f/maxcomp; rgbe[0] = (unsigned char)(linear[0] * normalize); rgbe[1] = (unsigned char)(linear[1] * normalize); rgbe[2] = (unsigned char)(linear[2] * normalize); rgbe[3] = (unsigned char)(exponent + 128); } } static void stbiw__write_run_data(stbi__write_context *s, int length, unsigned char databyte) { unsigned char lengthbyte = STBIW_UCHAR(length+128); STBIW_ASSERT(length+128 <= 255); s->func(s->context, &lengthbyte, 1); s->func(s->context, &databyte, 1); } static void stbiw__write_dump_data(stbi__write_context *s, int length, unsigned char *data) { unsigned char lengthbyte = STBIW_UCHAR(length); STBIW_ASSERT(length <= 128); // inconsistent with spec but consistent with official code s->func(s->context, &lengthbyte, 1); s->func(s->context, data, length); } static void stbiw__write_hdr_scanline(stbi__write_context *s, int width, int ncomp, unsigned char *scratch, float *scanline) { unsigned char scanlineheader[4] = { 2, 2, 0, 0 }; unsigned char rgbe[4]; float linear[3]; int x; scanlineheader[2] = (width&0xff00)>>8; scanlineheader[3] = (width&0x00ff); /* skip RLE for images too small or large */ if (width < 8 || width >= 32768) { for (x=0; x < width; x++) { switch (ncomp) { case 4: /* fallthrough */ case 3: linear[2] = scanline[x*ncomp + 2]; linear[1] = scanline[x*ncomp + 1]; linear[0] = scanline[x*ncomp + 0]; break; default: linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; 
break; } stbiw__linear_to_rgbe(rgbe, linear); s->func(s->context, rgbe, 4); } } else { int c,r; /* encode into scratch buffer */ for (x=0; x < width; x++) { switch(ncomp) { case 4: /* fallthrough */ case 3: linear[2] = scanline[x*ncomp + 2]; linear[1] = scanline[x*ncomp + 1]; linear[0] = scanline[x*ncomp + 0]; break; default: linear[0] = linear[1] = linear[2] = scanline[x*ncomp + 0]; break; } stbiw__linear_to_rgbe(rgbe, linear); scratch[x + width*0] = rgbe[0]; scratch[x + width*1] = rgbe[1]; scratch[x + width*2] = rgbe[2]; scratch[x + width*3] = rgbe[3]; } s->func(s->context, scanlineheader, 4); /* RLE each component separately */ for (c=0; c < 4; c++) { unsigned char *comp = &scratch[width*c]; x = 0; while (x < width) { // find first run r = x; while (r+2 < width) { if (comp[r] == comp[r+1] && comp[r] == comp[r+2]) break; ++r; } if (r+2 >= width) r = width; // dump up to first run while (x < r) { int len = r-x; if (len > 128) len = 128; stbiw__write_dump_data(s, len, &comp[x]); x += len; } // if there's a run, output it if (r+2 < width) { // same test as what we break out of in search loop, so only true if we break'd // find next byte after run while (r < width && comp[r] == comp[x]) ++r; // output run up to r while (x < r) { int len = r-x; if (len > 127) len = 127; stbiw__write_run_data(s, len, comp[x]); x += len; } } } } } } static int stbi_write_hdr_core(stbi__write_context *s, int x, int y, int comp, float *data) { if (y <= 0 || x <= 0 || data == NULL) return 0; else { // Each component is stored separately. Allocate scratch space for full output scanline. 
unsigned char *scratch = (unsigned char *) STBIW_MALLOC(x*4); int i, len; char buffer[128]; char header[] = "#?RADIANCE\n# Written by stb_image_write.h\nFORMAT=32-bit_rle_rgbe\n"; s->func(s->context, header, sizeof(header)-1); #ifdef __STDC_LIB_EXT1__ len = sprintf_s(buffer, sizeof(buffer), "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); #else len = snprintf(buffer, sizeof(buffer), "EXPOSURE= 1.0000000000000\n\n-Y %d +X %d\n", y, x); #endif s->func(s->context, buffer, len); for(i=0; i < y; i++) stbiw__write_hdr_scanline(s, x, comp, scratch, data + comp*x*(stbi__flip_vertically_on_write ? y-1-i : i)); STBIW_FREE(scratch); return 1; } } STBIWDEF int stbi_write_hdr_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const float *data) { stbi__write_context s = { 0 }; stbi__start_write_callbacks(&s, func, context); return stbi_write_hdr_core(&s, x, y, comp, (float *) data); } STBIWDEF int stbi_write_hdr(char const *filename, int x, int y, int comp, const float *data) { stbi__write_context s = { 0 }; if (stbi__start_write_file(&s,filename)) { int r = stbi_write_hdr_core(&s, x, y, comp, (float *) data); stbi__end_write_file(&s); return r; } else return 0; } #endif // STBI_WRITE_NO_STDIO ////////////////////////////////////////////////////////////////////////////// // // PNG writer // #ifndef STBIW_ZLIB_COMPRESS // stretchy buffer; stbiw__sbpush() == vector<>::push_back() -- stbiw__sbcount() == vector<>::size() #define stbiw__sbraw(a) ((int *) (void *) (a) - 2) #define stbiw__sbm(a) stbiw__sbraw(a)[0] #define stbiw__sbn(a) stbiw__sbraw(a)[1] #define stbiw__sbneedgrow(a,n) ((a)==0 || stbiw__sbn(a)+n >= stbiw__sbm(a)) #define stbiw__sbmaybegrow(a,n) (stbiw__sbneedgrow(a,(n)) ? stbiw__sbgrow(a,n) : 0) #define stbiw__sbgrow(a,n) stbiw__sbgrowf((void **) &(a), (n), sizeof(*(a))) #define stbiw__sbpush(a, v) (stbiw__sbmaybegrow(a,1), (a)[stbiw__sbn(a)++] = (v)) #define stbiw__sbcount(a) ((a) ? stbiw__sbn(a) : 0) #define stbiw__sbfree(a) ((a) ? 
STBIW_FREE(stbiw__sbraw(a)),0 : 0) static void *stbiw__sbgrowf(void **arr, int increment, int itemsize) { int m = *arr ? 2*stbiw__sbm(*arr)+increment : increment+1; void *p = STBIW_REALLOC_SIZED(*arr ? stbiw__sbraw(*arr) : 0, *arr ? (stbiw__sbm(*arr)*itemsize + sizeof(int)*2) : 0, (size_t)itemsize * (size_t)m + sizeof(int)*2); STBIW_ASSERT(p); if (p) { if (!*arr) ((int *) p)[1] = 0; *arr = (void *) ((int *) p + 2); stbiw__sbm(*arr) = m; } return *arr; } static unsigned char *stbiw__zlib_flushf(unsigned char *data, unsigned int *bitbuffer, int *bitcount) { while (*bitcount >= 8) { stbiw__sbpush(data, STBIW_UCHAR(*bitbuffer)); *bitbuffer >>= 8; *bitcount -= 8; } return data; } static int stbiw__zlib_bitrev(int code, int codebits) { int res=0; while (codebits--) { res = (res << 1) | (code & 1); code >>= 1; } return res; } static unsigned int stbiw__zlib_countm(unsigned char *a, unsigned char *b, int limit) { int i; for (i=0; i < limit && i < 258; ++i) if (a[i] != b[i]) break; return i; } static unsigned int stbiw__zhash(unsigned char *data) { stbiw_uint32 hash = data[0] + (data[1] << 8) + (data[2] << 16); hash ^= hash << 3; hash += hash >> 5; hash ^= hash << 4; hash += hash >> 17; hash ^= hash << 25; hash += hash >> 6; return hash; } #define stbiw__zlib_flush() (out = stbiw__zlib_flushf(out, &bitbuf, &bitcount)) #define stbiw__zlib_add(code,codebits) \ (bitbuf |= (code) << bitcount, bitcount += (codebits), stbiw__zlib_flush()) #define stbiw__zlib_huffa(b,c) stbiw__zlib_add(stbiw__zlib_bitrev(b,c),c) // default huffman tables #define stbiw__zlib_huff1(n) stbiw__zlib_huffa(0x30 + (n), 8) #define stbiw__zlib_huff2(n) stbiw__zlib_huffa(0x190 + (n)-144, 9) #define stbiw__zlib_huff3(n) stbiw__zlib_huffa(0 + (n)-256,7) #define stbiw__zlib_huff4(n) stbiw__zlib_huffa(0xc0 + (n)-280,8) #define stbiw__zlib_huff(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : (n) <= 255 ? stbiw__zlib_huff2(n) : (n) <= 279 ? 
stbiw__zlib_huff3(n) : stbiw__zlib_huff4(n)) #define stbiw__zlib_huffb(n) ((n) <= 143 ? stbiw__zlib_huff1(n) : stbiw__zlib_huff2(n)) #define stbiw__ZHASH 16384 #endif // STBIW_ZLIB_COMPRESS STBIWDEF unsigned char * stbi_zlib_compress(unsigned char *data, int data_len, int *out_len, int quality) { #ifdef STBIW_ZLIB_COMPRESS // user provided a zlib compress implementation, use that return STBIW_ZLIB_COMPRESS(data, data_len, out_len, quality); #else // use builtin static unsigned short lengthc[] = { 3,4,5,6,7,8,9,10,11,13,15,17,19,23,27,31,35,43,51,59,67,83,99,115,131,163,195,227,258, 259 }; static unsigned char lengtheb[]= { 0,0,0,0,0,0,0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; static unsigned short distc[] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577, 32768 }; static unsigned char disteb[] = { 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13 }; unsigned int bitbuf=0; int i,j, bitcount=0; unsigned char *out = NULL; unsigned char ***hash_table = (unsigned char***) STBIW_MALLOC(stbiw__ZHASH * sizeof(unsigned char**)); if (hash_table == NULL) return NULL; if (quality < 5) quality = 5; stbiw__sbpush(out, 0x78); // DEFLATE 32K window stbiw__sbpush(out, 0x5e); // FLEVEL = 1 stbiw__zlib_add(1,1); // BFINAL = 1 stbiw__zlib_add(1,2); // BTYPE = 1 -- fixed huffman for (i=0; i < stbiw__ZHASH; ++i) hash_table[i] = NULL; i=0; while (i < data_len-3) { // hash next 3 bytes of data to be compressed int h = stbiw__zhash(data+i)&(stbiw__ZHASH-1), best=3; unsigned char *bestloc = 0; unsigned char **hlist = hash_table[h]; int n = stbiw__sbcount(hlist); for (j=0; j < n; ++j) { if (hlist[j]-data > i-32768) { // if entry lies within window int d = stbiw__zlib_countm(hlist[j], data+i, data_len-i); if (d >= best) { best=d; bestloc=hlist[j]; } } } // when hash table entry is too long, delete half the entries if (hash_table[h] && stbiw__sbn(hash_table[h]) == 2*quality) { 
STBIW_MEMMOVE(hash_table[h], hash_table[h]+quality, sizeof(hash_table[h][0])*quality); stbiw__sbn(hash_table[h]) = quality; } stbiw__sbpush(hash_table[h],data+i); if (bestloc) { // "lazy matching" - check match at *next* byte, and if it's better, do cur byte as literal h = stbiw__zhash(data+i+1)&(stbiw__ZHASH-1); hlist = hash_table[h]; n = stbiw__sbcount(hlist); for (j=0; j < n; ++j) { if (hlist[j]-data > i-32767) { int e = stbiw__zlib_countm(hlist[j], data+i+1, data_len-i-1); if (e > best) { // if next match is better, bail on current match bestloc = NULL; break; } } } } if (bestloc) { int d = (int) (data+i - bestloc); // distance back STBIW_ASSERT(d <= 32767 && best <= 258); for (j=0; best > lengthc[j+1]-1; ++j); stbiw__zlib_huff(j+257); if (lengtheb[j]) stbiw__zlib_add(best - lengthc[j], lengtheb[j]); for (j=0; d > distc[j+1]-1; ++j); stbiw__zlib_add(stbiw__zlib_bitrev(j,5),5); if (disteb[j]) stbiw__zlib_add(d - distc[j], disteb[j]); i += best; } else { stbiw__zlib_huffb(data[i]); ++i; } } // write out final bytes for (;i < data_len; ++i) stbiw__zlib_huffb(data[i]); stbiw__zlib_huff(256); // end of block // pad with 0 bits to byte boundary while (bitcount) stbiw__zlib_add(0,1); for (i=0; i < stbiw__ZHASH; ++i) (void) stbiw__sbfree(hash_table[i]); STBIW_FREE(hash_table); // store uncompressed instead if compression was worse if (stbiw__sbn(out) > data_len + 2 + ((data_len+32766)/32767)*5) { stbiw__sbn(out) = 2; // truncate to DEFLATE 32K window and FLEVEL = 1 for (j = 0; j < data_len;) { int blocklen = data_len - j; if (blocklen > 32767) blocklen = 32767; stbiw__sbpush(out, data_len - j == blocklen); // BFINAL = ?, BTYPE = 0 -- no compression stbiw__sbpush(out, STBIW_UCHAR(blocklen)); // LEN stbiw__sbpush(out, STBIW_UCHAR(blocklen >> 8)); stbiw__sbpush(out, STBIW_UCHAR(~blocklen)); // NLEN stbiw__sbpush(out, STBIW_UCHAR(~blocklen >> 8)); memcpy(out+stbiw__sbn(out), data+j, blocklen); stbiw__sbn(out) += blocklen; j += blocklen; } } { // compute adler32 on input 
unsigned int s1=1, s2=0; int blocklen = (int) (data_len % 5552); j=0; while (j < data_len) { for (i=0; i < blocklen; ++i) { s1 += data[j+i]; s2 += s1; } s1 %= 65521; s2 %= 65521; j += blocklen; blocklen = 5552; } stbiw__sbpush(out, STBIW_UCHAR(s2 >> 8)); stbiw__sbpush(out, STBIW_UCHAR(s2)); stbiw__sbpush(out, STBIW_UCHAR(s1 >> 8)); stbiw__sbpush(out, STBIW_UCHAR(s1)); } *out_len = stbiw__sbn(out); // make returned pointer freeable STBIW_MEMMOVE(stbiw__sbraw(out), out, *out_len); return (unsigned char *) stbiw__sbraw(out); #endif // STBIW_ZLIB_COMPRESS } static unsigned int stbiw__crc32(unsigned char *buffer, int len) { #ifdef STBIW_CRC32 return STBIW_CRC32(buffer, len); #else static unsigned int crc_table[256] = { 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F, 0xE963A535, 0x9E6495A3, 0x0eDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9, 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924, 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950, 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65, 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 
0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F, 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB, 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B, 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236, 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D, 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242, 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777, 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9, 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D }; unsigned int crc = ~0u; int i; for (i=0; i < len; ++i) crc = (crc >> 8) ^ crc_table[buffer[i] ^ (crc & 0xff)]; return ~crc; #endif } #define stbiw__wpng4(o,a,b,c,d) 
((o)[0]=STBIW_UCHAR(a),(o)[1]=STBIW_UCHAR(b),(o)[2]=STBIW_UCHAR(c),(o)[3]=STBIW_UCHAR(d),(o)+=4) #define stbiw__wp32(data,v) stbiw__wpng4(data, (v)>>24,(v)>>16,(v)>>8,(v)); #define stbiw__wptag(data,s) stbiw__wpng4(data, s[0],s[1],s[2],s[3]) static void stbiw__wpcrc(unsigned char **data, int len) { unsigned int crc = stbiw__crc32(*data - len - 4, len+4); stbiw__wp32(*data, crc); } static unsigned char stbiw__paeth(int a, int b, int c) { int p = a + b - c, pa = abs(p-a), pb = abs(p-b), pc = abs(p-c); if (pa <= pb && pa <= pc) return STBIW_UCHAR(a); if (pb <= pc) return STBIW_UCHAR(b); return STBIW_UCHAR(c); } // @OPTIMIZE: provide an option that always forces left-predict or paeth predict static void stbiw__encode_png_line(unsigned char *pixels, int stride_bytes, int width, int height, int y, int n, int filter_type, signed char *line_buffer) { static int mapping[] = { 0,1,2,3,4 }; static int firstmap[] = { 0,1,0,5,6 }; int *mymap = (y != 0) ? mapping : firstmap; int i; int type = mymap[filter_type]; unsigned char *z = pixels + stride_bytes * (stbi__flip_vertically_on_write ? height-1-y : y); int signed_stride = stbi__flip_vertically_on_write ? 
-stride_bytes : stride_bytes; if (type==0) { memcpy(line_buffer, z, (size_t)width * (size_t)n); return; } // first loop isn't optimized since it's just one pixel for (i = 0; i < n; ++i) { switch (type) { case 1: line_buffer[i] = z[i]; break; case 2: line_buffer[i] = z[i] - z[i-signed_stride]; break; case 3: line_buffer[i] = z[i] - (z[i-signed_stride]>>1); break; case 4: line_buffer[i] = (signed char) (z[i] - stbiw__paeth(0,z[i-signed_stride],0)); break; case 5: line_buffer[i] = z[i]; break; case 6: line_buffer[i] = z[i]; break; } } switch (type) { case 1: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-n]; break; case 2: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - z[i-signed_stride]; break; case 3: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - ((z[i-n] + z[i-signed_stride])>>1); break; case 4: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], z[i-signed_stride], z[i-signed_stride-n]); break; case 5: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - (z[i-n]>>1); break; case 6: for (i=n; i < width*n; ++i) line_buffer[i] = z[i] - stbiw__paeth(z[i-n], 0,0); break; } } STBIWDEF unsigned char *stbi_write_png_to_mem(const unsigned char *pixels, int stride_bytes, int x, int y, int n, int *out_len) { int force_filter = stbi_write_force_png_filter; int ctype[5] = { -1, 0, 4, 2, 6 }; unsigned char sig[8] = { 137,80,78,71,13,10,26,10 }; unsigned char *out,*o, *filt, *zlib; signed char *line_buffer; int j,zlen; if (stride_bytes == 0) stride_bytes = x * n; if (force_filter >= 5) { force_filter = -1; } filt = (unsigned char *) STBIW_MALLOC(((size_t)x * (size_t)n + 1) * (size_t)y); if (!filt) return 0; line_buffer = (signed char *) STBIW_MALLOC((size_t)x * (size_t)n); if (!line_buffer) { STBIW_FREE(filt); return 0; } for (j=0; j < y; ++j) { int filter_type; if (force_filter > -1) { filter_type = force_filter; stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, force_filter, line_buffer); } else { // Estimate the 
best filter by running through all of them: int best_filter = 0, best_filter_val = 0x7fffffff, est, i; for (filter_type = 0; filter_type < 5; filter_type++) { stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, filter_type, line_buffer); // Estimate the entropy of the line using this filter; the less, the better. est = 0; for (i = 0; i < x*n; ++i) { est += abs((signed char) line_buffer[i]); } if (est < best_filter_val) { best_filter_val = est; best_filter = filter_type; } } if (filter_type != best_filter) { // If the last iteration already got us the best filter, don't redo it stbiw__encode_png_line((unsigned char*)(pixels), stride_bytes, x, y, j, n, best_filter, line_buffer); filter_type = best_filter; } } // when we get here, filter_type contains the filter type, and line_buffer contains the data filt[(size_t)j * ((size_t)x * (size_t)n + 1)] = (unsigned char) filter_type; STBIW_MEMMOVE(filt + (size_t)j * ((size_t)x * (size_t)n + 1) + 1, line_buffer, (size_t)x * (size_t)n); } STBIW_FREE(line_buffer); zlib = stbi_zlib_compress(filt, y*( x*n+1), &zlen, stbi_write_png_compression_level); STBIW_FREE(filt); if (!zlib) return 0; // each tag requires 12 bytes of overhead out = (unsigned char *) STBIW_MALLOC(8 + 12+13 + 12+zlen + 12); if (!out) return 0; *out_len = 8 + 12+13 + 12+zlen + 12; o=out; STBIW_MEMMOVE(o,sig,8); o+= 8; stbiw__wp32(o, 13); // header length stbiw__wptag(o, "IHDR"); stbiw__wp32(o, x); stbiw__wp32(o, y); *o++ = 8; *o++ = STBIW_UCHAR(ctype[n]); *o++ = 0; *o++ = 0; *o++ = 0; stbiw__wpcrc(&o,13); stbiw__wp32(o, zlen); stbiw__wptag(o, "IDAT"); STBIW_MEMMOVE(o, zlib, zlen); o += zlen; STBIW_FREE(zlib); stbiw__wpcrc(&o, zlen); stbiw__wp32(o,0); stbiw__wptag(o, "IEND"); stbiw__wpcrc(&o,0); STBIW_ASSERT(o == out + *out_len); return out; } #ifndef STBI_WRITE_NO_STDIO STBIWDEF int stbi_write_png(char const *filename, int x, int y, int comp, const void *data, int stride_bytes) { FILE *f; int len; unsigned char *png = 
stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len); if (png == NULL) return 0; f = stbiw__fopen(filename, "wb"); if (!f) { STBIW_FREE(png); return 0; } fwrite(png, 1, len, f); fclose(f); STBIW_FREE(png); return 1; } #endif STBIWDEF int stbi_write_png_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int stride_bytes) { int len; unsigned char *png = stbi_write_png_to_mem((const unsigned char *) data, stride_bytes, x, y, comp, &len); if (png == NULL) return 0; func(context, png, len); STBIW_FREE(png); return 1; } /* *************************************************************************** * * JPEG writer * * This is based on Jon Olick's jo_jpeg.cpp: * public domain Simple, Minimalistic JPEG writer - http://www.jonolick.com/code.html */ static const unsigned char stbiw__jpg_ZigZag[] = { 0,1,5,6,14,15,27,28,2,4,7,13,16,26,29,42,3,8,12,17,25,30,41,43,9,11,18, 24,31,40,44,53,10,19,23,32,39,45,52,54,20,22,33,38,46,51,55,60,21,34,37,47,50,56,59,61,35,36,48,49,57,58,62,63 }; static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitCntP, const unsigned short *bs) { int bitBuf = *bitBufP, bitCnt = *bitCntP; bitCnt += bs[1]; bitBuf |= bs[0] << (24 - bitCnt); while(bitCnt >= 8) { unsigned char c = (bitBuf >> 16) & 255; stbiw__putc(s, c); if(c == 255) { stbiw__putc(s, 0); } bitBuf <<= 8; bitCnt -= 8; } *bitBufP = bitBuf; *bitCntP = bitCnt; } static void stbiw__jpg_DCT(float *d0p, float *d1p, float *d2p, float *d3p, float *d4p, float *d5p, float *d6p, float *d7p) { float d0 = *d0p, d1 = *d1p, d2 = *d2p, d3 = *d3p, d4 = *d4p, d5 = *d5p, d6 = *d6p, d7 = *d7p; float z1, z2, z3, z4, z5, z11, z13; float tmp0 = d0 + d7; float tmp7 = d0 - d7; float tmp1 = d1 + d6; float tmp6 = d1 - d6; float tmp2 = d2 + d5; float tmp5 = d2 - d5; float tmp3 = d3 + d4; float tmp4 = d3 - d4; // Even part float tmp10 = tmp0 + tmp3; // phase 2 float tmp13 = tmp0 - tmp3; float tmp11 = tmp1 + tmp2; float tmp12 = tmp1 
- tmp2; d0 = tmp10 + tmp11; // phase 3 d4 = tmp10 - tmp11; z1 = (tmp12 + tmp13) * 0.707106781f; // c4 d2 = tmp13 + z1; // phase 5 d6 = tmp13 - z1; // Odd part tmp10 = tmp4 + tmp5; // phase 2 tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; // The rotator is modified from fig 4-8 to avoid extra negations. z5 = (tmp10 - tmp12) * 0.382683433f; // c6 z2 = tmp10 * 0.541196100f + z5; // c2-c6 z4 = tmp12 * 1.306562965f + z5; // c2+c6 z3 = tmp11 * 0.707106781f; // c4 z11 = tmp7 + z3; // phase 5 z13 = tmp7 - z3; *d5p = z13 + z2; // phase 6 *d3p = z13 - z2; *d1p = z11 + z4; *d7p = z11 - z4; *d0p = d0; *d2p = d2; *d4p = d4; *d6p = d6; } static void stbiw__jpg_calcBits(int val, unsigned short bits[2]) { int tmp1 = val < 0 ? -val : val; val = val < 0 ? val-1 : val; bits[1] = 1; while(tmp1 >>= 1) { ++bits[1]; } bits[0] = val & ((1<0)&&(DU[end0pos]==0); --end0pos) { } // end0pos = first element in reverse order !=0 if(end0pos == 0) { stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); return DU[0]; } for(i = 1; i <= end0pos; ++i) { int startpos = i; int nrzeroes; unsigned short bits[2]; for (; DU[i]==0 && i<=end0pos; ++i) { } nrzeroes = i-startpos; if ( nrzeroes >= 16 ) { int lng = nrzeroes>>4; int nrmarker; for (nrmarker=1; nrmarker <= lng; ++nrmarker) stbiw__jpg_writeBits(s, bitBuf, bitCnt, M16zeroes); nrzeroes &= 15; } stbiw__jpg_calcBits(DU[i], bits); stbiw__jpg_writeBits(s, bitBuf, bitCnt, HTAC[(nrzeroes<<4)+bits[1]]); stbiw__jpg_writeBits(s, bitBuf, bitCnt, bits); } if(end0pos != 63) { stbiw__jpg_writeBits(s, bitBuf, bitCnt, EOB); } return DU[0]; } static int stbi_write_jpg_core(stbi__write_context *s, int width, int height, int comp, const void* data, int quality) { // Constants that don't pollute global namespace static const unsigned char std_dc_luminance_nrcodes[] = {0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0}; static const unsigned char std_dc_luminance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; static const unsigned char std_ac_luminance_nrcodes[] = {0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d}; 
static const unsigned char std_ac_luminance_values[] = { 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08, 0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28, 0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59, 0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6, 0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2, 0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa }; static const unsigned char std_dc_chrominance_nrcodes[] = {0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0}; static const unsigned char std_dc_chrominance_values[] = {0,1,2,3,4,5,6,7,8,9,10,11}; static const unsigned char std_ac_chrominance_nrcodes[] = {0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77}; static const unsigned char std_ac_chrominance_values[] = { 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91, 0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26, 0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58, 0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4, 0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda, 
0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,0xf9,0xfa }; // Huffman tables static const unsigned short YDC_HT[256][2] = { {0,2},{2,3},{3,3},{4,3},{5,3},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9}}; static const unsigned short UVDC_HT[256][2] = { {0,2},{1,2},{2,2},{6,3},{14,4},{30,5},{62,6},{126,7},{254,8},{510,9},{1022,10},{2046,11}}; static const unsigned short YAC_HT[256][2] = { {10,4},{0,2},{1,2},{4,3},{11,4},{26,5},{120,7},{248,8},{1014,10},{65410,16},{65411,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {12,4},{27,5},{121,7},{502,9},{2038,11},{65412,16},{65413,16},{65414,16},{65415,16},{65416,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {28,5},{249,8},{1015,10},{4084,12},{65417,16},{65418,16},{65419,16},{65420,16},{65421,16},{65422,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {58,6},{503,9},{4085,12},{65423,16},{65424,16},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {59,6},{1016,10},{65430,16},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {122,7},{2039,11},{65438,16},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {123,7},{4086,12},{65446,16},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {250,8},{4087,12},{65454,16},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {504,9},{32704,15},{65462,16},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {505,9},{65470,16},{65471,16},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {506,9},{65479,16},{65480,16},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 
{1017,10},{65488,16},{65489,16},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {1018,10},{65497,16},{65498,16},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {2040,11},{65506,16},{65507,16},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {65515,16},{65516,16},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{0,0},{0,0},{0,0},{0,0},{0,0}, {2041,11},{65525,16},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} }; static const unsigned short UVAC_HT[256][2] = { {0,2},{1,2},{4,3},{10,4},{24,5},{25,5},{56,6},{120,7},{500,9},{1014,10},{4084,12},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {11,4},{57,6},{246,8},{501,9},{2038,11},{4085,12},{65416,16},{65417,16},{65418,16},{65419,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {26,5},{247,8},{1015,10},{4086,12},{32706,15},{65420,16},{65421,16},{65422,16},{65423,16},{65424,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {27,5},{248,8},{1016,10},{4087,12},{65425,16},{65426,16},{65427,16},{65428,16},{65429,16},{65430,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {58,6},{502,9},{65431,16},{65432,16},{65433,16},{65434,16},{65435,16},{65436,16},{65437,16},{65438,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {59,6},{1017,10},{65439,16},{65440,16},{65441,16},{65442,16},{65443,16},{65444,16},{65445,16},{65446,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {121,7},{2039,11},{65447,16},{65448,16},{65449,16},{65450,16},{65451,16},{65452,16},{65453,16},{65454,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {122,7},{2040,11},{65455,16},{65456,16},{65457,16},{65458,16},{65459,16},{65460,16},{65461,16},{65462,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 
{249,8},{65463,16},{65464,16},{65465,16},{65466,16},{65467,16},{65468,16},{65469,16},{65470,16},{65471,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {503,9},{65472,16},{65473,16},{65474,16},{65475,16},{65476,16},{65477,16},{65478,16},{65479,16},{65480,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {504,9},{65481,16},{65482,16},{65483,16},{65484,16},{65485,16},{65486,16},{65487,16},{65488,16},{65489,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {505,9},{65490,16},{65491,16},{65492,16},{65493,16},{65494,16},{65495,16},{65496,16},{65497,16},{65498,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {506,9},{65499,16},{65500,16},{65501,16},{65502,16},{65503,16},{65504,16},{65505,16},{65506,16},{65507,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {2041,11},{65508,16},{65509,16},{65510,16},{65511,16},{65512,16},{65513,16},{65514,16},{65515,16},{65516,16},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, {16352,14},{65517,16},{65518,16},{65519,16},{65520,16},{65521,16},{65522,16},{65523,16},{65524,16},{65525,16},{0,0},{0,0},{0,0},{0,0},{0,0}, {1018,10},{32707,15},{65526,16},{65527,16},{65528,16},{65529,16},{65530,16},{65531,16},{65532,16},{65533,16},{65534,16},{0,0},{0,0},{0,0},{0,0},{0,0} }; static const int YQT[] = {16,11,10,16,24,40,51,61,12,12,14,19,26,58,60,55,14,13,16,24,40,57,69,56,14,17,22,29,51,87,80,62,18,22, 37,56,68,109,103,77,24,35,55,64,81,104,113,92,49,64,78,87,103,121,120,101,72,92,95,98,112,100,103,99}; static const int UVQT[] = {17,18,24,47,99,99,99,99,18,21,26,66,99,99,99,99,24,26,56,99,99,99,99,99,47,66,99,99,99,99,99,99, 99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99}; static const float aasf[] = { 1.0f * 2.828427125f, 1.387039845f * 2.828427125f, 1.306562965f * 2.828427125f, 1.175875602f * 2.828427125f, 1.0f * 2.828427125f, 0.785694958f * 2.828427125f, 0.541196100f * 2.828427125f, 0.275899379f * 2.828427125f }; int row, col, i, k, subsample; float fdtbl_Y[64], fdtbl_UV[64]; unsigned char YTable[64], UVTable[64]; if(!data || !width || !height || 
comp > 4 || comp < 1) { return 0; } quality = quality ? quality : 90; subsample = quality <= 90 ? 1 : 0; quality = quality < 1 ? 1 : quality > 100 ? 100 : quality; quality = quality < 50 ? 5000 / quality : 200 - quality * 2; for(i = 0; i < 64; ++i) { int uvti, yti = (YQT[i]*quality+50)/100; YTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (yti < 1 ? 1 : yti > 255 ? 255 : yti); uvti = (UVQT[i]*quality+50)/100; UVTable[stbiw__jpg_ZigZag[i]] = (unsigned char) (uvti < 1 ? 1 : uvti > 255 ? 255 : uvti); } for(row = 0, k = 0; row < 8; ++row) { for(col = 0; col < 8; ++col, ++k) { fdtbl_Y[k] = 1 / (YTable [stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); fdtbl_UV[k] = 1 / (UVTable[stbiw__jpg_ZigZag[k]] * aasf[row] * aasf[col]); } } // Write Headers { static const unsigned char head0[] = { 0xFF,0xD8,0xFF,0xE0,0,0x10,'J','F','I','F',0,1,1,0,0,1,0,1,0,0,0xFF,0xDB,0,0x84,0 }; static const unsigned char head2[] = { 0xFF,0xDA,0,0xC,3,1,0,2,0x11,3,0x11,0,0x3F,0 }; const unsigned char head1[] = { 0xFF,0xC0,0,0x11,8,(unsigned char)(height>>8),STBIW_UCHAR(height),(unsigned char)(width>>8),STBIW_UCHAR(width), 3,1,(unsigned char)(subsample?0x22:0x11),0,2,0x11,1,3,0x11,1,0xFF,0xC4,0x01,0xA2,0 }; s->func(s->context, (void*)head0, sizeof(head0)); s->func(s->context, (void*)YTable, sizeof(YTable)); stbiw__putc(s, 1); s->func(s->context, UVTable, sizeof(UVTable)); s->func(s->context, (void*)head1, sizeof(head1)); s->func(s->context, (void*)(std_dc_luminance_nrcodes+1), sizeof(std_dc_luminance_nrcodes)-1); s->func(s->context, (void*)std_dc_luminance_values, sizeof(std_dc_luminance_values)); stbiw__putc(s, 0x10); // HTYACinfo s->func(s->context, (void*)(std_ac_luminance_nrcodes+1), sizeof(std_ac_luminance_nrcodes)-1); s->func(s->context, (void*)std_ac_luminance_values, sizeof(std_ac_luminance_values)); stbiw__putc(s, 1); // HTUDCinfo s->func(s->context, (void*)(std_dc_chrominance_nrcodes+1), sizeof(std_dc_chrominance_nrcodes)-1); s->func(s->context, (void*)std_dc_chrominance_values, 
sizeof(std_dc_chrominance_values)); stbiw__putc(s, 0x11); // HTUACinfo s->func(s->context, (void*)(std_ac_chrominance_nrcodes+1), sizeof(std_ac_chrominance_nrcodes)-1); s->func(s->context, (void*)std_ac_chrominance_values, sizeof(std_ac_chrominance_values)); s->func(s->context, (void*)head2, sizeof(head2)); } // Encode 8x8 macroblocks { static const unsigned short fillBits[] = {0x7F, 7}; int DCY=0, DCU=0, DCV=0; int bitBuf=0, bitCnt=0; // comp == 2 is grey+alpha (alpha is ignored) int ofsG = comp > 2 ? 1 : 0, ofsB = comp > 2 ? 2 : 0; const unsigned char *dataR = (const unsigned char *)data; const unsigned char *dataG = dataR + ofsG; const unsigned char *dataB = dataR + ofsB; int x, y, pos; if(subsample) { for(y = 0; y < height; y += 16) { for(x = 0; x < width; x += 16) { float Y[256], U[256], V[256]; for(row = y, pos = 0; row < y+16; ++row) { // row >= height => use last input row int clamped_row = (row < height) ? row : height - 1; int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp; for(col = x; col < x+16; ++col, ++pos) { // if col >= width => use pixel from last input column int p = base_p + ((col < width) ? 
col : (width-1))*comp; float r = dataR[p], g = dataG[p], b = dataB[p]; Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128; U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b; V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b; } } DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+0, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+8, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+128, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y+136, 16, fdtbl_Y, DCY, YDC_HT, YAC_HT); // subsample U,V { float subU[64], subV[64]; int yy, xx; for(yy = 0, pos = 0; yy < 8; ++yy) { for(xx = 0; xx < 8; ++xx, ++pos) { int j = yy*32+xx*2; subU[pos] = (U[j+0] + U[j+1] + U[j+16] + U[j+17]) * 0.25f; subV[pos] = (V[j+0] + V[j+1] + V[j+16] + V[j+17]) * 0.25f; } } DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subU, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, subV, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); } } } } else { for(y = 0; y < height; y += 8) { for(x = 0; x < width; x += 8) { float Y[64], U[64], V[64]; for(row = y, pos = 0; row < y+8; ++row) { // row >= height => use last input row int clamped_row = (row < height) ? row : height - 1; int base_p = (stbi__flip_vertically_on_write ? (height-1-clamped_row) : clamped_row)*width*comp; for(col = x; col < x+8; ++col, ++pos) { // if col >= width => use pixel from last input column int p = base_p + ((col < width) ? 
col : (width-1))*comp; float r = dataR[p], g = dataG[p], b = dataB[p]; Y[pos]= +0.29900f*r + 0.58700f*g + 0.11400f*b - 128; U[pos]= -0.16874f*r - 0.33126f*g + 0.50000f*b; V[pos]= +0.50000f*r - 0.41869f*g - 0.08131f*b; } } DCY = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, Y, 8, fdtbl_Y, DCY, YDC_HT, YAC_HT); DCU = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, U, 8, fdtbl_UV, DCU, UVDC_HT, UVAC_HT); DCV = stbiw__jpg_processDU(s, &bitBuf, &bitCnt, V, 8, fdtbl_UV, DCV, UVDC_HT, UVAC_HT); } } } // Do the bit alignment of the EOI marker stbiw__jpg_writeBits(s, &bitBuf, &bitCnt, fillBits); } // EOI stbiw__putc(s, 0xFF); stbiw__putc(s, 0xD9); return 1; } STBIWDEF int stbi_write_jpg_to_func(stbi_write_func *func, void *context, int x, int y, int comp, const void *data, int quality) { stbi__write_context s = { 0 }; stbi__start_write_callbacks(&s, func, context); return stbi_write_jpg_core(&s, x, y, comp, (void *) data, quality); } #ifndef STBI_WRITE_NO_STDIO STBIWDEF int stbi_write_jpg(char const *filename, int x, int y, int comp, const void *data, int quality) { stbi__write_context s = { 0 }; if (stbi__start_write_file(&s,filename)) { int r = stbi_write_jpg_core(&s, x, y, comp, data, quality); stbi__end_write_file(&s); return r; } else return 0; } #endif #endif // STB_IMAGE_WRITE_IMPLEMENTATION /* Revision history 1.16 (2021-07-11) make Deflate code emit uncompressed blocks when it would otherwise expand support writing BMPs with alpha channel 1.15 (2020-07-13) unknown 1.14 (2020-02-02) updated JPEG writer to downsample chroma channels 1.13 1.12 1.11 (2019-08-11) 1.10 (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs 1.09 (2018-02-11) fix typo in zlib quality API, improve STB_I_W_STATIC in C++ 1.08 (2018-01-29) add stbi__flip_vertically_on_write, external zlib, zlib quality, choose PNG filter 1.07 (2017-07-24) doc fix 1.06 (2017-07-23) writing JPEG (using Jon Olick's code) 1.05 ??? 1.04 (2017-03-03) monochrome BMP expansion 1.03 ??? 
1.02 (2016-04-02) avoid allocating large structures on the stack 1.01 (2016-01-16) STBIW_REALLOC_SIZED: support allocators with no realloc support avoid race-condition in crc initialization minor compile issues 1.00 (2015-09-14) installable file IO function 0.99 (2015-09-13) warning fixes; TGA rle support 0.98 (2015-04-08) added STBIW_MALLOC, STBIW_ASSERT etc 0.97 (2015-01-18) fixed HDR asserts, rewrote HDR rle logic 0.96 (2015-01-17) add HDR output fix monochrome BMP 0.95 (2014-08-17) add monochrome TGA output 0.94 (2014-05-31) rename private functions to avoid conflicts with stb_image.h 0.93 (2014-05-27) warning fixes 0.92 (2010-08-01) casts to unsigned char to fix warnings 0.91 (2010-07-17) first public release 0.90 first internal release */ /* ------------------------------------------------------------------------------ This software is available under 2 licenses -- choose whichever you prefer. ------------------------------------------------------------------------------ ALTERNATIVE A - MIT License Copyright (c) 2017 Sean Barrett Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------ ALTERNATIVE B - Public Domain (www.unlicense.org) This is free and unencumbered software released into the public domain. Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means. In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
------------------------------------------------------------------------------ */ DaemonEngine-crunch-ef4d32f/crunch/000077500000000000000000000000001503722002600173015ustar00rootroot00000000000000DaemonEngine-crunch-ef4d32f/crunch/CMakeLists.txt000066400000000000000000000012451503722002600220430ustar00rootroot00000000000000find_package (Threads) include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/../inc ${CMAKE_CURRENT_SOURCE_DIR}/../crnlib ) # Defines the source code for the library set(CRUNCH_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/corpus_gen.cpp ${CMAKE_CURRENT_SOURCE_DIR}/corpus_gen.h ${CMAKE_CURRENT_SOURCE_DIR}/corpus_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/corpus_test.h ${CMAKE_CURRENT_SOURCE_DIR}/crunch.cpp ) add_executable(${CRUNCH_EXE_NAME} ${CRUNCH_SRCS}) target_link_libraries(${CRUNCH_EXE_NAME} ${CRUNCH_LIBRARY_NAME} ${CMAKE_THREAD_LIBS_INIT}) install(TARGETS ${CRUNCH_EXE_NAME} DESTINATION bin) set_property(TARGET ${CRUNCH_EXE_NAME} PROPERTY POSITION_INDEPENDENT_CODE 1) DaemonEngine-crunch-ef4d32f/crunch/corpus_gen.cpp000066400000000000000000000237201503722002600221550ustar00rootroot00000000000000// File: corpus_gen.cpp - Block compression corpus generator. 
// See Copyright Notice and license at the end of inc/crnlib.h // // Example command line: // -gentest [-deep] [-blockpercentage .035] [-width 4096] [-height 4096] -in c:\temp\*.jpg [-in c:\temp\*.jpeg] [-in @blah.txt] #include "crn_core.h" #include "corpus_gen.h" #include "crn_console.h" #include "crn_find_files.h" #include "crn_file_utils.h" #include "crn_command_line_params.h" #include "crn_dxt.h" #include "crn_cfile_stream.h" #include "crn_texture_conversion.h" #include "crn_radix_sort.h" #include "crn_defs.h" namespace crnlib { struct block { color_quad_u8 m_c[4 * 4]; inline operator size_t() const { return fast_hash(this, sizeof(*this)); } inline bool operator==(const block& rhs) const { return memcmp(this, &rhs, sizeof(*this)) == 0; } }; typedef crnlib::hash_map block_hash_map; corpus_gen::corpus_gen() { } void corpus_gen::sort_blocks(image_u8& img) { const uint num_blocks_x = img.get_width() / 4; const uint num_blocks_y = img.get_height() / 4; const uint total_blocks = num_blocks_x * num_blocks_y; console::printf("Sorting %u blocks...", total_blocks); crnlib::vector block_std_dev(total_blocks); for (uint by = 0; by < num_blocks_y; by++) { for (uint bx = 0; bx < num_blocks_x; bx++) { color_quad_u8 c[4 * 4]; for (uint y = 0; y < 4; y++) for (uint x = 0; x < 4; x++) c[x + y * 4] = img(bx * 4 + x, by * 4 + y); double std_dev = 0.0f; for (uint i = 0; i < 3; i++) std_dev += image_utils::compute_std_dev(16, c, i, 1); block_std_dev[bx + by * num_blocks_x] = (float)std_dev; } } crnlib::vector block_indices0(total_blocks); crnlib::vector block_indices1(total_blocks); const uint* pIndices = indirect_radix_sort(total_blocks, &block_indices0[0], &block_indices1[0], &block_std_dev[0], 0, sizeof(float), true); image_u8 new_img(img.get_width(), img.get_height()); uint dst_block_index = 0; //float prev_std_dev = -999; for (uint i = 0; i < total_blocks; i++) { uint src_block_index = pIndices[i]; //float std_dev = block_std_dev[src_block_index]; //crnlib_ASSERT(std_dev >= 
prev_std_dev); //prev_std_dev = std_dev; uint src_block_x = src_block_index % num_blocks_x; uint src_block_y = src_block_index / num_blocks_x; uint dst_block_x = dst_block_index % num_blocks_x; uint dst_block_y = dst_block_index / num_blocks_x; new_img.unclipped_blit(src_block_x * 4, src_block_y * 4, 4, 4, dst_block_x * 4, dst_block_y * 4, img); dst_block_index++; } #if 0 //new_img.swap(img); #else crnlib::vector remaining_blocks(num_blocks_x); console::printf("Arranging %u blocks...", total_blocks); for (uint by = 0; by < num_blocks_y; by++) { console::printf("%u of %u", by, num_blocks_y); remaining_blocks.resize(num_blocks_x); for (uint i = 0; i < num_blocks_x; i++) remaining_blocks[i] = i; color_quad_u8 match_block[16]; utils::zero_object(match_block); for (uint bx = 0; bx < num_blocks_x; bx++) { uint best_index = 0; uint64 best_error = cUINT64_MAX; for (uint i = 0; i < remaining_blocks.size(); i++) { uint src_block_index = remaining_blocks[i]; uint64 error = 0; for (uint y = 0; y < 4; y++) { for (uint x = 0; x < 4; x++) { const color_quad_u8& c = new_img(src_block_index * 4 + x, by * 4 + y); error += color::elucidian_distance(c, match_block[x + y * 4], false); } } if (error < best_error) { best_error = error; best_index = i; } } uint src_block_index = remaining_blocks[best_index]; for (uint y = 0; y < 4; y++) { for (uint x = 0; x < 4; x++) { const color_quad_u8& c = new_img(src_block_index * 4 + x, by * 4 + y); match_block[x + y * 4] = c; img(bx * 4 + x, by * 4 + y) = c; } } remaining_blocks.erase_unordered(best_index); } } #endif } bool corpus_gen::generate(const char* pCmd_line) { static const command_line_params::param_desc param_desc_array[] = { {"corpus_gen", 0, false}, {"in", 1, true}, {"deep", 0, false}, {"blockpercentage", 1, false}, {"width", 1, false}, {"height", 1, false}, {"alpha", 0, false}, }; command_line_params params; if (!params.parse(pCmd_line, CRNLIB_ARRAY_SIZE(param_desc_array), param_desc_array, true)) return false; if 
(!params.has_key("in")) { console::error("Must specify one or more input files using the /in option!"); return false; } uint num_dst_blocks_x = params.get_value_as_int("width", 0, 4096, 128, 4096); num_dst_blocks_x = (num_dst_blocks_x + 3) / 4; uint num_dst_blocks_y = params.get_value_as_int("height", 0, 4096, 128, 4096); num_dst_blocks_y = (num_dst_blocks_y + 3) / 4; const uint total_dst_blocks = num_dst_blocks_x * num_dst_blocks_y; image_u8 dst_img(num_dst_blocks_x * 4, num_dst_blocks_y * 4); uint next_dst_block = 0; uint total_dst_images = 0; random rm; block_hash_map block_hash; block_hash.reserve(total_dst_blocks); uint total_images_loaded = 0; uint total_blocks_written = 0; command_line_params::param_map_const_iterator it = params.begin(); for (; it != params.end(); ++it) { if (it->first != "in") continue; if (it->second.m_values.empty()) { console::error("Must follow /in parameter with a filename!\n"); return false; } for (uint in_value_index = 0; in_value_index < it->second.m_values.size(); in_value_index++) { const dynamic_string& filespec = it->second.m_values[in_value_index]; find_files file_finder; if (!file_finder.find(filespec.get_ptr(), find_files::cFlagAllowFiles | (params.has_key("deep") ? 
find_files::cFlagRecursive : 0))) { console::warning("Failed finding files: %s", filespec.get_ptr()); continue; } if (file_finder.get_files().empty()) { console::warning("No files found: %s", filespec.get_ptr()); return false; } const find_files::file_desc_vec& files = file_finder.get_files(); for (uint file_index = 0; file_index < files.size(); file_index++) { const find_files::file_desc& file_desc = files[file_index]; console::printf("Loading image: %s", file_desc.m_fullname.get_ptr()); image_u8 img; if (!image_utils::read_from_file(img, file_desc.m_fullname.get_ptr(), 0)) { console::warning("Failed loading image file: %s", file_desc.m_fullname.get_ptr()); continue; } if (!params.has_key("alpha")) { for (uint y = 0; y < img.get_height(); y++) for (uint x = 0; x < img.get_width(); x++) img(x, y).a = 255; } total_images_loaded++; uint width = img.get_width(); uint height = img.get_height(); uint num_blocks_x = (width + 3) / 4; uint num_blocks_y = (height + 3) / 4; uint total_blocks = num_blocks_x * num_blocks_y; float percentage = params.get_value_as_float("blockpercentage", 0, .1f, .001f, 1.0f); uint total_rand_blocks = math::maximum(1U, (uint)(total_blocks * percentage)); crnlib::vector remaining_blocks(total_blocks); for (uint i = 0; i < total_blocks; i++) remaining_blocks[i] = i; uint num_blocks_remaining = total_rand_blocks; while (num_blocks_remaining) { if (remaining_blocks.empty()) break; uint rand_block_index = rm.irand(0, remaining_blocks.size()); uint block_index = remaining_blocks[rand_block_index]; remaining_blocks.erase_unordered(rand_block_index); uint block_y = block_index / num_blocks_x; uint block_x = block_index % num_blocks_x; block b; for (uint y = 0; y < 4; y++) { for (uint x = 0; x < 4; x++) { b.m_c[x + y * 4] = img.get_clamped(block_x * 4 + x, block_y * 4 + y); } } if (!block_hash.insert(b).second) continue; if (block_hash.size() == total_dst_blocks) { block_hash.clear(); block_hash.reserve(total_dst_blocks); } uint dst_block_x = 
// Builds block-compression test images ("test_<n>.tga") out of randomly
// sampled, de-duplicated 4x4 pixel blocks taken from a set of input images.
class corpus_gen {
 public:
  corpus_gen();

  // Parses pCmd_line (options: in, deep, blockpercentage, width, height,
  // alpha), loads every image matched by the "in" file specs, samples unique
  // 4x4 blocks from each one and packs them into destination images that are
  // sorted and written out whenever they fill up (plus one final partial
  // image if any blocks remain).
  // Returns false if the command line fails to parse, no "in" option is
  // given, an "in" option has no value, or a file spec matches no files;
  // returns true otherwise. Images that fail to load are skipped with a
  // warning.
  bool generate(const char* pCmd_line);

 private:
  // Rearranges the 4x4 blocks of img in place: blocks are first ordered by
  // the sum of the per-channel standard deviations of channels 0..2, then
  // each row of blocks is re-ordered greedily so that every block is followed
  // by the remaining block of that row with the smallest color distance to it.
  void sort_blocks(image_u8& img);
};
{ corpus_tester::corpus_tester() { m_bad_block_img.resize(256, 256); m_next_bad_block_index = 0; m_total_bad_block_files = 0; } void corpus_tester::print_comparative_metric_stats(const command_line_params& cmd_line_params, const crnlib::vector& stats1, const crnlib::vector& stats2, uint num_blocks_x, uint /* num_blocks_y */) { crnlib::vector better_blocks; crnlib::vector equal_blocks; crnlib::vector worse_blocks; crnlib::vector delta_psnr; for (uint i = 0; i < stats1.size(); i++) { //uint bx = i % num_blocks_x; //uint by = i / num_blocks_x; const image_utils::error_metrics& em1 = stats1[i]; const image_utils::error_metrics& em2 = stats2[i]; if (em1.mPeakSNR < em2.mPeakSNR) { worse_blocks.push_back(i); delta_psnr.push_back((float)(em2.mPeakSNR - em1.mPeakSNR)); } else if (fabs(em1.mPeakSNR - em2.mPeakSNR) < .001f) equal_blocks.push_back(i); else better_blocks.push_back(i); } console::printf("Num worse blocks: %u, %3.3f%%", worse_blocks.size(), worse_blocks.size() * 100.0f / stats1.size()); console::printf("Num equal blocks: %u, %3.3f%%", equal_blocks.size(), equal_blocks.size() * 100.0f / stats1.size()); console::printf("Num better blocks: %u, %3.3f%%", better_blocks.size(), better_blocks.size() * 100.0f / stats1.size()); console::printf("Num equal+better blocks: %u, %3.3f%%", equal_blocks.size() + better_blocks.size(), (equal_blocks.size() + better_blocks.size()) * 100.0f / stats1.size()); if (!cmd_line_params.has_key("nobadblocks")) { crnlib::vector indices[2]; indices[0].resize(worse_blocks.size()); indices[1].resize(worse_blocks.size()); uint* pSorted_indices = NULL; if (worse_blocks.size()) { pSorted_indices = indirect_radix_sort(worse_blocks.size(), &indices[0][0], &indices[1][0], &delta_psnr[0], 0, sizeof(float), true); console::printf("List of worse blocks sorted by delta PSNR:"); for (uint i = 0; i < worse_blocks.size(); i++) { uint block_index = worse_blocks[pSorted_indices[i]]; uint bx = block_index % num_blocks_x; uint by = block_index / num_blocks_x; 
console::printf("%u. [%u,%u] %3.3f %3.3f %3.3f", i, bx, by, stats1[block_index].mPeakSNR, stats2[block_index].mPeakSNR, stats2[block_index].mPeakSNR - stats1[block_index].mPeakSNR); } } } } void corpus_tester::print_metric_stats(const crnlib::vector& stats, uint num_blocks_x, uint /* num_blocks_y */) { image_utils::error_metrics best_metrics; image_utils::error_metrics worst_metrics; worst_metrics.mPeakSNR = 1e+6f; vec2I best_loc; vec2I worst_loc; utils::zero_object(best_loc); utils::zero_object(worst_loc); double psnr_total = 0.0f; double psnr2_total = 0.0f; uint num_non_inf = 0; uint num_inf = 0; for (uint i = 0; i < stats.size(); i++) { uint bx = i % num_blocks_x; uint by = i / num_blocks_x; const image_utils::error_metrics& em = stats[i]; if ((em.mPeakSNR < 200.0f) && (em > best_metrics)) { best_metrics = em; best_loc.set(bx, by); } if (em < worst_metrics) { worst_metrics = em; worst_loc.set(bx, by); } if (em.mPeakSNR < 200.0f) { psnr_total += em.mPeakSNR; psnr2_total += em.mPeakSNR * em.mPeakSNR; num_non_inf++; } else { num_inf++; } } console::printf("Number of infinite PSNR blocks: %u", num_inf); console::printf("Number of non-infinite PSNR blocks: %u", num_non_inf); if (num_non_inf) { psnr_total /= num_non_inf; psnr2_total /= num_non_inf; double psnr_std_dev = sqrt(psnr2_total - psnr_total * psnr_total); console::printf("Average Non-Inf PSNR: %3.3f, Std dev: %3.3f", psnr_total, psnr_std_dev); console::printf("Worst PSNR: %3.3f, Block Location: %i,%i", worst_metrics.mPeakSNR, worst_loc[0], worst_loc[1]); console::printf("Best Non-Inf PSNR: %3.3f, Block Location: %i,%i", best_metrics.mPeakSNR, best_loc[0], best_loc[1]); } } void corpus_tester::flush_bad_blocks() { if (!m_next_bad_block_index) return; dynamic_string filename(cVarArg, "badblocks_%u.tga", m_total_bad_block_files); console::printf("Writing bad block image: %s", filename.get_ptr()); image_utils::write_to_file(filename.get_ptr(), m_bad_block_img, image_utils::cWriteFlagIgnoreAlpha); 
m_bad_block_img.set_all(color_quad_u8::make_black()); m_total_bad_block_files++; m_next_bad_block_index = 0; } void corpus_tester::add_bad_block(image_u8& block) { uint num_blocks_x = m_bad_block_img.get_block_width(4); uint num_blocks_y = m_bad_block_img.get_block_height(4); uint total_blocks = num_blocks_x * num_blocks_y; m_bad_block_img.blit((m_next_bad_block_index % num_blocks_x) * 4, (m_next_bad_block_index / num_blocks_x) * 4, block); m_next_bad_block_index++; if (m_next_bad_block_index == total_blocks) flush_bad_blocks(); } static bool progress_callback(uint percentage_complete, void* /* pUser_data_ptr */) { static int s_prev_percentage_complete = -1; if (s_prev_percentage_complete != static_cast(percentage_complete)) { console::progress("%u%%", percentage_complete); s_prev_percentage_complete = percentage_complete; } return true; } bool corpus_tester::test(const char* pCmd_line) { console::printf("Command line:\n\"%s\"", pCmd_line); static const command_line_params::param_desc param_desc_array[] = { {"corpus_test", 0, false}, {"in", 1, true}, {"deep", 0, false}, {"alpha", 0, false}, {"nomips", 0, false}, {"perceptual", 0, false}, {"endpointcaching", 0, false}, {"multithreaded", 0, false}, {"writehybrid", 0, false}, {"nobadblocks", 0, false}, }; command_line_params cmd_line_params; if (!cmd_line_params.parse(pCmd_line, CRNLIB_ARRAY_SIZE(param_desc_array), param_desc_array, true)) return false; double total_time1 = 0, total_time2 = 0; command_line_params::param_map_const_iterator it = cmd_line_params.begin(); for (; it != cmd_line_params.end(); ++it) { if (it->first != "in") continue; if (it->second.m_values.empty()) { console::error("Must follow /in parameter with a filename!\n"); return false; } for (uint in_value_index = 0; in_value_index < it->second.m_values.size(); in_value_index++) { const dynamic_string& filespec = it->second.m_values[in_value_index]; find_files file_finder; if (!file_finder.find(filespec.get_ptr(), find_files::cFlagAllowFiles | 
(cmd_line_params.has_key("deep") ? find_files::cFlagRecursive : 0))) { console::warning("Failed finding files: %s", filespec.get_ptr()); continue; } if (file_finder.get_files().empty()) { console::warning("No files found: %s", filespec.get_ptr()); return false; } const find_files::file_desc_vec& files = file_finder.get_files(); image_u8 o(4, 4), a(4, 4), b(4, 4); uint first_channel = 0; uint num_channels = 3; bool perceptual = cmd_line_params.get_value_as_bool("perceptual", false); if (perceptual) { first_channel = 0; num_channels = 0; } console::printf("Perceptual mode: %u", perceptual); for (uint file_index = 0; file_index < files.size(); file_index++) { const find_files::file_desc& file_desc = files[file_index]; console::printf("-------- Loading image: %s", file_desc.m_fullname.get_ptr()); image_u8 img; if (!image_utils::read_from_file(img, file_desc.m_fullname.get_ptr(), 0)) { console::warning("Failed loading image file: %s", file_desc.m_fullname.get_ptr()); continue; } if ((!cmd_line_params.has_key("alpha")) && img.is_component_valid(3)) { for (uint y = 0; y < img.get_height(); y++) for (uint x = 0; x < img.get_width(); x++) img(x, y).a = 255; img.set_component_valid(3, false); } mipmapped_texture orig_tex; orig_tex.assign(crnlib_new(img)); if (!cmd_line_params.has_key("nomips")) { mipmapped_texture::generate_mipmap_params genmip_params; genmip_params.m_srgb = true; console::printf("Generating mipmaps"); if (!orig_tex.generate_mipmaps(genmip_params, false)) { console::error("Mipmap generation failed!"); return false; } } console::printf("Compress 1"); mipmapped_texture tex1(orig_tex); dxt_image::pack_params convert_params; convert_params.m_endpoint_caching = cmd_line_params.get_value_as_bool("endpointcaching", 0, false); convert_params.m_compressor = cCRNDXTCompressorCRN; convert_params.m_quality = cCRNDXTQualityNormal; convert_params.m_perceptual = perceptual; convert_params.m_num_helper_threads = cmd_line_params.get_value_as_bool("multithreaded", 0, true) ? 
(g_number_of_processors - 1) : 0; convert_params.m_pProgress_callback = progress_callback; timer t; t.start(); if (!tex1.convert(PIXEL_FMT_ETC1, false, convert_params)) { console::error("Texture conversion failed!"); return false; } double time1 = t.get_elapsed_secs(); total_time1 += time1; console::printf("Elapsed time: %3.3f", time1); console::printf("Compress 2"); mipmapped_texture tex2(orig_tex); convert_params.m_endpoint_caching = false; convert_params.m_compressor = cCRNDXTCompressorCRN; convert_params.m_quality = cCRNDXTQualitySuperFast; t.start(); if (!tex2.convert(PIXEL_FMT_ETC1, false, convert_params)) { console::error("Texture conversion failed!"); return false; } double time2 = t.get_elapsed_secs(); total_time2 += time2; console::printf("Elapsed time: %3.3f", time2); image_u8 hybrid_img(img.get_width(), img.get_height()); for (uint l = 0; l < orig_tex.get_num_levels(); l++) { image_u8 orig_img, img1, img2; image_u8* pOrig = orig_tex.get_level(0, l)->get_unpacked_image(orig_img, cUnpackFlagUncook | cUnpackFlagUnflip); image_u8* pImg1 = tex1.get_level(0, l)->get_unpacked_image(img1, cUnpackFlagUncook | cUnpackFlagUnflip); image_u8* pImg2 = tex2.get_level(0, l)->get_unpacked_image(img2, cUnpackFlagUncook | cUnpackFlagUnflip); const uint num_blocks_x = pOrig->get_block_width(4); const uint num_blocks_y = pOrig->get_block_height(4); crnlib::vector metrics[2]; for (uint by = 0; by < num_blocks_y; by++) { for (uint bx = 0; bx < num_blocks_x; bx++) { pOrig->extract_block(o.get_ptr(), bx * 4, by * 4, 4, 4); pImg1->extract_block(a.get_ptr(), bx * 4, by * 4, 4, 4); pImg2->extract_block(b.get_ptr(), bx * 4, by * 4, 4, 4); image_utils::error_metrics em1; em1.compute(o, a, first_channel, num_channels); image_utils::error_metrics em2; em2.compute(o, b, first_channel, num_channels); metrics[0].push_back(em1); metrics[1].push_back(em2); if (em1.mPeakSNR < em2.mPeakSNR) { add_bad_block(o); hybrid_img.blit(bx * 4, by * 4, b); } else { hybrid_img.blit(bx * 4, by * 4, a); } 
} } if (cmd_line_params.has_key("writehybrid")) image_utils::write_to_file("hybrid.tga", hybrid_img, image_utils::cWriteFlagIgnoreAlpha); console::printf("---- Mip level: %u, Total blocks: %ux%u, %u", l, num_blocks_x, num_blocks_y, num_blocks_x * num_blocks_y); console::printf("Compressor 1:"); print_metric_stats(metrics[0], num_blocks_x, num_blocks_y); console::printf("Compressor 2:"); print_metric_stats(metrics[1], num_blocks_x, num_blocks_y); console::printf("Compressor 1 vs. 2:"); print_comparative_metric_stats(cmd_line_params, metrics[0], metrics[1], num_blocks_x, num_blocks_y); image_utils::error_metrics em; em.compute(*pOrig, *pImg1, 0, perceptual ? 0 : 3); em.print("Compressor 1: "); em.compute(*pOrig, *pImg2, 0, perceptual ? 0 : 3); em.print("Compressor 2: "); em.compute(*pOrig, hybrid_img, 0, perceptual ? 0 : 3); em.print("Best of Both: "); } } } // file_index } flush_bad_blocks(); console::printf("Total times: %4.3f vs. %4.3f", total_time1, total_time2); return true; } } // namespace crnlib DaemonEngine-crunch-ef4d32f/crunch/corpus_test.h000066400000000000000000000015151503722002600220260ustar00rootroot00000000000000// File: corpus_test.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_command_line_params.h" #include "crn_image_utils.h" namespace crnlib { class corpus_tester { public: corpus_tester(); bool test(const char* pCmd_line); private: void print_comparative_metric_stats(const command_line_params& params, const crnlib::vector& stats1, const crnlib::vector& stats2, uint num_blocks_x, uint num_blocks_y); void print_metric_stats(const crnlib::vector& stats, uint num_blocks_x, uint num_blocks_y); image_u8 m_bad_block_img; uint m_next_bad_block_index; uint m_total_bad_block_files; void flush_bad_blocks(); void add_bad_block(image_u8& block); }; } // namespace crnlib DaemonEngine-crunch-ef4d32f/crunch/crunch.cpp000066400000000000000000001446301503722002600212770ustar00rootroot00000000000000// File: crunch.cpp - 
Command line tool for DDS/CRN texture compression/decompression. // This tool exposes all of crnlib's functionality. It also uses a bunch of internal crlib // classes that aren't directly exposed in the main crnlib.h header. The actual tool is // implemented as a single class "crunch" which in theory is reusable. Most of the heavy // lifting is actually done by functions in the crnlib::texture_conversion namespace, // which are mostly wrappers over the public crnlib.h functions. // See Copyright Notice and license at the end of inc/crnlib.h // // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing #include "crn_core.h" #include "crn_console.h" #include "crn_colorized_console.h" #include "crn_find_files.h" #include "crn_file_utils.h" #include "crn_command_line_params.h" #include "crn_dxt.h" #include "crn_cfile_stream.h" #include "crn_texture_conversion.h" #include "crn_defs.h" #include "corpus_gen.h" #include "corpus_test.h" using namespace crnlib; const int cDefaultCRNQualityLevel = 128; class crunch { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(crunch); cfile_stream m_log_stream; uint32 m_num_processed; uint32 m_num_failed; uint32 m_num_succeeded; uint32 m_num_skipped; public: crunch() : m_num_processed(0), m_num_failed(0), m_num_succeeded(0), m_num_skipped(0) { } ~crunch() { } enum convert_status { cCSFailed, cCSSucceeded, cCSSkipped, cCSBadParam, }; inline uint32 get_num_processed() const { return m_num_processed; } inline uint32 get_num_failed() const { return m_num_failed; } inline uint32 get_num_succeeded() const { return m_num_succeeded; } inline uint32 get_num_skipped() const { return m_num_skipped; } static void print_usage() { // ------------------------------------------------------------------------------- console::message("\nCommand line usage:"); console::printf("crunch [options] -file filename"); console::printf("-file filename - Required input filename, wildcards, multiple -file params OK."); console::printf("-file 
@list.txt - List of files to convert."); console::printf("Supported source file formats: dds,ktx,crn,tga,bmp,png,jpg/jpeg,psd"); console::printf("Note: Some file format variants are unsupported."); console::printf("See the docs for stb_image.h: https://github.com/nothings/stb"); console::printf("Progressive JPEG files are supported, see: http://code.google.com/p/jpeg-compressor/"); console::printf("\n-h - Print this help."); console::message("\nPath/file related parameters:"); console::printf("-out filename - Output filename"); console::printf("-outdir dir - Output directory"); console::printf("-outsamedir - Write output file to input directory"); console::printf("-deep - Recurse subdirectories, default=false"); console::printf("-nooverwrite - Don't overwrite existing files"); console::printf("-timestamp - Update only changed files"); console::printf("-forcewrite - Overwrite read-only files"); console::printf("-recreate - Recreate directory structure"); console::printf("-fileformat [dds,ktx,crn,tga,bmp,png] - Output file format, default=crn or dds"); console::message("\nModes:"); console::printf("-compare - Compare input and output files (no output files are written)."); console::printf("-info - Only display input file statistics (no output files are written)."); console::message("\nMisc. options:"); console::printf("-helperThreads # - Set number of helper threads, 0-%d, default=(# of CPU's)-1", cCRNMaxHelperThreads); console::printf("-noTitle - Disable title output at run time"); console::printf("-noprogress - Disable progress output"); console::printf("-quiet - Disable all console output"); console::printf("-ignoreerrors - Continue processing files after errors. 
Note: The default"); console::printf(" behavior is to immediately exit whenever an error occurs."); console::printf("-logfile filename - Append output to log file"); console::printf("-pause - Wait for keypress on error"); console::printf("-window - Crop window before processing"); console::printf("-clamp - Crop image if larger than width/height"); console::printf("-clampscale - Scale image if larger than width/height"); console::printf("-nostats - Disable all output file statistics (faster)"); console::printf("-imagestats - Print various image quality statistics"); console::printf("-mipstats - Print statistics for each mipmap, not just the top mip"); console::printf("-lzmastats - Print size of output file compressed with LZMA codec"); console::printf("-split - Write faces/mip levels to multiple separate output PNG files"); console::printf("-yflip - Always flip texture on Y axis before processing"); console::printf("-unflip - Unflip texture if read from source file as flipped"); console::message("\nImage rescaling (mutually exclusive options)"); console::printf("-rescale - Rescale image to specified resolution"); console::printf("-relscale - Rescale image to specified relative resolution"); console::printf("-rescalemode - Auto-rescale non-power of two images"); console::printf(" nearest - Use nearest power of 2, hi - Use next, lo - Use previous"); console::message("\nDDS/CRN compression quality control:"); console::printf("-quality # (or /q #) - Set Clustered DDS/CRN quality factor [0-255] 255=best"); console::printf(" DDS default quality is best possible."); console::printf(" CRN default quality is %u.", cDefaultCRNQualityLevel); console::printf("-bitrate # - Set the desired output bitrate of DDS or CRN output files."); console::printf(" This option causes crunch to find the quality factor"); console::printf(" closest to the desired bitrate using a binary search."); console::message("\nLow-level CRN specific options:"); console::printf("-c # - Color endpoint 
palette size, 32-8192, default=3072"); console::printf("-s # - Color selector palette size, 32-8192, default=3072"); console::printf("-ca # - Alpha endpoint palette size, 32-8192, default=3072"); console::printf("-sa # - Alpha selector palette size, 32-8192, default=3072"); // ------------------------------------------------------------------------------- console::message("\nMipmap filtering options:"); console::printf("-mipMode [UseSourceOrGenerate,UseSource,Generate,None]"); console::printf(" Default mipMode is UseSourceOrGenerate"); console::printf(" UseSourceOrGenerate: Use source mipmaps if possible, or create new mipmaps."); console::printf(" UseSource: Always use source mipmaps, if any (never generate new mipmaps)"); console::printf(" Generate: Always generate a new mipmap chain (ignore source mipmaps)"); console::printf(" None: Do not output any mipmaps"); console::printf("-mipFilter [box,tent,lanczos4,mitchell,kaiser], default=kaiser"); console::printf("-gamma # - Mipmap gamma correction value, default=2.2, use 1.0 for linear"); console::printf("-blurriness # - Scale filter kernel, >1=blur, <1=sharpen, .01-8, default=.9"); console::printf("-wrap - Assume texture is tiled when filtering, default=clamping"); console::printf("-renormalize - Renormalize filtered normal map texels, default=disabled"); console::printf("-rtopmip - Renormalize on the top mip-level too, default=disabled"); console::printf("-maxmips # - Limit number of generated texture mipmap levels, 1-16, default=16"); console::printf("-minmipsize # - Smallest allowable mipmap resolution, default=1"); console::message("\nCompression options:"); console::printf("-alphaThreshold # - Set DXT1A alpha threshold, 0-255, default=128"); console::printf(" Note: -alphaThreshold also changes the compressor's behavior to"); console::printf(" prefer DXT1A over DXT5 for images with alpha channels (.DDS only)."); console::printf("-uniformMetrics - Use uniform color metrics, default=use perceptual metrics"); 
console::printf("-noAdaptiveBlocks - Disable adaptive block sizes (i.e. disable macroblocks)."); console::printf("-noNormalDetection - Disable normal map detection, default=disabled"); #ifdef CRNLIB_SUPPORT_ATI_COMPRESS console::printf("-compressor [CRN,CRNF,RYG,ATI] - Set DXTn compressor, default=CRN"); #else console::printf("-compressor [CRN,CRNF,RYG] - Set DXTn compressor, default=CRN"); #endif console::printf("-dxtQuality [superfast,fast,normal,better,uber] - Endpoint optimizer speed."); console::printf(" Sets endpoint optimizer's max iteration depth. Default=uber."); console::printf("-noendpointcaching - Don't try reusing previous DXT endpoint solutions."); console::printf("-grayscalesampling - Assume shader will convert fetched results to luma (Y)."); console::printf("-forceprimaryencoding - Only use DXT1 color4 and DXT5 alpha8 block encodings."); console::printf("-usetransparentindicesforblack - Try DXT1 transparent indices for dark pixels."); console::message("\nOuptut pixel format options:"); console::printf("-usesourceformat - Use input file's format for output format (when possible)."); console::message("\nAll supported texture formats (Note: .CRN only supports DXTn pixel formats):"); for (uint32 i = 0; i < pixel_format_helpers::get_num_formats(); i++) { pixel_format fmt = pixel_format_helpers::get_pixel_format_by_index(i); console::printf("-%s", pixel_format_helpers::get_pixel_format_string(fmt)); } } bool convert(const char* pCommand_line) { m_num_processed = 0; m_num_failed = 0; m_num_succeeded = 0; m_num_skipped = 0; command_line_params::param_desc std_params[] = { {"h", 0, false}, {"file", 1, true}, {"out", 1, false}, {"outdir", 1, false}, {"outsamedir", 0, false}, {"deep", 0, false}, {"fileformat", 1, false}, {"helperThreads", 1, false}, {"noTitle", 0, false}, {"noprogress", 0, false}, {"quiet", 0, false}, {"ignoreerrors", 0, false}, {"logfile", 1, false}, {"q", 1, false}, {"quality", 1, false}, {"c", 1, false}, {"s", 1, false}, {"ca", 1, false}, 
{"sa", 1, false}, {"mipMode", 1, false}, {"mipFilter", 1, false}, {"gamma", 1, false}, {"blurriness", 1, false}, {"wrap", 0, false}, {"renormalize", 0, false}, {"rtopmip", 0, false }, {"paramdebug", 0, false}, {"debug", 0, false}, {"quick", 0, false}, {"imagestats", 0, false}, {"nostats", 0, false}, {"mipstats", 0, false}, {"alphaThreshold", 1, false}, {"uniformMetrics", 0, false}, {"noAdaptiveBlocks", 0, false}, {"noNormalDetection", 0, false}, {"compressor", 1, false}, {"dxtQuality", 1, false}, {"noendpointcaching", 0, false}, {"grayscalesampling", 0, false}, {"converttoluma", 0, false}, {"setalphatoluma", 0, false}, {"pause", 0, false}, {"timestamp", 0, false}, {"nooverwrite", 0, false}, {"forcewrite", 0, false}, {"recreate", 0, false}, {"compare", 0, false}, {"info", 0, false}, {"forceprimaryencoding", 0, false}, {"usetransparentindicesforblack", 0, false}, {"usesourceformat", 0, false}, {"rescalemode", 1, false}, {"rescale", 2, false}, {"relrescale", 2, false}, {"clamp", 2, false}, {"clampScale", 2, false}, {"window", 4, false}, {"maxmips", 1, false}, {"minmipsize", 1, false}, {"bitrate", 1, false}, {"lzmastats", 0, false}, {"split", 0, false}, {"csvfile", 1, false}, {"yflip", 0, false}, {"unflip", 0, false}, }; crnlib::vector params; params.append(std_params, sizeof(std_params) / sizeof(std_params[0])); for (uint32 i = 0; i < pixel_format_helpers::get_num_formats(); i++) { pixel_format fmt = pixel_format_helpers::get_pixel_format_by_index(i); command_line_params::param_desc desc; desc.m_pName = pixel_format_helpers::get_pixel_format_string(fmt); desc.m_num_values = 0; desc.m_support_listing_file = false; params.push_back(desc); } if (!m_params.parse(pCommand_line, params.size(), params.get_ptr(), true)) { return false; } if (!m_params.get_num_params()) { console::error("No command line parameters specified!"); print_usage(); return false; } #if 0 if (m_params.get_count("")) { console::error("Unrecognized command line parameter: \"%s\"", 
m_params.get_value_as_string_or_empty("", 0).get_ptr()); return false; } #endif if (m_params.get_value_as_bool("h")) { print_usage(); return true; } if (m_params.get_value_as_bool("debug")) { console::debug("Command line parameters:"); for (command_line_params::param_map_const_iterator it = m_params.begin(); it != m_params.end(); ++it) { console::disable_crlf(); console::debug("Key:\"%s\" Values (%u): ", it->first.get_ptr(), it->second.m_values.size()); for (uint32 i = 0; i < it->second.m_values.size(); i++) console::debug("\"%s\" ", it->second.m_values[i].get_ptr()); console::debug("\n"); console::enable_crlf(); } } dynamic_string log_filename; if (m_params.get_value_as_string("logfile", 0, log_filename)) { if (!m_log_stream.open(log_filename.get_ptr(), cDataStreamWritable | cDataStreamSeekable, true)) { console::error("Unable to open log file: \"%s\"", log_filename.get_ptr()); return false; } console::printf("Appending output to log file \"%s\"", log_filename.get_ptr()); console::set_log_stream(&m_log_stream); } bool status = convert(); if (m_log_stream.is_opened()) { console::set_log_stream(NULL); m_log_stream.close(); } return status; } private: command_line_params m_params; bool convert() { find_files::file_desc_vec files; uint32 total_input_specs = 0; for (uint32 phase = 0; phase < 2; phase++) { command_line_params::param_map_const_iterator begin, end; m_params.find(phase ? 
"" : "file", begin, end); for (command_line_params::param_map_const_iterator it = begin; it != end; ++it) { total_input_specs++; const dynamic_string_array& strings = it->second.m_values; for (uint32 i = 0; i < strings.size(); i++) { if (!process_input_spec(files, strings[i])) { if (!m_params.get_value_as_bool("ignoreerrors")) return false; } } } } if (!total_input_specs) { console::error("No input files specified!"); return false; } if (files.empty()) { console::error("No files found to process!"); return false; } std::sort(files.begin(), files.end()); files.resize((uint32)(std::unique(files.begin(), files.end()) - files.begin())); timer tm; tm.start(); if (!process_files(files)) { if (!m_params.get_value_as_bool("ignoreerrors")) return false; } double total_time = tm.get_elapsed_secs(); console::printf("Total time: %3.3fs", total_time); console::printf( ((m_num_skipped) || (m_num_failed)) ? cWarningConsoleMessage : cInfoConsoleMessage, "%u total file(s) successfully processed, %u file(s) skipped, %u file(s) failed.", m_num_succeeded, m_num_skipped, m_num_failed); return true; } bool process_input_spec(find_files::file_desc_vec& files, const dynamic_string& input_spec) { dynamic_string find_name(input_spec); if ((find_name.get_len()) && (file_utils::does_dir_exist(find_name.get_ptr()))) { file_utils::combine_path(find_name, find_name.get_ptr(), "*"); } if ((find_name.is_empty()) || (!file_utils::full_path(find_name))) { console::error("Invalid input filename: %s", find_name.get_ptr()); return false; } const bool deep_flag = m_params.get_value_as_bool("deep"); dynamic_string find_drive, find_path, find_fname, find_ext; file_utils::split_path(find_name.get_ptr(), &find_drive, &find_path, &find_fname, &find_ext); dynamic_string find_pathname; file_utils::combine_path(find_pathname, find_drive.get_ptr(), find_path.get_ptr()); dynamic_string find_filename; find_filename = find_fname + find_ext; find_files file_finder; bool success = 
file_finder.find(find_pathname.get_ptr(), find_filename.get_ptr(), find_files::cFlagAllowFiles | (deep_flag ? find_files::cFlagRecursive : 0)); if (!success) { console::error("Failed finding files: %s", find_name.get_ptr()); return false; } if (file_finder.get_files().empty()) { console::warning("No files found: %s", find_name.get_ptr()); return true; } files.append(file_finder.get_files()); return true; } bool read_only_file_check(const char* pDst_filename) { if (!file_utils::is_read_only(pDst_filename)) return true; if (m_params.get_value_as_bool("forcewrite")) { if (file_utils::disable_read_only(pDst_filename)) { console::warning("Setting read-only file \"%s\" to writable", pDst_filename); return true; } else { console::error("Failed setting read-only file \"%s\" to writable!", pDst_filename); return false; } } console::error("Output file \"%s\" is read-only!", pDst_filename); return false; } bool process_files(find_files::file_desc_vec& files) { const bool compare_mode = m_params.get_value_as_bool("compare"); const bool info_mode = m_params.get_value_as_bool("info"); for (uint32 file_index = 0; file_index < files.size(); file_index++) { const find_files::file_desc& file_desc = files[file_index]; const dynamic_string& in_filename = file_desc.m_fullname; dynamic_string in_drive, in_path, in_fname, in_ext; file_utils::split_path(in_filename.get_ptr(), &in_drive, &in_path, &in_fname, &in_ext); texture_file_types::format out_file_type = texture_file_types::cFormatCRN; dynamic_string fmt; if (m_params.get_value_as_string("fileformat", 0, fmt)) { if (fmt == "tga") out_file_type = texture_file_types::cFormatTGA; else if (fmt == "bmp") out_file_type = texture_file_types::cFormatBMP; else if (fmt == "dds") out_file_type = texture_file_types::cFormatDDS; else if (fmt == "ktx") out_file_type = texture_file_types::cFormatKTX; else if (fmt == "crn") out_file_type = texture_file_types::cFormatCRN; else if (fmt == "png") out_file_type = texture_file_types::cFormatPNG; else { 
console::error("Unsupported output file type: %s", fmt.get_ptr()); return false; } } // No explicit output format has been specified - try to determine something doable. if (!m_params.has_key("fileformat")) { if (m_params.has_key("split")) { out_file_type = texture_file_types::cFormatPNG; } else { texture_file_types::format input_file_type = texture_file_types::determine_file_format(in_filename.get_ptr()); if (input_file_type == texture_file_types::cFormatCRN) { out_file_type = texture_file_types::cFormatDDS; cfile_stream in_stream; crnd::crn_header in_header; if (in_stream.open(in_filename.get_ptr()) && in_stream.read(&in_header, sizeof(in_header)) == sizeof(in_header) && (in_header.m_format == cCRNFmtETC1 || in_header.m_format == cCRNFmtETC2 || in_header.m_format == cCRNFmtETC2A || in_header.m_format == cCRNFmtETC1S || in_header.m_format == cCRNFmtETC2AS)) out_file_type = texture_file_types::cFormatKTX; } else if (input_file_type == texture_file_types::cFormatKTX) { // Default to converting KTX files to PNG out_file_type = texture_file_types::cFormatPNG; } } } dynamic_string out_filename; if (m_params.get_value_as_bool("outsamedir")) out_filename.format("%s%s%s.%s", in_drive.get_ptr(), in_path.get_ptr(), in_fname.get_ptr(), texture_file_types::get_extension(out_file_type)); else if (m_params.has_key("out")) { out_filename = m_params.get_value_as_string_or_empty("out"); if (files.size() > 1) { dynamic_string out_drive, out_dir, out_name, out_ext; file_utils::split_path(out_filename.get_ptr(), &out_drive, &out_dir, &out_name, &out_ext); out_name.format("%s_%u", out_name.get_ptr(), file_index); out_filename.format("%s%s%s%s", out_drive.get_ptr(), out_dir.get_ptr(), out_name.get_ptr(), out_ext.get_ptr()); } if (!m_params.has_key("fileformat")) out_file_type = texture_file_types::determine_file_format(out_filename.get_ptr()); } else { dynamic_string out_dir(m_params.get_value_as_string_or_empty("outdir")); if (m_params.get_value_as_bool("recreate") && 
file_desc.m_rel.get_len()) { file_utils::combine_path(out_dir, out_dir.get_ptr(), file_desc.m_rel.get_ptr()); } if (out_dir.get_len()) { if (file_utils::is_path_separator(out_dir.back())) out_filename.format("%s%s.%s", out_dir.get_ptr(), in_fname.get_ptr(), texture_file_types::get_extension(out_file_type)); else out_filename.format("%s\\%s.%s", out_dir.get_ptr(), in_fname.get_ptr(), texture_file_types::get_extension(out_file_type)); } else { out_filename.format("%s.%s", in_fname.get_ptr(), texture_file_types::get_extension(out_file_type)); } if (m_params.get_value_as_bool("recreate")) { if (file_utils::full_path(out_filename)) { if ((!compare_mode) && (!info_mode)) { dynamic_string out_drive, out_path; file_utils::split_path(out_filename.get_ptr(), &out_drive, &out_path, NULL, NULL); out_drive += out_path; file_utils::create_path(out_drive.get_ptr()); } } } } if ((!compare_mode) && (!info_mode)) { if (file_utils::does_file_exist(out_filename.get_ptr())) { if (m_params.get_value_as_bool("nooverwrite")) { console::warning("Skipping already existing file: %s\n", out_filename.get_ptr()); m_num_skipped++; continue; } if (m_params.get_value_as_bool("timestamp")) { if (file_utils::is_older_than(in_filename.get_ptr(), out_filename.get_ptr())) { console::warning("Skipping up to date file: %s\n", out_filename.get_ptr()); m_num_skipped++; continue; } } } } convert_status status = cCSFailed; if (info_mode) status = display_file_info(file_index, files.size(), in_filename.get_ptr()); else if (compare_mode) status = compare_file(file_index, files.size(), in_filename.get_ptr(), out_filename.get_ptr(), out_file_type); else if (read_only_file_check(out_filename.get_ptr())) status = convert_file(file_index, files.size(), in_filename.get_ptr(), out_filename.get_ptr(), out_file_type); m_num_processed++; switch (status) { case cCSSucceeded: { console::info(""); m_num_succeeded++; break; } case cCSSkipped: { console::info("Skipping file.\n"); m_num_skipped++; break; } case cCSBadParam: { 
return false; } default: { if (!m_params.get_value_as_bool("ignoreerrors")) return false; console::info(""); m_num_failed++; break; } } } return true; } void print_texture_info(const char* pTex_desc, texture_conversion::convert_params& params, mipmapped_texture& tex) { console::info("%s: %ux%u, Levels: %u, Faces: %u, Format: %s", pTex_desc, tex.get_width(), tex.get_height(), tex.get_num_levels(), tex.get_num_faces(), pixel_format_helpers::get_pixel_format_string(tex.get_format())); console::disable_crlf(); console::info("Apparent type: %s, ", get_texture_type_desc(params.m_texture_type)); console::info("Flags: "); if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagRValid) console::info("R "); if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagGValid) console::info("G "); if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagBValid) console::info("B "); if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagAValid) console::info("A "); if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagGrayscale) console::info("Grayscale "); if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagNormalMap) console::info("NormalMap "); if (tex.get_comp_flags() & pixel_format_helpers::cCompFlagLumaChroma) console::info("LumaChroma "); if (tex.is_flipped()) console::info("Flipped "); else console::info("Non-Flipped "); console::info("\n"); console::enable_crlf(); } static bool progress_callback_func(uint32 percentage_complete, void* /* pUser_data_ptr */) { console::disable_crlf(); char buf[8]; for (uint32 i = 0; i < 7; i++) buf[i] = 8; buf[7] = '\0'; for (uint32 i = 0; i < 130 / 8; i++) console::progress(buf); console::progress("Processing: %u%%", percentage_complete); for (uint32 i = 0; i < 7; i++) buf[i] = ' '; console::progress(buf); console::progress(buf); for (uint32 i = 0; i < 7; i++) buf[i] = 8; console::progress(buf); console::progress(buf); console::enable_crlf(); return true; } bool parse_mipmap_params(crn_mipmap_params& mip_params) { 
dynamic_string val; if (m_params.get_value_as_string("mipMode", 0, val)) { uint32 i; for (i = 0; i < cCRNMipModeTotal; i++) { if (val == crn_get_mip_mode_name(static_cast(i))) { mip_params.m_mode = static_cast(i); break; } } if (i == cCRNMipModeTotal) { console::error("Invalid MipMode: \"%s\"", val.get_ptr()); return false; } } if (m_params.get_value_as_string("mipFilter", 0, val)) { uint32 i; for (i = 0; i < cCRNMipFilterTotal; i++) { if (val == dynamic_string(crn_get_mip_filter_name(static_cast(i)))) { mip_params.m_filter = static_cast(i); break; } } if (i == cCRNMipFilterTotal) { console::error("Invalid MipFilter: \"%s\"", val.get_ptr()); return false; } if (i == cCRNMipFilterBox) mip_params.m_blurriness = 1.0f; } mip_params.m_gamma = m_params.get_value_as_float("gamma", 0, mip_params.m_gamma, .1f, 8.0f); mip_params.m_gamma_filtering = (mip_params.m_gamma != 1.0f); mip_params.m_blurriness = m_params.get_value_as_float("blurriness", 0, mip_params.m_blurriness, .01f, 8.0f); mip_params.m_renormalize = m_params.get_value_as_bool("renormalize", 0, mip_params.m_renormalize != 0); mip_params.m_rtopmip = m_params.get_value_as_bool("rtopmip", 0, mip_params.m_rtopmip != 0); mip_params.m_tiled = m_params.get_value_as_bool("wrap"); mip_params.m_max_levels = m_params.get_value_as_int("maxmips", 0, cCRNMaxLevels, 1, cCRNMaxLevels); mip_params.m_min_mip_size = m_params.get_value_as_int("minmipsize", 0, 1, 1, cCRNMaxLevelResolution); return true; } bool parse_scale_params(crn_mipmap_params& mipmap_params) { if (m_params.has_key("rescale")) { int w = m_params.get_value_as_int("rescale", 0, -1, 1, cCRNMaxLevelResolution, 0); int h = m_params.get_value_as_int("rescale", 0, -1, 1, cCRNMaxLevelResolution, 1); mipmap_params.m_scale_mode = cCRNSMAbsolute; mipmap_params.m_scale_x = (float)w; mipmap_params.m_scale_y = (float)h; } else if (m_params.has_key("relrescale")) { float w = m_params.get_value_as_float("relrescale", 0, 1, 1, 256, 0); float h = 
m_params.get_value_as_float("relrescale", 0, 1, 1, 256, 1); mipmap_params.m_scale_mode = cCRNSMRelative; mipmap_params.m_scale_x = w; mipmap_params.m_scale_y = h; } else if (m_params.has_key("rescalemode")) { // nearest | hi | lo dynamic_string mode_str(m_params.get_value_as_string_or_empty("rescalemode")); if (mode_str == "nearest") mipmap_params.m_scale_mode = cCRNSMNearestPow2; else if (mode_str == "hi") mipmap_params.m_scale_mode = cCRNSMNextPow2; else if (mode_str == "lo") mipmap_params.m_scale_mode = cCRNSMLowerPow2; else { console::error("Invalid rescale mode: \"%s\"", mode_str.get_ptr()); return false; } } if (m_params.has_key("clamp")) { uint32 w = m_params.get_value_as_int("clamp", 0, 1, 1, cCRNMaxLevelResolution, 0); uint32 h = m_params.get_value_as_int("clamp", 0, 1, 1, cCRNMaxLevelResolution, 1); mipmap_params.m_clamp_scale = false; mipmap_params.m_clamp_width = w; mipmap_params.m_clamp_height = h; } else if (m_params.has_key("clampScale")) { uint32 w = m_params.get_value_as_int("clampscale", 0, 1, 1, cCRNMaxLevelResolution, 0); uint32 h = m_params.get_value_as_int("clampscale", 0, 1, 1, cCRNMaxLevelResolution, 1); mipmap_params.m_clamp_scale = true; mipmap_params.m_clamp_width = w; mipmap_params.m_clamp_height = h; } if (m_params.has_key("window")) { uint32 xl = m_params.get_value_as_int("window", 0, 0, 0, cCRNMaxLevelResolution, 0); uint32 yl = m_params.get_value_as_int("window", 0, 0, 0, cCRNMaxLevelResolution, 1); uint32 xh = m_params.get_value_as_int("window", 0, 0, 0, cCRNMaxLevelResolution, 2); uint32 yh = m_params.get_value_as_int("window", 0, 0, 0, cCRNMaxLevelResolution, 3); mipmap_params.m_window_left = math::minimum(xl, xh); mipmap_params.m_window_top = math::minimum(yl, yh); mipmap_params.m_window_right = math::maximum(xl, xh); mipmap_params.m_window_bottom = math::maximum(yl, yh); } return true; } bool parse_comp_params(texture_file_types::format dst_file_format, crn_comp_params& comp_params) { if (dst_file_format == 
texture_file_types::cFormatCRN) comp_params.m_quality_level = cDefaultCRNQualityLevel; if (m_params.has_key("q") || m_params.has_key("quality")) { const char* pKeyName = m_params.has_key("q") ? "q" : "quality"; if ((dst_file_format == texture_file_types::cFormatDDS) || (dst_file_format == texture_file_types::cFormatCRN) || (dst_file_format == texture_file_types::cFormatKTX)) { uint32 i = m_params.get_value_as_int(pKeyName, 0, cDefaultCRNQualityLevel, 0, cCRNMaxQualityLevel); comp_params.m_quality_level = i; } else { console::error("/quality or /q option is only invalid when writing DDS, KTX or CRN files!"); return false; } } else { float desired_bitrate = m_params.get_value_as_float("bitrate", 0, 0.0f, .1f, 30.0f); if (desired_bitrate > 0.0f) { comp_params.m_target_bitrate = desired_bitrate; } } int color_endpoints = m_params.get_value_as_int("c", 0, 0, cCRNMinPaletteSize, cCRNMaxPaletteSize); int color_selectors = m_params.get_value_as_int("s", 0, 0, cCRNMinPaletteSize, cCRNMaxPaletteSize); int alpha_endpoints = m_params.get_value_as_int("ca", 0, 0, cCRNMinPaletteSize, cCRNMaxPaletteSize); int alpha_selectors = m_params.get_value_as_int("sa", 0, 0, cCRNMinPaletteSize, cCRNMaxPaletteSize); if (((color_endpoints > 0) && (color_selectors > 0)) || ((alpha_endpoints > 0) && (alpha_selectors > 0))) { comp_params.set_flag(cCRNCompFlagManualPaletteSizes, true); comp_params.m_crn_color_endpoint_palette_size = color_endpoints; comp_params.m_crn_color_selector_palette_size = color_selectors; comp_params.m_crn_alpha_endpoint_palette_size = alpha_endpoints; comp_params.m_crn_alpha_selector_palette_size = alpha_selectors; } if (m_params.has_key("alphaThreshold")) { int dxt1a_alpha_threshold = m_params.get_value_as_int("alphaThreshold", 0, 128, 0, 255); comp_params.m_dxt1a_alpha_threshold = dxt1a_alpha_threshold; if (dxt1a_alpha_threshold > 0) { comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, true); } } comp_params.set_flag(cCRNCompFlagPerceptual, 
!m_params.get_value_as_bool("uniformMetrics")); comp_params.set_flag(cCRNCompFlagHierarchical, !m_params.get_value_as_bool("noAdaptiveBlocks")); if (m_params.has_key("helperThreads")) comp_params.m_num_helper_threads = m_params.get_value_as_int("helperThreads", 0, cCRNMaxHelperThreads, 0, cCRNMaxHelperThreads); else if (g_number_of_processors > 1) comp_params.m_num_helper_threads = g_number_of_processors - 1; dynamic_string comp_name; if (m_params.get_value_as_string("compressor", 0, comp_name)) { uint32 i; for (i = 0; i < cCRNTotalDXTCompressors; i++) { if (comp_name == get_dxt_compressor_name(static_cast(i))) { comp_params.m_dxt_compressor_type = static_cast(i); break; } } if (i == cCRNTotalDXTCompressors) { console::error("Invalid compressor: \"%s\"", comp_name.get_ptr()); return false; } } dynamic_string dxt_quality_str; if (m_params.get_value_as_string("dxtquality", 0, dxt_quality_str)) { uint32 i; for (i = 0; i < cCRNDXTQualityTotal; i++) { if (dxt_quality_str == crn_get_dxt_quality_string(static_cast(i))) { comp_params.m_dxt_quality = static_cast(i); break; } } if (i == cCRNDXTQualityTotal) { console::error("Invalid DXT quality: \"%s\"", dxt_quality_str.get_ptr()); return false; } } else { comp_params.m_dxt_quality = cCRNDXTQualityUber; } comp_params.set_flag(cCRNCompFlagDisableEndpointCaching, m_params.get_value_as_bool("noendpointcaching")); comp_params.set_flag(cCRNCompFlagGrayscaleSampling, m_params.get_value_as_bool("grayscalesampling")); comp_params.set_flag(cCRNCompFlagUseBothBlockTypes, !m_params.get_value_as_bool("forceprimaryencoding")); if (comp_params.get_flag(cCRNCompFlagUseBothBlockTypes)) comp_params.set_flag(cCRNCompFlagUseTransparentIndicesForBlack, m_params.get_value_as_bool("usetransparentindicesforblack")); else comp_params.set_flag(cCRNCompFlagUseTransparentIndicesForBlack, false); return true; } convert_status display_file_info(uint32 file_index, uint32 num_files, const char* pSrc_filename) { if (num_files > 1) console::message("[%u/%u] 
Source texture: \"%s\"", file_index + 1, num_files, pSrc_filename); else console::message("Source texture: \"%s\"", pSrc_filename); texture_file_types::format src_file_format = texture_file_types::determine_file_format(pSrc_filename); if (src_file_format == texture_file_types::cFormatInvalid) { console::error("Unrecognized file type: %s", pSrc_filename); return cCSFailed; } mipmapped_texture src_tex; if (!src_tex.read_from_file(pSrc_filename, src_file_format)) { if (src_tex.get_last_error().is_empty()) console::error("Failed reading source file: \"%s\"", pSrc_filename); else console::error("%s", src_tex.get_last_error().get_ptr()); return cCSFailed; } uint64 input_file_size; file_utils::get_file_size(pSrc_filename, input_file_size); uint32 total_in_pixels = 0; for (uint32 i = 0; i < src_tex.get_num_levels(); i++) { uint32 width = math::maximum(1, src_tex.get_width() >> i); uint32 height = math::maximum(1, src_tex.get_height() >> i); total_in_pixels += width * height * src_tex.get_num_faces(); } vector src_tex_bytes; if (!cfile_stream::read_file_into_array(pSrc_filename, src_tex_bytes)) { console::error("Failed loading source file: %s", pSrc_filename); return cCSFailed; } if (!src_tex_bytes.size()) { console::warning("Source file is empty: %s", pSrc_filename); return cCSSkipped; } uint32 compressed_size = 0; if (m_params.has_key("lzmastats")) { lzma_codec lossless_codec; vector cmp_tex_bytes; if (lossless_codec.pack(src_tex_bytes.get_ptr(), src_tex_bytes.size(), cmp_tex_bytes)) { compressed_size = cmp_tex_bytes.size(); } } bool no_normal_type = m_params.has_key("noNormalDetection"); console::info("Source texture dimensions: %ux%u, Levels: %u, Faces: %u, Format: %s\nPacked Format: %u, Apparent Type: %s, Flipped: %u, Can Unflip Without Unpacking: %u", src_tex.get_width(), src_tex.get_height(), src_tex.get_num_levels(), src_tex.get_num_faces(), pixel_format_helpers::get_pixel_format_string(src_tex.get_format()), src_tex.is_packed(), 
get_texture_type_desc(src_tex.determine_texture_type(no_normal_type)), src_tex.is_flipped(), src_tex.can_unflip_without_unpacking()); console::info("Total pixels: %u, Source file size: " CRNLIB_UINT64_FORMAT_SPECIFIER ", Source file bits/pixel: %1.3f", total_in_pixels, input_file_size, (input_file_size * 8.0f) / total_in_pixels); if (compressed_size) { console::info("LZMA compressed file size: %u bytes, %1.3f bits/pixel", compressed_size, compressed_size * 8.0f / total_in_pixels); } double entropy = math::compute_entropy(src_tex_bytes.get_ptr(), src_tex_bytes.size()); console::info("Source file entropy: %3.6f bits per byte", entropy / src_tex_bytes.size()); if (src_file_format == texture_file_types::cFormatCRN) { crnd::crn_texture_info tex_info; tex_info.m_struct_size = sizeof(crnd::crn_texture_info); crn_bool success = crnd::crnd_get_texture_info(src_tex_bytes.get_ptr(), src_tex_bytes.size(), &tex_info); if (!success) console::error("Failed retrieving CRN texture info!"); else { console::info("CRN texture info:"); console::info("Width: %u, Height: %u, Levels: %u, Faces: %u\nBytes per block: %u, User0: 0x%08X, User1: 0x%08X, CRN Format: %u", tex_info.m_width, tex_info.m_height, tex_info.m_levels, tex_info.m_faces, tex_info.m_bytes_per_block, tex_info.m_userdata0, tex_info.m_userdata1, tex_info.m_format); } } return cCSSucceeded; } void print_stats(texture_conversion::convert_stats& stats, bool force_image_stats = false) { dynamic_string csv_filename; const char* pCSVStatsFilename = m_params.get_value_as_string("csvfile", 0, csv_filename) ? 
csv_filename.get_ptr() : NULL; bool image_stats = force_image_stats || m_params.get_value_as_bool("imagestats") || m_params.get_value_as_bool("mipstats") || (pCSVStatsFilename != NULL); bool mip_stats = m_params.get_value_as_bool("mipstats"); bool grayscale_sampling = m_params.get_value_as_bool("grayscalesampling"); if (!stats.print(image_stats, mip_stats, grayscale_sampling, pCSVStatsFilename)) { console::warning("Unable to compute/display full output file statistics."); } } convert_status compare_file(uint32 file_index, uint32 num_files, const char* pSrc_filename, const char* pDst_filename, texture_file_types::format out_file_type) { if (num_files > 1) console::message("[%u/%u] Comparing source texture \"%s\" to output texture \"%s\"", file_index + 1, num_files, pSrc_filename, pDst_filename); else console::message("Comparing source texture \"%s\" to output texture \"%s\"", pSrc_filename, pDst_filename); texture_file_types::format src_file_format = texture_file_types::determine_file_format(pSrc_filename); if (src_file_format == texture_file_types::cFormatInvalid) { console::error("Unrecognized file type: %s", pSrc_filename); return cCSFailed; } mipmapped_texture src_tex; if (!src_tex.read_from_file(pSrc_filename, src_file_format)) { if (src_tex.get_last_error().is_empty()) console::error("Failed reading source file: \"%s\"", pSrc_filename); else console::error("%s", src_tex.get_last_error().get_ptr()); return cCSFailed; } texture_conversion::convert_stats stats; if (!stats.init(pSrc_filename, pDst_filename, src_tex, out_file_type, m_params.has_key("lzmastats"))) return cCSFailed; print_stats(stats, true); return cCSSucceeded; } convert_status convert_file(uint32 file_index, uint32 num_files, const char* pSrc_filename, const char* pDst_filename, texture_file_types::format out_file_type) { timer tim; if (num_files > 1) console::message("[%u/%u] Reading source texture: \"%s\"", file_index + 1, num_files, pSrc_filename); else console::message("Reading source texture: 
\"%s\"", pSrc_filename); texture_file_types::format src_file_format = texture_file_types::determine_file_format(pSrc_filename); if (src_file_format == texture_file_types::cFormatInvalid) { console::error("Unrecognized file type: %s", pSrc_filename); return cCSFailed; } mipmapped_texture src_tex; tim.start(); if (!src_tex.read_from_file(pSrc_filename, src_file_format)) { if (src_tex.get_last_error().is_empty()) console::error("Failed reading source file: \"%s\"", pSrc_filename); else console::error("%s", src_tex.get_last_error().get_ptr()); return cCSFailed; } double total_time = tim.get_elapsed_secs(); console::info("Texture successfully loaded in %3.3fs", total_time); if (m_params.get_value_as_bool("converttoluma")) src_tex.convert(image_utils::cConversion_Y_To_RGB); if (m_params.get_value_as_bool("setalphatoluma")) src_tex.convert(image_utils::cConversion_Y_To_A); texture_conversion::convert_params params; bool no_normal_type = m_params.has_key("noNormalDetection"); params.m_texture_type = src_tex.determine_texture_type(no_normal_type); params.m_pInput_texture = &src_tex; params.m_dst_filename = pDst_filename; params.m_dst_file_type = out_file_type; params.m_lzma_stats = m_params.has_key("lzmastats"); params.m_write_mipmaps_to_multiple_files = m_params.has_key("split"); params.m_always_use_source_pixel_format = m_params.has_key("usesourceformat"); params.m_y_flip = m_params.has_key("yflip"); params.m_unflip = m_params.has_key("unflip"); if ((!m_params.get_value_as_bool("noprogress")) && (!m_params.get_value_as_bool("quiet"))) params.m_pProgress_func = progress_callback_func; if (m_params.get_value_as_bool("debug")) { params.m_debugging = true; params.m_comp_params.set_flag(cCRNCompFlagDebugging, true); } if (m_params.get_value_as_bool("paramdebug")) params.m_param_debugging = true; if (m_params.get_value_as_bool("quick")) params.m_quick = true; params.m_no_stats = m_params.get_value_as_bool("nostats"); params.m_dst_format = PIXEL_FMT_INVALID; for (uint32 i = 0; i 
< pixel_format_helpers::get_num_formats(); i++) { pixel_format trial_fmt = pixel_format_helpers::get_pixel_format_by_index(i); if (m_params.has_key(pixel_format_helpers::get_pixel_format_string(trial_fmt))) { params.m_dst_format = trial_fmt; break; } } if (texture_file_types::supports_mipmaps(src_file_format)) { params.m_mipmap_params.m_mode = cCRNMipModeUseSourceMips; } if (!parse_mipmap_params(params.m_mipmap_params)) return cCSBadParam; if (!parse_comp_params(params.m_dst_file_type, params.m_comp_params)) return cCSBadParam; if (!parse_scale_params(params.m_mipmap_params)) return cCSBadParam; print_texture_info("Source texture", params, src_tex); if (params.m_texture_type == cTextureTypeNormalMap) { params.m_comp_params.set_flag(cCRNCompFlagPerceptual, false); } texture_conversion::convert_stats stats; tim.start(); bool status = texture_conversion::process(params, stats); total_time = tim.get_elapsed_secs(); if (!status) { if (params.m_error_message.is_empty()) console::error("Failed writing output file: \"%s\"", pDst_filename); else console::error(params.m_error_message.get_ptr()); return cCSFailed; } console::info("Texture successfully processed in %3.3fs", total_time); if (!m_params.get_value_as_bool("nostats")) print_stats(stats); return cCSSucceeded; } }; //----------------------------------------------------------------------------------------------------------------------- static bool check_for_option(int argc, char* argv[], const char* pOption) { for (int i = 1; i < argc; i++) { if ((argv[i][0] == '/') || (argv[i][0] == '-')) { if (crnlib_stricmp(&argv[i][1], pOption) == 0) return true; } } return false; } //----------------------------------------------------------------------------------------------------------------------- static void print_title() { console::printf("crunch: Advanced DXTn Texture Compressor (Daemon branch, Unity format variant)"); console::printf(""); /* Add U suffix to the version string to remind it's the Unity variant. 
*/ #if defined COMPUTED_VERSION_SUFFIX console::printf("crnlib version v%u.%02uU %s %s", CRNLIB_VERSION / 100U, CRNLIB_VERSION % 100U, crnlib_is_x64() ? "64-bit" : "32-bit", COMPUTED_VERSION_SUFFIX); #else console::printf("crnlib version v%u.%02uU %s Built %s, %s", CRNLIB_VERSION / 100U, CRNLIB_VERSION % 100U, crnlib_is_x64() ? "64-bit" : "32-bit", __DATE__, __TIME__); #endif console::printf(""); console::printf("Crunch is brought to you by:"); console::printf("- 2014-2024 Daemon Developers and contributors"); console::printf(" https://github.com/DaemonEngine/crunch"); /* Who owns the copyright, Alexander, Unity, both? Unity Technologies is a Trade Name for Unity Software Inc. */ console::printf("- 2017-2018 Alexander Suvorov and Unity Software Inc."); console::printf(" https://github.com/Unity-Technologies/crunch/tree/unity"); /* Richard removed copyright on his work on 2020-09-15 https://github.com/BinomialLLC/crunch/commit/57353fa9ac0908893215bc30ba106adfb80c4c95 He also stated on 2019-06-15 that Tenacious Software LLC didn't had copyright https://github.com/BinomialLLC/crunch/commit/7c54efc80e78ac0b7548d5dce35ed7318d413390 He also removed mention to Binomial LLC in the copyright line but since this line is no longer a copyright line, the name can be kept. */ console::printf("- 2010-2017 Richard Geldreich, Jr. 
and Binomial LLC and contributors"); console::printf(" https://github.com/BinomialLLC/crunch"); console::printf(""); console::printf("Please report bugs here: https://github.com/DaemonEngine/crunch/issues"); console::printf(""); } //----------------------------------------------------------------------------------------------------------------------- static int main_internal(int argc, char* argv[]) { colorized_console::init(); if (check_for_option(argc, argv, "quiet")) console::disable_output(); if (!check_for_option(argc, argv, "noTitle")) print_title(); dynamic_string cmd_line; get_command_line_as_single_string(cmd_line, argc, argv); bool status = false; if (check_for_option(argc, argv, "corpus_gen")) { corpus_gen generator; status = generator.generate(cmd_line.get_ptr()); } else if (check_for_option(argc, argv, "corpus_test")) { corpus_tester tester; status = tester.test(cmd_line.get_ptr()); } else { crunch converter; status = converter.convert(cmd_line.get_ptr()); } colorized_console::deinit(); crnlib_print_mem_stats(); return status ? EXIT_SUCCESS : EXIT_FAILURE; } static void pause_and_wait(void) { console::enable_output(); console::message("\nPress a key to continue."); for (;;) { if (crn_getch() != -1) break; } } //----------------------------------------------------------------------------------------------------------------------- int main(int argc, char* argv[]) { int status = EXIT_FAILURE; if (crnlib_is_debugger_present()) { status = main_internal(argc, argv); } else { #ifdef _MSC_VER __try { status = main_internal(argc, argv); } __except (EXCEPTION_EXECUTE_HANDLER) { console::error("Uncached exception! 
crunch command line tool failed!"); } #else status = main_internal(argc, argv); #endif } console::printf("\nExit status: %i", status); if (check_for_option(argc, argv, "pause")) { if ((status == EXIT_FAILURE) || (console::get_num_messages(cErrorConsoleMessage))) pause_and_wait(); } return status; } DaemonEngine-crunch-ef4d32f/emscripten/000077500000000000000000000000001503722002600201705ustar00rootroot00000000000000DaemonEngine-crunch-ef4d32f/emscripten/CMakeLists.txt000066400000000000000000000015721503722002600227350ustar00rootroot00000000000000cmake_minimum_required(VERSION 3.5) set(CMAKE_CXX_STANDARD 11) set(CRUNCHJS_EXE_NAME crunch) if (NOT CMAKE_CXX_COMPILER) set(CMAKE_CXX_COMPILER emcc) endif() project(${CRUNCHJS_EXE_NAME}.js LANGUAGES CXX) if (NOT CMAKE_TOOLCHAIN_FILE) set(CMAKE_EXECUTABLE_SUFFIX .js) endif() set(CMAKE_CXX_FLAGS "-O3 ${CMAKE_CXX_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s EXPORTED_FUNCTIONS=\"['_malloc', '_free', '_crn_get_width', '_crn_get_height', '_crn_get_levels', '_crn_get_dxt_format', '_crn_get_bytes_per_block', '_crn_get_uncompressed_size', '_crn_decompress']\" -s NO_EXIT_RUNTIME=1 -s NO_FILESYSTEM=1 -s ELIMINATE_DUPLICATE_FUNCTIONS=1 -s ALLOW_MEMORY_GROWTH=1 --memory-init-file 0") include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/../inc ) set(CRUNCHJS_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/crunch_lib.cpp ) add_executable(${CRUNCHJS_EXE_NAME} ${CRUNCHJS_SRCS}) DaemonEngine-crunch-ef4d32f/emscripten/Makefile000066400000000000000000000011641503722002600216320ustar00rootroot00000000000000.DEFAULT_GOAL := all .PHONY: all clean CXX = emcc COMPILE_OPTIMIZATION_OPTIONS = -O3 COMPILE_OPTIONS = -std=c++11 ${COMPILE_OPTIMIZATION_OPTIONS} LINKER_OPTIONS = -s EXPORTED_FUNCTIONS="['_malloc', '_free', '_crn_get_width', '_crn_get_height', '_crn_get_levels', '_crn_get_dxt_format', '_crn_get_bytes_per_block', '_crn_get_uncompressed_size', '_crn_decompress']" -s NO_EXIT_RUNTIME=1 -s NO_FILESYSTEM=1 -s ELIMINATE_DUPLICATE_FUNCTIONS=1 -s 
ALLOW_MEMORY_GROWTH=1 --memory-init-file 0 all: crunch.js crunch.js: crunch_lib.cpp $(CXX) $< -o $@ -I../inc $(COMPILE_OPTIONS) $(LINKER_OPTIONS) clean: rm crunch.js crunch.wasm DaemonEngine-crunch-ef4d32f/emscripten/crunch_lib.cpp000066400000000000000000000114731503722002600230120ustar00rootroot00000000000000/* Copyright (c) 2013, Evan Parker, Brandon Jones. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define PLATFORM_NACL // This disables use of 64 bit integers, among other things. 
#include // For NULL, size_t #include // for malloc etc #include "crn_decomp.h" extern "C" { unsigned int crn_get_width(void* src, unsigned int src_size); unsigned int crn_get_height(void* src, unsigned int src_size); unsigned int crn_get_levels(void* src, unsigned int src_size); unsigned int crn_get_dxt_format(void* src, unsigned int src_size); unsigned int crn_get_bytes_per_block(void* src, unsigned int src_size); unsigned int crn_get_uncompressed_size(void* p, unsigned int size, unsigned int level); void crn_decompress(void* src, unsigned int src_size, void* dst, unsigned int dst_size, unsigned int firstLevel, unsigned int levelCount); } unsigned int crn_get_width(void* src, unsigned int src_size) { crnd::crn_texture_info tex_info; crnd::crnd_get_texture_info(static_cast(src), src_size, &tex_info); return tex_info.m_width; } unsigned int crn_get_height(void* src, unsigned int src_size) { crnd::crn_texture_info tex_info; crnd::crnd_get_texture_info(static_cast(src), src_size, &tex_info); return tex_info.m_height; } unsigned int crn_get_levels(void* src, unsigned int src_size) { crnd::crn_texture_info tex_info; crnd::crnd_get_texture_info(static_cast(src), src_size, &tex_info); return tex_info.m_levels; } unsigned int crn_get_dxt_format(void* src, unsigned int src_size) { crnd::crn_texture_info tex_info; crnd::crnd_get_texture_info(static_cast(src), src_size, &tex_info); return tex_info.m_format; } unsigned int crn_get_bytes_per_block(void* src, unsigned int src_size) { crnd::crn_texture_info tex_info; crnd::crnd_get_texture_info(static_cast(src), src_size, &tex_info); return crnd::crnd_get_bytes_per_dxt_block(tex_info.m_format); } unsigned int crn_get_uncompressed_size(void* src, unsigned int src_size, unsigned int level) { crnd::crn_texture_info tex_info; crnd::crnd_get_texture_info(static_cast(src), src_size, &tex_info); const crn_uint32 width = tex_info.m_width >> level; const crn_uint32 height = tex_info.m_height >> level; const crn_uint32 blocks_x = (width + 
3) >> 2; const crn_uint32 blocks_y = (height + 3) >> 2; const crn_uint32 row_pitch = blocks_x * crnd::crnd_get_bytes_per_dxt_block(tex_info.m_format); const crn_uint32 total_face_size = row_pitch * blocks_y; return total_face_size; } void crn_decompress(void* src, unsigned int src_size, void* dst, unsigned int dst_size, unsigned int firstLevel, unsigned int levelCount) { crnd::crn_texture_info tex_info; crnd::crnd_get_texture_info(static_cast(src), src_size, &tex_info); crn_uint32 width = tex_info.m_width >> firstLevel; crn_uint32 height = tex_info.m_height >> firstLevel; crn_uint32 bytes_per_block = crnd::crnd_get_bytes_per_dxt_block(tex_info.m_format); void* pDecomp_images[1]; pDecomp_images[0] = dst; crnd::crnd_unpack_context pContext = crnd::crnd_unpack_begin(static_cast(src), src_size); for (int i = firstLevel; i < firstLevel + levelCount; ++i) { crn_uint32 blocks_x = (width + 3) >> 2; crn_uint32 blocks_y = (height + 3) >> 2; crn_uint32 row_pitch = blocks_x * bytes_per_block; crn_uint32 total_level_size = row_pitch * blocks_y; crnd::crnd_unpack_level(pContext, pDecomp_images, total_level_size, row_pitch, i); pDecomp_images[0] = (char*)pDecomp_images[0] + total_level_size; width = width >> 1; height = height >> 1; } crnd::crnd_unpack_end(pContext); }DaemonEngine-crunch-ef4d32f/example1/000077500000000000000000000000001503722002600175335ustar00rootroot00000000000000DaemonEngine-crunch-ef4d32f/example1/CMakeLists.txt000066400000000000000000000004601503722002600222730ustar00rootroot00000000000000include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/../inc ${CMAKE_CURRENT_SOURCE_DIR}/../crnlib ) # Defines the source code for the library set(EXAMPLE1_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/example1.cpp ) add_executable(example1 ${EXAMPLE1_SRCS}) target_link_libraries(example1 ${CRUNCH_LIBRARY_NAME}) DaemonEngine-crunch-ef4d32f/example1/example1.cpp000066400000000000000000000562301503722002600217610ustar00rootroot00000000000000// File: example1.cpp - Simple command line tool 
that uses the crnlib lib and the crn_decomp.h header file library // to compress, transcode/unpack, and inspect CRN/DDS textures. // See Copyright Notice and license at the end of inc/crnlib.h #include #include #include #include #include #if !defined(_WIN32) #include #endif // Public crnlib header. #include "crnlib.h" // CRN transcoder library. #include "crn_decomp.h" // .DDS file format definitions. #include "dds_defs.h" #include "crn_core.h" #include "crn_strutils.h" #include "crn_file_utils.h" // stb_image, for loading/saving image files. #ifdef _MSC_VER #pragma warning(disable : 4244) // conversion from 'int' to 'uint8', possible loss of data #pragma warning(disable : 4100) // unreferenced formal parameter #endif #define STB_IMAGE_IMPLEMENTATION #include "stb_image.h" #define STB_IMAGE_WRITE_IMPLEMENTATION #include "stb_image_write.h" #if defined(_WIN32) // windows.h is only needed here for GetSystemInfo(). #define WIN32_LEAN_AND_MEAN #define NOMINMAX #include "windows.h" #elif defined(__FreeBSD__) || defined(__APPLE__) #include #include #elif defined(__GNUC__) #include #endif using namespace crnlib; typedef unsigned int uint; const int cDefaultCRNQualityLevel = 128; static int print_usage() { printf("Description: Simple crnlib API example program.\n"); printf("Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC\n"); printf("Usage: example1 [mode: i/c/d] [source_file] [options]\n"); printf("\nModes:\n"); printf("c: Compress to .DDS or .CRN using the crn_compress() func. 
in crnlib.h\n"); printf(" The default output format is .DDS\n"); printf(" Supported source image formats:\n"); printf(" Baseline JPEG, PNG, BMP, TGA, PSD, and HDR\n"); printf("d: Transcodes a .CRN file to .DDS using the crn_decompress_crn_to_dds() func.,\n"); printf("or unpacks each face and mipmap level in a .DDS file to multiple .TGA files.\n"); printf("i: Display info about source_file.\n"); printf("\nOptions:\n"); printf("-out filename - Force output filename.\n"); printf("\nCompression mode options:\n"); printf("-crn - Generate a .CRN file instead of .DDS\n"); printf("-bitrate # - Specify desired CRN/DDS bits/texel, from [.1-8]\n"); printf(" When writing .DDS: -bitrate or -quality enable clustered DXTn compression.\n"); printf("-quality # - Specify CRN/DDS quality level factor, from [0-255]\n"); printf("-noAdaptiveBlocks - Always use 4x4 blocks instead of up to 8x8 macroblocks\n"); printf("-nonsrgb - Input is not sRGB: disables gamma filtering, perceptual metrics.\n"); printf("-nomips - Don't generate mipmaps\n"); printf("-setalphatoluma - Set alpha channel to luma before compression.\n"); printf("-converttoluma - Set RGB to luma before compression.\n"); printf("-pixelformat fmt - Output file's crn_format: DXT1, DXT1A, DXT3, DXT5_CCxY,\n"); printf(" DXT5_xGxR, DXT5_xGBR, DXT5_AGBR, DXN_XY (ATI 3DC), DXN_YX (ATI 3DC),\n"); printf(" DXT5A (ATN1N)\n"); printf(" If no output format is specified, this example uses either DXT1 or DXT5.\n"); return EXIT_FAILURE; } static int error(const char* pMsg, ...) { va_list args; va_start(args, pMsg); char buf[512]; crnlib_vsnprintf(buf, sizeof(buf), pMsg, args); va_end(args); printf("%s", buf); return EXIT_FAILURE; } // Loads an entire file into an allocated memory block. 
static crn_uint8* read_file_into_buffer(const char* pFilename, crn_uint32& size) { size = 0; FILE* pFile = NULL; crn_fopen(&pFile, pFilename, "rb"); if (!pFile) return NULL; fseek(pFile, 0, SEEK_END); size = ftell(pFile); fseek(pFile, 0, SEEK_SET); crn_uint8* pSrc_file_data = static_cast(malloc(std::max(1U, size))); if ((!pSrc_file_data) || (fread(pSrc_file_data, size, 1, pFile) != 1)) { fclose(pFile); free(pSrc_file_data); size = 0; return NULL; } fclose(pFile); return pSrc_file_data; } // Cracks a CRN's file header using the helper functions in crn_decomp.h. static bool print_crn_info(const crn_uint8* pData, crn_uint32 data_size) { crnd::crn_file_info file_info; if (!crnd::crnd_validate_file(pData, data_size, &file_info)) return false; printf("crnd_validate_file:\n"); printf("File size: %u\nActualDataSize: %u\nHeaderSize: %u\nTotalPaletteSize: %u\nTablesSize: %u\nLevels: %u\n", data_size, file_info.m_actual_data_size, file_info.m_header_size, file_info.m_total_palette_size, file_info.m_tables_size, file_info.m_levels); printf("LevelCompressedSize: "); for (crn_uint32 i = 0; i < cCRNMaxLevels; i++) printf("%u ", file_info.m_level_compressed_size[i]); printf("\n"); printf("ColorEndpointPaletteSize: %u\n", file_info.m_color_endpoint_palette_entries); printf("ColorSelectorPaletteSize: %u\n", file_info.m_color_selector_palette_entries); printf("AlphaEndpointPaletteSize: %u\n", file_info.m_alpha_endpoint_palette_entries); printf("AlphaSelectorPaletteSize: %u\n", file_info.m_alpha_selector_palette_entries); printf("crnd_get_texture_info:\n"); crnd::crn_texture_info tex_info; if (!crnd::crnd_get_texture_info(pData, data_size, &tex_info)) return false; printf("Dimensions: %ux%u\nLevels: %u\nFaces: %u\nBytesPerBlock: %u\nUserData0: %u\nUserData1: %u\nCrnFormat: %s\n", tex_info.m_width, tex_info.m_height, tex_info.m_levels, tex_info.m_faces, tex_info.m_bytes_per_block, tex_info.m_userdata0, tex_info.m_userdata1, crn_get_format_string(tex_info.m_format)); return true; } // 
Cracks the DDS header and dump its contents. static bool print_dds_info(const void* pData, crn_uint32 data_size) { if ((data_size < 128) || (*reinterpret_cast(pData) != crnlib::cDDSFileSignature)) return false; const crnlib::DDSURFACEDESC2& desc = *reinterpret_cast((reinterpret_cast(pData) + sizeof(crn_uint32))); if (desc.dwSize != sizeof(crnlib::DDSURFACEDESC2)) return false; printf("DDS file information:\n"); printf("File size: %u\nDimensions: %ux%u\nPitch/LinearSize: %u\n", data_size, desc.dwWidth, desc.dwHeight, desc.dwLinearSize); printf("MipMapCount: %u\nAlphaBitDepth: %u\n", desc.dwMipMapCount, desc.dwAlphaBitDepth); const char* pDDSDFlagNames[] = { "DDSD_CAPS", "DDSD_HEIGHT", "DDSD_WIDTH", "DDSD_PITCH", NULL, "DDSD_BACKBUFFERCOUNT", "DDSD_ZBUFFERBITDEPTH", "DDSD_ALPHABITDEPTH", NULL, NULL, NULL, "DDSD_LPSURFACE", "DDSD_PIXELFORMAT", "DDSD_CKDESTOVERLAY", "DDSD_CKDESTBLT", "DDSD_CKSRCOVERLAY", "DDSD_CKSRCBLT", "DDSD_MIPMAPCOUNT", "DDSD_REFRESHRATE", "DDSD_LINEARSIZE", "DDSD_TEXTURESTAGE", "DDSD_FVF", "DDSD_SRCVBHANDLE", "DDSD_DEPTH"}; printf("DDSD Flags: 0x%08X ", desc.dwFlags); for (size_t i = 0; i < sizeof(pDDSDFlagNames) / sizeof(pDDSDFlagNames[0]); i++) if ((pDDSDFlagNames[i]) && (desc.dwFlags & (1 << i))) printf("%s ", pDDSDFlagNames[i]); printf("\n\n"); printf("ddpfPixelFormat.dwFlags: 0x%08X ", desc.ddpfPixelFormat.dwFlags); if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHAPIXELS) printf("DDPF_ALPHAPIXELS "); if (desc.ddpfPixelFormat.dwFlags & DDPF_ALPHA) printf("DDPF_ALPHA "); if (desc.ddpfPixelFormat.dwFlags & DDPF_FOURCC) printf("DDPF_FOURCC "); if (desc.ddpfPixelFormat.dwFlags & DDPF_PALETTEINDEXED8) printf("DDPF_PALETTEINDEXED8 "); if (desc.ddpfPixelFormat.dwFlags & DDPF_RGB) printf("DDPF_RGB "); if (desc.ddpfPixelFormat.dwFlags & DDPF_LUMINANCE) printf("DDPF_LUMINANCE "); printf("\n"); printf("ddpfPixelFormat.dwFourCC: 0x%08X '%c' '%c' '%c' '%c'\n", desc.ddpfPixelFormat.dwFourCC, std::max(32U, desc.ddpfPixelFormat.dwFourCC & 0xFF), std::max(32U, 
(desc.ddpfPixelFormat.dwFourCC >> 8) & 0xFF), std::max(32U, (desc.ddpfPixelFormat.dwFourCC >> 16) & 0xFF), std::max(32U, (desc.ddpfPixelFormat.dwFourCC >> 24) & 0xFF)); printf("dwRGBBitCount: %u 0x%08X\n", desc.ddpfPixelFormat.dwRGBBitCount, desc.ddpfPixelFormat.dwRGBBitCount); printf("dwRGBBitCount as FOURCC: '%c' '%c' '%c' '%c'\n", std::max(32U, desc.ddpfPixelFormat.dwRGBBitCount & 0xFF), std::max(32U, (desc.ddpfPixelFormat.dwRGBBitCount >> 8) & 0xFF), std::max(32U, (desc.ddpfPixelFormat.dwRGBBitCount >> 16) & 0xFF), std::max(32U, (desc.ddpfPixelFormat.dwRGBBitCount >> 24) & 0xFF)); printf("dwRBitMask: 0x%08X\ndwGBitMask: 0x%08X\ndwBBitMask: 0x%08X\ndwRGBAlphaBitMask: 0x%08X\n", desc.ddpfPixelFormat.dwRBitMask, desc.ddpfPixelFormat.dwGBitMask, desc.ddpfPixelFormat.dwBBitMask, desc.ddpfPixelFormat.dwRGBAlphaBitMask); printf("\n"); printf("ddsCaps.dwCaps: 0x%08X ", desc.ddsCaps.dwCaps); if (desc.ddsCaps.dwCaps & DDSCAPS_COMPLEX) printf("DDSCAPS_COMPLEX "); if (desc.ddsCaps.dwCaps & DDSCAPS_TEXTURE) printf("DDSCAPS_TEXTURE "); if (desc.ddsCaps.dwCaps & DDSCAPS_MIPMAP) printf("DDSCAPS_MIPMAP"); printf("\n"); printf("ddsCaps.dwCaps2: 0x%08X ", desc.ddsCaps.dwCaps2); const char* pDDCAPS2FlagNames[] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "DDSCAPS2_CUBEMAP", "DDSCAPS2_CUBEMAP_POSITIVEX", "DDSCAPS2_CUBEMAP_NEGATIVEX", "DDSCAPS2_CUBEMAP_POSITIVEY", "DDSCAPS2_CUBEMAP_NEGATIVEY", "DDSCAPS2_CUBEMAP_POSITIVEZ", "DDSCAPS2_CUBEMAP_NEGATIVEZ", NULL, NULL, NULL, NULL, NULL, "DDSCAPS2_VOLUME"}; for (size_t i = 0; i < sizeof(pDDCAPS2FlagNames) / sizeof(pDDCAPS2FlagNames[0]); i++) if ((pDDCAPS2FlagNames[i]) && (desc.ddsCaps.dwCaps2 & (1 << i))) printf("%s ", pDDCAPS2FlagNames[i]); printf("\n"); printf("ddsCaps.dwCaps3: 0x%08X\nddsCaps.dwCaps4: 0x%08X\n", desc.ddsCaps.dwCaps3, desc.ddsCaps.dwCaps4); return true; } // CRN/DDS compression callback function. 
static crn_bool progress_callback_func(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr) { int percentage_complete = (int)(.5f + (phase_index + float(subphase_index) / total_subphases) * 100.0f) / total_phases; printf("\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\b\bProcessing: %u%%", std::min(100, std::max(0, percentage_complete))); return true; } int main(int argc, char* argv[]) { printf("example1 - Version v%u.%02u Built " __DATE__ ", " __TIME__ "\n", CRNLIB_VERSION / 100, CRNLIB_VERSION % 100); if (argc < 3) return print_usage(); // Parse command line options int mode = argv[1][0]; if ((mode != 'c') && (mode != 'd') && (mode != 'i')) return error("Invalid mode!\n"); const char* pSrc_filename = argv[2]; char out_filename[FILENAME_MAX] = {'\0'}; float bitrate = 0.0f; int quality_level = -1; bool srgb_colorspace = true; bool create_mipmaps = true; bool output_crn = false; crn_format fmt = cCRNFmtInvalid; bool use_adaptive_block_sizes = true; bool set_alpha_to_luma = false; bool convert_to_luma = false; bool enable_dxt1a = false; for (int i = 3; i < argc; i++) { if (argv[i][0] == '/') argv[i][0] = '-'; if (!crnlib_stricmp(argv[i], "-crn")) { output_crn = true; } else if (!crnlib_stricmp(argv[i], "-pixelformat")) { if (++i >= argc) return error("Expected pixel format!"); if (!crnlib_stricmp(argv[i], "dxt1a")) { enable_dxt1a = true; fmt = cCRNFmtDXT1; } else { uint f; for (f = 0; f < cCRNFmtTotal; f++) { if (!crnlib_stricmp(argv[i], crn_get_format_string(static_cast(f)))) { fmt = static_cast(f); break; } } if (f == cCRNFmtTotal) return error("Unrecognized pixel format: %s\n", argv[i]); } } else if (!crnlib_stricmp(argv[i], "-bitrate")) { if (++i >= argc) return error("Invalid bitrate!"); bitrate = (float)atof(argv[i]); if ((bitrate < .1f) || (bitrate > 8.0f)) return error("Invalid bitrate!"); } else if (!crnlib_stricmp(argv[i], "-quality")) { if (++i >= argc) return error("Invalid quality level!"); 
quality_level = atoi(argv[i]); if ((quality_level < 0) || (quality_level > cCRNMaxQualityLevel)) return error("Invalid quality level!"); } else if (!crnlib_stricmp(argv[i], "-out")) { if (++i >= argc) return error("Expected output filename!"); strcpy_safe(out_filename, sizeof(out_filename), argv[i]); } else if (!crnlib_stricmp(argv[i], "-nonsrgb")) srgb_colorspace = false; else if (!crnlib_stricmp(argv[i], "-nomips")) create_mipmaps = false; else if (!crnlib_stricmp(argv[i], "-noAdaptiveBlocks")) use_adaptive_block_sizes = false; else if (!crnlib_stricmp(argv[i], "-setalphatoluma")) set_alpha_to_luma = true; else if (!crnlib_stricmp(argv[i], "-converttoluma")) convert_to_luma = true; else return error("Invalid option: %s\n", argv[i]); } // Split the source filename into its various components. dynamic_string drive, dir, fname, ext; if (!file_utils::split_path(pSrc_filename, &drive, &dir, &fname, &ext)) return error("Invalid source filename!\n"); // Load the source file into memory. printf("Loading source file: %s\n", pSrc_filename); crn_uint32 src_file_size; crn_uint8* pSrc_file_data = read_file_into_buffer(pSrc_filename, src_file_size); if (!pSrc_file_data) return error("Unable to read source file\n"); if (mode == 'i') { // Information if (crnlib_stricmp(ext.get_ptr(), ".crn") == 0) { if (!print_crn_info(pSrc_file_data, src_file_size)) { free(pSrc_file_data); return error("Not a CRN file!\n"); } } else if (crnlib_stricmp(ext.get_ptr(), ".dds") == 0) { if (!print_dds_info(pSrc_file_data, src_file_size)) { free(pSrc_file_data); return error("Not a DDS file!\n"); } } else { // Try parsing the source file as a regular image. 
int x, y, actual_comps; stbi_uc* p = stbi_load_from_memory(pSrc_file_data, src_file_size, &x, &y, &actual_comps, 4); if (!p) { free(pSrc_file_data); return error("Failed reading image file!\n"); } stbi_image_free(p); printf("File size: %u\nDimensions: %ix%i\nActual Components: %i\n", src_file_size, x, y, actual_comps); } } else if (mode == 'c') { // Compression to DDS or CRN. // If the user has explicitly specified an output file, check the output file's extension to ensure we write the expected format. if (out_filename[0]) { dynamic_string out_fname, out_ext; file_utils::split_path(out_filename, NULL, NULL, &out_fname, &out_ext); if (!crnlib_stricmp(out_ext.get_ptr(), ".crn")) output_crn = true; else if (!crnlib_stricmp(out_ext.get_ptr(), ".dds")) output_crn = false; } // Load source image int width, height, actual_comps; crn_uint32* pSrc_image = (crn_uint32*)stbi_load_from_memory(pSrc_file_data, src_file_size, &width, &height, &actual_comps, 4); if (!pSrc_image) { free(pSrc_file_data); return error("Failed reading image file!\n"); } printf("Source file size: %u, Dimensions: %ux%u\nActual Components: %u\n", src_file_size, width, height, actual_comps); // Fill in compression parameters struct. bool has_alpha_channel = actual_comps > 3; if ((fmt == cCRNFmtDXT5A) && (actual_comps <= 3)) set_alpha_to_luma = true; if ((set_alpha_to_luma) || (convert_to_luma)) { for (int i = 0; i < width * height; i++) { crn_uint32 r = pSrc_image[i] & 0xFF, g = (pSrc_image[i] >> 8) & 0xFF, b = (pSrc_image[i] >> 16) & 0xFF; // Compute CCIR 601 luma. 
crn_uint32 y = (19595U * r + 38470U * g + 7471U * b + 32768) >> 16U; crn_uint32 a = (pSrc_image[i] >> 24) & 0xFF; if (set_alpha_to_luma) a = y; if (convert_to_luma) { r = y; g = y; b = y; } pSrc_image[i] = r | (g << 8) | (b << 16) | (a << 24); } } crn_comp_params comp_params; comp_params.m_width = width; comp_params.m_height = height; comp_params.set_flag(cCRNCompFlagPerceptual, srgb_colorspace); comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, enable_dxt1a && has_alpha_channel); comp_params.set_flag(cCRNCompFlagHierarchical, use_adaptive_block_sizes); comp_params.m_file_type = output_crn ? cCRNFileTypeCRN : cCRNFileTypeDDS; comp_params.m_format = (fmt != cCRNFmtInvalid) ? fmt : (has_alpha_channel ? cCRNFmtDXT5 : cCRNFmtDXT1); // Important note: This example only feeds a single source image to the compressor, and it internaly generates mipmaps from that source image. // If you want, there's nothing stopping you from generating the mipmaps on your own, then feeding the multiple source images // to the compressor. Just set the crn_mipmap_params::m_mode member (set below) to cCRNMipModeUseSourceMips. comp_params.m_pImages[0][0] = pSrc_image; if (bitrate > 0.0f) comp_params.m_target_bitrate = bitrate; else if (quality_level >= 0) comp_params.m_quality_level = quality_level; else if (output_crn) { // Set a default quality level for CRN, otherwise we'll get the default (highest quality) which leads to huge compressed palettes. comp_params.m_quality_level = cDefaultCRNQualityLevel; } // Determine the # of helper threads (in addition to the main thread) to use during compression. NumberOfCPU's-1 is reasonable. 
int number_of_processors = 1; #if defined(_WIN32) SYSTEM_INFO g_system_info; GetSystemInfo(&g_system_info); number_of_processors = std::max(1, (int)g_system_info.dwNumberOfProcessors); #elif defined(__FreeBSD__) || defined(__APPLE__) number_of_processors = std::max(1, sysconf(_SC_NPROCESSORS_ONLN)); #elif defined(__GNUC__) number_of_processors = std::max(1, get_nprocs()); #endif comp_params.m_num_helper_threads = std::min(number_of_processors - 1, (int)cCRNMaxHelperThreads); comp_params.m_pProgress_func = progress_callback_func; // Fill in mipmap parameters struct. crn_mipmap_params mip_params; mip_params.m_gamma_filtering = srgb_colorspace; mip_params.m_mode = create_mipmaps ? cCRNMipModeGenerateMips : cCRNMipModeNoMips; crn_uint32 actual_quality_level; float actual_bitrate; crn_uint32 output_file_size; printf("Compressing to %s\n", crn_get_format_string(comp_params.m_format)); // Now compress to DDS or CRN. void* pOutput_file_data = crn_compress(comp_params, mip_params, output_file_size, &actual_quality_level, &actual_bitrate); printf("\n"); if (!pOutput_file_data) { stbi_image_free(pSrc_image); free(pSrc_file_data); return error("Compression failed!"); } printf("Compressed to %u bytes, quality level: %u, effective bitrate: %f\n", output_file_size, actual_quality_level, actual_bitrate); // Write the output file. char dst_filename[FILENAME_MAX]; crnlib_snprintf(dst_filename, sizeof(dst_filename), "%s%s%s%s", drive.get_ptr(), dir.get_ptr(), fname.get_ptr(), output_crn ? ".crn" : ".dds"); if (out_filename[0]) strcpy_safe(dst_filename, sizeof(dst_filename), out_filename); printf("Writing %s file: %s\n", output_crn ? 
"CRN" : "DDS", dst_filename); FILE* pFile = NULL; crn_fopen(&pFile, dst_filename, "wb"); if ((!pFile) || (fwrite(pOutput_file_data, output_file_size, 1, pFile) != 1) || (fclose(pFile) == EOF)) { free(pSrc_file_data); crn_free_block(pOutput_file_data); stbi_image_free(pSrc_image); return error("Failed writing to output file!\n"); } crn_free_block(pOutput_file_data); stbi_image_free(pSrc_image); } else if (crnlib_stricmp(ext.get_ptr(), ".crn") == 0) { // Decompress/transcode CRN to DDS. printf("Decompressing CRN to DDS\n"); // Transcode the CRN file to a DDS file in memory. crn_uint32 dds_file_size = src_file_size; void* pDDS_file_data = crn_decompress_crn_to_dds(pSrc_file_data, dds_file_size); if (!pDDS_file_data) { free(pSrc_file_data); return error("Failed decompressing CRN file!\n"); } // Now write the DDS file to disk. char dst_filename[FILENAME_MAX]; crnlib_snprintf(dst_filename, sizeof(dst_filename), "%s%s%s.dds", drive.get_ptr(), dir.get_ptr(), fname.get_ptr()); if (out_filename[0]) strcpy_safe(dst_filename, sizeof(dst_filename), out_filename); printf("Writing file: %s\n", dst_filename); FILE* pFile = NULL; crn_fopen(&pFile, dst_filename, "wb"); if ((!pFile) || (fwrite(pDDS_file_data, dds_file_size, 1, pFile) != 1) || (fclose(pFile) == EOF)) { crn_free_block(pDDS_file_data); free(pSrc_file_data); return error("Failed writing to output file!\n"); } printf("\n"); print_dds_info(pDDS_file_data, dds_file_size); crn_free_block(pDDS_file_data); } else if (crnlib_stricmp(ext.get_ptr(), ".dds") == 0) { // Unpack DDS to one or more TGA's. 
if (out_filename[0]) { file_utils::split_path(out_filename, &drive, &dir, &fname, &ext); } crn_texture_desc tex_desc; crn_uint32* pImages[cCRNMaxFaces * cCRNMaxLevels]; if (!crn_decompress_dds_to_images(pSrc_file_data, src_file_size, pImages, tex_desc)) { free(pSrc_file_data); return error("Failed unpacking DDS file!\n"); } printf("Decompressed texture Dimensions: %ux%u, Faces: %u, Levels: %u, FourCC: 0x%08X '%c' '%c' '%c' '%c'\n", tex_desc.m_width, tex_desc.m_height, tex_desc.m_faces, tex_desc.m_levels, tex_desc.m_fmt_fourcc, std::max(32U, tex_desc.m_fmt_fourcc & 0xFF), std::max(32U, (tex_desc.m_fmt_fourcc >> 8) & 0xFF), std::max(32U, (tex_desc.m_fmt_fourcc >> 16) & 0xFF), std::max(32U, (tex_desc.m_fmt_fourcc >> 24) & 0xFF)); for (crn_uint32 face_index = 0; face_index < tex_desc.m_faces; face_index++) { for (crn_uint32 level_index = 0; level_index < tex_desc.m_levels; level_index++) { int width = std::max(1U, tex_desc.m_width >> level_index); int height = std::max(1U, tex_desc.m_height >> level_index); char dst_filename[FILENAME_MAX]; crnlib_snprintf(dst_filename, sizeof(dst_filename), "%s%s%s_face%u_mip%u.tga", drive.get_ptr(), dir.get_ptr(), fname.get_ptr(), face_index, level_index); printf("Writing file: %s\n", dst_filename); if (!stbi_write_tga(dst_filename, width, height, 4, pImages[level_index + face_index * tex_desc.m_levels])) { crn_free_all_images(pImages, tex_desc); free(pSrc_file_data); return error("Failed writing output file!\n"); } } } crn_free_all_images(pImages, tex_desc); } else { free(pSrc_file_data); return error("Decompression mode only supports .dds or .crn files!\n"); } free(pSrc_file_data); return EXIT_SUCCESS; } DaemonEngine-crunch-ef4d32f/example2/000077500000000000000000000000001503722002600175345ustar00rootroot00000000000000DaemonEngine-crunch-ef4d32f/example2/CMakeLists.txt000066400000000000000000000005761503722002600223040ustar00rootroot00000000000000include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/../inc 
${CMAKE_CURRENT_SOURCE_DIR}/../crnlib ) # Defines the source code for the library set(EXAMPLE2_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/example2.cpp ${CMAKE_CURRENT_SOURCE_DIR}/timer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/timer.h ) add_executable(example2 ${EXAMPLE2_SRCS}) target_link_libraries(example2 ${CRUNCH_LIBRARY_NAME}) DaemonEngine-crunch-ef4d32f/example2/example2.cpp000066400000000000000000000250071503722002600217610ustar00rootroot00000000000000// File: example2.cpp - This example uses the crn_decomp.h stand-alone header file library // to transcode .CRN files directly to .DDS, with no intermediate recompression step to DXTn. // This tool does NOT depend on the crnlib library at all. It only needs the low-level // decompression/transcoding functionality defined in inc/crn_decomp.h. // This is the basic functionality a game engine would need to employ at runtime to utilize // .CRN textures (excluding writing the output DDS file - instead you would provide the DXTn // bits directly to OpenGL/D3D). // See Copyright Notice and license at the end of inc/crnlib.h #include #include #include #include #include #if !defined(_WIN32) #include #endif // CRN transcoder library. #include "crn_decomp.h" // .DDS file format definitions. #include "dds_defs.h" // A simple high-precision, platform independent timer class. #include "timer.h" #include "crn_platform.h" using namespace crnlib; #if defined(_WIN32) #define example2_strcpy_safe(d, l, s) strcpy_s(d, l, s) #else void example2_strcpy_safe(char *d, size_t l, const char* s) {l = strnlen(s, l - 1); memcpy(d, s, l); d[l] = '\0';} #endif static int print_usage() { printf("Description: Transcodes .CRN to .DDS files using crn_decomp.h.\n"); printf("Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC\n"); printf("Usage: example2 [source_file] [options]\n"); printf("\nOptions:\n"); printf("-out filename - Force output filename.\n"); return EXIT_FAILURE; } static int error(const char* pMsg, ...) 
{ va_list args; va_start(args, pMsg); char buf[512]; crnlib_vsnprintf(buf, sizeof(buf), pMsg, args); va_end(args); printf("%s", buf); return EXIT_FAILURE; } // Loads an entire file into an allocated memory block. static crn_uint8* read_file_into_buffer(const char* pFilename, crn_uint32& size) { size = 0; FILE* pFile = NULL; crn_fopen(&pFile, pFilename, "rb"); if (!pFile) return NULL; fseek(pFile, 0, SEEK_END); size = ftell(pFile); fseek(pFile, 0, SEEK_SET); crn_uint8* pSrc_file_data = static_cast(malloc(std::max(1U, size))); if ((!pSrc_file_data) || (fread(pSrc_file_data, size, 1, pFile) != 1)) { fclose(pFile); free(pSrc_file_data); size = 0; return NULL; } fclose(pFile); return pSrc_file_data; } int main(int argc, char* argv[]) { printf("example2 - Version v%u.%02u Built " __DATE__ ", " __TIME__ "\n", CRNLIB_VERSION / 100, CRNLIB_VERSION % 100); if (argc < 2) return print_usage(); // Parse command line options const char* pSrc_filename = argv[1]; char out_filename[FILENAME_MAX] = {'\0'}; for (int i = 2; i < argc; i++) { if (argv[i][0] == '/') argv[i][0] = '-'; if (!crnlib_stricmp(argv[i], "-out")) { if (++i >= argc) return error("Expected output filename!"); example2_strcpy_safe(out_filename, sizeof(out_filename), argv[i]); } else return error("Invalid option: %s\n", argv[i]); } // Load the source file into memory. printf("Loading source file: %s\n", pSrc_filename); crn_uint32 src_file_size; crn_uint8* pSrc_file_data = read_file_into_buffer(pSrc_filename, src_file_size); if (!pSrc_file_data) return error("Unable to read source file\n"); // Decompress/transcode CRN to DDS. // DDS files are organized in face-major order, like this: // Face0: Mip0, Mip1, Mip2, etc. // Face1: Mip0, Mip1, Mip2, etc. // etc. // While CRN files are organized in mip-major order, like this: // Mip0: Face0, Face1, Face2, Face3, Face4, Face5 // Mip1: Face0, Face1, Face2, Face3, Face4, Face5 // etc. 
printf("Transcoding CRN to DDS\n"); crnd::crn_texture_info tex_info; if (!crnd::crnd_get_texture_info(pSrc_file_data, src_file_size, &tex_info)) { free(pSrc_file_data); return error("crnd_get_texture_info() failed!\n"); } timer tm; tm.start(); crnd::crnd_unpack_context pContext = crnd::crnd_unpack_begin(pSrc_file_data, src_file_size); double total_unpack_begin_time = tm.get_elapsed_ms(); if (!pContext) { free(pSrc_file_data); return error("crnd_unpack_begin() failed!\n"); } // Now create the DDS file. char dst_filename[FILENAME_MAX]; if (out_filename[0]) { example2_strcpy_safe(dst_filename, sizeof(dst_filename), out_filename); } else { unsigned int stripped_length = UINT32_MAX; const char* ext_begin = strrchr(pSrc_filename, '.'); if (ext_begin) { #ifdef _WIN32 const char* sep = strpbrk(ext_begin, "/\\"); #else const char* sep = strpbrk(ext_begin, "/"); #endif if (!sep) stripped_length = ext_begin - pSrc_filename; } crnlib_snprintf(dst_filename, sizeof(dst_filename), "%-*s.dds", stripped_length, pSrc_filename); } printf("Writing DDS file: %s\n", dst_filename); FILE* pDDS_file = NULL; crn_fopen(&pDDS_file, dst_filename, "wb"); if (!pDDS_file) { crnd::crnd_unpack_end(pContext); free(pSrc_file_data); return error("Failed creating destination file!\n"); } // Write the 4-byte DDS signature (not endian safe, but whatever this is a sample). fwrite(&crnlib::cDDSFileSignature, sizeof(crnlib::cDDSFileSignature), 1, pDDS_file); // Prepare the DDS header. crnlib::DDSURFACEDESC2 dds_desc; memset(&dds_desc, 0, sizeof(dds_desc)); dds_desc.dwSize = sizeof(dds_desc); dds_desc.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT | ((tex_info.m_levels > 1) ? DDSD_MIPMAPCOUNT : 0); dds_desc.dwWidth = tex_info.m_width; dds_desc.dwHeight = tex_info.m_height; dds_desc.dwMipMapCount = (tex_info.m_levels > 1) ? 
tex_info.m_levels : 0; dds_desc.ddpfPixelFormat.dwSize = sizeof(crnlib::DDPIXELFORMAT); dds_desc.ddpfPixelFormat.dwFlags = DDPF_FOURCC; crn_format fundamental_fmt = crnd::crnd_get_fundamental_dxt_format(tex_info.m_format); dds_desc.ddpfPixelFormat.dwFourCC = crnd::crnd_crn_format_to_fourcc(fundamental_fmt); if (fundamental_fmt != tex_info.m_format) { // It's a funky swizzled DXTn format - write its FOURCC to dwRGBBitCount. dds_desc.ddpfPixelFormat.dwRGBBitCount = crnd::crnd_crn_format_to_fourcc(tex_info.m_format); } dds_desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; if (tex_info.m_levels > 1) { dds_desc.ddsCaps.dwCaps |= (DDSCAPS_COMPLEX | DDSCAPS_MIPMAP); } if (tex_info.m_faces == 6) { dds_desc.ddsCaps.dwCaps2 = DDSCAPS2_CUBEMAP | DDSCAPS2_CUBEMAP_POSITIVEX | DDSCAPS2_CUBEMAP_NEGATIVEX | DDSCAPS2_CUBEMAP_POSITIVEY | DDSCAPS2_CUBEMAP_NEGATIVEY | DDSCAPS2_CUBEMAP_POSITIVEZ | DDSCAPS2_CUBEMAP_NEGATIVEZ; } // Set pitch/linearsize field (some DDS readers require this field to be non-zero). int bits_per_pixel = crnd::crnd_get_crn_format_bits_per_texel(tex_info.m_format); dds_desc.lPitch = (((dds_desc.dwWidth + 3) & ~3) * ((dds_desc.dwHeight + 3) & ~3) * bits_per_pixel) >> 3; dds_desc.dwFlags |= DDSD_LINEARSIZE; // Write the DDS header to the output file. fwrite(&dds_desc, sizeof(dds_desc), 1, pDDS_file); // Now transcode all face and mipmap levels into memory, one mip level at a time. void* pImages[cCRNMaxFaces][cCRNMaxLevels]; crn_uint32 image_size_in_bytes[cCRNMaxLevels]; memset(pImages, 0, sizeof(pImages)); memset(image_size_in_bytes, 0, sizeof(image_size_in_bytes)); crn_uint32 total_unpacked_texels = 0; double total_unpack_time = 0.0f; for (crn_uint32 level_index = 0; level_index < tex_info.m_levels; level_index++) { // Compute the face's width, height, number of DXT blocks per row/col, etc. 
const crn_uint32 width = std::max(1U, tex_info.m_width >> level_index); const crn_uint32 height = std::max(1U, tex_info.m_height >> level_index); const crn_uint32 blocks_x = std::max(1U, (width + 3) >> 2); const crn_uint32 blocks_y = std::max(1U, (height + 3) >> 2); const crn_uint32 row_pitch = blocks_x * crnd::crnd_get_bytes_per_dxt_block(tex_info.m_format); const crn_uint32 total_face_size = row_pitch * blocks_y; image_size_in_bytes[level_index] = total_face_size; for (crn_uint32 face_index = 0; face_index < tex_info.m_faces; face_index++) { void* p = malloc(total_face_size); if (!p) { for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) free(pImages[f][l]); crnd::crnd_unpack_end(pContext); free(pSrc_file_data); return error("Out of memory!"); } pImages[face_index][level_index] = p; } // Prepare the face pointer array needed by crnd_unpack_level(). void* pDecomp_images[cCRNMaxFaces]; for (crn_uint32 face_index = 0; face_index < tex_info.m_faces; face_index++) pDecomp_images[face_index] = pImages[face_index][level_index]; // Now transcode the level to raw DXTn tm.start(); if (!crnd::crnd_unpack_level(pContext, pDecomp_images, total_face_size, row_pitch, level_index)) { for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) free(pImages[f][l]); crnd::crnd_unpack_end(pContext); free(pSrc_file_data); return error("Failed transcoding texture!"); } total_unpack_time += tm.get_elapsed_ms(); total_unpacked_texels += (blocks_x * blocks_y * 16); } printf("crnd_unpack_begin time: %3.3fms\n", total_unpack_begin_time); printf("Total crnd_unpack_level time: %3.3fms\n", total_unpack_time); double total_time = total_unpack_begin_time + total_unpack_time; printf("Total transcode time: %3.3fms\n", total_time); printf("Total texels transcoded: %u\n", total_unpacked_texels); printf("Overall transcode throughput: %3.3f million texels/sec\n", (total_unpacked_texels / (total_time / 1000.0f)) / 1000000.0f); // 
Now write the DXTn data to the DDS file in face-major order. for (crn_uint32 face_index = 0; face_index < tex_info.m_faces; face_index++) for (crn_uint32 level_index = 0; level_index < tex_info.m_levels; level_index++) fwrite(pImages[face_index][level_index], image_size_in_bytes[level_index], 1, pDDS_file); for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) free(pImages[f][l]); crnd::crnd_unpack_end(pContext); free(pSrc_file_data); if (fclose(pDDS_file) == EOF) { return error("Failed writing to DDS file!\n"); } return EXIT_SUCCESS; } DaemonEngine-crunch-ef4d32f/example2/timer.cpp000066400000000000000000000054561503722002600213720ustar00rootroot00000000000000// File: timer.cpp // A simple high-precision, platform independent timer class. #include #include #include #include #include "timer.h" #if defined(WIN32) #include #elif defined(_XBOX) #include #endif unsigned long long timer::g_init_ticks; unsigned long long timer::g_freq; double timer::g_inv_freq; #if defined(WIN32) || defined(_XBOX) inline void query_counter(timer_ticks* pTicks) { QueryPerformanceCounter(reinterpret_cast(pTicks)); } inline void query_counter_frequency(timer_ticks* pTicks) { QueryPerformanceFrequency(reinterpret_cast(pTicks)); } #elif defined(__GNUC__) #include #include inline void query_counter(timer_ticks* pTicks) { struct timeval cur_time; gettimeofday(&cur_time, NULL); *pTicks = static_cast(cur_time.tv_sec) * 1000000ULL + static_cast(cur_time.tv_usec); } inline void query_counter_frequency(timer_ticks* pTicks) { *pTicks = 1000000; } #endif timer::timer() : m_start_time(0), m_stop_time(0), m_started(false), m_stopped(false) { if (!g_inv_freq) init(); } timer::timer(timer_ticks start_ticks) { if (!g_inv_freq) init(); m_start_time = start_ticks; m_started = true; m_stopped = false; } void timer::start(timer_ticks start_ticks) { m_start_time = start_ticks; m_started = true; m_stopped = false; } void timer::start() { query_counter(&m_start_time); 
m_started = true; m_stopped = false; } void timer::stop() { assert(m_started); query_counter(&m_stop_time); m_stopped = true; } double timer::get_elapsed_secs() const { assert(m_started); if (!m_started) return 0; timer_ticks stop_time = m_stop_time; if (!m_stopped) query_counter(&stop_time); timer_ticks delta = stop_time - m_start_time; return delta * g_inv_freq; } timer_ticks timer::get_elapsed_us() const { assert(m_started); if (!m_started) return 0; timer_ticks stop_time = m_stop_time; if (!m_stopped) query_counter(&stop_time); timer_ticks delta = stop_time - m_start_time; return (delta * 1000000ULL + (g_freq >> 1U)) / g_freq; } void timer::init() { if (!g_inv_freq) { query_counter_frequency(&g_freq); g_inv_freq = 1.0f / g_freq; query_counter(&g_init_ticks); } } timer_ticks timer::get_init_ticks() { if (!g_inv_freq) init(); return g_init_ticks; } timer_ticks timer::get_ticks() { if (!g_inv_freq) init(); timer_ticks ticks; query_counter(&ticks); return ticks - g_init_ticks; } double timer::ticks_to_secs(timer_ticks ticks) { if (!g_inv_freq) init(); return ticks * g_inv_freq; } DaemonEngine-crunch-ef4d32f/example2/timer.h000066400000000000000000000021471503722002600210310ustar00rootroot00000000000000// File: timer.h // A simple high-precision, platform independent timer class. 
#pragma once typedef unsigned long long timer_ticks; class timer { public: timer(); timer(timer_ticks start_ticks); void start(); void start(timer_ticks start_ticks); void stop(); double get_elapsed_secs() const; inline double get_elapsed_ms() const { return get_elapsed_secs() * 1000.0f; } timer_ticks get_elapsed_us() const; static void init(); static inline timer_ticks get_ticks_per_sec() { return g_freq; } static timer_ticks get_init_ticks(); static timer_ticks get_ticks(); static double ticks_to_secs(timer_ticks ticks); static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; } static inline double get_secs() { return ticks_to_secs(get_ticks()); } static inline double get_ms() { return ticks_to_ms(get_ticks()); } private: static timer_ticks g_init_ticks; static timer_ticks g_freq; static double g_inv_freq; timer_ticks m_start_time; timer_ticks m_stop_time; bool m_started : 1; bool m_stopped : 1; }; DaemonEngine-crunch-ef4d32f/example3/000077500000000000000000000000001503722002600175355ustar00rootroot00000000000000DaemonEngine-crunch-ef4d32f/example3/CMakeLists.txt000066400000000000000000000004601503722002600222750ustar00rootroot00000000000000include_directories( ${CMAKE_CURRENT_SOURCE_DIR}/../inc ${CMAKE_CURRENT_SOURCE_DIR}/../crnlib ) # Defines the source code for the library set(EXAMPLE3_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/example3.cpp ) add_executable(example3 ${EXAMPLE3_SRCS}) target_link_libraries(example3 ${CRUNCH_LIBRARY_NAME}) DaemonEngine-crunch-ef4d32f/example3/example3.cpp000066400000000000000000000245541503722002600217710ustar00rootroot00000000000000// File: example3.cpp - Demonstrates how to use crnlib's simple block compression // API's to manually pack images to DXTn compressed .DDS files. This example isn't multithreaded // so it's not going to be fast. // Also note that this sample only demonstrates traditional/vanilla 4x4 DXTn block compression (not CRN). 
// See Copyright Notice and license at the end of inc/crnlib.h #include #include #include #include #if !defined(_WIN32) #include #endif // CRN transcoder library. #include "crnlib.h" // .DDS file format definitions. #include "dds_defs.h" #include "crn_core.h" #include "crn_strutils.h" #include "crn_file_utils.h" // stb_image, for loading/saving image files. #ifdef _MSC_VER #pragma warning(disable : 4244) // conversion from 'int' to 'uint8', possible loss of data #pragma warning(disable : 4100) // unreferenced formal parameter #pragma warning(disable : 4127) // conditional expression is constant #endif #define STB_IMAGE_IMPLEMENTATION #include "stb_image.h" #define STB_IMAGE_WRITE_IMPLEMENTATION #include "stb_image_write.h" using namespace crnlib; typedef unsigned int uint; const uint cDXTBlockSize = 4; static int print_usage() { printf("Description: Simple .DDS DXTn block compression using crnlib.\n"); printf("Copyright (c) 2010-2016 Binomial LLC\n"); printf("Usage: example3 [source_file] [options]\n"); printf("\n"); printf("Note: This simple example is not multithreaded, so it's not going to be\n"); printf("particularly fast.\n"); printf("\n"); printf("Supported source image formats:\n"); printf("Baseline JPEG, PNG, BMP, TGA, PSD, and HDR\n"); printf("\nOptions:\n"); printf("-out filename - Force output filename (always use .DDS extension).\n"); printf("-nonsrgb - Input is not sRGB: disables gamma filtering, perceptual metrics.\n"); printf("-pixelformat X - Output DXTn format. Supported formats:\n"); printf("DXT1, DXT3, DXT5, DXN_XY (ATI 3DC), DXN_YX (ATI 3DC), DXT5A (ATN1N)\n"); printf("If no output pixel format is specified, this example uses either DXT1 or DXT5.\n"); printf("-dxtquality X - DXTn quality: superfast, fast, normal, better, uber (default)\n"); printf("-setalphatoluma - Set alpha channel to luma before compression.\n"); printf("-converttoluma - Set RGB to luma before compression.\n"); return EXIT_FAILURE; } static int error(const char* pMsg, ...) 
{ va_list args; va_start(args, pMsg); char buf[512]; crnlib_vsnprintf(buf, sizeof(buf), pMsg, args); va_end(args); printf("%s", buf); return EXIT_FAILURE; } int main(int argc, char* argv[]) { printf("example3 - Version v%u.%02u Built " __DATE__ ", " __TIME__ "\n", CRNLIB_VERSION / 100, CRNLIB_VERSION % 100); if (argc < 2) return print_usage(); // Parse command line options const char* pSrc_filename = argv[1]; char out_filename[FILENAME_MAX] = {'\0'}; crn_format fmt = cCRNFmtInvalid; bool srgb_colorspace = true; crn_dxt_quality dxt_quality = cCRNDXTQualityUber; // best quality, but slowest bool set_alpha_to_luma = false; bool convert_to_luma = false; for (int i = 2; i < argc; i++) { if (argv[i][0] == '/') argv[i][0] = '-'; if (!crnlib_stricmp(argv[i], "-out")) { if (++i >= argc) return error("Expected output filename!"); strcpy_safe(out_filename, sizeof(out_filename), argv[i]); } else if (!crnlib_stricmp(argv[i], "-nonsrgb")) srgb_colorspace = false; else if (!crnlib_stricmp(argv[i], "-pixelformat")) { if (++i >= argc) return error("Expected pixel format!"); uint f; for (f = 0; f < cCRNFmtTotal; f++) { crn_format actual_fmt = crn_get_fundamental_dxt_format(static_cast(f)); if (!crnlib_stricmp(argv[i], crn_get_format_string(actual_fmt))) { fmt = actual_fmt; break; } } if (f == cCRNFmtTotal) return error("Unrecognized pixel format: %s\n", argv[i]); } else if (!crnlib_stricmp(argv[i], "-dxtquality")) { if (++i >= argc) return error("Expected DXTn quality!\n"); uint q; for (q = 0; q < cCRNDXTQualityTotal; q++) { if (!crnlib_stricmp(argv[i], crn_get_dxt_quality_string(static_cast(q)))) { dxt_quality = static_cast(q); break; } } if (q == cCRNDXTQualityTotal) return error("Unrecognized DXTn quality: %s\n", argv[i]); } else if (!crnlib_stricmp(argv[i], "-setalphatoluma")) set_alpha_to_luma = true; else if (!crnlib_stricmp(argv[i], "-converttoluma")) convert_to_luma = true; else return error("Invalid option: %s\n", argv[i]); } // Split the source filename into its various 
components. dynamic_string drive, dir, fname, ext; if (!file_utils::split_path(pSrc_filename, &drive, &dir, &fname, &ext)) return error("Invalid source filename!\n"); // Load the source image into memory. printf("Loading source file: %s\n", pSrc_filename); int width, height, actual_comps; crn_uint32* pSrc_image = (crn_uint32*)stbi_load(pSrc_filename, &width, &height, &actual_comps, 4); if (!pSrc_image) return error("Unable to read source file\n"); if (fmt == cCRNFmtInvalid) { // Format not specified - automatically choose the DXTn format. fmt = (actual_comps > 3) ? cCRNFmtDXT5 : cCRNFmtDXT1; } if ((fmt == cCRNFmtDXT5A) && (actual_comps <= 3)) set_alpha_to_luma = true; if ((set_alpha_to_luma) || (convert_to_luma)) { for (int i = 0; i < width * height; i++) { crn_uint32 r = pSrc_image[i] & 0xFF, g = (pSrc_image[i] >> 8) & 0xFF, b = (pSrc_image[i] >> 16) & 0xFF; // Compute CCIR 601 luma. crn_uint32 y = (19595U * r + 38470U * g + 7471U * b + 32768) >> 16U; crn_uint32 a = (pSrc_image[i] >> 24) & 0xFF; if (set_alpha_to_luma) a = y; if (convert_to_luma) { r = y; g = y; b = y; } pSrc_image[i] = r | (g << 8) | (b << 16) | (a << 24); } } printf("Source Dimensions: %ux%u, Actual Components: %u\n", width, height, actual_comps); const uint num_blocks_x = (width + cDXTBlockSize - 1) / cDXTBlockSize; const uint num_blocks_y = (height + cDXTBlockSize - 1) / cDXTBlockSize; const uint bytes_per_block = crn_get_bytes_per_dxt_block(fmt); const uint total_compressed_size = num_blocks_x * num_blocks_y * bytes_per_block; printf("Block Dimensions: %ux%u, BytesPerBlock: %u, Total Compressed Size: %u\n", num_blocks_x, num_blocks_y, bytes_per_block, total_compressed_size); void* pCompressed_data = malloc(total_compressed_size); if (!pCompressed_data) { stbi_image_free(pSrc_image); return error("Out of memory!"); } crn_comp_params comp_params; comp_params.m_format = fmt; comp_params.m_dxt_quality = dxt_quality; comp_params.set_flag(cCRNCompFlagPerceptual, srgb_colorspace); 
comp_params.set_flag(cCRNCompFlagDXT1AForTransparency, actual_comps > 3); crn_block_compressor_context_t pContext = crn_create_block_compressor(comp_params); printf("Compressing to %s: ", crn_get_format_string(fmt)); int prev_percentage_complete = -1; for (crn_uint32 block_y = 0; block_y < num_blocks_y; block_y++) { for (crn_uint32 block_x = 0; block_x < num_blocks_x; block_x++) { crn_uint32 pixels[cDXTBlockSize * cDXTBlockSize]; // Exact block from image, clamping at the sides of non-divisible by 4 images to avoid artifacts. crn_uint32* pDst_pixels = pixels; for (uint y = 0; y < cDXTBlockSize; y++) { const uint actual_y = std::min(height - 1U, (block_y * cDXTBlockSize) + y); for (uint x = 0; x < cDXTBlockSize; x++) { const uint actual_x = std::min(width - 1U, (block_x * cDXTBlockSize) + x); *pDst_pixels++ = pSrc_image[actual_x + actual_y * width]; } } // Compress the DXTn block. crn_compress_block(pContext, pixels, static_cast(pCompressed_data) + (block_x + block_y * num_blocks_x) * bytes_per_block); } int percentage_complete = ((block_y + 1) * 100 + (num_blocks_y / 2)) / num_blocks_y; if (percentage_complete != prev_percentage_complete) { printf("\b\b\b\b%3u%%", percentage_complete); prev_percentage_complete = percentage_complete; } } printf("\n"); // Free the block compressor. crn_free_block_compressor(pContext); pContext = NULL; // Now create the DDS file. char dst_filename[FILENAME_MAX]; crnlib_snprintf(dst_filename, sizeof(dst_filename), "%s%s%s.dds", drive.get_ptr(), dir.get_ptr(), fname.get_ptr()); if (out_filename[0]) strcpy_safe(dst_filename, sizeof(dst_filename), out_filename); printf("Writing DDS file: %s\n", dst_filename); FILE* pDDS_file = NULL; crn_fopen(&pDDS_file, dst_filename, "wb"); if (!pDDS_file) { free(pCompressed_data); return error("Failed creating destination file!\n"); } // Write the 4-byte DDS signature (not endian safe, but whatever this is a sample). 
fwrite(&crnlib::cDDSFileSignature, sizeof(crnlib::cDDSFileSignature), 1, pDDS_file); // Prepare the DDS header. crnlib::DDSURFACEDESC2 dds_desc; memset(&dds_desc, 0, sizeof(dds_desc)); dds_desc.dwSize = sizeof(dds_desc); dds_desc.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT; dds_desc.dwWidth = width; dds_desc.dwHeight = height; dds_desc.ddpfPixelFormat.dwSize = sizeof(crnlib::DDPIXELFORMAT); dds_desc.ddpfPixelFormat.dwFlags = DDPF_FOURCC; dds_desc.ddpfPixelFormat.dwFourCC = crn_get_format_fourcc(fmt); dds_desc.ddsCaps.dwCaps = DDSCAPS_TEXTURE; // Set pitch/linearsize field (some DDS readers require this field to be non-zero). uint bits_per_pixel = crn_get_format_bits_per_texel(fmt); dds_desc.lPitch = (((dds_desc.dwWidth + 3) & ~3) * ((dds_desc.dwHeight + 3) & ~3) * bits_per_pixel) >> 3; dds_desc.dwFlags |= DDSD_LINEARSIZE; // Write the DDS header to the output file. fwrite(&dds_desc, sizeof(dds_desc), 1, pDDS_file); // Write the image's compressed data to the output file. fwrite(pCompressed_data, total_compressed_size, 1, pDDS_file); free(pCompressed_data); stbi_image_free(pSrc_image); if (fclose(pDDS_file) == EOF) { return error("Failed writing to DDS file!\n"); } return EXIT_SUCCESS; } DaemonEngine-crunch-ef4d32f/flake.nix000066400000000000000000000013701503722002600176220ustar00rootroot00000000000000# This file allows building and running the software with the Nix package # manager, used in NixOS or on another distribution. 
{ description = "crunch is a lossy image compression format meant for video games — daemon engine's fork"; inputs = { nixpkgs.url = "flake:nixpkgs"; }; outputs = { self, nixpkgs }: let pkgs = nixpkgs.legacyPackages.x86_64-linux; in { defaultPackage.x86_64-linux = pkgs.stdenv.mkDerivation { name = "crunch"; src = pkgs.lib.cleanSource ./.; buildInputs = with pkgs; [ gcc cmake ]; }; defaultApp.x86_64-linux = { type = "app"; program = "${self.defaultPackage.x86_64-linux}/bin/crunch"; }; }; } DaemonEngine-crunch-ef4d32f/inc/000077500000000000000000000000001503722002600165705ustar00rootroot00000000000000DaemonEngine-crunch-ef4d32f/inc/crn_decomp.h000066400000000000000000003476401503722002600210700ustar00rootroot00000000000000// File: crn_decomp.h - Fast CRN->DXTc texture transcoder header file library // Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC // See Copyright Notice and license at the end of this file. // // This single header file contains *all* of the code necessary to unpack .CRN files to raw DXTn bits. // It does NOT depend on the crn compression library. // // Note: This is a single file, stand-alone C++ library which is controlled by the use of the following macro: // If CRND_INCLUDE_CRND_H is NOT defined, the header is included. // // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing #ifndef CRND_INCLUDE_CRND_H #define CRND_INCLUDE_CRND_H // Include crn_defs.h (only to bring in some basic CRN-related types and structures). 
#include "crn_defs.h" #include #include #include #ifdef WIN32 #include #elif defined(__FreeBSD__) // has been replaced by #include // for malloc_usable_size #elif defined(__APPLE__) #include #else #include #endif #include #include // needed for placement new, _msize, _expand #define CRND_RESTRICT __restrict #ifdef _MSC_VER #pragma warning(disable : 4127) // warning C4127: conditional expression is constant #endif #ifdef CRND_DEVEL #ifndef _WIN32_WINNT #define _WIN32_WINNT 0x500 #endif #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #ifndef #define NOMINMAX #endif #include "windows.h" // only for IsDebuggerPresent(), DebugBreak(), and OutputDebugStringA() #endif // File: crnd_types.h namespace crnd { const crn_uint8 cUINT8_MIN = 0; const crn_uint8 cUINT8_MAX = 0xFFU; const uint16 cUINT16_MIN = 0; const uint16 cUINT16_MAX = 0xFFFFU; const uint32 cUINT32_MIN = 0; const uint32 cUINT32_MAX = 0xFFFFFFFFU; const int8 cINT8_MIN = -128; const int8 cINT8_MAX = 127; const int16 cINT16_MIN = -32768; const int16 cINT16_MAX = 32767; const int32 cINT32_MIN = (-2147483647 - 1); const int32 cINT32_MAX = 2147483647; enum eClear { cClear }; const uint32 cIntBits = 32U; template struct int_traits { enum { cMin = crnd::cINT32_MIN, cMax = crnd::cINT32_MAX, cSigned = true }; }; template <> struct int_traits { enum { cMin = crnd::cINT8_MIN, cMax = crnd::cINT8_MAX, cSigned = true }; }; template <> struct int_traits { enum { cMin = crnd::cINT16_MIN, cMax = crnd::cINT16_MAX, cSigned = true }; }; template <> struct int_traits { enum { cMin = crnd::cINT32_MIN, cMax = crnd::cINT32_MAX, cSigned = true }; }; template <> struct int_traits { enum { cMin = 0, cMax = crnd::cUINT8_MAX, cSigned = false }; }; template <> struct int_traits { enum { cMin = 0, cMax = crnd::cUINT16_MAX, cSigned = false }; }; template <> struct int_traits { enum { cMin = 0, cMax = crnd::cUINT32_MAX, cSigned = false }; }; struct empty_type {}; } // namespace crnd // File: crnd_platform.h namespace crnd { bool 
crnd_is_debugger_present(); void crnd_debug_break(); void crnd_output_debug_string(const char* p); // actually in crnd_assert.cpp void crnd_assert(const char* pExp, const char* pFile, unsigned line); void crnd_fail(const char* pExp, const char* pFile, unsigned line); } // namespace crnd // File: crnd_assert.h namespace crnd { void crnd_assert(const char* pExp, const char* pFile, unsigned line); // Define CRND_ASSERT if there is no user-supplied definition. // Note that if there is a user-supplied definition, it is used even if NDEBUG is defined. #ifndef CRND_ASSERT #ifdef NDEBUG #define CRND_ASSERT(x) ((void)0) #else #define CRND_ASSERT(_exp) (void)((!!(_exp)) || (crnd::crnd_assert(#_exp, __FILE__, __LINE__), 0)) #endif #endif // ifndef CRND_ASSERT void crnd_trace(const char* pFmt, va_list args); void crnd_trace(const char* pFmt, ...); } // namespace crnd // File: crnd_helpers.h namespace crnd { namespace helpers { template struct rel_ops { friend bool operator!=(const T& x, const T& y) { return (!(x == y)); } friend bool operator>(const T& x, const T& y) { return (y < x); } friend bool operator<=(const T& x, const T& y) { return (!(y < x)); } friend bool operator>=(const T& x, const T& y) { return (!(x < y)); } }; template inline T* construct(T* p) { return new (static_cast(p)) T; } template inline T* construct(T* p, const U& init) { return new (static_cast(p)) T(init); } template void construct_array(T* p, uint32 n) { T* q = p + n; for (; p != q; ++p) new (static_cast(p)) T; } template void construct_array(T* p, uint32 n, const U& init) { T* q = p + n; for (; p != q; ++p) new (static_cast(p)) T(init); } template inline void destruct(T* p) { p->~T(); } template inline void destruct_array(T* p, uint32 n) { T* q = p + n; for (; p != q; ++p) p->~T(); } } // namespace helpers } // namespace crnd // File: crnd_traits.h namespace crnd { template struct scalar_type { enum { cFlag = false }; static inline void construct(T* p) { helpers::construct(p); } static inline void 
construct(T* p, const T& init) { helpers::construct(p, init); } static inline void construct_array(T* p, uint32 n) { helpers::construct_array(p, n); } static inline void destruct(T* p) { helpers::destruct(p); } static inline void destruct_array(T* p, uint32 n) { helpers::destruct_array(p, n); } }; template struct scalar_type { enum { cFlag = true }; static inline void construct(T** p) { memset(p, 0, sizeof(T*)); } static inline void construct(T** p, T* init) { *p = init; } static inline void construct_array(T** p, uint32 n) { memset(p, 0, sizeof(T*) * n); } static inline void destruct(T**) {} static inline void destruct_array(T**, uint32) {} }; #define CRND_DEFINE_BUILT_IN_TYPE(X) \ template <> \ struct scalar_type { \ enum { cFlag = true }; \ static inline void construct(X* p) { memset(p, 0, sizeof(X)); } \ static inline void construct(X* p, const X& init) { memcpy(p, &init, sizeof(X)); } \ static inline void construct_array(X* p, uint32 n) { memset(p, 0, sizeof(X) * n); } \ static inline void destruct(X*) {} \ static inline void destruct_array(X*, uint32) {} \ }; CRND_DEFINE_BUILT_IN_TYPE(bool) CRND_DEFINE_BUILT_IN_TYPE(char) CRND_DEFINE_BUILT_IN_TYPE(unsigned char) CRND_DEFINE_BUILT_IN_TYPE(short) CRND_DEFINE_BUILT_IN_TYPE(unsigned short) CRND_DEFINE_BUILT_IN_TYPE(int) CRND_DEFINE_BUILT_IN_TYPE(unsigned int) CRND_DEFINE_BUILT_IN_TYPE(long) CRND_DEFINE_BUILT_IN_TYPE(unsigned long) CRND_DEFINE_BUILT_IN_TYPE(int64) CRND_DEFINE_BUILT_IN_TYPE(uint64) CRND_DEFINE_BUILT_IN_TYPE(float) CRND_DEFINE_BUILT_IN_TYPE(double) CRND_DEFINE_BUILT_IN_TYPE(long double) #undef CRND_DEFINE_BUILT_IN_TYPE // See: http://erdani.org/publications/cuj-2004-06.pdf template struct bitwise_movable { enum { cFlag = false }; }; // Defines type Q as bitwise movable. 
#define CRND_DEFINE_BITWISE_MOVABLE(Q) \ template <> \ struct bitwise_movable { \ enum { cFlag = true }; \ }; // From yasli_traits.h: // Credit goes to Boost; // also found in the C++ Templates book by Vandevoorde and Josuttis typedef char (&yes_t)[1]; typedef char (&no_t)[2]; template yes_t class_test(int U::*); template no_t class_test(...); template struct is_class { enum { value = (sizeof(class_test(0)) == sizeof(yes_t)) }; }; template struct is_pointer { enum { value = false }; }; template struct is_pointer { enum { value = true }; }; #define CRND_IS_POD(T) __is_pod(T) } // namespace crnd // File: crnd_mem.h namespace crnd { void* crnd_malloc(size_t size, size_t* pActual_size = NULL); void* crnd_realloc(void* p, size_t size, size_t* pActual_size = NULL, bool movable = true); void crnd_free(void* p); size_t crnd_msize(void* p); template inline T* crnd_new() { T* p = static_cast(crnd_malloc(sizeof(T))); if (!p) return NULL; return helpers::construct(p); } template inline T* crnd_new(const T& init) { T* p = static_cast(crnd_malloc(sizeof(T))); if (!p) return NULL; return helpers::construct(p, init); } template inline T* crnd_new_array(uint32 num) { if (!num) num = 1; uint8* q = static_cast(crnd_malloc(CRND_MIN_ALLOC_ALIGNMENT + sizeof(T) * num)); if (!q) return NULL; T* p = reinterpret_cast(q + CRND_MIN_ALLOC_ALIGNMENT); reinterpret_cast(p)[-1] = num; reinterpret_cast(p)[-2] = ~num; helpers::construct_array(p, num); return p; } template inline void crnd_delete(T* p) { if (p) { helpers::destruct(p); crnd_free(p); } } template inline void crnd_delete_array(T* p) { if (p) { const uint32 num = reinterpret_cast(p)[-1]; CRND_ASSERT(num && (num == ~reinterpret_cast(p)[-2])); helpers::destruct_array(p, num); crnd_free(reinterpret_cast(p) - CRND_MIN_ALLOC_ALIGNMENT); } } } // namespace crnd // File: crnd_math.h namespace crnd { namespace math { const float cNearlyInfinite = 1.0e+37f; const float cDegToRad = 0.01745329252f; const float cRadToDeg = 57.29577951f; extern 
uint32 g_bitmasks[32]; // Yes I know these should probably be pass by ref, not val: // http://www.stepanovpapers.com/notes.pdf // Just don't use them on non-simple (non built-in) types! template inline T minimum(T a, T b) { return (a < b) ? a : b; } template inline T minimum(T a, T b, T c) { return minimum(minimum(a, b), c); } template inline T maximum(T a, T b) { return (a > b) ? a : b; } template inline T maximum(T a, T b, T c) { return maximum(maximum(a, b), c); } template inline T clamp(T value, T low, T high) { return (value < low) ? low : ((value > high) ? high : value); } template inline T square(T value) { return value * value; } inline bool is_power_of_2(uint32 x) { return x && ((x & (x - 1U)) == 0U); } // From "Hackers Delight" inline int next_pow2(uint32 val) { val--; val |= val >> 16; val |= val >> 8; val |= val >> 4; val |= val >> 2; val |= val >> 1; return val + 1; } // Returns the total number of bits needed to encode v. inline uint32 total_bits(uint32 v) { uint32 l = 0; while (v > 0U) { v >>= 1; l++; } return l; } inline uint floor_log2i(uint v) { uint l = 0; while (v > 1U) { v >>= 1; l++; } return l; } inline uint ceil_log2i(uint v) { uint l = floor_log2i(v); if ((l != cIntBits) && (v > (1U << l))) l++; return l; } } } // File: crnd_utils.h namespace crnd { namespace utils { template inline void zero_object(T& obj) { memset(&obj, 0, sizeof(obj)); } template inline void zero_this(T* pObj) { memset(pObj, 0, sizeof(*pObj)); } template inline void swap(T& left, T& right) { T temp(left); left = right; right = temp; } inline void invert_buf(void* pBuf, uint32 size) { uint8* p = static_cast(pBuf); const uint32 half_size = size >> 1; for (uint32 i = 0; i < half_size; i++) swap(p[i], p[size - 1U - i]); } static inline uint16 swap16(uint16 x) { return static_cast((x << 8) | (x >> 8)); } static inline uint32 swap32(uint32 x) { return ((x << 24) | ((x << 8) & 0x00FF0000) | ((x >> 8) & 0x0000FF00) | (x >> 24)); } uint32 compute_max_mips(uint32 width, uint32 
height); } // namespace utils } // namespace crnd // File: crnd_vector.h namespace crnd { struct elemental_vector { void* m_p; uint32 m_size; uint32 m_capacity; typedef void (*object_mover)(void* pDst, void* pSrc, uint32 num); bool increase_capacity(uint32 min_new_capacity, bool grow_hint, uint32 element_size, object_mover pRelocate); }; #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 4127) // warning C4127: conditional expression is constant #endif template class vector : public helpers::rel_ops > { public: typedef T* iterator; typedef const T* const_iterator; typedef T value_type; typedef T& reference; typedef const T& const_reference; typedef T* pointer; typedef const T* const_pointer; inline vector() : m_p(NULL), m_size(0), m_capacity(0), m_alloc_failed(false) { } inline vector(const vector& other) : m_p(NULL), m_size(0), m_capacity(0), m_alloc_failed(false) { *this = other; } inline vector(uint32 size) : m_p(NULL), m_size(0), m_capacity(0), m_alloc_failed(false) { resize(size); } inline ~vector() { clear(); } // I don't like this. Not at all. But exceptions, or just failing suck worse. 
inline bool get_alloc_failed() const { return m_alloc_failed; } inline void clear_alloc_failed() { m_alloc_failed = false; } inline bool assign(const vector& other) { if (this == &other) return true; if (m_capacity == other.m_size) resize(0); else { clear(); if (!increase_capacity(other.m_size, false)) return false; } if (scalar_type::cFlag) memcpy(m_p, other.m_p, other.m_size * sizeof(T)); else { T* pDst = m_p; const T* pSrc = other.m_p; for (uint32 i = other.m_size; i > 0; i--) helpers::construct(pDst++, *pSrc++); } m_size = other.m_size; return true; } inline vector& operator=(const vector& other) { assign(other); return *this; } inline const T* begin() const { return m_p; } T* begin() { return m_p; } inline const T* end() const { return m_p + m_size; } T* end() { return m_p + m_size; } inline bool empty() const { return !m_size; } inline uint32 size() const { return m_size; } inline uint32 capacity() const { return m_capacity; } inline const T& operator[](uint32 i) const { CRND_ASSERT(i < m_size); return m_p[i]; } inline T& operator[](uint32 i) { CRND_ASSERT(i < m_size); return m_p[i]; } inline const T& front() const { CRND_ASSERT(m_size); return m_p[0]; } inline T& front() { CRND_ASSERT(m_size); return m_p[0]; } inline const T& back() const { CRND_ASSERT(m_size); return m_p[m_size - 1]; } inline T& back() { CRND_ASSERT(m_size); return m_p[m_size - 1]; } inline void clear() { if (m_p) { scalar_type::destruct_array(m_p, m_size); crnd_free(m_p); m_p = NULL; m_size = 0; m_capacity = 0; } m_alloc_failed = false; } inline bool reserve(uint32 new_capacity) { if (!increase_capacity(new_capacity, false)) return false; return true; } inline bool resize(uint32 new_size) { if (m_size != new_size) { if (new_size < m_size) scalar_type::destruct_array(m_p + new_size, m_size - new_size); else { if (new_size > m_capacity) { if (!increase_capacity(new_size, new_size == (m_size + 1))) return false; } scalar_type::construct_array(m_p + m_size, new_size - m_size); } m_size = 
new_size; } return true; } inline bool push_back(const T& obj) { CRND_ASSERT(!m_p || (&obj < m_p) || (&obj >= (m_p + m_size))); if (m_size >= m_capacity) { if (!increase_capacity(m_size + 1, true)) return false; } scalar_type::construct(m_p + m_size, obj); m_size++; return true; } inline void pop_back() { CRND_ASSERT(m_size); if (m_size) { m_size--; scalar_type::destruct(&m_p[m_size]); } } inline void insert(uint32 index, const T* p, uint32 n) { CRND_ASSERT(index <= m_size); if (!n) return; const uint32 orig_size = m_size; resize(m_size + n); const T* pSrc = m_p + orig_size - 1; T* pDst = const_cast(pSrc) + n; const uint32 num_to_move = orig_size - index; for (uint32 i = 0; i < num_to_move; i++) { CRND_ASSERT((pDst - m_p) < (int)m_size); *pDst-- = *pSrc--; } pSrc = p; pDst = m_p + index; for (uint32 i = 0; i < n; i++) { CRND_ASSERT((pDst - m_p) < (int)m_size); *pDst++ = *p++; } } inline void erase(uint32 start, uint32 n) { CRND_ASSERT((start + n) <= m_size); if (!n) return; const uint32 num_to_move = m_size - (start + n); T* pDst = m_p + start; T* pDst_end = pDst + num_to_move; const T* pSrc = m_p + start + n; while (pDst != pDst_end) *pDst++ = *pSrc++; scalar_type::destruct_array(pDst_end, n); m_size -= n; } inline void erase(uint32 index) { erase(index, 1); } inline void erase(T* p) { CRND_ASSERT((p >= m_p) && (p < (m_p + m_size))); erase(p - m_p); } inline bool operator==(const vector& rhs) const { if (m_size != rhs.m_size) return false; else if (m_size) { if (scalar_type::cFlag) return memcmp(m_p, rhs.m_p, sizeof(T) * m_size) == 0; else { const T* pSrc = m_p; const T* pDst = rhs.m_p; for (uint32 i = m_size; i; i--) if (!(*pSrc++ == *pDst++)) return false; } } return true; } inline bool operator<(const vector& rhs) const { const uint32 min_size = math::minimum(m_size, rhs.m_size); const T* pSrc = m_p; const T* pSrc_end = m_p + min_size; const T* pDst = rhs.m_p; while ((pSrc < pSrc_end) && (*pSrc == *pDst)) { pSrc++; pDst++; } if (pSrc < pSrc_end) return *pSrc < 
*pDst; return m_size < rhs.m_size; } void swap(vector& other) { utils::swap(m_p, other.m_p); utils::swap(m_size, other.m_size); utils::swap(m_capacity, other.m_capacity); } private: T* m_p; uint32 m_size; uint32 m_capacity; bool m_alloc_failed; template struct is_vector { enum { cFlag = false }; }; template struct is_vector > { enum { cFlag = true }; }; static void object_mover(void* pDst_void, void* pSrc_void, uint32 num) { T* pSrc = static_cast(pSrc_void); T* const pSrc_end = pSrc + num; T* pDst = static_cast(pDst_void); while (pSrc != pSrc_end) { helpers::construct(pDst, *pSrc); pSrc->~T(); pSrc++; pDst++; } } inline bool increase_capacity(uint32 min_new_capacity, bool grow_hint) { if (!reinterpret_cast(this)->increase_capacity( min_new_capacity, grow_hint, sizeof(T), ((scalar_type::cFlag) || (is_vector::cFlag) || (bitwise_movable::cFlag) || CRND_IS_POD(T)) ? NULL : object_mover)) { m_alloc_failed = true; return false; } return true; } }; #ifdef _MSC_VER #pragma warning(pop) #endif extern void vector_test(); } // namespace crnd // File: crnd_private.h namespace crnd { const crn_header* crnd_get_header(const void* pData, uint32 data_size); } // namespace crnd // File: checksum.h namespace crnd { // crc16() intended for small buffers - doesn't use an acceleration table. 
const uint16 cInitCRC16 = 0; uint16 crc16(const void* pBuf, uint32 len, uint16 crc = cInitCRC16); } // namespace crnd // File: crnd_color.h namespace crnd { template struct color_quad_component_traits { enum { cSigned = false, cFloat = false, cMin = cUINT8_MIN, cMax = cUINT8_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = true, cFloat = false, cMin = cINT16_MIN, cMax = cINT16_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = false, cFloat = false, cMin = cUINT16_MIN, cMax = cUINT16_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = true, cFloat = false, cMin = cINT32_MIN, cMax = cINT32_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = false, cFloat = false, cMin = cUINT32_MIN, cMax = cUINT32_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = false, cFloat = true, cMin = cINT32_MIN, cMax = cINT32_MAX }; }; template <> struct color_quad_component_traits { enum { cSigned = false, cFloat = true, cMin = cINT32_MIN, cMax = cINT32_MAX }; }; #ifdef _MSC_VER #pragma warning(push) #pragma warning(disable : 4201) // warning C4201: nonstandard extension used : nameless struct/union #pragma warning(disable : 4127) // warning C4127: conditional expression is constant #endif template class color_quad : public helpers::rel_ops > { static parameter_type clamp(parameter_type v) { if (component_traits::cFloat) return v; else { if (v < component_traits::cMin) return component_traits::cMin; else if (v > component_traits::cMax) return component_traits::cMax; return v; } } public: typedef component_type component_t; typedef parameter_type parameter_t; typedef color_quad_component_traits component_traits; enum { cNumComps = 4 }; union { struct { component_type r; component_type g; component_type b; component_type a; }; component_type c[cNumComps]; }; inline color_quad() { } inline color_quad(eClear) : r(0), g(0), b(0), a(0) { } inline color_quad(const 
color_quad& other) : r(other.r), g(other.g), b(other.b), a(other.a) { } inline color_quad(parameter_type y, parameter_type alpha = component_traits::cMax) { set(y, alpha); } inline color_quad(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) { set(red, green, blue, alpha); } template inline color_quad(const color_quad& other) : r(clamp(other.r)), g(clamp(other.g)), b(clamp(other.b)), a(clamp(other.a)) { } inline void clear() { r = 0; g = 0; b = 0; a = 0; } inline color_quad& operator=(const color_quad& other) { r = other.r; g = other.g; b = other.b; a = other.a; return *this; } template inline color_quad& operator=(const color_quad& other) { r = clamp(other.r); g = clamp(other.g); b = clamp(other.b); a = clamp(other.a); return *this; } inline color_quad& set(parameter_type y, parameter_type alpha = component_traits::cMax) { y = clamp(y); r = static_cast(y); g = static_cast(y); b = static_cast(y); a = static_cast(alpha); return *this; } inline color_quad& set(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha = component_traits::cMax) { r = static_cast(clamp(red)); g = static_cast(clamp(green)); b = static_cast(clamp(blue)); a = static_cast(clamp(alpha)); return *this; } inline color_quad& set_noclamp_rgba(parameter_type red, parameter_type green, parameter_type blue, parameter_type alpha) { r = static_cast(red); g = static_cast(green); b = static_cast(blue); a = static_cast(alpha); return *this; } inline color_quad& set_noclamp_rgb(parameter_type red, parameter_type green, parameter_type blue) { r = static_cast(red); g = static_cast(green); b = static_cast(blue); return *this; } static inline parameter_type get_min_comp() { return component_traits::cMin; } static inline parameter_type get_max_comp() { return component_traits::cMax; } static inline bool get_comps_are_signed() { return component_traits::cSigned; } inline component_type operator[](uint32 i) const { 
CRND_ASSERT(i < cNumComps); return c[i]; } inline component_type& operator[](uint32 i) { CRND_ASSERT(i < cNumComps); return c[i]; } inline color_quad& set_component(uint32 i, parameter_type f) { CRND_ASSERT(i < cNumComps); c[i] = static_cast(clamp(f)); return *this; } inline color_quad& clamp(const color_quad& l, const color_quad& h) { for (uint32 i = 0; i < cNumComps; i++) c[i] = static_cast(math::clamp(c[i], l[i], h[i])); return *this; } inline color_quad& clamp(parameter_type l, parameter_type h) { for (uint32 i = 0; i < cNumComps; i++) c[i] = static_cast(math::clamp(c[i], l, h)); return *this; } // Returns CCIR 601 luma (consistent with color_utils::RGB_To_Y). inline parameter_type get_luma() const { return static_cast((19595U * r + 38470U * g + 7471U * b + 32768) >> 16U); } // Returns REC 709 luma. inline parameter_type get_luma_rec709() const { return static_cast((13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U); } inline uint32 squared_distance(const color_quad& c, bool alpha = true) const { return math::square(r - c.r) + math::square(g - c.g) + math::square(b - c.b) + (alpha ? 
math::square(a - c.a) : 0); } inline bool rgb_equals(const color_quad& rhs) const { return (r == rhs.r) && (g == rhs.g) && (b == rhs.b); } inline bool operator==(const color_quad& rhs) const { return (r == rhs.r) && (g == rhs.g) && (b == rhs.b) && (a == rhs.a); } inline bool operator<(const color_quad& rhs) const { for (uint32 i = 0; i < cNumComps; i++) { if (c[i] < rhs.c[i]) return true; else if (!(c[i] == rhs.c[i])) return false; } return false; } inline color_quad& operator+=(const color_quad& other) { for (uint32 i = 0; i < 4; i++) c[i] = static_cast(clamp(c[i] + other.c[i])); return *this; } inline color_quad& operator-=(const color_quad& other) { for (uint32 i = 0; i < 4; i++) c[i] = static_cast(clamp(c[i] - other.c[i])); return *this; } inline color_quad& operator*=(parameter_type v) { for (uint32 i = 0; i < 4; i++) c[i] = static_cast(clamp(c[i] * v)); return *this; } inline color_quad& operator/=(parameter_type v) { for (uint32 i = 0; i < 4; i++) c[i] = static_cast(c[i] / v); return *this; } inline color_quad get_swizzled(uint32 x, uint32 y, uint32 z, uint32 w) const { CRND_ASSERT((x | y | z | w) < 4); return color_quad(c[x], c[y], c[z], c[w]); } inline friend color_quad operator+(const color_quad& lhs, const color_quad& rhs) { color_quad result(lhs); result += rhs; return result; } inline friend color_quad operator-(const color_quad& lhs, const color_quad& rhs) { color_quad result(lhs); result -= rhs; return result; } inline friend color_quad operator*(const color_quad& lhs, parameter_type v) { color_quad result(lhs); result *= v; return result; } friend inline color_quad operator/(const color_quad& lhs, parameter_type v) { color_quad result(lhs); result /= v; return result; } friend inline color_quad operator*(parameter_type v, const color_quad& rhs) { color_quad result(rhs); result *= v; return result; } inline uint32 get_min_component_index(bool alpha = true) const { uint32 index = 0; uint32 limit = alpha ? 
cNumComps : (cNumComps - 1); for (uint32 i = 1; i < limit; i++) if (c[i] < c[index]) index = i; return index; } inline uint32 get_max_component_index(bool alpha = true) const { uint32 index = 0; uint32 limit = alpha ? cNumComps : (cNumComps - 1); for (uint32 i = 1; i < limit; i++) if (c[i] > c[index]) index = i; return index; } inline void get_float4(float* pDst) { for (uint32 i = 0; i < 4; i++) pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); } inline void get_float3(float* pDst) { for (uint32 i = 0; i < 3; i++) pDst[i] = ((*this)[i] - component_traits::cMin) / float(component_traits::cMax - component_traits::cMin); } static inline color_quad make_black() { return color_quad(0, 0, 0, component_traits::cMax); } static inline color_quad make_white() { return color_quad(component_traits::cMax, component_traits::cMax, component_traits::cMax, component_traits::cMax); } }; // class color_quad #ifdef _MSC_VER #pragma warning(pop) #endif template struct scalar_type > { enum { cFlag = true }; static inline void construct(color_quad*) {} static inline void construct(color_quad* p, const color_quad& init) { memcpy(p, &init, sizeof(color_quad)); } static inline void construct_array(color_quad* p, uint32 n) { p, n; } static inline void destruct(color_quad* p) { p; } static inline void destruct_array(color_quad* p, uint32 n) { p, n; } }; typedef color_quad color_quad_u8; typedef color_quad color_quad_i16; typedef color_quad color_quad_u16; typedef color_quad color_quad_i32; typedef color_quad color_quad_u32; typedef color_quad color_quad_f; typedef color_quad color_quad_d; } // namespace crnd // File: crnd_dxt.h namespace crnd { enum dxt_format { cDXTInvalid = -1, // cDXT1/1A must appear first! 
cDXT1, cDXT1A, cDXT3, cDXT5, cDXT5A, cDXN_XY, // inverted relative to standard ATI2, 360's DXN cDXN_YX // standard ATI2 }; enum dxt_constants { cDXTBlockShift = 2U, cDXTBlockSize = 1U << cDXTBlockShift, cDXT1BytesPerBlock = 8U, cDXT5NBytesPerBlock = 16U, cDXT1SelectorBits = 2U, cDXT1SelectorValues = 1U << cDXT1SelectorBits, cDXT1SelectorMask = cDXT1SelectorValues - 1U, cDXT5SelectorBits = 3U, cDXT5SelectorValues = 1U << cDXT5SelectorBits, cDXT5SelectorMask = cDXT5SelectorValues - 1U }; const float cDXT1MaxLinearValue = 3.0f; const float cDXT1InvMaxLinearValue = 1.0f / 3.0f; const float cDXT5MaxLinearValue = 7.0f; const float cDXT5InvMaxLinearValue = 1.0f / 7.0f; // Converts DXT1 raw color selector index to a linear value. extern const uint8 g_dxt1_to_linear[cDXT1SelectorValues]; // Converts DXT5 raw alpha selector index to a linear value. extern const uint8 g_dxt5_to_linear[cDXT5SelectorValues]; // Converts DXT1 linear color selector index to a raw value (inverse of g_dxt1_to_linear). extern const uint8 g_dxt1_from_linear[cDXT1SelectorValues]; // Converts DXT5 linear alpha selector index to a raw value (inverse of g_dxt5_to_linear). extern const uint8 g_dxt5_from_linear[cDXT5SelectorValues]; extern const uint8 g_six_alpha_invert_table[cDXT5SelectorValues]; extern const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues]; struct dxt1_block { uint8 m_low_color[2]; uint8 m_high_color[2]; enum { cNumSelectorBytes = 4 }; uint8 m_selectors[cNumSelectorBytes]; inline void clear() { utils::zero_this(this); } // These methods assume the in-memory rep is in LE byte order. 
inline uint32 get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } inline uint32 get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } inline void set_low_color(uint16 c) { m_low_color[0] = static_cast(c & 0xFF); m_low_color[1] = static_cast((c >> 8) & 0xFF); } inline void set_high_color(uint16 c) { m_high_color[0] = static_cast(c & 0xFF); m_high_color[1] = static_cast((c >> 8) & 0xFF); } inline uint32 get_selector(uint32 x, uint32 y) const { CRND_ASSERT((x < 4U) && (y < 4U)); return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; } inline void set_selector(uint32 x, uint32 y, uint32 val) { CRND_ASSERT((x < 4U) && (y < 4U) && (val < 4U)); m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); m_selectors[y] |= (val << (x * cDXT1SelectorBits)); } static uint16 pack_color(const color_quad_u8& color, bool scaled, uint32 bias = 127U); static uint16 pack_color(uint32 r, uint32 g, uint32 b, bool scaled, uint32 bias = 127U); static color_quad_u8 unpack_color(uint16 packed_color, bool scaled, uint32 alpha = 255U); static void unpack_color(uint32& r, uint32& g, uint32& b, uint16 packed_color, bool scaled); static uint32 get_block_colors3(color_quad_u8* pDst, uint16 color0, uint16 color1); static uint32 get_block_colors4(color_quad_u8* pDst, uint16 color0, uint16 color1); // pDst must point to an array at least cDXT1SelectorValues long. 
static uint32 get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1); static color_quad_u8 unpack_endpoint(uint32 endpoints, uint32 index, bool scaled, uint32 alpha = 255U); static uint32 pack_endpoints(uint32 lo, uint32 hi); }; CRND_DEFINE_BITWISE_MOVABLE(dxt1_block); struct dxt3_block { enum { cNumAlphaBytes = 8 }; uint8 m_alpha[cNumAlphaBytes]; void set_alpha(uint32 x, uint32 y, uint32 value, bool scaled); uint32 get_alpha(uint32 x, uint32 y, bool scaled) const; }; CRND_DEFINE_BITWISE_MOVABLE(dxt3_block); struct dxt5_block { uint8 m_endpoints[2]; enum { cNumSelectorBytes = 6 }; uint8 m_selectors[cNumSelectorBytes]; inline void clear() { utils::zero_this(this); } inline uint32 get_low_alpha() const { return m_endpoints[0]; } inline uint32 get_high_alpha() const { return m_endpoints[1]; } inline void set_low_alpha(uint32 i) { CRND_ASSERT(i <= cUINT8_MAX); m_endpoints[0] = static_cast(i); } inline void set_high_alpha(uint32 i) { CRND_ASSERT(i <= cUINT8_MAX); m_endpoints[1] = static_cast(i); } uint32 get_endpoints_as_word() const { return m_endpoints[0] | (m_endpoints[1] << 8); } uint32 get_selectors_as_word(uint32 index) { CRND_ASSERT(index < 3); return m_selectors[index * 2] | (m_selectors[index * 2 + 1] << 8); } inline uint32 get_selector(uint32 x, uint32 y) const { CRND_ASSERT((x < 4U) && (y < 4U)); uint32 selector_index = (y * 4) + x; uint32 bit_index = selector_index * cDXT5SelectorBits; uint32 byte_index = bit_index >> 3; uint32 bit_ofs = bit_index & 7; uint32 v = m_selectors[byte_index]; if (byte_index < (cNumSelectorBytes - 1)) v |= (m_selectors[byte_index + 1] << 8); return (v >> bit_ofs) & 7; } inline void set_selector(uint32 x, uint32 y, uint32 val) { CRND_ASSERT((x < 4U) && (y < 4U) && (val < 8U)); uint32 selector_index = (y * 4) + x; uint32 bit_index = selector_index * cDXT5SelectorBits; uint32 byte_index = bit_index >> 3; uint32 bit_ofs = bit_index & 7; uint32 v = m_selectors[byte_index]; if (byte_index < (cNumSelectorBytes - 1)) v |= 
(m_selectors[byte_index + 1] << 8); v &= (~(7 << bit_ofs)); v |= (val << bit_ofs); m_selectors[byte_index] = static_cast(v); if (byte_index < (cNumSelectorBytes - 1)) m_selectors[byte_index + 1] = static_cast(v >> 8); } // Results written to alpha channel. static uint32 get_block_values6(color_quad_u8* pDst, uint32 l, uint32 h); static uint32 get_block_values8(color_quad_u8* pDst, uint32 l, uint32 h); static uint32 get_block_values(color_quad_u8* pDst, uint32 l, uint32 h); static uint32 get_block_values6(uint32* pDst, uint32 l, uint32 h); static uint32 get_block_values8(uint32* pDst, uint32 l, uint32 h); // pDst must point to an array at least cDXT5SelectorValues long. static uint32 get_block_values(uint32* pDst, uint32 l, uint32 h); static uint32 unpack_endpoint(uint32 packed, uint32 index); static uint32 pack_endpoints(uint32 lo, uint32 hi); }; CRND_DEFINE_BITWISE_MOVABLE(dxt5_block); } // namespace crnd // File: crnd_prefix_coding.h #ifdef _XBOX #define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 1 #else #define CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE 0 #endif namespace crnd { namespace prefix_coding { const uint32 cMaxExpectedCodeSize = 16; const uint32 cMaxSupportedSyms = 8192; const uint32 cMaxTableBits = 11; class decoder_tables { public: inline decoder_tables() : m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) { } inline decoder_tables(const decoder_tables& other) : m_cur_lookup_size(0), m_lookup(NULL), m_cur_sorted_symbol_order_size(0), m_sorted_symbol_order(NULL) { *this = other; } decoder_tables& operator=(const decoder_tables& other) { if (this == &other) return *this; clear(); memcpy((void*) this, &other, sizeof(*this)); if (other.m_lookup) { m_lookup = crnd_new_array(m_cur_lookup_size); if (m_lookup) memcpy(m_lookup, other.m_lookup, sizeof(m_lookup[0]) * m_cur_lookup_size); } if (other.m_sorted_symbol_order) { m_sorted_symbol_order = crnd_new_array(m_cur_sorted_symbol_order_size); if 
(m_sorted_symbol_order) memcpy(m_sorted_symbol_order, other.m_sorted_symbol_order, sizeof(m_sorted_symbol_order[0]) * m_cur_sorted_symbol_order_size); } return *this; } inline void clear() { if (m_lookup) { crnd_delete_array(m_lookup); m_lookup = 0; m_cur_lookup_size = 0; } if (m_sorted_symbol_order) { crnd_delete_array(m_sorted_symbol_order); m_sorted_symbol_order = NULL; m_cur_sorted_symbol_order_size = 0; } } inline ~decoder_tables() { if (m_lookup) crnd_delete_array(m_lookup); if (m_sorted_symbol_order) crnd_delete_array(m_sorted_symbol_order); } bool init(uint32 num_syms, const uint8* pCodesizes, uint32 table_bits); // DO NOT use any complex classes here - it is bitwise copied. uint32 m_num_syms; uint32 m_total_used_syms; uint32 m_table_bits; uint32 m_table_shift; uint32 m_table_max_code; uint32 m_decode_start_code_size; uint8 m_min_code_size; uint8 m_max_code_size; uint32 m_max_codes[cMaxExpectedCodeSize + 1]; int32 m_val_ptrs[cMaxExpectedCodeSize + 1]; uint32 m_cur_lookup_size; uint32* m_lookup; uint32 m_cur_sorted_symbol_order_size; uint16* m_sorted_symbol_order; inline uint32 get_unshifted_max_code(uint32 len) const { CRND_ASSERT((len >= 1) && (len <= cMaxExpectedCodeSize)); uint32 k = m_max_codes[len - 1]; if (!k) return crnd::cUINT32_MAX; return (k - 1) >> (16 - len); } }; } // namespace prefix_coding } // namespace crnd // File: crnd_symbol_codec.h namespace crnd { class static_huffman_data_model { public: static_huffman_data_model(); static_huffman_data_model(const static_huffman_data_model& other); ~static_huffman_data_model(); static_huffman_data_model& operator=(const static_huffman_data_model& rhs); bool init(uint32 total_syms, const uint8* pCode_sizes, uint32 code_size_limit); void clear(); inline bool is_valid() const { return m_pDecode_tables != NULL; } inline uint32 get_total_syms() const { return m_total_syms; } inline uint32 get_code_size(uint32 sym) const { return m_code_sizes[sym]; } inline const uint8* get_code_sizes() const { return 
m_code_sizes.empty() ? NULL : &m_code_sizes[0]; } public: uint32 m_total_syms; crnd::vector m_code_sizes; prefix_coding::decoder_tables* m_pDecode_tables; private: bool prepare_decoder_tables(); uint compute_decoder_table_bits() const; friend class symbol_codec; }; class symbol_codec { public: symbol_codec(); bool start_decoding(const uint8* pBuf, uint32 buf_size); bool decode_receive_static_data_model(static_huffman_data_model& model); uint32 decode_bits(uint32 num_bits); uint32 decode(const static_huffman_data_model& model); uint64 stop_decoding(); public: const uint8* m_pDecode_buf; const uint8* m_pDecode_buf_next; const uint8* m_pDecode_buf_end; uint32 m_decode_buf_size; typedef uint32 bit_buf_type; enum { cBitBufSize = 32U }; bit_buf_type m_bit_buf; int m_bit_count; private: void get_bits_init(); uint32 get_bits(uint32 num_bits); }; } // namespace crnd namespace crnd { void crnd_assert(const char* pExp, const char* pFile, unsigned line) { char buf[512]; #if defined(_WIN32) sprintf_s(buf, sizeof(buf), "%s(%u): Assertion failure: \"%s\"\n", pFile, line, pExp); #else snprintf(buf, sizeof(buf), "%s(%u): Assertion failure: \"%s\"\n", pFile, line, pExp); #endif crnd_output_debug_string(buf); puts(buf); if (crnd_is_debugger_present()) crnd_debug_break(); } void crnd_trace(const char* pFmt, va_list args) { if (crnd_is_debugger_present()) { char buf[512]; #if defined(_WIN32) vsprintf_s(buf, sizeof(buf), pFmt, args); #else vsnprintf(buf, sizeof(buf), pFmt, args); #endif crnd_output_debug_string(buf); } }; void crnd_trace(const char* pFmt, ...) { va_list args; va_start(args, pFmt); crnd_trace(pFmt, args); va_end(args); }; } // namespace crnd // File: checksum.cpp // From the public domain stb.h header. 
namespace crnd { uint16 crc16(const void* pBuf, uint32 len, uint16 crc) { crc = ~crc; const uint8* p = reinterpret_cast(pBuf); while (len) { const uint16 q = *p++ ^ (crc >> 8U); crc <<= 8U; uint16 r = (q >> 4U) ^ q; crc ^= r; r <<= 5U; crc ^= r; r <<= 7U; crc ^= r; len--; } return static_cast(~crc); } } // namespace crnd // File: crnd_vector.cpp namespace crnd { bool elemental_vector::increase_capacity(uint32 min_new_capacity, bool grow_hint, uint32 element_size, object_mover pMover) { CRND_ASSERT(m_size <= m_capacity); CRND_ASSERT(min_new_capacity < (0x7FFF0000U / element_size)); if (m_capacity >= min_new_capacity) return true; uint32 new_capacity = min_new_capacity; if ((grow_hint) && (!math::is_power_of_2(new_capacity))) new_capacity = math::next_pow2(new_capacity); CRND_ASSERT(new_capacity && (new_capacity > m_capacity)); const uint32 desired_size = element_size * new_capacity; size_t actual_size; if (!pMover) { void* new_p = crnd_realloc(m_p, desired_size, &actual_size, true); if (!new_p) return false; m_p = new_p; } else { void* new_p = crnd_malloc(desired_size, &actual_size); if (!new_p) return false; (*pMover)(new_p, m_p, m_size); if (m_p) crnd_free(m_p); m_p = new_p; } if (actual_size > desired_size) m_capacity = static_cast(actual_size / element_size); else m_capacity = new_capacity; return true; } } // namespace crnd // File: crnd_utils.cpp namespace crnd { namespace utils { uint32 compute_max_mips(uint32 width, uint32 height) { if ((width | height) == 0) return 0; uint32 num_mips = 1; while ((width > 1U) || (height > 1U)) { width >>= 1U; height >>= 1U; num_mips++; } return num_mips; } } // namespace utils } // namespace crnd // File: crnd_prefix_coding.cpp namespace crnd { namespace prefix_coding { bool decoder_tables::init(uint32 num_syms, const uint8* pCodesizes, uint32 table_bits) { uint32 min_codes[cMaxExpectedCodeSize]; if ((!num_syms) || (table_bits > cMaxTableBits)) return false; m_num_syms = num_syms; uint32 num_codes[cMaxExpectedCodeSize + 1]; 
utils::zero_object(num_codes); for (uint32 i = 0; i < num_syms; i++) { uint32 c = pCodesizes[i]; if (c) num_codes[c]++; } uint32 sorted_positions[cMaxExpectedCodeSize + 1]; uint32 cur_code = 0; uint32 total_used_syms = 0; uint32 max_code_size = 0; uint32 min_code_size = cUINT32_MAX; for (uint32 i = 1; i <= cMaxExpectedCodeSize; i++) { const uint32 n = num_codes[i]; if (!n) m_max_codes[i - 1] = 0; //UINT_MAX; else { min_code_size = math::minimum(min_code_size, i); max_code_size = math::maximum(max_code_size, i); min_codes[i - 1] = cur_code; m_max_codes[i - 1] = cur_code + n - 1; m_max_codes[i - 1] = 1 + ((m_max_codes[i - 1] << (16 - i)) | ((1 << (16 - i)) - 1)); m_val_ptrs[i - 1] = total_used_syms; sorted_positions[i] = total_used_syms; cur_code += n; total_used_syms += n; } cur_code <<= 1; } m_total_used_syms = total_used_syms; if (total_used_syms > m_cur_sorted_symbol_order_size) { m_cur_sorted_symbol_order_size = total_used_syms; if (!math::is_power_of_2(total_used_syms)) m_cur_sorted_symbol_order_size = math::minimum(num_syms, math::next_pow2(total_used_syms)); if (m_sorted_symbol_order) crnd_delete_array(m_sorted_symbol_order); m_sorted_symbol_order = crnd_new_array(m_cur_sorted_symbol_order_size); if (!m_sorted_symbol_order) return false; } m_min_code_size = static_cast(min_code_size); m_max_code_size = static_cast(max_code_size); for (uint32 i = 0; i < num_syms; i++) { uint32 c = pCodesizes[i]; if (c) { CRND_ASSERT(num_codes[c]); uint32 sorted_pos = sorted_positions[c]++; CRND_ASSERT(sorted_pos < total_used_syms); m_sorted_symbol_order[sorted_pos] = static_cast(i); } } if (table_bits <= m_min_code_size) table_bits = 0; m_table_bits = table_bits; if (table_bits) { uint32 table_size = 1 << table_bits; if (table_size > m_cur_lookup_size) { m_cur_lookup_size = table_size; if (m_lookup) crnd_delete_array(m_lookup); m_lookup = crnd_new_array(table_size); if (!m_lookup) return false; } memset(m_lookup, 0xFF, (uint)sizeof(m_lookup[0]) * (1UL << table_bits)); for 
(uint32 codesize = 1; codesize <= table_bits; codesize++) { if (!num_codes[codesize]) continue; const uint32 fillsize = table_bits - codesize; const uint32 fillnum = 1 << fillsize; const uint32 min_code = min_codes[codesize - 1]; const uint32 max_code = get_unshifted_max_code(codesize); const uint32 val_ptr = m_val_ptrs[codesize - 1]; for (uint32 code = min_code; code <= max_code; code++) { const uint32 sym_index = m_sorted_symbol_order[val_ptr + code - min_code]; CRND_ASSERT(pCodesizes[sym_index] == codesize); for (uint32 j = 0; j < fillnum; j++) { const uint32 t = j + (code << fillsize); CRND_ASSERT(t < (1U << table_bits)); CRND_ASSERT(m_lookup[t] == cUINT32_MAX); m_lookup[t] = sym_index | (codesize << 16U); } } } } for (uint32 i = 0; i < cMaxExpectedCodeSize; i++) m_val_ptrs[i] -= min_codes[i]; m_table_max_code = 0; m_decode_start_code_size = m_min_code_size; if (table_bits) { uint32 i; for (i = table_bits; i >= 1; i--) { if (num_codes[i]) { m_table_max_code = m_max_codes[i - 1]; break; } } if (i >= 1) { m_decode_start_code_size = table_bits + 1; for (uint32 j = table_bits + 1; j <= max_code_size; j++) { if (num_codes[j]) { m_decode_start_code_size = j; break; } } } } // sentinels m_max_codes[cMaxExpectedCodeSize] = cUINT32_MAX; m_val_ptrs[cMaxExpectedCodeSize] = 0xFFFFF; m_table_shift = 32 - m_table_bits; return true; } } // namespace prefix_codig } // namespace crnd // File: crnd_platform.cpp namespace crnd { bool crnd_is_debugger_present() { #ifdef CRND_DEVEL return IsDebuggerPresent() != 0; #else return false; #endif } void crnd_debug_break() { #ifdef CRND_DEVEL DebugBreak(); #endif } void crnd_output_debug_string(const char* p) { (void)p; #ifdef CRND_DEVEL OutputDebugStringA(p); #endif } } // namespace crnd // File: crnd_mem.cpp namespace crnd { const uint32 MAX_POSSIBLE_BLOCK_SIZE = 0x7FFF0000U; static void* crnd_default_realloc(void* p, size_t size, size_t* pActual_size, bool movable, void*) { void* p_new; if (!p) { #if defined(__FreeBSD__) // See 
https://github.com/DaemonEngine/crunch/pull/36 p_new = ::aligned_alloc(CRNLIB_MIN_ALLOC_ALIGNMENT, size); #else p_new = ::malloc(size); #endif if (pActual_size) { #ifdef WIN32 *pActual_size = p_new ? ::_msize(p_new) : 0; #elif defined(__APPLE__) *pActual_size = p_new ? ::malloc_size(p_new) : 0; #else *pActual_size = p_new ? malloc_usable_size(p_new) : 0; #endif } } else if (!size) { ::free(p); p_new = NULL; if (pActual_size) *pActual_size = 0; } else { void* p_final_block = p; #ifdef WIN32 p_new = ::_expand(p, size); #else p_new = NULL; #endif if (p_new) p_final_block = p_new; else if (movable) { p_new = ::realloc(p, size); if (p_new) p_final_block = p_new; } if (pActual_size) { #ifdef WIN32 *pActual_size = ::_msize(p_final_block); #elif defined(__APPLE__) *pActual_size = ::malloc_size(p_final_block); #else *pActual_size = ::malloc_usable_size(p_final_block); #endif } } return p_new; } static size_t crnd_default_msize(void* p, void* pUser_data) { (void)pUser_data; #ifdef WIN32 return p ? _msize(p) : 0; #elif defined(__APPLE__) return p ? malloc_size(p) : 0; #else return p ? 
malloc_usable_size(p) : 0; #endif } static crnd_realloc_func g_pRealloc = crnd_default_realloc; static crnd_msize_func g_pMSize = crnd_default_msize; static void* g_pUser_data; void crnd_set_memory_callbacks(crnd_realloc_func pRealloc, crnd_msize_func pMSize, void* pUser_data) { if ((!pRealloc) || (!pMSize)) { g_pRealloc = crnd_default_realloc; g_pMSize = crnd_default_msize; g_pUser_data = NULL; } else { g_pRealloc = pRealloc; g_pMSize = pMSize; g_pUser_data = pUser_data; } } static inline void crnd_mem_error(const char* p_msg) { crnd_assert(p_msg, __FILE__, __LINE__); } void* crnd_malloc(size_t size, size_t* pActual_size) { size = (size + sizeof(uint32) - 1U) & ~(sizeof(uint32) - 1U); if (!size) size = sizeof(uint32); if (size > MAX_POSSIBLE_BLOCK_SIZE) { crnd_mem_error("crnd_malloc: size too big"); return NULL; } size_t actual_size = size; uint8* p_new = static_cast((*g_pRealloc)(NULL, size, &actual_size, true, g_pUser_data)); if (pActual_size) *pActual_size = actual_size; if ((!p_new) || (actual_size < size)) { crnd_mem_error("crnd_malloc: out of memory"); return NULL; } CRND_ASSERT(((uint32) reinterpret_cast(p_new) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) == 0); return p_new; } void* crnd_realloc(void* p, size_t size, size_t* pActual_size, bool movable) { if ((uint32) reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) { crnd_mem_error("crnd_realloc: bad ptr"); return NULL; } if (size > MAX_POSSIBLE_BLOCK_SIZE) { crnd_mem_error("crnd_malloc: size too big"); return NULL; } size_t actual_size = size; void* p_new = (*g_pRealloc)(p, size, &actual_size, movable, g_pUser_data); if (pActual_size) *pActual_size = actual_size; CRND_ASSERT(((uint32) reinterpret_cast(p_new) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) == 0); return p_new; } void crnd_free(void* p) { if (!p) return; if ((uint32) reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) { crnd_mem_error("crnd_free: bad ptr"); return; } (*g_pRealloc)(p, 0, NULL, true, g_pUser_data); } size_t crnd_msize(void* p) { if (!p) return 
0; if ((uint32) reinterpret_cast(p) & (CRND_MIN_ALLOC_ALIGNMENT - 1)) { crnd_mem_error("crnd_msize: bad ptr"); return 0; } return (*g_pMSize)(p, g_pUser_data); } } // namespace crnd // File: crnd_math.cpp namespace crnd { namespace math { uint32 g_bitmasks[32] = { 1U << 0U, 1U << 1U, 1U << 2U, 1U << 3U, 1U << 4U, 1U << 5U, 1U << 6U, 1U << 7U, 1U << 8U, 1U << 9U, 1U << 10U, 1U << 11U, 1U << 12U, 1U << 13U, 1U << 14U, 1U << 15U, 1U << 16U, 1U << 17U, 1U << 18U, 1U << 19U, 1U << 20U, 1U << 21U, 1U << 22U, 1U << 23U, 1U << 24U, 1U << 25U, 1U << 26U, 1U << 27U, 1U << 28U, 1U << 29U, 1U << 30U, 1U << 31U}; } // namespace math } // namespace crnd // File: crnd_info.cpp namespace crnd { #define CRND_FOURCC(a, b, c, d) ((a) | ((b) << 8U) | ((c) << 16U) | ((d) << 24U)) uint32 crnd_crn_format_to_fourcc(crn_format fmt) { switch (fmt) { case cCRNFmtDXT1: return CRND_FOURCC('D', 'X', 'T', '1'); case cCRNFmtDXT3: return CRND_FOURCC('D', 'X', 'T', '3'); case cCRNFmtDXT5: return CRND_FOURCC('D', 'X', 'T', '5'); case cCRNFmtDXN_XY: return CRND_FOURCC('A', '2', 'X', 'Y'); case cCRNFmtDXN_YX: return CRND_FOURCC('A', 'T', 'I', '2'); case cCRNFmtDXT5A: return CRND_FOURCC('A', 'T', 'I', '1'); case cCRNFmtDXT5_CCxY: return CRND_FOURCC('C', 'C', 'x', 'Y'); case cCRNFmtDXT5_xGxR: return CRND_FOURCC('x', 'G', 'x', 'R'); case cCRNFmtDXT5_xGBR: return CRND_FOURCC('x', 'G', 'B', 'R'); case cCRNFmtDXT5_AGBR: return CRND_FOURCC('A', 'G', 'B', 'R'); case cCRNFmtETC1: return CRND_FOURCC('E', 'T', 'C', '1'); case cCRNFmtETC2: return CRND_FOURCC('E', 'T', 'C', '2'); case cCRNFmtETC2A: return CRND_FOURCC('E', 'T', '2', 'A'); case cCRNFmtETC1S: return CRND_FOURCC('E', 'T', '1', 'S'); case cCRNFmtETC2AS: return CRND_FOURCC('E', '2', 'A', 'S'); default: break; } CRND_ASSERT(false); return 0; } crn_format crnd_get_fundamental_dxt_format(crn_format fmt) { switch (fmt) { case cCRNFmtDXT5_CCxY: case cCRNFmtDXT5_xGxR: case cCRNFmtDXT5_xGBR: case cCRNFmtDXT5_AGBR: return cCRNFmtDXT5; default: break; } return 
fmt; } uint32 crnd_get_crn_format_bits_per_texel(crn_format fmt) { switch (fmt) { case cCRNFmtDXT1: case cCRNFmtDXT5A: case cCRNFmtETC1: case cCRNFmtETC2: case cCRNFmtETC1S: return 4; case cCRNFmtDXT3: case cCRNFmtDXT5: case cCRNFmtDXN_XY: case cCRNFmtDXN_YX: case cCRNFmtDXT5_CCxY: case cCRNFmtDXT5_xGxR: case cCRNFmtDXT5_xGBR: case cCRNFmtDXT5_AGBR: case cCRNFmtETC2A: case cCRNFmtETC2AS: return 8; default: break; } CRND_ASSERT(false); return 0; } uint32 crnd_get_bytes_per_dxt_block(crn_format fmt) { return (crnd_get_crn_format_bits_per_texel(fmt) << 4) >> 3; } // TODO: tmp_header isn't used/This function is a helper to support old headers. const crn_header* crnd_get_header(const void* pData, uint32 data_size) { if ((!pData) || (data_size < sizeof(crn_header))) return NULL; const crn_header& file_header = *static_cast(pData); if (file_header.m_sig != crn_header::cCRNSigValue) return NULL; if ((file_header.m_header_size < sizeof(crn_header)) || (data_size < file_header.m_data_size)) return NULL; return &file_header; } bool crnd_validate_header(const void* pData, uint32 data_size, crn_file_info* pFile_info) { if (pFile_info) { if (pFile_info->m_struct_size != sizeof(crn_file_info)) return false; memset(&pFile_info->m_struct_size + 1, 0, sizeof(crn_file_info) - sizeof(pFile_info->m_struct_size)); } if ((!pData) || (data_size < cCRNHeaderMinSize)) return false; const crn_header* pHeader = crnd_get_header(pData, data_size); if (!pHeader) return false; const uint32 header_crc = crc16(&pHeader->m_data_size, (uint32)(pHeader->m_header_size - ((const uint8*)&pHeader->m_data_size - (const uint8*)pHeader))); if (header_crc != pHeader->m_header_crc16) return false; if (pHeader->m_data_size < pHeader->m_header_size) return false; if ((pHeader->m_faces != 1) && (pHeader->m_faces != 6)) return false; if ((pHeader->m_width < 1) || (pHeader->m_width > cCRNMaxLevelResolution)) return false; if ((pHeader->m_height < 1) || (pHeader->m_height > cCRNMaxLevelResolution)) return false; if 
((pHeader->m_levels < 1) || (pHeader->m_levels > utils::compute_max_mips(pHeader->m_width, pHeader->m_height))) return false; if (((int)pHeader->m_format < cCRNFmtDXT1) || ((int)pHeader->m_format >= cCRNFmtTotal)) return false; if (pFile_info) { pFile_info->m_actual_data_size = pHeader->m_data_size; pFile_info->m_header_size = pHeader->m_header_size; pFile_info->m_total_palette_size = pHeader->m_color_endpoints.m_size + pHeader->m_color_selectors.m_size + pHeader->m_alpha_endpoints.m_size + pHeader->m_alpha_selectors.m_size; pFile_info->m_tables_size = pHeader->m_tables_size; pFile_info->m_levels = pHeader->m_levels; for (uint32 i = 0; i < pHeader->m_levels; i++) { uint32 next_ofs = pHeader->m_data_size; // assumes the levels are packed together sequentially if ((i + 1) < pHeader->m_levels) next_ofs = pHeader->m_level_ofs[i + 1]; pFile_info->m_level_compressed_size[i] = next_ofs - pHeader->m_level_ofs[i]; } pFile_info->m_color_endpoint_palette_entries = pHeader->m_color_endpoints.m_num; pFile_info->m_color_selector_palette_entries = pHeader->m_color_selectors.m_num; ; pFile_info->m_alpha_endpoint_palette_entries = pHeader->m_alpha_endpoints.m_num; ; pFile_info->m_alpha_selector_palette_entries = pHeader->m_alpha_selectors.m_num; ; } return true; } bool crnd_validate_file(const void* pData, uint32 data_size, crn_file_info* pFile_info) { if (!crnd_validate_header(pData, data_size, pFile_info)) return false; // was already validated by crnd_get_header const crn_header* pHeader = static_cast(pData); if (pHeader->m_data_size > data_size) return false; const uint32 data_crc = crc16((const uint8*)pData + pHeader->m_header_size, pHeader->m_data_size - pHeader->m_header_size); return data_crc == pHeader->m_data_crc16; } bool crnd_get_texture_info(const void* pData, uint32 data_size, crn_texture_info* pInfo) { if ((!pData) || (data_size < sizeof(crn_header)) || (!pInfo)) return false; if (pInfo->m_struct_size != sizeof(crn_texture_info)) return false; const crn_header* 
pHeader = crnd_get_header(pData, data_size); if (!pHeader) return false; pInfo->m_width = pHeader->m_width; pInfo->m_height = pHeader->m_height; pInfo->m_levels = pHeader->m_levels; pInfo->m_faces = pHeader->m_faces; pInfo->m_format = static_cast((uint32)pHeader->m_format); pInfo->m_bytes_per_block = pHeader->m_format == cCRNFmtDXT1 || pHeader->m_format == cCRNFmtDXT5A || pHeader->m_format == cCRNFmtETC1 || pHeader->m_format == cCRNFmtETC2 || pHeader->m_format == cCRNFmtETC1S ? 8 : 16; pInfo->m_userdata0 = pHeader->m_userdata0; pInfo->m_userdata1 = pHeader->m_userdata1; return true; } bool crnd_get_level_info(const void* pData, uint32 data_size, uint32 level_index, crn_level_info* pLevel_info) { if ((!pData) || (data_size < cCRNHeaderMinSize) || (!pLevel_info)) return false; if (pLevel_info->m_struct_size != sizeof(crn_level_info)) return false; const crn_header* pHeader = crnd_get_header(pData, data_size); if (!pHeader) return false; if (level_index >= pHeader->m_levels) return false; uint32 width = math::maximum(1U, pHeader->m_width >> level_index); uint32 height = math::maximum(1U, pHeader->m_height >> level_index); pLevel_info->m_width = width; pLevel_info->m_height = height; pLevel_info->m_faces = pHeader->m_faces; pLevel_info->m_blocks_x = (width + 3) >> 2; pLevel_info->m_blocks_y = (height + 3) >> 2; pLevel_info->m_bytes_per_block = ((pHeader->m_format == cCRNFmtDXT1) || (pHeader->m_format == cCRNFmtDXT5A)) ? 
8 : 16; pLevel_info->m_format = static_cast((uint32)pHeader->m_format); return true; } const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 level_index, uint32* pSize) { if (pSize) *pSize = 0; if ((!pData) || (data_size < cCRNHeaderMinSize)) return NULL; const crn_header* pHeader = crnd_get_header(pData, data_size); if (!pHeader) return NULL; if (level_index >= pHeader->m_levels) return NULL; uint32 cur_level_ofs = pHeader->m_level_ofs[level_index]; if (pSize) { uint32 next_level_ofs = data_size; if ((level_index + 1) < (pHeader->m_levels)) next_level_ofs = pHeader->m_level_ofs[level_index + 1]; *pSize = next_level_ofs - cur_level_ofs; } return static_cast(pData) + cur_level_ofs; } uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size) { if ((!pData) || (data_size < cCRNHeaderMinSize)) return false; const crn_header* pHeader = crnd_get_header(pData, data_size); if (!pHeader) return false; uint32 size = pHeader->m_header_size; size = math::maximum(size, pHeader->m_color_endpoints.m_ofs + pHeader->m_color_endpoints.m_size); size = math::maximum(size, pHeader->m_color_selectors.m_ofs + pHeader->m_color_selectors.m_size); size = math::maximum(size, pHeader->m_alpha_endpoints.m_ofs + pHeader->m_alpha_endpoints.m_size); size = math::maximum(size, pHeader->m_alpha_selectors.m_ofs + pHeader->m_alpha_selectors.m_size); size = math::maximum(size, pHeader->m_tables_ofs + pHeader->m_tables_size); return size; } bool crnd_create_segmented_file(const void* pData, uint32 data_size, void* pBase_data, uint base_data_size) { if ((!pData) || (data_size < cCRNHeaderMinSize)) return false; const crn_header* pHeader = crnd_get_header(pData, data_size); if (!pHeader) return false; if (pHeader->m_flags & cCRNHeaderFlagSegmented) return false; const uint actual_base_data_size = crnd_get_segmented_file_size(pData, data_size); if (base_data_size < actual_base_data_size) return false; memcpy(pBase_data, pData, actual_base_data_size); crn_header& 
new_header = *static_cast(pBase_data); new_header.m_flags = new_header.m_flags | cCRNHeaderFlagSegmented; new_header.m_data_size = actual_base_data_size; new_header.m_data_crc16 = crc16((const uint8*)pBase_data + new_header.m_header_size, new_header.m_data_size - new_header.m_header_size); new_header.m_header_crc16 = crc16(&new_header.m_data_size, new_header.m_header_size - (uint32)((const uint8*)&new_header.m_data_size - (const uint8*)&new_header)); CRND_ASSERT(crnd_validate_file(&new_header, actual_base_data_size, NULL)); return true; } } // namespace crnd // File: symbol_codec.cpp namespace crnd { static_huffman_data_model::static_huffman_data_model() : m_total_syms(0), m_pDecode_tables(NULL) { } static_huffman_data_model::static_huffman_data_model(const static_huffman_data_model& other) : m_total_syms(0), m_pDecode_tables(NULL) { *this = other; } static_huffman_data_model::~static_huffman_data_model() { if (m_pDecode_tables) crnd_delete(m_pDecode_tables); } static_huffman_data_model& static_huffman_data_model::operator=(const static_huffman_data_model& rhs) { if (this == &rhs) return *this; m_total_syms = rhs.m_total_syms; m_code_sizes = rhs.m_code_sizes; if (m_code_sizes.get_alloc_failed()) { clear(); return *this; } if (rhs.m_pDecode_tables) { if (m_pDecode_tables) *m_pDecode_tables = *rhs.m_pDecode_tables; else m_pDecode_tables = crnd_new(*rhs.m_pDecode_tables); } else { crnd_delete(m_pDecode_tables); m_pDecode_tables = NULL; } return *this; } void static_huffman_data_model::clear() { m_total_syms = 0; m_code_sizes.clear(); if (m_pDecode_tables) { crnd_delete(m_pDecode_tables); m_pDecode_tables = NULL; } } bool static_huffman_data_model::init(uint32 total_syms, const uint8* pCode_sizes, uint32 code_size_limit) { CRND_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms) && (code_size_limit >= 1)); code_size_limit = math::minimum(code_size_limit, prefix_coding::cMaxExpectedCodeSize); if (!m_code_sizes.resize(total_syms)) return false; 
uint32 min_code_size = cUINT32_MAX; uint32 max_code_size = 0; for (uint32 i = 0; i < total_syms; i++) { uint32 s = pCode_sizes[i]; m_code_sizes[i] = static_cast(s); min_code_size = math::minimum(min_code_size, s); max_code_size = math::maximum(max_code_size, s); } if ((max_code_size < 1) || (max_code_size > 32) || (min_code_size > code_size_limit)) return false; if (max_code_size > code_size_limit) return false; if (!m_pDecode_tables) m_pDecode_tables = crnd_new(); if (!m_pDecode_tables->init(m_total_syms, &m_code_sizes[0], compute_decoder_table_bits())) return false; return true; } bool static_huffman_data_model::prepare_decoder_tables() { uint32 total_syms = m_code_sizes.size(); CRND_ASSERT((total_syms >= 1) && (total_syms <= prefix_coding::cMaxSupportedSyms)); m_total_syms = total_syms; if (!m_pDecode_tables) m_pDecode_tables = crnd_new(); return m_pDecode_tables->init(m_total_syms, &m_code_sizes[0], compute_decoder_table_bits()); } uint static_huffman_data_model::compute_decoder_table_bits() const { #if CRND_PREFIX_CODING_USE_FIXED_TABLE_SIZE return prefix_coding::cMaxTableBits; #else uint32 decoder_table_bits = 0; if (m_total_syms > 16) decoder_table_bits = static_cast(math::minimum(1 + math::ceil_log2i(m_total_syms), prefix_coding::cMaxTableBits)); return decoder_table_bits; #endif } symbol_codec::symbol_codec() : m_pDecode_buf(NULL), m_pDecode_buf_next(NULL), m_pDecode_buf_end(NULL), m_decode_buf_size(0), m_bit_buf(0), m_bit_count(0) { } // Code length encoding symbols: // 0-16 - actual code lengths const uint32 cMaxCodelengthCodes = 21; const uint32 cSmallZeroRunCode = 17; const uint32 cLargeZeroRunCode = 18; const uint32 cSmallRepeatCode = 19; const uint32 cLargeRepeatCode = 20; const uint32 cMinSmallZeroRunSize = 3; const uint32 cMaxSmallZeroRunSize = 10; const uint32 cMinLargeZeroRunSize = 11; const uint32 cMaxLargeZeroRunSize = 138; const uint32 cSmallMinNonZeroRunSize = 3; const uint32 cSmallMaxNonZeroRunSize = 6; const uint32 cLargeMinNonZeroRunSize = 
7; const uint32 cLargeMaxNonZeroRunSize = 70; const uint32 cSmallZeroRunExtraBits = 3; const uint32 cLargeZeroRunExtraBits = 7; const uint32 cSmallNonZeroRunExtraBits = 2; const uint32 cLargeNonZeroRunExtraBits = 6; static const uint8 g_most_probable_codelength_codes[] = { cSmallZeroRunCode, cLargeZeroRunCode, cSmallRepeatCode, cLargeRepeatCode, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15, 16}; const uint32 cNumMostProbableCodelengthCodes = sizeof(g_most_probable_codelength_codes) / sizeof(g_most_probable_codelength_codes[0]); bool symbol_codec::decode_receive_static_data_model(static_huffman_data_model& model) { const uint32 total_used_syms = decode_bits(math::total_bits(prefix_coding::cMaxSupportedSyms)); if (!total_used_syms) { model.clear(); return true; } if (!model.m_code_sizes.resize(total_used_syms)) return false; memset(&model.m_code_sizes[0], 0, sizeof(model.m_code_sizes[0]) * total_used_syms); const uint32 num_codelength_codes_to_send = decode_bits(5); if ((num_codelength_codes_to_send < 1) || (num_codelength_codes_to_send > cMaxCodelengthCodes)) return false; static_huffman_data_model dm; if (!dm.m_code_sizes.resize(cMaxCodelengthCodes)) return false; for (uint32 i = 0; i < num_codelength_codes_to_send; i++) dm.m_code_sizes[g_most_probable_codelength_codes[i]] = static_cast(decode_bits(3)); if (!dm.prepare_decoder_tables()) return false; uint32 ofs = 0; while (ofs < total_used_syms) { const uint32 num_remaining = total_used_syms - ofs; uint32 code = decode(dm); if (code <= 16) model.m_code_sizes[ofs++] = static_cast(code); else if (code == cSmallZeroRunCode) { uint32 len = decode_bits(cSmallZeroRunExtraBits) + cMinSmallZeroRunSize; if (len > num_remaining) return false; ofs += len; } else if (code == cLargeZeroRunCode) { uint32 len = decode_bits(cLargeZeroRunExtraBits) + cMinLargeZeroRunSize; if (len > num_remaining) return false; ofs += len; } else if ((code == cSmallRepeatCode) || (code == cLargeRepeatCode)) { uint32 len; if (code == 
cSmallRepeatCode) len = decode_bits(cSmallNonZeroRunExtraBits) + cSmallMinNonZeroRunSize; else len = decode_bits(cLargeNonZeroRunExtraBits) + cLargeMinNonZeroRunSize; if ((!ofs) || (len > num_remaining)) return false; const uint32 prev = model.m_code_sizes[ofs - 1]; if (!prev) return false; const uint32 end = ofs + len; while (ofs < end) model.m_code_sizes[ofs++] = static_cast(prev); } else { CRND_ASSERT(0); return false; } } if (ofs != total_used_syms) return false; return model.prepare_decoder_tables(); } bool symbol_codec::start_decoding(const uint8* pBuf, uint32 buf_size) { if (!buf_size) return false; m_pDecode_buf = pBuf; m_pDecode_buf_next = pBuf; m_decode_buf_size = buf_size; m_pDecode_buf_end = pBuf + buf_size; get_bits_init(); return true; } void symbol_codec::get_bits_init() { m_bit_buf = 0; m_bit_count = 0; } uint32 symbol_codec::decode_bits(uint32 num_bits) { if (!num_bits) return 0; if (num_bits > 16) { uint32 a = get_bits(num_bits - 16); uint32 b = get_bits(16); return (a << 16) | b; } else return get_bits(num_bits); } uint32 symbol_codec::get_bits(uint32 num_bits) { CRND_ASSERT(num_bits <= 32U); while (m_bit_count < (int)num_bits) { bit_buf_type c = 0; if (m_pDecode_buf_next != m_pDecode_buf_end) c = *m_pDecode_buf_next++; m_bit_count += 8; CRND_ASSERT(m_bit_count <= cBitBufSize); m_bit_buf |= (c << (cBitBufSize - m_bit_count)); } uint32 result = static_cast(m_bit_buf >> (cBitBufSize - num_bits)); m_bit_buf <<= num_bits; m_bit_count -= num_bits; return result; } uint32 symbol_codec::decode(const static_huffman_data_model& model) { const prefix_coding::decoder_tables* pTables = model.m_pDecode_tables; if (m_bit_count < 24) { if (m_bit_count < 16) { uint32 c0 = 0, c1 = 0; const uint8* p = m_pDecode_buf_next; if (p < m_pDecode_buf_end) c0 = *p++; if (p < m_pDecode_buf_end) c1 = *p++; m_pDecode_buf_next = p; m_bit_count += 16; uint32 c = (c0 << 8) | c1; m_bit_buf |= (c << (32 - m_bit_count)); } else { uint32 c = (m_pDecode_buf_next < m_pDecode_buf_end) 
? *m_pDecode_buf_next++ : 0; m_bit_count += 8; m_bit_buf |= (c << (32 - m_bit_count)); } } uint32 k = (m_bit_buf >> 16) + 1; uint32 sym, len; if (k <= pTables->m_table_max_code) { uint32 t = pTables->m_lookup[m_bit_buf >> (32 - pTables->m_table_bits)]; CRND_ASSERT(t != cUINT32_MAX); sym = t & cUINT16_MAX; len = t >> 16; CRND_ASSERT(model.m_code_sizes[sym] == len); } else { len = pTables->m_decode_start_code_size; for (;;) { if (k <= pTables->m_max_codes[len - 1]) break; len++; } int val_ptr = pTables->m_val_ptrs[len - 1] + (m_bit_buf >> (32 - len)); if (((uint32)val_ptr >= model.m_total_syms)) { // corrupted stream, or a bug CRND_ASSERT(0); return 0; } sym = pTables->m_sorted_symbol_order[val_ptr]; } m_bit_buf <<= len; m_bit_count -= len; return sym; } uint64 symbol_codec::stop_decoding() { return static_cast(m_pDecode_buf_next - m_pDecode_buf); } } // namespace crnd // File: crnd_dxt.cpp namespace crnd { const uint8 g_dxt1_to_linear[cDXT1SelectorValues] = {0U, 3U, 1U, 2U}; const uint8 g_dxt1_from_linear[cDXT1SelectorValues] = {0U, 2U, 3U, 1U}; const uint8 g_etc1_from_linear[cDXT1SelectorValues] = {3U, 2U, 0U, 1U}; const uint8 g_dxt5_to_linear[cDXT5SelectorValues] = {0U, 7U, 1U, 2U, 3U, 4U, 5U, 6U}; const uint8 g_dxt5_from_linear[cDXT5SelectorValues] = {0U, 2U, 3U, 4U, 5U, 6U, 7U, 1U}; const uint8 g_six_alpha_invert_table[cDXT5SelectorValues] = {1, 0, 5, 4, 3, 2, 6, 7}; const uint8 g_eight_alpha_invert_table[cDXT5SelectorValues] = {1, 0, 7, 6, 5, 4, 3, 2}; uint16 dxt1_block::pack_color(const color_quad_u8& color, bool scaled, uint32 bias) { uint32 r = color.r; uint32 g = color.g; uint32 b = color.b; if (scaled) { r = (r * 31U + bias) / 255U; g = (g * 63U + bias) / 255U; b = (b * 31U + bias) / 255U; } r = math::minimum(r, 31U); g = math::minimum(g, 63U); b = math::minimum(b, 31U); return static_cast(b | (g << 5U) | (r << 11U)); } uint16 dxt1_block::pack_color(uint32 r, uint32 g, uint32 b, bool scaled, uint32 bias) { return pack_color(color_quad_u8(r, g, b, 0), 
// Expands a 16-bit 5:6:5 colour into a color_quad_u8.
//
//   packed_color  Red in bits 11..15, green in bits 5..10, blue in bits 0..4.
//   scaled        When true each component is widened to 8 bits by
//                 replicating its top bits into the low bits; when false the
//                 raw 5/6-bit values are returned.
//   alpha         Stored unchanged as the alpha component of the result.
color_quad_u8 dxt1_block::unpack_color(uint16 packed_color, bool scaled, uint32 alpha) {
  const uint32 r5 = (packed_color >> 11U) & 31U;
  const uint32 g6 = (packed_color >> 5U) & 63U;
  const uint32 b5 = packed_color & 31U;

  if (!scaled)
    return color_quad_u8(r5, g6, b5, alpha);

  const uint32 r8 = (r5 << 3U) | (r5 >> 2U);
  const uint32 g8 = (g6 << 2U) | (g6 >> 4U);
  const uint32 b8 = (b5 << 3U) | (b5 >> 2U);
  return color_quad_u8(r8, g8, b8, alpha);
}
//pDst[2].set( (c0.r * 2 + c1.r + 1) / 3, (c0.g * 2 + c1.g + 1) / 3, (c0.b * 2 + c1.b + 1) / 3, 255U); //pDst[3].set( (c1.r * 2 + c0.r + 1) / 3, (c1.g * 2 + c0.g + 1) / 3, (c1.b * 2 + c0.b + 1) / 3, 255U); pDst[2].set((c0.r * 2 + c1.r) / 3, (c0.g * 2 + c1.g) / 3, (c0.b * 2 + c1.b) / 3, 255U); pDst[3].set((c1.r * 2 + c0.r) / 3, (c1.g * 2 + c0.g) / 3, (c1.b * 2 + c0.b) / 3, 255U); return 4; } uint32 dxt1_block::get_block_colors(color_quad_u8* pDst, uint16 color0, uint16 color1) { if (color0 > color1) return get_block_colors4(pDst, color0, color1); else return get_block_colors3(pDst, color0, color1); } color_quad_u8 dxt1_block::unpack_endpoint(uint32 endpoints, uint32 index, bool scaled, uint32 alpha) { CRND_ASSERT(index < 2); return unpack_color(static_cast((endpoints >> (index * 16U)) & 0xFFFFU), scaled, alpha); } uint32 dxt1_block::pack_endpoints(uint32 lo, uint32 hi) { CRND_ASSERT((lo <= 0xFFFFU) && (hi <= 0xFFFFU)); return lo | (hi << 16U); } void dxt3_block::set_alpha(uint32 x, uint32 y, uint32 value, bool scaled) { CRND_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); if (scaled) { CRND_ASSERT(value <= 0xFF); value = (value * 15U + 128U) / 255U; } else { CRND_ASSERT(value <= 0xF); } uint32 ofs = (y << 1U) + (x >> 1U); uint32 c = m_alpha[ofs]; c &= ~(0xF << ((x & 1U) << 2U)); c |= (value << ((x & 1U) << 2U)); m_alpha[ofs] = static_cast(c); } uint32 dxt3_block::get_alpha(uint32 x, uint32 y, bool scaled) const { CRND_ASSERT((x < cDXTBlockSize) && (y < cDXTBlockSize)); uint32 value = m_alpha[(y << 1U) + (x >> 1U)]; if (x & 1) value >>= 4; value &= 0xF; if (scaled) value = (value << 4U) | value; return value; } uint32 dxt5_block::get_block_values6(color_quad_u8* pDst, uint32 l, uint32 h) { pDst[0].a = static_cast(l); pDst[1].a = static_cast(h); pDst[2].a = static_cast((l * 4 + h) / 5); pDst[3].a = static_cast((l * 3 + h * 2) / 5); pDst[4].a = static_cast((l * 2 + h * 3) / 5); pDst[5].a = static_cast((l + h * 4) / 5); pDst[6].a = 0; pDst[7].a = 255; return 6; } 
// Writes the eight alpha values of an 8-value DXT5 alpha block into the `a`
// member of pDst[0..7]: the two endpoints followed by six values interpolated
// between them in sevenths (integer division, no rounding).
//
//   pDst  Array of at least 8 entries; only the `a` member is written.
//   l, h  The two endpoint values, stored in pDst[0] and pDst[1].
//
// Always returns 8, the number of values produced.
uint32 dxt5_block::get_block_values8(color_quad_u8* pDst, uint32 l, uint32 h) {
  pDst[0].a = static_cast<uint8>(l);
  pDst[1].a = static_cast<uint8>(h);

  // Entry (w + 1) weights l by (7 - w) and h by w, for w = 1..6.
  for (uint32 w = 1; w <= 6; w++)
    pDst[w + 1].a = static_cast<uint8>((l * (7 - w) + h * w) / 7);

  return 8;
}
static_cast(pData); m_data_size = data_size; if (!init_tables()) return false; if (!decode_palettes()) return false; return true; } bool unpack_level( void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 level_index) { uint32 cur_level_ofs = m_pHeader->m_level_ofs[level_index]; uint32 next_level_ofs = m_data_size; if ((level_index + 1) < (m_pHeader->m_levels)) next_level_ofs = m_pHeader->m_level_ofs[level_index + 1]; CRND_ASSERT(next_level_ofs > cur_level_ofs); return unpack_level(m_pData + cur_level_ofs, next_level_ofs - cur_level_ofs, pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); } bool unpack_level( const void* pSrc, uint32 src_size_in_bytes, void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 level_index) { #ifdef CRND_BUILD_DEBUG for (uint32 f = 0; f < m_pHeader->m_faces; f++) if (!pDst[f]) return false; #endif const uint32 width = math::maximum(m_pHeader->m_width >> level_index, 1U); const uint32 height = math::maximum(m_pHeader->m_height >> level_index, 1U); const uint32 blocks_x = (width + 3U) >> 2U; const uint32 blocks_y = (height + 3U) >> 2U; const uint32 block_size = m_pHeader->m_format == cCRNFmtDXT1 || m_pHeader->m_format == cCRNFmtDXT5A || m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC1S ? 
8 : 16; uint32 minimal_row_pitch = block_size * blocks_x; if (!row_pitch_in_bytes) row_pitch_in_bytes = minimal_row_pitch; else if ((row_pitch_in_bytes < minimal_row_pitch) || (row_pitch_in_bytes & 3)) return false; if (dst_size_in_bytes < row_pitch_in_bytes * blocks_y) return false; if (!m_codec.start_decoding(static_cast(pSrc), src_size_in_bytes)) return false; bool status = false; switch (m_pHeader->m_format) { case cCRNFmtDXT1: case cCRNFmtETC1S: status = unpack_dxt1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); break; case cCRNFmtDXT5: case cCRNFmtDXT5_CCxY: case cCRNFmtDXT5_xGBR: case cCRNFmtDXT5_AGBR: case cCRNFmtDXT5_xGxR: case cCRNFmtETC2AS: status = unpack_dxt5((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); break; case cCRNFmtDXT5A: status = unpack_dxt5a((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); break; case cCRNFmtDXN_XY: case cCRNFmtDXN_YX: status = unpack_dxn((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); break; case cCRNFmtETC1: status = unpack_etc1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); break; case cCRNFmtETC2: status = unpack_etc1((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); break; case cCRNFmtETC2A: status = unpack_etc2a((uint8**)pDst, row_pitch_in_bytes, blocks_x, blocks_y); break; default: return false; } if (!status) return false; m_codec.stop_decoding(); return true; } inline const void* get_data() const { return m_pData; } inline uint32 get_data_size() const { return m_data_size; } private: enum { cMagicValue = 0x1EF9CABD }; uint32 m_magic; const uint8* m_pData; uint32 m_data_size; const crn_header* m_pHeader; symbol_codec m_codec; static_huffman_data_model m_reference_encoding_dm; static_huffman_data_model m_endpoint_delta_dm[2]; static_huffman_data_model m_selector_delta_dm[2]; crnd::vector m_color_endpoints; crnd::vector m_color_selectors; crnd::vector m_alpha_endpoints; crnd::vector m_alpha_selectors; struct block_buffer_element { uint16 endpoint_reference; uint16 
color_endpoint_index; uint16 alpha0_endpoint_index; uint16 alpha1_endpoint_index; }; crnd::vector m_block_buffer; bool init_tables() { if (!m_codec.start_decoding(m_pData + m_pHeader->m_tables_ofs, m_pHeader->m_tables_size)) return false; if (!m_codec.decode_receive_static_data_model(m_reference_encoding_dm)) return false; if ((!m_pHeader->m_color_endpoints.m_num) && (!m_pHeader->m_alpha_endpoints.m_num)) return false; if (m_pHeader->m_color_endpoints.m_num) { if (!m_codec.decode_receive_static_data_model(m_endpoint_delta_dm[0])) return false; if (!m_codec.decode_receive_static_data_model(m_selector_delta_dm[0])) return false; } if (m_pHeader->m_alpha_endpoints.m_num) { if (!m_codec.decode_receive_static_data_model(m_endpoint_delta_dm[1])) return false; if (!m_codec.decode_receive_static_data_model(m_selector_delta_dm[1])) return false; } m_codec.stop_decoding(); return true; } bool decode_palettes() { if (m_pHeader->m_color_endpoints.m_num) { if (!decode_color_endpoints()) return false; if (!decode_color_selectors()) return false; } if (m_pHeader->m_alpha_endpoints.m_num) { if (!decode_alpha_endpoints()) return false; if (!(m_pHeader->m_format == cCRNFmtETC2AS ? decode_alpha_selectors_etcs() : m_pHeader->m_format == cCRNFmtETC2A ? 
decode_alpha_selectors_etc() : decode_alpha_selectors())) return false; } return true; } bool decode_color_endpoints() { const uint32 num_color_endpoints = m_pHeader->m_color_endpoints.m_num; const bool has_etc_color_blocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A || m_pHeader->m_format == cCRNFmtETC1S || m_pHeader->m_format == cCRNFmtETC2AS; const bool has_subblocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A; if (!m_color_endpoints.resize(num_color_endpoints)) return false; if (!m_codec.start_decoding(m_pData + m_pHeader->m_color_endpoints.m_ofs, m_pHeader->m_color_endpoints.m_size)) return false; static_huffman_data_model dm[2]; for (uint32 i = 0; i < (has_etc_color_blocks ? 1U : 2U); i++) if (!m_codec.decode_receive_static_data_model(dm[i])) return false; uint32 a = 0, b = 0, c = 0; uint32 d = 0, e = 0, f = 0; uint32* CRND_RESTRICT pDst = &m_color_endpoints[0]; for (uint32 i = 0; i < num_color_endpoints; i++) { if (has_etc_color_blocks) { for (b = 0; b < 32; b += 8) a += m_codec.decode(dm[0]) << b; a &= 0x1F1F1F1F; *pDst++ = has_subblocks ? 
a : (a & 0x07000000) << 5 | (a & 0x07000000) << 2 | 0x02000000 | (a & 0x001F1F1F) << 3; } else { a = (a + m_codec.decode(dm[0])) & 31; b = (b + m_codec.decode(dm[1])) & 63; c = (c + m_codec.decode(dm[0])) & 31; d = (d + m_codec.decode(dm[0])) & 31; e = (e + m_codec.decode(dm[1])) & 63; f = (f + m_codec.decode(dm[0])) & 31; *pDst++ = c | (b << 5U) | (a << 11U) | (f << 16U) | (e << 21U) | (d << 27U); } } m_codec.stop_decoding(); return true; } bool decode_color_selectors() { const bool has_etc_color_blocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A || m_pHeader->m_format == cCRNFmtETC1S || m_pHeader->m_format == cCRNFmtETC2AS; const bool has_subblocks = m_pHeader->m_format == cCRNFmtETC1 || m_pHeader->m_format == cCRNFmtETC2 || m_pHeader->m_format == cCRNFmtETC2A; m_codec.start_decoding(m_pData + m_pHeader->m_color_selectors.m_ofs, m_pHeader->m_color_selectors.m_size); static_huffman_data_model dm; m_codec.decode_receive_static_data_model(dm); m_color_selectors.resize(m_pHeader->m_color_selectors.m_num << (has_subblocks ? 1 : 0)); for (uint32 s = 0, i = 0; i < m_pHeader->m_color_selectors.m_num; i++) { for (uint32 j = 0; j < 32; j += 4) s ^= m_codec.decode(dm) << j; if (has_etc_color_blocks) { for (uint32 selector = (~s & 0xAAAAAAAA) | (~(s ^ s >> 1) & 0x55555555), t = 8, h = 0; h < 4; h++, t -= 15) { for (uint32 w = 0; w < 4; w++, t += 4) { if (has_subblocks) { uint32 s0 = selector >> (w << 3 | h << 1); m_color_selectors[i << 1] |= ((s0 >> 1 & 1) | (s0 & 1) << 16) << (t & 15); } uint32 s1 = selector >> (h << 3 | w << 1); m_color_selectors[has_subblocks ? 
i << 1 | 1 : i] |= ((s1 >> 1 & 1) | (s1 & 1) << 16) << (t & 15); } } } else { m_color_selectors[i] = ((s ^ s << 1) & 0xAAAAAAAA) | (s >> 1 & 0x55555555); } } m_codec.stop_decoding(); return true; } bool decode_alpha_endpoints() { const uint32 num_alpha_endpoints = m_pHeader->m_alpha_endpoints.m_num; if (!m_codec.start_decoding(m_pData + m_pHeader->m_alpha_endpoints.m_ofs, m_pHeader->m_alpha_endpoints.m_size)) return false; static_huffman_data_model dm; if (!m_codec.decode_receive_static_data_model(dm)) return false; if (!m_alpha_endpoints.resize(num_alpha_endpoints)) return false; uint16* CRND_RESTRICT pDst = &m_alpha_endpoints[0]; uint32 a = 0, b = 0; for (uint32 i = 0; i < num_alpha_endpoints; i++) { a = (a + m_codec.decode(dm)) & 255; b = (b + m_codec.decode(dm)) & 255; *pDst++ = (uint16)(a | (b << 8)); } m_codec.stop_decoding(); return true; } bool decode_alpha_selectors() { m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size); static_huffman_data_model dm; m_codec.decode_receive_static_data_model(dm); m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 3); uint8 dxt5_from_linear[64]; for (uint32 i = 0; i < 64; i++) dxt5_from_linear[i] = g_dxt5_from_linear[i & 7] | g_dxt5_from_linear[i >> 3] << 3; for (uint32 s0_linear = 0, s1_linear = 0, i = 0; i < m_alpha_selectors.size();) { uint32 s0 = 0, s1 = 0; for (uint32 j = 0; j < 24; s0 |= dxt5_from_linear[s0_linear >> j & 0x3F] << j, j += 6) s0_linear ^= m_codec.decode(dm) << j; for (uint32 j = 0; j < 24; s1 |= dxt5_from_linear[s1_linear >> j & 0x3F] << j, j += 6) s1_linear ^= m_codec.decode(dm) << j; m_alpha_selectors[i++] = s0; m_alpha_selectors[i++] = s0 >> 16 | s1 << 8; m_alpha_selectors[i++] = s1 >> 8; } m_codec.stop_decoding(); return true; } bool decode_alpha_selectors_etc() { m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size); static_huffman_data_model dm; 
m_codec.decode_receive_static_data_model(dm); m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 6); uint8 s_linear[8] = {}; uint8* data = (uint8*)m_alpha_selectors.begin(); for (uint i = 0; i < m_alpha_selectors.size(); i += 6, data += 12) { for (uint s_group = 0, p = 0; p < 16; p++) { s_group = p & 1 ? s_group >> 3 : s_linear[p >> 1] ^= m_codec.decode(dm); uint8 s = s_group & 7; if (s <= 3) s = 3 - s; uint8 d = 3 * (p + 1); uint8 byte_offset = d >> 3; uint8 bit_offset = d & 7; data[byte_offset] |= s << (8 - bit_offset); if (bit_offset < 3) data[byte_offset - 1] |= s >> bit_offset; d += 9 * ((p & 3) - (p >> 2)); byte_offset = d >> 3; bit_offset = d & 7; data[byte_offset + 6] |= s << (8 - bit_offset); if (bit_offset < 3) data[byte_offset + 5] |= s >> bit_offset; } } m_codec.stop_decoding(); return true; } bool decode_alpha_selectors_etcs() { m_codec.start_decoding(m_pData + m_pHeader->m_alpha_selectors.m_ofs, m_pHeader->m_alpha_selectors.m_size); static_huffman_data_model dm; m_codec.decode_receive_static_data_model(dm); m_alpha_selectors.resize(m_pHeader->m_alpha_selectors.m_num * 3); uint8 s_linear[8] = {}; uint8* data = (uint8*)m_alpha_selectors.begin(); for (uint i = 0; i < (m_alpha_selectors.size() << 1); i += 6) { for (uint s_group = 0, p = 0; p < 16; p++) { s_group = p & 1 ? 
s_group >> 3 : s_linear[p >> 1] ^= m_codec.decode(dm); uint8 s = s_group & 7; if (s <= 3) s = 3 - s; uint8 d = 3 * (p + 1) + 9 * ((p & 3) - (p >> 2)); uint8 byte_offset = d >> 3; uint8 bit_offset = d & 7; data[i + byte_offset] |= s << (8 - bit_offset); if (bit_offset < 3) data[i + byte_offset - 1] |= s >> bit_offset; } } m_codec.stop_decoding(); return true; } static inline uint32 tiled_offset_2d_outer(uint32 y, uint32 AlignedWidth, uint32 LogBpp) { uint32 Macro = ((y >> 5) * (AlignedWidth >> 5)) << (LogBpp + 7); uint32 Micro = ((y & 6) << 2) << LogBpp; return Macro + ((Micro & ~15) << 1) + (Micro & 15) + ((y & 8) << (3 + LogBpp)) + ((y & 1) << 4); } static inline uint32 tiled_offset_2d_inner(uint32 x, uint32 y, uint32 LogBpp, uint32 BaseOffset) { uint32 Macro = (x >> 5) << (LogBpp + 7); uint32 Micro = (x & 7) << LogBpp; uint32 Offset = BaseOffset + Macro + ((Micro & ~15) << 1) + (Micro & 15); return ((Offset & ~511) << 3) + ((Offset & 448) << 2) + (Offset & 63) + ((y & 16) << 7) + (((((y & 8) >> 2) + (x >> 3)) & 3) << 6); } static inline void limit(uint& x, uint n) { int v = x - n; int msk = (v >> 31); x = (x & msk) | (v & ~msk); } bool unpack_dxt1(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) { const uint32 num_color_endpoints = m_color_endpoints.size(); const uint32 width = (output_width + 1) & ~1; const uint32 height = (output_height + 1) & ~1; const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 1); if (m_block_buffer.size() < width) m_block_buffer.resize(width); uint32 color_endpoint_index = 0; uint8 reference_group = 0; for (uint32 f = 0; f < m_pHeader->m_faces; f++) { uint32* pData = (uint32*)pDst[f]; for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { bool visible = y < output_height; for (uint32 x = 0; x < width; x++, pData += 2) { visible = visible && x < output_width; if (!(y & 1) && !(x & 1)) reference_group = m_codec.decode(m_reference_encoding_dm); block_buffer_element 
&buffer = m_block_buffer[x]; uint8 endpoint_reference; if (y & 1) { endpoint_reference = buffer.endpoint_reference; } else { endpoint_reference = reference_group & 3; reference_group >>= 2; buffer.endpoint_reference = reference_group & 3; reference_group >>= 2; } if (!endpoint_reference) { color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); if (color_endpoint_index >= num_color_endpoints) color_endpoint_index -= num_color_endpoints; buffer.color_endpoint_index = color_endpoint_index; } else if (endpoint_reference == 1) { buffer.color_endpoint_index = color_endpoint_index; } else { color_endpoint_index = buffer.color_endpoint_index; } uint32 color_selector_index = m_codec.decode(m_selector_delta_dm[0]); if (visible) { pData[0] = m_color_endpoints[color_endpoint_index]; pData[1] = m_color_selectors[color_selector_index]; } } } } return true; } bool unpack_dxt5(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) { const uint32 num_color_endpoints = m_color_endpoints.size(); const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); const uint32 width = (output_width + 1) & ~1; const uint32 height = (output_height + 1) & ~1; const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 2); if (m_block_buffer.size() < width) m_block_buffer.resize(width); uint32 color_endpoint_index = 0; uint32 alpha0_endpoint_index = 0; uint8 reference_group = 0; for (uint32 f = 0; f < m_pHeader->m_faces; f++) { uint32* pData = (uint32*)pDst[f]; for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { bool visible = y < output_height; for (uint32 x = 0; x < width; x++, pData += 4) { visible = visible && x < output_width; if (!(y & 1) && !(x & 1)) reference_group = m_codec.decode(m_reference_encoding_dm); block_buffer_element &buffer = m_block_buffer[x]; uint8 endpoint_reference; if (y & 1) { endpoint_reference = buffer.endpoint_reference; } else { endpoint_reference = reference_group & 3; reference_group >>= 2; 
buffer.endpoint_reference = reference_group & 3; reference_group >>= 2; } if (!endpoint_reference) { color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); if (color_endpoint_index >= num_color_endpoints) color_endpoint_index -= num_color_endpoints; buffer.color_endpoint_index = color_endpoint_index; alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); if (alpha0_endpoint_index >= num_alpha_endpoints) alpha0_endpoint_index -= num_alpha_endpoints; buffer.alpha0_endpoint_index = alpha0_endpoint_index; } else if (endpoint_reference == 1) { buffer.color_endpoint_index = color_endpoint_index; buffer.alpha0_endpoint_index = alpha0_endpoint_index; } else { color_endpoint_index = buffer.color_endpoint_index; alpha0_endpoint_index = buffer.alpha0_endpoint_index; } uint32 color_selector_index = m_codec.decode(m_selector_delta_dm[0]); uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); if (visible) { const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 3]; pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | (pAlpha0_selectors[0] << 16); pData[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); pData[2] = m_color_endpoints[color_endpoint_index]; pData[3] = m_color_selectors[color_selector_index]; } } } } return true; } bool unpack_dxn(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) { const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); const uint32 width = (output_width + 1) & ~1; const uint32 height = (output_height + 1) & ~1; const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 2); if (m_block_buffer.size() < width) m_block_buffer.resize(width); uint32 alpha0_endpoint_index = 0; uint32 alpha1_endpoint_index = 0; uint8 reference_group = 0; for (uint32 f = 0; f < m_pHeader->m_faces; f++) { uint32* pData = (uint32*)pDst[f]; for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { bool visible = y < output_height; for (uint32 x = 
0; x < width; x++, pData += 4) { visible = visible && x < output_width; if (!(y & 1) && !(x & 1)) reference_group = m_codec.decode(m_reference_encoding_dm); block_buffer_element &buffer = m_block_buffer[x]; uint8 endpoint_reference; if (y & 1) { endpoint_reference = buffer.endpoint_reference; } else { endpoint_reference = reference_group & 3; reference_group >>= 2; buffer.endpoint_reference = reference_group & 3; reference_group >>= 2; } if (!endpoint_reference) { alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); if (alpha0_endpoint_index >= num_alpha_endpoints) alpha0_endpoint_index -= num_alpha_endpoints; buffer.alpha0_endpoint_index = alpha0_endpoint_index; alpha1_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); if (alpha1_endpoint_index >= num_alpha_endpoints) alpha1_endpoint_index -= num_alpha_endpoints; buffer.alpha1_endpoint_index = alpha1_endpoint_index; } else if (endpoint_reference == 1) { buffer.alpha0_endpoint_index = alpha0_endpoint_index; buffer.alpha1_endpoint_index = alpha1_endpoint_index; } else { alpha0_endpoint_index = buffer.alpha0_endpoint_index; alpha1_endpoint_index = buffer.alpha1_endpoint_index; } uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); uint32 alpha1_selector_index = m_codec.decode(m_selector_delta_dm[1]); if (visible) { const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 3]; const uint16* pAlpha1_selectors = &m_alpha_selectors[alpha1_selector_index * 3]; pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | (pAlpha0_selectors[0] << 16); pData[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); pData[2] = m_alpha_endpoints[alpha1_endpoint_index] | (pAlpha1_selectors[0] << 16); pData[3] = pAlpha1_selectors[1] | (pAlpha1_selectors[2] << 16); } } } } return true; } bool unpack_dxt5a(uint8** pDst, uint32 row_pitch_in_bytes, uint32 output_width, uint32 output_height) { const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); const uint32 width = 
(output_width + 1) & ~1; const uint32 height = (output_height + 1) & ~1; const int32 delta_pitch_in_dwords = (row_pitch_in_bytes >> 2) - (width << 1); if (m_block_buffer.size() < width) m_block_buffer.resize(width); uint32 alpha0_endpoint_index = 0; uint8 reference_group = 0; for (uint32 f = 0; f < m_pHeader->m_faces; f++) { uint32* pData = (uint32*)pDst[f]; for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { bool visible = y < output_height; for (uint32 x = 0; x < width; x++, pData += 2) { visible = visible && x < output_width; if (!(y & 1) && !(x & 1)) reference_group = m_codec.decode(m_reference_encoding_dm); block_buffer_element &buffer = m_block_buffer[x]; uint8 endpoint_reference; if (y & 1) { endpoint_reference = buffer.endpoint_reference; } else { endpoint_reference = reference_group & 3; reference_group >>= 2; buffer.endpoint_reference = reference_group & 3; reference_group >>= 2; } if (!endpoint_reference) { alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); if (alpha0_endpoint_index >= num_alpha_endpoints) alpha0_endpoint_index -= num_alpha_endpoints; buffer.alpha0_endpoint_index = alpha0_endpoint_index; } else if (endpoint_reference == 1) { buffer.alpha0_endpoint_index = alpha0_endpoint_index; } else { alpha0_endpoint_index = buffer.alpha0_endpoint_index; } uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); if (visible) { const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 3]; pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | (pAlpha0_selectors[0] << 16); pData[1] = pAlpha0_selectors[1] | (pAlpha0_selectors[2] << 16); } } } } return true; } bool unpack_etc1(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) { const uint32 num_color_endpoints = m_color_endpoints.size(); const uint32 width = (output_width + 1) & ~1; const uint32 height = (output_height + 1) & ~1; const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 
1); if (m_block_buffer.size() < width << 1) m_block_buffer.resize(width << 1); uint32 color_endpoint_index = 0, diagonal_color_endpoint_index = 0; uint8 reference_group = 0; for (uint32 f = 0; f < m_pHeader->m_faces; f++) { uint32* pData = (uint32*)pDst[f]; for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { bool visible = y < output_height; for (uint32 x = 0; x < width; x++, pData += 2) { visible = visible && x < output_width; block_buffer_element &buffer = m_block_buffer[x << 1]; uint8 endpoint_reference, block_endpoint[4], e0[4], e1[4]; if (y & 1) { endpoint_reference = buffer.endpoint_reference; } else { reference_group = m_codec.decode(m_reference_encoding_dm); endpoint_reference = (reference_group & 3) | (reference_group >> 2 & 12); buffer.endpoint_reference = (reference_group >> 2 & 3) | (reference_group >> 4 & 12); } if (!(endpoint_reference & 3)) { color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); if (color_endpoint_index >= num_color_endpoints) color_endpoint_index -= num_color_endpoints; buffer.color_endpoint_index = color_endpoint_index; } else if ((endpoint_reference & 3) == 1) { buffer.color_endpoint_index = color_endpoint_index; } else if ((endpoint_reference & 3) == 3) { buffer.color_endpoint_index = color_endpoint_index = diagonal_color_endpoint_index; } else { color_endpoint_index = buffer.color_endpoint_index; } endpoint_reference >>= 2; *(uint32*)&e0 = m_color_endpoints[color_endpoint_index]; uint32 selector_index = m_codec.decode(m_selector_delta_dm[0]); if (endpoint_reference) { color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); if (color_endpoint_index >= num_color_endpoints) color_endpoint_index -= num_color_endpoints; } diagonal_color_endpoint_index = m_block_buffer[x << 1 | 1].color_endpoint_index; m_block_buffer[x << 1 | 1].color_endpoint_index = color_endpoint_index; *(uint32*)&e1 = m_color_endpoints[color_endpoint_index]; if (visible) { uint32 flip = endpoint_reference >> 1 ^ 1, diff = 1; for 
(uint c = 0; diff && c < 3; c++) diff = e0[c] + 3 >= e1[c] && e1[c] + 4 >= e0[c] ? diff : 0; for (uint c = 0; c < 3; c++) block_endpoint[c] = diff ? e0[c] << 3 | ((e1[c] - e0[c]) & 7) : (e0[c] << 3 & 0xF0) | e1[c] >> 1; block_endpoint[3] = e0[3] << 5 | e1[3] << 2 | diff << 1 | flip; pData[0] = *(uint32*)&block_endpoint; pData[1] = m_color_selectors[selector_index << 1 | flip]; } } } } return true; } bool unpack_etc2a(uint8** pDst, uint32 output_pitch_in_bytes, uint32 output_width, uint32 output_height) { const uint32 num_color_endpoints = m_color_endpoints.size(); const uint32 num_alpha_endpoints = m_alpha_endpoints.size(); const uint32 width = (output_width + 1) & ~1; const uint32 height = (output_height + 1) & ~1; const int32 delta_pitch_in_dwords = (output_pitch_in_bytes >> 2) - (width << 2); if (m_block_buffer.size() < width << 1) m_block_buffer.resize(width << 1); uint32 color_endpoint_index = 0, diagonal_color_endpoint_index = 0, alpha0_endpoint_index = 0, diagonal_alpha0_endpoint_index = 0; uint8 reference_group = 0; for (uint32 f = 0; f < m_pHeader->m_faces; f++) { uint32* pData = (uint32*)pDst[f]; for (uint32 y = 0; y < height; y++, pData += delta_pitch_in_dwords) { bool visible = y < output_height; for (uint32 x = 0; x < width; x++, pData += 4) { visible = visible && x < output_width; block_buffer_element &buffer = m_block_buffer[x << 1]; uint8 endpoint_reference, block_endpoint[4], e0[4], e1[4]; if (y & 1) { endpoint_reference = buffer.endpoint_reference; } else { reference_group = m_codec.decode(m_reference_encoding_dm); endpoint_reference = (reference_group & 3) | (reference_group >> 2 & 12); buffer.endpoint_reference = (reference_group >> 2 & 3) | (reference_group >> 4 & 12); } if (!(endpoint_reference & 3)) { color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); if (color_endpoint_index >= num_color_endpoints) color_endpoint_index -= num_color_endpoints; alpha0_endpoint_index += m_codec.decode(m_endpoint_delta_dm[1]); if 
(alpha0_endpoint_index >= num_alpha_endpoints) alpha0_endpoint_index -= num_alpha_endpoints; buffer.color_endpoint_index = color_endpoint_index; buffer.alpha0_endpoint_index = alpha0_endpoint_index; } else if ((endpoint_reference & 3) == 1) { buffer.color_endpoint_index = color_endpoint_index; buffer.alpha0_endpoint_index = alpha0_endpoint_index; } else if ((endpoint_reference & 3) == 3) { buffer.color_endpoint_index = color_endpoint_index = diagonal_color_endpoint_index; buffer.alpha0_endpoint_index = alpha0_endpoint_index = diagonal_alpha0_endpoint_index; } else { color_endpoint_index = buffer.color_endpoint_index; alpha0_endpoint_index = buffer.alpha0_endpoint_index; } endpoint_reference >>= 2; *(uint32*)&e0 = m_color_endpoints[color_endpoint_index]; uint32 color_selector_index = m_codec.decode(m_selector_delta_dm[0]); uint32 alpha0_selector_index = m_codec.decode(m_selector_delta_dm[1]); if (endpoint_reference) { color_endpoint_index += m_codec.decode(m_endpoint_delta_dm[0]); if (color_endpoint_index >= num_color_endpoints) color_endpoint_index -= num_color_endpoints; } *(uint32*)&e1 = m_color_endpoints[color_endpoint_index]; diagonal_color_endpoint_index = m_block_buffer[x << 1 | 1].color_endpoint_index; diagonal_alpha0_endpoint_index = m_block_buffer[x << 1 | 1].alpha0_endpoint_index; m_block_buffer[x << 1 | 1].color_endpoint_index = color_endpoint_index; m_block_buffer[x << 1 | 1].alpha0_endpoint_index = alpha0_endpoint_index; if (visible) { uint32 flip = endpoint_reference >> 1 ^ 1, diff = 1; for (uint c = 0; diff && c < 3; c++) diff = e0[c] + 3 >= e1[c] && e1[c] + 4 >= e0[c] ? diff : 0; for (uint c = 0; c < 3; c++) block_endpoint[c] = diff ? e0[c] << 3 | ((e1[c] - e0[c]) & 7) : (e0[c] << 3 & 0xF0) | e1[c] >> 1; block_endpoint[3] = e0[3] << 5 | e1[3] << 2 | diff << 1 | flip; const uint16* pAlpha0_selectors = &m_alpha_selectors[alpha0_selector_index * 6 + (flip ? 
3 : 0)]; pData[0] = m_alpha_endpoints[alpha0_endpoint_index] | pAlpha0_selectors[0] << 16; pData[1] = pAlpha0_selectors[1] | pAlpha0_selectors[2] << 16; pData[2] = *(uint32*)&block_endpoint; pData[3] = m_color_selectors[color_selector_index << 1 | flip]; } } } } return true; } }; crnd_unpack_context crnd_unpack_begin(const void* pData, uint32 data_size) { if ((!pData) || (data_size < cCRNHeaderMinSize)) return NULL; crn_unpacker* p = crnd_new(); if (!p) return NULL; if (!p->init(pData, data_size)) { crnd_delete(p); return NULL; } return p; } bool crnd_get_data(crnd_unpack_context pContext, const void** ppData, uint32* pData_size) { if (!pContext) return false; crn_unpacker* pUnpacker = static_cast(pContext); if (!pUnpacker->is_valid()) return false; if (ppData) *ppData = pUnpacker->get_data(); if (pData_size) *pData_size = pUnpacker->get_data_size(); return true; } bool crnd_unpack_level( crnd_unpack_context pContext, void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 level_index) { if ((!pContext) || (!pDst) || (dst_size_in_bytes < 8U) || (level_index >= cCRNMaxLevels)) return false; crn_unpacker* pUnpacker = static_cast(pContext); if (!pUnpacker->is_valid()) return false; return pUnpacker->unpack_level(pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); } bool crnd_unpack_level_segmented( crnd_unpack_context pContext, const void* pSrc, uint32 src_size_in_bytes, void** pDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 level_index) { if ((!pContext) || (!pSrc) || (!pDst) || (dst_size_in_bytes < 8U) || (level_index >= cCRNMaxLevels)) return false; crn_unpacker* pUnpacker = static_cast(pContext); if (!pUnpacker->is_valid()) return false; return pUnpacker->unpack_level(pSrc, src_size_in_bytes, pDst, dst_size_in_bytes, row_pitch_in_bytes, level_index); } bool crnd_unpack_end(crnd_unpack_context pContext) { if (!pContext) return false; crn_unpacker* pUnpacker = static_cast(pContext); if (!pUnpacker->is_valid()) return false; 
crnd_delete(pUnpacker); return true; } } // namespace crnd #endif // CRND_INCLUDE_CRND_H //------------------------------------------------------------------------------ // // crn_decomp.h uses the ZLIB license: // http://opensource.org/licenses/Zlib // // Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC // // This software is provided 'as-is', without any express or implied // warranty. In no event will the authors be held liable for any damages // arising from the use of this software. // // Permission is granted to anyone to use this software for any purpose, // including commercial applications, and to alter it and redistribute it // freely, subject to the following restrictions: // // 1. The origin of this software must not be misrepresented; you must not // claim that you wrote the original software. If you use this software // in a product, an acknowledgment in the product documentation would be // appreciated but is not required. // // 2. Altered source versions must be plainly marked as such, and must not be // misrepresented as being the original software. // // 3. This notice may not be removed or altered from any source distribution. // //------------------------------------------------------------------------------ DaemonEngine-crunch-ef4d32f/inc/crn_defs.h000066400000000000000000000276441503722002600205410ustar00rootroot00000000000000#ifndef CRND_INCLUDE_CRN_DEFS_H #define CRND_INCLUDE_CRN_DEFS_H // Include crnlib.h (only to bring in some basic CRN-related types). 
#include "crnlib.h"

#define CRND_LIB_VERSION 104
#define CRND_VERSION_STRING "01.04"

// Exactly one of CRND_BUILD_DEBUG / CRND_BUILD_RELEASE is defined, keyed off _DEBUG.
#ifdef _DEBUG
#define CRND_BUILD_DEBUG
#else
#define CRND_BUILD_RELEASE
#endif

// CRN decompression API
namespace crnd {
// Sized integer aliases used by the decompressor.
// NOTE(review): these assume 8-bit char, 16-bit short and 32-bit int - confirm for new targets.
typedef unsigned char uint8;
typedef signed char int8;
typedef unsigned short uint16;
typedef signed short int16;
typedef unsigned int uint32;
typedef unsigned int uint;
typedef signed int int32;

#ifdef __GNUC__
typedef unsigned long long uint64;
typedef long long int64;
#else
typedef unsigned __int64 uint64;
typedef signed __int64 int64;
#endif

// The crnd library assumes all allocation blocks have at least CRND_MIN_ALLOC_ALIGNMENT alignment.
const uint32 CRND_MIN_ALLOC_ALIGNMENT = sizeof(uint32) * 2U;

// realloc callback:
// Used to allocate, resize, or free memory blocks.
// If p is NULL, the realloc function attempts to allocate a block of at least size bytes. Returns NULL on out of memory.
// *pActual_size must be set to the actual size of the allocated block, which must be greater than or equal to the requested size.
// If p is not NULL, and size is 0, the realloc function frees the specified block, and always returns NULL. *pActual_size should be set to 0.
// If p is not NULL, and size is non-zero, the realloc function attempts to resize the specified block:
// If movable is false, the realloc function attempts to shrink or expand the block in-place. NULL is returned if the block cannot be resized in place, or if the
// underlying heap implementation doesn't support in-place resizing. Otherwise, the pointer to the original block is returned.
// If movable is true, it is permissible to move the block's contents if it cannot be resized in place. NULL is returned if the block cannot be resized in place, and there
// is not enough memory to relocate the block.
// In all cases, *pActual_size must be set to the actual size of the allocated block, whether it was successfully resized or not.
typedef void* (*crnd_realloc_func)(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data);

// msize callback: Returns the size of the memory block in bytes, or 0 if the pointer or block is invalid.
typedef size_t (*crnd_msize_func)(void* p, void* pUser_data);

// crnd_set_memory_callbacks() - Use to override the crnd library's memory allocation functions.
// If any input parameters are NULL, the memory callback functions are reset to the default functions.
// The default functions call malloc(), free(), _msize(), _expand(), etc.
void crnd_set_memory_callbacks(crnd_realloc_func pRealloc, crnd_msize_func pMSize, void* pUser_data);

// File-level information, filled in through the crn_file_info* parameter of
// crnd_validate_header() / crnd_validate_file().
// The constructor only sets m_struct_size; every other field is left uninitialized
// until one of those calls fills it in.
struct crn_file_info {
  inline crn_file_info()
      : m_struct_size(sizeof(crn_file_info)) {}

  uint32 m_struct_size;
  uint32 m_actual_data_size;
  uint32 m_header_size;
  uint32 m_total_palette_size;
  uint32 m_tables_size;
  uint32 m_levels;
  // One entry per possible mip level (cCRNMaxLevels slots).
  uint32 m_level_compressed_size[cCRNMaxLevels];
  uint32 m_color_endpoint_palette_entries;
  uint32 m_color_selector_palette_entries;
  uint32 m_alpha_endpoint_palette_entries;
  uint32 m_alpha_selector_palette_entries;
};

// Whole-texture information, filled in by crnd_get_texture_info().
// The constructor only sets m_struct_size; the remaining fields start uninitialized.
struct crn_texture_info {
  inline crn_texture_info()
      : m_struct_size(sizeof(crn_texture_info)) {}

  uint32 m_struct_size;
  uint32 m_width;
  uint32 m_height;
  uint32 m_levels;
  uint32 m_faces;
  uint32 m_bytes_per_block;
  uint32 m_userdata0;
  uint32 m_userdata1;
  crn_format m_format;
};

// Per-mipmap-level information, filled in by crnd_get_level_info().
// The constructor only sets m_struct_size; the remaining fields start uninitialized.
struct crn_level_info {
  inline crn_level_info()
      : m_struct_size(sizeof(crn_level_info)) {}

  uint32 m_struct_size;
  uint32 m_width;
  uint32 m_height;
  uint32 m_faces;
  uint32 m_blocks_x;
  uint32 m_blocks_y;
  uint32 m_bytes_per_block;
  crn_format m_format;
};

// Returns the FOURCC format code corresponding to the specified CRN format.
uint32 crnd_crn_format_to_fourcc(crn_format fmt);

// Returns the fundamental GPU format given a potentially swizzled DXT5 crn_format.
crn_format crnd_get_fundamental_dxt_format(crn_format fmt);

// Returns the size of the crn_format in bits/texel (either 4 or 8).
uint32 crnd_get_crn_format_bits_per_texel(crn_format fmt);

// Returns the number of bytes per DXTn block (8 or 16).
uint32 crnd_get_bytes_per_dxt_block(crn_format fmt);

// Validates the file header, performing a CRC check on it and verifying that there is
// e.g. a sane number of levels.
// pData/data_size need only include the header, but may be the whole file.
// The crn_file_info.m_struct_size field must be set before calling this function
// (the crn_file_info constructor already does this).
bool crnd_validate_header(const void* pData, uint32 data_size, crn_file_info* pFile_info);

// Validates the entire file by calling crnd_validate_header, then checking the data CRC
// for the whole file.
// This is not something you want to be doing much!
// The crn_file_info.m_struct_size field must be set before calling this function.
bool crnd_validate_file(const void* pData, uint32 data_size, crn_file_info* pFile_info);

// Retrieves texture information from the CRN file.
// The crn_texture_info.m_struct_size field must be set before calling this function.
bool crnd_get_texture_info(const void* pData, uint32 data_size, crn_texture_info* pTexture_info);

// Retrieves mipmap level specific information from the CRN file.
// The crn_level_info.m_struct_size field must be set before calling this function.
bool crnd_get_level_info(const void* pData, uint32 data_size, uint32 level_index, crn_level_info* pLevel_info);

// Transcode/unpack context handle.
// Opaque to callers: obtained from crnd_unpack_begin() and released with crnd_unpack_end().
typedef void* crnd_unpack_context;

// crnd_unpack_begin() - Decompresses the texture's decoder tables and endpoint/selector palettes.
// Once you call this function, you may call crnd_unpack_level() to unpack one or more mip levels.
// Don't call this once per mip level (unless you absolutely must)!
// This function allocates enough memory to hold: Huffman decompression tables, and the endpoint/selector palettes (color and/or alpha).
// Worst case allocation is approx. 200k, assuming all palettes contain 8192 entries.
// pData must point to a buffer holding all of the compressed .CRN file data. // This buffer must be stable until crnd_unpack_end() is called. // Returns NULL if out of memory, or if any of the input parameters are invalid. crnd_unpack_context crnd_unpack_begin(const void* pData, uint32 data_size); // Returns a pointer to the compressed .CRN data associated with a crnd_unpack_context. // Returns false if any of the input parameters are invalid. bool crnd_get_data(crnd_unpack_context pContext, const void** ppData, uint32* pData_size); // crnd_unpack_level() - Transcodes the specified mipmap level to a destination buffer in cached or write combined memory. // pContext - Context created by a call to crnd_unpack_begin(). // ppDst - A pointer to an array of 1 or 6 destination buffer pointers. Cubemaps require an array of 6 pointers, 2D textures require an array of 1 pointer. // dst_size_in_bytes - Optional size of each destination buffer. Only used for debugging - OK to set to UINT32_MAX. // row_pitch_in_bytes - The pitch in bytes from one row of DXT blocks to the next. Must be a multiple of 4. // level_index - mipmap level index, where 0 is the largest/first level. // Returns false if any of the input parameters, or the compressed stream, are invalid. // This function does not allocate any memory. bool crnd_unpack_level( crnd_unpack_context pContext, void** ppDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes, uint32 level_index); // crnd_unpack_level_segmented() - Unpacks the specified mipmap level from a "segmented" CRN file. // See the crnd_create_segmented_file() API below. // Segmented files allow the user to control where the compressed mipmap data is stored. 
bool crnd_unpack_level_segmented(
    crnd_unpack_context pContext,
    const void* pSrc, uint32 src_size_in_bytes,
    void** ppDst, uint32 dst_size_in_bytes, uint32 row_pitch_in_bytes,
    uint32 level_index);

// crnd_unpack_end() - Frees the decompress tables and unpacked palettes associated with the specified unpack context.
// Returns false if the context is NULL, or if it points to an invalid context.
// This function frees all memory associated with the context.
bool crnd_unpack_end(crnd_unpack_context pContext);

// The following APIs allow the user to create "segmented" CRN files. A segmented file contains multiple pieces:
// - Base data: Header + compression tables
// - Level data: Individual mipmap levels
// This allows mipmap levels from multiple CRN files to be tightly packed together into single files.

// Returns a pointer to the level's compressed data, and optionally returns the level's compressed data size if pSize is not NULL.
const void* crnd_get_level_data(const void* pData, uint32 data_size, uint32 level_index, uint32* pSize);

// Returns the compressed size of the texture's header and compression tables (but no levels).
uint32 crnd_get_segmented_file_size(const void* pData, uint32 data_size);

// Creates a "segmented" CRN texture from a normal CRN texture. The new texture will be created at pBase_data, and will be crnd_get_segmented_file_size() bytes long.
// base_data_size must be >= crnd_get_segmented_file_size().
// The base data will contain the CRN header and compression tables, but no mipmap data.
bool crnd_create_segmented_file(const void* pData, uint32 data_size, void* pBase_data, uint base_data_size);

}  // namespace crnd

// Low-level CRN file header cracking.
namespace crnd { template struct crn_packed_uint { inline crn_packed_uint() {} inline crn_packed_uint(unsigned int val) { *this = val; } inline crn_packed_uint(const crn_packed_uint& other) { *this = other; } inline crn_packed_uint& operator=(const crn_packed_uint& rhs) { if (this != &rhs) memcpy(m_buf, rhs.m_buf, sizeof(m_buf)); return *this; } inline crn_packed_uint& operator=(unsigned int val) { //CRND_ASSERT((N == 4U) || (val < (1U << (N * 8U)))); val <<= (8U * (4U - N)); for (unsigned int i = 0; i < N; i++) { m_buf[i] = static_cast(val >> 24U); val <<= 8U; } return *this; } inline operator unsigned int() const { switch (N) { case 1: return m_buf[0]; case 2: return (m_buf[0] << 8U) | m_buf[1]; case 3: return (m_buf[0] << 16U) | (m_buf[1] << 8U) | (m_buf[2]); default: return (m_buf[0] << 24U) | (m_buf[1] << 16U) | (m_buf[2] << 8U) | (m_buf[3]); } } unsigned char m_buf[N]; }; #pragma pack(push) #pragma pack(1) struct crn_palette { crn_packed_uint<3> m_ofs; crn_packed_uint<3> m_size; crn_packed_uint<2> m_num; }; enum crn_header_flags { // If set, the compressed mipmap level data is not located after the file's base data - it will be separately managed by the user instead. 
cCRNHeaderFlagSegmented = 1 }; struct crn_header { enum { cCRNSigValue = ('H' << 8) | 'x' }; crn_packed_uint<2> m_sig; crn_packed_uint<2> m_header_size; crn_packed_uint<2> m_header_crc16; crn_packed_uint<4> m_data_size; crn_packed_uint<2> m_data_crc16; crn_packed_uint<2> m_width; crn_packed_uint<2> m_height; crn_packed_uint<1> m_levels; crn_packed_uint<1> m_faces; crn_packed_uint<1> m_format; crn_packed_uint<2> m_flags; crn_packed_uint<4> m_reserved; crn_packed_uint<4> m_userdata0; crn_packed_uint<4> m_userdata1; crn_palette m_color_endpoints; crn_palette m_color_selectors; crn_palette m_alpha_endpoints; crn_palette m_alpha_selectors; crn_packed_uint<2> m_tables_size; crn_packed_uint<3> m_tables_ofs; // m_level_ofs[] is actually an array of offsets: m_level_ofs[m_levels] crn_packed_uint<4> m_level_ofs[1]; }; const unsigned int cCRNHeaderMinSize = 62U; #pragma pack(pop) } // namespace crnd #endif // CRND_INCLUDE_CRN_DEFS_H DaemonEngine-crunch-ef4d32f/inc/crnlib.h000066400000000000000000000621541503722002600202220ustar00rootroot00000000000000// File: crnlib.h - Advanced DXTn texture compression library. // Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC // See copyright notice and license at the end of this file. // // This header file contains the public crnlib declarations for DXTn, // clustered DXTn, and CRN compression/decompression. // // Note: This library does NOT need to be linked into your game executable if // all you want to do is transcode .CRN files to raw DXTn bits at run-time. // The crn_decomp.h header file library contains all the code necessary for // decompression. 
// // Important: If compiling with gcc, be sure strict aliasing is disabled: -fno-strict-aliasing #ifndef CRNLIB_H #define CRNLIB_H #ifdef _MSC_VER #pragma warning(disable : 4127) // conditional expression is constant #endif #define CRNLIB_VERSION 104 #define CRNLIB_SUPPORT_ATI_COMPRESS 0 #define CRNLIB_SUPPORT_SQUISH 0 typedef unsigned char crn_uint8; typedef unsigned short crn_uint16; typedef unsigned int crn_uint32; typedef signed char crn_int8; typedef signed short crn_int16; typedef signed int crn_int32; typedef unsigned int crn_bool; // crnlib can compress to these file types. enum crn_file_type { // .CRN cCRNFileTypeCRN = 0, // .DDS using regular DXT or clustered DXT cCRNFileTypeDDS, cCRNFileTypeForceDWORD = 0xFFFFFFFF }; // Supported compressed pixel formats. // Basically all the standard DX9 formats, with some swizzled DXT5 formats // (most of them supported by ATI's Compressonator), along with some ATI/X360 GPU specific formats. enum crn_format { cCRNFmtInvalid = -1, cCRNFmtDXT1 = 0, cCRNFmtFirstValid = cCRNFmtDXT1, // cCRNFmtDXT3 is not currently supported when writing to CRN - only DDS. cCRNFmtDXT3, cCRNFmtDXT5, // Various DXT5 derivatives cCRNFmtDXT5_CCxY, // Luma-chroma cCRNFmtDXT5_xGxR, // Swizzled 2-component cCRNFmtDXT5_xGBR, // Swizzled 3-component cCRNFmtDXT5_AGBR, // Swizzled 4-component // ATI 3DC and X360 DXN cCRNFmtDXN_XY, cCRNFmtDXN_YX, // DXT5 alpha blocks only cCRNFmtDXT5A, cCRNFmtETC1, cCRNFmtETC2, cCRNFmtETC2A, cCRNFmtETC1S, cCRNFmtETC2AS, cCRNFmtTotal, cCRNFmtForceDWORD = 0xFFFFFFFF }; // Various library/file format limits. enum crn_limits { // Max. mipmap level resolution on any axis. cCRNMaxLevelResolution = 4096, cCRNMinPaletteSize = 8, cCRNMaxPaletteSize = 8192, cCRNMaxFaces = 6, cCRNMaxLevels = 16, cCRNMaxHelperThreads = 15, cCRNMinQualityLevel = 0, cCRNMaxQualityLevel = 255 }; // CRN/DDS compression flags. // See the m_flags member in the crn_comp_params struct, below. 
enum crn_comp_flags {
  // Enables perceptual colorspace distance metrics if set.
  // Important: Be sure to disable this when compressing non-sRGB colorspace images, like normal maps!
  // Default: Set.
  cCRNCompFlagPerceptual = 1,

  // Enables (up to) 8x8 macroblock usage if set. If disabled, only 4x4 blocks are allowed.
  // Compression ratio will be lower when disabled, but may cut down on blocky artifacts because the process used to determine
  // where large macroblocks can be used without artifacts isn't perfect.
  // Default: Set.
  cCRNCompFlagHierarchical = 2,

  // cCRNCompFlagQuick disables several output file optimizations - intended for things like quicker previews.
  // Default: Not set.
  cCRNCompFlagQuick = 4,

  // DXT1: OK to use DXT1 alpha blocks for better quality or DXT1A transparency.
  // DXT5: OK to use both DXT5 block types.
  // Currently only used when writing to .DDS files, as .CRN uses only a subset of the possible DXTn block types.
  // Default: Set.
  cCRNCompFlagUseBothBlockTypes = 8,

  // OK to use DXT1A transparent indices to encode black (assumes pixel shader ignores fetched alpha).
  // Currently only used when writing to .DDS files, .CRN never uses alpha blocks.
  // Default: Not set.
  cCRNCompFlagUseTransparentIndicesForBlack = 16,

  // Disables endpoint caching, for more deterministic output.
  // Currently only used when writing to .DDS files.
  // Default: Not set.
  cCRNCompFlagDisableEndpointCaching = 32,

  // If enabled, use the cCRNColorEndpointPaletteSize, etc. params to control the CRN palette sizes. Only useful when writing to .CRN files.
  // Default: Not set.
  cCRNCompFlagManualPaletteSizes = 64,

  // If enabled, DXT1A alpha blocks are used to encode single bit transparency.
  // Default: Not set.
  cCRNCompFlagDXT1AForTransparency = 128,

  // If enabled, the DXT1 compressor's color distance metric assumes the pixel shader will be converting the fetched RGB results to luma (Y part of YCbCr).
  // This increases quality when compressing grayscale images, because the compressor can spread the luma error among all three channels (i.e. it can generate blocks
  // with some chroma present if doing so will ultimately lead to lower luma error).
  // Only enable on grayscale source images.
  // Default: Not set.
  cCRNCompFlagGrayscaleSampling = 256,

  // If enabled, debug information will be output during compression.
  // Default: Not set.
  cCRNCompFlagDebugging = 0x80000000,

  cCRNCompFlagForceDWORD = 0xFFFFFFFF
};

// Controls the DXTn quality vs. speed tradeoff - only used when compressing to .DDS.
enum crn_dxt_quality {
  cCRNDXTQualitySuperFast,
  cCRNDXTQualityFast,
  cCRNDXTQualityNormal,
  cCRNDXTQualityBetter,
  cCRNDXTQualityUber,

  cCRNDXTQualityTotal,

  cCRNDXTQualityForceDWORD = 0xFFFFFFFF
};

// Which DXTn compressor to use when compressing to plain (non-clustered) .DDS.
enum crn_dxt_compressor_type {
  cCRNDXTCompressorCRN,   // Use crnlib's ETC1 or DXTc block compressor (default, highest quality, comparable or better than ati_compress or squish, and crnlib's ETC1 is a lot faster with similar quality to Ericsson's)
  cCRNDXTCompressorCRNF,  // Use crnlib's "fast" DXTc block compressor
  cCRNDXTCompressorRYG,   // Use RYG's DXTc block compressor (low quality, but very fast)

  // The two entries below exist only when the matching CRNLIB_SUPPORT_* macro is non-zero,
  // so the numeric value of cCRNTotalDXTCompressors depends on the build configuration.
#if CRNLIB_SUPPORT_ATI_COMPRESS
  cCRNDXTCompressorATI,
#endif

#if CRNLIB_SUPPORT_SQUISH
  cCRNDXTCompressorSquish,
#endif

  cCRNTotalDXTCompressors,

  cCRNDXTCompressorForceDWORD = 0xFFFFFFFF
};

// Progress callback function.
// Processing will stop prematurely (and fail) if the callback returns false.
// phase_index, total_phases - high level progress
// subphase_index, total_subphases - progress within current phase
typedef crn_bool (*crn_progress_callback_func)(crn_uint32 phase_index, crn_uint32 total_phases, crn_uint32 subphase_index, crn_uint32 total_subphases, void* pUser_data_ptr);

// CRN/DDS compression parameters struct.
struct crn_comp_params { inline crn_comp_params() { clear(); } // Clear struct to default parameters. inline void clear() { m_size_of_obj = sizeof(*this); m_file_type = cCRNFileTypeCRN; m_faces = 1; m_width = 0; m_height = 0; m_levels = 1; m_format = cCRNFmtDXT1; m_flags = cCRNCompFlagPerceptual | cCRNCompFlagHierarchical | cCRNCompFlagUseBothBlockTypes; for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) m_pImages[f][l] = NULL; m_target_bitrate = 0.0f; m_quality_level = cCRNMaxQualityLevel; m_dxt1a_alpha_threshold = 128; m_dxt_quality = cCRNDXTQualityUber; m_dxt_compressor_type = cCRNDXTCompressorCRN; m_alpha_component = 3; m_crn_adaptive_tile_color_psnr_derating = 2.0f; m_crn_adaptive_tile_alpha_psnr_derating = 2.0f; m_crn_color_endpoint_palette_size = 0; m_crn_color_selector_palette_size = 0; m_crn_alpha_endpoint_palette_size = 0; m_crn_alpha_selector_palette_size = 0; m_num_helper_threads = 0; m_userdata0 = 1; m_userdata1 = 0; m_pProgress_func = NULL; m_pProgress_func_data = NULL; } inline bool operator==(const crn_comp_params& rhs) const { #define CRNLIB_COMP(x) \ do { \ if ((x) != (rhs.x)) \ return false; \ } while (0) CRNLIB_COMP(m_size_of_obj); CRNLIB_COMP(m_file_type); CRNLIB_COMP(m_faces); CRNLIB_COMP(m_width); CRNLIB_COMP(m_height); CRNLIB_COMP(m_levels); CRNLIB_COMP(m_format); CRNLIB_COMP(m_flags); CRNLIB_COMP(m_target_bitrate); CRNLIB_COMP(m_quality_level); CRNLIB_COMP(m_dxt1a_alpha_threshold); CRNLIB_COMP(m_dxt_quality); CRNLIB_COMP(m_dxt_compressor_type); CRNLIB_COMP(m_alpha_component); CRNLIB_COMP(m_crn_adaptive_tile_color_psnr_derating); CRNLIB_COMP(m_crn_adaptive_tile_alpha_psnr_derating); CRNLIB_COMP(m_crn_color_endpoint_palette_size); CRNLIB_COMP(m_crn_color_selector_palette_size); CRNLIB_COMP(m_crn_alpha_endpoint_palette_size); CRNLIB_COMP(m_crn_alpha_selector_palette_size); CRNLIB_COMP(m_num_helper_threads); CRNLIB_COMP(m_userdata0); CRNLIB_COMP(m_userdata1); CRNLIB_COMP(m_pProgress_func); 
CRNLIB_COMP(m_pProgress_func_data); for (crn_uint32 f = 0; f < cCRNMaxFaces; f++) for (crn_uint32 l = 0; l < cCRNMaxLevels; l++) CRNLIB_COMP(m_pImages[f][l]); #undef CRNLIB_COMP return true; } // Returns true if the input parameters are reasonable. inline bool check() const { if ((m_file_type > cCRNFileTypeDDS) || (((int)m_quality_level < (int)cCRNMinQualityLevel) || ((int)m_quality_level > (int)cCRNMaxQualityLevel)) || (m_dxt1a_alpha_threshold > 255) || ((m_faces != 1) && (m_faces != 6)) || ((m_width < 1) || (m_width > cCRNMaxLevelResolution)) || ((m_height < 1) || (m_height > cCRNMaxLevelResolution)) || ((m_levels < 1) || (m_levels > cCRNMaxLevels)) || ((m_format < cCRNFmtDXT1) || (m_format >= cCRNFmtTotal)) || ((m_crn_color_endpoint_palette_size) && ((m_crn_color_endpoint_palette_size < cCRNMinPaletteSize) || (m_crn_color_endpoint_palette_size > cCRNMaxPaletteSize))) || ((m_crn_color_selector_palette_size) && ((m_crn_color_selector_palette_size < cCRNMinPaletteSize) || (m_crn_color_selector_palette_size > cCRNMaxPaletteSize))) || ((m_crn_alpha_endpoint_palette_size) && ((m_crn_alpha_endpoint_palette_size < cCRNMinPaletteSize) || (m_crn_alpha_endpoint_palette_size > cCRNMaxPaletteSize))) || ((m_crn_alpha_selector_palette_size) && ((m_crn_alpha_selector_palette_size < cCRNMinPaletteSize) || (m_crn_alpha_selector_palette_size > cCRNMaxPaletteSize))) || (m_alpha_component > 3) || (m_num_helper_threads > cCRNMaxHelperThreads) || (m_dxt_quality > cCRNDXTQualityUber) || (m_dxt_compressor_type >= cCRNTotalDXTCompressors)) { return false; } return true; } // Helper to set/get flags from m_flags member. inline bool get_flag(crn_comp_flags flag) const { return (m_flags & flag) != 0; } inline void set_flag(crn_comp_flags flag, bool val) { m_flags &= ~flag; if (val) m_flags |= flag; } crn_uint32 m_size_of_obj; crn_file_type m_file_type; // Output file type: cCRNFileTypeCRN or cCRNFileTypeDDS. 
crn_uint32 m_faces; // 1 (2D map) or 6 (cubemap) crn_uint32 m_width; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK crn_uint32 m_height; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK crn_uint32 m_levels; // [1,cCRNMaxLevelResolution], non-power of 2 OK, non-square OK crn_format m_format; // Output pixel format. crn_uint32 m_flags; // see crn_comp_flags enum // Array of pointers to 32bpp input images. const crn_uint32* m_pImages[cCRNMaxFaces][cCRNMaxLevels]; // Target bitrate - if non-zero, the compressor will use an interpolative search to find the // highest quality level that is <= the target bitrate. If it fails to find a bitrate high enough, it'll // try disabling adaptive block sizes (cCRNCompFlagHierarchical flag) and redo the search. This process can be pretty slow. float m_target_bitrate; // Desired quality level. // Currently, CRN and DDS quality levels are not compatible with eachother from an image quality standpoint. crn_uint32 m_quality_level; // [cCRNMinQualityLevel, cCRNMaxQualityLevel] // DXTn compression parameters. crn_uint32 m_dxt1a_alpha_threshold; crn_dxt_quality m_dxt_quality; crn_dxt_compressor_type m_dxt_compressor_type; // Alpha channel's component. Defaults to 3. crn_uint32 m_alpha_component; // Various low-level CRN specific parameters. float m_crn_adaptive_tile_color_psnr_derating; float m_crn_adaptive_tile_alpha_psnr_derating; crn_uint32 m_crn_color_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] crn_uint32 m_crn_color_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] crn_uint32 m_crn_alpha_endpoint_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] crn_uint32 m_crn_alpha_selector_palette_size; // [cCRNMinPaletteSize,cCRNMaxPaletteSize] // Number of helper threads to create during compression. 0=no threading. crn_uint32 m_num_helper_threads; // CRN userdata0 and userdata1 members, which are written directly to the header of the output file. 
crn_uint32 m_userdata0; crn_uint32 m_userdata1; // User provided progress callback. crn_progress_callback_func m_pProgress_func; void* m_pProgress_func_data; }; // Mipmap generator's mode. enum crn_mip_mode { cCRNMipModeUseSourceOrGenerateMips, // Use source texture's mipmaps if it has any, otherwise generate new mipmaps cCRNMipModeUseSourceMips, // Use source texture's mipmaps if it has any, otherwise the output has no mipmaps cCRNMipModeGenerateMips, // Always generate new mipmaps cCRNMipModeNoMips, // Output texture has no mipmaps cCRNMipModeTotal, cCRNModeForceDWORD = 0xFFFFFFFF }; const char* crn_get_mip_mode_desc(crn_mip_mode m); const char* crn_get_mip_mode_name(crn_mip_mode m); // Mipmap generator's filter kernel. enum crn_mip_filter { cCRNMipFilterBox, cCRNMipFilterTent, cCRNMipFilterLanczos4, cCRNMipFilterMitchell, cCRNMipFilterKaiser, // Kaiser=default mipmap filter cCRNMipFilterTotal, cCRNMipFilterForceDWORD = 0xFFFFFFFF }; const char* crn_get_mip_filter_name(crn_mip_filter f); // Mipmap generator's scale mode. enum crn_scale_mode { cCRNSMDisabled, cCRNSMAbsolute, cCRNSMRelative, cCRNSMLowerPow2, cCRNSMNearestPow2, cCRNSMNextPow2, cCRNSMTotal, cCRNSMForceDWORD = 0xFFFFFFFF }; const char* crn_get_scale_mode_desc(crn_scale_mode sm); // Mipmap generator parameters. struct crn_mipmap_params { inline crn_mipmap_params() { clear(); } inline void clear() { m_size_of_obj = sizeof(*this); m_mode = cCRNMipModeUseSourceOrGenerateMips; m_filter = cCRNMipFilterKaiser; m_gamma_filtering = true; m_gamma = 2.2f; // Default "blurriness" factor of .9 actually sharpens the output a little. 
m_blurriness = .9f; m_renormalize = false; m_rtopmip = false; m_tiled = false; m_max_levels = cCRNMaxLevels; m_min_mip_size = 1; m_scale_mode = cCRNSMDisabled; m_scale_x = 1.0f; m_scale_y = 1.0f; m_window_left = 0; m_window_top = 0; m_window_right = 0; m_window_bottom = 0; m_clamp_scale = false; m_clamp_width = 0; m_clamp_height = 0; } inline bool check() const { return true; } inline bool operator==(const crn_mipmap_params& rhs) const { #define CRNLIB_COMP(x) \ do { \ if ((x) != (rhs.x)) \ return false; \ } while (0) CRNLIB_COMP(m_size_of_obj); CRNLIB_COMP(m_mode); CRNLIB_COMP(m_filter); CRNLIB_COMP(m_gamma_filtering); CRNLIB_COMP(m_gamma); CRNLIB_COMP(m_blurriness); CRNLIB_COMP(m_renormalize); CRNLIB_COMP(m_rtopmip); CRNLIB_COMP(m_tiled); CRNLIB_COMP(m_max_levels); CRNLIB_COMP(m_min_mip_size); CRNLIB_COMP(m_scale_mode); CRNLIB_COMP(m_scale_x); CRNLIB_COMP(m_scale_y); CRNLIB_COMP(m_window_left); CRNLIB_COMP(m_window_top); CRNLIB_COMP(m_window_right); CRNLIB_COMP(m_window_bottom); CRNLIB_COMP(m_clamp_scale); CRNLIB_COMP(m_clamp_width); CRNLIB_COMP(m_clamp_height); return true; #undef CRNLIB_COMP } crn_uint32 m_size_of_obj; crn_mip_mode m_mode; crn_mip_filter m_filter; crn_bool m_gamma_filtering; float m_gamma; float m_blurriness; crn_uint32 m_max_levels; crn_uint32 m_min_mip_size; crn_bool m_renormalize; crn_bool m_rtopmip; crn_bool m_tiled; crn_scale_mode m_scale_mode; float m_scale_x; float m_scale_y; crn_uint32 m_window_left; crn_uint32 m_window_top; crn_uint32 m_window_right; crn_uint32 m_window_bottom; crn_bool m_clamp_scale; crn_uint32 m_clamp_width; crn_uint32 m_clamp_height; }; // -------- High-level helper function definitions for CDN/DDS compression. #ifndef CRNLIB_MIN_ALLOC_ALIGNMENT #define CRNLIB_MIN_ALLOC_ALIGNMENT sizeof(size_t) * 2 #endif // Function to set an optional user provided memory allocation/reallocation/msize routines. // By default, crnlib just uses malloc(), free(), etc. for all allocations. 
typedef void* (*crn_realloc_func)(void* p, size_t size, size_t* pActual_size, bool movable, void* pUser_data); typedef size_t (*crn_msize_func)(void* p, void* pUser_data); void crn_set_memory_callbacks(crn_realloc_func pRealloc, crn_msize_func pMSize, void* pUser_data); // Frees memory blocks allocated by crn_compress(), crn_decompress_crn_to_dds(), or crn_decompress_dds_to_images(). void crn_free_block(void* pBlock); // Compresses a 32-bit/pixel texture to either: a regular DX9 DDS file, a "clustered" (or reduced entropy) DX9 DDS file, or a CRN file in memory. // Input parameters: // comp_params is the compression parameters struct, defined above. // compressed_size will be set to the size of the returned memory block containing the output file. // The returned block must be freed by calling crn_free_block(). // *pActual_quality_level will be set to the actual quality level used to compress the image. May be NULL. // *pActual_bitrate will be set to the output file's effective bitrate, possibly taking into account LZMA compression. May be NULL. // Return value: // The compressed file data, or NULL on failure. // compressed_size will be set to the size of the returned memory buffer. // Notes: // A "regular" DDS file is compressed using normal DXTn compression at the specified DXT quality level. // A "clustered" DDS file is compressed using clustered DXTn compression to either the target bitrate or the specified integer quality factor. // The output file is a standard DX9 format DDS file, except the compressor assumes you will be later losslessly compressing the DDS output file using the LZMA algorithm. // A texture is defined as an array of 1 or 6 "faces" (6 faces=cubemap), where each "face" consists of between [1,cCRNMaxLevels] mipmap levels. // Mipmap levels are simple 32-bit 2D images with a pitch of width*sizeof(uint32), arranged in the usual raster order (top scanline first). 
// The image pixels may be grayscale (YYYX bytes in memory), grayscale/alpha (YYYA in memory), 24-bit (RGBX in memory), or 32-bit (RGBA) colors (where "X"=don't care). // RGB color data is generally assumed to be in the sRGB colorspace. If not, be sure to clear the "cCRNCompFlagPerceptual" in the crn_comp_params struct! void* crn_compress(const crn_comp_params& comp_params, crn_uint32& compressed_size, crn_uint32* pActual_quality_level = NULL, float* pActual_bitrate = NULL); // Like the above function, except this function can also do things like generate mipmaps, and resize or crop the input texture before compression. // The actual operations performed are controlled by the crn_mipmap_params struct members. // Be sure to set the "m_gamma_filtering" member of crn_mipmap_params to false if the input texture is not sRGB. void* crn_compress(const crn_comp_params& comp_params, const crn_mipmap_params& mip_params, crn_uint32& compressed_size, crn_uint32* pActual_quality_level = NULL, float* pActual_bitrate = NULL); // Transcodes an entire CRN file to DDS using the crn_decomp.h header file library to do most of the heavy lifting. // The output DDS file's format is guaranteed to be one of the DXTn formats in the crn_format enum. // This is a fast operation, because the CRN format is explicitly designed to be efficiently transcodable to DXTn. // For more control over decompression, see the lower-level helper functions in crn_decomp.h, which do not depend at all on crnlib. void* crn_decompress_crn_to_dds(const void* pCRN_file_data, crn_uint32& file_size); // Decompresses an entire DDS file in any supported format to uncompressed 32-bit/pixel image(s). // See the crnlib::pixel_format enum in inc/dds_defs.h for a list of the supported DDS formats. // You are responsible for freeing each image block, either by calling crn_free_all_images() or manually calling crn_free_block() on each image pointer. 
struct crn_texture_desc { crn_uint32 m_faces; crn_uint32 m_width; crn_uint32 m_height; crn_uint32 m_levels; crn_uint32 m_fmt_fourcc; // Same as crnlib::pixel_format }; bool crn_decompress_dds_to_images(const void* pDDS_file_data, crn_uint32 dds_file_size, crn_uint32** ppImages, crn_texture_desc& tex_desc); // Frees all images allocated by crn_decompress_dds_to_images(). void crn_free_all_images(crn_uint32** ppImages, const crn_texture_desc& desc); // -------- crn_format related helpers functions. // Returns the FOURCC format equivalent to the specified crn_format. crn_uint32 crn_get_format_fourcc(crn_format fmt); // Returns the crn_format's bits per texel. crn_uint32 crn_get_format_bits_per_texel(crn_format fmt); // Returns the crn_format's number of bytes per block. crn_uint32 crn_get_bytes_per_dxt_block(crn_format fmt); // Returns the non-swizzled, basic DXTn version of the specified crn_format. // This is the format you would supply D3D or OpenGL. crn_format crn_get_fundamental_dxt_format(crn_format fmt); // -------- String helpers. // Converts a crn_file_type to a string. const char* crn_get_file_type_ext(crn_file_type file_type); // Converts a crn_format to a string. const char* crn_get_format_string(crn_format fmt); // Converts a crn_dxt_quality to a string. const char* crn_get_dxt_quality_string(crn_dxt_quality q); // -------- Low-level DXTn 4x4 block compressor API // crnlib's DXTn endpoint optimizer actually supports any number of source pixels (i.e. from 1 to thousands, not just 16), // but for simplicity this API only supports 4x4 texel blocks. typedef void* crn_block_compressor_context_t; // Create a DXTn block compressor. // This function only supports the basic/nonswizzled "fundamental" formats: DXT1, DXT3, DXT5, DXT5A, DXN_XY and DXN_YX. // Avoid calling this multiple times if you intend on compressing many blocks, because it allocates some memory. 
crn_block_compressor_context_t crn_create_block_compressor(const crn_comp_params& params); // Compresses a block of 16 pixels to the destination DXTn block. // pDst_block should be 8 (for DXT1/DXT5A) or 16 bytes (all the others). // pPixels should be an array of 16 crn_uint32's. Each crn_uint32 must be r,g,b,a (r is always first) in memory. void crn_compress_block(crn_block_compressor_context_t pContext, const crn_uint32* pPixels, void* pDst_block); // Frees a DXTn block compressor. void crn_free_block_compressor(crn_block_compressor_context_t pContext); // Unpacks a compressed block to pDst_pixels. // pSrc_block should be 8 (for DXT1/DXT5A) or 16 bytes (all the others). // pDst_pixel should be an array of 16 crn_uint32's. Each uint32 will be r,g,b,a (r is always first) in memory. // crn_fmt should be one of the "fundamental" formats: DXT1, DXT3, DXT5, DXT5A, DXN_XY and DXN_YX. // The various swizzled DXT5 formats (such as cCRNFmtDXT5_xGBR, etc.) will be unpacked as if they where plain DXT5. // Returns false if the crn_fmt is invalid. bool crn_decompress_block(const void* pSrc_block, crn_uint32* pDst_pixels, crn_format crn_fmt); #endif // CRNLIB_H //------------------------------------------------------------------------------ // // crnlib uses the ZLIB license: // http://opensource.org/licenses/Zlib // // Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC // // This software is provided 'as-is', without any express or implied // warranty. In no event will the authors be held liable for any damages // arising from the use of this software. // // Permission is granted to anyone to use this software for any purpose, // including commercial applications, and to alter it and redistribute it // freely, subject to the following restrictions: // // 1. The origin of this software must not be misrepresented; you must not // claim that you wrote the original software. 
If you use this software // in a product, an acknowledgment in the product documentation would be // appreciated but is not required. // // 2. Altered source versions must be plainly marked as such, and must not be // misrepresented as being the original software. // // 3. This notice may not be removed or altered from any source distribution. // //------------------------------------------------------------------------------ DaemonEngine-crunch-ef4d32f/inc/dds_defs.h000066400000000000000000000122351503722002600205170ustar00rootroot00000000000000// File: dds_defs.h // DX9 .DDS file header definitions. #ifndef CRNLIB_DDS_DEFS_H #define CRNLIB_DDS_DEFS_H #include "crnlib.h" #define CRNLIB_PIXEL_FMT_FOURCC(a, b, c, d) ((a) | ((b) << 8U) | ((c) << 16U) | ((d) << 24U)) namespace crnlib { enum pixel_format { PIXEL_FMT_INVALID = 0, PIXEL_FMT_DXT1 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '1'), PIXEL_FMT_DXT2 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '2'), PIXEL_FMT_DXT3 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '3'), PIXEL_FMT_DXT4 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '4'), PIXEL_FMT_DXT5 = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', 'T', '5'), PIXEL_FMT_3DC = CRNLIB_PIXEL_FMT_FOURCC('A', 'T', 'I', '2'), // DXN_YX PIXEL_FMT_DXN = CRNLIB_PIXEL_FMT_FOURCC('A', '2', 'X', 'Y'), // DXN_XY PIXEL_FMT_DXT5A = CRNLIB_PIXEL_FMT_FOURCC('A', 'T', 'I', '1'), // ATI1N, https://www.vgamuseum.info/images/doc/ati/radeon_x1/radeon_x1x00_programming_guide.pdf // Non-standard, crnlib-specific pixel formats (some of these are supported by ATI's Compressonator) PIXEL_FMT_DXT5_CCxY = CRNLIB_PIXEL_FMT_FOURCC('C', 'C', 'x', 'Y'), PIXEL_FMT_DXT5_xGxR = CRNLIB_PIXEL_FMT_FOURCC('x', 'G', 'x', 'R'), PIXEL_FMT_DXT5_xGBR = CRNLIB_PIXEL_FMT_FOURCC('x', 'G', 'B', 'R'), PIXEL_FMT_DXT5_AGBR = CRNLIB_PIXEL_FMT_FOURCC('A', 'G', 'B', 'R'), PIXEL_FMT_DXT1A = CRNLIB_PIXEL_FMT_FOURCC('D', 'X', '1', 'A'), PIXEL_FMT_ETC1 = CRNLIB_PIXEL_FMT_FOURCC('E', 'T', 'C', '1'), PIXEL_FMT_ETC2 = CRNLIB_PIXEL_FMT_FOURCC('E', 'T', 'C', 
'2'), PIXEL_FMT_ETC2A = CRNLIB_PIXEL_FMT_FOURCC('E', 'T', '2', 'A'), PIXEL_FMT_ETC1S = CRNLIB_PIXEL_FMT_FOURCC('E', 'T', '1', 'S'), PIXEL_FMT_ETC2AS = CRNLIB_PIXEL_FMT_FOURCC('E', '2', 'A', 'S'), PIXEL_FMT_R8G8B8 = CRNLIB_PIXEL_FMT_FOURCC('R', 'G', 'B', 'x'), PIXEL_FMT_L8 = CRNLIB_PIXEL_FMT_FOURCC('L', 'x', 'x', 'x'), PIXEL_FMT_A8 = CRNLIB_PIXEL_FMT_FOURCC('x', 'x', 'x', 'A'), PIXEL_FMT_A8L8 = CRNLIB_PIXEL_FMT_FOURCC('L', 'x', 'x', 'A'), PIXEL_FMT_A8R8G8B8 = CRNLIB_PIXEL_FMT_FOURCC('R', 'G', 'B', 'A') }; const crn_uint32 cDDSMaxImageDimensions = 8192U; // Total size of header is sizeof(uint32)+cDDSSizeofDDSurfaceDesc2; const crn_uint32 cDDSSizeofDDSurfaceDesc2 = 124; // "DDS " const crn_uint32 cDDSFileSignature = 0x20534444; struct DDCOLORKEY { crn_uint32 dwUnused0; crn_uint32 dwUnused1; }; struct DDPIXELFORMAT { crn_uint32 dwSize; crn_uint32 dwFlags; crn_uint32 dwFourCC; crn_uint32 dwRGBBitCount; // ATI compressonator and crnlib will place a FOURCC code here for swizzled/cooked DXTn formats crn_uint32 dwRBitMask; crn_uint32 dwGBitMask; crn_uint32 dwBBitMask; crn_uint32 dwRGBAlphaBitMask; }; struct DDSCAPS2 { crn_uint32 dwCaps; crn_uint32 dwCaps2; crn_uint32 dwCaps3; crn_uint32 dwCaps4; }; struct DDSURFACEDESC2 { crn_uint32 dwSize; crn_uint32 dwFlags; crn_uint32 dwHeight; crn_uint32 dwWidth; union { crn_int32 lPitch; crn_uint32 dwLinearSize; }; crn_uint32 dwBackBufferCount; crn_uint32 dwMipMapCount; crn_uint32 dwAlphaBitDepth; crn_uint32 dwUnused0; crn_uint32 lpSurface; DDCOLORKEY unused0; DDCOLORKEY unused1; DDCOLORKEY unused2; DDCOLORKEY unused3; DDPIXELFORMAT ddpfPixelFormat; DDSCAPS2 ddsCaps; crn_uint32 dwUnused1; }; const crn_uint32 DDSD_CAPS = 0x00000001; const crn_uint32 DDSD_HEIGHT = 0x00000002; const crn_uint32 DDSD_WIDTH = 0x00000004; const crn_uint32 DDSD_PITCH = 0x00000008; const crn_uint32 DDSD_BACKBUFFERCOUNT = 0x00000020; const crn_uint32 DDSD_ZBUFFERBITDEPTH = 0x00000040; const crn_uint32 DDSD_ALPHABITDEPTH = 0x00000080; const crn_uint32 
DDSD_LPSURFACE = 0x00000800; const crn_uint32 DDSD_PIXELFORMAT = 0x00001000; const crn_uint32 DDSD_CKDESTOVERLAY = 0x00002000; const crn_uint32 DDSD_CKDESTBLT = 0x00004000; const crn_uint32 DDSD_CKSRCOVERLAY = 0x00008000; const crn_uint32 DDSD_CKSRCBLT = 0x00010000; const crn_uint32 DDSD_MIPMAPCOUNT = 0x00020000; const crn_uint32 DDSD_REFRESHRATE = 0x00040000; const crn_uint32 DDSD_LINEARSIZE = 0x00080000; const crn_uint32 DDSD_TEXTURESTAGE = 0x00100000; const crn_uint32 DDSD_FVF = 0x00200000; const crn_uint32 DDSD_SRCVBHANDLE = 0x00400000; const crn_uint32 DDSD_DEPTH = 0x00800000; const crn_uint32 DDSD_ALL = 0x00fff9ee; const crn_uint32 DDPF_ALPHAPIXELS = 0x00000001; const crn_uint32 DDPF_ALPHA = 0x00000002; const crn_uint32 DDPF_FOURCC = 0x00000004; const crn_uint32 DDPF_PALETTEINDEXED8 = 0x00000020; const crn_uint32 DDPF_RGB = 0x00000040; const crn_uint32 DDPF_LUMINANCE = 0x00020000; const crn_uint32 DDSCAPS_COMPLEX = 0x00000008; const crn_uint32 DDSCAPS_TEXTURE = 0x00001000; const crn_uint32 DDSCAPS_MIPMAP = 0x00400000; const crn_uint32 DDSCAPS2_CUBEMAP = 0x00000200; const crn_uint32 DDSCAPS2_CUBEMAP_POSITIVEX = 0x00000400; const crn_uint32 DDSCAPS2_CUBEMAP_NEGATIVEX = 0x00000800; const crn_uint32 DDSCAPS2_CUBEMAP_POSITIVEY = 0x00001000; const crn_uint32 DDSCAPS2_CUBEMAP_NEGATIVEY = 0x00002000; const crn_uint32 DDSCAPS2_CUBEMAP_POSITIVEZ = 0x00004000; const crn_uint32 DDSCAPS2_CUBEMAP_NEGATIVEZ = 0x00008000; const crn_uint32 DDSCAPS2_VOLUME = 0x00200000; } // namespace crnlib #endif // CRNLIB_DDS_DEFS_H DaemonEngine-crunch-ef4d32f/license.txt000066400000000000000000000017121503722002600202030ustar00rootroot00000000000000crunch/crnlib uses the ZLIB license: http://opensource.org/licenses/Zlib Copyright (c) 2010-2016 Richard Geldreich, Jr. and Binomial LLC This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. 
Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. DaemonEngine-crunch-ef4d32f/test/000077500000000000000000000000001503722002600167765ustar00rootroot00000000000000DaemonEngine-crunch-ef4d32f/test/black.jpg000066400000000000000000000002541503722002600205550ustar00rootroot00000000000000JFIFC @@'?DaemonEngine-crunch-ef4d32f/test/checksums.tsv000066400000000000000000000357031503722002600215310ustar00rootroot00000000000000build/test/png-to-all/unvanquished_64.tga 34434170b06843b2d4fb5964a4b8751ec3a151a7d03e351e45a5967508f6a21f6e40ffc214bb46640105c4058bc35e196ecb5dd3261528a64acfeff52034fb40 build/test/png-to-all/unvanquished_64.bmp 1c5f28be8151a8125346e9cf17b1bf6e3f9fe27f6947b164b7eefa8fd3732a2e4dff173c354a0cd4ed599cd604d4b8f916c0575b0a4b4b508acc68dc9b4067ae build/test/png-to-all/unvanquished_64.png 71bb28da97cd778b99f131acaeb1c28ea37bae295a9e9934bed339508d8428587120c0da8f5bbc7963cdb77741ef789414f9d0d2c1e0da46026336efde147cc5 build/test/png-to-all/unvanquished_64.crn a3b526e95f44b6a4c919bc8eb79f4debf592f0bccc163c900cd1b6f9add0ce58c3db9fc8dac1828d350bbcd915823d7743a0c114a533fbc08c163fcb4846d2f5 build/test/png-to-all/unvanquished_64.dds ce98b21c574b4883e4aff4a0b92fd0f533f22e00ab753686f353111fa7503287c2c4dba25f2c2670508630d79b59464578e094461bad81ad803e1d58ed5e69f8 build/test/png-to-all/unvanquished_64.ktx 
d82eab591163d802231722ef325571735971b6a9b0703ed8ea59b4a4144189825d96d3455605426453fb3ad8d7cc61f931c52b3e73df2bb6787f7408baeb4ac7 build/test/png-to-all/unvanquished_64.jpg 6cd22de7e2e5baced5ef16e84f816dde3049f822459d552ced4dcd4e3f9a16be82ee0747ed1479fe8339991176004b4ccc90380b7ef51292c7d9b518503a348d build/test/tga-to-all/unvanquished_64.tga 34434170b06843b2d4fb5964a4b8751ec3a151a7d03e351e45a5967508f6a21f6e40ffc214bb46640105c4058bc35e196ecb5dd3261528a64acfeff52034fb40 build/test/tga-to-all/unvanquished_64.bmp 1c5f28be8151a8125346e9cf17b1bf6e3f9fe27f6947b164b7eefa8fd3732a2e4dff173c354a0cd4ed599cd604d4b8f916c0575b0a4b4b508acc68dc9b4067ae build/test/tga-to-all/unvanquished_64.png 71bb28da97cd778b99f131acaeb1c28ea37bae295a9e9934bed339508d8428587120c0da8f5bbc7963cdb77741ef789414f9d0d2c1e0da46026336efde147cc5 build/test/tga-to-all/unvanquished_64.crn a3b526e95f44b6a4c919bc8eb79f4debf592f0bccc163c900cd1b6f9add0ce58c3db9fc8dac1828d350bbcd915823d7743a0c114a533fbc08c163fcb4846d2f5 build/test/tga-to-all/unvanquished_64.dds ce98b21c574b4883e4aff4a0b92fd0f533f22e00ab753686f353111fa7503287c2c4dba25f2c2670508630d79b59464578e094461bad81ad803e1d58ed5e69f8 build/test/tga-to-all/unvanquished_64.ktx d82eab591163d802231722ef325571735971b6a9b0703ed8ea59b4a4144189825d96d3455605426453fb3ad8d7cc61f931c52b3e73df2bb6787f7408baeb4ac7 build/test/tga-to-all/unvanquished_64.jpg 6cd22de7e2e5baced5ef16e84f816dde3049f822459d552ced4dcd4e3f9a16be82ee0747ed1479fe8339991176004b4ccc90380b7ef51292c7d9b518503a348d build/test/bmp-to-all/unvanquished_64.tga 34434170b06843b2d4fb5964a4b8751ec3a151a7d03e351e45a5967508f6a21f6e40ffc214bb46640105c4058bc35e196ecb5dd3261528a64acfeff52034fb40 build/test/bmp-to-all/unvanquished_64.bmp 1c5f28be8151a8125346e9cf17b1bf6e3f9fe27f6947b164b7eefa8fd3732a2e4dff173c354a0cd4ed599cd604d4b8f916c0575b0a4b4b508acc68dc9b4067ae build/test/bmp-to-all/unvanquished_64.png 
71bb28da97cd778b99f131acaeb1c28ea37bae295a9e9934bed339508d8428587120c0da8f5bbc7963cdb77741ef789414f9d0d2c1e0da46026336efde147cc5 build/test/bmp-to-all/unvanquished_64.crn a3b526e95f44b6a4c919bc8eb79f4debf592f0bccc163c900cd1b6f9add0ce58c3db9fc8dac1828d350bbcd915823d7743a0c114a533fbc08c163fcb4846d2f5 build/test/bmp-to-all/unvanquished_64.dds ce98b21c574b4883e4aff4a0b92fd0f533f22e00ab753686f353111fa7503287c2c4dba25f2c2670508630d79b59464578e094461bad81ad803e1d58ed5e69f8 build/test/bmp-to-all/unvanquished_64.ktx d82eab591163d802231722ef325571735971b6a9b0703ed8ea59b4a4144189825d96d3455605426453fb3ad8d7cc61f931c52b3e73df2bb6787f7408baeb4ac7 build/test/bmp-to-all/unvanquished_64.jpg 6cd22de7e2e5baced5ef16e84f816dde3049f822459d552ced4dcd4e3f9a16be82ee0747ed1479fe8339991176004b4ccc90380b7ef51292c7d9b518503a348d build/test/crn-to-all/unvanquished_64.tga 71014682514ec9d2baa19444fd026ceb65aa06e86d32f34b326f336e47cf4ea351aa7c2eee6a1f0fbf290658fae72c24a86c4e167272143012d384dd6d6b1ce9 build/test/crn-to-all/unvanquished_64.bmp a6d6886879f9aade027ecb4d55f2092c3af98c297aaf2f689d77a11870156099853a41993575a9a313a23e399b3bcb06dcc0eac46b877639f3875972af7933d7 build/test/crn-to-all/unvanquished_64.png dd9d45459bd5a3ed74d494838a3d61ed4baea9960300cb3f1d54ecc923f742c9036fe13a338da6db2efc98049a2e206cb7c0e1cba6d79ea0e3ad86473af13e83 build/test/crn-to-all/unvanquished_64.crn 1596d312526073fd8923faefbed879fb1299ee53eeb54787f2b66754768d9a8160d9921b13aa9b7c781eb088be41cec0f3b703713aede43116a2b62f284f39f9 build/test/crn-to-all/unvanquished_64.dds 2aae5c5bd8c6a82d1cf3fc9d9da12fe3fc5be8651261ff1cb359e754308fd81e93bf304aeb55ada0bad50794d6e97d3e48084e5a4f48fa7caec4ed954152bae9 build/test/crn-to-all/unvanquished_64.ktx 10a45e43609c49b4929b91293e203557d8f5c1b146aa8162d946248e8fa24709e1645ed1fc40c2c1a541567b3d1287c7885cb0185230b660386825c4d03db22f build/test/crn-to-all/unvanquished_64.jpg 
f68e74b3c8a404fc867345411c8778228728897e5d0870e071d6ec67fd1fccb45b3b97993575c3ed66d80cd6a7d60845e8fe68780ac322fe1d46adc57014fa8b build/test/dds-to-all/unvanquished_64.tga 46e0c1796f7b1a998829919e14f69cb43d83b895585aca69ad17e0f6cd9839d20b383a6eca161320b87bb7161cbedc116466ec27319078a011b59fe16dc6ce26 build/test/dds-to-all/unvanquished_64.bmp 9d1b7c76f345332fa53e9b517ee47ba142d0c9def2b27b908af67b3c19edec0882f183f83eb9022cc2e726082193d7c82394a92b068137a4698a505d8bab0080 build/test/dds-to-all/unvanquished_64.png cd5b04f9c081f91a2d3a3fbf4484153feec5f0c027608f48a51209596ff604680ab93de70fbb616dc3fecea3fa6d240ac7a2e294bb0fc8244766375ffae54230 build/test/dds-to-all/unvanquished_64.crn 0db1f3bcf6240389be389dc2a94fdabf97aab439d12aabc6cca2ccb69559759fc414d2bc7735d0d4812b895ffb4085172f6a48439a104d79e0b5d65124b1f002 build/test/dds-to-all/unvanquished_64.dds ce98b21c574b4883e4aff4a0b92fd0f533f22e00ab753686f353111fa7503287c2c4dba25f2c2670508630d79b59464578e094461bad81ad803e1d58ed5e69f8 build/test/dds-to-all/unvanquished_64.ktx dbbea021367254587000704d11b5cf7982d3ac4a42edd9489b7b2ec49da796d4346c68c6ee1a163e6880f0efaeafc0235b739c2a4acbd1ed27750657af36e3df build/test/dds-to-all/unvanquished_64.jpg 3a2bfcc1f2ecb18371163bb16812096b019fec176731f015a301384d605910702d168f8762076fa5d912760947c8df8d4026fd9b038c0d94360d1bf744f592a2 build/test/ktx-to-all/unvanquished_64.tga 516bc1db1bfcda9c7f466cbb6f45b7608d9803391ae6d32e8de30d8e8102ef2de623cf6adfc74d471202ab14930dc4babe93a41b6c35b90e250a0de6b690fe7e build/test/ktx-to-all/unvanquished_64.bmp 0493fc75c26629d3a532e68e3778bb25ed6de4d81f3a39eb34a65a1990aa4072aa6b0948dc1ed3122b7ecf45e6be139d05b008ad7277649fc244d039404503fd build/test/ktx-to-all/unvanquished_64.png 7b5da6fea08f73d0e06791d000780820574d08e14c00c06ba2ef208296ebcfbc427175d72d6f2797e7240930782c70bf717a1e966fdd9aaada4a8b826ff9a2df build/test/ktx-to-all/unvanquished_64.crn 
7cb43bf1f8fa87735d7643c3590b52bf918b6e202095787f2305c7469db957185871434b101fb2e2fb7d57e55fc0dc635994b8ed2765edb6b453f10931508f6b build/test/ktx-to-all/unvanquished_64.dds b2cecbf2870835ec04a8ed69012d294402017fa565393f8266a6dcc868f9c5b472b8b568c98250b5beb5b0d43ee9d9dc06d04cca4f35073541ba8331d55a6b46 build/test/ktx-to-all/unvanquished_64.ktx d82eab591163d802231722ef325571735971b6a9b0703ed8ea59b4a4144189825d96d3455605426453fb3ad8d7cc61f931c52b3e73df2bb6787f7408baeb4ac7 build/test/ktx-to-all/unvanquished_64.jpg c350e1e0d90b8d519a0cfdbad85c972a6f6c077988b8e96f5daef415f816b3ace2bb87c79772c6d25954caeb598ef3af7700f0bc10914349161fe1a4cc1f2d73 build/test/jpg-to-all/unvanquished_64.tga 4ea9e7b358a01fe1b09e9c16845a0249daddc87010403f33c525cfca69af85cb561019221adc8beae4172908d48a951df2d8bf65d63ae2ee183ee5c6df424c99 build/test/jpg-to-all/unvanquished_64.bmp 20636873950cde44336751526644ea6e979b5bdf1f45a166a40b583977a6f6cce6db32ab9b14d66eba1777ab8447fbc926a91b095c27cb4487e04d1f36ed3b0b build/test/jpg-to-all/unvanquished_64.png 24124a42c01f243ec2864cc70d399b685183acb04216a0b21133b454f4d69d7606b3190119ff8c869c5dd4a28f77fbc352f3717747e32099f507cc7b790c39e2 build/test/jpg-to-all/unvanquished_64.crn f7bf97cafc570eb16335006623b887de1b05591035a20f9399edc2386c59ab43832d4741a3763210c022cb65ee7a0facb29138bb7daf0b71f6b9059c1852c445 build/test/jpg-to-all/unvanquished_64.dds c1b3b785d107ba2f18097fb1cf009f598dbda9e797741ed66bd6cf9b1c376447f0990bb3890af12e3c73c4e02410adc0b5be6f386867d8ffc6aded53adb5f309 build/test/jpg-to-all/unvanquished_64.ktx 233e5d029b08cc45f8b3e9853edc6dc5dd5cab49cb0f95e870b9b6db917b2ddceaa2756d55aec1785c147375bc7a37030e3a8cde77d364f73d86043e99a7969a build/test/jpg-to-all/unvanquished_64.jpg abe6f8c6820c4d0762a00b6ce0789834601d764b1a9a330eeb3b49d5209ee9ca2968e154d53e74912e05a4b99467df1710c12d62354cdfc8d49c9cd315de2b65 build/test/tga-to-png/raw-bottom-left.png 
6238f9678cd9fa774e6d50ee6638f316d0b1148ade344c06b4ddebac4738ea90e93c19d5cca405fccdabfdb37898bc7f9bccce1f0d974f9cbd18ab54b66adaa3 build/test/tga-to-png/raw-bottom-right.png 6238f9678cd9fa774e6d50ee6638f316d0b1148ade344c06b4ddebac4738ea90e93c19d5cca405fccdabfdb37898bc7f9bccce1f0d974f9cbd18ab54b66adaa3 build/test/tga-to-png/raw-top-left.png 6238f9678cd9fa774e6d50ee6638f316d0b1148ade344c06b4ddebac4738ea90e93c19d5cca405fccdabfdb37898bc7f9bccce1f0d974f9cbd18ab54b66adaa3 build/test/tga-to-png/raw-top-right.png 6238f9678cd9fa774e6d50ee6638f316d0b1148ade344c06b4ddebac4738ea90e93c19d5cca405fccdabfdb37898bc7f9bccce1f0d974f9cbd18ab54b66adaa3 build/test/tga-to-png/rle-bottom-left.png 6238f9678cd9fa774e6d50ee6638f316d0b1148ade344c06b4ddebac4738ea90e93c19d5cca405fccdabfdb37898bc7f9bccce1f0d974f9cbd18ab54b66adaa3 build/test/tga-to-png/rle-bottom-right.png 6238f9678cd9fa774e6d50ee6638f316d0b1148ade344c06b4ddebac4738ea90e93c19d5cca405fccdabfdb37898bc7f9bccce1f0d974f9cbd18ab54b66adaa3 build/test/tga-to-png/rle-top-left.png 6238f9678cd9fa774e6d50ee6638f316d0b1148ade344c06b4ddebac4738ea90e93c19d5cca405fccdabfdb37898bc7f9bccce1f0d974f9cbd18ab54b66adaa3 build/test/tga-to-png/rle-top-right.png 6238f9678cd9fa774e6d50ee6638f316d0b1148ade344c06b4ddebac4738ea90e93c19d5cca405fccdabfdb37898bc7f9bccce1f0d974f9cbd18ab54b66adaa3 build/test/tga-to-crn/raw-bottom-left.crn fcf1da3fc69f6b05935deafacb1c637f44983b9992a7452f8f26a7787d5577c6548986366fae5fd6d300366de4acade853464e7656224b04673a7d129e006854 build/test/tga-to-crn/raw-bottom-right.crn fcf1da3fc69f6b05935deafacb1c637f44983b9992a7452f8f26a7787d5577c6548986366fae5fd6d300366de4acade853464e7656224b04673a7d129e006854 build/test/tga-to-crn/raw-top-left.crn fcf1da3fc69f6b05935deafacb1c637f44983b9992a7452f8f26a7787d5577c6548986366fae5fd6d300366de4acade853464e7656224b04673a7d129e006854 build/test/tga-to-crn/raw-top-right.crn fcf1da3fc69f6b05935deafacb1c637f44983b9992a7452f8f26a7787d5577c6548986366fae5fd6d300366de4acade853464e7656224b04673a7d129e006854 
build/test/tga-to-crn/rle-bottom-left.crn fcf1da3fc69f6b05935deafacb1c637f44983b9992a7452f8f26a7787d5577c6548986366fae5fd6d300366de4acade853464e7656224b04673a7d129e006854 build/test/tga-to-crn/rle-bottom-right.crn fcf1da3fc69f6b05935deafacb1c637f44983b9992a7452f8f26a7787d5577c6548986366fae5fd6d300366de4acade853464e7656224b04673a7d129e006854 build/test/tga-to-crn/rle-top-left.crn fcf1da3fc69f6b05935deafacb1c637f44983b9992a7452f8f26a7787d5577c6548986366fae5fd6d300366de4acade853464e7656224b04673a7d129e006854 build/test/tga-to-crn/rle-top-right.crn fcf1da3fc69f6b05935deafacb1c637f44983b9992a7452f8f26a7787d5577c6548986366fae5fd6d300366de4acade853464e7656224b04673a7d129e006854 build/test/png-to-png/test-colormap1-alpha1.png f596534970da37597230ae60112a4fe4d08d950e03b99754a633d9d285f4683624b1f710727d6b3e556b80b9d44428bfa297dc898beaf6fecad29fe98bbc98cb build/test/png-to-png/test-colormap2-alpha1.png 2ffd5907e77157dfaac01c786b5150dcf822699254bebdd51ef1797568208ae924a572d90bc3ed3a06cc5e524c4c0d5a3de7aa75ae99febc4638533e41a6dda0 build/test/png-to-png/test-colormap4-alpha1.png f8cd5a9b1ae040baee41a7f378e18bba426f79532555e22fe2f8c605a8347d79e7b98bb8284130536512b187f9d8435d58665eddb8d36596296f84f05ee904ae build/test/png-to-png/test-colormap8-alpha1.png 3561be7b7958d37913708664413c149942f302907bf5f928782c7cf6cc4b312082f51fea6d8a3271b93bd37ace03ec22af65c224d6e7617ed8644fbe2f95dc35 build/test/png-to-png/test-grayscale1-alpha1.png 57876e1d84ada2604c5e2dbbfd08c19286c87c8f1843cf47a9b77b4f6561c3b414a269f924c2be74bba4662503ede0fd9d94cb0a084c35a9f63a3cce07dcb876 build/test/png-to-png/test-grayscale1-alpha8.png 62ba2ca3669febcb88959224732c295d891f10942dcddf2494c052886c4f502b0e30c1238d05196ce8113c2c150fa4d96fc81436af5ad6a28f7b8a64a77bbe7f build/test/png-to-png/test-grayscale8-alpha1.png 62ba2ca3669febcb88959224732c295d891f10942dcddf2494c052886c4f502b0e30c1238d05196ce8113c2c150fa4d96fc81436af5ad6a28f7b8a64a77bbe7f build/test/png-to-png/test-rgb8-alpha8.png 
21630317b2ed9957241f4982c4e8207f7a00af23e0dd5df863965dbca9dcefd9a3c4f77b15ea91141c6c35181e44801ff1b5d0a7ed62f0c11eddfdaf7122b1a9 build/test/png-to-crn/test-colormap1-alpha1.crn f223d84528a0470cedeaf014c85f78ed16e1cc0f8990324fdbc087b190b6f972d1db6d829881c1414c5bd37b04c26794e7aa59b0d5b6eb535dae766a0c1d4ee9 build/test/png-to-crn/test-colormap2-alpha1.crn 91b8372d3f16a0d198ac557b15cbafff4c3d456108dd4bee66902a20593a3ea2735ca2f052d3f6ca62cc31335e5a24f6f995116802ab46e3ded534168ba4fea3 build/test/png-to-crn/test-colormap4-alpha1.crn 75c03a7c9181aa74a32040f9bea4e13317b342f1ce8c41e3a52ff8784f6cc28de08b4c002e089470873bbdd555d87a9c09c8a649c5ab4cda6614cf2f0cbf73e9 build/test/png-to-crn/test-colormap8-alpha1.crn b9ca0def9743aa4790d12de2faa9ce0bc355c916af8c0c4440e9a87a20c482feb563222b364a20ee7f70abff97e7d6312d679700b64704c2783d03b8cd9a757b build/test/png-to-crn/test-grayscale1-alpha1.crn 3a9175379f9512c74675a6d573bccdb0426d70e0ff9d63dacaa83e2c1f857dda14df93732a5776ef3625bdcf17d3ec51788623bb85e46c70a56bb306fd563b05 build/test/png-to-crn/test-grayscale1-alpha8.crn 2660ab64afdb6eb70fdb131ef9927fc0b21bca021694bb4b8220451a9e0d5195306ef08d022ec7fb3b6535eed56efbe093e1108a116fdca81c5ce5a0c6e1da01 build/test/png-to-crn/test-grayscale8-alpha1.crn 0e0dd5cf4f613eae1a5638060e6fbdcc1b8438e1c22ab27785a30efa8f0b20458e9b28b3da09b87d6bb9cab16f101908c64d74ab8247bef032de09449b380a77 build/test/png-to-crn/test-rgb8-alpha8.crn 70aa2d510a77ef971cbc91c8eea5cc1c0d571722f3cd7eb5cf9e3b825987b87eb7653b0bfceee733f2a54d5fa9ce452df64ab441e64f327b78b492fd57c461a9 build/test/bmp-to-crn/sample-default.crn 119f0f756811e56ba688c402ea317923d9665017aed19022ad19fc8396e45d3ca3bb7b5383f3edc00e859e31558a0865de88af71110529b16b89ded731070925 build/test/bmp-to-crn/sample-vertical-flip.crn 119f0f756811e56ba688c402ea317923d9665017aed19022ad19fc8396e45d3ca3bb7b5383f3edc00e859e31558a0865de88af71110529b16b89ded731070925 build/test/jpg-to-crn/black.crn 
19fb840c8dbb7da96400af5af1c45fe48c7c7e19340d03fbe5a2af28ceecfb9efba26fe6c1bf46bd76164ecb0efe91cc9237917e8e431cd8365f80e9ffacc906 build/test/example1-dds/unvanquished_64.dds ce98b21c574b4883e4aff4a0b92fd0f533f22e00ab753686f353111fa7503287c2c4dba25f2c2670508630d79b59464578e094461bad81ad803e1d58ed5e69f8 build/test/example1-crn/unvanquished_64.crn a3b526e95f44b6a4c919bc8eb79f4debf592f0bccc163c900cd1b6f9add0ce58c3db9fc8dac1828d350bbcd915823d7743a0c114a533fbc08c163fcb4846d2f5 build/test/example1-crn/unvanquished_64.dds 2aae5c5bd8c6a82d1cf3fc9d9da12fe3fc5be8651261ff1cb359e754308fd81e93bf304aeb55ada0bad50794d6e97d3e48084e5a4f48fa7caec4ed954152bae9 build/test/example2-dds/unvanquished_64.dds 2aae5c5bd8c6a82d1cf3fc9d9da12fe3fc5be8651261ff1cb359e754308fd81e93bf304aeb55ada0bad50794d6e97d3e48084e5a4f48fa7caec4ed954152bae9 build/test/example3-dds/unvanquished_64.dds a719dd943bc4e30be0c124e6bafb1d8ba4002b9233e6208104f93046edd4ddd7aea7815430ae2c0b7efe259943f6a8fe109f56ea457e43cde79295ae7ac85b43 DaemonEngine-crunch-ef4d32f/test/raw-bottom-left.tga000066400000000000000000000100741503722002600225200ustar00rootroot00000000000000 raw-bottom-left??????TRUEVISION-XFILE.DaemonEngine-crunch-ef4d32f/test/raw-bottom-right.tga000066400000000000000000000100751503722002600227040ustar00rootroot00000000000000 raw-bottom-right??????TRUEVISION-XFILE.DaemonEngine-crunch-ef4d32f/test/raw-top-left.tga000066400000000000000000000100711503722002600220130ustar00rootroot00000000000000  (raw-top-left??????TRUEVISION-XFILE.DaemonEngine-crunch-ef4d32f/test/raw-top-right.tga000066400000000000000000000100721503722002600221770ustar00rootroot00000000000000 8raw-top-right??????TRUEVISION-XFILE.DaemonEngine-crunch-ef4d32f/test/rle-bottom-left.tga000066400000000000000000000010301503722002600225010ustar00rootroot00000000000000 ??????TRUEVISION-XFILE.DaemonEngine-crunch-ef4d32f/test/rle-bottom-right.tga000066400000000000000000000010301503722002600226640ustar00rootroot00000000000000 
??????TRUEVISION-XFILE.DaemonEngine-crunch-ef4d32f/test/rle-top-left.tga000066400000000000000000000010301503722002600217770ustar00rootroot00000000000000 (??????TRUEVISION-XFILE.DaemonEngine-crunch-ef4d32f/test/rle-top-right.tga000066400000000000000000000010301503722002600221620ustar00rootroot00000000000000 8??????TRUEVISION-XFILE.DaemonEngine-crunch-ef4d32f/test/sample-default.bmp000066400000000000000000000102121503722002600223750ustar00rootroot00000000000000BM|   inW======DaemonEngine-crunch-ef4d32f/test/sample-vertical-flip.bmp000066400000000000000000000102121503722002600235120ustar00rootroot00000000000000BM|   inW======DaemonEngine-crunch-ef4d32f/test/test-colormap1-alpha1.png000066400000000000000000000006601503722002600235240ustar00rootroot00000000000000PNG  IHDRf:%PLTEtRNS@fXIDATx0FQ@Уhr! 2] Cj~4bYeY&INp\Nf/ʳ/̿!2  p 3V@F#J7J蕈^tw (Pf3f@U4; l=ZQ߃i Aԁua ? ?f / P1HҞu+jj7Q!°0q )MWorzuN*΢h=jQޤXi^m %-5=EMU]em;y#IENDB`DaemonEngine-crunch-ef4d32f/test/test-colormap2-alpha1.png000066400000000000000000000007061503722002600235260ustar00rootroot00000000000000PNG  IHDR!@ PLTEtRNS@fkIDATx1jAQ8i()E~$v+W1zA_0 0 0 7 NXg# B  @*I. 
\@e pW,& L@e XY:L@rAԐ6rAԐ5sAvrAv*;+DGsAv+A]Zp`ł3/-x[ԇSF0k];0aq@A 6%khL@1< 4oHCYȕ8~ ["7]޵ygC;Wo}m|{nop {[  .@#B s/8r󜈈Go IENDB`DaemonEngine-crunch-ef4d32f/test/test-colormap4-alpha1.png000066400000000000000000000007461503722002600235340ustar00rootroot00000000000000PNG  IHDR\UPLTE,BtRNS@fIDATxAQP?fX5qS.EKg`;$I$I$I]S s#u+ tݯ@ﯨE/T4@_~@AT  Ph@4@0h(h :;`N97̒=:5` <+γ:1f5 u 0 0j\s 06  ɫew _L pߎ !#ʕZ7}@ţoل+V}׬oە+}oݖ+}oP=Boh'M,>T;b+W%ln:ޫ y)IENDB`DaemonEngine-crunch-ef4d32f/test/test-colormap8-alpha1.png000066400000000000000000000111661503722002600235360ustar00rootroot00000000000000PNG  IHDRkXTPLTE@N\tRNS<IDATxй A{jxC"nݍ B?|X(?Ňgr%G, ]v#;]9{@AaD7!, #A$oB/AKz #q gz tFAh:f+|n>~ [  [0XcBO_Vz&aZBYTQ`,c;t:IpL &sy>R^l-{`'h>5<ؕw:n+..|Ox C0ha|4=LޮPz=,VK]1ȫBX*傔JbZ%$Sj(0 Co7*@x:8dpPi@Jƌ_Lq]˲m;[r]'v૷ )K3>/Zv"DO"d2a[㈃̖[#iCEqAC<:κ/&L,UՃ TN[o߅^A_^.X,./#woWI9p69=6,_ |lL!Dш`#rMlc),ݯxEORt:C*T@lkzhVHp& ^P8_/gW.n?ƴ[#ϭ?2 j4l]rYr $ZTc~ϭSHT}-BY1->  X&xNZ p~g_// >Wzwuw7t3SEP Xh3=`p׬_o+ijSc"HA0&(𶬀ko^S46|yL>GE@l#T~\~yt+?'?e-,_^ȃP `f\͐P 8㷵Vc~^CAfb=m$ jNY_v 8?{66f^_7Z>4411!ҠQ%hy1A8CߝU~?WX9nO<ךDW-Ќ b*T_X<׹܂2qAatIdA_$p b`*8PRvſ,R-~O?>_fg6[0"и:9-^v dmon VVV% *a z{ܮΎw-no?9;9?7a?fqr”uTG H7. 
4@ 4Ms ڇ)'ğٜ—66.UFIE͊ sS6X L  hp ߽GR @pxes8g=g//A/7d"A(@SF- =0yըbj~A%PG 4`HcȣU!P VNgS?߬?tf gL`)MIejX!-PS=6Jfbc^7gKb~ޠWN%L`bЏ(eSR&o+-PcHG&+JCo# AƊiɿ a)B ^[64|P|ut=Aݬh 3&Xz]|R&I,=XaOm.V!?%f˳$glFZ۶m6M*rW[3ܷg~ӿzրp H)@lxyԡr-xc8 ?ϟ5 0 78 ǦoM@_{|dQދ) gVR {`5?^ +H@{ A`+  \a~~Mh B)4$7mX,d# T@u`/ E1-,A-P` ^bvy z@8Y (L"<_g,@N Q p"A#OP_z p g 0q57My~ "X+@}5L`x~ uk8s\\?L_hP(p71`pp|^vh4 x~_%Xa&8ݬ]w@:1G@^`&k_[Ox 0`+; IdTq,g8ϯ~ d&A!cϲ.P g hrzJ r 2+ث遜 `}Z(Ll8'3V Y6؍:xL u ؂n0>D R\0oƀVo"bphۭ.5$҇; _v #"S n9W" @ϣMP.ny  1j J@ LTk+ZRF$ÉwlcR`E`ݺ (G2t10 !عgjh, Ц=;&?T)X o^@A@!@Qj`F`0p,7ρ~Tn B`^{gp@Q0Naᐌ-8GпA?2u|}1q< .I,} ahj06Kbxc Xq S%*XsCEJXՀ25ҁ:=BʼnJMRՁZ]҄z2bv\ZOP,Tl$+dJفj,llnpr,™tl‘v‰xېi,_'UO-h:a~ֲ[J^)y{S0A2$ 0(~(yu}~7u 8_SO q 1ǟ'#x%z1ǟJ{7Ϛhs>?i3Hx}|Ktϝh0af%@bo{nN0aY|3~| DЁYnRmEXXQt,vu O $dHIENDB`DaemonEngine-crunch-ef4d32f/test/test-grayscale1-alpha1.png000066400000000000000000000006371503722002600236660ustar00rootroot00000000000000PNG  IHDRt tRNSv8XIDATx0FQ@Уhr! 2] Cj~4bYeY&INp\Nf/ʳ/̿!2  p 3V@F#J7J蕈^tw (Pf3f@U4; l=ZQ߃i Aԁua ? ?f / P1HҞu+jj7Q!°0q )MWorzuN*΢h=jQޤXi^m %-5=EMU]em;y#IENDB`DaemonEngine-crunch-ef4d32f/test/test-grayscale1-alpha8.png000066400000000000000000000131301503722002600236650ustar00rootroot00000000000000PNG  IHDR{`IDATx1 .`@Q&DiIsARrW^+i J0`=$mͿЯ{B_2h{'qڡvA'S uЮh;ggz)nh+3[I}g!ݯ}+EM?P,@gQ` |k!+pAh>hefen,5h&KE'|yn7) h[YO;LM ֗@'SgHGA<*3HpOW"L?볖ZmȲP3>@5?g^IZЪk_ ͣhHAu(j/`=2;[HP׬Z?U0"9=l -, =T!Z|J}M̞:Z?+x n*_Ɍe>>cǏ[sVJ?^$p5 /=J7v @_¾iqHQ|%0` 6!@DgZ]t@ 3 |3|$<~It`OH/ ea0"Ųq ?~¿āKI<J7@T:D(@u,new7~nJ@ |%`2P$0|uI?Ot J-A0A/Epֵ~NH@x+;;O?Cf8CamԮT0X=.K*H uut)bo'|(pY8~xB8 .PcĀOw'tA11`cfݿځ"Rgn Z@ =P$7DaxƀY?ӯk_#߆G P EP hg" ̗t[~|ׅ~ppDJQ> ,K35y cY~tП%`+8_+@5qI-Gm iZg4ݺib\g7~_ ?@1 ?%`@t@"!@R/uHV 9^v|i}7)04TPAB~GP(v xi?!$SeR P  @Hǀm/I@CK )\A$5<zGÑ5 ~,c8zhc:ubzH H@ |+yDHtATJ XvQp? 4)_4 O}zfJu@-A =< 4,CLvQh7L@[ (t+ [@+_@4IF}yPi  aK8Y6@_d) : x՝*+ZZH0n k.1B6i=RO t XY<5CPs1_{4dl`{'`(EgYSP}[ # dp. 
ABNp1DZZ ІxJc*u@vB5Ν J8B]v2FPܛABv<GHX Cf,!hX4?폇 " "+b+b^%Q%Q5q5qEEWWž2ˢˢ#??2hTѨCٸƽ$Gl ӱ?<||%@+@ BvpC XZ+7`=B?Oڿqw@v ОJ@g@I*C`xk?>[ǀCPOB(((Gj=RV!0# ;WK?<; @; )]k$/~Do wD w ݠ;vrĀo{Ԡh`E ]Akwj]it66oLzT8;"@Ł; VEnړ9*L_9!&.:M¿>%<(b  ~g*٧2 C`wߎ1R0@x-{ ;惠@ 6<@~e w愢(T TZ _9@j0 T?o?h'OOo?PHԈ3NZyG`w?t/ vqUPF[X.:(w~^?M TR Hqw`R@z+T.c*/ DxGx =- x\LhO (qY2Уt%߀L/©g&PP?F&AR)p};0`ec<  Tb -144d( +~|Nvq˃d4S*ĩiЌi -Uh0h)?<sz4zMwF;K@xUg9nFE34mpsLk`d RhKA ָ+*,)OExcEmY,H1_gg}I܁}_ >C!K輂O%3c!9贂 >RE$U7>Egi; ̄ }XDO{N??;555] ! S7>_|MPH:. AO|nnnQx;7=I,V +J ">艾,W!9` `h(:g| ++̚j: \07>/z |}}}#! rX xonnnQ k@灏كDavަJ C*?D|Ix|ooo? @[PPo^)xJb n lD?::FN>:HP=PJ7?_9Oz“,@ t%-xq|'=~~~\*u~N @IK5^r յ t%ȇm?| 777)xI*8ا\ ۏ{> O<%j6_?EOx?<<< ABA/008"*?o| 9i+"a`X/?NM/oi+9T @6@-:U~ֿoz! I p jh |^>I|Jh!|?ן/z‹Y9kC CH@3?_'ῘoG@l}Q ꋈx@Plo|J%% @ Zֿ!|^$HGO@@w2hz O7`TS 5 ކ q~?/|}Rj(z0 \~% i?=љ9N%!]B~,`K 4\X@9~?='9"=6CK+l X ? O-@{!\7A@'8_"V^B0Ui=~J@ \lJkcjr@~'2sn/05d'نUS_?w򜃤` 8 aj l @hpt~A * 6%[w_uǍ&@O =V$EĀK 1(x( t` 5Lܺ ?S a۷H5"p; ]G d* C51@ 07nI b8PbL @Z. @_nS1I`-bT8@K~G?r` ,] "%1A@AL1 b = r"0F@08~_)3$9B_A8E \@L@ *0wP( JP3([ 03&hx B":03X@m`w@J`0P &T\(X`9D?, ! CQ@PQ (iv TDKd -5Ǻ=EMgU G] 'e |WoC|W>K6@؟j1 ?y3x?o DXWtve ?=! 矘@BONA*d!wyN?B3XM"4_p:tO?y7t*@"~n`h[boeC28IENDB`DaemonEngine-crunch-ef4d32f/test/test-rgb8-alpha8.png000066400000000000000000000250561503722002600225060ustar00rootroot00000000000000PNG  IHDR\rf)IDATxԁ @C @GxW tAǛ%0@'d+9; Am/$-e)2tYFd]_ ]h'*! 
))[/I9 !FT= ވ7+˘Bi{u?^jY}ZzO9ޒb lu`a5|27B57l~ɵߡ{>1$ `˸A &^ޡ^t`G2xibZ6FMaWb1?r,x)VGl' 垾σxx S{Qo?w 0rd{+MLTXwb;s6u * rP ;d>+}P96x0GzfڒMw`ydc3~Z g'),""i5Ǧ32 B!4ï_ `,~XGa/6=3 9?Iy)߉2AJs+;vXl]&€]DM??2ׯ-+=a9=iZʵ9.IF3!ȱ ;;֋>{>eprr Ui 2>v+ 6o)~?vBف6%2o/ݟ ǀZ3rә mx#(jTPMPL X'KNXU(ás?B=!q( fr}^>F( ;r _Yq_}8 *S&)0[hG,xT"Yg9YN+8 : 5R3EA`c@D޽W> u88%:y|^̂2G('PhavwWM|~j\n]hu>ϓDYk.>:h&z# eY-c_f,` ?x|$0dzk:!x"Slwbwݷ9 5ιb ^`0t @ = 8a/c\' n\գ\"Wd DX@ֱWF@K?L"3Zؾ1_gFxkПz_{_FmO@' |?(=|!Bm e鶱 FZ<\89Ĵ^o&#|o Ů-s*{?t~b7X\ů=E`U;d] a~}KF&IZd/|p?E?|OM::I:AI&s Õ\хv_+Bw~p7{G;~۶[><鵀Ak]׾W4}/qn,g??^_BNZ (zzK""vDP I#K=&f`a3Ho|q}|-VJ)RE+Oc!>&A@b7\iV!)GG;pW<@/Gf'B0{ H"~,wT[{ܥ?pY!vn݁ò9J1)(|PRO?~|0<>9}/Aiow/A?7 8~З6W~luWKH Q\)GϋOa~^_ھ}?ch= _K{Qo׍q 3K=¸A? mfVÅ9@v'\,AU5>7 0o eׄ a e1Ss6PD@|/ю}ӂ!7-c~OQul;o3yI6NJ+w%zʕW~+P6^!# k ц& mݗm6<l)k`ƗS cŏ]A{ 3W6ZW2BPd n{e|lஉj;/p_>_GDh??gz[olhP Gܼ O^ǂcY s9.nenA JKN SQW&  "-m/" үٟe(xle}N_wfـ ݱRQ6( ]oͯCQLؤv5Hur2yWlWzsoz؊ϺeWBw DN̂ʣA96Ǿߠ쪽l(=3 T+~'Y@]Uk5ϕjϯsG{>ŎrfUѲXD,(<{'sPSپå=i{re*Ѳq{ "7:y2؜l _Cw\y^<ʋ IKS.A7MO#tUP }qp3Ny..l&"Nr@b*kz,]W?#":^ Uҕ[OdGPA+C@Pt8Aq>!=bҲBqQԲ11ŭ1"(\L>?xv .܊j)^W +;'~ſ[x6(!ܮEc>+tu`PGO|>@Pwuh_7ʴ(4 ѣ\ ]),w^ GOGkk~(Š 3^ 0D b .̜g. ~#mW5lltۜɄ{@z)ǥuMWk}x\cw~AleHnZ _ Oc?@/[C)h9xOSWŇ5XjQoYDG1AMr16HXk˗.VcM瞲չX[ms2FFCxY8U!&P2W.[I?iZj}2hU* "q;찣vaG8ÎF_? 0(_'x~<P1hƈI1.9=>RaW4,gE[/X|Fh7 V Gm<3Vz#rMMEurDzrwoom<}o}$Q+A4w}x[נz\2e.h]? oGDl-2W_ V?ρdjbl-g ݮ]-+dc*k5Ru-m&XEW~vQXw_ jk5|%ů;wu_3Gc~MbuN߃[p|KPUCv x~66P(OwN4niui;+u `z*Kt! m<09c[㱷jU16dB||/c{ ft=>Nb,͉9VeMkX*x~J|) 5DCik ot=\51׎~hb4ÅBThد`b<3C:'‰4݊SMP9+v6G^c`_/w&R. 
z!m189\1 [<0`o5urTX6ԉ䰀C+7=GŔO%ߕ=R֋ؑ f 'f Fg_v43AyH\OD4zJO,0&DNGI$0HpuF{#JWL QlDimiOKNz۷޸tڊ\E8t0\A3:L x!7̡HF.(|| deР1C#Wf1.R.| W(;y+hcOϔ&cR'^v+)y !^$FAcѹ7{ti5mZţ|_Yuú%Vr ~0TW\Gsxn/~W!^aI:U!Onv΅˕v y'G$6;r}σpAO`tl@ H^sK<_W?0]sQsQ"/Leڞ8{I p6_@_]KQ#B|[w=%H@0&OUH}<-J&br X̝9o'=1sهc&R,iX|, ]ɒ-WQ$QeUo`39$ߚ63'&!(|b-¶2s$+ƒꂯ |ɝ?hvٟ7r5VkA(+`(Nݲin }pIF">auZk\ uuG7>_0kN :ɗDaG[?;g";Ca۩T)qj+!Yqw-9:f.Z7{zXEEY\|XD\?)s?+C>dmT\V~;28dϘ*>]~]?JOțqz߃^+oPw|UFj _r廥c{ +l?7}?X%8_ @#ہ/m"g5vZΝ_*ƐRG|H = A 36"v+\CMIZ̦'{0lT=2vj\\_d("{{h:,|WBX&_`%2:ąw:pǃ9-(D{k&`淲_- (]vWH |緙9,r-' |<^~gl_|ؠx=I׆,cR&}ɯJ徿GUxRӲ?>?\ ;*Ѡ2A^%<[l$gL޿iO[6 Ms`н PmT5Pd>Q`fU\ UG?Zĭ_-xQ ?AQ 1_cKZ">H"r#- /YTHmޞVNcϧ?z?n-J>;yԀb$؉^ؽ~ >ȹCgb>$mHAu:߿m? :I"Ģ>8¯WΒ ZRs:dw$Pooc i<.۝#?#R!YN@z=d9b{b*{?P+@z0vEL)G `sxI|ekЋ/W\Ji|o-fGn_ ܫ FT ?b{&Ϭ3I3j zoJ+owaqS(z?xG*@J8W߿C<@ r(>ϐks-9_k{Oν7q@ TGY ߱rq1Z*17MLtx$T6^V{ yƘUj@iW ƥ kCZ5>%~97@4? NM,2O{C cT()žzŷb p ~`F՞B՟/Zwj?P$Om-?0\$^5G psF~= X@}Ks 3 x&b@I<:GV(s n}urw)znXs+U'@>79#Ld "Jx\!A&V*_ x_Xg!g2HJ%'va̱\gW dz?pKP P 0"B"Hw&9ϳ hA 9r&m n~ }0 h 2y~* br|9o>%ƛV`UXn{؀ j܊q.C$f_+@TЅ9 aq!,r{wcA[k xEPU0!UƮKFLջyu4窺CĽ*@௕fPж؟=6mv_h\Y1rM9:=ޒPh''|FοMgہkS/sd2naw ŵVՖ1i╾/ۖ0x 1 aW^HIDxh궚ϭW݅Ax zD@_υCM4ib_O5)[~9D~knM|`4 "y-j4{QYsaԠ*B#Zr_|uOLnHIƜ5(?85BGSkh fܨa+ wuy/k[yw* BA?  h{<_ ۹3?0j , ڗ .ʷgHR1gg~}$ NJzII!b^B/@S@L@ tPEw*ٞ\(=@2X06WB9/g@ڃdFH:FڅLE9@9VCwSc @@}JM@&th _P'BKrBO[ZbOژ;J/S/?u? >jBѾw&~wY~IS<wrcqgn,UdŊq$cFT5uH]"S PuG*k >CUx{xݫ_- P5{gȓ1~+6 Mͣ!t>*QBh}dݬiێKtvd6cZ M ;:O^7ؔ\0UuEJ A09eaxT3~'n`@,.\}؊;ofSD [#b,Z0R:jM;<:EӐr"j:0jthB7=zY/r;p.jEJ:Lfr=wZs'?է/~~_OGdq'gmΙC~ T!%:8R Cc GC@k͜M ]-(2b )?HI+ZRMՋ^W=8yQ9S)Srq۹ C_ ﺋw 7փ1s>MBAC#Ԣc̚ u5R;! 
\K9`B H9ᥠ[9cRLU+a>Vu0kLY^KV֒VA}.M7hK UY,8Xr6d1] +ebW,!:'RGFWKezW+bep@#{3m=Bw/W,-{b"a T#'w^S'?z:E^/VF-BJs1/ʢmXڮBʤ(-fX/19S]1ܡj^x{r5оB@ABqԪB&q5uӐk)#&vΈ|żw8~'_~W=.}ga9+PM7%GLJ7}rEq\_-U lA{eɝSBZ-r@#]2a "d@S_+mItWUFԠչ;"G\л}xqnzι57J!*X@c# )azX0{adz*0]+Gg0|;a Ѹwk킼D"D-Y$FF1+8Eb b]Z%![Āu׮2<S~Ղ*~_Nk~} s&LAm[Ȃ.tN`B%RY.&dkAD F#~|h xpOJK?AJQVb^^Co8B!ZKHl"prGl01أmFLvrKbAMI`L֒jF90;Uuj ATo-WL8s@#@Α˗//~}^W}d Y ҷ+yG_ay hOn3npWqg^ c9:cƗ#HS9q\cB f>![p @;{'P)c =_;GwE֌ Ĭt9|S=vo[҃a(R7ِ#S`""5M֡z!fcn> {uߔc=ȩm Xb9ie+ { ~'G.}y[e_"k(J{a8lΐ هrٍP0JF" sX; qyNUd!ܢ|/ߖz= LhVRe 7_M5|_'#[F@c7?~{0BCu8BӀ!fd-%Zfa?@ցLAhN`CAo@ )x^c76BME!jݐWG@dk1n&G>>+AIj5y Ar%3JK<*`[/>pCIG/`lGiK)'Ͷp𮗃)HO )QMQ]'VhΘ 놽 K!*Ntp&&Z@[n7a" l?#d"OڸSU7j\#%[H³ȭMcMpzB\M84LW`7zb`8)7TC!H|`rДqo[P֤ ٖjO \읁'D-,)m%(ðDm5"6[歰@4ze^$t-)9A"x~nŜ^L]IB;$Ò~~[B% .$ǸNT?UTxD(qs?z3گ ișD+CAѾLK1ƻhDSCzX3vƢ׏Q[ S ٟ0`EO0MR p P+7irOUw(Q9m i4\`Ij!&а_'HU`JCq*VX".yY}"斗K4 t0Zј G8g{m>N͚p a@lYQTEJX} 5I(|iuƆ$ ,]Us 7s*jb=UU<@N?Js [Lmn[lpzg&P Go{qp a1% Hptc}U-ΘH${2PJZȑ"y\R`<&TS@mp:B&do karE5(Y!RL锴7$UY=qvbgL̮Đe;)!L&-H8z{05s00bf{> 1v8y{-(S&z֪<ݯ񊷿ݣ#,6s5EU8]b1D tvm[Y?)Ү sF/?8~{8W!;`sAD7;A`TK65Z299ft4" * ",(3Lj6O+<A$7 ^L~ E)oqSDĠ9b+7pGh#\> Z.R/RmΠC=Eϩe7cptȸ[RǖA^?0 )D19aDܪU2v؜WÏ<.@0b!'T3)%r lgP@'UXbZB2CbvHW n#+h8qSƖr5rU |K%#Fzp wsp!U@%Ӭo_<ʖ/j3 YAnL2% bٽnC.҆@v!52!"z4ejqRj~( aC{pX=cRpT)Z x^om-A)l95ZcWYۚbATYpWp/S5~U[섉d=ҜMET356'J1Licqk'|Lg<US oؽ{=/wgQ#Đ}}鿔E/ÈӟEA|9d0!@+0"CېÐIY쌆ZLK.JL(+͖Ba `XlX