simd-adler32-0.3.7/.cargo_vcs_info.json0000644000000001360000000000100132310ustar { "git": { "sha1": "94f3f72eb7346d9e2bfe7e985a496582da409a63" }, "path_in_vcs": "" }simd-adler32-0.3.7/.github/workflows/build.yaml000064400000000000000000000121711046102023000174030ustar 00000000000000name: build on: push: branches-ignore: [main] pull_request: branches: [main] jobs: clippy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v1 - uses: actions-rs/toolchain@v1 with: toolchain: nightly components: clippy override: true - uses: actions-rs/clippy-check@v1 with: token: ${{ secrets.GITHUB_TOKEN }} args: --all-features test-doc: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Install toolchain uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: nightly override: true - uses: Swatinem/rust-cache@v1 - run: cargo install cargo-deadlinks - name: doc env: RUSTDOCFLAGS: --cfg doc_cfg run: cargo deadlinks --ignore-fragments -- --all test: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: include: - os: ubuntu-latest target: x86_64-unknown-linux-gnu toolchain: stable - os: macos-latest target: x86_64-apple-darwin toolchain: stable # TODO: also aarch64 / M1 - os: windows-latest target: x86_64-pc-windows-gnu toolchain: stable - os: windows-latest target: x86_64-pc-windows-msvc toolchain: beta # Test both windows-gnu and windows-msvc; use beta rust on one - os: ubuntu-latest deps: sudo apt-get update ; sudo apt install gcc-multilib target: i686-unknown-linux-gnu toolchain: nightly - os: ubuntu-latest target: x86_64-unknown-linux-gnu toolchain: nightly variant: minimal_versions steps: - uses: actions/checkout@v2 - name: Install toolchain uses: actions-rs/toolchain@v1 with: profile: minimal target: ${{ matrix.target }} toolchain: ${{ matrix.toolchain }} override: true - uses: Swatinem/rust-cache@v1 - run: ${{ matrix.deps }} - name: Maybe minimal versions if: ${{ matrix.variant == 'minimal_versions' }} run: cargo generate-lockfile -Z 
minimal-versions - name: Test run: | cargo test --lib --target ${{ matrix.target }} ${{ matrix.toolchain == 'nightly' && '--features=nightly' || '' }} env: # Enables address sanitization if supported target and nightly. RUSTFLAGS: | ${{ ( matrix.toolchain == 'nightly' && ( matrix.target == 'aarch64-apple-darwin' || matrix.target == 'aarch64-fuchsia' || matrix.target == 'aarch64-unknown-linux-gnu' || matrix.target == 'x86_64-apple-darwin' || matrix.target == 'x86_64-fuchsia' || matrix.target == 'x86_64-unknown-freebsd' || matrix.target == 'x86_64-unknown-linux-gnu' ) ) && '-Z sanitizer=address' || '' }} test-cross: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: include: - os: ubuntu-latest target: mips-unknown-linux-gnu toolchain: stable - os: ubuntu-latest target: arm-unknown-linux-gnueabi toolchain: stable - os: ubuntu-latest target: aarch64-unknown-linux-gnu toolchain: stable - os: ubuntu-latest target: arm-unknown-linux-gnueabi toolchain: nightly - os: ubuntu-latest target: aarch64-unknown-linux-gnu toolchain: nightly steps: - uses: actions/checkout@v2 - name: Install toolchain uses: actions-rs/toolchain@v1 with: profile: minimal target: ${{ matrix.target }} toolchain: ${{ matrix.toolchain }} override: true - uses: Swatinem/rust-cache@v1 - name: Install cross run: cargo install cross || true - name: Test run: | cross test --no-fail-fast --target ${{ matrix.target }} ${{ matrix.toolchain == 'nightly' && '--features=nightly' || '' }} test-msrv: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Install toolchain uses: actions-rs/toolchain@v1 with: target: x86_64-unknown-linux-gnu toolchain: 1.36.0 # MSRV - uses: Swatinem/rust-cache@v1 - name: Test run: cargo test --target=x86_64-unknown-linux-gnu --no-default-features --features=std test-no-std: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Install toolchain uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: nightly target: thumbv6m-none-eabi override: 
true - uses: Swatinem/rust-cache@v1 - name: Build top-level only run: cargo build --target=thumbv6m-none-eabi --no-default-features simd-adler32-0.3.7/.github/workflows/fuzz.yaml000064400000000000000000000025461046102023000173070ustar 00000000000000name: fuzz on: schedule: # Run every 24h - cron: "0 0 * * *" defaults: run: working-directory: fuzz jobs: fuzz: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest] target: [avx2, sse2, ssse3] steps: - uses: actions/checkout@v2 - name: setup uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: nightly override: true - uses: Swatinem/rust-cache@v1 - run: cargo install cargo-fuzz - name: Download corpus artifact run: | wget https://nightly.link/mcountryman/simd-adler32/workflows/fuzz.yaml/main/corpus.zip mkdir -p corpus unzip corpus.zip -d corpus # keep going if artifact doesn't exist continue-on-error: true - name: Run fuzz test (30m) run: | cargo fuzz run ${{ matrix.target }} -- -max_total_time=1800 - name: Archive artifacts uses: actions/upload-artifact@v2 if: failure() with: name: artifacts path: fuzz/artifacts # big decision. do we store corpus from failed jobs? for now we don't.. 
- uses: actions/upload-artifact@v2 with: name: corpus path: | fuzz/corpus/ fuzz/corpus/${{ matrix.target }} simd-adler32-0.3.7/.github/workflows/publish.yaml000064400000000000000000000014271046102023000177540ustar 00000000000000name: publish on: push: branches: [main] jobs: publish: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Configure git user run: | git config user.name "GitHub Actions" git config user.email noreply@github.com - name: Install cargo-release uses: actions-rs/install@v0.1 with: crate: cargo-release version: latest - name: Publish run: | cargo login ${{ secrets.CRATES_IO_TOKEN }} cargo release --no-confirm --no-push -x - name: Push changes uses: ad-m/github-push-action@master with: tags: true force: true branch: ${{ github.ref }} github_token: ${{ secrets.GITHUB_TOKEN }} simd-adler32-0.3.7/.gitignore000064400000000000000000000001151046102023000140060ustar 00000000000000# editor .idea/ .vscode/ # fuzz fuzz/artifacts fuzz/corpus # cargo target/ simd-adler32-0.3.7/.rustfmt.toml000064400000000000000000000000351046102023000144760ustar 00000000000000max_width = 90 tab_spaces = 2simd-adler32-0.3.7/CHANGELOG.md000064400000000000000000000003501046102023000136300ustar 00000000000000# Changelog ## 0.3.3 - 2021-04-14 ### Features - **from_checksum**: add `Adler32::from_checksum` ### Performance Improvements - **scalar**: improve scalar performance by 90-600% - Defer modulo until right before u16 overflow simd-adler32-0.3.7/Cargo.toml0000644000000025150000000000100112320ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package] edition = "2018" name = "simd-adler32" version = "0.3.7" authors = ["Marvin Countryman "] exclude = ["bench"] description = "A SIMD-accelerated Adler-32 hash algorithm implementation." readme = "README.md" keywords = [ "simd", "avx2", "ssse3", "adler", "adler32", ] categories = [ "algorithms", "no-std", ] license = "MIT" repository = "https://github.com/mcountryman/simd-adler32" [profile.release] opt-level = 2 debug = 2 [[bench]] name = "alts" path = "bench/alts.rs" harness = false [[bench]] name = "variants" path = "bench/variants.rs" harness = false [dev-dependencies.adler] version = "1.0.2" [dev-dependencies.adler32] version = "1.2.0" [dev-dependencies.criterion] version = "0.3" [dev-dependencies.rand] version = "0.8" [features] const-generics = [] default = [ "std", "const-generics", ] nightly = [] std = [] simd-adler32-0.3.7/Cargo.toml.orig000064400000000000000000000013561046102023000147150ustar 00000000000000[package] name = "simd-adler32" authors = ["Marvin Countryman "] license = "MIT" version = "0.3.7" edition = "2018" keywords = ["simd", "avx2", "ssse3", "adler", "adler32"] categories = ["algorithms", "no-std"] repository = "https://github.com/mcountryman/simd-adler32" description = "A SIMD-accelerated Adler-32 hash algorithm implementation." 
exclude = ["bench"] [profile.release] debug = true opt-level = 2 [[bench]] name = "alts" path = "bench/alts.rs" harness = false [[bench]] name = "variants" path = "bench/variants.rs" harness = false [features] default = ["std", "const-generics"] std = [] nightly = [] const-generics = [] [dev-dependencies] rand = "0.8" criterion = "0.3" # competition adler = "1.0.2" adler32 = "1.2.0" simd-adler32-0.3.7/LICENSE.md000064400000000000000000000020661046102023000134310ustar 00000000000000MIT License Copyright (c) [2021] [Marvin Countryman] Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. simd-adler32-0.3.7/README.md000064400000000000000000000106671046102023000133120ustar 00000000000000

simd-adler32

docs.rs badge crates.io badge mit license badge

A SIMD-accelerated Adler-32 hash algorithm implementation. ## Features - No dependencies - Supports `no_std` (with `default-features = false`) - Runtime CPU feature detection (when `std` is enabled) - Blazing fast performance on as many targets as possible (currently only x86 and x86_64) - Defaults to the scalar implementation when SIMD is not available ## Quick start > Cargo.toml ```toml [dependencies] simd-adler32 = "*" ``` > example.rs ```rust use simd_adler32::Adler32; let mut adler = Adler32::new(); adler.write(b"rust is pretty cool, man"); let hash = adler.finish(); println!("{}", hash); // 1921255656 ``` ## Support **CPU Features** | impl | arch | feature | | ---- | ---------------- | ------- | | ✅ | `x86`, `x86_64` | avx512 | | ✅ | `x86`, `x86_64` | avx2 | | ✅ | `x86`, `x86_64` | ssse3 | | ✅ | `x86`, `x86_64` | sse2 | | 🚧 | `arm`, `aarch64` | neon | | ✅ | `wasm32` | simd128 | **MSRV** `1.36.0`\*\* The minimum supported Rust version is tested before a new version is published. [**] Feature `const-generics` needs to be disabled to build on rustc versions `<1.51`, which can be done by updating your dependency definition to the following. > Cargo.toml ```toml [dependencies] simd-adler32 = { version = "*", default-features = false, features = ["std"] } ``` ## Performance The benchmarks listed display the number of randomly generated bytes (10k / 100k) and the library name. Benchmark sources can be found under the [bench](/bench) directory. Crates used for comparison are [adler](https://crates.io/crates/adler) and [adler32](https://crates.io/crates/adler32). > Windows 10 Pro - Intel i5-8300H @ 2.30GHz | name | avg. time | avg. 
thrpt | | ----------------------- | --------------- | ------------------ | | **10k/simd-adler32** | **212.61 ns** | **43.805 GiB/s** | | 10k/wuffs | 3843 ns | 2.63 GiB/s\* | | 10k/adler32 | 4.8084 us | 1.9369 GiB/s | | 10k/adler | 17.979 us | 530.43 MiB/s | | ----------------------- | --------------- | ------------------ | | **100k/simd-adler32** | **2.7951 us** | **33.320 GiB/s** | | 100k/wuffs | 34733 ns | 2.6814 GiB/s\* | | 100k/adler32 | 48.488 us | 1.9207 GiB/s | | 100k/adler | 178.36 us | 534.69 MiB/s | \* wuffs was run using mingw64/gcc, with `wuffs bench -ccompilers=gcc -reps=1 -iterscale=300 std/adler32`. > MacBookPro16,1 - Intel i9-9880H CPU @ 2.30GHz | name | avg. time | avg. thrpt | | ----------------------- | --------------- | ------------------ | | **10k/simd-adler32** | **200.37 ns** | **46.480 GiB/s** | | 10k/adler32 | 4.1516 us | 2.2433 GiB/s | | 10k/adler | 10.220 us | 933.15 MiB/s | | ----------------------- | --------------- | ------------------ | | **100k/simd-adler32** | **2.3282 us** | **40.003 GiB/s** | | 100k/adler32 | 41.130 us | 2.2643 GiB/s | | 100k/adler | 83.776 us | 534.69 MiB/s | ## Safety This crate contains a significant amount of `unsafe` code due to the requirement of `unsafe` for SIMD intrinsics. Fuzzing is done on release and debug builds prior to publishing via `afl`. Fuzz tests can be found under the [fuzz](/fuzz) directory. ## Resources - [LICENSE](./LICENSE.md) - MIT - [CHANGELOG](./CHANGELOG.md) ## Credits Thank you to the contributors of the following projects. - [adler](https://github.com/jonas-schievink/adler) - [adler32](https://github.com/remram44/adler32-rs) - [crc32fast](https://github.com/srijs/rust-crc32fast) - [wuffs](https://github.com/google/wuffs) - [chromium](https://bugs.chromium.org/p/chromium/issues/detail?id=762564) - [zlib](https://zlib.net/) ## Contributing Feel free to submit an issue or pull request. 
:smile: simd-adler32-0.3.7/src/hash.rs000064400000000000000000000030141046102023000140770ustar 00000000000000use crate::{Adler32, Adler32Hash}; impl Adler32Hash for &[u8] { fn hash(&self) -> u32 { let mut hash = Adler32::new(); hash.write(self); hash.finish() } } impl Adler32Hash for &str { fn hash(&self) -> u32 { let mut hash = Adler32::new(); hash.write(self.as_bytes()); hash.finish() } } #[cfg(feature = "const-generics")] impl Adler32Hash for [u8; SIZE] { fn hash(&self) -> u32 { let mut hash = Adler32::new(); hash.write(self); hash.finish() } } macro_rules! array_impl { ($s:expr, $($size:expr),+) => { array_impl!($s); $(array_impl!{$size})* }; ($size:expr) => { #[cfg(not(feature = "const-generics"))] impl Adler32Hash for [u8; $size] { fn hash(&self) -> u32 { let mut hash = Adler32::new(); hash.write(self); hash.finish() } } }; } array_impl!( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 1024, 1024 * 1024, 1024 * 1024 * 1024, 2048, 4096 ); simd-adler32-0.3.7/src/imp/avx2.rs000064400000000000000000000120751046102023000146300ustar 00000000000000use super::Adler32Imp; /// Resolves update implementation if CPU supports avx2 instructions. 
pub fn get_imp() -> Option { get_imp_inner() } #[inline] #[cfg(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64")))] fn get_imp_inner() -> Option { if std::is_x86_feature_detected!("avx2") { Some(imp::update) } else { None } } #[inline] #[cfg(all( target_feature = "avx2", not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) ))] fn get_imp_inner() -> Option { Some(imp::update) } #[inline] #[cfg(all( not(target_feature = "avx2"), not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) ))] fn get_imp_inner() -> Option { None } #[cfg(all( any(target_arch = "x86", target_arch = "x86_64"), any(feature = "std", target_feature = "avx2") ))] mod imp { const MOD: u32 = 65521; const NMAX: usize = 5552; const BLOCK_SIZE: usize = 32; const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; #[cfg(target_arch = "x86")] use core::arch::x86::*; #[cfg(target_arch = "x86_64")] use core::arch::x86_64::*; pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { unsafe { update_imp(a, b, data) } } #[inline] #[target_feature(enable = "avx2")] unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) { let mut a = a as u32; let mut b = b as u32; let chunks = data.chunks_exact(CHUNK_SIZE); let remainder = chunks.remainder(); for chunk in chunks { update_chunk_block(&mut a, &mut b, chunk); } update_block(&mut a, &mut b, remainder); (a as u16, b as u16) } #[inline] unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { debug_assert_eq!( chunk.len(), CHUNK_SIZE, "Unexpected chunk size (expected {}, got {})", CHUNK_SIZE, chunk.len() ); reduce_add_blocks(a, b, chunk); *a %= MOD; *b %= MOD; } #[inline] unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { debug_assert!( chunk.len() <= CHUNK_SIZE, "Unexpected chunk size (expected <= {}, got {})", CHUNK_SIZE, chunk.len() ); for byte in reduce_add_blocks(a, b, chunk) { *a += *byte as u32; *b += *a; } *a %= MOD; *b %= MOD; } #[inline(always)] unsafe fn 
reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] { if chunk.len() < BLOCK_SIZE { return chunk; } let blocks = chunk.chunks_exact(BLOCK_SIZE); let blocks_remainder = blocks.remainder(); let one_v = _mm256_set1_epi16(1); let zero_v = _mm256_setzero_si256(); let weights = get_weights(); let mut p_v = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, (*a * blocks.len() as u32) as _); let mut a_v = _mm256_setzero_si256(); let mut b_v = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, *b as _); for block in blocks { let block_ptr = block.as_ptr() as *const _; let block = _mm256_loadu_si256(block_ptr); p_v = _mm256_add_epi32(p_v, a_v); a_v = _mm256_add_epi32(a_v, _mm256_sad_epu8(block, zero_v)); let mad = _mm256_maddubs_epi16(block, weights); b_v = _mm256_add_epi32(b_v, _mm256_madd_epi16(mad, one_v)); } b_v = _mm256_add_epi32(b_v, _mm256_slli_epi32(p_v, 5)); *a += reduce_add(a_v); *b = reduce_add(b_v); blocks_remainder } #[inline(always)] unsafe fn reduce_add(v: __m256i) -> u32 { let sum = _mm_add_epi32(_mm256_castsi256_si128(v), _mm256_extracti128_si256(v, 1)); let hi = _mm_unpackhi_epi64(sum, sum); let sum = _mm_add_epi32(hi, sum); let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1)); let sum = _mm_add_epi32(sum, hi); _mm_cvtsi128_si32(sum) as _ } #[inline(always)] unsafe fn get_weights() -> __m256i { _mm256_set_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, ) } } #[cfg(test)] mod tests { use rand::Rng; #[test] fn zeroes() { assert_sum_eq(&[]); assert_sum_eq(&[0]); assert_sum_eq(&[0, 0]); assert_sum_eq(&[0; 100]); assert_sum_eq(&[0; 1024]); assert_sum_eq(&[0; 1024 * 1024]); } #[test] fn ones() { assert_sum_eq(&[]); assert_sum_eq(&[1]); assert_sum_eq(&[1, 1]); assert_sum_eq(&[1; 100]); assert_sum_eq(&[1; 1024]); assert_sum_eq(&[1; 1024 * 1024]); } #[test] fn random() { let mut random = [0; 1024 * 1024]; rand::thread_rng().fill(&mut random[..]); assert_sum_eq(&random[..1]); 
assert_sum_eq(&random[..100]); assert_sum_eq(&random[..1024]); assert_sum_eq(&random[..1024 * 1024]); } /// Example calculation from https://en.wikipedia.org/wiki/Adler-32. #[test] fn wiki() { assert_sum_eq(b"Wikipedia"); } fn assert_sum_eq(data: &[u8]) { if let Some(update) = super::get_imp() { let (a, b) = update(1, 0, data); let left = u32::from(b) << 16 | u32::from(a); let right = adler::adler32_slice(data); assert_eq!(left, right, "len({})", data.len()); } } } simd-adler32-0.3.7/src/imp/avx512.rs000064400000000000000000000135731046102023000150020ustar 00000000000000use super::Adler32Imp; /// Resolves update implementation if CPU supports avx512f and avx512bw instructions. pub fn get_imp() -> Option { get_imp_inner() } #[inline] #[cfg(all( feature = "std", feature = "nightly", any(target_arch = "x86", target_arch = "x86_64") ))] fn get_imp_inner() -> Option { let has_avx512f = std::is_x86_feature_detected!("avx512f"); let has_avx512bw = std::is_x86_feature_detected!("avx512bw"); if has_avx512f && has_avx512bw { Some(imp::update) } else { None } } #[inline] #[cfg(all( feature = "nightly", all(target_feature = "avx512f", target_feature = "avx512bw"), not(all(feature = "std", any(target_arch = "x86", target_arch = "x86_64"))) ))] fn get_imp_inner() -> Option { Some(imp::update) } #[inline] #[cfg(all( not(all(feature = "nightly", target_feature = "avx512f", target_feature = "avx512bw")), not(all( feature = "std", feature = "nightly", any(target_arch = "x86", target_arch = "x86_64") )) ))] fn get_imp_inner() -> Option { None } #[cfg(all( feature = "nightly", any(target_arch = "x86", target_arch = "x86_64"), any( feature = "std", all(target_feature = "avx512f", target_feature = "avx512bw") ) ))] mod imp { const MOD: u32 = 65521; const NMAX: usize = 5552; const BLOCK_SIZE: usize = 64; const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; #[cfg(target_arch = "x86")] use core::arch::x86::*; #[cfg(target_arch = "x86_64")] use core::arch::x86_64::*; pub fn update(a: 
u16, b: u16, data: &[u8]) -> (u16, u16) { unsafe { update_imp(a, b, data) } } #[inline] #[target_feature(enable = "avx512f")] #[target_feature(enable = "avx512bw")] unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) { let mut a = a as u32; let mut b = b as u32; let chunks = data.chunks_exact(CHUNK_SIZE); let remainder = chunks.remainder(); for chunk in chunks { update_chunk_block(&mut a, &mut b, chunk); } update_block(&mut a, &mut b, remainder); (a as u16, b as u16) } #[inline] unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { debug_assert_eq!( chunk.len(), CHUNK_SIZE, "Unexpected chunk size (expected {}, got {})", CHUNK_SIZE, chunk.len() ); reduce_add_blocks(a, b, chunk); *a %= MOD; *b %= MOD; } #[inline] unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { debug_assert!( chunk.len() <= CHUNK_SIZE, "Unexpected chunk size (expected <= {}, got {})", CHUNK_SIZE, chunk.len() ); for byte in reduce_add_blocks(a, b, chunk) { *a += *byte as u32; *b += *a; } *a %= MOD; *b %= MOD; } #[inline(always)] unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] { if chunk.len() < BLOCK_SIZE { return chunk; } let blocks = chunk.chunks_exact(BLOCK_SIZE); let blocks_remainder = blocks.remainder(); let one_v = _mm512_set1_epi16(1); let zero_v = _mm512_setzero_si512(); let weights = get_weights(); let p_v = (*a * blocks.len() as u32) as _; let mut p_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, p_v); let mut a_v = _mm512_setzero_si512(); let mut b_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, *b as _); for block in blocks { let block_ptr = block.as_ptr() as *const _; let block = _mm512_loadu_si512(block_ptr); p_v = _mm512_add_epi32(p_v, a_v); a_v = _mm512_add_epi32(a_v, _mm512_sad_epu8(block, zero_v)); let mad = _mm512_maddubs_epi16(block, weights); b_v = _mm512_add_epi32(b_v, _mm512_madd_epi16(mad, one_v)); } b_v = _mm512_add_epi32(b_v, _mm512_slli_epi32(p_v, 6)); *a 
+= reduce_add(a_v); *b = reduce_add(b_v); blocks_remainder } #[inline(always)] unsafe fn reduce_add(v: __m512i) -> u32 { let v: [__m256i; 2] = core::mem::transmute(v); reduce_add_256(v[0]) + reduce_add_256(v[1]) } #[inline(always)] unsafe fn reduce_add_256(v: __m256i) -> u32 { let v: [__m128i; 2] = core::mem::transmute(v); let sum = _mm_add_epi32(v[0], v[1]); let hi = _mm_unpackhi_epi64(sum, sum); let sum = _mm_add_epi32(hi, sum); let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1)); let sum = _mm_add_epi32(sum, hi); let sum = _mm_cvtsi128_si32(sum) as _; sum } #[inline(always)] unsafe fn get_weights() -> __m512i { _mm512_set_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, ) } } #[cfg(test)] mod tests { use rand::Rng; #[test] fn zeroes() { assert_sum_eq(&[]); assert_sum_eq(&[0]); assert_sum_eq(&[0, 0]); assert_sum_eq(&[0; 100]); assert_sum_eq(&[0; 1024]); assert_sum_eq(&[0; 1024 * 1024]); } #[test] fn ones() { assert_sum_eq(&[]); assert_sum_eq(&[1]); assert_sum_eq(&[1, 1]); assert_sum_eq(&[1; 100]); assert_sum_eq(&[1; 1024]); assert_sum_eq(&[1; 1024 * 1024]); } #[test] fn random() { let mut random = [0; 1024 * 1024]; rand::thread_rng().fill(&mut random[..]); assert_sum_eq(&random[..1]); assert_sum_eq(&random[..100]); assert_sum_eq(&random[..1024]); assert_sum_eq(&random[..1024 * 1024]); } /// Example calculation from https://en.wikipedia.org/wiki/Adler-32. 
#[test] fn wiki() { assert_sum_eq(b"Wikipedia"); } fn assert_sum_eq(data: &[u8]) { if let Some(update) = super::get_imp() { let (a, b) = update(1, 0, data); let left = u32::from(b) << 16 | u32::from(a); let right = adler::adler32_slice(data); assert_eq!(left, right, "len({})", data.len()); } } } simd-adler32-0.3.7/src/imp/mod.rs000064400000000000000000000007611046102023000145260ustar 00000000000000pub mod avx2; pub mod avx512; pub mod scalar; pub mod sse2; pub mod ssse3; pub mod wasm; pub type Adler32Imp = fn(u16, u16, &[u8]) -> (u16, u16); #[inline] #[allow(non_snake_case)] pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 { ((z << 6) | (y << 4) | (x << 2) | w) as i32 } pub fn get_imp() -> Adler32Imp { avx512::get_imp() .or_else(avx2::get_imp) .or_else(ssse3::get_imp) .or_else(sse2::get_imp) .or_else(wasm::get_imp) .unwrap_or(scalar::update) } simd-adler32-0.3.7/src/imp/neon.rs000064400000000000000000000134121046102023000147030ustar 00000000000000use super::Adler32Imp; /// Resolves update implementation if CPU supports avx512f and avx512bw instructions. 
pub fn get_imp() -> Option { get_imp_inner() } #[inline] #[cfg(all(feature = "std", feature = "nightly", target_arch = "arm"))] fn get_imp_inner() -> Option { if std::is_arm_feature_detected("neon") { Some(imp::update) } else { None } } #[inline] #[cfg(all(feature = "std", feature = "nightly", target_arch = "aarch64"))] fn get_imp_inner() -> Option { if std::is_aarch64_feature_detected("neon") { Some(imp::update) } else { None } } #[inline] #[cfg(all( feature = "nightly", target_feature = "neon", not(all(feature = "std", any(target_arch = "arm", target_arch = "aarch64"))) ))] fn get_imp_inner() -> Option { Some(imp::update) } #[inline] #[cfg(all( not(target_feature = "neon"), not(all( feature = "std", feature = "nightly", any(target_arch = "arm", target_arch = "aarch64") )) ))] fn get_imp_inner() -> Option { None } #[cfg(all( feature = "nightly", any(target_arch = "arm", target_arch = "aarch64"), any(feature = "std", target_feature = "neon") ))] mod imp { const MOD: u32 = 65521; const NMAX: usize = 5552; const BLOCK_SIZE: usize = 64; const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE; #[cfg(target_arch = "aarch64")] use core::arch::aarch64::*; #[cfg(target_arch = "arm")] use core::arch::arm::*; pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { unsafe { update_imp(a, b, data) } } #[inline] #[target_feature(enable = "neon")] unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) { let mut a = a as u32; let mut b = b as u32; let chunks = data.chunks_exact(CHUNK_SIZE); let remainder = chunks.remainder(); for chunk in chunks { update_chunk_block(&mut a, &mut b, chunk); } update_block(&mut a, &mut b, remainder); (a as u16, b as u16) } #[inline] unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) { debug_assert_eq!( chunk.len(), CHUNK_SIZE, "Unexpected chunk size (expected {}, got {})", CHUNK_SIZE, chunk.len() ); reduce_add_blocks(a, b, chunk); *a %= MOD; *b %= MOD; } #[inline] unsafe fn update_block(a: &mut u32, b: &mut u32, 
chunk: &[u8]) { debug_assert!( chunk.len() <= CHUNK_SIZE, "Unexpected chunk size (expected <= {}, got {})", CHUNK_SIZE, chunk.len() ); for byte in reduce_add_blocks(a, b, chunk) { *a += *byte as u32; *b += *a; } *a %= MOD; *b %= MOD; } #[inline(always)] unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] { if chunk.len() < BLOCK_SIZE { return chunk; } let blocks = chunk.chunks_exact(BLOCK_SIZE); let blocks_remainder = blocks.remainder(); let one_v = _mm512_set1_epi16(1); let zero_v = _mm512_setzero_si512(); let weights = get_weights(); let p_v = (*a * blocks.len() as u32) as _; let mut p_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, p_v); let mut a_v = _mm512_setzero_si512(); let mut b_v = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, *b as _); for block in blocks { let block_ptr = block.as_ptr() as *const _; let block = _mm512_loadu_si512(block_ptr); p_v = _mm512_add_epi32(p_v, a_v); a_v = _mm512_add_epi32(a_v, _mm512_sad_epu8(block, zero_v)); let mad = _mm512_maddubs_epi16(block, weights); b_v = _mm512_add_epi32(b_v, _mm512_madd_epi16(mad, one_v)); } b_v = _mm512_add_epi32(b_v, _mm512_slli_epi32(p_v, 6)); *a += reduce_add(a_v); *b = reduce_add(b_v); blocks_remainder } #[inline(always)] unsafe fn reduce_add(v: __m512i) -> u32 { let v: [__m256i; 2] = core::mem::transmute(v); reduce_add_256(v[0]) + reduce_add_256(v[1]) } #[inline(always)] unsafe fn reduce_add_256(v: __m256i) -> u32 { let v: [__m128i; 2] = core::mem::transmute(v); let sum = _mm_add_epi32(v[0], v[1]); let hi = _mm_unpackhi_epi64(sum, sum); let sum = _mm_add_epi32(hi, sum); let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1)); let sum = _mm_add_epi32(sum, hi); let sum = _mm_cvtsi128_si32(sum) as _; sum } #[inline(always)] unsafe fn get_weights() -> __m512i { _mm512_set_epi8( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, ) } } #[cfg(test)] mod tests { use rand::Rng; #[test] fn zeroes() { assert_sum_eq(&[]); assert_sum_eq(&[0]); assert_sum_eq(&[0, 0]); assert_sum_eq(&[0; 100]); assert_sum_eq(&[0; 1024]); assert_sum_eq(&[0; 1024 * 1024]); } #[test] fn ones() { assert_sum_eq(&[]); assert_sum_eq(&[1]); assert_sum_eq(&[1, 1]); assert_sum_eq(&[1; 100]); assert_sum_eq(&[1; 1024]); assert_sum_eq(&[1; 1024 * 1024]); } #[test] fn random() { let mut random = [0; 1024 * 1024]; rand::thread_rng().fill(&mut random[..]); assert_sum_eq(&random[..1]); assert_sum_eq(&random[..100]); assert_sum_eq(&random[..1024]); assert_sum_eq(&random[..1024 * 1024]); } /// Example calculation from https://en.wikipedia.org/wiki/Adler-32. #[test] fn wiki() { assert_sum_eq(b"Wikipedia"); } fn assert_sum_eq(data: &[u8]) { if let Some(update) = super::get_imp() { let (a, b) = update(1, 0, data); let left = u32::from(b) << 16 | u32::from(a); let right = adler::adler32_slice(data); assert_eq!(left, right, "len({})", data.len()); } } } simd-adler32-0.3.7/src/imp/scalar.rs000064400000000000000000000026731046102023000152200ustar 00000000000000const MOD: u32 = 65521; const NMAX: usize = 5552; pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) { let mut a = a as u32; let mut b = b as u32; let chunks = data.chunks_exact(NMAX); let remainder = chunks.remainder(); for chunk in chunks { for byte in chunk { a = a.wrapping_add(*byte as _); b = b.wrapping_add(a); } a %= MOD; b %= MOD; } for byte in remainder { a = a.wrapping_add(*byte as _); b = b.wrapping_add(a); } a %= MOD; b %= MOD; (a as u16, b as u16) } #[cfg(test)] mod tests { #[test] fn zeroes() { assert_eq!(adler32(&[]), 1); assert_eq!(adler32(&[0]), 1 | 1 << 16); assert_eq!(adler32(&[0, 0]), 1 | 2 << 16); assert_eq!(adler32(&[0; 100]), 0x00640001); assert_eq!(adler32(&[0; 1024]), 0x04000001); assert_eq!(adler32(&[0; 1024 * 1024]), 0x00f00001); } #[test] fn ones() { 
/// SSE2 implementation of the Adler-32 update step.
///
/// Compiled on x86/x86_64 when either the `std` feature is on (so that
/// `get_imp_inner` can detect sse2 at runtime) or sse2 is statically enabled.
#[cfg(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    any(feature = "std", target_feature = "sse2")
))]
mod imp {
    /// Modulus applied to both running sums.
    const MOD: u32 = 65521;
    /// Upper bound used to derive `CHUNK_SIZE`.
    // NOTE(review): presumably the usual Adler-32 limit on how many bytes can be
    // summed in `u32` before a modulo is required — confirm.
    const NMAX: usize = 5552;
    /// Bytes consumed per vector-loop iteration (two 16-byte loads).
    const BLOCK_SIZE: usize = 32;
    /// `NMAX` rounded down to a whole number of blocks; the sums are reduced
    /// modulo `MOD` once per chunk of this size.
    const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;

    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    /// Safe entry point: feeds `data` into the sums `a` and `b` and returns the
    /// updated pair.
    pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        // SAFETY: relies on this function only being handed out by `get_imp`,
        // i.e. after sse2 was detected at runtime or enabled at compile time.
        unsafe { update_imp(a, b, data) }
    }

    /// Splits `data` into `CHUNK_SIZE` chunks plus a shorter tail and folds
    /// each into the widened sums.
    #[inline]
    #[target_feature(enable = "sse2")]
    unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        let mut a = a as u32;
        let mut b = b as u32;

        let chunks = data.chunks_exact(CHUNK_SIZE);
        let remainder = chunks.remainder();
        for chunk in chunks {
            update_chunk_block(&mut a, &mut b, chunk);
        }

        update_block(&mut a, &mut b, remainder);

        (a as u16, b as u16)
    }

    /// Processes one chunk of exactly `CHUNK_SIZE` bytes, then reduces both sums.
    unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
        debug_assert_eq!(
            chunk.len(),
            CHUNK_SIZE,
            "Unexpected chunk size (expected {}, got {})",
            CHUNK_SIZE,
            chunk.len()
        );

        // `CHUNK_SIZE` is a multiple of `BLOCK_SIZE`, so the returned leftover
        // slice is empty and can be ignored here.
        reduce_add_blocks(a, b, chunk);

        *a %= MOD;
        *b %= MOD;
    }

    /// Processes the final, possibly short, chunk: whole blocks go through the
    /// vector path, the leftover bytes are added one by one.
    unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
        debug_assert!(
            chunk.len() <= CHUNK_SIZE,
            "Unexpected chunk size (expected <= {}, got {})",
            CHUNK_SIZE,
            chunk.len()
        );

        for byte in reduce_add_blocks(a, b, chunk) {
            *a += *byte as u32;
            *b += *a;
        }

        *a %= MOD;
        *b %= MOD;
    }

    /// Folds every whole `BLOCK_SIZE` block of `chunk` into `a` and `b`
    /// (without applying `MOD`) and returns the bytes that did not fill a block.
    #[inline(always)]
    unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
        // Nothing to vectorise: hand the whole slice back to the caller.
        if chunk.len() < BLOCK_SIZE {
            return chunk;
        }

        let blocks = chunk.chunks_exact(BLOCK_SIZE);
        let blocks_remainder = blocks.remainder();

        let zero_v = _mm_setzero_si128();
        let weight_hi_v = get_weight_hi();
        let weight_lo_v = get_weight_lo();

        // `p_v` collects the value `a` had before each block: it starts with the
        // incoming `a` counted once per block and gains the running byte sum
        // `a_v` at the top of every iteration.
        let mut p_v = _mm_set_epi32(0, 0, 0, (*a * blocks.len() as u32) as _);
        let mut a_v = _mm_setzero_si128();
        // `b_v` is seeded with the incoming `b`, which is why `*b` is assigned
        // (not added to) after the loop.
        let mut b_v = _mm_set_epi32(0, 0, 0, *b as _);

        for block in blocks {
            // Two unaligned 16-byte loads cover the 32-byte block.
            let block_ptr = block.as_ptr() as *const _;
            let left_v = _mm_loadu_si128(block_ptr);
            let right_v = _mm_loadu_si128(block_ptr.add(1));

            p_v = _mm_add_epi32(p_v, a_v);

            // Sum-of-absolute-differences against zero yields plain byte sums.
            a_v = _mm_add_epi32(a_v, _mm_sad_epu8(left_v, zero_v));
            // First half of the block is paired with the weights 32..=17.
            let mad = maddubs(left_v, weight_hi_v);
            b_v = _mm_add_epi32(b_v, mad);

            a_v = _mm_add_epi32(a_v, _mm_sad_epu8(right_v, zero_v));
            // Second half of the block is paired with the weights 16..=1.
            let mad = maddubs(right_v, weight_lo_v);
            b_v = _mm_add_epi32(b_v, mad);
        }

        // Shift by 5 multiplies by 32 (= `BLOCK_SIZE`): every earlier value of
        // `a` contributes once per byte of the block that followed it.
        b_v = _mm_add_epi32(b_v, _mm_slli_epi32(p_v, 5));

        *a += reduce_add(a_v);
        *b = reduce_add(b_v);

        blocks_remainder
    }

    /// Byte-wise multiply-accumulate: zero-extends the bytes of `a` and `b` to
    /// 16 bits, multiplies matching lanes, adds adjacent products into 32-bit
    /// lanes, and finally adds the low-half and high-half results.
    #[inline(always)]
    unsafe fn maddubs(a: __m128i, b: __m128i) -> __m128i {
        let a_lo = _mm_unpacklo_epi8(a, _mm_setzero_si128());
        let a_hi = _mm_unpackhi_epi8(a, _mm_setzero_si128());

        let b_lo = _mm_unpacklo_epi8(b, _mm_setzero_si128());
        let b_hi = _mm_unpackhi_epi8(b, _mm_setzero_si128());

        let lo = _mm_madd_epi16(a_lo, b_lo);
        let hi = _mm_madd_epi16(a_hi, b_hi);

        _mm_add_epi32(lo, hi)
    }

    /// Horizontal add: returns the sum of the four 32-bit lanes of `v`.
    #[inline(always)]
    unsafe fn reduce_add(v: __m128i) -> u32 {
        // Fold the upper 64 bits onto the lower 64 bits ...
        let hi = _mm_unpackhi_epi64(v, v);
        let sum = _mm_add_epi32(hi, v);
        // ... then swap neighbouring 32-bit lanes and add once more.
        let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));

        let sum = _mm_add_epi32(sum, hi);

        // The total now sits in the lowest lane.
        _mm_cvtsi128_si32(sum) as _
    }

    /// Weights for the second 16 bytes of a block. `_mm_set_epi8` lists lanes
    /// from highest to lowest, so byte 0 gets 16 and byte 15 gets 1.
    #[inline(always)]
    unsafe fn get_weight_lo() -> __m128i {
        _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
    }

    /// Weights for the first 16 bytes of a block: byte 0 gets 32, byte 15 gets 17.
    #[inline(always)]
    unsafe fn get_weight_hi() -> __m128i {
        _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        )
    }
}
/// SSSE3 implementation of the Adler-32 update step.
///
/// Compiled on x86/x86_64 when either the `std` feature is on (so that
/// `get_imp_inner` can detect ssse3 at runtime) or ssse3 is statically enabled.
#[cfg(all(
    any(target_arch = "x86", target_arch = "x86_64"),
    any(feature = "std", target_feature = "ssse3")
))]
mod imp {
    /// Modulus applied to both running sums.
    const MOD: u32 = 65521;
    /// Upper bound used to derive `CHUNK_SIZE`.
    // NOTE(review): presumably the usual Adler-32 limit on how many bytes can be
    // summed in `u32` before a modulo is required — confirm.
    const NMAX: usize = 5552;
    /// Bytes consumed per vector-loop iteration (two 16-byte loads).
    const BLOCK_SIZE: usize = 32;
    /// `NMAX` rounded down to a whole number of blocks; the sums are reduced
    /// modulo `MOD` once per chunk of this size.
    const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;

    #[cfg(target_arch = "x86")]
    use core::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::*;

    /// Safe entry point: feeds `data` into the sums `a` and `b` and returns the
    /// updated pair.
    pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        // SAFETY: relies on this function only being handed out by `get_imp`,
        // i.e. after ssse3 was detected at runtime or enabled at compile time.
        unsafe { update_imp(a, b, data) }
    }

    /// Splits `data` into `CHUNK_SIZE` chunks plus a shorter tail and folds
    /// each into the widened sums.
    #[inline]
    #[target_feature(enable = "ssse3")]
    unsafe fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        let mut a = a as u32;
        let mut b = b as u32;

        let chunks = data.chunks_exact(CHUNK_SIZE);
        let remainder = chunks.remainder();
        for chunk in chunks {
            update_chunk_block(&mut a, &mut b, chunk);
        }

        update_block(&mut a, &mut b, remainder);

        (a as u16, b as u16)
    }

    /// Processes one chunk of exactly `CHUNK_SIZE` bytes, then reduces both sums.
    unsafe fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
        debug_assert_eq!(
            chunk.len(),
            CHUNK_SIZE,
            "Unexpected chunk size (expected {}, got {})",
            CHUNK_SIZE,
            chunk.len()
        );

        // `CHUNK_SIZE` is a multiple of `BLOCK_SIZE`, so the returned leftover
        // slice is empty and can be ignored here.
        reduce_add_blocks(a, b, chunk);

        *a %= MOD;
        *b %= MOD;
    }

    /// Processes the final, possibly short, chunk: whole blocks go through the
    /// vector path, the leftover bytes are added one by one.
    unsafe fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
        debug_assert!(
            chunk.len() <= CHUNK_SIZE,
            "Unexpected chunk size (expected <= {}, got {})",
            CHUNK_SIZE,
            chunk.len()
        );

        for byte in reduce_add_blocks(a, b, chunk) {
            *a += *byte as u32;
            *b += *a;
        }

        *a %= MOD;
        *b %= MOD;
    }

    /// Folds every whole `BLOCK_SIZE` block of `chunk` into `a` and `b`
    /// (without applying `MOD`) and returns the bytes that did not fill a block.
    #[inline(always)]
    unsafe fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
        // Nothing to vectorise: hand the whole slice back to the caller.
        if chunk.len() < BLOCK_SIZE {
            return chunk;
        }

        let blocks = chunk.chunks_exact(BLOCK_SIZE);
        let blocks_remainder = blocks.remainder();

        let one_v = _mm_set1_epi16(1);
        let zero_v = _mm_set1_epi16(0);
        let weight_hi_v = get_weight_hi();
        let weight_lo_v = get_weight_lo();

        // `p_v` collects the value `a` had before each block: it starts with the
        // incoming `a` counted once per block and gains the running byte sum
        // `a_v` at the top of every iteration.
        let mut p_v = _mm_set_epi32(0, 0, 0, (*a * blocks.len() as u32) as _);
        let mut a_v = _mm_set_epi32(0, 0, 0, 0);
        // `b_v` is seeded with the incoming `b`, which is why `*b` is assigned
        // (not added to) after the loop.
        let mut b_v = _mm_set_epi32(0, 0, 0, *b as _);

        for block in blocks {
            // Two unaligned 16-byte loads cover the 32-byte block.
            let block_ptr = block.as_ptr() as *const _;
            let left_v = _mm_loadu_si128(block_ptr);
            let right_v = _mm_loadu_si128(block_ptr.add(1));

            p_v = _mm_add_epi32(p_v, a_v);

            // Sum-of-absolute-differences against zero yields plain byte sums.
            a_v = _mm_add_epi32(a_v, _mm_sad_epu8(left_v, zero_v));
            // Multiply bytes by the weights 32..=17 and add adjacent pairs into
            // 16-bit lanes; the multiply-add with ones then widens to 32 bits.
            let mad = _mm_maddubs_epi16(left_v, weight_hi_v);
            b_v = _mm_add_epi32(b_v, _mm_madd_epi16(mad, one_v));

            a_v = _mm_add_epi32(a_v, _mm_sad_epu8(right_v, zero_v));
            // Same for the second half of the block with the weights 16..=1.
            let mad = _mm_maddubs_epi16(right_v, weight_lo_v);
            b_v = _mm_add_epi32(b_v, _mm_madd_epi16(mad, one_v));
        }

        // Shift by 5 multiplies by 32 (= `BLOCK_SIZE`): every earlier value of
        // `a` contributes once per byte of the block that followed it.
        b_v = _mm_add_epi32(b_v, _mm_slli_epi32(p_v, 5));

        *a += reduce_add(a_v);
        *b = reduce_add(b_v);

        blocks_remainder
    }

    /// Horizontal add: returns the sum of the four 32-bit lanes of `v`.
    #[inline(always)]
    unsafe fn reduce_add(v: __m128i) -> u32 {
        // Fold the upper 64 bits onto the lower 64 bits ...
        let hi = _mm_unpackhi_epi64(v, v);
        let sum = _mm_add_epi32(hi, v);
        // ... then swap neighbouring 32-bit lanes and add once more.
        let hi = _mm_shuffle_epi32(sum, crate::imp::_MM_SHUFFLE(2, 3, 0, 1));

        let sum = _mm_add_epi32(sum, hi);

        // The total now sits in the lowest lane.
        _mm_cvtsi128_si32(sum) as _
    }

    /// Weights for the second 16 bytes of a block. `_mm_set_epi8` lists lanes
    /// from highest to lowest, so byte 0 gets 16 and byte 15 gets 1.
    #[inline(always)]
    unsafe fn get_weight_lo() -> __m128i {
        _mm_set_epi8(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)
    }

    /// Weights for the first 16 bytes of a block: byte 0 gets 32, byte 15 gets 17.
    #[inline(always)]
    unsafe fn get_weight_hi() -> __m128i {
        _mm_set_epi8(
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
        )
    }
}
/// WebAssembly `simd128` implementation of the Adler-32 update step.
///
/// Only compiled when `simd128` is enabled at compile time.
#[cfg(target_feature = "simd128")]
mod imp {
    /// Modulus applied to both running sums.
    const MOD: u32 = 65521;
    /// Upper bound used to derive `CHUNK_SIZE`.
    // NOTE(review): presumably the usual Adler-32 limit on how many bytes can be
    // summed in `u32` before a modulo is required — confirm.
    const NMAX: usize = 5552;
    /// Bytes consumed per vector-loop iteration (two 16-byte loads).
    const BLOCK_SIZE: usize = 32;
    /// `NMAX` rounded down to a whole number of blocks; the sums are reduced
    /// modulo `MOD` once per chunk of this size.
    const CHUNK_SIZE: usize = NMAX / BLOCK_SIZE * BLOCK_SIZE;

    #[cfg(target_arch = "wasm32")]
    use core::arch::wasm32::*;
    #[cfg(target_arch = "wasm64")]
    use core::arch::wasm64::*;

    /// Entry point: feeds `data` into the sums `a` and `b` and returns the
    /// updated pair.
    pub fn update(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        update_imp(a, b, data)
    }

    /// Splits `data` into `CHUNK_SIZE` chunks plus a shorter tail and folds
    /// each into the widened sums.
    #[inline]
    #[target_feature(enable = "simd128")]
    fn update_imp(a: u16, b: u16, data: &[u8]) -> (u16, u16) {
        let mut a = a as u32;
        let mut b = b as u32;

        let chunks = data.chunks_exact(CHUNK_SIZE);
        let remainder = chunks.remainder();
        for chunk in chunks {
            update_chunk_block(&mut a, &mut b, chunk);
        }

        update_block(&mut a, &mut b, remainder);

        (a as u16, b as u16)
    }

    /// Processes one chunk of exactly `CHUNK_SIZE` bytes, then reduces both sums.
    fn update_chunk_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
        debug_assert_eq!(
            chunk.len(),
            CHUNK_SIZE,
            "Unexpected chunk size (expected {}, got {})",
            CHUNK_SIZE,
            chunk.len()
        );

        // `CHUNK_SIZE` is a multiple of `BLOCK_SIZE`, so the returned leftover
        // slice is empty and can be ignored here.
        reduce_add_blocks(a, b, chunk);

        *a %= MOD;
        *b %= MOD;
    }

    /// Processes the final, possibly short, chunk: whole blocks go through the
    /// vector path, the leftover bytes are added one by one.
    fn update_block(a: &mut u32, b: &mut u32, chunk: &[u8]) {
        debug_assert!(
            chunk.len() <= CHUNK_SIZE,
            "Unexpected chunk size (expected <= {}, got {})",
            CHUNK_SIZE,
            chunk.len()
        );

        for byte in reduce_add_blocks(a, b, chunk) {
            *a += *byte as u32;
            *b += *a;
        }

        *a %= MOD;
        *b %= MOD;
    }

    /// Folds every whole `BLOCK_SIZE` block of `chunk` into `a` and `b`
    /// (without applying `MOD`) and returns the bytes that did not fill a block.
    #[inline(always)]
    fn reduce_add_blocks<'a>(a: &mut u32, b: &mut u32, chunk: &'a [u8]) -> &'a [u8] {
        // Nothing to vectorise: hand the whole slice back to the caller.
        if chunk.len() < BLOCK_SIZE {
            return chunk;
        }

        let blocks = chunk.chunks_exact(BLOCK_SIZE);
        let blocks_remainder = blocks.remainder();

        let weight_hi_v = get_weight_hi();
        let weight_lo_v = get_weight_lo();

        // `p_v` collects the value `a` had before each block: it starts with the
        // incoming `a` counted once per block and gains the running byte sum
        // `a_v` at the top of every iteration.
        let mut p_v = u32x4(*a * blocks.len() as u32, 0, 0, 0);
        let mut a_v = u32x4(0, 0, 0, 0);
        // `b_v` is seeded with the incoming `b`, which is why `*b` is assigned
        // (not added to) after the loop.
        let mut b_v = u32x4(*b, 0, 0, 0);

        for block in blocks {
            let block_ptr = block.as_ptr() as *const v128;
            // SAFETY: `chunks_exact(BLOCK_SIZE)` yields slices of exactly 32
            // bytes, so both 16-byte unaligned reads stay inside `block`.
            let v_lo = unsafe { block_ptr.read_unaligned() };
            let v_hi = unsafe { block_ptr.add(1).read_unaligned() };

            p_v = u32x4_add(p_v, a_v);

            a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_lo));
            // First half of the block is paired with the weights 32..=17.
            let mad = i32x4_dot_i8x16(v_lo, weight_lo_v);
            b_v = u32x4_add(b_v, mad);

            a_v = u32x4_add(a_v, u32x4_extadd_quarters_u8x16(v_hi));
            // Second half of the block is paired with the weights 16..=1.
            let mad = i32x4_dot_i8x16(v_hi, weight_hi_v);
            b_v = u32x4_add(b_v, mad);
        }

        // Shift by 5 multiplies by 32 (= `BLOCK_SIZE`): every earlier value of
        // `a` contributes once per byte of the block that followed it.
        b_v = u32x4_add(b_v, u32x4_shl(p_v, 5));

        *a += reduce_add(a_v);
        *b = reduce_add(b_v);

        blocks_remainder
    }

    /// Byte-wise dot product: zero-extends the bytes of `a` and `b` to 16 bits,
    /// multiplies matching lanes, adds adjacent products into 32-bit lanes, and
    /// finally adds the low-half and high-half results.
    #[inline(always)]
    fn i32x4_dot_i8x16(a: v128, b: v128) -> v128 {
        let a_lo = u16x8_extend_low_u8x16(a);
        let a_hi = u16x8_extend_high_u8x16(a);

        let b_lo = u16x8_extend_low_u8x16(b);
        let b_hi = u16x8_extend_high_u8x16(b);

        let lo = i32x4_dot_i16x8(a_lo, b_lo);
        let hi = i32x4_dot_i16x8(a_hi, b_hi);

        i32x4_add(lo, hi)
    }

    /// Sums each group of four adjacent bytes of `a` into one 32-bit lane.
    #[inline(always)]
    fn u32x4_extadd_quarters_u8x16(a: v128) -> v128 {
        u32x4_extadd_pairwise_u16x8(u16x8_extadd_pairwise_u8x16(a))
    }

    /// Horizontal add: returns the wrapping sum of the four 32-bit lanes of `v`.
    #[inline(always)]
    fn reduce_add(v: v128) -> u32 {
        // `core::` rather than `std::` so the module also builds when the crate
        // is compiled as `no_std` (the `std` feature disabled).
        // SAFETY: `v128` and `[u32; 4]` are both 16 bytes of plain integer data,
        // so every bit pattern is a valid `[u32; 4]`.
        let lanes: [u32; 4] = unsafe { core::mem::transmute(v) };

        lanes
            .iter()
            .fold(0u32, |sum, &lane| sum.wrapping_add(lane))
    }

    /// Weights for the first 16 bytes of a block; lanes are listed in order, so
    /// byte 0 gets 32 and byte 15 gets 17.
    #[inline(always)]
    fn get_weight_lo() -> v128 {
        u8x16(
            32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
        )
    }

    /// Weights for the second 16 bytes of a block: byte 0 gets 16, byte 15 gets 1.
    #[inline(always)]
    fn get_weight_hi() -> v128 {
        u8x16(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1)
    }
}
let hash = adler.finish(); //! //! println!("{}", hash); //! // 1921255656 //! ``` //! //! ## Feature flags //! //! * `std` - Enabled by default //! //! Enables std support, see [CPU Feature Detection](#cpu-feature-detection) for runtime //! detection support. //! //! * `nightly` //! //! Enables nightly features required for avx512 support. //! //! * `const-generics` - Enabled by default //! //! Enables const-generics support allowing for user-defined array hashing by value. See //! [`Adler32Hash`] for details. //! //! ## Support //! //! **CPU Features** //! //! | impl | arch | feature | //! | ---- | ---------------- | ------- | //! | ✅ | `x86`, `x86_64` | avx512 | //! | ✅ | `x86`, `x86_64` | avx2 | //! | ✅ | `x86`, `x86_64` | ssse3 | //! | ✅ | `x86`, `x86_64` | sse2 | //! | 🚧 | `arm`, `aarch64` | neon | //! | | `wasm32` | simd128 | //! //! **MSRV** `1.36.0`\*\* //! //! Minimum supported Rust version is tested before a new version is published. [**] Feature //! `const-generics` needs to be disabled to build on rustc versions `<1.51`, which can be done //! by turning off default features and re-enabling only `std` in your dependency definition, //! e.g. `simd-adler32 = { version = "*", default-features = false, features = ["std"] }`. //! //! ## CPU Feature Detection //! simd-adler32 supports both runtime and compile-time CPU feature detection. Runtime //! detection uses the `std::is_x86_feature_detected` macro when the `Adler32` struct is //! instantiated with the `new` fn. //! //! Without the `std` feature enabled, simd-adler32 falls back to compile-time feature detection //! using `target-feature` or `target-cpu` flags supplied to rustc. See [https://rust-lang.github.io/packed_simd/perf-guide/target-feature/rustflags.html](https://rust-lang.github.io/packed_simd/perf-guide/target-feature/rustflags.html) //! for more information. //! //! Feature detection tries to use the fastest supported feature first.
#![cfg_attr(not(feature = "std"), no_std)] #![cfg_attr(feature = "nightly", feature(stdsimd, avx512_target_feature))] #[doc(hidden)] pub mod hash; #[doc(hidden)] pub mod imp; pub use hash::*; use imp::{get_imp, Adler32Imp}; /// An adler32 hash generator type. #[derive(Clone)] pub struct Adler32 { a: u16, b: u16, update: Adler32Imp, } impl Adler32 { /// Constructs a new `Adler32`. /// /// Potential overhead here due to runtime feature detection although in testing on 100k /// and 10k random byte arrays it was not really noticeable. /// /// # Examples /// ```rust /// use simd_adler32::Adler32; /// /// let mut adler = Adler32::new(); /// ``` pub fn new() -> Self { Default::default() } /// Constructs a new `Adler32` using existing checksum. /// /// Potential overhead here due to runtime feature detection although in testing on 100k /// and 10k random byte arrays it was not really noticeable. /// /// # Examples /// ```rust /// use simd_adler32::Adler32; /// /// let mut adler = Adler32::from_checksum(0xdeadbeaf); /// ``` pub fn from_checksum(checksum: u32) -> Self { Self { a: checksum as u16, b: (checksum >> 16) as u16, update: get_imp(), } } /// Computes hash for supplied data and stores results in internal state. pub fn write(&mut self, data: &[u8]) { let (a, b) = (self.update)(self.a, self.b, data); self.a = a; self.b = b; } /// Returns the hash value for the values written so far. /// /// Despite its name, the method does not reset the hasher’s internal state. Additional /// writes will continue from the current value. If you need to start a fresh hash /// value, you will have to use `reset`. pub fn finish(&self) -> u32 { (u32::from(self.b) << 16) | u32::from(self.a) } /// Resets the internal state. pub fn reset(&mut self) { self.a = 1; self.b = 0; } } /// Compute Adler-32 hash on `Adler32Hash` type. /// /// # Arguments /// * `hash` - A Adler-32 hash-able type. 
/// /// # Examples /// ```rust /// use simd_adler32::adler32; /// /// let hash = adler32(b"Adler-32"); /// println!("{}", hash); // 800813569 /// ``` pub fn adler32(hash: &H) -> u32 { hash.hash() } /// A Adler-32 hash-able type. pub trait Adler32Hash { /// Feeds this value into `Adler32`. fn hash(&self) -> u32; } impl Default for Adler32 { fn default() -> Self { Self { a: 1, b: 0, update: get_imp(), } } } #[cfg(feature = "std")] pub mod read { //! Reader-based hashing. //! //! # Example //! ```rust //! use std::io::Cursor; //! use simd_adler32::read::adler32; //! //! let mut reader = Cursor::new(b"Hello there"); //! let hash = adler32(&mut reader).unwrap(); //! //! println!("{}", hash) // 800813569 //! ``` use crate::Adler32; use std::io::{Read, Result}; /// Compute Adler-32 hash on reader until EOF. /// /// # Example /// ```rust /// use std::io::Cursor; /// use simd_adler32::read::adler32; /// /// let mut reader = Cursor::new(b"Hello there"); /// let hash = adler32(&mut reader).unwrap(); /// /// println!("{}", hash) // 800813569 /// ``` pub fn adler32(reader: &mut R) -> Result { let mut hash = Adler32::new(); let mut buf = [0; 4096]; loop { match reader.read(&mut buf) { Ok(0) => return Ok(hash.finish()), Ok(n) => { hash.write(&buf[..n]); } Err(err) => return Err(err), } } } } #[cfg(feature = "std")] pub mod bufread { //! BufRead-based hashing. //! //! Separate `BufRead` trait implemented to allow for custom buffer size optimization. //! //! # Example //! ```rust //! use std::io::{Cursor, BufReader}; //! use simd_adler32::bufread::adler32; //! //! let mut reader = Cursor::new(b"Hello there"); //! let mut reader = BufReader::new(reader); //! let hash = adler32(&mut reader).unwrap(); //! //! println!("{}", hash) // 800813569 //! ``` use crate::Adler32; use std::io::{BufRead, ErrorKind, Result}; /// Compute Adler-32 hash on buf reader until EOF. 
/// /// # Example /// ```rust /// use std::io::{Cursor, BufReader}; /// use simd_adler32::bufread::adler32; /// /// let mut reader = Cursor::new(b"Hello there"); /// let mut reader = BufReader::new(reader); /// let hash = adler32(&mut reader).unwrap(); /// /// println!("{}", hash) // 800813569 /// ``` pub fn adler32(reader: &mut R) -> Result { let mut hash = Adler32::new(); loop { let consumed = match reader.fill_buf() { Ok(buf) => { if buf.is_empty() { return Ok(hash.finish()); } hash.write(buf); buf.len() } Err(err) => match err.kind() { ErrorKind::Interrupted => continue, ErrorKind::UnexpectedEof => return Ok(hash.finish()), _ => return Err(err), }, }; reader.consume(consumed); } } } #[cfg(test)] mod tests { #[test] fn test_from_checksum() { let buf = b"rust is pretty cool man"; let sum = 0xdeadbeaf; let mut simd = super::Adler32::from_checksum(sum); let mut adler = adler::Adler32::from_checksum(sum); simd.write(buf); adler.write_slice(buf); let simd = simd.finish(); let scalar = adler.checksum(); assert_eq!(simd, scalar); } }