crc32c-0.6.8/.cargo_vcs_info.json0000644000000001360000000000100121260ustar { "git": { "sha1": "254a861f7cc71bfe455e86f0e1d86e3c83c33390" }, "path_in_vcs": "" }crc32c-0.6.8/.github/dependabot.yml000064400000000000000000000002211046102023000151010ustar 00000000000000version: 2 updates: - package-ecosystem: cargo directory: "/" schedule: interval: daily time: "13:00" open-pull-requests-limit: 10 crc32c-0.6.8/.github/workflows/build.yml000064400000000000000000000044341046102023000161420ustar 00000000000000name: build on: push: branches: [ master ] pull_request: branches: [ master ] env: CARGO_TERM_COLOR: always jobs: build: runs-on: ${{matrix.os}} strategy: matrix: include: - build: linux os: ubuntu-latest rust: stable target: x86_64-unknown-linux-musl cross: false - build: linux os: ubuntu-latest rust: nightly target: x86_64-unknown-linux-musl cross: false - build: aarch64 os: ubuntu-latest rust: stable target: aarch64-unknown-linux-gnu linker: gcc-aarch64-linux-gnu cross: true - build: aarch64 os: ubuntu-latest rust: nightly target: aarch64-unknown-linux-gnu linker: gcc-aarch64-linux-gnu cross: true steps: - uses: actions/checkout@v2 with: fetch-depth: 1 - name: Cache uses: actions/cache@v2 with: path: | ~/.cargo/registry ~/.cargo/git ~/.rustup target key: ${{ runner.os }}-${{ matrix.rust }} - name: Install Linker if: matrix.cross run: | sudo apt update sudo apt install ${{ matrix.linker }} - name: Install Rust run: | rustup install ${{ matrix.rust }} rustup target add ${{ matrix.target }} rustup show - name: Build run: cargo build --verbose --target ${{ matrix.target }} - name: Run tests run: cargo test --verbose fmt: name: Rustfmt runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true - run: rustup component add rustfmt - uses: actions-rs/cargo@v1 with: command: fmt args: --all -- --check clippy: name: Clippy runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions-rs/toolchain@v1 with: toolchain: stable components: clippy override: true - uses: actions-rs/clippy-check@v1 with: token: ${{ secrets.GITHUB_TOKEN }} args: --all-features name: Clippy Output crc32c-0.6.8/.github/workflows/release.yaml000064400000000000000000000012671046102023000166250ustar 00000000000000name: release on: push: tags: - v* env: CARGO_TERM_COLOR: always jobs: publish-crate: name: Publish Crate runs-on: ubuntu-latest steps: - name: Clone uses: actions/checkout@v2 - name: Cache uses: actions/cache@v2 with: path: | ~/.cargo/registry ~/.cargo/git ~/.rustup target key: ${{ runner.os }}-stable - name: Setup run: | rustup install stable - name: Build run: cargo build --verbose - name: Run tests run: cargo test - name: Publish run: cargo publish env: CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} crc32c-0.6.8/.gitignore000064400000000000000000000000421046102023000127020ustar 00000000000000target Cargo.lock .log* .DS_Store crc32c-0.6.8/Cargo.toml0000644000000023500000000000100101240ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. 
[package]
edition = "2018"
name = "crc32c"
version = "0.6.8"
authors = ["Zack Owens"]
build = "build.rs"
exclude = [
    "benches/*",
    "tests/*",
]
description = "Safe implementation for hardware accelerated CRC32C instructions with software fallback"
homepage = "https://github.com/zowens/crc32c"
documentation = "http://docs.rs/crc32c"
readme = "README.md"
keywords = [
    "crc",
    "simd",
]
categories = ["algorithms"]
license = "Apache-2.0/MIT"
repository = "https://github.com/zowens/crc32c"

[[bench]]
name = "rand"
path = "benches/rand.rs"
harness = false

[dev-dependencies.criterion]
version = "0.5"

[dev-dependencies.rand]
version = "0.8"
features = [
    "alloc",
    "getrandom",
]

[build-dependencies.rustc_version]
version = "0.4"

[target.aarch64-unknown-linux-gnu]

crc32c-0.6.8/Cargo.toml.orig

[package]
name = "crc32c"
version = "0.6.8"
authors = ["Zack Owens"]
license = "Apache-2.0/MIT"
keywords = ["crc", "simd"]
categories = ["algorithms"]
repository = "https://github.com/zowens/crc32c"
homepage = "https://github.com/zowens/crc32c"
documentation = "http://docs.rs/crc32c"
edition = "2018"
description = "Safe implementation for hardware accelerated CRC32C instructions with software fallback"
exclude = [
    "benches/*",
    "tests/*",
]
build = "build.rs"

[dev-dependencies]
rand = { version ="0.8", features=["alloc", "getrandom"] }
criterion = "0.5"

[build-dependencies]
rustc_version = "0.4"

[[bench]]
name = "rand"
path = "benches/rand.rs"
harness = false

[target.aarch64-unknown-linux-gnu]
linker = "aarch64-linux-gnu-gcc"

crc32c-0.6.8/README.md

# CRC32C

[![Crates.io](https://img.shields.io/crates/v/crc32c.svg)](https://crates.io/crates/crc32c)
[![Docs.rs](https://docs.rs/crc32c/badge.svg)](https://docs.rs/crc32c/)
[![Travis](https://travis-ci.org/zowens/crc32c.svg?branch=master)](https://travis-ci.org/zowens/crc32c/)

Rust implementation of the CRC-32-Castagnoli algorithm with hardware acceleration where possible.

Hardware acceleration is available on the following architectures:

1. **x86-64** with [SSE 4.2](https://software.intel.com/sites/default/files/m/8/b/8/D9156103.pdf)
    * All stable versions of Rust
    * If SSE 4.2 is enabled at compile time, it will only build the SSE implementation. Otherwise, `cpuid` is used to find the best implementation at runtime.
1. **aarch64** with the [crc feature](https://developer.arm.com/documentation/dui0801/g/A32-and-T32-Instructions/CRC32C)
    * Only available on Rust version >= 1.80.0 or nightly

All other processors utilize a software fallback.

## Usage

First, add this to your `Cargo.toml`:

```toml
[dependencies]
crc32c = "0.6"
```

```rust
extern crate crc32c;

fn main() {
    let message = b"Hello world!";

    let crc = crc32c::crc32c(message);

    println!("hash = {}", crc);
}
```

## License

You may use this code under either the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0)
or the [MIT license](https://opensource.org/licenses/MIT), at your option.

crc32c-0.6.8/build.rs

#![allow(clippy::uninit_assumed_init)]
extern crate rustc_version;
use rustc_version::{version, Version};

use std::io::Write;
use std::path::Path;
use std::{io, ops};

/// CRC-32-Castagnoli polynomial in reversed bit order.
pub const POLYNOMIAL: u32 = 0x82_F6_3B_78;

/// Table for a quadword-at-a-time software CRC.
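/// `table[0]` is the plain byte-at-a-time lookup table for the reflected
/// polynomial above; `table[k][n]` additionally folds in `k` zero bytes, which
/// is what lets the software path in `src/sw.rs` consume a full 64-bit word
/// with eight table lookups per step.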
fn sw_table() -> [[u32; 256]; 8] { let mut table: [[u32; 256]; 8] = [[0u32; 256]; 8]; for n in 0..256_u32 { let mut crc = n; for _ in 0..8 { if crc % 2 == 0 { crc /= 2; } else { crc /= 2; crc ^= POLYNOMIAL; } } table[0][n as usize] = crc; } for n in 0..256 { let mut crc = table[0][n as usize]; for k in 1..8 { crc = table[0][(crc as u8) as usize] ^ (crc >> 8); table[k as usize][n as usize] = crc; } } table } /// A matrix over the Galois field of two elements (0 and 1). /// In this field, multiplication is equivalent to the (very fast) bitwise XOR. #[derive(Debug, Copy, Clone)] pub struct Matrix([u32; 32]); impl Matrix { /// Allocates space for a new matrix. const fn new() -> Self { Self([0u32; 32]) } /// Multiplies a matrix by itself. fn square(self) -> Self { let mut result = Self::new(); for i in 0..32 { result[i] = self * self[i]; } result } } impl ops::Index for Matrix { type Output = u32; #[inline] fn index(&self, i: u8) -> &Self::Output { &self.0[i as usize] } } impl ops::IndexMut for Matrix { #[inline] fn index_mut(&mut self, i: u8) -> &mut Self::Output { &mut self.0[i as usize] } } impl ops::Mul for Matrix { type Output = u32; /// Multiplies the matrix with a vector. fn mul(self, mut vec: u32) -> Self::Output { let mut sum = 0; let mut i = 0; while vec != 0 { if vec % 2 != 0 { sum ^= self[i]; } vec /= 2; i += 1; } sum } } fn create_zero_operator(mut len: usize) -> Matrix { // Operator for odd powers-of-two. let mut odd = Matrix::new(); odd[0] = POLYNOMIAL; for i in 1..32 { odd[i] = 1 << (i - 1); } let mut even = odd.square(); let mut odd = even.square(); loop { even = odd.square(); len /= 2; if len == 0 { return even; } odd = even.square(); len /= 2; if len == 0 { return odd; } } } fn hw_table(len: usize) -> [[u32; 256]; 4] { let mut zeroes: [[u32; 256]; 4] = [[0u32; 256]; 4]; let op = create_zero_operator(len); for n in 0..256_u32 { for i in 0..4_u32 { let shift = i * 8; zeroes[i as usize][n as usize] = op * (n << shift); } } zeroes } // LONG/SHORT VALUES MUST BE SYNCHRONIZED WITH src/tables.rs pub const LONG: usize = 8192; pub const SHORT: usize = 256; fn write_table(table: &[[u32; 256]], path: &Path) -> io::Result<()> { let mut file = { let file = std::fs::File::create(path)?; io::BufWriter::new(file) }; write!(file, "[")?; for row in table { write!(file, "[")?; for element in row { write!(file, "{element}, ")?; } write!(file, "],")?; } write!(file, "]")?; Ok(()) } fn write_tables() -> io::Result<()> { let out_dir = std::env::var("OUT_DIR").unwrap(); let out_dir = std::path::Path::new(&out_dir); write_table(&sw_table(), &out_dir.join("sw.table"))?; write_table(&hw_table(LONG), &out_dir.join("hw.long.table"))?; write_table(&hw_table(SHORT), &out_dir.join("hw.short.table")) } fn main() { write_tables().expect("Failed to write CRC tables"); let min_version = Version::new(1, 80, 0); let current_version = { // remove prerelease tag for now, if it exists. let vers = version().unwrap(); Version::new(vers.major, vers.minor, vers.patch) }; if current_version >= min_version { println!("cargo::rustc-check-cfg=cfg(armsimd)"); println!("cargo::rustc-cfg=armsimd"); } } crc32c-0.6.8/src/combine.rs000064400000000000000000000063341046102023000134750ustar 00000000000000//! Implements the CRC32c "combine" function, which calculates the CRC32c of two byte streams //! concatenated together using their individual CRC32c values (plus the length of the second byte //! stream). //! //! This module is essentially a line-by-line translation of ZLIB's CRC "combine" function //! 
implementation from C to Rust, except for the CRC polynomial used (original uses the CRC32 //! polynomial 0xedb88320UL, we use the CRC32c polynomial 0x82F63B78). //! //! Link to original implementation: https://github.com/madler/zlib/blob/master/crc32.c //! //! This file is based on the Zlib project, located at: https://github.com/madler/zlib, //! which includes the following notice: //! //! crc32.c -- compute the CRC-32 of a data stream //! Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler //! For conditions of distribution and use, see copyright notice in zlib.h //! //! Thanks to Rodney Brown for his contribution of faster //! CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing //! tables for updating the shift register in one step with three exclusive-ors //! instead of four steps with four exclusive-ors. This results in about a //! factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3. const GF2_DIM: usize = 32; fn gf2_matrix_times(mat: &[u32; GF2_DIM], mut vec: u32) -> u32 { let mut sum = 0; let mut idx = 0; while vec > 0 { if vec & 1 == 1 { sum ^= mat[idx]; } vec >>= 1; idx += 1; } sum } fn gf2_matrix_square(square: &mut [u32; GF2_DIM], mat: &[u32; GF2_DIM]) { for n in 0..GF2_DIM { square[n] = gf2_matrix_times(mat, mat[n]); } } pub(crate) fn crc32c_combine(mut crc1: u32, crc2: u32, mut len2: usize) -> u32 { let mut row: u32 = 1; let mut even = [0u32; GF2_DIM]; /* even-power-of-two zeros operator */ let mut odd = [0u32; GF2_DIM]; /* odd-power-of-two zeros operator */ /* degenerate case (also disallow negative lengths) */ if len2 == 0 { return crc1; } /* put operator for one zero bit in odd */ odd[0] = 0x82F63B78; /* CRC-32c polynomial */ #[allow(clippy::needless_range_loop)] for n in 1..GF2_DIM { odd[n] = row; row <<= 1; } /* put operator for two zero bits in even */ gf2_matrix_square(&mut even, &odd); /* put operator for four zero bits in odd */ gf2_matrix_square(&mut odd, &even); /* apply len2 zeros to crc1 (first square will put the operator for one zero byte, eight zero bits, in even) */ loop { /* apply zeros operator for this bit of len2 */ gf2_matrix_square(&mut even, &odd); if len2 & 1 == 1 { crc1 = gf2_matrix_times(&even, crc1); } len2 >>= 1; /* if no more bits set, then done */ if len2 == 0 { break; } /* another iteration of the loop with odd and even swapped */ gf2_matrix_square(&mut odd, &even); if len2 & 1 == 1 { crc1 = gf2_matrix_times(&odd, crc1); } len2 >>= 1; /* if no more bits set, then done */ if len2 == 0 { break; } } /* return combined crc */ crc1 ^= crc2; crc1 } crc32c-0.6.8/src/hasher.rs000064400000000000000000000022201046102023000133210ustar 00000000000000//! Provide a CRC-32C implementor of [Hasher]. use std::hash::Hasher; use crate::crc32c_append; /// Implementor of [Hasher] for CRC-32C. /// /// Note that CRC-32C produces a 32-bit hash (as [u32]), /// but the trait requires that the output value be [u64]. #[derive(Default)] pub struct Crc32cHasher { checksum: u32, } impl Crc32cHasher { /// Create the [Hasher] pre-loaded with a particular checksum. /// /// Use the [Default::default()] constructor for a clean start. 
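    ///
    /// An illustrative example: seeding with the CRC of a prefix continues an
    /// earlier checksum, since [`Hasher::write`] is implemented with
    /// `crc32c_append`.
    ///
    /// ```
    /// use std::hash::Hasher;
    ///
    /// let prefix = crc32c::crc32c(b"hello ");
    /// let mut hasher = crc32c::Crc32cHasher::new(prefix);
    /// hasher.write(b"world");
    /// assert_eq!(hasher.finish(), u64::from(crc32c::crc32c(b"hello world")));
    /// ```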
    pub fn new(initial: u32) -> Self {
        Self { checksum: initial }
    }
}

impl Hasher for Crc32cHasher {
    fn finish(&self) -> u64 {
        self.checksum as u64
    }

    fn write(&mut self, bytes: &[u8]) {
        self.checksum = crc32c_append(self.checksum, bytes);
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    const TEST_STRING: &[u8] =
        b"This is a very long string which is used to test the CRC-32-Castagnoli function.";
    const CHECKSUM: u32 = 0x20_CB_1E_59;

    #[test]
    fn can_hash() {
        let mut hasher = Crc32cHasher::default();
        hasher.write(TEST_STRING);
        assert_eq!(hasher.finish(), CHECKSUM as u64);
    }
}

crc32c-0.6.8/src/hw_aarch64.rs

use crate::hw_tables;
use crate::util::{self, U64Le};

use std::arch::aarch64 as simd;

pub unsafe fn crc32c(crci: u32, buffer: &[u8]) -> u32 {
    let mut crc0 = !crci;

    let (begin, middle, end) = util::split(buffer);

    // Leading bytes, up to the first one aligned on 8 bytes,
    // handled one at a time with the `crc32cb` byte instruction.
    crc0 = crc_u8(crc0, begin);

    // Most CPUs have a latency of 3 on these instructions,
    // meaning we must use 3 of them at a time, to leverage
    // hardware parallelism.
    //
    // TODO: validate that this is true on ARM
    //
    // First do chunks of size LONG * 3.
    let chunk_size = (hw_tables::LONG * 3) / 8;
    let last_chunk = middle.len() / chunk_size * chunk_size;

    let (middle_first, middle_last) = middle.split_at(last_chunk);
    crc0 = crc_u64_parallel3(crc0, chunk_size, &hw_tables::LONG_TABLE, middle_first);

    // Now do chunks of size SHORT * 3.
    let chunk_size = (hw_tables::SHORT * 3) / 8;
    let last_chunk = middle_last.len() / chunk_size * chunk_size;

    let (middle_last_first, middle_last_last) = middle_last.split_at(last_chunk);
    crc0 = crc_u64_parallel3(crc0, chunk_size, &hw_tables::SHORT_TABLE, middle_last_first);

    // Now the last part, less than SHORT * 3 but still a multiple of 8-bytes.
    crc0 = crc_u64(crc0, middle_last_last);

    !crc_u8(crc0, end)
}

#[inline]
#[target_feature(enable = "crc")]
unsafe fn crc_u8(crc: u32, buffer: &[u8]) -> u32 {
    buffer
        .iter()
        .fold(crc, |crc, &next| simd::__crc32cb(crc, next))
}

#[inline(always)]
unsafe fn crc_u64(crc: u32, words: &[U64Le]) -> u32 {
    words
        .iter()
        .fold(crc, |crc, &next| crc_u64_append(crc, next.get()))
}

#[inline(always)]
unsafe fn crc_u64_append(crc: u32, next: u64) -> u32 {
    simd::__crc32cd(crc, next)
}

#[inline(always)]
unsafe fn crc_u64_parallel3(
    crc: u32,
    chunk_size: usize,
    table: &hw_tables::CrcTable,
    buffer: &[U64Le],
) -> u32 {
    buffer.chunks(chunk_size).fold(crc, |mut crc0, chunk| {
        let mut crc1 = 0;
        let mut crc2 = 0;

        // Divide it in three.
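        // Three independent CRC streams keep three `crc32cd` dependency chains
        // in flight at once; afterwards the per-chunk table advances crc0 over
        // one block's worth of zero bytes so it can be folded into crc1, and
        // then again into crc2, giving the same result as a sequential CRC.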
let block_size = chunk_size / 3; let mut blocks = chunk.chunks(block_size); let a = blocks.next().unwrap(); let b = blocks.next().unwrap(); let c = blocks.next().unwrap(); for i in 0..block_size { crc0 = crc_u64_append(crc0, a[i].get()); crc1 = crc_u64_append(crc1, b[i].get()); crc2 = crc_u64_append(crc2, c[i].get()); } crc0 = table.shift_u32(crc0) ^ crc1; crc0 = table.shift_u32(crc0) ^ crc2; crc0 }) } crc32c-0.6.8/src/hw_tables.rs000064400000000000000000000017171046102023000140310ustar 00000000000000pub struct CrcTable([[u32; 256]; 4]); #[allow(dead_code)] impl CrcTable { pub fn at(&self, i: u8, j: u8) -> u32 { let i = i as usize; let j = j as usize; self.0[i][j] } pub fn shift_u32(&self, crc: u32) -> u32 { let mut result = self.at(0, crc as u8); for i in 1..4 { let shift = i * 8; result ^= self.at(i, (crc >> shift) as u8); } result } pub fn shift_u64(&self, crc: u64) -> u64 { let mut result = u64::from(self.at(0, crc as u8)); for i in 1..4 { let shift = i * 8; result ^= u64::from(self.at(i, (crc >> shift) as u8)); } result } } pub const LONG: usize = 8192; pub const SHORT: usize = 256; pub const LONG_TABLE: CrcTable = CrcTable(include!(concat!(env!("OUT_DIR"), "/", "hw.long.table"))); pub const SHORT_TABLE: CrcTable = CrcTable(include!(concat!(env!("OUT_DIR"), "/", "hw.short.table"))); crc32c-0.6.8/src/hw_x86_64.rs000064400000000000000000000060071046102023000135120ustar 00000000000000//! Implements crc32c with SSE 4.2 support. use crate::hw_tables; use crate::util::{self, U64Le}; use std::arch::x86_64 as simd; /// Computes CRC-32C using the SSE 4.2 hardware instruction. pub unsafe fn crc32c(crci: u32, buffer: &[u8]) -> u32 { let mut crc0 = u64::from(!crci); let (begin, middle, end) = util::split(buffer); // Leading bytes, up to the first one aligned on 8 bytes. crc0 = crc_u8(crc0, begin); // Most CPUs have a latency of 3 on these instructions, // meaning we must use 3 of them at a time, to leverage // hardware parallelism. // First do chunks of size LONG * 3. let chunk_size = (hw_tables::LONG * 3) / 8; let last_chunk = middle.len() / chunk_size * chunk_size; let (middle_first, middle_last) = middle.split_at(last_chunk); crc0 = crc_u64_parallel3(crc0, chunk_size, &hw_tables::LONG_TABLE, middle_first); // Now do chunks of size SHORT * 3. let chunk_size = (hw_tables::SHORT * 3) / 8; let last_chunk = middle_last.len() / chunk_size * chunk_size; let (middle_last_first, middle_last_last) = middle_last.split_at(last_chunk); crc0 = crc_u64_parallel3(crc0, chunk_size, &hw_tables::SHORT_TABLE, middle_last_first); // Now the last part, less than SHORT * 3 but still a multiple of 8-bytes. crc0 = crc_u64(crc0, middle_last_last); // Final unaligned remainder. crc0 = crc_u8(crc0, end); !(crc0 as u32) } #[inline] #[target_feature(enable = "sse4.2")] unsafe fn crc_u8_append(crc: u64, next: u8) -> u64 { u64::from(self::simd::_mm_crc32_u8(crc as u32, next)) } #[inline] #[target_feature(enable = "sse4.2")] unsafe fn crc_u64_append(crc: u64, next: u64) -> u64 { self::simd::_mm_crc32_u64(crc, next) } #[inline] unsafe fn crc_u8(crc: u64, buffer: &[u8]) -> u64 { buffer .iter() .fold(crc, |crc, &next| crc_u8_append(crc, next)) } #[inline] unsafe fn crc_u64(crc: u64, buffer: &[U64Le]) -> u64 { buffer .iter() .fold(crc, |crc, &next| crc_u64_append(crc, next.get())) } /// Hardware-parallel version of the algorithm. /// /// Calculates the CRC for a chunk of `chunk_size`, /// by dividing it in 3 separate blocks. /// /// Uses a pre-made CRC table designed for the given chunk size. 
#[inline] unsafe fn crc_u64_parallel3( crc: u64, chunk_size: usize, table: &hw_tables::CrcTable, buffer: &[U64Le], ) -> u64 { buffer.chunks(chunk_size).fold(crc, |mut crc0, chunk| { let mut crc1 = 0; let mut crc2 = 0; // Divide it in three. let block_size = chunk_size / 3; let mut blocks = chunk.chunks(block_size); let a = blocks.next().unwrap(); let b = blocks.next().unwrap(); let c = blocks.next().unwrap(); for i in 0..block_size { crc0 = crc_u64_append(crc0, a[i].get()); crc1 = crc_u64_append(crc1, b[i].get()); crc2 = crc_u64_append(crc2, c[i].get()); } crc0 = table.shift_u64(crc0) ^ crc1; crc0 = table.shift_u64(crc0) ^ crc2; crc0 }) } crc32c-0.6.8/src/io.rs000064400000000000000000000057431046102023000124730ustar 00000000000000//! Provides wrappers for [Read] and [Write] types which checksum the bytes being read/written. use std::io::{Read, Write}; use crate::crc32c_append; /// [Read]er wrapper which tracks the checksum of all bytes read. pub struct Crc32cReader { checksum: u32, inner: R, } impl Crc32cReader { /// Wrap an instance of a [Read]er. pub fn new(r: R) -> Self { Self::new_with_seed(r, 0) } /// Wrap a [Read]er, with the checksum seeded with a particular value. pub fn new_with_seed(r: R, seed: u32) -> Self { Self { checksum: seed, inner: r, } } /// Unwrap the inner [Read]er. pub fn into_inner(self) -> R { self.inner } /// Get the checksum of all bytes read. pub fn crc32c(&self) -> u32 { self.checksum } } impl Read for Crc32cReader { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { let out = self.inner.read(buf)?; self.checksum = crc32c_append(self.checksum, &buf[..out]); Ok(out) } } /// [Write]r wrapper which tracks the checksum of all bytes written. pub struct Crc32cWriter { checksum: u32, inner: W, } impl Crc32cWriter { /// Wrap an instance of a [Write]r. pub fn new(w: W) -> Self { Self::new_with_seed(w, 0) } /// Wrap a [Write]r, with the checksum seeded with a particular value. pub fn new_with_seed(w: W, seed: u32) -> Self { Self { checksum: seed, inner: w, } } /// Unwrap the inner [Write]r. pub fn into_inner(self) -> W { self.inner } /// Get the checksum of all bytes written. pub fn crc32c(&self) -> u32 { self.checksum } } impl Write for Crc32cWriter { fn write(&mut self, buf: &[u8]) -> std::io::Result { let out = self.inner.write(buf)?; self.checksum = crc32c_append(self.checksum, &buf[..out]); Ok(out) } fn flush(&mut self) -> std::io::Result<()> { self.inner.flush() } } #[cfg(test)] mod tests { use std::io::Cursor; use super::*; const TEST_STRING: &[u8] = b"This is a very long string which is used to test the CRC-32-Castagnoli function."; const CHECKSUM: u32 = 0x20_CB_1E_59; #[test] fn can_read() { let mut reader = Crc32cReader::new(TEST_STRING); let mut buf = Vec::default(); let n_read = reader.read_to_end(&mut buf).unwrap(); assert_eq!(n_read, TEST_STRING.len()); assert_eq!(buf.as_slice(), TEST_STRING); assert_eq!(reader.crc32c(), CHECKSUM); } #[test] fn can_write() { let mut buf = Vec::::default(); let mut writer = Crc32cWriter::>>::new(Cursor::new(&mut buf)); writer.write_all(TEST_STRING).unwrap(); let checksum = writer.crc32c(); assert_eq!(buf.as_slice(), TEST_STRING); assert_eq!(checksum, CHECKSUM); } } crc32c-0.6.8/src/lib.rs000064400000000000000000000037501046102023000126260ustar 00000000000000//! This crate provides the CRC-32-Castagnoli algorithm. //! //! It provides both a software implementation, and a hardware-optimized one for SSE 4.2. //! //! # Example //! //! ```rust //! let message = b"Hello world!"; //! //! let crc = crc32c::crc32c(message); //! 
//! assert_eq!(crc, 0x7B_98_E7_51);
//! ```
//!
//! # Enabling hardware acceleration
//!
//! If you compile your code with `-C target-feature=+sse4.2`,
//! then the hardware-optimized version will be compiled into the code.
//!
//! Otherwise, the crate will use `cpuid` at runtime to detect the
//! running CPU's features, and enable the appropriate algorithm.

mod combine;
mod hasher;
#[cfg(all(target_arch = "aarch64", armsimd))]
mod hw_aarch64;
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
mod hw_tables;
#[cfg(target_arch = "x86_64")]
mod hw_x86_64;
mod io;
mod sw;
mod util;

pub use hasher::Crc32cHasher;
pub use io::{Crc32cReader, Crc32cWriter};

/// Computes the CRC for the data payload.
///
/// Equivalent to calling `crc32c_append(0, data)`.
#[inline]
pub fn crc32c(data: &[u8]) -> u32 {
    crc32c_append(0, data)
}

/// Computes the CRC for the data payload, starting with a previous CRC value.
#[inline]
pub fn crc32c_append(crc: u32, data: &[u8]) -> u32 {
    #[cfg(target_arch = "x86_64")]
    {
        if is_x86_feature_detected!("sse4.2") {
            return unsafe { hw_x86_64::crc32c(crc, data) };
        }
    }

    #[cfg(all(target_arch = "aarch64", armsimd))]
    {
        if std::arch::is_aarch64_feature_detected!("crc") {
            return unsafe { hw_aarch64::crc32c(crc, data) };
        }
    }

    sw::crc32c(crc, data)
}

/// Computes the "combined" value of two CRC32c values. Specifically, given two byte streams A and
/// B and their CRC32c check values crc32c(A) and crc32c(B), this function calculates crc32c(AB)
/// using only crc32c(A), crc32c(B), and the length of B.
#[inline]
pub fn crc32c_combine(crc1: u32, crc2: u32, len2: usize) -> u32 {
    combine::crc32c_combine(crc1, crc2, len2)
}

crc32c-0.6.8/src/sw.rs

//! Implements crc32c without hardware support.

use crate::util::{self, U64Le};

/// 8-KiB lookup table.
pub struct CrcTable([[u32; 256]; 8]);

impl CrcTable {
    /// Returns an entry from the table.
    #[inline]
    pub fn at(&self, i: u8, j: u8) -> u64 {
        let i = i as usize;
        let j = j as usize;
        u64::from(self.0[i][j])
    }
}

const CRC_TABLE: CrcTable = CrcTable(include!(concat!(env!("OUT_DIR"), "/", "sw.table")));

/// Software implementation of the algorithm.
pub fn crc32c(crci: u32, buffer: &[u8]) -> u32 {
    let mut crc = u64::from(!crci);

    let (start, mid, end) = util::split(buffer);

    crc = crc_u8(crc, start);

    crc = crc_u64(crc, mid);

    crc = crc_u8(crc, end);

    !(crc as u32)
}

#[inline]
fn crc_u8(crc: u64, buffer: &[u8]) -> u64 {
    buffer.iter().fold(crc, |crc, &next| {
        let index = (crc ^ u64::from(next)) as u8;
        CRC_TABLE.at(0, index) ^ (crc >> 8)
    })
}

#[inline]
fn crc_u64(crci: u64, buffer: &[U64Le]) -> u64 {
    buffer.iter().fold(crci, |crc, &next| {
        let crc = crc ^ next.get();

        // Note: I've tried refactoring this to a for-loop,
        // but then it gets worse performance.
        CRC_TABLE.at(7, crc as u8)
            ^ CRC_TABLE.at(6, (crc >> 8) as u8)
            ^ CRC_TABLE.at(5, (crc >> 16) as u8)
            ^ CRC_TABLE.at(4, (crc >> 24) as u8)
            ^ CRC_TABLE.at(3, (crc >> 32) as u8)
            ^ CRC_TABLE.at(2, (crc >> 40) as u8)
            ^ CRC_TABLE.at(1, (crc >> 48) as u8)
            ^ CRC_TABLE.at(0, (crc >> 56) as u8)
    })
}

crc32c-0.6.8/src/util.rs

use std::ptr::NonNull;
use std::{cmp, slice};

/// A newtype wrapper for a little endian `u64`.
///
/// It is safe to transmute between a `u64` and `U64Le`.
#[repr(transparent)]
#[derive(Clone, Copy)]
pub(crate) struct U64Le(u64);

impl U64Le {
    /// Returns a `u64` with correct endianness for the target.
    ///
    /// On little endian targets, this is a no-op.
#[allow(clippy::inline_always)] #[inline(always)] pub const fn get(self) -> u64 { u64::from_le(self.0) } } /// Splits a buffer into three subslices: /// - the first one is up to the first 8-byte aligned address. /// - the second one is 8-byte aligned and its length is a multiple of 8. /// - the third one is 8-byte aligned but its length is less than 8. pub(crate) fn split(buffer: &[u8]) -> (&[u8], &[U64Le], &[u8]) { let (start, mid) = { let split_index = { let addr = buffer.as_ptr() as usize; // Align to multiples of 8. let aligned_addr = (addr + 7) & (!7); // Index of the next aligned element. let next_i = aligned_addr - addr; // Buffer might be too small. cmp::min(next_i, buffer.len()) }; buffer.split_at(split_index) }; let (mid, end) = { // Round length down to multiples of 8. let split_index = mid.len() & (!7); mid.split_at(split_index) }; let mid = unsafe { let length = mid.len() / 8; let ptr = if length == 0 { // `slice::from_raw_parts` requires that pointers be nonnull and // aligned even for zero-length slices. NonNull::::dangling().as_ptr() } else { #[allow(clippy::cast_ptr_alignment)] mid.as_ptr().cast::() }; slice::from_raw_parts(ptr, length) }; (start, mid, end) }
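
// Illustrative sanity checks for `split` (example tests sketched against the
// invariants documented above; they assume the standard `cargo test` harness).
#[cfg(test)]
mod split_tests {
    use super::split;

    #[test]
    fn covers_buffer_and_aligns_middle() {
        let buf: Vec<u8> = (0u8..=255).collect();
        // Try every possible starting alignment of the input slice.
        for offset in 0..8 {
            let slice = &buf[offset..];
            let (start, mid, end) = split(slice);
            // The three pieces account for every byte of the input, in order.
            assert_eq!(start.len() + mid.len() * 8 + end.len(), slice.len());
            // The middle slice is 8-byte aligned and the tail is shorter than a word.
            assert_eq!(mid.as_ptr() as usize % 8, 0);
            assert!(end.len() < 8);
        }
    }
}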