sha2-asm-0.6.2/.cargo_vcs_info.json

{
  "git": {
    "sha1": "fc3689b17fed29efdf7f0e508e502ea41688c9f3"
  }
}

sha2-asm-0.6.2/CHANGELOG.md

# Changelog

All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## 0.6.2 (2021-07-16)
### Fixed
- Builds on iOS targets ([#38])

[#38]: https://github.com/RustCrypto/asm-hashes/pull/38

## 0.6.1 (2021-05-05)
### Added
- `aarch64` implementation of SHA-256 for the M1 chip ([#35])

[#35]: https://github.com/RustCrypto/asm-hashes/pull/35

## 0.6.0 (2021-02-09)
## 0.5.5 (2021-01-25)
## 0.5.4 (2020-06-11)
## 0.5.3 (2020-01-05)
## 0.5.2 (2019-04-15)
## 0.5.1 (2018-05-15)
## 0.5.0 (2018-04-27)
## 0.4.0 (2018-03-19)
## 0.3.0 (2017-06-27)
## 0.2.1 (2017-05-09)
## 0.2.0 (2017-05-08)
## 0.1.0 (2017-05-07)

sha2-asm-0.6.2/Cargo.toml

# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies
#
# If you believe there's an error in this file please file an
# issue against the rust-lang/cargo repository. If you're
# editing this file be aware that the upstream Cargo.toml
# will likely look very different (and much more reasonable)

[package]
edition = "2018"
name = "sha2-asm"
version = "0.6.2"
authors = ["RustCrypto Developers"]
description = "Assembly implementation of SHA-2 compression functions"
documentation = "https://docs.rs/sha2-asm"
keywords = ["crypto", "sha2", "asm"]
categories = ["cryptography", "no-std"]
license = "MIT"
repository = "https://github.com/RustCrypto/asm-hashes"

[build-dependencies.cc]
version = "1.0"

sha2-asm-0.6.2/Cargo.toml.orig

[package]
name = "sha2-asm"
version = "0.6.2"
authors = ["RustCrypto Developers"]
license = "MIT"
description = "Assembly implementation of SHA-2 compression functions"
documentation = "https://docs.rs/sha2-asm"
repository = "https://github.com/RustCrypto/asm-hashes"
keywords = ["crypto", "sha2", "asm"]
categories = ["cryptography", "no-std"]
edition = "2018"

[build-dependencies]
cc = "1.0"

sha2-asm-0.6.2/LICENSE

Copyright (c) 2020 RustCrypto Developers
Copyright (c) 2017 Project Nayuki, Artyom Pavlov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
OR OTHER DEALINGS IN THE SOFTWARE.

sha2-asm-0.6.2/benches/lib.rs

#![no_std]
#![feature(test)]
extern crate test;

use test::Bencher;

#[bench]
fn bench_compress256(b: &mut Bencher) {
    let mut state = Default::default();
    let data = [[0u8; 64]];
    b.iter(|| {
        sha2_asm::compress256(&mut state, &data);
    });
    b.bytes = data.len() as u64;
}

#[cfg(not(target_arch = "aarch64"))]
#[bench]
fn bench_compress512(b: &mut Bencher) {
    let mut state = Default::default();
    let data = [[0u8; 128]];
    b.iter(|| {
        sha2_asm::compress512(&mut state, &data);
    });
    b.bytes = data.len() as u64;
}

sha2-asm-0.6.2/build.rs

fn main() {
    use std::env;

    let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap_or_default();
    let target_vendor = env::var("CARGO_CFG_TARGET_VENDOR").unwrap_or_default();

    let mut build256 = cc::Build::new();
    let (sha256_path, sha512_path) = if target_arch == "x86" {
        ("src/sha256_x86.S", "src/sha512_x86.S")
    } else if target_arch == "x86_64" {
        ("src/sha256_x64.S", "src/sha512_x64.S")
    } else if target_arch == "aarch64" && target_vendor == "apple" {
        build256.flag("-march=armv8-a+crypto");
        ("src/sha256_aarch64_apple.S", "")
    } else if target_arch == "aarch64" {
        build256.flag("-march=armv8-a+crypto");
        ("src/sha256_aarch64.S", "")
    } else {
        panic!("Unsupported target architecture");
    };

    if target_arch != "aarch64" {
        cc::Build::new()
            .flag("-c")
            .file(sha512_path)
            .compile("libsha512.a");
    }
    build256.flag("-c").file(sha256_path).compile("libsha256.a");
}

sha2-asm-0.6.2/src/lib.rs

//! Assembly implementation of the [SHA-2] compression functions.
//!
//! This crate is not intended for direct use; most users should
//! prefer the [`sha2`] crate with the `asm` feature enabled instead.
//!
//! Only x86, x86-64, and (partially) AArch64 architectures are
//! currently supported.
//!
//! [SHA-2]: https://en.wikipedia.org/wiki/SHA-2
//! [`sha2`]: https://crates.io/crates/sha2

#![no_std]

#[cfg(not(any(target_arch = "x86_64", target_arch = "x86", target_arch = "aarch64")))]
compile_error!("crate can only be used on x86, x86-64 and aarch64 architectures");

#[link(name = "sha256", kind = "static")]
extern "C" {
    fn sha256_compress(state: &mut [u32; 8], block: &[u8; 64]);
}

/// Safe wrapper around the assembly implementation of the SHA-256 compression function
#[inline]
pub fn compress256(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
    for block in blocks {
        unsafe { sha256_compress(state, block) }
    }
}

#[cfg(not(target_arch = "aarch64"))]
#[link(name = "sha512", kind = "static")]
extern "C" {
    fn sha512_compress(state: &mut [u64; 8], block: &[u8; 128]);
}

/// Safe wrapper around the assembly implementation of the SHA-512 compression function
///
/// This function is available only on x86 and x86-64 targets.
#[cfg(not(target_arch = "aarch64"))]
#[inline]
pub fn compress512(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
    for block in blocks {
        unsafe { sha512_compress(state, block) }
    }
}
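A minimal usage sketch (not part of the crate; the function name `sha256_of_empty` is illustrative): on a supported target, feeding the wrapper above one hand-padded block reproduces the SHA-256 digest of the empty message, since the compression function itself performs no padding.

fn sha256_of_empty() -> [u32; 8] {
    // FIPS 180-4 initial hash value for SHA-256
    let mut state = [
        0x6a09e667u32, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
        0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
    ];
    // Pre-padded empty message: a 0x80 marker byte, then zeros;
    // the trailing 64-bit bit-length field is also all zero.
    let mut block = [0u8; 64];
    block[0] = 0x80;
    sha2_asm::compress256(&mut state, &[block]);
    // Concatenating these words big-endian yields
    // e3b0c44298fc1c14..., the well-known SHA-256 of "".
    state
}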
sha2-asm-0.6.2/src/sha256_aarch64.S

/*
 * SHA-256 hash in AArch64 assembly
 *
 * Copyright (c) 2020 Emmanuel Gil Peyrot. (MIT License)
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 * - The above copyright notice and this permission notice shall be included in
 *   all copies or substantial portions of the Software.
 * - The Software is provided "as is", without warranty of any kind, express or
 *   implied, including but not limited to the warranties of merchantability,
 *   fitness for a particular purpose and noninfringement. In no event shall the
 *   authors or copyright holders be liable for any claim, damages or other
 *   liability, whether in an action of contract, tort or otherwise, arising from,
 *   out of or in connection with the Software or the use or other dealings in the
 *   Software.
 */

/* void sha256_compress(uint32_t state[8], const uint8_t block[64]) */
.global sha256_compress
sha256_compress:
    /*
     * Storage usage:
     *   Bytes  Location  Description
     *       4  x0        state argument
     *       4  x1        block argument
     *       4  x2        pointer to k
     *      16  q0        state0
     *      16  q1        state1
     *      16  q2        abef
     *      16  q3        cdgh
     *      16  q4        k0
     *      16  q5        k1
     *      16  q8        W0
     *      16  q9        W1
     *      16  q10       W2
     *      16  q11       W3
     */

    // save the lower half of q8-q11
    stp d8, d9, [sp,#-32]!
    stp d10, d11, [sp,#16]

    // Load state in registers
    ldp q0, q1, [x0]
    mov v2.16b, v0.16b
    mov v3.16b, v1.16b

    // Load block in registers
    ld1 {v8.4s-v11.4s}, [x1]

    // TODO: only do that on little endian
    rev32 v8.16b, v8.16b
    rev32 v9.16b, v9.16b
    rev32 v10.16b, v10.16b
    rev32 v11.16b, v11.16b

    // Compute the pointer to k
    adrp x2, .K
    add x2, x2, :lo12:.K

    // load k
    ld1 {v16.4s-v19.4s}, [x2], #64
    ld1 {v20.4s-v23.4s}, [x2], #64
    ld1 {v24.4s-v27.4s}, [x2], #64
    ld1 {v28.4s-v31.4s}, [x2]
    add v6.4s, v8.4s, v16.4s

    // Rounds 0-3
    sha256su0 v8.4s, v9.4s
    mov v4.16b, v2.16b
    add v7.4s, v9.4s, v17.4s
    sha256h q2, q3, v6.4s
    sha256h2 q3, q4, v6.4s
    sha256su1 v8.4s, v10.4s, v11.4s

    // Rounds 4-7
    sha256su0 v9.4s, v10.4s
    mov v4.16b, v2.16b
    add v6.4s, v10.4s, v18.4s
    sha256h q2, q3, v7.4s
    sha256h2 q3, q4, v7.4s
    sha256su1 v9.4s, v11.4s, v8.4s

    // Rounds 8-11
    sha256su0 v10.4s, v11.4s
    mov v4.16b, v2.16b
    add v7.4s, v11.4s, v19.4s
    sha256h q2, q3, v6.4s
    sha256h2 q3, q4, v6.4s
    sha256su1 v10.4s, v8.4s, v9.4s

    // Rounds 12-15
    sha256su0 v11.4s, v8.4s
    mov v4.16b, v2.16b
    add v6.4s, v8.4s, v20.4s
    sha256h q2, q3, v7.4s
    sha256h2 q3, q4, v7.4s
    sha256su1 v11.4s, v9.4s, v10.4s

    // Rounds 16-19
    sha256su0 v8.4s, v9.4s
    mov v4.16b, v2.16b
    add v7.4s, v9.4s, v21.4s
    sha256h q2, q3, v6.4s
    sha256h2 q3, q4, v6.4s
    sha256su1 v8.4s, v10.4s, v11.4s

    // Rounds 20-23
    sha256su0 v9.4s, v10.4s
    mov v4.16b, v2.16b
    add v6.4s, v10.4s, v22.4s
    sha256h q2, q3, v7.4s
    sha256h2 q3, q4, v7.4s
    sha256su1 v9.4s, v11.4s, v8.4s

    // Rounds 24-27
    sha256su0 v10.4s, v11.4s
    mov v4.16b, v2.16b
    add v7.4s, v11.4s, v23.4s
    sha256h q2, q3, v6.4s
    sha256h2 q3, q4, v6.4s
    sha256su1 v10.4s, v8.4s, v9.4s

    // Rounds 28-31
    sha256su0 v11.4s, v8.4s
    mov v4.16b, v2.16b
    add v6.4s, v8.4s, v24.4s
    sha256h q2, q3, v7.4s
    sha256h2 q3, q4, v7.4s
    sha256su1 v11.4s, v9.4s, v10.4s

    // Rounds 32-35
    sha256su0 v8.4s, v9.4s
    mov v4.16b, v2.16b
    add v7.4s, v9.4s, v25.4s
    sha256h q2, q3, v6.4s
    sha256h2 q3, q4, v6.4s
    sha256su1 v8.4s, v10.4s, v11.4s

    // Rounds 36-39
    sha256su0 v9.4s, v10.4s
    mov v4.16b, v2.16b
    add v6.4s, v10.4s, v26.4s
    sha256h q2, q3, v7.4s
    sha256h2 q3, q4, v7.4s
    sha256su1 v9.4s, v11.4s, v8.4s

    // Rounds 40-43
    sha256su0 v10.4s, v11.4s
    mov v4.16b, v2.16b
    add v7.4s, v11.4s, v27.4s
    sha256h q2, q3, v6.4s
    sha256h2 q3, q4, v6.4s
    sha256su1 v10.4s, v8.4s, v9.4s

    // Rounds 44-47
    sha256su0 v11.4s, v8.4s
    mov v4.16b, v2.16b
    add v6.4s, v8.4s, v28.4s
    sha256h q2, q3, v7.4s
    sha256h2 q3, q4, v7.4s
    sha256su1 v11.4s, v9.4s, v10.4s

    // Rounds 48-51
    mov v4.16b, v2.16b
    add v7.4s, v9.4s, v29.4s
    sha256h q2, q3, v6.4s
    sha256h2 q3, q4, v6.4s

    // Rounds 52-55
    mov v4.16b, v2.16b
    add v6.4s, v10.4s, v30.4s
    sha256h q2, q3, v7.4s
    sha256h2 q3, q4, v7.4s

    // Rounds 56-59
    mov v4.16b, v2.16b
    add v7.4s, v11.4s, v31.4s
    sha256h q2, q3, v6.4s
    sha256h2 q3, q4, v6.4s

    // Rounds 60-63
    mov v4.16b, v2.16b
    sha256h q2, q3, v7.4s
    sha256h2 q3, q4, v7.4s

    // Update state
    add v0.4s, v0.4s, v2.4s
    add v1.4s, v1.4s, v3.4s
    stp q0, q1, [x0]

    // restore
    ldp d10, d11, [sp,#16]
    ldp d8, d9, [sp],#32

    ret

.align 4
.K:
    .word 0x428A2F98
    .word 0x71374491
    .word 0xB5C0FBCF
    .word 0xE9B5DBA5
    .word 0x3956C25B
    .word 0x59F111F1
    .word 0x923F82A4
    .word 0xAB1C5ED5
    .word 0xD807AA98
    .word 0x12835B01
    .word 0x243185BE
    .word 0x550C7DC3
    .word 0x72BE5D74
    .word 0x80DEB1FE
    .word 0x9BDC06A7
    .word 0xC19BF174
    .word 0xE49B69C1
    .word 0xEFBE4786
    .word 0x0FC19DC6
    .word 0x240CA1CC
    .word 0x2DE92C6F
    .word 0x4A7484AA
    .word 0x5CB0A9DC
    .word 0x76F988DA
    .word 0x983E5152
    .word 0xA831C66D
    .word 0xB00327C8
    .word 0xBF597FC7
    .word 0xC6E00BF3
    .word 0xD5A79147
    .word 0x06CA6351
    .word 0x14292967
    .word 0x27B70A85
    .word 0x2E1B2138
    .word 0x4D2C6DFC
    .word 0x53380D13
    .word 0x650A7354
    .word 0x766A0ABB
    .word 0x81C2C92E
    .word 0x92722C85
    .word 0xA2BFE8A1
    .word 0xA81A664B
    .word 0xC24B8B70
    .word 0xC76C51A3
    .word 0xD192E819
    .word 0xD6990624
    .word 0xF40E3585
    .word 0x106AA070
    .word 0x19A4C116
    .word 0x1E376C08
    .word 0x2748774C
    .word 0x34B0BCB5
    .word 0x391C0CB3
    .word 0x4ED8AA4A
    .word 0x5B9CCA4F
    .word 0x682E6FF3
    .word 0x748F82EE
    .word 0x78A5636F
    .word 0x84C87814
    .word 0x8CC70208
    .word 0x90BEFFFA
    .word 0xA4506CEB
    .word 0xBEF9A3F7
    .word 0xC67178F2
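For reference, a scalar Rust sketch (not from the crate) of the message-schedule step that the sha256su0/sha256su1 instructions above evaluate four lanes at a time, using the FIPS 180-4 small-sigma functions; the big-endian word loads mirror the rev32 fixup:

fn expand_schedule(block: &[u8; 64]) -> [u32; 64] {
    let s0 = |x: u32| x.rotate_right(7) ^ x.rotate_right(18) ^ (x >> 3);
    let s1 = |x: u32| x.rotate_right(17) ^ x.rotate_right(19) ^ (x >> 10);
    let mut w = [0u32; 64];
    for i in 0..16 {
        // big-endian word load; this is what rev32 arranges on little-endian CPUs
        let b = &block[i * 4..];
        w[i] = u32::from_be_bytes([b[0], b[1], b[2], b[3]]);
    }
    for i in 16..64 {
        // w[i] = w[i-16] + s0(w[i-15]) + w[i-7] + s1(w[i-2]), mod 2^32
        w[i] = w[i - 16]
            .wrapping_add(s0(w[i - 15]))
            .wrapping_add(w[i - 7])
            .wrapping_add(s1(w[i - 2]));
    }
    w
}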
sha2-asm-0.6.2/src/sha256_aarch64_apple.S

/*
 * SHA-256 hash in AArch64 assembly for macos/M1
 *
 * Based on the following C intrinsics implementation:
 *
 * Original C written and placed in public domain by Jeffrey Walton.
 * Based on code from ARM, and by Johannes Schneiders, Skip Hovsmith and
 * Barry O'Rourke for the mbedTLS project.
 */

/* void sha256_compress(uint32_t state[8], const uint8_t block[64]) */
.global _sha256_compress
_sha256_compress:
    mov x8, #0
    ldp q0, q1, [x0]
    ldp q2, q3, [x1]
    ldp q4, q5, [x1, #32]
    stp q2, q3, [sp, #-64]!
    stp q4, q5, [sp, #32]
    mov x9, sp
LBB0_1:
    ldr q2, [x9, x8]
    rev32.16b v2, v2
    str q2, [x9, x8]
    add x8, x8, #16
    cmp x8, #64
    b.ne LBB0_1
    adrp x8, K_0@PAGE
    ldr q2, [x8, K_0@PAGEOFF]
    ldp q6, q4, [sp]
    add.4s v3, v6, v2

    // Rounds 0-3
    sha256su0.4s v6, v4
    adrp x8, K_1@PAGE
    ldr q2, [x8, K_1@PAGEOFF]
    add.4s v7, v4, v2
    mov.16b v16, v0
    sha256h.4s q16, q1, v3
    mov.16b v2, v1
    sha256h2.4s q2, q0, v3
    ldp q5, q3, [sp, #32]
    sha256su1.4s v6, v5, v3

    // Rounds 4-7
    sha256su0.4s v4, v5
    adrp x8, K_2@PAGE
    ldr q17, [x8, K_2@PAGEOFF]
    add.4s v17, v5, v17
    mov.16b v18, v16
    sha256h.4s q18, q2, v7
    sha256h2.4s q2, q16, v7
    sha256su1.4s v4, v3, v6

    // Rounds 8-11
    sha256su0.4s v5, v3
    adrp x8, K_3@PAGE
    ldr q7, [x8, K_3@PAGEOFF]
    add.4s v7, v3, v7
    mov.16b v16, v18
    sha256h.4s q16, q2, v17
    sha256h2.4s q2, q18, v17
    sha256su1.4s v5, v6, v4

    // Rounds 12-15
    sha256su0.4s v3, v6
    adrp x8, K_4@PAGE
    ldr q17, [x8, K_4@PAGEOFF]
    add.4s v17, v6, v17
    mov.16b v18, v16
    sha256h.4s q18, q2, v7
    sha256h2.4s q2, q16, v7
    sha256su1.4s v3, v4, v5

    // Rounds 16-19
    sha256su0.4s v6, v4
    adrp x8, K_5@PAGE
    ldr q7, [x8, K_5@PAGEOFF]
    add.4s v7, v4, v7
    mov.16b v16, v18
    sha256h.4s q16, q2, v17
    sha256h2.4s q2, q18, v17
    sha256su1.4s v6, v5, v3

    // Rounds 20-23
    sha256su0.4s v4, v5
    adrp x8, K_6@PAGE
    ldr q17, [x8, K_6@PAGEOFF]
    add.4s v17, v5, v17
    mov.16b v18, v16
    sha256h.4s q18, q2, v7
    sha256h2.4s q2, q16, v7
    sha256su1.4s v4, v3, v6

    // Rounds 24-27
    sha256su0.4s v5, v3
    adrp x8, K_7@PAGE
    ldr q7, [x8, K_7@PAGEOFF]
    add.4s v7, v3, v7
    mov.16b v16, v18
    sha256h.4s q16, q2, v17
    sha256h2.4s q2, q18, v17
    sha256su1.4s v5, v6, v4

    // Rounds 28-31
    sha256su0.4s v3, v6
    adrp x8, K_8@PAGE
    ldr q17, [x8, K_8@PAGEOFF]
    add.4s v17, v6, v17
    mov.16b v18, v16
    sha256h.4s q18, q2, v7
    sha256h2.4s q2, q16, v7
    sha256su1.4s v3, v4, v5

    // Rounds 32-35
    sha256su0.4s v6, v4
    adrp x8, K_9@PAGE
    ldr q7, [x8, K_9@PAGEOFF]
    add.4s v7, v4, v7
    mov.16b v16, v18
    sha256h.4s q16, q2, v17
    sha256h2.4s q2, q18, v17
    sha256su1.4s v6, v5, v3

    // Rounds 36-39
    sha256su0.4s v4, v5
    adrp x8, K_10@PAGE
    ldr q17, [x8, K_10@PAGEOFF]
    add.4s v17, v5, v17
    mov.16b v18, v16
    sha256h.4s q18, q2, v7
    sha256h2.4s q2, q16, v7
    sha256su1.4s v4, v3, v6

    // Rounds 40-43
    sha256su0.4s v5, v3
    adrp x8, K_11@PAGE
    ldr q7, [x8, K_11@PAGEOFF]
    add.4s v7, v3, v7
    mov.16b v16, v18
    sha256h.4s q16, q2, v17
    sha256h2.4s q2, q18, v17
    sha256su1.4s v5, v6, v4

    // Rounds 44-47
    sha256su0.4s v3, v6
    adrp x8, K_12@PAGE
    ldr q17, [x8, K_12@PAGEOFF]
    add.4s v6, v6, v17
    mov.16b v17, v16
    sha256h.4s q17, q2, v7
    sha256h2.4s q2, q16, v7
    sha256su1.4s v3, v4, v5

    // Rounds 48-51
    adrp x8, K_13@PAGE
    ldr q7, [x8, K_13@PAGEOFF]
    add.4s v4, v4, v7
    mov.16b v7, v17
    sha256h.4s q7, q2, v6
    sha256h2.4s q2, q17, v6

    // Rounds 52-55
    adrp x8, K_14@PAGE
    ldr q6, [x8, K_14@PAGEOFF]
    add.4s v5, v5, v6
    mov.16b v6, v7
    sha256h.4s q6, q2, v4
    sha256h2.4s q2, q7, v4

    // Rounds 56-59
    adrp x8, K_15@PAGE
    ldr q4, [x8, K_15@PAGEOFF]
    add.4s v3, v3, v4
    mov.16b v4, v6
    sha256h.4s q4, q2, v5
    sha256h2.4s q2, q6, v5

    // Rounds 60-63
    mov.16b v5, v4
    sha256h.4s q5, q2, v3
    sha256h2.4s q2, q4, v3

    // Update state
    add.4s v0, v5, v0
    add.4s v1, v2, v1

    // restore
    stp q0, q1, [x0]
    add sp, sp, #64
    ret

.align 4
K_0:
    .long 1116352408
    .long 1899447441
    .long 3049323471
    .long 3921009573
.align 4
K_1:
    .long 961987163
    .long 1508970993
    .long 2453635748
    .long 2870763221
.align 4
K_2:
    .long 3624381080
    .long 310598401
    .long 607225278
    .long 1426881987
.align 4
K_3:
    .long 1925078388
    .long 2162078206
    .long 2614888103
    .long 3248222580
.align 4
K_4:
    .long 3835390401
    .long 4022224774
    .long 264347078
    .long 604807628
.align 4
K_5:
    .long 770255983
    .long 1249150122
    .long 1555081692
    .long 1996064986
.align 4
K_6:
    .long 2554220882
    .long 2821834349
    .long 2952996808
    .long 3210313671
.align 4
K_7:
    .long 3336571891
    .long 3584528711
    .long 113926993
    .long 338241895
.align 4
K_8:
    .long 666307205
    .long 773529912
    .long 1294757372
    .long 1396182291
.align 4
K_9:
    .long 1695183700
    .long 1986661051
    .long 2177026350
    .long 2456956037
.align 4
K_10:
    .long 2730485921
    .long 2820302411
    .long 3259730800
    .long 3345764771
.align 4
K_11:
    .long 3516065817
    .long 3600352804
    .long 4094571909
    .long 275423344
.align 4
K_12:
    .long 430227734
    .long 506948616
    .long 659060556
    .long 883997877
.align 4
K_13:
    .long 958139571
    .long 1322822218
    .long 1537002063
    .long 1747873779
.align 4
K_14:
    .long 1955562222
    .long 2024104815
    .long 2227730452
    .long 2361852424
.align 4
K_15:
    .long 2428436474
    .long 2756734187
    .long 3204031479
    .long 3329325298
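The Apple variant spells the same round constants in decimal. A throwaway check (not part of the crate) that the first and last entries match the hex .K table in sha256_aarch64.S:

#[test]
fn apple_k_constants_match_hex_table() {
    assert_eq!(1_116_352_408u32, 0x428A2F98); // first lane of K_0
    assert_eq!(3_329_325_298u32, 0xC67178F2); // last lane of K_15
}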
sha2-asm-0.6.2/src/sha256_x64.S

/*
 * SHA-256 hash in x86-64 assembly
 *
 * Copyright (c) 2015 Project Nayuki. (MIT License)
 * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 * - The above copyright notice and this permission notice shall be included in
 *   all copies or substantial portions of the Software.
 * - The Software is provided "as is", without warranty of any kind, express or
 *   implied, including but not limited to the warranties of merchantability,
 *   fitness for a particular purpose and noninfringement. In no event shall the
 *   authors or copyright holders be liable for any claim, damages or other
 *   liability, whether in an action of contract, tort or otherwise, arising from,
 *   out of or in connection with the Software or the use or other dealings in the
 *   Software.
 */

/* void sha256_compress(uint32_t state[8], const uint8_t block[64]) */
#ifdef __APPLE__
.globl _sha256_compress
_sha256_compress:
#else
.globl sha256_compress
sha256_compress:
#endif
    /*
     * Storage usage:
     *   Bytes  Location  Description
     *       4  eax       Temporary for calculation per round
     *       4  ebx       Temporary for calculation per round
     *       4  ecx       Temporary for calculation per round
     *       4  edx       Temporary for calculation per round
     *       8  rsi       Base address of block array argument (read-only)
     *       8  rdi       Base address of state array argument (read-only)
     *       8  rsp       x86-64 stack pointer
     *       4  r8d       SHA-256 state variable A
     *       4  r9d       SHA-256 state variable B
     *       4  r10d      SHA-256 state variable C
     *       4  r11d      SHA-256 state variable D
     *       4  r12d      SHA-256 state variable E
     *       4  r13d      SHA-256 state variable F
     *       4  r14d      SHA-256 state variable G
     *       4  r15d      SHA-256 state variable H
     *      64  [rsp+0]   Circular buffer of most recent 16 key schedule items, 4 bytes each
     *      16  xmm0      Caller's value of r10 (only low 64 bits are used)
     *      16  xmm1      Caller's value of r11 (only low 64 bits are used)
     *      16  xmm2      Caller's value of r12 (only low 64 bits are used)
     *      16  xmm3      Caller's value of r13 (only low 64 bits are used)
     *      16  xmm4      Caller's value of r14 (only low 64 bits are used)
     *      16  xmm5      Caller's value of r15 (only low 64 bits are used)
     *      16  xmm6      Caller's value of rbx (only low 64 bits are used)
     */

#define SCHED(i)  (((i)&0xF)*4)(%rsp)

#define ROUNDa(i, a, b, c, d, e, f, g, h, k) \
    movl (i*4)(%rsi), %ebx; \
    bswapl %ebx; \
    movl %ebx, SCHED(i); \
    ROUNDTAIL(a, b, c, d, e, f, g, h, k)

#define ROUNDb(i, a, b, c, d, e, f, g, h, k) \
    movl SCHED(i-15), %eax; \
    movl SCHED(i-16), %ebx; \
    addl SCHED(i- 7), %ebx; \
    movl %eax, %ecx; \
    movl %eax, %edx; \
    rorl $18, %ecx; \
    shrl $3, %edx; \
    rorl $7, %eax; \
    xorl %edx, %ecx; \
    xorl %ecx, %eax; \
    addl %eax, %ebx; \
    movl SCHED(i- 2), %eax; \
    movl %eax, %ecx; \
    movl %eax, %edx; \
    rorl $19, %ecx; \
    shrl $10, %edx; \
    rorl $17, %eax; \
    xorl %edx, %ecx; \
    xorl %ecx, %eax; \
    addl %eax, %ebx; \
    movl %ebx, SCHED(i); \
    ROUNDTAIL(a, b, c, d, e, f, g, h, k)

#define ROUNDTAIL(a, b, c, d, e, f, g, h, k) \
    /* Part 0 */ \
    /* See Intel's "Fast SHA-256 Implementations" for the ROR transformation */ \
    movl %e, %eax; \
    rorl $14, %eax; \
    xorl %e, %eax; \
    rorl $5, %eax; \
    xorl %e, %eax; \
    rorl $6, %eax; \
    addl %ebx, %h; \
    movl %g, %ecx; \
    xorl %f, %ecx; \
    andl %e, %ecx; \
    xorl %g, %ecx; \
    leal k(%rax,%rcx), %eax; \
    addl %eax, %h; \
    /* Part 1 */ \
    addl %h, %d; \
    /* Part 2 */ \
    /* See Intel's "Fast SHA-256 Implementations" for the ROR transformation */ \
    movl %a, %eax; \
    rorl $9, %eax; \
    xorl %a, %eax; \
    rorl $11, %eax; \
    xorl %a, %eax; \
    rorl $2, %eax; \
    movl %c, %ecx; \
    addl %eax, %h; \
    movl %c, %eax; \
    orl %b, %eax; \
    andl %b, %ecx; \
    andl %a, %eax; \
    orl %ecx, %eax; \
    addl %eax, %h;

    /* Save registers, allocate scratch space */
    movq %r10, %xmm0
    movq %r11, %xmm1
    movq %r12, %xmm2
    movq %r13, %xmm3
    movq %r14, %xmm4
    movq %r15, %xmm5
    movq %rbx, %xmm6
    subq $64, %rsp

    /* Load state */
    movl  0(%rdi), %r8d   /* a */
    movl  4(%rdi), %r9d   /* b */
    movl  8(%rdi), %r10d  /* c */
    movl 12(%rdi), %r11d  /* d */
    movl 16(%rdi), %r12d  /* e */
    movl 20(%rdi), %r13d  /* f */
    movl 24(%rdi), %r14d  /* g */
    movl 28(%rdi), %r15d  /* h */

    /* Do 64 rounds of hashing */
    ROUNDa( 0, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 0x428A2F98)
    ROUNDa( 1, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x71374491)
    ROUNDa( 2, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -0x4A3F0431)
    ROUNDa( 3, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -0x164A245B)
    ROUNDa( 4, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x3956C25B)
    ROUNDa( 5, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 0x59F111F1)
    ROUNDa( 6, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x6DC07D5C)
    ROUNDa( 7, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -0x54E3A12B)
    ROUNDa( 8, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -0x27F85568)
    ROUNDa( 9, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x12835B01)
    ROUNDa(10, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0x243185BE)
    ROUNDa(11, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 0x550C7DC3)
    ROUNDa(12, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x72BE5D74)
    ROUNDa(13, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -0x7F214E02)
    ROUNDa(14, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x6423F959)
    ROUNDa(15, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -0x3E640E8C)
    ROUNDb(16, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -0x1B64963F)
    ROUNDb(17, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -0x1041B87A)
    ROUNDb(18, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0x0FC19DC6)
    ROUNDb(19, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 0x240CA1CC)
    ROUNDb(20, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x2DE92C6F)
    ROUNDb(21, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 0x4A7484AA)
    ROUNDb(22, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 0x5CB0A9DC)
    ROUNDb(23, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 0x76F988DA)
    ROUNDb(24, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -0x67C1AEAE)
    ROUNDb(25, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -0x57CE3993)
    ROUNDb(26, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -0x4FFCD838)
    ROUNDb(27, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -0x40A68039)
    ROUNDb(28, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -0x391FF40D)
    ROUNDb(29, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -0x2A586EB9)
    ROUNDb(30, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 0x06CA6351)
    ROUNDb(31, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 0x14292967)
    ROUNDb(32, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 0x27B70A85)
    ROUNDb(33, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x2E1B2138)
    ROUNDb(34, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0x4D2C6DFC)
    ROUNDb(35, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 0x53380D13)
    ROUNDb(36, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x650A7354)
    ROUNDb(37, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 0x766A0ABB)
    ROUNDb(38, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x7E3D36D2)
    ROUNDb(39, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -0x6D8DD37B)
    ROUNDb(40, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -0x5D40175F)
    ROUNDb(41, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -0x57E599B5)
    ROUNDb(42, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -0x3DB47490)
    ROUNDb(43, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -0x3893AE5D)
    ROUNDb(44, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -0x2E6D17E7)
    ROUNDb(45, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -0x2966F9DC)
    ROUNDb(46, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x0BF1CA7B)
    ROUNDb(47, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 0x106AA070)
    ROUNDb(48, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 0x19A4C116)
    ROUNDb(49, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x1E376C08)
    ROUNDb(50, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0x2748774C)
    ROUNDb(51, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 0x34B0BCB5)
    ROUNDb(52, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x391C0CB3)
    ROUNDb(53, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 0x4ED8AA4A)
    ROUNDb(54, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 0x5B9CCA4F)
    ROUNDb(55, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 0x682E6FF3)
    ROUNDb(56, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 0x748F82EE)
    ROUNDb(57, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x78A5636F)
    ROUNDb(58, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -0x7B3787EC)
    ROUNDb(59, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -0x7338FDF8)
    ROUNDb(60, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -0x6F410006)
    ROUNDb(61, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -0x5BAF9315)
    ROUNDb(62, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x41065C09)
    ROUNDb(63, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -0x398E870E)

    /* Add to state */
    addl %r8d ,  0(%rdi)
    addl %r9d ,  4(%rdi)
    addl %r10d,  8(%rdi)
    addl %r11d, 12(%rdi)
    addl %r12d, 16(%rdi)
    addl %r13d, 20(%rdi)
    addl %r14d, 24(%rdi)
    addl %r15d, 28(%rdi)

    /* Restore registers */
    movq %xmm0, %r10
    movq %xmm1, %r11
    movq %xmm2, %r12
    movq %xmm3, %r13
    movq %xmm4, %r14
    movq %xmm5, %r15
    movq %xmm6, %rbx
    addq $64, %rsp
    retq
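The ROUNDTAIL comments above cite Intel's ROR transformation: big-sigma1(e) is computed as ror6(ror5(ror14(e) ^ e) ^ e) instead of three independent rotates, saving register moves. A small equivalence check (illustrative, not part of the crate):

#[test]
fn nested_ror_equals_big_sigma1() {
    let e = 0x510e527fu32; // arbitrary word (happens to be SHA-256 IV word E)
    let nested = ((e.rotate_right(14) ^ e).rotate_right(5) ^ e).rotate_right(6);
    let direct = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
    assert_eq!(nested, direct);
}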
sha2-asm-0.6.2/src/sha256_x86.S

/*
 * SHA-256 hash in x86 assembly
 *
 * Copyright (c) 2014 Project Nayuki. (MIT License)
 * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 * - The above copyright notice and this permission notice shall be included in
 *   all copies or substantial portions of the Software.
 * - The Software is provided "as is", without warranty of any kind, express or
 *   implied, including but not limited to the warranties of merchantability,
 *   fitness for a particular purpose and noninfringement. In no event shall the
 *   authors or copyright holders be liable for any claim, damages or other
 *   liability, whether in an action of contract, tort or otherwise, arising from,
 *   out of or in connection with the Software or the use or other dealings in the
 *   Software.
 */

/* void sha256_compress(uint32_t state[8], const uint8_t block[64]) */
#ifdef __APPLE__
.globl _sha256_compress
_sha256_compress:
#else
.globl sha256_compress
sha256_compress:
#endif
    /*
     * Storage usage:
     *   Bytes  Location   Description
     *       4  eax        Temporary for calculation per round
     *       4  ebx        Temporary for calculation per round
     *       4  ecx        Temporary for calculation per round
     *       4  edx        Temporary for calculation per round
     *       4  ebp        Temporary for calculation per round
     *       4  esi        (During state loading and update) base address of state array argument
     *                     (During hash rounds) temporary for calculation per round
     *       4  edi        Base address of block array argument (during key schedule loading rounds only)
     *       4  esp        x86 stack pointer
     *      32  [esp+  0]  SHA-256 state variables A,B,C,D,E,F,G,H (4 bytes each)
     *      64  [esp+ 32]  Key schedule of 16 * 4 bytes
     *       4  [esp+ 96]  Caller's value of ebx
     *       4  [esp+100]  Caller's value of esi
     *       4  [esp+104]  Caller's value of edi
     *       4  [esp+108]  Caller's value of ebp
     */

#define SCHED(i)  ((((i)&0xF)+8)*4)(%esp)

#define ROUNDa(i, a, b, c, d, e, f, g, h, k) \
    movl (i*4)(%edi), %ebp; \
    bswapl %ebp; \
    movl %ebp, SCHED(i); \
    ROUNDTAIL(i, a, b, c, d, e, f, g, h, k)

#define ROUNDb(i, a, b, c, d, e, f, g, h, k) \
    movl SCHED(i-15), %eax; \
    movl SCHED(i-16), %ebp; \
    movl %eax, %ebx; \
    addl SCHED(i- 7), %ebp; \
    movl %eax, %ecx; \
    rorl $18, %ebx; \
    shrl $3, %ecx; \
    rorl $7, %eax; \
    xorl %ecx, %ebx; \
    xorl %ebx, %eax; \
    addl %eax, %ebp; \
    movl SCHED(i- 2), %eax; \
    movl %eax, %ebx; \
    movl %eax, %ecx; \
    rorl $19, %ebx; \
    shrl $10, %ecx; \
    rorl $17, %eax; \
    xorl %ecx, %ebx; \
    xorl %ebx, %eax; \
    addl %eax, %ebp; \
    movl %ebp, SCHED(i); \
    ROUNDTAIL(i, a, b, c, d, e, f, g, h, k)

#define STATE(i)  (i*4)(%esp)

#define ROUNDTAIL(i, a, b, c, d, e, f, g, h, k) \
    /* Part 0 */ \
    movl STATE(e), %eax; \
    movl %eax, %ebx; \
    movl %eax, %ecx; \
    movl %eax, %edx; \
    rorl $11, %eax; \
    rorl $25, %ebx; \
    rorl $6, %ecx; \
    movl STATE(h), %esi; \
    xorl %ebx, %eax; \
    xorl %eax, %ecx; \
    addl %ebp, %esi; \
    movl STATE(g), %ebx; \
    movl STATE(f), %eax; \
    xorl %ebx, %eax; \
    andl %edx, %eax; \
    xorl %ebx, %eax; \
    leal k(%ecx,%eax), %ecx; \
    addl %ecx, %esi; \
    /* Part 1 */ \
    addl %esi, STATE(d); \
    /* Part 2 */ \
    movl STATE(a), %eax; \
    movl %eax, %ebx; \
    movl %eax, %ecx; \
    movl %eax, %edx; \
    rorl $13, %eax; \
    rorl $22, %ebx; \
    rorl $2, %ecx; \
    xorl %ebx, %eax; \
    xorl %eax, %ecx; \
    movl STATE(c), %eax; \
    addl %ecx, %esi; \
    movl %eax, %ecx; \
    movl STATE(b), %ebx; \
    orl %ebx, %ecx; \
    andl %ebx, %eax; \
    andl %edx, %ecx; \
    orl %eax, %ecx; \
    addl %ecx, %esi; \
    movl %esi, STATE(h);

    /* Allocate scratch space, save registers */
    subl $112, %esp
    movl %ebx,  96(%esp)
    movl %esi, 100(%esp)
    movl %edi, 104(%esp)
    movl %ebp, 108(%esp)

    /* Copy state */
    movl 116(%esp), %esi  /* Argument: state */
    movl  0(%esi), %eax;  movl %eax,  0(%esp)
    movl  4(%esi), %eax;  movl %eax,  4(%esp)
    movl  8(%esi), %eax;  movl %eax,  8(%esp)
    movl 12(%esi), %eax;  movl %eax, 12(%esp)
    movl 16(%esi), %eax;  movl %eax, 16(%esp)
    movl 20(%esi), %eax;  movl %eax, 20(%esp)
    movl 24(%esi), %eax;  movl %eax, 24(%esp)
    movl 28(%esi), %eax;  movl %eax, 28(%esp)

    /* Do 64 rounds of hashing */
    movl 120(%esp), %edi  /* Argument: block */
    ROUNDa( 0, 0, 1, 2, 3, 4, 5, 6, 7, 0x428A2F98)
    ROUNDa( 1, 7, 0, 1, 2, 3, 4, 5, 6, 0x71374491)
    ROUNDa( 2, 6, 7, 0, 1, 2, 3, 4, 5, 0xB5C0FBCF)
    ROUNDa( 3, 5, 6, 7, 0, 1, 2, 3, 4, 0xE9B5DBA5)
    ROUNDa( 4, 4, 5, 6, 7, 0, 1, 2, 3, 0x3956C25B)
    ROUNDa( 5, 3, 4, 5, 6, 7, 0, 1, 2, 0x59F111F1)
    ROUNDa( 6, 2, 3, 4, 5, 6, 7, 0, 1, 0x923F82A4)
    ROUNDa( 7, 1, 2, 3, 4, 5, 6, 7, 0, 0xAB1C5ED5)
    ROUNDa( 8, 0, 1, 2, 3, 4, 5, 6, 7, 0xD807AA98)
    ROUNDa( 9, 7, 0, 1, 2, 3, 4, 5, 6, 0x12835B01)
    ROUNDa(10, 6, 7, 0, 1, 2, 3, 4, 5, 0x243185BE)
    ROUNDa(11, 5, 6, 7, 0, 1, 2, 3, 4, 0x550C7DC3)
    ROUNDa(12, 4, 5, 6, 7, 0, 1, 2, 3, 0x72BE5D74)
    ROUNDa(13, 3, 4, 5, 6, 7, 0, 1, 2, 0x80DEB1FE)
    ROUNDa(14, 2, 3, 4, 5, 6, 7, 0, 1, 0x9BDC06A7)
    ROUNDa(15, 1, 2, 3, 4, 5, 6, 7, 0, 0xC19BF174)
    ROUNDb(16, 0, 1, 2, 3, 4, 5, 6, 7, 0xE49B69C1)
    ROUNDb(17, 7, 0, 1, 2, 3, 4, 5, 6, 0xEFBE4786)
    ROUNDb(18, 6, 7, 0, 1, 2, 3, 4, 5, 0x0FC19DC6)
    ROUNDb(19, 5, 6, 7, 0, 1, 2, 3, 4, 0x240CA1CC)
    ROUNDb(20, 4, 5, 6, 7, 0, 1, 2, 3, 0x2DE92C6F)
    ROUNDb(21, 3, 4, 5, 6, 7, 0, 1, 2, 0x4A7484AA)
    ROUNDb(22, 2, 3, 4, 5, 6, 7, 0, 1, 0x5CB0A9DC)
    ROUNDb(23, 1, 2, 3, 4, 5, 6, 7, 0, 0x76F988DA)
    ROUNDb(24, 0, 1, 2, 3, 4, 5, 6, 7, 0x983E5152)
    ROUNDb(25, 7, 0, 1, 2, 3, 4, 5, 6, 0xA831C66D)
    ROUNDb(26, 6, 7, 0, 1, 2, 3, 4, 5, 0xB00327C8)
    ROUNDb(27, 5, 6, 7, 0, 1, 2, 3, 4, 0xBF597FC7)
    ROUNDb(28, 4, 5, 6, 7, 0, 1, 2, 3, 0xC6E00BF3)
    ROUNDb(29, 3, 4, 5, 6, 7, 0, 1, 2, 0xD5A79147)
    ROUNDb(30, 2, 3, 4, 5, 6, 7, 0, 1, 0x06CA6351)
    ROUNDb(31, 1, 2, 3, 4, 5, 6, 7, 0, 0x14292967)
    ROUNDb(32, 0, 1, 2, 3, 4, 5, 6, 7, 0x27B70A85)
    ROUNDb(33, 7, 0, 1, 2, 3, 4, 5, 6, 0x2E1B2138)
    ROUNDb(34, 6, 7, 0, 1, 2, 3, 4, 5, 0x4D2C6DFC)
    ROUNDb(35, 5, 6, 7, 0, 1, 2, 3, 4, 0x53380D13)
    ROUNDb(36, 4, 5, 6, 7, 0, 1, 2, 3, 0x650A7354)
    ROUNDb(37, 3, 4, 5, 6, 7, 0, 1, 2, 0x766A0ABB)
    ROUNDb(38, 2, 3, 4, 5, 6, 7, 0, 1, 0x81C2C92E)
    ROUNDb(39, 1, 2, 3, 4, 5, 6, 7, 0, 0x92722C85)
    ROUNDb(40, 0, 1, 2, 3, 4, 5, 6, 7, 0xA2BFE8A1)
    ROUNDb(41, 7, 0, 1, 2, 3, 4, 5, 6, 0xA81A664B)
    ROUNDb(42, 6, 7, 0, 1, 2, 3, 4, 5, 0xC24B8B70)
    ROUNDb(43, 5, 6, 7, 0, 1, 2, 3, 4, 0xC76C51A3)
    ROUNDb(44, 4, 5, 6, 7, 0, 1, 2, 3, 0xD192E819)
    ROUNDb(45, 3, 4, 5, 6, 7, 0, 1, 2, 0xD6990624)
    ROUNDb(46, 2, 3, 4, 5, 6, 7, 0, 1, 0xF40E3585)
    ROUNDb(47, 1, 2, 3, 4, 5, 6, 7, 0, 0x106AA070)
    ROUNDb(48, 0, 1, 2, 3, 4, 5, 6, 7, 0x19A4C116)
    ROUNDb(49, 7, 0, 1, 2, 3, 4, 5, 6, 0x1E376C08)
    ROUNDb(50, 6, 7, 0, 1, 2, 3, 4, 5, 0x2748774C)
    ROUNDb(51, 5, 6, 7, 0, 1, 2, 3, 4, 0x34B0BCB5)
    ROUNDb(52, 4, 5, 6, 7, 0, 1, 2, 3, 0x391C0CB3)
    ROUNDb(53, 3, 4, 5, 6, 7, 0, 1, 2, 0x4ED8AA4A)
    ROUNDb(54, 2, 3, 4, 5, 6, 7, 0, 1, 0x5B9CCA4F)
    ROUNDb(55, 1, 2, 3, 4, 5, 6, 7, 0, 0x682E6FF3)
    ROUNDb(56, 0, 1, 2, 3, 4, 5, 6, 7, 0x748F82EE)
    ROUNDb(57, 7, 0, 1, 2, 3, 4, 5, 6, 0x78A5636F)
    ROUNDb(58, 6, 7, 0, 1, 2, 3, 4, 5, 0x84C87814)
    ROUNDb(59, 5, 6, 7, 0, 1, 2, 3, 4, 0x8CC70208)
    ROUNDb(60, 4, 5, 6, 7, 0, 1, 2, 3, 0x90BEFFFA)
    ROUNDb(61, 3, 4, 5, 6, 7, 0, 1, 2, 0xA4506CEB)
    ROUNDb(62, 2, 3, 4, 5, 6, 7, 0, 1, 0xBEF9A3F7)
    ROUNDb(63, 1, 2, 3, 4, 5, 6, 7, 0, 0xC67178F2)

    /* Add to state */
    movl 116(%esp), %esi  /* Argument: state */
    movl  0(%esp), %eax;  addl %eax,  0(%esi)
    movl  4(%esp), %eax;  addl %eax,  4(%esi)
    movl  8(%esp), %eax;  addl %eax,  8(%esi)
    movl 12(%esp), %eax;  addl %eax, 12(%esi)
    movl 16(%esp), %eax;  addl %eax, 16(%esi)
    movl 20(%esp), %eax;  addl %eax, 20(%esi)
    movl 24(%esp), %eax;  addl %eax, 24(%esi)
    movl 28(%esp), %eax;  addl %eax, 28(%esi)

    /* Restore registers */
    movl  96(%esp), %ebx
    movl 100(%esp), %esi
    movl 104(%esp), %edi
    movl 108(%esp), %ebp
    addl $112, %esp
    retl
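The SHA-512 path that follows works the same way as the SHA-256 usage sketch earlier, just with 128-byte blocks and 64-bit words; compress512 exists only on x86 and x86-64. A hand-padded empty message (not part of the crate, function name illustrative):

fn sha512_of_empty() -> [u64; 8] {
    // FIPS 180-4 initial hash value for SHA-512
    let mut state = [
        0x6a09e667f3bcc908u64, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
        0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
    ];
    let mut block = [0u8; 128];
    block[0] = 0x80; // padding marker; the 128-bit length field stays zero
    sha2_asm::compress512(&mut state, &[block]);
    // Big-endian concatenation gives cf83e1357eefb8bd..., the SHA-512 of "".
    state
}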
sha2-asm-0.6.2/src/sha512_x64.S

/*
 * SHA-512 hash in x86-64 assembly
 *
 * Copyright (c) 2017 Project Nayuki. (MIT License)
 * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 * - The above copyright notice and this permission notice shall be included in
 *   all copies or substantial portions of the Software.
 * - The Software is provided "as is", without warranty of any kind, express or
 *   implied, including but not limited to the warranties of merchantability,
 *   fitness for a particular purpose and noninfringement. In no event shall the
 *   authors or copyright holders be liable for any claim, damages or other
 *   liability, whether in an action of contract, tort or otherwise, arising from,
 *   out of or in connection with the Software or the use or other dealings in the
 *   Software.
 */

/* void sha512_compress(uint64_t state[8], const uint8_t block[128]) */
#ifdef __APPLE__
.globl _sha512_compress
_sha512_compress:
#else
.globl sha512_compress
sha512_compress:
#endif
    /*
     * Storage usage:
     *   Bytes  Location  Description
     *       8  rax       Temporary for calculation per round
     *       8  rbx       Temporary for calculation per round
     *       8  rcx       Temporary for calculation per round
     *       8  rdx       Temporary for calculation per round
     *       8  rsi       Base address of block array argument (read-only)
     *       8  rdi       Base address of state array argument (read-only)
     *       8  rsp       x86-64 stack pointer
     *       8  r8        SHA-512 state variable A
     *       8  r9        SHA-512 state variable B
     *       8  r10       SHA-512 state variable C
     *       8  r11       SHA-512 state variable D
     *       8  r12       SHA-512 state variable E
     *       8  r13       SHA-512 state variable F
     *       8  r14       SHA-512 state variable G
     *       8  r15       SHA-512 state variable H
     *     128  [rsp+0]   Circular buffer of most recent 16 key schedule items, 8 bytes each
     *      16  xmm0      Caller's value of r10 (only low 64 bits are used)
     *      16  xmm1      Caller's value of r11 (only low 64 bits are used)
     *      16  xmm2      Caller's value of r12 (only low 64 bits are used)
     *      16  xmm3      Caller's value of r13 (only low 64 bits are used)
     *      16  xmm4      Caller's value of r14 (only low 64 bits are used)
     *      16  xmm5      Caller's value of r15 (only low 64 bits are used)
     *      16  xmm6      Caller's value of rbx (only low 64 bits are used)
     */

#define SCHED(i)  (((i)&0xF)*8)(%rsp)

#define ROUNDa(i, a, b, c, d, e, f, g, h, k) \
    movq (i*8)(%rsi), %rbx; \
    bswapq %rbx; \
    movq %rbx, SCHED(i); \
    ROUNDTAIL(a, b, c, d, e, f, g, h, k)

#define ROUNDb(i, a, b, c, d, e, f, g, h, k) \
    movq SCHED(i-15), %rax; \
    movq SCHED(i-16), %rbx; \
    addq SCHED(i- 7), %rbx; \
    movq %rax, %rcx; \
    movq %rax, %rdx; \
    rorq $8, %rcx; \
    shrq $7, %rdx; \
    rorq $1, %rax; \
    xorq %rdx, %rcx; \
    xorq %rcx, %rax; \
    addq %rax, %rbx; \
    movq SCHED(i- 2), %rax; \
    movq %rax, %rcx; \
    movq %rax, %rdx; \
    rorq $61, %rcx; \
    shrq $6, %rdx; \
    rorq $19, %rax; \
    xorq %rdx, %rcx; \
    xorq %rcx, %rax; \
    addq %rax, %rbx; \
    movq %rbx, SCHED(i); \
    ROUNDTAIL(a, b, c, d, e, f, g, h, k)

#define ROUNDTAIL(a, b, c, d, e, f, g, h, k) \
    /* Part 0 */ \
    /* ROR transformation inspired by Intel's SHA-256 implementation */ \
    movq %e, %rax; \
    rorq $23, %rax; \
    xorq %e, %rax; \
    rorq $4, %rax; \
    xorq %e, %rax; \
    rorq $14, %rax; \
    addq %rbx, %h; \
    movq %g, %rcx; \
    xorq %f, %rcx; \
    andq %e, %rcx; \
    xorq %g, %rcx; \
    addq %rax, %h; \
    movabs $k, %rax; \
    addq %rcx, %h; \
    addq %rax, %h; \
    /* Part 1 */ \
    addq %h, %d; \
    /* Part 2 */ \
    /* ROR transformation inspired by Intel's SHA-256 implementation */ \
    movq %a, %rax; \
    rorq $5, %rax; \
    xorq %a, %rax; \
    rorq $6, %rax; \
    xorq %a, %rax; \
    rorq $28, %rax; \
    movq %c, %rcx; \
    addq %rax, %h; \
    movq %c, %rax; \
    orq %b, %rax; \
    andq %b, %rcx; \
    andq %a, %rax; \
    orq %rcx, %rax; \
    addq %rax, %h;

    /* Save registers, allocate scratch space */
    movq %r10, %xmm0
    movq %r11, %xmm1
    movq %r12, %xmm2
    movq %r13, %xmm3
    movq %r14, %xmm4
    movq %r15, %xmm5
    movq %rbx, %xmm6
    subq $128, %rsp

    /* Load state */
    movq  0(%rdi), %r8   /* a */
    movq  8(%rdi), %r9   /* b */
    movq 16(%rdi), %r10  /* c */
    movq 24(%rdi), %r11  /* d */
    movq 32(%rdi), %r12  /* e */
    movq 40(%rdi), %r13  /* f */
    movq 48(%rdi), %r14  /* g */
    movq 56(%rdi), %r15  /* h */

    /* Do 80 rounds of hashing */
    ROUNDa( 0, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x428A2F98D728AE22)
    ROUNDa( 1, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x7137449123EF65CD)
    ROUNDa( 2, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xB5C0FBCFEC4D3B2F)
    ROUNDa( 3, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xE9B5DBA58189DBBC)
    ROUNDa( 4, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x3956C25BF348B538)
    ROUNDa( 5, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x59F111F1B605D019)
    ROUNDa( 6, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x923F82A4AF194F9B)
    ROUNDa( 7, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xAB1C5ED5DA6D8118)
    ROUNDa( 8, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xD807AA98A3030242)
    ROUNDa( 9, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x12835B0145706FBE)
    ROUNDa(10, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x243185BE4EE4B28C)
    ROUNDa(11, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x550C7DC3D5FFB4E2)
    ROUNDa(12, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x72BE5D74F27B896F)
    ROUNDa(13, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x80DEB1FE3B1696B1)
    ROUNDa(14, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x9BDC06A725C71235)
    ROUNDa(15, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xC19BF174CF692694)
    ROUNDb(16, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xE49B69C19EF14AD2)
    ROUNDb(17, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xEFBE4786384F25E3)
    ROUNDb(18, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x0FC19DC68B8CD5B5)
    ROUNDb(19, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x240CA1CC77AC9C65)
    ROUNDb(20, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x2DE92C6F592B0275)
    ROUNDb(21, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x4A7484AA6EA6E483)
    ROUNDb(22, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5CB0A9DCBD41FBD4)
    ROUNDb(23, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x76F988DA831153B5)
    ROUNDb(24, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x983E5152EE66DFAB)
    ROUNDb(25, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xA831C66D2DB43210)
    ROUNDb(26, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xB00327C898FB213F)
    ROUNDb(27, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xBF597FC7BEEF0EE4)
    ROUNDb(28, r12, r13, r14, r15, r8 , r9 , r10, r11, 0xC6E00BF33DA88FC2)
    ROUNDb(29, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xD5A79147930AA725)
    ROUNDb(30, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x06CA6351E003826F)
    ROUNDb(31, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x142929670A0E6E70)
    ROUNDb(32, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x27B70A8546D22FFC)
    ROUNDb(33, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x2E1B21385C26C926)
    ROUNDb(34, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x4D2C6DFC5AC42AED)
    ROUNDb(35, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x53380D139D95B3DF)
    ROUNDb(36, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x650A73548BAF63DE)
    ROUNDb(37, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x766A0ABB3C77B2A8)
    ROUNDb(38, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x81C2C92E47EDAEE6)
    ROUNDb(39, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x92722C851482353B)
    ROUNDb(40, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xA2BFE8A14CF10364)
    ROUNDb(41, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xA81A664BBC423001)
    ROUNDb(42, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xC24B8B70D0F89791)
    ROUNDb(43, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xC76C51A30654BE30)
    ROUNDb(44, r12, r13, r14, r15, r8 , r9 , r10, r11, 0xD192E819D6EF5218)
    ROUNDb(45, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xD69906245565A910)
    ROUNDb(46, r10, r11, r12, r13, r14, r15, r8 , r9 , 0xF40E35855771202A)
    ROUNDb(47, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x106AA07032BBD1B8)
    ROUNDb(48, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x19A4C116B8D2D0C8)
    ROUNDb(49, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x1E376C085141AB53)
    ROUNDb(50, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x2748774CDF8EEB99)
    ROUNDb(51, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x34B0BCB5E19B48A8)
    ROUNDb(52, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x391C0CB3C5C95A63)
    ROUNDb(53, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x4ED8AA4AE3418ACB)
    ROUNDb(54, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5B9CCA4F7763E373)
    ROUNDb(55, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x682E6FF3D6B2B8A3)
    ROUNDb(56, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x748F82EE5DEFB2FC)
    ROUNDb(57, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x78A5636F43172F60)
    ROUNDb(58, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x84C87814A1F0AB72)
    ROUNDb(59, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x8CC702081A6439EC)
    ROUNDb(60, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x90BEFFFA23631E28)
    ROUNDb(61, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xA4506CEBDE82BDE9)
    ROUNDb(62, r10, r11, r12, r13, r14, r15, r8 , r9 , 0xBEF9A3F7B2C67915)
    ROUNDb(63, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xC67178F2E372532B)
    ROUNDb(64, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xCA273ECEEA26619C)
    ROUNDb(65, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xD186B8C721C0C207)
    ROUNDb(66, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xEADA7DD6CDE0EB1E)
    ROUNDb(67, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xF57D4F7FEE6ED178)
    ROUNDb(68, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x06F067AA72176FBA)
    ROUNDb(69, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x0A637DC5A2C898A6)
    ROUNDb(70, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x113F9804BEF90DAE)
    ROUNDb(71, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x1B710B35131C471B)
    ROUNDb(72, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x28DB77F523047D84)
    ROUNDb(73, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x32CAAB7B40C72493)
    ROUNDb(74, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x3C9EBE0A15C9BEBC)
    ROUNDb(75, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x431D67C49C100D4C)
    ROUNDb(76, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x4CC5D4BECB3E42B6)
    ROUNDb(77, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x597F299CFC657E2A)
    ROUNDb(78, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5FCB6FAB3AD6FAEC)
    ROUNDb(79, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x6C44198C4A475817)

    /* Add to state */
    addq %r8 ,  0(%rdi)
    addq %r9 ,  8(%rdi)
    addq %r10, 16(%rdi)
    addq %r11, 24(%rdi)
    addq %r12, 32(%rdi)
    addq %r13, 40(%rdi)
    addq %r14, 48(%rdi)
    addq %r15, 56(%rdi)

    /* Restore registers */
    movq %xmm0, %r10
    movq %xmm1, %r11
    movq %xmm2, %r12
    movq %xmm3, %r13
    movq %xmm4, %r14
    movq %xmm5, %r15
    movq %xmm6, %rbx
    addq $128, %rsp
    retq
sha2-asm-0.6.2/src/sha512_x86.S

/*
 * SHA-512 hash in x86 assembly
 *
 * Copyright (c) 2014 Project Nayuki. (MIT License)
 * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of
 * this software and associated documentation files (the "Software"), to deal in
 * the Software without restriction, including without limitation the rights to
 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 * the Software, and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 * - The above copyright notice and this permission notice shall be included in
 *   all copies or substantial portions of the Software.
 * - The Software is provided "as is", without warranty of any kind, express or
 *   implied, including but not limited to the warranties of merchantability,
 *   fitness for a particular purpose and noninfringement. In no event shall the
 *   authors or copyright holders be liable for any claim, damages or other
 *   liability, whether in an action of contract, tort or otherwise, arising from,
 *   out of or in connection with the Software or the use or other dealings in the
 *   Software.
 */

/* void sha512_compress(uint64_t state[8], const uint8_t block[128]) */
#ifdef __APPLE__
.globl _sha512_compress
_sha512_compress:
#else
.globl sha512_compress
sha512_compress:
#endif
    /*
     * Storage usage:
     *   Bytes  Location    Description
     *       4  eax         Temporary base address of state or block array arguments
     *       4  ecx         Old value of esp
     *       4  esp         x86 stack pointer
     *      64  [esp+ 0]    SHA-512 state variables A,B,C,D,E,F,G,H (8 bytes each)
     *     128  [esp+64]    Circular buffer of most recent 16 key schedule items, 8 bytes each
     *      56  mm0..mm6    Temporary for calculation per round
     *       8  mm7         Control value for byte endian reversal
     *      64  xmm0..xmm3  Temporary for copying or calculation
     */

#define SCHED(i)  (((i)&0xF)*8+64)(%esp)
#define STATE(i)  (i*8)(%esp)

#define RORQ(reg, shift, temp) \
    movq %reg, %temp; \
    psllq $(64-shift), %temp; \
    psrlq $shift, %reg; \
    por %temp, %reg;

#define ROUNDa(i, a, b, c, d, e, f, g, h) \
    movq (i*8)(%eax), %mm0; \
    pshufb %mm7, %mm0; \
    movq %mm0, SCHED(i); \
    ROUNDTAIL(i, a, b, c, d, e, f, g, h)

#define ROUNDb(i, a, b, c, d, e, f, g, h) \
    movq SCHED(i-16), %mm0; \
    paddq SCHED(i- 7), %mm0; \
    movq SCHED(i-15), %mm1; \
    movq %mm1, %mm2; \
    movq %mm1, %mm3; \
    RORQ(mm1, 1, mm5) \
    RORQ(mm2, 8, mm4) \
    psrlq $7, %mm3; \
    pxor %mm3, %mm2; \
    pxor %mm2, %mm1; \
    paddq %mm1, %mm0; \
    movq SCHED(i- 2), %mm1; \
    movq %mm1, %mm2; \
    movq %mm1, %mm3; \
    RORQ(mm1, 19, mm5) \
    RORQ(mm2, 61, mm4) \
    psrlq $6, %mm3; \
    pxor %mm3, %mm2; \
    pxor %mm2, %mm1; \
    paddq %mm1, %mm0; \
    movq %mm0, SCHED(i); \
    ROUNDTAIL(i, a, b, c, d, e, f, g, h)

#define ROUNDTAIL(i, a, b, c, d, e, f, g, h) \
    /* Part 0 */ \
    paddq STATE(h), %mm0; \
    movq STATE(e), %mm1; \
    movq %mm1, %mm2; \
    movq %mm1, %mm3; \
    RORQ(mm1, 18, mm4) \
    RORQ(mm2, 41, mm5) \
    RORQ(mm3, 14, mm6) \
    pxor %mm2, %mm1; \
    pxor %mm3, %mm1; \
    paddq .roundconstants+i*8, %mm0; \
    movq STATE(g), %mm2; \
    pxor STATE(f), %mm2; \
    pand STATE(e), %mm2; \
    pxor STATE(g), %mm2; \
    paddq %mm1, %mm0; \
    paddq %mm2, %mm0; \
    /* Part 1 */ \
    movq STATE(d), %mm1; \
    paddq %mm0, %mm1; \
    movq %mm1, STATE(d); \
    /* Part 2 */ \
    movq STATE(a), %mm1; \
    movq %mm1, %mm2; \
    movq %mm1, %mm3; \
    RORQ(mm1, 39, mm4) \
    RORQ(mm2, 34, mm5) \
    RORQ(mm3, 28, mm6) \
    pxor %mm2, %mm1; \
    pxor %mm3, %mm1; \
    movq STATE(c), %mm2; \
    paddq %mm1, %mm0; \
    movq %mm2, %mm3; \
    por STATE(b), %mm3; \
    pand STATE(b), %mm2; \
    pand STATE(a), %mm3; \
    por %mm2, %mm3; \
    paddq %mm3, %mm0; \
    movq %mm0, STATE(h);

    /* Allocate 16-byte aligned scratch space */
    movl %esp, %ecx
    subl $192, %esp
    andl $~0xF, %esp

    /* Copy state */
    movl 4(%ecx), %eax
    movdqu  0(%eax), %xmm0;  movdqu %xmm0,  0(%esp)
    movdqu 16(%eax), %xmm1;  movdqu %xmm1, 16(%esp)
    movdqu 32(%eax), %xmm2;  movdqu %xmm2, 32(%esp)
    movdqu 48(%eax), %xmm3;  movdqu %xmm3, 48(%esp)

    /* Do 80 rounds of hashing */
    movl 8(%ecx), %eax
    movq .bswap64, %mm7
    ROUNDa( 0, 0, 1, 2, 3, 4, 5, 6, 7)
    ROUNDa( 1, 7, 0, 1, 2, 3, 4, 5, 6)
    ROUNDa( 2, 6, 7, 0, 1, 2, 3, 4, 5)
    ROUNDa( 3, 5, 6, 7, 0, 1, 2, 3, 4)
    ROUNDa( 4, 4, 5, 6, 7, 0, 1, 2, 3)
    ROUNDa( 5, 3, 4, 5, 6, 7, 0, 1, 2)
    ROUNDa( 6, 2, 3, 4, 5, 6, 7, 0, 1)
    ROUNDa( 7, 1, 2, 3, 4, 5, 6, 7, 0)
    ROUNDa( 8, 0, 1, 2, 3, 4, 5, 6, 7)
    ROUNDa( 9, 7, 0, 1, 2, 3, 4, 5, 6)
    ROUNDa(10, 6, 7, 0, 1, 2, 3, 4, 5)
    ROUNDa(11, 5, 6, 7, 0, 1, 2, 3, 4)
    ROUNDa(12, 4, 5, 6, 7, 0, 1, 2, 3)
    ROUNDa(13, 3, 4, 5, 6, 7, 0, 1, 2)
    ROUNDa(14, 2, 3, 4, 5, 6, 7, 0, 1)
    ROUNDa(15, 1, 2, 3, 4, 5, 6, 7, 0)
    ROUNDb(16, 0, 1, 2, 3, 4, 5, 6, 7)
    ROUNDb(17, 7, 0, 1, 2, 3, 4, 5, 6)
    ROUNDb(18, 6, 7, 0, 1, 2, 3, 4, 5)
    ROUNDb(19, 5, 6, 7, 0, 1, 2, 3, 4)
    ROUNDb(20, 4, 5, 6, 7, 0, 1, 2, 3)
    ROUNDb(21, 3, 4, 5, 6, 7, 0, 1, 2)
    ROUNDb(22, 2, 3, 4, 5, 6, 7, 0, 1)
    ROUNDb(23, 1, 2, 3, 4, 5, 6, 7, 0)
    ROUNDb(24, 0, 1, 2, 3, 4, 5, 6, 7)
    ROUNDb(25, 7, 0, 1, 2, 3, 4, 5, 6)
    ROUNDb(26, 6, 7, 0, 1, 2, 3, 4, 5)
    ROUNDb(27, 5, 6, 7, 0, 1, 2, 3, 4)
    ROUNDb(28, 4, 5, 6, 7, 0, 1, 2, 3)
    ROUNDb(29, 3, 4, 5, 6, 7, 0, 1, 2)
    ROUNDb(30, 2, 3, 4, 5, 6, 7, 0, 1)
    ROUNDb(31, 1, 2, 3, 4, 5, 6, 7, 0)
    ROUNDb(32, 0, 1, 2, 3, 4, 5, 6, 7)
    ROUNDb(33, 7, 0, 1, 2, 3, 4, 5, 6)
    ROUNDb(34, 6, 7, 0, 1, 2, 3, 4, 5)
    ROUNDb(35, 5, 6, 7, 0, 1, 2, 3, 4)
    ROUNDb(36, 4, 5, 6, 7, 0, 1, 2, 3)
    ROUNDb(37, 3, 4, 5, 6, 7, 0, 1, 2)
    ROUNDb(38, 2, 3, 4, 5, 6, 7, 0, 1)
    ROUNDb(39, 1, 2, 3, 4, 5, 6, 7, 0)
    ROUNDb(40, 0, 1, 2, 3, 4, 5, 6, 7)
    ROUNDb(41, 7, 0, 1, 2, 3, 4, 5, 6)
    ROUNDb(42, 6, 7, 0, 1, 2, 3, 4, 5)
    ROUNDb(43, 5, 6, 7, 0, 1, 2, 3, 4)
    ROUNDb(44, 4, 5, 6, 7, 0, 1, 2, 3)
    ROUNDb(45, 3, 4, 5, 6, 7, 0, 1, 2)
    ROUNDb(46, 2, 3, 4, 5, 6, 7, 0, 1)
    ROUNDb(47, 1, 2, 3, 4, 5, 6, 7, 0)
    ROUNDb(48, 0, 1, 2, 3, 4, 5, 6, 7)
    ROUNDb(49, 7, 0, 1, 2, 3, 4, 5, 6)
    ROUNDb(50, 6, 7, 0, 1, 2, 3, 4, 5)
    ROUNDb(51, 5, 6, 7, 0, 1, 2, 3, 4)
    ROUNDb(52, 4, 5, 6, 7, 0, 1, 2, 3)
    ROUNDb(53, 3, 4, 5, 6, 7, 0, 1, 2)
    ROUNDb(54, 2, 3, 4, 5, 6, 7, 0, 1)
    ROUNDb(55, 1, 2, 3, 4, 5, 6, 7, 0)
    ROUNDb(56, 0, 1, 2, 3, 4, 5, 6, 7)
    ROUNDb(57, 7, 0, 1, 2, 3, 4, 5, 6)
    ROUNDb(58, 6, 7, 0, 1, 2, 3, 4, 5)
    ROUNDb(59, 5, 6, 7, 0, 1, 2, 3, 4)
    ROUNDb(60, 4, 5, 6, 7, 0, 1, 2, 3)
    ROUNDb(61, 3, 4, 5, 6, 7, 0, 1, 2)
    ROUNDb(62, 2, 3, 4, 5, 6, 7, 0, 1)
    ROUNDb(63, 1, 2, 3, 4, 5, 6, 7, 0)
    ROUNDb(64, 0, 1, 2, 3, 4, 5, 6, 7)
    ROUNDb(65, 7, 0, 1, 2, 3, 4, 5, 6)
    ROUNDb(66, 6, 7, 0, 1, 2, 3, 4, 5)
    ROUNDb(67, 5, 6, 7, 0, 1, 2, 3, 4)
    ROUNDb(68, 4, 5, 6, 7, 0, 1, 2, 3)
    ROUNDb(69, 3, 4, 5, 6, 7, 0, 1, 2)
    ROUNDb(70, 2, 3, 4, 5, 6, 7, 0, 1)
    ROUNDb(71, 1, 2, 3, 4, 5, 6, 7, 0)
    ROUNDb(72, 0, 1, 2, 3, 4, 5, 6, 7)
    ROUNDb(73, 7, 0, 1, 2, 3, 4, 5, 6)
    ROUNDb(74, 6, 7, 0, 1, 2, 3, 4, 5)
    ROUNDb(75, 5, 6, 7, 0, 1, 2, 3, 4)
    ROUNDb(76, 4, 5, 6, 7, 0, 1, 2, 3)
    ROUNDb(77, 3, 4, 5, 6, 7, 0, 1, 2)
    ROUNDb(78, 2, 3, 4, 5, 6, 7, 0, 1)
    ROUNDb(79, 1, 2, 3, 4, 5, 6, 7, 0)

    /* Add to state */
    movl 4(%ecx), %eax
    movdqu  0(%eax), %xmm0;  paddq  0(%esp), %xmm0;  movdqu %xmm0,  0(%eax)
    movdqu 16(%eax), %xmm1;  paddq 16(%esp), %xmm1;  movdqu %xmm1, 16(%eax)
    movdqu 32(%eax), %xmm2;  paddq 32(%esp), %xmm2;  movdqu %xmm2, 32(%eax)
    movdqu 48(%eax), %xmm3;  paddq 48(%esp), %xmm3;  movdqu %xmm3, 48(%eax)

    /* Clean up */
    emms
    movl %ecx, %esp
    retl

.balign 8
.bswap64:
    .quad 0x0001020304050607
.roundconstants:
    .quad 0x428A2F98D728AE22, 0x7137449123EF65CD, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC
    .quad 0x3956C25BF348B538, 0x59F111F1B605D019, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118
    .quad 0xD807AA98A3030242, 0x12835B0145706FBE, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2
    .quad 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x9BDC06A725C71235, 0xC19BF174CF692694
    .quad 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65
    .quad 0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5
    .quad 0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4
    .quad 0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0x06CA6351E003826F, 0x142929670A0E6E70
    .quad 0x27B70A8546D22FFC, 0x2E1B21385C26C926, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF
    .quad 0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x81C2C92E47EDAEE6, 0x92722C851482353B
    .quad 0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xC24B8B70D0F89791, 0xC76C51A30654BE30
    .quad 0xD192E819D6EF5218, 0xD69906245565A910, 0xF40E35855771202A, 0x106AA07032BBD1B8
    .quad 0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8
    .quad 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3
    .quad 0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x84C87814A1F0AB72, 0x8CC702081A6439EC
    .quad 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B
    .quad 0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178
    .quad 0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x113F9804BEF90DAE, 0x1B710B35131C471B
    .quad 0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C
    .quad 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817
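The RORQ macro above synthesizes a 64-bit rotate out of psllq/psrlq/por because 32-bit x86 has no 64-bit rotate instruction. The scalar identity it relies on, as a quick check (not part of the crate):

#[test]
fn shift_pair_equals_rotate_right() {
    // (x >> s) | (x << (64 - s)) is a rotate-right by s, for 0 < s < 64,
    // which is exactly what RORQ builds from two shifts and an OR.
    let (x, s) = (0x0123_4567_89ab_cdefu64, 19u32);
    assert_eq!((x >> s) | (x << (64 - s)), x.rotate_right(s));
}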