sha2-asm-0.5.1/Cargo.toml.orig010064400017500001750000000007111327661323600142500ustar0000000000000000[package] name = "sha2-asm" version = "0.5.1" authors = ["RustCrypto Developers"] license = "MIT" description = "Assembly implementation of SHA-2 compression functions" documentation = "https://docs.rs/sha2-asm" repository = "https://github.com/RustCrypto/asm-hashes" keywords = ["crypto", "sha2", "asm"] categories = ["cryptography", "no-std"] [dependencies] [build-dependencies] cc = "1.0" [badges] travis-ci = { repository = "RustCrypto/asm-hashes" } sha2-asm-0.5.1/Cargo.toml0000644000000017250000000000000105140ustar00# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g. crates.io) dependencies # # If you believe there's an error in this file please file an # issue against the rust-lang/cargo repository. 
If you're # editing this file be aware that the upstream Cargo.toml # will likely look very different (and much more reasonable) [package] name = "sha2-asm" version = "0.5.1" authors = ["RustCrypto Developers"] description = "Assembly implementation of SHA-2 compression functions" documentation = "https://docs.rs/sha2-asm" keywords = ["crypto", "sha2", "asm"] categories = ["cryptography", "no-std"] license = "MIT" repository = "https://github.com/RustCrypto/asm-hashes" [dependencies] [build-dependencies.cc] version = "1.0" [badges.travis-ci] repository = "RustCrypto/asm-hashes" sha2-asm-0.5.1/LICENSE010064400017500001750000000020611311741210400123470ustar0000000000000000Copyright (c) 2017 Project Nayuki, Artyom Pavlov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
sha2-asm-0.5.1/benches/lib.rs010064400017500001750000000010631311741210400140660ustar0000000000000000#![no_std] #![feature(test)] extern crate test; extern crate sha2_asm; use test::Bencher; #[bench] fn bench_compress256(b: &mut Bencher) { let mut state = Default::default(); let data = Default::default(); b.iter(|| { sha2_asm::compress256(&mut state, &data); }); b.bytes = data.len() as u64; } #[bench] fn bench_compress512(b: &mut Bencher) { let mut state = Default::default(); let data = Default::default(); b.iter(|| { sha2_asm::compress512(&mut state, &data); }); b.bytes = data.len() as u64; } sha2-asm-0.5.1/build.rs010064400017500001750000000010501325373231200130130ustar0000000000000000extern crate cc; fn main() { let (sha256_path, sha512_path) = if cfg!(target_arch = "x86") { ("src/sha256_x86.S", "src/sha512_x86.S") } else if cfg!(target_arch = "x86_64") { ("src/sha256_x64.S", "src/sha512_x64.S") } else { panic!("Unsupported target architecture"); }; cc::Build::new() .flag("-c") .file(sha256_path) .compile("libsha256.a"); cc::Build::new() .flag("-c") .file(sha512_path) .compile("libsha512.a"); } sha2-asm-0.5.1/src/lib.rs010064400017500001750000000017551327055571300132740ustar0000000000000000//! Assembly implementation of [SHA-2][1] compression functions. //! //! For full SHA-2 hash functions with this implementation of compression //! functions use [sha-2](https://crates.io/crates/sha-2) crate with //! the enabled "asm" feature. //! //! Only x86 and x86-64 architectures are currently supported. //! //! 
[1]: https://en.wikipedia.org/wiki/SHA-2 #![no_std] #[link(name="sha256", kind="static")] extern "C" { fn sha256_compress(state: &mut [u32; 8], block: &[u8; 64]); } /// Safe wrapper around assembly implementation of SHA256 compression function #[inline] pub fn compress256(state: &mut [u32; 8], block: &[u8; 64]) { unsafe { sha256_compress(state, block) } } #[link(name="sha512", kind="static")] extern "C" { fn sha512_compress(state: &mut [u64; 8], block: &[u8; 128]); } /// Safe wrapper around assembly implementation of SHA512 compression function #[inline] pub fn compress512(state: &mut [u64; 8], block: &[u8; 128]) { unsafe { sha512_compress(state, block) } } sha2-asm-0.5.1/src/sha256_x64.S010064400017500001750000000261121327661315200140050ustar0000000000000000/* * SHA-256 hash in x86-64 assembly * * Copyright (c) 2015 Project Nayuki. (MIT License) * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly * * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of * the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * - The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * - The Software is provided "as is", without warranty of any kind, express or * implied, including but not limited to the warranties of merchantability, * fitness for a particular purpose and noninfringement. In no event shall the * authors or copyright holders be liable for any claim, damages or other * liability, whether in an action of contract, tort or otherwise, arising from, * out of or in connection with the Software or the use or other dealings in the * Software. 
*/ /* void sha256_compress(uint32_t state[8], const uint8_t block[64]) */ #ifdef __APPLE__ .globl _sha256_compress _sha256_compress: #else .globl sha256_compress sha256_compress: #endif /* * Storage usage: * Bytes Location Description * 4 eax Temporary for calculation per round * 4 ebx Temporary for calculation per round * 4 ecx Temporary for calculation per round * 4 edx Temporary for calculation per round * 8 rsi Base address of block array argument (read-only) * 8 rdi Base address of state array argument (read-only) * 8 rsp x86-64 stack pointer * 4 r8d SHA-256 state variable A * 4 r9d SHA-256 state variable B * 4 r10d SHA-256 state variable C * 4 r11d SHA-256 state variable D * 4 r12d SHA-256 state variable E * 4 r13d SHA-256 state variable F * 4 r14d SHA-256 state variable G * 4 r15d SHA-256 state variable H * 64 [rsp+0] Circular buffer of most recent 16 key schedule items, 4 bytes each * 16 xmm0 Caller's value of r10 (only low 64 bits are used) * 16 xmm1 Caller's value of r11 (only low 64 bits are used) * 16 xmm2 Caller's value of r12 (only low 64 bits are used) * 16 xmm3 Caller's value of r13 (only low 64 bits are used) * 16 xmm4 Caller's value of r14 (only low 64 bits are used) * 16 xmm5 Caller's value of r15 (only low 64 bits are used) * 16 xmm6 Caller's value of rbx (only low 64 bits are used) */ #define SCHED(i) (((i)&0xF)*4)(%rsp) #define ROUNDa(i, a, b, c, d, e, f, g, h, k) \ movl (i*4)(%rsi), %ebx; \ bswapl %ebx; \ movl %ebx, SCHED(i); \ ROUNDTAIL(a, b, c, d, e, f, g, h, k) #define ROUNDb(i, a, b, c, d, e, f, g, h, k) \ movl SCHED(i-15), %eax; \ movl SCHED(i-16), %ebx; \ addl SCHED(i- 7), %ebx; \ movl %eax, %ecx; \ movl %eax, %edx; \ rorl $18, %ecx; \ shrl $3, %edx; \ rorl $7, %eax; \ xorl %edx, %ecx; \ xorl %ecx, %eax; \ addl %eax, %ebx; \ movl SCHED(i- 2), %eax; \ movl %eax, %ecx; \ movl %eax, %edx; \ rorl $19, %ecx; \ shrl $10, %edx; \ rorl $17, %eax; \ xorl %edx, %ecx; \ xorl %ecx, %eax; \ addl %eax, %ebx; \ movl %ebx, SCHED(i); \ ROUNDTAIL(a, b, 
c, d, e, f, g, h, k) #define ROUNDTAIL(a, b, c, d, e, f, g, h, k) \ /* Part 0 */ \ movl %e, %ecx; \ movl %e, %edx; \ movl %e, %eax; \ rorl $11, %ecx; \ rorl $25, %edx; \ rorl $6, %eax; \ xorl %edx, %ecx; \ xorl %ecx, %eax; \ addl %ebx, %h; \ movl %g, %ecx; \ xorl %f, %ecx; \ andl %e, %ecx; \ xorl %g, %ecx; \ leal k(%rax,%rcx), %eax; \ addl %eax, %h; \ /* Part 1 */ \ addl %h, %d; \ /* Part 2 */ \ movl %a, %ecx; \ movl %a, %edx; \ movl %a, %eax; \ rorl $13, %ecx; \ rorl $22, %edx; \ rorl $2, %eax; \ xorl %edx, %ecx; \ xorl %ecx, %eax; \ movl %c, %ecx; \ addl %eax, %h; \ movl %c, %eax; \ orl %b, %eax; \ andl %b, %ecx; \ andl %a, %eax; \ orl %ecx, %eax; \ addl %eax, %h; /* Save registers, allocate scratch space */ movq %r10, %xmm0 movq %r11, %xmm1 movq %r12, %xmm2 movq %r13, %xmm3 movq %r14, %xmm4 movq %r15, %xmm5 movq %rbx, %xmm6 subq $64, %rsp /* Load state */ movl 0(%rdi), %r8d /* a */ movl 4(%rdi), %r9d /* b */ movl 8(%rdi), %r10d /* c */ movl 12(%rdi), %r11d /* d */ movl 16(%rdi), %r12d /* e */ movl 20(%rdi), %r13d /* f */ movl 24(%rdi), %r14d /* g */ movl 28(%rdi), %r15d /* h */ /* Do 64 rounds of hashing */ ROUNDa( 0, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 0x428A2F98) ROUNDa( 1, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x71374491) ROUNDa( 2, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -0x4A3F0431) ROUNDa( 3, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -0x164A245B) ROUNDa( 4, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x3956C25B) ROUNDa( 5, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 0x59F111F1) ROUNDa( 6, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x6DC07D5C) ROUNDa( 7, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -0x54E3A12B) ROUNDa( 8, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -0x27F85568) ROUNDa( 9, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x12835B01) ROUNDa(10, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0x243185BE) ROUNDa(11, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 0x550C7DC3) ROUNDa(12, 
r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x72BE5D74) ROUNDa(13, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -0x7F214E02) ROUNDa(14, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x6423F959) ROUNDa(15, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -0x3E640E8C) ROUNDb(16, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -0x1B64963F) ROUNDb(17, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -0x1041B87A) ROUNDb(18, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0x0FC19DC6) ROUNDb(19, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 0x240CA1CC) ROUNDb(20, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x2DE92C6F) ROUNDb(21, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 0x4A7484AA) ROUNDb(22, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 0x5CB0A9DC) ROUNDb(23, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 0x76F988DA) ROUNDb(24, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -0x67C1AEAE) ROUNDb(25, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -0x57CE3993) ROUNDb(26, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -0x4FFCD838) ROUNDb(27, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -0x40A68039) ROUNDb(28, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -0x391FF40D) ROUNDb(29, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -0x2A586EB9) ROUNDb(30, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 0x06CA6351) ROUNDb(31, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 0x14292967) ROUNDb(32, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 0x27B70A85) ROUNDb(33, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x2E1B2138) ROUNDb(34, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0x4D2C6DFC) ROUNDb(35, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 0x53380D13) ROUNDb(36, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x650A7354) ROUNDb(37, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 0x766A0ABB) ROUNDb(38, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x7E3D36D2) ROUNDb(39, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -0x6D8DD37B) 
ROUNDb(40, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, -0x5D40175F) ROUNDb(41, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, -0x57E599B5) ROUNDb(42, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -0x3DB47490) ROUNDb(43, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -0x3893AE5D) ROUNDb(44, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -0x2E6D17E7) ROUNDb(45, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -0x2966F9DC) ROUNDb(46, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x0BF1CA7B) ROUNDb(47, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 0x106AA070) ROUNDb(48, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 0x19A4C116) ROUNDb(49, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x1E376C08) ROUNDb(50, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, 0x2748774C) ROUNDb(51, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, 0x34B0BCB5) ROUNDb(52, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, 0x391C0CB3) ROUNDb(53, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, 0x4ED8AA4A) ROUNDb(54, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , 0x5B9CCA4F) ROUNDb(55, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , 0x682E6FF3) ROUNDb(56, r8d , r9d , r10d, r11d, r12d, r13d, r14d, r15d, 0x748F82EE) ROUNDb(57, r15d, r8d , r9d , r10d, r11d, r12d, r13d, r14d, 0x78A5636F) ROUNDb(58, r14d, r15d, r8d , r9d , r10d, r11d, r12d, r13d, -0x7B3787EC) ROUNDb(59, r13d, r14d, r15d, r8d , r9d , r10d, r11d, r12d, -0x7338FDF8) ROUNDb(60, r12d, r13d, r14d, r15d, r8d , r9d , r10d, r11d, -0x6F410006) ROUNDb(61, r11d, r12d, r13d, r14d, r15d, r8d , r9d , r10d, -0x5BAF9315) ROUNDb(62, r10d, r11d, r12d, r13d, r14d, r15d, r8d , r9d , -0x41065C09) ROUNDb(63, r9d , r10d, r11d, r12d, r13d, r14d, r15d, r8d , -0x398E870E) /* Add to state */ addl %r8d , 0(%rdi) addl %r9d , 4(%rdi) addl %r10d, 8(%rdi) addl %r11d, 12(%rdi) addl %r12d, 16(%rdi) addl %r13d, 20(%rdi) addl %r14d, 24(%rdi) addl %r15d, 28(%rdi) /* Restore registers */ movq %xmm0, %r10 movq %xmm1, %r11 movq %xmm2, %r12 movq %xmm3, %r13 movq 
%xmm4, %r14 movq %xmm5, %r15 movq %xmm6, %rbx addq $64, %rsp retq sha2-asm-0.5.1/src/sha256_x86.S010064400017500001750000000231531327661315200140130ustar0000000000000000/* * SHA-256 hash in x86 assembly * * Copyright (c) 2014 Project Nayuki. (MIT License) * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly * * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of * the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * - The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * - The Software is provided "as is", without warranty of any kind, express or * implied, including but not limited to the warranties of merchantability, * fitness for a particular purpose and noninfringement. In no event shall the * authors or copyright holders be liable for any claim, damages or other * liability, whether in an action of contract, tort or otherwise, arising from, * out of or in connection with the Software or the use or other dealings in the * Software. 
*/ /* void sha256_compress(uint32_t state[8], const uint8_t block[64]) */ #ifdef __APPLE__ .globl _sha256_compress _sha256_compress: #else .globl sha256_compress sha256_compress: #endif /* * Storage usage: * Bytes Location Description * 4 eax Temporary for calculation per round * 4 ebx Temporary for calculation per round * 4 ecx Temporary for calculation per round * 4 edx Temporary for calculation per round * 4 ebp Temporary for calculation per round * 4 esi (During state loading and update) base address of state array argument * (During hash rounds) temporary for calculation per round * 4 edi Base address of block array argument (during key schedule loading rounds only) * 4 esp x86 stack pointer * 32 [esp+ 0] SHA-256 state variables A,B,C,D,E,F,G,H (4 bytes each) * 64 [esp+ 32] Key schedule of 16 * 4 bytes * 4 [esp+ 96] Caller's value of ebx * 4 [esp+100] Caller's value of esi * 4 [esp+104] Caller's value of edi * 4 [esp+108] Caller's value of ebp */ #define SCHED(i) ((((i)&0xF)+8)*4)(%esp) #define ROUNDa(i, a, b, c, d, e, f, g, h, k) \ movl (i*4)(%edi), %ebp; \ bswapl %ebp; \ movl %ebp, SCHED(i); \ ROUNDTAIL(i, a, b, c, d, e, f, g, h, k) #define ROUNDb(i, a, b, c, d, e, f, g, h, k) \ movl SCHED(i-15), %eax; \ movl SCHED(i-16), %ebp; \ movl %eax, %ebx; \ addl SCHED(i- 7), %ebp; \ movl %eax, %ecx; \ rorl $18, %ebx; \ shrl $3, %ecx; \ rorl $7, %eax; \ xorl %ecx, %ebx; \ xorl %ebx, %eax; \ addl %eax, %ebp; \ movl SCHED(i- 2), %eax; \ movl %eax, %ebx; \ movl %eax, %ecx; \ rorl $19, %ebx; \ shrl $10, %ecx; \ rorl $17, %eax; \ xorl %ecx, %ebx; \ xorl %ebx, %eax; \ addl %eax, %ebp; \ movl %ebp, SCHED(i); \ ROUNDTAIL(i, a, b, c, d, e, f, g, h, k) #define STATE(i) (i*4)(%esp) #define ROUNDTAIL(i, a, b, c, d, e, f, g, h, k) \ /* Part 0 */ \ movl STATE(e), %eax; \ movl %eax, %ebx; \ movl %eax, %ecx; \ movl %eax, %edx; \ rorl $11, %eax; \ rorl $25, %ebx; \ rorl $6, %ecx; \ movl STATE(h), %esi; \ xorl %ebx, %eax; \ xorl %eax, %ecx; \ addl %ebp, %esi; \ movl STATE(g), %ebx; \ 
movl STATE(f), %eax; \ xorl %ebx, %eax; \ andl %edx, %eax; \ xorl %ebx, %eax; \ leal k(%ecx,%eax), %ecx; \ addl %ecx, %esi; \ /* Part 1 */ \ addl %esi, STATE(d); \ /* Part 2 */ \ movl STATE(a), %eax; \ movl %eax, %ebx; \ movl %eax, %ecx; \ movl %eax, %edx; \ rorl $13, %eax; \ rorl $22, %ebx; \ rorl $2, %ecx; \ xorl %ebx, %eax; \ xorl %eax, %ecx; \ movl STATE(c), %eax; \ addl %ecx, %esi; \ movl %eax, %ecx; \ movl STATE(b), %ebx; \ orl %ebx, %ecx; \ andl %ebx, %eax; \ andl %edx, %ecx; \ orl %eax, %ecx; \ addl %ecx, %esi; \ movl %esi, STATE(h); /* Allocate scratch space, save registers */ subl $112, %esp movl %ebx, 96(%esp) movl %esi, 100(%esp) movl %edi, 104(%esp) movl %ebp, 108(%esp) /* Copy state */ movl 116(%esp), %esi /* Argument: state */ movl 0(%esi), %eax; movl %eax, 0(%esp) movl 4(%esi), %eax; movl %eax, 4(%esp) movl 8(%esi), %eax; movl %eax, 8(%esp) movl 12(%esi), %eax; movl %eax, 12(%esp) movl 16(%esi), %eax; movl %eax, 16(%esp) movl 20(%esi), %eax; movl %eax, 20(%esp) movl 24(%esi), %eax; movl %eax, 24(%esp) movl 28(%esi), %eax; movl %eax, 28(%esp) /* Do 64 rounds of hashing */ movl 120(%esp), %edi /* Argument: block */ ROUNDa( 0, 0, 1, 2, 3, 4, 5, 6, 7, 0x428A2F98) ROUNDa( 1, 7, 0, 1, 2, 3, 4, 5, 6, 0x71374491) ROUNDa( 2, 6, 7, 0, 1, 2, 3, 4, 5, 0xB5C0FBCF) ROUNDa( 3, 5, 6, 7, 0, 1, 2, 3, 4, 0xE9B5DBA5) ROUNDa( 4, 4, 5, 6, 7, 0, 1, 2, 3, 0x3956C25B) ROUNDa( 5, 3, 4, 5, 6, 7, 0, 1, 2, 0x59F111F1) ROUNDa( 6, 2, 3, 4, 5, 6, 7, 0, 1, 0x923F82A4) ROUNDa( 7, 1, 2, 3, 4, 5, 6, 7, 0, 0xAB1C5ED5) ROUNDa( 8, 0, 1, 2, 3, 4, 5, 6, 7, 0xD807AA98) ROUNDa( 9, 7, 0, 1, 2, 3, 4, 5, 6, 0x12835B01) ROUNDa(10, 6, 7, 0, 1, 2, 3, 4, 5, 0x243185BE) ROUNDa(11, 5, 6, 7, 0, 1, 2, 3, 4, 0x550C7DC3) ROUNDa(12, 4, 5, 6, 7, 0, 1, 2, 3, 0x72BE5D74) ROUNDa(13, 3, 4, 5, 6, 7, 0, 1, 2, 0x80DEB1FE) ROUNDa(14, 2, 3, 4, 5, 6, 7, 0, 1, 0x9BDC06A7) ROUNDa(15, 1, 2, 3, 4, 5, 6, 7, 0, 0xC19BF174) ROUNDb(16, 0, 1, 2, 3, 4, 5, 6, 7, 0xE49B69C1) ROUNDb(17, 7, 0, 1, 2, 3, 4, 5, 6, 0xEFBE4786) 
ROUNDb(18, 6, 7, 0, 1, 2, 3, 4, 5, 0x0FC19DC6) ROUNDb(19, 5, 6, 7, 0, 1, 2, 3, 4, 0x240CA1CC) ROUNDb(20, 4, 5, 6, 7, 0, 1, 2, 3, 0x2DE92C6F) ROUNDb(21, 3, 4, 5, 6, 7, 0, 1, 2, 0x4A7484AA) ROUNDb(22, 2, 3, 4, 5, 6, 7, 0, 1, 0x5CB0A9DC) ROUNDb(23, 1, 2, 3, 4, 5, 6, 7, 0, 0x76F988DA) ROUNDb(24, 0, 1, 2, 3, 4, 5, 6, 7, 0x983E5152) ROUNDb(25, 7, 0, 1, 2, 3, 4, 5, 6, 0xA831C66D) ROUNDb(26, 6, 7, 0, 1, 2, 3, 4, 5, 0xB00327C8) ROUNDb(27, 5, 6, 7, 0, 1, 2, 3, 4, 0xBF597FC7) ROUNDb(28, 4, 5, 6, 7, 0, 1, 2, 3, 0xC6E00BF3) ROUNDb(29, 3, 4, 5, 6, 7, 0, 1, 2, 0xD5A79147) ROUNDb(30, 2, 3, 4, 5, 6, 7, 0, 1, 0x06CA6351) ROUNDb(31, 1, 2, 3, 4, 5, 6, 7, 0, 0x14292967) ROUNDb(32, 0, 1, 2, 3, 4, 5, 6, 7, 0x27B70A85) ROUNDb(33, 7, 0, 1, 2, 3, 4, 5, 6, 0x2E1B2138) ROUNDb(34, 6, 7, 0, 1, 2, 3, 4, 5, 0x4D2C6DFC) ROUNDb(35, 5, 6, 7, 0, 1, 2, 3, 4, 0x53380D13) ROUNDb(36, 4, 5, 6, 7, 0, 1, 2, 3, 0x650A7354) ROUNDb(37, 3, 4, 5, 6, 7, 0, 1, 2, 0x766A0ABB) ROUNDb(38, 2, 3, 4, 5, 6, 7, 0, 1, 0x81C2C92E) ROUNDb(39, 1, 2, 3, 4, 5, 6, 7, 0, 0x92722C85) ROUNDb(40, 0, 1, 2, 3, 4, 5, 6, 7, 0xA2BFE8A1) ROUNDb(41, 7, 0, 1, 2, 3, 4, 5, 6, 0xA81A664B) ROUNDb(42, 6, 7, 0, 1, 2, 3, 4, 5, 0xC24B8B70) ROUNDb(43, 5, 6, 7, 0, 1, 2, 3, 4, 0xC76C51A3) ROUNDb(44, 4, 5, 6, 7, 0, 1, 2, 3, 0xD192E819) ROUNDb(45, 3, 4, 5, 6, 7, 0, 1, 2, 0xD6990624) ROUNDb(46, 2, 3, 4, 5, 6, 7, 0, 1, 0xF40E3585) ROUNDb(47, 1, 2, 3, 4, 5, 6, 7, 0, 0x106AA070) ROUNDb(48, 0, 1, 2, 3, 4, 5, 6, 7, 0x19A4C116) ROUNDb(49, 7, 0, 1, 2, 3, 4, 5, 6, 0x1E376C08) ROUNDb(50, 6, 7, 0, 1, 2, 3, 4, 5, 0x2748774C) ROUNDb(51, 5, 6, 7, 0, 1, 2, 3, 4, 0x34B0BCB5) ROUNDb(52, 4, 5, 6, 7, 0, 1, 2, 3, 0x391C0CB3) ROUNDb(53, 3, 4, 5, 6, 7, 0, 1, 2, 0x4ED8AA4A) ROUNDb(54, 2, 3, 4, 5, 6, 7, 0, 1, 0x5B9CCA4F) ROUNDb(55, 1, 2, 3, 4, 5, 6, 7, 0, 0x682E6FF3) ROUNDb(56, 0, 1, 2, 3, 4, 5, 6, 7, 0x748F82EE) ROUNDb(57, 7, 0, 1, 2, 3, 4, 5, 6, 0x78A5636F) ROUNDb(58, 6, 7, 0, 1, 2, 3, 4, 5, 0x84C87814) ROUNDb(59, 5, 6, 7, 0, 1, 2, 3, 4, 0x8CC70208) ROUNDb(60, 4, 5, 6, 7, 0, 
1, 2, 3, 0x90BEFFFA) ROUNDb(61, 3, 4, 5, 6, 7, 0, 1, 2, 0xA4506CEB) ROUNDb(62, 2, 3, 4, 5, 6, 7, 0, 1, 0xBEF9A3F7) ROUNDb(63, 1, 2, 3, 4, 5, 6, 7, 0, 0xC67178F2) /* Add to state */ movl 116(%esp), %esi /* Argument: state */ movl 0(%esp), %eax; addl %eax, 0(%esi) movl 4(%esp), %eax; addl %eax, 4(%esi) movl 8(%esp), %eax; addl %eax, 8(%esi) movl 12(%esp), %eax; addl %eax, 12(%esi) movl 16(%esp), %eax; addl %eax, 16(%esi) movl 20(%esp), %eax; addl %eax, 20(%esi) movl 24(%esp), %eax; addl %eax, 24(%esi) movl 28(%esp), %eax; addl %eax, 28(%esi) /* Restore registers */ movl 96(%esp), %ebx movl 100(%esp), %esi movl 104(%esp), %edi movl 108(%esp), %ebp addl $112, %esp retl sha2-asm-0.5.1/src/sha512_x64.S010064400017500001750000000277271327661315200140150ustar0000000000000000/* * SHA-512 hash in x86-64 assembly * * Copyright (c) 2017 Project Nayuki. (MIT License) * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly * * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of * the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * - The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * - The Software is provided "as is", without warranty of any kind, express or * implied, including but not limited to the warranties of merchantability, * fitness for a particular purpose and noninfringement. In no event shall the * authors or copyright holders be liable for any claim, damages or other * liability, whether in an action of contract, tort or otherwise, arising from, * out of or in connection with the Software or the use or other dealings in the * Software. 
*/ /* void sha512_compress(uint64_t state[8], const uint8_t block[128]) */ #ifdef __APPLE__ .globl _sha512_compress _sha512_compress: #else .globl sha512_compress sha512_compress: #endif /* * Storage usage: * Bytes Location Description * 8 rax Temporary for calculation per round * 8 rbx Temporary for calculation per round * 8 rcx Temporary for calculation per round * 8 rdx Temporary for calculation per round * 8 rsi Base address of block array argument (read-only) * 8 rdi Base address of state array argument (read-only) * 8 rsp x86-64 stack pointer * 8 r8 SHA-512 state variable A * 8 r9 SHA-512 state variable B * 8 r10 SHA-512 state variable C * 8 r11 SHA-512 state variable D * 8 r12 SHA-512 state variable E * 8 r13 SHA-512 state variable F * 8 r14 SHA-512 state variable G * 8 r15 SHA-512 state variable H * 128 [rsp+0] Circular buffer of most recent 16 key schedule items, 8 bytes each * 16 xmm0 Caller's value of r10 (only low 64 bits are used) * 16 xmm1 Caller's value of r11 (only low 64 bits are used) * 16 xmm2 Caller's value of r12 (only low 64 bits are used) * 16 xmm3 Caller's value of r13 (only low 64 bits are used) * 16 xmm4 Caller's value of r14 (only low 64 bits are used) * 16 xmm5 Caller's value of r15 (only low 64 bits are used) * 16 xmm6 Caller's value of rbx (only low 64 bits are used) */ #define SCHED(i) (((i)&0xF)*8)(%rsp) #define ROUNDa(i, a, b, c, d, e, f, g, h, k) \ movq (i*8)(%rsi), %rbx; \ bswapq %rbx; \ movq %rbx, SCHED(i); \ ROUNDTAIL(a, b, c, d, e, f, g, h, k) #define ROUNDb(i, a, b, c, d, e, f, g, h, k) \ movq SCHED(i-15), %rax; \ movq SCHED(i-16), %rbx; \ addq SCHED(i- 7), %rbx; \ movq %rax, %rcx; \ movq %rax, %rdx; \ rorq $8, %rcx; \ shrq $7, %rdx; \ rorq $1, %rax; \ xorq %rdx, %rcx; \ xorq %rcx, %rax; \ addq %rax, %rbx; \ movq SCHED(i- 2), %rax; \ movq %rax, %rcx; \ movq %rax, %rdx; \ rorq $61, %rcx; \ shrq $6, %rdx; \ rorq $19, %rax; \ xorq %rdx, %rcx; \ xorq %rcx, %rax; \ addq %rax, %rbx; \ movq %rbx, SCHED(i); \ ROUNDTAIL(a, b, c, d, e, 
f, g, h, k) #define ROUNDTAIL(a, b, c, d, e, f, g, h, k) \ /* Part 0 */ \ movq %e, %rcx; \ movq %e, %rdx; \ movq %e, %rax; \ rorq $18, %rcx; \ rorq $41, %rdx; \ rorq $14, %rax; \ xorq %rdx, %rcx; \ xorq %rcx, %rax; \ addq %rbx, %h; \ movq %g, %rcx; \ xorq %f, %rcx; \ andq %e, %rcx; \ xorq %g, %rcx; \ addq %rax, %h; \ movabs $k, %rax; \ addq %rcx, %h; \ addq %rax, %h; \ /* Part 1 */ \ addq %h, %d; \ /* Part 2 */ \ movq %a, %rcx; \ movq %a, %rdx; \ movq %a, %rax; \ rorq $39, %rcx; \ rorq $34, %rdx; \ rorq $28, %rax; \ xorq %rdx, %rcx; \ xorq %rcx, %rax; \ movq %c, %rcx; \ addq %rax, %h; \ movq %c, %rax; \ orq %b, %rax; \ andq %b, %rcx; \ andq %a, %rax; \ orq %rcx, %rax; \ addq %rax, %h; /* Save registers, allocate scratch space */ movq %r10, %xmm0 movq %r11, %xmm1 movq %r12, %xmm2 movq %r13, %xmm3 movq %r14, %xmm4 movq %r15, %xmm5 movq %rbx, %xmm6 subq $128, %rsp /* Load state */ movq 0(%rdi), %r8 /* a */ movq 8(%rdi), %r9 /* b */ movq 16(%rdi), %r10 /* c */ movq 24(%rdi), %r11 /* d */ movq 32(%rdi), %r12 /* e */ movq 40(%rdi), %r13 /* f */ movq 48(%rdi), %r14 /* g */ movq 56(%rdi), %r15 /* h */ /* Do 80 rounds of hashing */ ROUNDa( 0, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x428A2F98D728AE22) ROUNDa( 1, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x7137449123EF65CD) ROUNDa( 2, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xB5C0FBCFEC4D3B2F) ROUNDa( 3, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xE9B5DBA58189DBBC) ROUNDa( 4, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x3956C25BF348B538) ROUNDa( 5, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x59F111F1B605D019) ROUNDa( 6, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x923F82A4AF194F9B) ROUNDa( 7, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xAB1C5ED5DA6D8118) ROUNDa( 8, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xD807AA98A3030242) ROUNDa( 9, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x12835B0145706FBE) ROUNDa(10, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x243185BE4EE4B28C) ROUNDa(11, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x550C7DC3D5FFB4E2) 
ROUNDa(12, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x72BE5D74F27B896F) ROUNDa(13, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x80DEB1FE3B1696B1) ROUNDa(14, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x9BDC06A725C71235) ROUNDa(15, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xC19BF174CF692694) ROUNDb(16, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xE49B69C19EF14AD2) ROUNDb(17, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xEFBE4786384F25E3) ROUNDb(18, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x0FC19DC68B8CD5B5) ROUNDb(19, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x240CA1CC77AC9C65) ROUNDb(20, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x2DE92C6F592B0275) ROUNDb(21, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x4A7484AA6EA6E483) ROUNDb(22, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5CB0A9DCBD41FBD4) ROUNDb(23, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x76F988DA831153B5) ROUNDb(24, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x983E5152EE66DFAB) ROUNDb(25, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xA831C66D2DB43210) ROUNDb(26, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xB00327C898FB213F) ROUNDb(27, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xBF597FC7BEEF0EE4) ROUNDb(28, r12, r13, r14, r15, r8 , r9 , r10, r11, 0xC6E00BF33DA88FC2) ROUNDb(29, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xD5A79147930AA725) ROUNDb(30, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x06CA6351E003826F) ROUNDb(31, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x142929670A0E6E70) ROUNDb(32, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x27B70A8546D22FFC) ROUNDb(33, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x2E1B21385C26C926) ROUNDb(34, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x4D2C6DFC5AC42AED) ROUNDb(35, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x53380D139D95B3DF) ROUNDb(36, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x650A73548BAF63DE) ROUNDb(37, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x766A0ABB3C77B2A8) ROUNDb(38, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x81C2C92E47EDAEE6) ROUNDb(39, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x92722C851482353B) ROUNDb(40, 
r8 , r9 , r10, r11, r12, r13, r14, r15, 0xA2BFE8A14CF10364) ROUNDb(41, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xA81A664BBC423001) ROUNDb(42, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xC24B8B70D0F89791) ROUNDb(43, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xC76C51A30654BE30) ROUNDb(44, r12, r13, r14, r15, r8 , r9 , r10, r11, 0xD192E819D6EF5218) ROUNDb(45, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xD69906245565A910) ROUNDb(46, r10, r11, r12, r13, r14, r15, r8 , r9 , 0xF40E35855771202A) ROUNDb(47, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x106AA07032BBD1B8) ROUNDb(48, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x19A4C116B8D2D0C8) ROUNDb(49, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x1E376C085141AB53) ROUNDb(50, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x2748774CDF8EEB99) ROUNDb(51, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x34B0BCB5E19B48A8) ROUNDb(52, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x391C0CB3C5C95A63) ROUNDb(53, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x4ED8AA4AE3418ACB) ROUNDb(54, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5B9CCA4F7763E373) ROUNDb(55, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x682E6FF3D6B2B8A3) ROUNDb(56, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x748F82EE5DEFB2FC) ROUNDb(57, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x78A5636F43172F60) ROUNDb(58, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x84C87814A1F0AB72) ROUNDb(59, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x8CC702081A6439EC) ROUNDb(60, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x90BEFFFA23631E28) ROUNDb(61, r11, r12, r13, r14, r15, r8 , r9 , r10, 0xA4506CEBDE82BDE9) ROUNDb(62, r10, r11, r12, r13, r14, r15, r8 , r9 , 0xBEF9A3F7B2C67915) ROUNDb(63, r9 , r10, r11, r12, r13, r14, r15, r8 , 0xC67178F2E372532B) ROUNDb(64, r8 , r9 , r10, r11, r12, r13, r14, r15, 0xCA273ECEEA26619C) ROUNDb(65, r15, r8 , r9 , r10, r11, r12, r13, r14, 0xD186B8C721C0C207) ROUNDb(66, r14, r15, r8 , r9 , r10, r11, r12, r13, 0xEADA7DD6CDE0EB1E) ROUNDb(67, r13, r14, r15, r8 , r9 , r10, r11, r12, 0xF57D4F7FEE6ED178) ROUNDb(68, r12, r13, 
r14, r15, r8 , r9 , r10, r11, 0x06F067AA72176FBA) ROUNDb(69, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x0A637DC5A2C898A6) ROUNDb(70, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x113F9804BEF90DAE) ROUNDb(71, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x1B710B35131C471B) ROUNDb(72, r8 , r9 , r10, r11, r12, r13, r14, r15, 0x28DB77F523047D84) ROUNDb(73, r15, r8 , r9 , r10, r11, r12, r13, r14, 0x32CAAB7B40C72493) ROUNDb(74, r14, r15, r8 , r9 , r10, r11, r12, r13, 0x3C9EBE0A15C9BEBC) ROUNDb(75, r13, r14, r15, r8 , r9 , r10, r11, r12, 0x431D67C49C100D4C) ROUNDb(76, r12, r13, r14, r15, r8 , r9 , r10, r11, 0x4CC5D4BECB3E42B6) ROUNDb(77, r11, r12, r13, r14, r15, r8 , r9 , r10, 0x597F299CFC657E2A) ROUNDb(78, r10, r11, r12, r13, r14, r15, r8 , r9 , 0x5FCB6FAB3AD6FAEC) ROUNDb(79, r9 , r10, r11, r12, r13, r14, r15, r8 , 0x6C44198C4A475817) /* Add to state */ addq %r8 , 0(%rdi) addq %r9 , 8(%rdi) addq %r10, 16(%rdi) addq %r11, 24(%rdi) addq %r12, 32(%rdi) addq %r13, 40(%rdi) addq %r14, 48(%rdi) addq %r15, 56(%rdi) /* Restore registers */ movq %xmm0, %r10 movq %xmm1, %r11 movq %xmm2, %r12 movq %xmm3, %r13 movq %xmm4, %r14 movq %xmm5, %r15 movq %xmm6, %rbx addq $128, %rsp retq sha2-asm-0.5.1/src/sha512_x86.S010064400017500001750000000255051327661315200140110ustar0000000000000000/* * SHA-512 hash in x86 assembly * * Copyright (c) 2014 Project Nayuki. 
(MIT License) * https://www.nayuki.io/page/fast-sha2-hashes-in-x86-assembly * * Permission is hereby granted, free of charge, to any person obtaining a copy of * this software and associated documentation files (the "Software"), to deal in * the Software without restriction, including without limitation the rights to * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of * the Software, and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * - The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * - The Software is provided "as is", without warranty of any kind, express or * implied, including but not limited to the warranties of merchantability, * fitness for a particular purpose and noninfringement. In no event shall the * authors or copyright holders be liable for any claim, damages or other * liability, whether in an action of contract, tort or otherwise, arising from, * out of or in connection with the Software or the use or other dealings in the * Software. 
*/


/*
 * void sha512_compress(uint64_t state[8], const uint8_t block[128])
 *
 * Runs the 80 rounds below over one 128-byte block and adds the resulting
 * eight 64-bit working variables into state[0..7] in place.
 *
 * Both arguments are read from the stack: the state pointer from 4(%ecx)
 * and the block pointer from 8(%ecx), where ecx holds the value esp had on
 * entry. Only eax, ecx, esp, mm0..mm7 and xmm0..xmm3 are written; esp is
 * restored from ecx and emms is executed before returning.
 *
 * NOTE(review): .bswap64 and .roundconstants are addressed absolutely, so
 * this code is presumably not position-independent - confirm that this is
 * acceptable for the targets it is built for.
 */
#ifdef __APPLE__
.globl _sha512_compress
_sha512_compress:
#else
.globl sha512_compress
sha512_compress:
#endif
	/*
	 * Storage usage:
	 *   Bytes  Location  Description
	 *       4  eax       Temporary base address of state or block array arguments
	 *       4  ecx       Old value of esp
	 *       4  esp       x86 stack pointer
	 *      64  [esp+ 0]  SHA-512 state variables A,B,C,D,E,F,G,H (8 bytes each)
	 *     128  [esp+64]  Circular buffer of most recent 16 key schedule items, 8 bytes each
	 *      56  mm0..mm6  Temporary for calculation per round
	 *       8  mm7       Control value for byte endian reversal
	 *      64  xmm0..xmm3  Temporary for copying or calculation
	 */

/* Memory operand for schedule word i: slot (i mod 16) of the circular buffer at esp+64. */
#define SCHED(i) (((i)&0xF)*8+64)(%esp)
/* Memory operand for 8-byte working-variable slot i (0..7) at the bottom of the scratch area. */
#define STATE(i) (i*8)(%esp)

/*
 * Rotate the 64-bit MMX register reg right by shift bits:
 *   reg = (reg >> shift) | (reg << (64 - shift))
 * temp is overwritten with the left-shifted copy.
 */
#define RORQ(reg, shift, temp) \
	movq %reg, %temp; \
	psllq $(64-shift), %temp; \
	psrlq $shift, %reg; \
	por %temp, %reg;

/*
 * Rounds 0..15: load message word i from the block (eax points at it),
 * reverse its byte order with the pshufb control value held in mm7, store
 * it as schedule word i and fall into the common round tail with the word
 * still in mm0.
 */
#define ROUNDa(i, a, b, c, d, e, f, g, h) \
	movq (i*8)(%eax), %mm0; \
	pshufb %mm7, %mm0; \
	movq %mm0, SCHED(i); \
	ROUNDTAIL(i, a, b, c, d, e, f, g, h)

/*
 * Rounds 16..79: extend the schedule, then run the common round tail.
 *   W[i] = W[i-16] + W[i-7]
 *        + (ror(W[i-15], 1) ^ ror(W[i-15], 8) ^ (W[i-15] >> 7))
 *        + (ror(W[i-2], 19) ^ ror(W[i-2], 61) ^ (W[i-2] >> 6))
 * Because SCHED indexes modulo 16, storing W[i] overwrites the slot that
 * held W[i-16]. The new word is left in mm0 for ROUNDTAIL.
 */
#define ROUNDb(i, a, b, c, d, e, f, g, h) \
	movq SCHED(i-16), %mm0; \
	paddq SCHED(i- 7), %mm0; \
	/* mm1 = ror 1, mm2 = ror 8, mm3 = shr 7 of W[i-15]; xor them into mm1 */ \
	movq SCHED(i-15), %mm1; \
	movq %mm1, %mm2; \
	movq %mm1, %mm3; \
	RORQ(mm1, 1, mm5) \
	RORQ(mm2, 8, mm4) \
	psrlq $7, %mm3; \
	pxor %mm3, %mm2; \
	pxor %mm2, %mm1; \
	paddq %mm1, %mm0; \
	/* mm1 = ror 19, mm2 = ror 61, mm3 = shr 6 of W[i-2]; xor them into mm1 */ \
	movq SCHED(i- 2), %mm1; \
	movq %mm1, %mm2; \
	movq %mm1, %mm3; \
	RORQ(mm1, 19, mm5) \
	RORQ(mm2, 61, mm4) \
	psrlq $6, %mm3; \
	pxor %mm3, %mm2; \
	pxor %mm2, %mm1; \
	paddq %mm1, %mm0; \
	movq %mm0, SCHED(i); \
	ROUNDTAIL(i, a, b, c, d, e, f, g, h)

/*
 * Common part of every round. On entry mm0 holds schedule word W[i].
 * The arguments a..h are STATE slot numbers, not values: the callers rotate
 * the slot assignment by one position per round, so only slots d and h are
 * ever written and no working variable has to be moved.
 *
 *   Part 0: mm0 = W[i] + h + (ror(e,14) ^ ror(e,18) ^ ror(e,41))
 *                 + roundconstant[i] + (g ^ (e & (f ^ g)))
 *   Part 1: d += mm0
 *   Part 2: h = mm0 + (ror(a,28) ^ ror(a,34) ^ ror(a,39))
 *                 + ((c & b) | (a & (c | b)))
 */
#define ROUNDTAIL(i, a, b, c, d, e, f, g, h) \
	/* Part 0 */ \
	paddq STATE(h), %mm0; \
	movq STATE(e), %mm1; \
	movq %mm1, %mm2; \
	movq %mm1, %mm3; \
	RORQ(mm1, 18, mm4) \
	RORQ(mm2, 41, mm5) \
	RORQ(mm3, 14, mm6) \
	pxor %mm2, %mm1; \
	pxor %mm3, %mm1; \
	paddq .roundconstants+i*8, %mm0; \
	/* mm2 = g ^ (e & (f ^ g)) */ \
	movq STATE(g), %mm2; \
	pxor STATE(f), %mm2; \
	pand STATE(e), %mm2; \
	pxor STATE(g), %mm2; \
	paddq %mm1, %mm0; \
	paddq %mm2, %mm0; \
	/* Part 1 */ \
	movq STATE(d), %mm1; \
	paddq %mm0, %mm1; \
	movq %mm1, STATE(d); \
	/* Part 2 */ \
	movq STATE(a), %mm1; \
	movq %mm1, %mm2; \
	movq %mm1, %mm3; \
	RORQ(mm1, 39, mm4) \
	RORQ(mm2, 34, mm5) \
	RORQ(mm3, 28, mm6) \
	pxor %mm2, %mm1; \
	pxor %mm3, %mm1; \
	movq STATE(c), %mm2; \
	paddq %mm1, %mm0; \
	/* mm3 = (c & b) | (a & (c | b)) */ \
	movq %mm2, %mm3; \
	por STATE(b), %mm3; \
	pand STATE(b), %mm2; \
	pand STATE(a), %mm3; \
	por %mm2, %mm3; \
	paddq %mm3, %mm0; \
	movq %mm0, STATE(h);

	/* Allocate 16-byte aligned scratch space */
	/* ecx keeps the entry esp; 192 bytes = 64 (state copy) + 128 (schedule buffer); */
	/* the andl rounds esp down to a multiple of 16. */
	movl %esp, %ecx
	subl $192, %esp
	andl $~0xF, %esp

	/* Copy state */
	/* eax = state pointer (first argument); copy its 64 bytes into STATE(0..7). */
	movl 4(%ecx), %eax
	movdqu 0(%eax), %xmm0; movdqu %xmm0, 0(%esp)
	movdqu 16(%eax), %xmm1; movdqu %xmm1, 16(%esp)
	movdqu 32(%eax), %xmm2; movdqu %xmm2, 32(%esp)
	movdqu 48(%eax), %xmm3; movdqu %xmm3, 48(%esp)

	/* Do 80 rounds of hashing */
	/* eax = block pointer (second argument); mm7 = byte-reversal control for pshufb. */
	movl 8(%ecx), %eax
	movq .bswap64, %mm7
	/* Rounds 0..15 take their schedule word directly from the block. */
	ROUNDa( 0, 0, 1, 2, 3, 4, 5, 6, 7)
	ROUNDa( 1, 7, 0, 1, 2, 3, 4, 5, 6)
	ROUNDa( 2, 6, 7, 0, 1, 2, 3, 4, 5)
	ROUNDa( 3, 5, 6, 7, 0, 1, 2, 3, 4)
	ROUNDa( 4, 4, 5, 6, 7, 0, 1, 2, 3)
	ROUNDa( 5, 3, 4, 5, 6, 7, 0, 1, 2)
	ROUNDa( 6, 2, 3, 4, 5, 6, 7, 0, 1)
	ROUNDa( 7, 1, 2, 3, 4, 5, 6, 7, 0)
	ROUNDa( 8, 0, 1, 2, 3, 4, 5, 6, 7)
	ROUNDa( 9, 7, 0, 1, 2, 3, 4, 5, 6)
	ROUNDa(10, 6, 7, 0, 1, 2, 3, 4, 5)
	ROUNDa(11, 5, 6, 7, 0, 1, 2, 3, 4)
	ROUNDa(12, 4, 5, 6, 7, 0, 1, 2, 3)
	ROUNDa(13, 3, 4, 5, 6, 7, 0, 1, 2)
	ROUNDa(14, 2, 3, 4, 5, 6, 7, 0, 1)
	ROUNDa(15, 1, 2, 3, 4, 5, 6, 7, 0)
	/* Rounds 16..79 derive their schedule word from the circular buffer. */
	ROUNDb(16, 0, 1, 2, 3, 4, 5, 6, 7)
	ROUNDb(17, 7, 0, 1, 2, 3, 4, 5, 6)
	ROUNDb(18, 6, 7, 0, 1, 2, 3, 4, 5)
	ROUNDb(19, 5, 6, 7, 0, 1, 2, 3, 4)
	ROUNDb(20, 4, 5, 6, 7, 0, 1, 2, 3)
	ROUNDb(21, 3, 4, 5, 6, 7, 0, 1, 2)
	ROUNDb(22, 2, 3, 4, 5, 6, 7, 0, 1)
	ROUNDb(23, 1, 2, 3, 4, 5, 6, 7, 0)
	ROUNDb(24, 0, 1, 2, 3, 4, 5, 6, 7)
	ROUNDb(25, 7, 0, 1, 2, 3, 4, 5, 6)
	ROUNDb(26, 6, 7, 0, 1, 2, 3, 4, 5)
	ROUNDb(27, 5, 6, 7, 0, 1, 2, 3, 4)
	ROUNDb(28, 4, 5, 6, 7, 0, 1, 2, 3)
	ROUNDb(29, 3, 4, 5, 6, 7, 0, 1, 2)
	ROUNDb(30, 2, 3, 4, 5, 6, 7, 0, 1)
	ROUNDb(31, 1, 2, 3, 4, 5, 6, 7, 0)
	ROUNDb(32, 0, 1, 2, 3, 4, 5, 6, 7)
	ROUNDb(33, 7, 0, 1, 2, 3, 4, 5, 6)
	ROUNDb(34, 6, 7, 0, 1, 2, 3, 4, 5)
	ROUNDb(35, 5, 6, 7, 0, 1, 2, 3, 4)
	ROUNDb(36, 4, 5, 6, 7, 0, 1, 2, 3)
	ROUNDb(37, 3, 4, 5, 6, 7, 0, 1, 2)
	ROUNDb(38, 2, 3, 4, 5, 6, 7, 0, 1)
	ROUNDb(39, 1, 2, 3, 4, 5, 6, 7, 0)
	ROUNDb(40, 0, 1, 2, 3, 4, 5, 6, 7)
	ROUNDb(41, 7, 0, 1, 2, 3, 4, 5, 6)
	ROUNDb(42, 6, 7, 0, 1, 2, 3, 4, 5)
	ROUNDb(43, 5, 6, 7, 0, 1, 2, 3, 4)
	ROUNDb(44, 4, 5, 6, 7, 0, 1, 2, 3)
	ROUNDb(45, 3, 4, 5, 6, 7, 0, 1, 2)
	ROUNDb(46, 2, 3, 4, 5, 6, 7, 0, 1)
	ROUNDb(47, 1, 2, 3, 4, 5, 6, 7, 0)
	ROUNDb(48, 0, 1, 2, 3, 4, 5, 6, 7)
	ROUNDb(49, 7, 0, 1, 2, 3, 4, 5, 6)
	ROUNDb(50, 6, 7, 0, 1, 2, 3, 4, 5)
	ROUNDb(51, 5, 6, 7, 0, 1, 2, 3, 4)
	ROUNDb(52, 4, 5, 6, 7, 0, 1, 2, 3)
	ROUNDb(53, 3, 4, 5, 6, 7, 0, 1, 2)
	ROUNDb(54, 2, 3, 4, 5, 6, 7, 0, 1)
	ROUNDb(55, 1, 2, 3, 4, 5, 6, 7, 0)
	ROUNDb(56, 0, 1, 2, 3, 4, 5, 6, 7)
	ROUNDb(57, 7, 0, 1, 2, 3, 4, 5, 6)
	ROUNDb(58, 6, 7, 0, 1, 2, 3, 4, 5)
	ROUNDb(59, 5, 6, 7, 0, 1, 2, 3, 4)
	ROUNDb(60, 4, 5, 6, 7, 0, 1, 2, 3)
	ROUNDb(61, 3, 4, 5, 6, 7, 0, 1, 2)
	ROUNDb(62, 2, 3, 4, 5, 6, 7, 0, 1)
	ROUNDb(63, 1, 2, 3, 4, 5, 6, 7, 0)
	ROUNDb(64, 0, 1, 2, 3, 4, 5, 6, 7)
	ROUNDb(65, 7, 0, 1, 2, 3, 4, 5, 6)
	ROUNDb(66, 6, 7, 0, 1, 2, 3, 4, 5)
	ROUNDb(67, 5, 6, 7, 0, 1, 2, 3, 4)
	ROUNDb(68, 4, 5, 6, 7, 0, 1, 2, 3)
	ROUNDb(69, 3, 4, 5, 6, 7, 0, 1, 2)
	ROUNDb(70, 2, 3, 4, 5, 6, 7, 0, 1)
	ROUNDb(71, 1, 2, 3, 4, 5, 6, 7, 0)
	ROUNDb(72, 0, 1, 2, 3, 4, 5, 6, 7)
	ROUNDb(73, 7, 0, 1, 2, 3, 4, 5, 6)
	ROUNDb(74, 6, 7, 0, 1, 2, 3, 4, 5)
	ROUNDb(75, 5, 6, 7, 0, 1, 2, 3, 4)
	ROUNDb(76, 4, 5, 6, 7, 0, 1, 2, 3)
	ROUNDb(77, 3, 4, 5, 6, 7, 0, 1, 2)
	ROUNDb(78, 2, 3, 4, 5, 6, 7, 0, 1)
	ROUNDb(79, 1, 2, 3, 4, 5, 6, 7, 0)

	/* Add to state */
	/* 80 is a multiple of 8, so the slot rotation is back at its starting */
	/* assignment and slot k lines up with state[k]. Each paddq adds two */
	/* 64-bit lanes; the scratch-side operand is the 16-byte aligned esp area. */
	movl 4(%ecx), %eax
	movdqu 0(%eax), %xmm0; paddq 0(%esp), %xmm0; movdqu %xmm0, 0(%eax)
	movdqu 16(%eax), %xmm1; paddq 16(%esp), %xmm1; movdqu %xmm1, 16(%eax)
	movdqu 32(%eax), %xmm2; paddq 32(%esp), %xmm2; movdqu %xmm2, 32(%eax)
	movdqu 48(%eax), %xmm3; paddq 48(%esp), %xmm3; movdqu %xmm3, 48(%eax)

	/* Clean up */
	/* Leave MMX mode, then drop the scratch area by restoring the entry esp. */
	emms
	movl %ecx, %esp
	retl


.balign 8
/* pshufb control value loaded into mm7: selects source bytes 7,6,...,0, i.e. reverses the 8 bytes. */
.bswap64:
	.quad 0x0001020304050607

/* 80 round constants, 8 bytes each; ROUNDTAIL reads entry i at .roundconstants+i*8. */
.roundconstants:
	.quad 0x428A2F98D728AE22, 0x7137449123EF65CD, 0xB5C0FBCFEC4D3B2F, 0xE9B5DBA58189DBBC
	.quad 0x3956C25BF348B538, 0x59F111F1B605D019, 0x923F82A4AF194F9B, 0xAB1C5ED5DA6D8118
	.quad 0xD807AA98A3030242, 0x12835B0145706FBE, 0x243185BE4EE4B28C, 0x550C7DC3D5FFB4E2
	.quad 0x72BE5D74F27B896F, 0x80DEB1FE3B1696B1, 0x9BDC06A725C71235, 0xC19BF174CF692694
	.quad 0xE49B69C19EF14AD2, 0xEFBE4786384F25E3, 0x0FC19DC68B8CD5B5, 0x240CA1CC77AC9C65
	.quad 0x2DE92C6F592B0275, 0x4A7484AA6EA6E483, 0x5CB0A9DCBD41FBD4, 0x76F988DA831153B5
	.quad 0x983E5152EE66DFAB, 0xA831C66D2DB43210, 0xB00327C898FB213F, 0xBF597FC7BEEF0EE4
	.quad 0xC6E00BF33DA88FC2, 0xD5A79147930AA725, 0x06CA6351E003826F, 0x142929670A0E6E70
	.quad 0x27B70A8546D22FFC, 0x2E1B21385C26C926, 0x4D2C6DFC5AC42AED, 0x53380D139D95B3DF
	.quad 0x650A73548BAF63DE, 0x766A0ABB3C77B2A8, 0x81C2C92E47EDAEE6, 0x92722C851482353B
	.quad 0xA2BFE8A14CF10364, 0xA81A664BBC423001, 0xC24B8B70D0F89791, 0xC76C51A30654BE30
	.quad 0xD192E819D6EF5218, 0xD69906245565A910, 0xF40E35855771202A, 0x106AA07032BBD1B8
	.quad 0x19A4C116B8D2D0C8, 0x1E376C085141AB53, 0x2748774CDF8EEB99, 0x34B0BCB5E19B48A8
	.quad 0x391C0CB3C5C95A63, 0x4ED8AA4AE3418ACB, 0x5B9CCA4F7763E373, 0x682E6FF3D6B2B8A3
	.quad 0x748F82EE5DEFB2FC, 0x78A5636F43172F60, 0x84C87814A1F0AB72, 0x8CC702081A6439EC
	.quad 0x90BEFFFA23631E28, 0xA4506CEBDE82BDE9, 0xBEF9A3F7B2C67915, 0xC67178F2E372532B
	.quad 0xCA273ECEEA26619C, 0xD186B8C721C0C207, 0xEADA7DD6CDE0EB1E, 0xF57D4F7FEE6ED178
	.quad 0x06F067AA72176FBA, 0x0A637DC5A2C898A6, 0x113F9804BEF90DAE, 0x1B710B35131C471B
	.quad 0x28DB77F523047D84, 0x32CAAB7B40C72493, 0x3C9EBE0A15C9BEBC, 0x431D67C49C100D4C
	.quad 0x4CC5D4BECB3E42B6, 0x597F299CFC657E2A, 0x5FCB6FAB3AD6FAEC, 0x6C44198C4A475817