From b2636354d9074fdbe4f7c2164a443a45583d6a28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=90=D1=80=D1=82=D1=91=D0=BC=20=D0=9F=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=BE=D0=B2=20=5BArtyom=20Pavlov=5D?= Date: Wed, 21 Aug 2024 17:23:21 +0300 Subject: [PATCH] Replace macros with fns --- sha2/src/sha256/riscv_zknh.rs | 277 ++++++++++----------------- sha2/src/sha512/riscv_zknh.rs | 351 ++++++++++++---------------------- 2 files changed, 221 insertions(+), 407 deletions(-) diff --git a/sha2/src/sha256/riscv_zknh.rs b/sha2/src/sha256/riscv_zknh.rs index 275e1bd6..2a5a95f4 100644 --- a/sha2/src/sha256/riscv_zknh.rs +++ b/sha2/src/sha256/riscv_zknh.rs @@ -18,190 +18,109 @@ fn maj(x: u32, y: u32, z: u32) -> u32 { (x & y) ^ (x & z) ^ (y & z) } -/// Forcefully read the round constant to prevent its reconstruction on stack. -fn read_rk() -> u32 { - assert!(IDX < K32.len()); - let res; - unsafe { - core::arch::asm!( - "lw {dst}, 4*{IDX}({p})", - IDX = const IDX, - p = in(reg) &K32, - dst = out(reg) res, - // note: the `pure` option is intentionally not used to prevent - // caching of the round constant on stack - options(preserves_flags, nostack, readonly) - ); - } - res +fn round(state: &mut [u32; 8], block: &[u32; 16]) { + let a = (K32.len() - R) % 8; + let b = (K32.len() - R + 1) % 8; + let c = (K32.len() - R + 2) % 8; + let d = (K32.len() - R + 3) % 8; + let e = (K32.len() - R + 4) % 8; + let f = (K32.len() - R + 5) % 8; + let g = (K32.len() - R + 6) % 8; + let h = (K32.len() - R + 7) % 8; + + state[h] = state[h] + .wrapping_add(unsafe { sha256sum1(state[e]) }) + .wrapping_add(ch(state[e], state[f], state[g])) + // Force reading of constants from the static to prevent bad codegen + .wrapping_add(unsafe { core::ptr::read_volatile(&K32[R]) }) + .wrapping_add(block[R % 16]); + state[d] = state[d].wrapping_add(state[h]); + state[h] = state[h] + .wrapping_add(unsafe { sha256sum0(state[a]) }) + .wrapping_add(maj(state[a], state[b], state[c])) } -macro_rules! round { - ( - $a: ident, $b: ident, $c: ident, $d: ident, - $e: ident, $f: ident, $g: ident, $h: ident, - $k: expr, $w: expr - ) => { - // SAFETY: we have checked that the zknh target feature - // required by the intrinsics is enabled - $h = $h - .wrapping_add(unsafe { sha256sum1($e) }) - .wrapping_add(ch($e, $f, $g)) - .wrapping_add(read_rk::<$k>()) - .wrapping_add($w); - $d = $d.wrapping_add($h); - $h = $h - .wrapping_add(unsafe { sha256sum0($a) }) - .wrapping_add(maj($a, $b, $c)) - }; -} +fn round_schedule(state: &mut [u32; 8], block: &mut [u32; 16]) { + round::(state, block); -macro_rules! schedule { - ($m0: ident, $m1: ident, $m9: ident, $me: ident) => { - // SAFETY: we have checked that the zknh target feature - // required by the intrinsics is enabled - $m0 = $m0 - .wrapping_add(unsafe { sha256sig1($me) }) - .wrapping_add($m9) - .wrapping_add(unsafe { sha256sig0($m1) }); - }; + block[R % 16] = block[R % 16] + .wrapping_add(unsafe { sha256sig1(block[(R + 14) % 16]) }) + .wrapping_add(block[(R + 9) % 16]) + .wrapping_add(unsafe { sha256sig0(block[(R + 1) % 16]) }); } -fn compress_block(state: &mut [u32; 8], block: [u32; 16]) { - #[rustfmt::skip] - let [ - mut m0, mut m1, mut m2, mut m3, mut m4, mut m5, mut m6, mut m7, - mut m8, mut m9, mut ma, mut mb, mut mc, mut md, mut me, mut mf, - ] = block; - let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h] = *state; - - round!(a, b, c, d, e, f, g, h, 0, m0); - round!(h, a, b, c, d, e, f, g, 1, m1); - round!(g, h, a, b, c, d, e, f, 2, m2); - round!(f, g, h, a, b, c, d, e, 3, m3); - round!(e, f, g, h, a, b, c, d, 4, m4); - round!(d, e, f, g, h, a, b, c, 5, m5); - round!(c, d, e, f, g, h, a, b, 6, m6); - round!(b, c, d, e, f, g, h, a, 7, m7); - round!(a, b, c, d, e, f, g, h, 8, m8); - round!(h, a, b, c, d, e, f, g, 9, m9); - round!(g, h, a, b, c, d, e, f, 10, ma); - round!(f, g, h, a, b, c, d, e, 11, mb); - round!(e, f, g, h, a, b, c, d, 12, mc); - round!(d, e, f, g, h, a, b, c, 13, md); - round!(c, d, e, f, g, h, a, b, 14, me); - round!(b, c, d, e, f, g, h, a, 15, mf); - - schedule!(m0, m1, m9, me); - schedule!(m1, m2, ma, mf); - schedule!(m2, m3, mb, m0); - schedule!(m3, m4, mc, m1); - schedule!(m4, m5, md, m2); - schedule!(m5, m6, me, m3); - schedule!(m6, m7, mf, m4); - schedule!(m7, m8, m0, m5); - schedule!(m8, m9, m1, m6); - schedule!(m9, ma, m2, m7); - schedule!(ma, mb, m3, m8); - schedule!(mb, mc, m4, m9); - schedule!(mc, md, m5, ma); - schedule!(md, me, m6, mb); - schedule!(me, mf, m7, mc); - schedule!(mf, m0, m8, md); - - round!(a, b, c, d, e, f, g, h, 16, m0); - round!(h, a, b, c, d, e, f, g, 17, m1); - round!(g, h, a, b, c, d, e, f, 18, m2); - round!(f, g, h, a, b, c, d, e, 19, m3); - round!(e, f, g, h, a, b, c, d, 20, m4); - round!(d, e, f, g, h, a, b, c, 21, m5); - round!(c, d, e, f, g, h, a, b, 22, m6); - round!(b, c, d, e, f, g, h, a, 23, m7); - round!(a, b, c, d, e, f, g, h, 24, m8); - round!(h, a, b, c, d, e, f, g, 25, m9); - round!(g, h, a, b, c, d, e, f, 26, ma); - round!(f, g, h, a, b, c, d, e, 27, mb); - round!(e, f, g, h, a, b, c, d, 28, mc); - round!(d, e, f, g, h, a, b, c, 29, md); - round!(c, d, e, f, g, h, a, b, 30, me); - round!(b, c, d, e, f, g, h, a, 31, mf); - - schedule!(m0, m1, m9, me); - schedule!(m1, m2, ma, mf); - schedule!(m2, m3, mb, m0); - schedule!(m3, m4, mc, m1); - schedule!(m4, m5, md, m2); - schedule!(m5, m6, me, m3); - schedule!(m6, m7, mf, m4); - schedule!(m7, m8, m0, m5); - schedule!(m8, m9, m1, m6); - schedule!(m9, ma, m2, m7); - schedule!(ma, mb, m3, m8); - schedule!(mb, mc, m4, m9); - schedule!(mc, md, m5, ma); - schedule!(md, me, m6, mb); - schedule!(me, mf, m7, mc); - schedule!(mf, m0, m8, md); - - round!(a, b, c, d, e, f, g, h, 32, m0); - round!(h, a, b, c, d, e, f, g, 33, m1); - round!(g, h, a, b, c, d, e, f, 34, m2); - round!(f, g, h, a, b, c, d, e, 35, m3); - round!(e, f, g, h, a, b, c, d, 36, m4); - round!(d, e, f, g, h, a, b, c, 37, m5); - round!(c, d, e, f, g, h, a, b, 38, m6); - round!(b, c, d, e, f, g, h, a, 39, m7); - round!(a, b, c, d, e, f, g, h, 40, m8); - round!(h, a, b, c, d, e, f, g, 41, m9); - round!(g, h, a, b, c, d, e, f, 42, ma); - round!(f, g, h, a, b, c, d, e, 43, mb); - round!(e, f, g, h, a, b, c, d, 44, mc); - round!(d, e, f, g, h, a, b, c, 45, md); - round!(c, d, e, f, g, h, a, b, 46, me); - round!(b, c, d, e, f, g, h, a, 47, mf); - - schedule!(m0, m1, m9, me); - schedule!(m1, m2, ma, mf); - schedule!(m2, m3, mb, m0); - schedule!(m3, m4, mc, m1); - schedule!(m4, m5, md, m2); - schedule!(m5, m6, me, m3); - schedule!(m6, m7, mf, m4); - schedule!(m7, m8, m0, m5); - schedule!(m8, m9, m1, m6); - schedule!(m9, ma, m2, m7); - schedule!(ma, mb, m3, m8); - schedule!(mb, mc, m4, m9); - schedule!(mc, md, m5, ma); - schedule!(md, me, m6, mb); - schedule!(me, mf, m7, mc); - schedule!(mf, m0, m8, md); - - round!(a, b, c, d, e, f, g, h, 48, m0); - round!(h, a, b, c, d, e, f, g, 49, m1); - round!(g, h, a, b, c, d, e, f, 50, m2); - round!(f, g, h, a, b, c, d, e, 51, m3); - round!(e, f, g, h, a, b, c, d, 52, m4); - round!(d, e, f, g, h, a, b, c, 53, m5); - round!(c, d, e, f, g, h, a, b, 54, m6); - round!(b, c, d, e, f, g, h, a, 55, m7); - round!(a, b, c, d, e, f, g, h, 56, m8); - round!(h, a, b, c, d, e, f, g, 57, m9); - round!(g, h, a, b, c, d, e, f, 58, ma); - round!(f, g, h, a, b, c, d, e, 59, mb); - round!(e, f, g, h, a, b, c, d, 60, mc); - round!(d, e, f, g, h, a, b, c, 61, md); - round!(c, d, e, f, g, h, a, b, 62, me); - round!(b, c, d, e, f, g, h, a, 63, mf); - - state[0] = state[0].wrapping_add(a); - state[1] = state[1].wrapping_add(b); - state[2] = state[2].wrapping_add(c); - state[3] = state[3].wrapping_add(d); - state[4] = state[4].wrapping_add(e); - state[5] = state[5].wrapping_add(f); - state[6] = state[6].wrapping_add(g); - state[7] = state[7].wrapping_add(h); +fn compress_block(state: &mut [u32; 8], mut block: [u32; 16]) { + let s = &mut state.clone(); + let b = &mut block; + + round_schedule::<0>(s, b); + round_schedule::<1>(s, b); + round_schedule::<2>(s, b); + round_schedule::<3>(s, b); + round_schedule::<4>(s, b); + round_schedule::<5>(s, b); + round_schedule::<6>(s, b); + round_schedule::<7>(s, b); + round_schedule::<8>(s, b); + round_schedule::<9>(s, b); + round_schedule::<10>(s, b); + round_schedule::<11>(s, b); + round_schedule::<12>(s, b); + round_schedule::<13>(s, b); + round_schedule::<14>(s, b); + round_schedule::<15>(s, b); + round_schedule::<16>(s, b); + round_schedule::<17>(s, b); + round_schedule::<18>(s, b); + round_schedule::<19>(s, b); + round_schedule::<20>(s, b); + round_schedule::<21>(s, b); + round_schedule::<22>(s, b); + round_schedule::<23>(s, b); + round_schedule::<24>(s, b); + round_schedule::<25>(s, b); + round_schedule::<26>(s, b); + round_schedule::<27>(s, b); + round_schedule::<28>(s, b); + round_schedule::<29>(s, b); + round_schedule::<30>(s, b); + round_schedule::<31>(s, b); + round_schedule::<32>(s, b); + round_schedule::<33>(s, b); + round_schedule::<34>(s, b); + round_schedule::<35>(s, b); + round_schedule::<36>(s, b); + round_schedule::<37>(s, b); + round_schedule::<38>(s, b); + round_schedule::<39>(s, b); + round_schedule::<40>(s, b); + round_schedule::<41>(s, b); + round_schedule::<42>(s, b); + round_schedule::<43>(s, b); + round_schedule::<44>(s, b); + round_schedule::<45>(s, b); + round_schedule::<46>(s, b); + round_schedule::<47>(s, b); + round::<48>(s, b); + round::<49>(s, b); + round::<50>(s, b); + round::<51>(s, b); + round::<52>(s, b); + round::<53>(s, b); + round::<54>(s, b); + round::<55>(s, b); + round::<56>(s, b); + round::<57>(s, b); + round::<58>(s, b); + round::<59>(s, b); + round::<60>(s, b); + round::<61>(s, b); + round::<62>(s, b); + round::<63>(s, b); + + for i in 0..8 { + state[i] = state[i].wrapping_add(s[i]); + } } pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { diff --git a/sha2/src/sha512/riscv_zknh.rs b/sha2/src/sha512/riscv_zknh.rs index d1eed3cf..cd2c76af 100644 --- a/sha2/src/sha512/riscv_zknh.rs +++ b/sha2/src/sha512/riscv_zknh.rs @@ -8,41 +8,6 @@ use core::arch::riscv64::*; #[cfg(not(target_feature = "zknh"))] compile_error!("riscv-zknh backend requires enabled zknh target feature"); -#[inline(always)] -fn ch(x: u64, y: u64, z: u64) -> u64 { - (x & y) ^ (!x & z) -} - -#[inline(always)] -fn maj(x: u64, y: u64, z: u64) -> u64 { - (x & y) ^ (x & z) ^ (y & z) -} - -/// Forcefully read the round constant to prevent its reconstruction on stack. -fn read_rk() -> u64 { - assert!(IDX < K64.len()); - let res; - unsafe { - #[cfg(target_arch = "riscv64")] - { - core::arch::asm!( - "ld {dst}, 8*{IDX}({p})", - IDX = const IDX, - p = in(reg) &K64, - dst = out(reg) res, - // note: the `pure` option is intentionally not used to prevent - // caching of the round constant on stack - options(preserves_flags, nostack, readonly) - ); - } - #[cfg(target_arch = "riscv32")] - { - res = core::ptr::read_volatile(&K64[IDX]); - } - } - res -} - #[cfg(target_arch = "riscv32")] unsafe fn sha512sum0(x: u64) -> u64 { let a = sha512sum0r((x >> 32) as u32, x as u32); @@ -71,206 +36,136 @@ unsafe fn sha512sig1(x: u64) -> u64 { ((a as u64) << 32) | (b as u64) } -macro_rules! round { - ( - $a: ident, $b: ident, $c: ident, $d: ident, - $e: ident, $f: ident, $g: ident, $h: ident, - $k: expr, $w: expr - ) => { - // SAFETY: we have checked that the zknh target feature - // required by the intrinsics is enabled - $h = $h - .wrapping_add(unsafe { sha512sum1($e) }) - .wrapping_add(ch($e, $f, $g)) - .wrapping_add(read_rk::<$k>()) - .wrapping_add($w); - $d = $d.wrapping_add($h); - $h = $h - .wrapping_add(unsafe { sha512sum0($a) }) - .wrapping_add(maj($a, $b, $c)) - }; +#[inline(always)] +fn ch(x: u64, y: u64, z: u64) -> u64 { + (x & y) ^ (!x & z) } -macro_rules! schedule { - ($m0: ident, $m1: ident, $m9: ident, $me: ident) => { - // SAFETY: we have checked that the zknh target feature - // required by the intrinsics is enabled - $m0 = $m0 - .wrapping_add(unsafe { sha512sig1($me) }) - .wrapping_add($m9) - .wrapping_add(unsafe { sha512sig0($m1) }); - }; +#[inline(always)] +fn maj(x: u64, y: u64, z: u64) -> u64 { + (x & y) ^ (x & z) ^ (y & z) } -fn compress_block(state: &mut [u64; 8], block: [u64; 16]) { - #[rustfmt::skip] - let [ - mut m0, mut m1, mut m2, mut m3, mut m4, mut m5, mut m6, mut m7, - mut m8, mut m9, mut ma, mut mb, mut mc, mut md, mut me, mut mf, - ] = block; - let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h] = *state; - - round!(a, b, c, d, e, f, g, h, 0, m0); - round!(h, a, b, c, d, e, f, g, 1, m1); - round!(g, h, a, b, c, d, e, f, 2, m2); - round!(f, g, h, a, b, c, d, e, 3, m3); - round!(e, f, g, h, a, b, c, d, 4, m4); - round!(d, e, f, g, h, a, b, c, 5, m5); - round!(c, d, e, f, g, h, a, b, 6, m6); - round!(b, c, d, e, f, g, h, a, 7, m7); - round!(a, b, c, d, e, f, g, h, 8, m8); - round!(h, a, b, c, d, e, f, g, 9, m9); - round!(g, h, a, b, c, d, e, f, 10, ma); - round!(f, g, h, a, b, c, d, e, 11, mb); - round!(e, f, g, h, a, b, c, d, 12, mc); - round!(d, e, f, g, h, a, b, c, 13, md); - round!(c, d, e, f, g, h, a, b, 14, me); - round!(b, c, d, e, f, g, h, a, 15, mf); - - schedule!(m0, m1, m9, me); - schedule!(m1, m2, ma, mf); - schedule!(m2, m3, mb, m0); - schedule!(m3, m4, mc, m1); - schedule!(m4, m5, md, m2); - schedule!(m5, m6, me, m3); - schedule!(m6, m7, mf, m4); - schedule!(m7, m8, m0, m5); - schedule!(m8, m9, m1, m6); - schedule!(m9, ma, m2, m7); - schedule!(ma, mb, m3, m8); - schedule!(mb, mc, m4, m9); - schedule!(mc, md, m5, ma); - schedule!(md, me, m6, mb); - schedule!(me, mf, m7, mc); - schedule!(mf, m0, m8, md); - - round!(a, b, c, d, e, f, g, h, 16, m0); - round!(h, a, b, c, d, e, f, g, 17, m1); - round!(g, h, a, b, c, d, e, f, 18, m2); - round!(f, g, h, a, b, c, d, e, 19, m3); - round!(e, f, g, h, a, b, c, d, 20, m4); - round!(d, e, f, g, h, a, b, c, 21, m5); - round!(c, d, e, f, g, h, a, b, 22, m6); - round!(b, c, d, e, f, g, h, a, 23, m7); - round!(a, b, c, d, e, f, g, h, 24, m8); - round!(h, a, b, c, d, e, f, g, 25, m9); - round!(g, h, a, b, c, d, e, f, 26, ma); - round!(f, g, h, a, b, c, d, e, 27, mb); - round!(e, f, g, h, a, b, c, d, 28, mc); - round!(d, e, f, g, h, a, b, c, 29, md); - round!(c, d, e, f, g, h, a, b, 30, me); - round!(b, c, d, e, f, g, h, a, 31, mf); - - schedule!(m0, m1, m9, me); - schedule!(m1, m2, ma, mf); - schedule!(m2, m3, mb, m0); - schedule!(m3, m4, mc, m1); - schedule!(m4, m5, md, m2); - schedule!(m5, m6, me, m3); - schedule!(m6, m7, mf, m4); - schedule!(m7, m8, m0, m5); - schedule!(m8, m9, m1, m6); - schedule!(m9, ma, m2, m7); - schedule!(ma, mb, m3, m8); - schedule!(mb, mc, m4, m9); - schedule!(mc, md, m5, ma); - schedule!(md, me, m6, mb); - schedule!(me, mf, m7, mc); - schedule!(mf, m0, m8, md); - - round!(a, b, c, d, e, f, g, h, 32, m0); - round!(h, a, b, c, d, e, f, g, 33, m1); - round!(g, h, a, b, c, d, e, f, 34, m2); - round!(f, g, h, a, b, c, d, e, 35, m3); - round!(e, f, g, h, a, b, c, d, 36, m4); - round!(d, e, f, g, h, a, b, c, 37, m5); - round!(c, d, e, f, g, h, a, b, 38, m6); - round!(b, c, d, e, f, g, h, a, 39, m7); - round!(a, b, c, d, e, f, g, h, 40, m8); - round!(h, a, b, c, d, e, f, g, 41, m9); - round!(g, h, a, b, c, d, e, f, 42, ma); - round!(f, g, h, a, b, c, d, e, 43, mb); - round!(e, f, g, h, a, b, c, d, 44, mc); - round!(d, e, f, g, h, a, b, c, 45, md); - round!(c, d, e, f, g, h, a, b, 46, me); - round!(b, c, d, e, f, g, h, a, 47, mf); - - schedule!(m0, m1, m9, me); - schedule!(m1, m2, ma, mf); - schedule!(m2, m3, mb, m0); - schedule!(m3, m4, mc, m1); - schedule!(m4, m5, md, m2); - schedule!(m5, m6, me, m3); - schedule!(m6, m7, mf, m4); - schedule!(m7, m8, m0, m5); - schedule!(m8, m9, m1, m6); - schedule!(m9, ma, m2, m7); - schedule!(ma, mb, m3, m8); - schedule!(mb, mc, m4, m9); - schedule!(mc, md, m5, ma); - schedule!(md, me, m6, mb); - schedule!(me, mf, m7, mc); - schedule!(mf, m0, m8, md); - - round!(a, b, c, d, e, f, g, h, 48, m0); - round!(h, a, b, c, d, e, f, g, 49, m1); - round!(g, h, a, b, c, d, e, f, 50, m2); - round!(f, g, h, a, b, c, d, e, 51, m3); - round!(e, f, g, h, a, b, c, d, 52, m4); - round!(d, e, f, g, h, a, b, c, 53, m5); - round!(c, d, e, f, g, h, a, b, 54, m6); - round!(b, c, d, e, f, g, h, a, 55, m7); - round!(a, b, c, d, e, f, g, h, 56, m8); - round!(h, a, b, c, d, e, f, g, 57, m9); - round!(g, h, a, b, c, d, e, f, 58, ma); - round!(f, g, h, a, b, c, d, e, 59, mb); - round!(e, f, g, h, a, b, c, d, 60, mc); - round!(d, e, f, g, h, a, b, c, 61, md); - round!(c, d, e, f, g, h, a, b, 62, me); - round!(b, c, d, e, f, g, h, a, 63, mf); +fn round(state: &mut [u64; 8], block: &[u64; 16]) { + let n = K64.len() - R; + let a = n % 8; + let b = (n + 1) % 8; + let c = (n + 2) % 8; + let d = (n + 3) % 8; + let e = (n + 4) % 8; + let f = (n + 5) % 8; + let g = (n + 6) % 8; + let h = (n + 7) % 8; + + state[h] = state[h] + .wrapping_add(unsafe { sha512sum1(state[e]) }) + .wrapping_add(ch(state[e], state[f], state[g])) + // Force reading of constants from the static to prevent bad codegen + .wrapping_add(unsafe { core::ptr::read_volatile(&K64[R]) }) + .wrapping_add(block[R % 16]); + state[d] = state[d].wrapping_add(state[h]); + state[h] = state[h] + .wrapping_add(unsafe { sha512sum0(state[a]) }) + .wrapping_add(maj(state[a], state[b], state[c])) +} - schedule!(m0, m1, m9, me); - schedule!(m1, m2, ma, mf); - schedule!(m2, m3, mb, m0); - schedule!(m3, m4, mc, m1); - schedule!(m4, m5, md, m2); - schedule!(m5, m6, me, m3); - schedule!(m6, m7, mf, m4); - schedule!(m7, m8, m0, m5); - schedule!(m8, m9, m1, m6); - schedule!(m9, ma, m2, m7); - schedule!(ma, mb, m3, m8); - schedule!(mb, mc, m4, m9); - schedule!(mc, md, m5, ma); - schedule!(md, me, m6, mb); - schedule!(me, mf, m7, mc); - schedule!(mf, m0, m8, md); +fn round_schedule(state: &mut [u64; 8], block: &mut [u64; 16]) { + round::(state, block); - round!(a, b, c, d, e, f, g, h, 64, m0); - round!(h, a, b, c, d, e, f, g, 65, m1); - round!(g, h, a, b, c, d, e, f, 66, m2); - round!(f, g, h, a, b, c, d, e, 67, m3); - round!(e, f, g, h, a, b, c, d, 68, m4); - round!(d, e, f, g, h, a, b, c, 69, m5); - round!(c, d, e, f, g, h, a, b, 70, m6); - round!(b, c, d, e, f, g, h, a, 71, m7); - round!(a, b, c, d, e, f, g, h, 72, m8); - round!(h, a, b, c, d, e, f, g, 73, m9); - round!(g, h, a, b, c, d, e, f, 74, ma); - round!(f, g, h, a, b, c, d, e, 75, mb); - round!(e, f, g, h, a, b, c, d, 76, mc); - round!(d, e, f, g, h, a, b, c, 77, md); - round!(c, d, e, f, g, h, a, b, 78, me); - round!(b, c, d, e, f, g, h, a, 79, mf); + block[R % 16] = block[R % 16] + .wrapping_add(unsafe { sha512sig1(block[(R + 14) % 16]) }) + .wrapping_add(block[(R + 9) % 16]) + .wrapping_add(unsafe { sha512sig0(block[(R + 1) % 16]) }); +} - state[0] = state[0].wrapping_add(a); - state[1] = state[1].wrapping_add(b); - state[2] = state[2].wrapping_add(c); - state[3] = state[3].wrapping_add(d); - state[4] = state[4].wrapping_add(e); - state[5] = state[5].wrapping_add(f); - state[6] = state[6].wrapping_add(g); - state[7] = state[7].wrapping_add(h); +fn compress_block(state: &mut [u64; 8], mut block: [u64; 16]) { + let s = &mut state.clone(); + let b = &mut block; + + round_schedule::<0>(s, b); + round_schedule::<1>(s, b); + round_schedule::<2>(s, b); + round_schedule::<3>(s, b); + round_schedule::<4>(s, b); + round_schedule::<5>(s, b); + round_schedule::<6>(s, b); + round_schedule::<7>(s, b); + round_schedule::<8>(s, b); + round_schedule::<9>(s, b); + round_schedule::<10>(s, b); + round_schedule::<11>(s, b); + round_schedule::<12>(s, b); + round_schedule::<13>(s, b); + round_schedule::<14>(s, b); + round_schedule::<15>(s, b); + round_schedule::<16>(s, b); + round_schedule::<17>(s, b); + round_schedule::<18>(s, b); + round_schedule::<19>(s, b); + round_schedule::<20>(s, b); + round_schedule::<21>(s, b); + round_schedule::<22>(s, b); + round_schedule::<23>(s, b); + round_schedule::<24>(s, b); + round_schedule::<25>(s, b); + round_schedule::<26>(s, b); + round_schedule::<27>(s, b); + round_schedule::<28>(s, b); + round_schedule::<29>(s, b); + round_schedule::<30>(s, b); + round_schedule::<31>(s, b); + round_schedule::<32>(s, b); + round_schedule::<33>(s, b); + round_schedule::<34>(s, b); + round_schedule::<35>(s, b); + round_schedule::<36>(s, b); + round_schedule::<37>(s, b); + round_schedule::<38>(s, b); + round_schedule::<39>(s, b); + round_schedule::<40>(s, b); + round_schedule::<41>(s, b); + round_schedule::<42>(s, b); + round_schedule::<43>(s, b); + round_schedule::<44>(s, b); + round_schedule::<45>(s, b); + round_schedule::<46>(s, b); + round_schedule::<47>(s, b); + round_schedule::<48>(s, b); + round_schedule::<49>(s, b); + round_schedule::<50>(s, b); + round_schedule::<51>(s, b); + round_schedule::<52>(s, b); + round_schedule::<53>(s, b); + round_schedule::<54>(s, b); + round_schedule::<55>(s, b); + round_schedule::<56>(s, b); + round_schedule::<57>(s, b); + round_schedule::<58>(s, b); + round_schedule::<59>(s, b); + round_schedule::<60>(s, b); + round_schedule::<61>(s, b); + round_schedule::<62>(s, b); + round_schedule::<63>(s, b); + round::<64>(s, b); + round::<65>(s, b); + round::<66>(s, b); + round::<67>(s, b); + round::<68>(s, b); + round::<69>(s, b); + round::<70>(s, b); + round::<71>(s, b); + round::<72>(s, b); + round::<73>(s, b); + round::<74>(s, b); + round::<75>(s, b); + round::<76>(s, b); + round::<77>(s, b); + round::<78>(s, b); + round::<79>(s, b); + + for i in 0..8 { + state[i] = state[i].wrapping_add(s[i]); + } } pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {