Skip to content

Commit

Permalink
Replace macros with fns
Browse files Browse the repository at this point in the history
  • Loading branch information
newpavlov committed Aug 21, 2024
1 parent b37cfa0 commit b263635
Show file tree
Hide file tree
Showing 2 changed files with 221 additions and 407 deletions.
277 changes: 98 additions & 179 deletions sha2/src/sha256/riscv_zknh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,190 +18,109 @@ fn maj(x: u32, y: u32, z: u32) -> u32 {
(x & y) ^ (x & z) ^ (y & z)
}

/// Forcefully read the round constant to prevent its reconstruction on stack.
///
/// Returns the word loaded by an inline `lw` from byte offset `4 * IDX`
/// relative to the address of `K32`.
///
/// # Panics
///
/// Panics if `IDX` is not smaller than `K32.len()`.
fn read_rk<const IDX: usize>() -> u32 {
assert!(IDX < K32.len());
// Assigned by the `dst` output operand of the asm block below.
let res;
// SAFETY: the assert above bounds `IDX`, so the load at `4 * IDX` stays
// inside `K32` (assuming its elements are 4-byte words — TODO confirm).
unsafe {
core::arch::asm!(
"lw {dst}, 4*{IDX}({p})",
IDX = const IDX,
p = in(reg) &K32,
dst = out(reg) res,
// note: the `pure` option is intentionally not used to prevent
// caching of the round constant on stack
options(preserves_flags, nostack, readonly)
);
}
res
/// Performs compression round `R` on `state`, in place.
///
/// The working variables are never moved between rounds. Instead, the slot of
/// `state` that plays each role (`a` through `h`) is derived from `R`, so the
/// roles shift by one position per round. `block[R % 16]` supplies the message
/// word and `K32[R]` the round constant.
fn round<const R: usize>(state: &mut [u32; 8], block: &[u32; 16]) {
    // Slot indices for this round; role `i` lives at `(base + i) % 8`.
    let base = K32.len() - R;
    let (a, b, c, d) = (base % 8, (base + 1) % 8, (base + 2) % 8, (base + 3) % 8);
    let (e, f, g, h) = (
        (base + 4) % 8,
        (base + 5) % 8,
        (base + 6) % 8,
        (base + 7) % 8,
    );

    // SAFETY: the intrinsic needs the zknh target feature, which this backend
    // is expected to have checked as enabled — TODO confirm at the cfg site.
    let big_sigma1 = unsafe { sha256sum1(state[e]) };
    let choice = ch(state[e], state[f], state[g]);
    // Force reading of constants from the static to prevent bad codegen.
    // SAFETY: `&K32[R]` is a bounds-checked reference, hence valid to read.
    let constant = unsafe { core::ptr::read_volatile(&K32[R]) };
    let word = block[R % 16];

    // First half of the round: the new value of slot `h`.
    let t1 = state[h]
        .wrapping_add(big_sigma1)
        .wrapping_add(choice)
        .wrapping_add(constant)
        .wrapping_add(word);
    state[d] = state[d].wrapping_add(t1);

    // SAFETY: same target-feature requirement as for `sha256sum1` above.
    let big_sigma0 = unsafe { sha256sum0(state[a]) };
    let majority = maj(state[a], state[b], state[c]);
    state[h] = t1.wrapping_add(big_sigma0).wrapping_add(majority);
}

// Performs one round on the eight working variables named by `$a`..`$h`:
// `$h` and `$d` are reassigned, the other six are only read. `$k` is the
// constant index handed to `read_rk` as a const generic argument and `$w` is
// the message word added for this round.
macro_rules! round {
(
$a: ident, $b: ident, $c: ident, $d: ident,
$e: ident, $f: ident, $g: ident, $h: ident,
$k: expr, $w: expr
) => {
// First half: add `sha256sum1($e)`, `ch($e, $f, $g)`, the round constant
// and the message word into `$h`.
// SAFETY: we have checked that the zknh target feature
// required by the intrinsics is enabled
$h = $h
.wrapping_add(unsafe { sha256sum1($e) })
.wrapping_add(ch($e, $f, $g))
.wrapping_add(read_rk::<$k>())
.wrapping_add($w);
// The updated `$h` is folded into `$d` before the second half is added.
$d = $d.wrapping_add($h);
// Second half: add `sha256sum0($a)` and `maj($a, $b, $c)` into `$h`.
$h = $h
.wrapping_add(unsafe { sha256sum0($a) })
.wrapping_add(maj($a, $b, $c))
};
}
// NOTE(review): this span looks like a diff view without +/- markers: the
// `round_schedule` function lines are interleaved with the `schedule!`
// macro they appear to replace — confirm against the commit.
//
// `round_schedule` runs round `R` and then updates `block[R % 16]` in place
// by adding `sha256sig1(block[(R + 14) % 16])`, `block[(R + 9) % 16]` and
// `sha256sig0(block[(R + 1) % 16])` with wrapping arithmetic.
fn round_schedule<const R: usize>(state: &mut [u32; 8], block: &mut [u32; 16]) {
round::<R>(state, block);

// `schedule!` performs the same kind of update on named locals:
// `$m0 += sha256sig1($me) + $m9 + sha256sig0($m1)` (wrapping).
macro_rules! schedule {
($m0: ident, $m1: ident, $m9: ident, $me: ident) => {
// SAFETY: we have checked that the zknh target feature
// required by the intrinsics is enabled
$m0 = $m0
.wrapping_add(unsafe { sha256sig1($me) })
.wrapping_add($m9)
.wrapping_add(unsafe { sha256sig0($m1) });
};
// Function form of the update, indexing `block` modulo 16.
block[R % 16] = block[R % 16]
.wrapping_add(unsafe { sha256sig1(block[(R + 14) % 16]) })
.wrapping_add(block[(R + 9) % 16])
.wrapping_add(unsafe { sha256sig0(block[(R + 1) % 16]) });
}

// Macro-based form of the block compression: 64 `round!` invocations in four
// groups of 16, with 16 `schedule!` invocations between consecutive groups,
// followed by a wrapping add of the working variables into `state`.
fn compress_block(state: &mut [u32; 8], block: [u32; 16]) {
// Unpack the 16 message words into named locals so the macros can take
// them as identifiers.
#[rustfmt::skip]
let [
mut m0, mut m1, mut m2, mut m3, mut m4, mut m5, mut m6, mut m7,
mut m8, mut m9, mut ma, mut mb, mut mc, mut md, mut me, mut mf,
] = block;
// Working copy of the eight state words.
let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h] = *state;

// Rounds 0..=15. The identifier list is shifted by one position on every
// invocation instead of moving values between variables.
round!(a, b, c, d, e, f, g, h, 0, m0);
round!(h, a, b, c, d, e, f, g, 1, m1);
round!(g, h, a, b, c, d, e, f, 2, m2);
round!(f, g, h, a, b, c, d, e, 3, m3);
round!(e, f, g, h, a, b, c, d, 4, m4);
round!(d, e, f, g, h, a, b, c, 5, m5);
round!(c, d, e, f, g, h, a, b, 6, m6);
round!(b, c, d, e, f, g, h, a, 7, m7);
round!(a, b, c, d, e, f, g, h, 8, m8);
round!(h, a, b, c, d, e, f, g, 9, m9);
round!(g, h, a, b, c, d, e, f, 10, ma);
round!(f, g, h, a, b, c, d, e, 11, mb);
round!(e, f, g, h, a, b, c, d, 12, mc);
round!(d, e, f, g, h, a, b, c, 13, md);
round!(c, d, e, f, g, h, a, b, 14, me);
round!(b, c, d, e, f, g, h, a, 15, mf);

// Update all 16 message words in place for rounds 16..=31.
schedule!(m0, m1, m9, me);
schedule!(m1, m2, ma, mf);
schedule!(m2, m3, mb, m0);
schedule!(m3, m4, mc, m1);
schedule!(m4, m5, md, m2);
schedule!(m5, m6, me, m3);
schedule!(m6, m7, mf, m4);
schedule!(m7, m8, m0, m5);
schedule!(m8, m9, m1, m6);
schedule!(m9, ma, m2, m7);
schedule!(ma, mb, m3, m8);
schedule!(mb, mc, m4, m9);
schedule!(mc, md, m5, ma);
schedule!(md, me, m6, mb);
schedule!(me, mf, m7, mc);
schedule!(mf, m0, m8, md);

// Rounds 16..=31.
round!(a, b, c, d, e, f, g, h, 16, m0);
round!(h, a, b, c, d, e, f, g, 17, m1);
round!(g, h, a, b, c, d, e, f, 18, m2);
round!(f, g, h, a, b, c, d, e, 19, m3);
round!(e, f, g, h, a, b, c, d, 20, m4);
round!(d, e, f, g, h, a, b, c, 21, m5);
round!(c, d, e, f, g, h, a, b, 22, m6);
round!(b, c, d, e, f, g, h, a, 23, m7);
round!(a, b, c, d, e, f, g, h, 24, m8);
round!(h, a, b, c, d, e, f, g, 25, m9);
round!(g, h, a, b, c, d, e, f, 26, ma);
round!(f, g, h, a, b, c, d, e, 27, mb);
round!(e, f, g, h, a, b, c, d, 28, mc);
round!(d, e, f, g, h, a, b, c, 29, md);
round!(c, d, e, f, g, h, a, b, 30, me);
round!(b, c, d, e, f, g, h, a, 31, mf);

// Update the message words again for rounds 32..=47.
schedule!(m0, m1, m9, me);
schedule!(m1, m2, ma, mf);
schedule!(m2, m3, mb, m0);
schedule!(m3, m4, mc, m1);
schedule!(m4, m5, md, m2);
schedule!(m5, m6, me, m3);
schedule!(m6, m7, mf, m4);
schedule!(m7, m8, m0, m5);
schedule!(m8, m9, m1, m6);
schedule!(m9, ma, m2, m7);
schedule!(ma, mb, m3, m8);
schedule!(mb, mc, m4, m9);
schedule!(mc, md, m5, ma);
schedule!(md, me, m6, mb);
schedule!(me, mf, m7, mc);
schedule!(mf, m0, m8, md);

// Rounds 32..=47.
round!(a, b, c, d, e, f, g, h, 32, m0);
round!(h, a, b, c, d, e, f, g, 33, m1);
round!(g, h, a, b, c, d, e, f, 34, m2);
round!(f, g, h, a, b, c, d, e, 35, m3);
round!(e, f, g, h, a, b, c, d, 36, m4);
round!(d, e, f, g, h, a, b, c, 37, m5);
round!(c, d, e, f, g, h, a, b, 38, m6);
round!(b, c, d, e, f, g, h, a, 39, m7);
round!(a, b, c, d, e, f, g, h, 40, m8);
round!(h, a, b, c, d, e, f, g, 41, m9);
round!(g, h, a, b, c, d, e, f, 42, ma);
round!(f, g, h, a, b, c, d, e, 43, mb);
round!(e, f, g, h, a, b, c, d, 44, mc);
round!(d, e, f, g, h, a, b, c, 45, md);
round!(c, d, e, f, g, h, a, b, 46, me);
round!(b, c, d, e, f, g, h, a, 47, mf);

// Last update of the message words, for rounds 48..=63.
schedule!(m0, m1, m9, me);
schedule!(m1, m2, ma, mf);
schedule!(m2, m3, mb, m0);
schedule!(m3, m4, mc, m1);
schedule!(m4, m5, md, m2);
schedule!(m5, m6, me, m3);
schedule!(m6, m7, mf, m4);
schedule!(m7, m8, m0, m5);
schedule!(m8, m9, m1, m6);
schedule!(m9, ma, m2, m7);
schedule!(ma, mb, m3, m8);
schedule!(mb, mc, m4, m9);
schedule!(mc, md, m5, ma);
schedule!(md, me, m6, mb);
schedule!(me, mf, m7, mc);
schedule!(mf, m0, m8, md);

// Rounds 48..=63.
round!(a, b, c, d, e, f, g, h, 48, m0);
round!(h, a, b, c, d, e, f, g, 49, m1);
round!(g, h, a, b, c, d, e, f, 50, m2);
round!(f, g, h, a, b, c, d, e, 51, m3);
round!(e, f, g, h, a, b, c, d, 52, m4);
round!(d, e, f, g, h, a, b, c, 53, m5);
round!(c, d, e, f, g, h, a, b, 54, m6);
round!(b, c, d, e, f, g, h, a, 55, m7);
round!(a, b, c, d, e, f, g, h, 56, m8);
round!(h, a, b, c, d, e, f, g, 57, m9);
round!(g, h, a, b, c, d, e, f, 58, ma);
round!(f, g, h, a, b, c, d, e, 59, mb);
round!(e, f, g, h, a, b, c, d, 60, mc);
round!(d, e, f, g, h, a, b, c, 61, md);
round!(c, d, e, f, g, h, a, b, 62, me);
round!(b, c, d, e, f, g, h, a, 63, mf);

// Fold the working variables back into the caller's state (wrapping add).
state[0] = state[0].wrapping_add(a);
state[1] = state[1].wrapping_add(b);
state[2] = state[2].wrapping_add(c);
state[3] = state[3].wrapping_add(d);
state[4] = state[4].wrapping_add(e);
state[5] = state[5].wrapping_add(f);
state[6] = state[6].wrapping_add(g);
state[7] = state[7].wrapping_add(h);
fn compress_block(state: &mut [u32; 8], mut block: [u32; 16]) {
let s = &mut state.clone();
let b = &mut block;

round_schedule::<0>(s, b);
round_schedule::<1>(s, b);
round_schedule::<2>(s, b);
round_schedule::<3>(s, b);
round_schedule::<4>(s, b);
round_schedule::<5>(s, b);
round_schedule::<6>(s, b);
round_schedule::<7>(s, b);
round_schedule::<8>(s, b);
round_schedule::<9>(s, b);
round_schedule::<10>(s, b);
round_schedule::<11>(s, b);
round_schedule::<12>(s, b);
round_schedule::<13>(s, b);
round_schedule::<14>(s, b);
round_schedule::<15>(s, b);
round_schedule::<16>(s, b);
round_schedule::<17>(s, b);
round_schedule::<18>(s, b);
round_schedule::<19>(s, b);
round_schedule::<20>(s, b);
round_schedule::<21>(s, b);
round_schedule::<22>(s, b);
round_schedule::<23>(s, b);
round_schedule::<24>(s, b);
round_schedule::<25>(s, b);
round_schedule::<26>(s, b);
round_schedule::<27>(s, b);
round_schedule::<28>(s, b);
round_schedule::<29>(s, b);
round_schedule::<30>(s, b);
round_schedule::<31>(s, b);
round_schedule::<32>(s, b);
round_schedule::<33>(s, b);
round_schedule::<34>(s, b);
round_schedule::<35>(s, b);
round_schedule::<36>(s, b);
round_schedule::<37>(s, b);
round_schedule::<38>(s, b);
round_schedule::<39>(s, b);
round_schedule::<40>(s, b);
round_schedule::<41>(s, b);
round_schedule::<42>(s, b);
round_schedule::<43>(s, b);
round_schedule::<44>(s, b);
round_schedule::<45>(s, b);
round_schedule::<46>(s, b);
round_schedule::<47>(s, b);
round::<48>(s, b);
round::<49>(s, b);
round::<50>(s, b);
round::<51>(s, b);
round::<52>(s, b);
round::<53>(s, b);
round::<54>(s, b);
round::<55>(s, b);
round::<56>(s, b);
round::<57>(s, b);
round::<58>(s, b);
round::<59>(s, b);
round::<60>(s, b);
round::<61>(s, b);
round::<62>(s, b);
round::<63>(s, b);

for i in 0..8 {
state[i] = state[i].wrapping_add(s[i]);
}
}

pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
Expand Down
Loading

0 comments on commit b263635

Please sign in to comment.