From 9a2bb2590a6a6bf829965835c9825191e81f1e1a Mon Sep 17 00:00:00 2001 From: Artyom Pavlov Date: Tue, 27 Aug 2024 15:12:50 +0300 Subject: [PATCH] sha2: move `opaque_load` to the utils module (#618) --- sha2/src/sha256/riscv_zknh.rs | 29 +------------------ sha2/src/sha256/riscv_zknh_utils.rs | 45 ++++++++++++++++++++--------- sha2/src/sha512/riscv_zknh.rs | 35 +--------------------- sha2/src/sha512/riscv_zknh_utils.rs | 32 ++++++++++++++++++++ 4 files changed, 65 insertions(+), 76 deletions(-) diff --git a/sha2/src/sha256/riscv_zknh.rs b/sha2/src/sha256/riscv_zknh.rs index 7477c640..fa0ff87a 100644 --- a/sha2/src/sha256/riscv_zknh.rs +++ b/sha2/src/sha256/riscv_zknh.rs @@ -21,33 +21,6 @@ fn maj(x: u32, y: u32, z: u32) -> u32 { (x & y) ^ (x & z) ^ (y & z) } -/// This function returns `k[R]`, but prevents compiler from inlining the indexed value -pub(super) fn opaque_load(k: &[u32]) -> u32 { - assert!(R < k.len()); - let dst; - #[cfg(target_arch = "riscv64")] - unsafe { - core::arch::asm!( - "lwu {dst}, 4*{R}({k})", - R = const R, - k = in(reg) k.as_ptr(), - dst = out(reg) dst, - options(pure, readonly, nostack, preserves_flags), - ); - } - #[cfg(target_arch = "riscv32")] - unsafe { - core::arch::asm!( - "lw {dst}, 4*{R}({k})", - R = const R, - k = in(reg) k.as_ptr(), - dst = out(reg) dst, - options(pure, readonly, nostack, preserves_flags), - ); - } - dst -} - fn round(state: &mut [u32; 8], block: &[u32; 16], k: &[u32]) { let n = K32.len() - R; #[allow(clippy::identity_op)] @@ -63,7 +36,7 @@ fn round(state: &mut [u32; 8], block: &[u32; 16], k: &[u32]) { state[h] = state[h] .wrapping_add(unsafe { sha256sum1(state[e]) }) .wrapping_add(ch(state[e], state[f], state[g])) - .wrapping_add(opaque_load::(k)) + .wrapping_add(super::riscv_zknh_utils::opaque_load::(k)) .wrapping_add(block[R]); state[d] = state[d].wrapping_add(state[h]); state[h] = state[h] diff --git a/sha2/src/sha256/riscv_zknh_utils.rs b/sha2/src/sha256/riscv_zknh_utils.rs index d75a0b1c..d5c07267 100644 --- a/sha2/src/sha256/riscv_zknh_utils.rs +++ b/sha2/src/sha256/riscv_zknh_utils.rs @@ -21,6 +21,20 @@ fn load_aligned_block(block: &[u8; 64]) -> [u32; 16] { res } +/// Use LW instruction on RV32 and LWU on RV64 +#[cfg(target_arch = "riscv32")] +macro_rules! lw { + ($r:literal) => { + concat!("lw ", $r) + }; +} +#[cfg(target_arch = "riscv64")] +macro_rules! lw { + ($r:literal) => { + concat!("lwu ", $r) + }; +} + #[inline(always)] fn load_unaligned_block(block: &[u8; 64]) -> [u32; 16] { let offset = (block.as_ptr() as usize) % align_of::(); @@ -32,20 +46,6 @@ fn load_unaligned_block(block: &[u8; 64]) -> [u32; 16] { let mut left: u32; let mut res = [0u32; 16]; - /// Use LW instruction on RV32 and LWU on RV64 - #[cfg(target_arch = "riscv32")] - macro_rules! lw { - ($r:literal) => { - concat!("lw ", $r) - }; - } - #[cfg(target_arch = "riscv64")] - macro_rules! lw { - ($r:literal) => { - concat!("lwu ", $r) - }; - } - unsafe { asm!( lw!("{left}, 0({bp})"), // left = unsafe { ptr::read(bp) }; @@ -78,3 +78,20 @@ fn load_unaligned_block(block: &[u8; 64]) -> [u32; 16] { res } + +/// This function returns `k[R]`, but prevents compiler from inlining the indexed value +#[cfg(sha2_backend = "riscv-zknh")] +pub(super) fn opaque_load(k: &[u32]) -> u32 { + assert!(R < k.len()); + let dst; + unsafe { + core::arch::asm!( + lw!("{dst}, 4*{R}({k})"), + R = const R, + k = in(reg) k.as_ptr(), + dst = out(reg) dst, + options(pure, readonly, nostack, preserves_flags), + ); + } + dst +} diff --git a/sha2/src/sha512/riscv_zknh.rs b/sha2/src/sha512/riscv_zknh.rs index 7be35ee8..be7310f7 100644 --- a/sha2/src/sha512/riscv_zknh.rs +++ b/sha2/src/sha512/riscv_zknh.rs @@ -49,39 +49,6 @@ fn maj(x: u64, y: u64, z: u64) -> u64 { (x & y) ^ (x & z) ^ (y & z) } -/// This function returns `k[R]`, but prevents compiler from inlining the indexed value -pub(super) fn opaque_load(k: &[u64]) -> u64 { - use core::arch::asm; - assert!(R < k.len()); - #[cfg(target_arch = "riscv64")] - unsafe { - let dst; - asm!( - "ld {dst}, {N}({k})", - N = const 8 * R, - k = in(reg) k.as_ptr(), - dst = out(reg) dst, - options(pure, readonly, nostack, preserves_flags), - ); - dst - } - #[cfg(target_arch = "riscv32")] - unsafe { - let [hi, lo]: [u32; 2]; - asm!( - "lw {lo}, {N1}({k})", - "lw {hi}, {N2}({k})", - N1 = const 8 * R, - N2 = const 8 * R + 4, - k = in(reg) k.as_ptr(), - lo = out(reg) lo, - hi = out(reg) hi, - options(pure, readonly, nostack, preserves_flags), - ); - ((hi as u64) << 32) | (lo as u64) - } -} - fn round(state: &mut [u64; 8], block: &[u64; 16], k: &[u64]) { let n = K64.len() - R; #[allow(clippy::identity_op)] @@ -97,7 +64,7 @@ fn round(state: &mut [u64; 8], block: &[u64; 16], k: &[u64]) { state[h] = state[h] .wrapping_add(unsafe { sha512sum1(state[e]) }) .wrapping_add(ch(state[e], state[f], state[g])) - .wrapping_add(opaque_load::(k)) + .wrapping_add(super::riscv_zknh_utils::opaque_load::(k)) .wrapping_add(block[R]); state[d] = state[d].wrapping_add(state[h]); state[h] = state[h] diff --git a/sha2/src/sha512/riscv_zknh_utils.rs b/sha2/src/sha512/riscv_zknh_utils.rs index 0b474606..41197d11 100644 --- a/sha2/src/sha512/riscv_zknh_utils.rs +++ b/sha2/src/sha512/riscv_zknh_utils.rs @@ -127,3 +127,35 @@ fn load_unaligned_block(block: &[u8; 128]) -> [u64; 16] { res } + +/// This function returns `k[R]`, but prevents compiler from inlining the indexed value +#[cfg(sha2_backend = "riscv-zknh")] +pub(super) fn opaque_load(k: &[u64]) -> u64 { + assert!(R < k.len()); + #[cfg(target_arch = "riscv64")] + unsafe { + let dst; + asm!( + "ld {dst}, 8 * {R}({k})", + R = const R, + k = in(reg) k.as_ptr(), + dst = out(reg) dst, + options(pure, readonly, nostack, preserves_flags), + ); + dst + } + #[cfg(target_arch = "riscv32")] + unsafe { + let [hi, lo]: [u32; 2]; + asm!( + "lw {lo}, 8 * {R}({k})", + "lw {hi}, 8 * {R} + 4({k})", + R = const R, + k = in(reg) k.as_ptr(), + lo = out(reg) lo, + hi = out(reg) hi, + options(pure, readonly, nostack, preserves_flags), + ); + ((hi as u64) << 32) | (lo as u64) + } +}