Skip to content

Commit

Permalink
sha2: RISC-V scalar crypto extension support (#614)
Browse files Browse the repository at this point in the history
The support is Nightly-only and requires enabling the `sha2_backend`
configuration flag with a value equal to `riscv-zknh` or `riscv-zknh-compact`.

The resulting assembly and binary size of the `compress` function (not
counting the `K32` and `K64` statics):
- SHA-256, unrolled: https://rust.godbolt.org/z/177bqKd3h (5280 bytes)
- SHA-256, compact: https://rust.godbolt.org/z/Kzx59bsdP (1308 bytes)
- SHA-512, unrolled: https://rust.godbolt.org/z/ExqqrfE1r (7964 bytes)
- SHA-512, compact: https://rust.godbolt.org/z/z41v6d4do (2852 bytes)
  • Loading branch information
newpavlov authored Aug 23, 2024
1 parent 13cc6c8 commit 03e1d29
Show file tree
Hide file tree
Showing 11 changed files with 593 additions and 18 deletions.
36 changes: 36 additions & 0 deletions .github/workflows/sha2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,42 @@ jobs:
target: ${{ matrix.target }}
features: ${{ matrix.features }}

riscv64-zknh:
runs-on: ubuntu-latest
defaults:
run:
# Cross mounts only current package, i.e. by default it ignores workspace's Cargo.toml
working-directory: .
steps:
- uses: actions/checkout@v4
- uses: RustCrypto/actions/cargo-cache@master
- uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly
- run: cargo install cross --git https://github.com/cross-rs/cross
- run: cross test --package sha2 --target riscv64gc-unknown-linux-gnu
env:
RUSTFLAGS: '-Dwarnings --cfg sha2_backend="riscv-zknh" -C target-feature=+zknh'
- run: cross test --package sha2 --target riscv64gc-unknown-linux-gnu
env:
RUSTFLAGS: '-Dwarnings --cfg sha2_backend="riscv-zknh-compact" -C target-feature=+zknh'

riscv32-zknh:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: RustCrypto/actions/cargo-cache@master
- uses: dtolnay/rust-toolchain@master
with:
toolchain: nightly
components: rust-src
- run: cargo build --target riscv32gc-unknown-linux-gnu -Z build-std
env:
RUSTFLAGS: '-Dwarnings --cfg sha2_backend="riscv-zknh" -C target-feature=+zknh'
- run: cargo build --target riscv32gc-unknown-linux-gnu -Z build-std
env:
RUSTFLAGS: '-Dwarnings --cfg sha2_backend="riscv-zknh-compact" -C target-feature=+zknh'

minimal-versions:
uses: RustCrypto/actions/.github/workflows/minimal-versions.yml@master
with:
Expand Down
8 changes: 6 additions & 2 deletions sha2/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,13 @@ base16ct = { version = "0.2", features = ["alloc"] }
[features]
default = ["oid", "std"]
std = ["digest/std"]
oid = ["digest/oid"] # Enable OID support
oid = ["digest/oid"] # Enable OID support
zeroize = ["digest/zeroize"]
force-soft = [] # Force software implementation
force-soft = [] # Force software implementation

[lints.rust.unexpected_cfgs]
level = "warn"
check-cfg = ['cfg(sha2_backend, values("riscv-zknh", "riscv-zknh-compact"))']

[package.metadata.docs.rs]
all-features = true
Expand Down
10 changes: 10 additions & 0 deletions sha2/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@
)]
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![warn(missing_docs, rust_2018_idioms)]
#![cfg_attr(
any(sha2_backend = "riscv-zknh", sha2_backend = "riscv-zknh-compact"),
feature(riscv_ext_intrinsics)
)]

// Fail the build early when one of the Zknh backends is requested via
// `--cfg sha2_backend="..."` but the compilation target is not RISC-V.
#[cfg(all(
    any(sha2_backend = "riscv-zknh", sha2_backend = "riscv-zknh-compact"),
    not(any(target_arch = "riscv32", target_arch = "riscv64"))
))]
compile_error!("The Zknh backends can be enabled only for RISC-V targets");

pub use digest::{self, Digest};

Expand Down
22 changes: 22 additions & 0 deletions sha2/src/sha256.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ cfg_if::cfg_if! {
mod soft;
mod x86;
use x86::compress;
} else if #[cfg(all(
any(target_arch = "riscv32", target_arch = "riscv64"),
sha2_backend = "riscv-zknh"
))] {
mod riscv_zknh;
use riscv_zknh::compress;
} else if #[cfg(all(
any(target_arch = "riscv32", target_arch = "riscv64"),
sha2_backend = "riscv-zknh-compact"
))] {
mod riscv_zknh_compact;
use riscv_zknh_compact::compress;
} else if #[cfg(target_arch = "aarch64")] {
mod soft;
mod aarch64;
Expand All @@ -19,6 +31,16 @@ cfg_if::cfg_if! {
}
}

/// Decodes a 64-byte block into sixteen big-endian 32-bit words.
#[inline(always)]
#[allow(dead_code)]
fn to_u32s(block: &[u8; 64]) -> [u32; 16] {
    let mut words = [0u32; 16];
    for (word, bytes) in words.iter_mut().zip(block.chunks_exact(4)) {
        // `chunks_exact(4)` only yields 4-byte slices, so the copy cannot panic.
        let mut be = [0u8; 4];
        be.copy_from_slice(bytes);
        *word = u32::from_be_bytes(be);
    }
    words
}

/// Raw SHA-256 compression function.
///
/// This is a low-level "hazmat" API which provides direct access to the core
Expand Down
133 changes: 133 additions & 0 deletions sha2/src/sha256/riscv_zknh.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
use crate::consts::K32;

#[cfg(target_arch = "riscv32")]
use core::arch::riscv32::*;
#[cfg(target_arch = "riscv64")]
use core::arch::riscv64::*;

#[cfg(not(target_feature = "zknh"))]
compile_error!("riscv-zknh backend requires enabled zknh target feature");

/// Bitwise "choose": each output bit comes from `y` where the matching bit
/// of `x` is set, and from `z` where it is clear.
#[inline(always)]
fn ch(x: u32, y: u32, z: u32) -> u32 {
    let picked_from_y = x & y;
    let picked_from_z = !x & z;
    picked_from_y ^ picked_from_z
}

/// Bitwise "majority": each output bit is set when at least two of the
/// matching bits of `x`, `y` and `z` are set.
#[inline(always)]
fn maj(x: u32, y: u32, z: u32) -> u32 {
    let xy = x & y;
    let xz = x & z;
    let yz = y & z;
    xy ^ xz ^ yz
}

/// Performs SHA-256 round `R` on the working `state`.
///
/// The eight working variables are never physically rotated. Instead the
/// slot playing each role (`a`..`h`) is derived from the round number, so with
/// a const `R` every index below is known at compile time.
///
/// `block` holds the 16-word rolling message schedule; word `R % 16` is the
/// one consumed by this round.
fn round<const R: usize>(state: &mut [u32; 8], block: &[u32; 16]) {
    // `n` shrinks by one per round, which shifts every role one slot "down":
    // the slot written as `h` here is read as `a` in the next round.
    let n = K32.len() - R;
    #[allow(clippy::identity_op)] // `+ 0` is kept for symmetry with the lines below
    let a = (n + 0) % 8;
    let b = (n + 1) % 8;
    let c = (n + 2) % 8;
    let d = (n + 3) % 8;
    let e = (n + 4) % 8;
    let f = (n + 5) % 8;
    let g = (n + 6) % 8;
    let h = (n + 7) % 8;

    // h += Sigma1(e) + Ch(e, f, g) + K[R] + W[R]
    state[h] = state[h]
        // SAFETY: this module fails to compile unless the `zknh` target
        // feature is enabled, which is what the intrinsic requires.
        .wrapping_add(unsafe { sha256sum1(state[e]) })
        .wrapping_add(ch(state[e], state[f], state[g]))
        // Force reading of constants from the static to prevent bad codegen.
        // SAFETY: `&K32[R]` is a reference to an initialized `u32`, hence
        // valid and properly aligned for a read.
        .wrapping_add(unsafe { core::ptr::read_volatile(&K32[R]) })
        .wrapping_add(block[R % 16]);
    // d += (value accumulated in h above)
    state[d] = state[d].wrapping_add(state[h]);
    // h += Sigma0(a) + Maj(a, b, c); this slot becomes `a` for the next round.
    state[h] = state[h]
        // SAFETY: see above, `zknh` is statically enabled for this module.
        .wrapping_add(unsafe { sha256sum0(state[a]) })
        .wrapping_add(maj(state[a], state[b], state[c]));
}

/// Runs round `R` and then overwrites the schedule word it just consumed with
/// the word that will be needed sixteen rounds later.
fn round_schedule<const R: usize>(state: &mut [u32; 8], block: &mut [u32; 16]) {
    round::<R>(state, block);

    // Read the three other schedule words feeding the update before writing.
    let w14 = block[(R + 14) % 16];
    let w9 = block[(R + 9) % 16];
    let w1 = block[(R + 1) % 16];

    let next = block[R % 16]
        .wrapping_add(unsafe { sha256sig1(w14) })
        .wrapping_add(w9)
        .wrapping_add(unsafe { sha256sig0(w1) });
    block[R % 16] = next;
}

fn compress_block(state: &mut [u32; 8], mut block: [u32; 16]) {
let s = &mut state.clone();
let b = &mut block;

round_schedule::<0>(s, b);
round_schedule::<1>(s, b);
round_schedule::<2>(s, b);
round_schedule::<3>(s, b);
round_schedule::<4>(s, b);
round_schedule::<5>(s, b);
round_schedule::<6>(s, b);
round_schedule::<7>(s, b);
round_schedule::<8>(s, b);
round_schedule::<9>(s, b);
round_schedule::<10>(s, b);
round_schedule::<11>(s, b);
round_schedule::<12>(s, b);
round_schedule::<13>(s, b);
round_schedule::<14>(s, b);
round_schedule::<15>(s, b);
round_schedule::<16>(s, b);
round_schedule::<17>(s, b);
round_schedule::<18>(s, b);
round_schedule::<19>(s, b);
round_schedule::<20>(s, b);
round_schedule::<21>(s, b);
round_schedule::<22>(s, b);
round_schedule::<23>(s, b);
round_schedule::<24>(s, b);
round_schedule::<25>(s, b);
round_schedule::<26>(s, b);
round_schedule::<27>(s, b);
round_schedule::<28>(s, b);
round_schedule::<29>(s, b);
round_schedule::<30>(s, b);
round_schedule::<31>(s, b);
round_schedule::<32>(s, b);
round_schedule::<33>(s, b);
round_schedule::<34>(s, b);
round_schedule::<35>(s, b);
round_schedule::<36>(s, b);
round_schedule::<37>(s, b);
round_schedule::<38>(s, b);
round_schedule::<39>(s, b);
round_schedule::<40>(s, b);
round_schedule::<41>(s, b);
round_schedule::<42>(s, b);
round_schedule::<43>(s, b);
round_schedule::<44>(s, b);
round_schedule::<45>(s, b);
round_schedule::<46>(s, b);
round_schedule::<47>(s, b);
round::<48>(s, b);
round::<49>(s, b);
round::<50>(s, b);
round::<51>(s, b);
round::<52>(s, b);
round::<53>(s, b);
round::<54>(s, b);
round::<55>(s, b);
round::<56>(s, b);
round::<57>(s, b);
round::<58>(s, b);
round::<59>(s, b);
round::<60>(s, b);
round::<61>(s, b);
round::<62>(s, b);
round::<63>(s, b);

for i in 0..8 {
state[i] = state[i].wrapping_add(s[i]);
}
}

/// Feeds every 64-byte block in `blocks`, in order, through the compression
/// function, updating `state` in place.
pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
    for bytes in blocks {
        let words = super::to_u32s(bytes);
        compress_block(state, words);
    }
}
76 changes: 76 additions & 0 deletions sha2/src/sha256/riscv_zknh_compact.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use crate::consts::K32;

#[cfg(target_arch = "riscv32")]
use core::arch::riscv32::*;
#[cfg(target_arch = "riscv64")]
use core::arch::riscv64::*;

#[cfg(not(target_feature = "zknh"))]
compile_error!("riscv-zknh backend requires enabled zknh target feature");

/// Bitwise "choose": each output bit comes from `y` where the matching bit
/// of `x` is set, and from `z` where it is clear.
#[inline(always)]
fn ch(x: u32, y: u32, z: u32) -> u32 {
    let picked_from_y = x & y;
    let picked_from_z = !x & z;
    picked_from_y ^ picked_from_z
}

/// Bitwise "majority": each output bit is set when at least two of the
/// matching bits of `x`, `y` and `z` are set.
#[inline(always)]
fn maj(x: u32, y: u32, z: u32) -> u32 {
    let xy = x & y;
    let xz = x & z;
    let yz = y & z;
    xy ^ xz ^ yz
}

/// Performs SHA-256 round `r` on the working `state`.
///
/// The working variables are not physically rotated: the slot playing each
/// role (`a`..`h`) is derived from the round number instead. `block` is the
/// 16-word rolling message schedule, of which word `r % 16` is consumed here.
#[inline(always)]
fn round(state: &mut [u32; 8], block: &[u32; 16], r: usize) {
    // `n` shrinks by one per round, which shifts every role one slot "down":
    // the slot written as `h` here is read as `a` in the next round.
    let n = K32.len() - r;
    #[allow(clippy::identity_op)]
    let a = (n + 0) % 8;
    let b = (n + 1) % 8;
    let c = (n + 2) % 8;
    let d = (n + 3) % 8;
    let e = (n + 4) % 8;
    let f = (n + 5) % 8;
    let g = (n + 6) % 8;
    let h = (n + 7) % 8;

    // h += Sigma1(e) + Ch(e, f, g) + K[r] + W[r]
    state[h] = state[h]
        // SAFETY: this module fails to compile unless the `zknh` target
        // feature is enabled, which is what the intrinsic requires.
        .wrapping_add(unsafe { sha256sum1(state[e]) })
        .wrapping_add(ch(state[e], state[f], state[g]))
        .wrapping_add(K32[r])
        .wrapping_add(block[r % 16]);
    // d += (value accumulated in h above)
    state[d] = state[d].wrapping_add(state[h]);
    // h += Sigma0(a) + Maj(a, b, c); this slot becomes `a` for the next round.
    state[h] = state[h]
        // SAFETY: see above, `zknh` is statically enabled for this module.
        .wrapping_add(unsafe { sha256sum0(state[a]) })
        .wrapping_add(maj(state[a], state[b], state[c]))
}

/// Runs round `r` and then overwrites the schedule word it just consumed with
/// the word that will be needed sixteen rounds later.
#[inline(always)]
fn round_schedule(state: &mut [u32; 8], block: &mut [u32; 16], r: usize) {
    round(state, block, r);

    // Read the three other schedule words feeding the update before writing.
    let w14 = block[(r + 14) % 16];
    let w9 = block[(r + 9) % 16];
    let w1 = block[(r + 1) % 16];

    let next = block[r % 16]
        .wrapping_add(unsafe { sha256sig1(w14) })
        .wrapping_add(w9)
        .wrapping_add(unsafe { sha256sig0(w1) });
    block[r % 16] = next;
}

/// Compresses one 16-word message block into `state` using looped rounds.
///
/// The rounds run on a private copy of `state`; the result is then added
/// word-wise (wrapping) back into `state`. Rounds 0..48 also advance the
/// rolling message schedule held in `block`, while rounds 48..64 only consume
/// the schedule words already computed.
#[inline(always)]
fn compress_block(state: &mut [u32; 8], mut block: [u32; 16]) {
    // `[u32; 8]` is `Copy`, so a plain dereference yields the working copy.
    let mut working = *state;

    for r in 0..48 {
        round_schedule(&mut working, &mut block, r);
    }
    for r in 48..64 {
        round(&mut working, &block, r);
    }

    // Fold the working variables back into the chaining state.
    for (acc, word) in state.iter_mut().zip(working.iter()) {
        *acc = acc.wrapping_add(*word);
    }
}

/// Feeds every 64-byte block in `blocks`, in order, through the compression
/// function, updating `state` in place.
pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
    for bytes in blocks {
        let words = super::to_u32s(bytes);
        compress_block(state, words);
    }
}
10 changes: 3 additions & 7 deletions sha2/src/sha256/soft.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ macro_rules! schedule_rounds4 {
}

/// Process a block with the SHA-256 algorithm.
fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) {
fn sha256_digest_block_u32(state: &mut [u32; 8], block: [u32; 16]) {
let mut abef = [state[0], state[1], state[4], state[5]];
let mut cdgh = [state[2], state[3], state[6], state[7]];

Expand Down Expand Up @@ -228,11 +228,7 @@ fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) {
}

pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
for block in blocks {
let mut block_u32 = [0u32; 16];
for (o, chunk) in block_u32.iter_mut().zip(block.chunks_exact(4)) {
*o = u32::from_be_bytes(chunk.try_into().unwrap());
}
sha256_digest_block_u32(state, &block_u32);
for block in blocks.iter().map(super::to_u32s) {
sha256_digest_block_u32(state, block);
}
}
22 changes: 22 additions & 0 deletions sha2/src/sha512.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ cfg_if::cfg_if! {
mod soft;
mod x86;
use x86::compress;
} else if #[cfg(all(
any(target_arch = "riscv32", target_arch = "riscv64"),
sha2_backend = "riscv-zknh"
))] {
mod riscv_zknh;
use riscv_zknh::compress;
} else if #[cfg(all(
any(target_arch = "riscv32", target_arch = "riscv64"),
sha2_backend = "riscv-zknh-compact"
))] {
mod riscv_zknh_compact;
use riscv_zknh_compact::compress;
} else if #[cfg(target_arch = "aarch64")] {
mod soft;
mod aarch64;
Expand All @@ -19,6 +31,16 @@ cfg_if::cfg_if! {
}
}

/// Decodes a 128-byte block into sixteen big-endian 64-bit words.
#[inline(always)]
#[allow(dead_code)]
fn to_u64s(block: &[u8; 128]) -> [u64; 16] {
    let mut words = [0u64; 16];
    for (word, bytes) in words.iter_mut().zip(block.chunks_exact(8)) {
        // `chunks_exact(8)` only yields 8-byte slices, so the copy cannot panic.
        let mut be = [0u8; 8];
        be.copy_from_slice(bytes);
        *word = u64::from_be_bytes(be);
    }
    words
}

/// Raw SHA-512 compression function.
///
/// This is a low-level "hazmat" API which provides direct access to the core
Expand Down
Loading

0 comments on commit 03e1d29

Please sign in to comment.