Skip to content

Commit

Permalink
sha2: make compress consume blocks
Browse files Browse the repository at this point in the history
  • Loading branch information
baloo committed May 11, 2024
1 parent fd2a0f4 commit 92ceedb
Show file tree
Hide file tree
Showing 11 changed files with 38 additions and 20 deletions.
7 changes: 2 additions & 5 deletions sha2/src/core_api.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::{consts, sha256::compress256, sha512::compress512};
use core::{convert::TryInto, fmt, slice::from_ref};
use digest::{
array::Array,
block_buffer::Eager,
core_api::{
AlgorithmName, Block, BlockSizeUser, Buffer, BufferKindUser, OutputSizeUser, TruncSide,
Expand Down Expand Up @@ -39,7 +38,6 @@ impl UpdateCore for Sha256VarCore {
#[inline]
fn update_blocks(&mut self, blocks: &[Block<Self>]) {
self.block_len += blocks.len() as u64;
let blocks = Array::cast_slice_to_core(blocks);
compress256(&mut self.state, blocks);
}
}
Expand All @@ -66,7 +64,7 @@ impl VariableOutputCore for Sha256VarCore {
fn finalize_variable_core(&mut self, buffer: &mut Buffer<Self>, out: &mut Output<Self>) {
let bs = Self::BlockSize::U64;
let bit_len = 8 * (buffer.get_pos() as u64 + bs * self.block_len);
buffer.len64_padding_be(bit_len, |b| compress256(&mut self.state, from_ref(&b.0)));
buffer.len64_padding_be(bit_len, |b| compress256(&mut self.state, from_ref(b)));

for (chunk, v) in out.chunks_exact_mut(4).zip(self.state.iter()) {
chunk.copy_from_slice(&v.to_be_bytes());
Expand Down Expand Up @@ -155,7 +153,6 @@ impl UpdateCore for Sha512VarCore {
#[inline]
fn update_blocks(&mut self, blocks: &[Block<Self>]) {
self.block_len += blocks.len() as u128;
let blocks = Array::cast_slice_to_core(blocks);
compress512(&mut self.state, blocks);
}
}
Expand Down Expand Up @@ -184,7 +181,7 @@ impl VariableOutputCore for Sha512VarCore {
fn finalize_variable_core(&mut self, buffer: &mut Buffer<Self>, out: &mut Output<Self>) {
let bs = Self::BlockSize::U64 as u128;
let bit_len = 8 * (buffer.get_pos() as u128 + bs * self.block_len);
buffer.len128_padding_be(bit_len, |b| compress512(&mut self.state, from_ref(&b.0)));
buffer.len128_padding_be(bit_len, |b| compress512(&mut self.state, from_ref(b)));

for (chunk, v) in out.chunks_exact_mut(8).zip(self.state.iter()) {
chunk.copy_from_slice(&v.to_be_bytes());
Expand Down
6 changes: 5 additions & 1 deletion sha2/src/sha256.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
use crate::Sha256VarCore;

type Block = digest::core_api::Block<Sha256VarCore>;

cfg_if::cfg_if! {
if #[cfg(feature = "force-soft")] {
mod soft;
Expand All @@ -24,6 +28,6 @@ cfg_if::cfg_if! {
/// This is a low-level "hazmat" API which provides direct access to the core
/// functionality of SHA-256.
#[cfg_attr(docsrs, doc(cfg(feature = "compress")))]
pub fn compress256(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
pub fn compress256(state: &mut [u32; 8], blocks: &[Block]) {
compress(state, blocks)
}
5 changes: 3 additions & 2 deletions sha2/src/sha256/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@

use core::arch::{aarch64::*, asm};

use super::Block;
use crate::consts::K32;

cpufeatures::new!(sha2_hwcap, "sha2");

pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
pub fn compress(state: &mut [u32; 8], blocks: &[Block]) {
// TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
// after stabilization
if sha2_hwcap::get() {
Expand All @@ -21,7 +22,7 @@ pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
}

#[target_feature(enable = "sha2")]
unsafe fn sha256_compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
unsafe fn sha256_compress(state: &mut [u32; 8], blocks: &[Block]) {
// SAFETY: Requires the sha2 feature.

// Load state into vectors.
Expand Down
4 changes: 3 additions & 1 deletion sha2/src/sha256/loongarch64_asm.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! LoongArch64 assembly backend
use super::Block;

macro_rules! c {
($($l:expr)*) => {
concat!($($l ,)*)
Expand Down Expand Up @@ -78,7 +80,7 @@ macro_rules! roundtail {
};
}

pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
pub fn compress(state: &mut [u32; 8], blocks: &[Block]) {
if blocks.is_empty() {
return;
}
Expand Down
4 changes: 3 additions & 1 deletion sha2/src/sha256/soft.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#![allow(clippy::many_single_char_names)]
use crate::consts::K32;

use super::Block;

#[inline(always)]
fn shr(v: [u32; 4], o: u32) -> [u32; 4] {
[v[0] >> o, v[1] >> o, v[2] >> o, v[3] >> o]
Expand Down Expand Up @@ -227,7 +229,7 @@ fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) {
state[7] = state[7].wrapping_add(h);
}

pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
pub fn compress(state: &mut [u32; 8], blocks: &[Block]) {
for block in blocks {
let mut block_u32 = [0u32; 16];
for (o, chunk) in block_u32.iter_mut().zip(block.chunks_exact(4)) {
Expand Down
6 changes: 4 additions & 2 deletions sha2/src/sha256/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

use super::Block;

unsafe fn schedule(v0: __m128i, v1: __m128i, v2: __m128i, v3: __m128i) -> __m128i {
let t1 = _mm_sha256msg1_epu32(v0, v1);
let t2 = _mm_alignr_epi8(v3, v2, 4);
Expand Down Expand Up @@ -39,7 +41,7 @@ macro_rules! schedule_rounds4 {
// we use unaligned loads with `__m128i` pointers
#[allow(clippy::cast_ptr_alignment)]
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[Block]) {
#[allow(non_snake_case)]
let MASK: __m128i = _mm_set_epi64x(
0x0C0D_0E0F_0809_0A0Bu64 as i64,
Expand Down Expand Up @@ -99,7 +101,7 @@ unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[[u8; 64]]) {

cpufeatures::new!(shani_cpuid, "sha", "sse2", "ssse3", "sse4.1");

pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
pub fn compress(state: &mut [u32; 8], blocks: &[Block]) {
// TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
// after stabilization
if shani_cpuid::get() {
Expand Down
6 changes: 5 additions & 1 deletion sha2/src/sha512.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
use crate::Sha512VarCore;

type Block = digest::core_api::Block<Sha512VarCore>;

cfg_if::cfg_if! {
if #[cfg(feature = "force-soft")] {
mod soft;
Expand All @@ -24,6 +28,6 @@ cfg_if::cfg_if! {
/// This is a low-level "hazmat" API which provides direct access to the core
/// functionality of SHA-512.
#[cfg_attr(docsrs, doc(cfg(feature = "compress")))]
pub fn compress512(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
pub fn compress512(state: &mut [u64; 8], blocks: &[Block]) {
compress(state, blocks)
}
5 changes: 3 additions & 2 deletions sha2/src/sha512/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@

use core::arch::{aarch64::*, asm};

use super::Block;
use crate::consts::K64;

cpufeatures::new!(sha3_hwcap, "sha3");

pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
pub fn compress(state: &mut [u64; 8], blocks: &[Block]) {
// TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
// after stabilization
if sha3_hwcap::get() {
Expand All @@ -17,7 +18,7 @@ pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
}

#[target_feature(enable = "sha3")]
unsafe fn sha512_compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
unsafe fn sha512_compress(state: &mut [u64; 8], blocks: &[Block]) {
// SAFETY: Requires the sha3 feature.

// Load state into vectors.
Expand Down
4 changes: 3 additions & 1 deletion sha2/src/sha512/loongarch64_asm.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
//! LoongArch64 assembly backend
use super::Block;

macro_rules! c {
($($l:expr)*) => {
concat!($($l ,)*)
Expand Down Expand Up @@ -77,7 +79,7 @@ macro_rules! roundtail {
};
}

pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
pub fn compress(state: &mut [u64; 8], blocks: &[Block]) {
if blocks.is_empty() {
return;
}
Expand Down
4 changes: 3 additions & 1 deletion sha2/src/sha512/soft.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#![allow(clippy::many_single_char_names)]
use crate::consts::K64;

use super::Block;

/// Not an intrinsic, but works like an unaligned load.
fn sha512load(v0: [u64; 2], v1: [u64; 2]) -> [u64; 2] {
[v1[1], v0[0]]
Expand Down Expand Up @@ -208,7 +210,7 @@ pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: &[u64; 16]) {
state[7] = state[7].wrapping_add(h);
}

pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
pub fn compress(state: &mut [u64; 8], blocks: &[Block]) {
for block in blocks {
let mut block_u32 = [0u64; 16];
for (o, chunk) in block_u32.iter_mut().zip(block.chunks_exact(8)) {
Expand Down
7 changes: 4 additions & 3 deletions sha2/src/sha512/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

use super::Block;
use crate::consts::K64;

cpufeatures::new!(avx2_cpuid, "avx2");

pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
pub fn compress(state: &mut [u64; 8], blocks: &[Block]) {
// TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
// after stabilization
if avx2_cpuid::get() {
Expand All @@ -26,7 +27,7 @@ pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
}

#[target_feature(enable = "avx2")]
unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[Block]) {
let mut start_block = 0;

if blocks.len() & 0b1 != 0 {
Expand Down Expand Up @@ -55,7 +56,7 @@ unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]
}

#[inline(always)]
unsafe fn sha512_compress_x86_64_avx(state: &mut [u64; 8], block: &[u8; 128]) {
unsafe fn sha512_compress_x86_64_avx(state: &mut [u64; 8], block: &Block) {
let mut ms = [_mm_setzero_si128(); 8];
let mut x = [_mm_setzero_si128(); 8];

Expand Down

0 comments on commit 92ceedb

Please sign in to comment.