Skip to content

Commit

Permalink
added VAES intrinsics
Browse files Browse the repository at this point in the history
This adds support for the x86-64 VAES (vectorized AES) intrinsics.
It adds in both the full-width and the VL'ed 256-bit versions
and ensures via tests that they match the same test vectors as the AES-NI
versions and that they match (though sadly w/o quickcheck validation for now)
  • Loading branch information
DevJPM committed Nov 1, 2020
1 parent 9b9a7d5 commit 52ccd73
Show file tree
Hide file tree
Showing 2 changed files with 331 additions and 0 deletions.
328 changes: 328 additions & 0 deletions crates/core_arch/src/x86/avx512vaes.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,328 @@
//! Vectorized AES Instructions (VAES)
//!
//! The intrinsics here correspond to those in the `immintrin.h` C header.
//!
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
//!
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
use crate::core_arch::x86::__m512i;
use crate::core_arch::x86::__m256i;

#[cfg(test)]
use stdarch_test::assert_instr;

#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.aesni.aesenc.256"]
fn aesenc_256(a: __m256i, round_key: __m256i) -> __m256i;
#[link_name = "llvm.x86.aesni.aesenclast.256"]
fn aesenclast_256(a: __m256i, round_key: __m256i) -> __m256i;
#[link_name = "llvm.x86.aesni.aesdec.256"]
fn aesdec_256(a: __m256i, round_key: __m256i) -> __m256i;
#[link_name = "llvm.x86.aesni.aesdeclast.256"]
fn aesdeclast_256(a: __m256i, round_key: __m256i) -> __m256i;
#[link_name = "llvm.x86.aesni.aesenc.512"]
fn aesenc_512(a: __m512i, round_key: __m512i) -> __m512i;
#[link_name = "llvm.x86.aesni.aesenclast.512"]
fn aesenclast_512(a: __m512i, round_key: __m512i) -> __m512i;
#[link_name = "llvm.x86.aesni.aesdec.512"]
fn aesdec_512(a: __m512i, round_key: __m512i) -> __m512i;
#[link_name = "llvm.x86.aesni.aesdeclast.512"]
fn aesdeclast_512(a: __m512i, round_key: __m512i) -> __m512i;
}

/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenc_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512vl")]
#[cfg_attr(test, assert_instr(vaesenc))]
pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesenc_256(a, round_key)
}

/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenclast_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512vl")]
#[cfg_attr(test, assert_instr(vaesenclast))]
pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesenclast_256(a, round_key)
}

/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdec_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512vl")]
#[cfg_attr(test, assert_instr(vaesdec))]
pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesdec_256(a, round_key)
}

/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdeclast_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512vl")]
#[cfg_attr(test, assert_instr(vaesdeclast))]
pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesdeclast_256(a, round_key)
}


/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenc_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesenc))]
pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesenc_512(a, round_key)
}

/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenclast_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesenclast))]
pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesenclast_512(a, round_key)
}

/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdec_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesdec))]
pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesdec_512(a, round_key)
}

/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using
/// the corresponding 128-bit word (key) in `round_key`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdeclast_epi128)
#[inline]
#[target_feature(enable = "avx512vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesdeclast))]
pub unsafe fn _mm512_aesdeclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesdeclast_512(a, round_key)
}

#[cfg(test)]
mod tests {
// The constants in the tests below are just bit patterns. They should not
// be interpreted as integers; signedness does not make sense for them, but
// __mXXXi happens to be defined in terms of signed integers.
#![allow(overflowing_literals)]

use stdarch_test::simd_test;

use crate::core_arch::x86::*;

// the first parts of these tests are straight ports from the AES-NI tests
// the second parts directly compare the two, for inputs that are different across lanes
// and "more random" than the standard test vectors
// ideally we'd be using quickcheck here instead

#[target_feature(enable = "avx512vaes,avx512vl")]
unsafe fn helper_for_256_avx512vaes(linear : unsafe fn(__m128i,__m128i)->__m128i, vectorized : unsafe fn(__m256i,__m256i)->__m256i) {
let a = _mm256_set_epi64x(
0xDCB4DB3657BF0B7D, 0x18DB0601068EDD9F, 0xB76B908233200DC5, 0xE478235FA8E22D5E
);
let k = _mm256_set_epi64x(
0x672F6F105A94CEA7, 0x8298B8FFCA5F829C, 0xA3927047B3FB61D8, 0x978093862CDE7187
);
let mut a_decomp = [_mm_setzero_si128();2];
a_decomp[0] = _mm256_extracti128_si256(a,0);
a_decomp[1] = _mm256_extracti128_si256(a,1);
let mut k_decomp = [_mm_setzero_si128();2];
k_decomp[0] = _mm256_extracti128_si256(k,0);
k_decomp[1] = _mm256_extracti128_si256(k,1);
let r = vectorized(a,k);
let mut e_decomp = [_mm_setzero_si128();2];
for i in 0..2 {
e_decomp[i] = linear(a_decomp[i],k_decomp[i]);
}
assert_eq_m128i(_mm256_extracti128_si256(r,0),e_decomp[0]);
assert_eq_m128i(_mm256_extracti128_si256(r,1),e_decomp[1]);
}

#[simd_test(enable = "avx512vaes,avx512vl")]
unsafe fn test_mm256_aesdec_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
let a = _mm256_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff);
let k = _mm256_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee);
let e = _mm256_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee,0x044e4f5176fec48f, 0xb57ecfa381da39ee);
let r = _mm256_aesdec_epi128(a, k);
assert_eq_m256i(r, e);

helper_for_256_avx512vaes(_mm_aesdec_si128,_mm256_aesdec_epi128);
}

#[simd_test(enable = "avx512vaes,avx512vl")]
unsafe fn test_mm256_aesdeclast_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
let a = _mm256_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff);
let k = _mm256_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee);
let e = _mm256_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493,0x36cad57d9072bf9e, 0xf210dd981fa4a493);
let r = _mm256_aesdeclast_epi128(a, k);
assert_eq_m256i(r, e);

helper_for_256_avx512vaes(_mm_aesdeclast_si128,_mm256_aesdeclast_epi128);
}

#[simd_test(enable = "avx512vaes,avx512vl")]
unsafe fn test_mm256_aesenc_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
// they are repeated appropriately
let a = _mm256_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff, 0x0123456789abcdef, 0x8899aabbccddeeff);
let k = _mm256_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee, 0x1133557799bbddff, 0x0022446688aaccee);
let e = _mm256_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333, 0x16ab0e57dfc442ed, 0x28e4ee1884504333);
let r = _mm256_aesenc_epi128(a, k);
assert_eq_m256i(r, e);

helper_for_256_avx512vaes(_mm_aesenc_si128,_mm256_aesenc_epi128);
}

#[simd_test(enable = "avx512vaes,avx512vl")]
unsafe fn test_mm256_aesenclast_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
let a = _mm256_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff);
let k = _mm256_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee);
let e = _mm256_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8,0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8);
let r = _mm256_aesenclast_epi128(a, k);
assert_eq_m256i(r, e);

helper_for_256_avx512vaes(_mm_aesenclast_si128,_mm256_aesenclast_epi128);
}

#[target_feature(enable = "avx512vaes,avx512f")]
unsafe fn helper_for_512_avx512vaes(linear : unsafe fn(__m128i,__m128i)->__m128i, vectorized : unsafe fn(__m512i,__m512i)->__m512i) {
let a = _mm512_set_epi64(
0xDCB4DB3657BF0B7D, 0x18DB0601068EDD9F, 0xB76B908233200DC5, 0xE478235FA8E22D5E,
0xAB05CFFA2621154C, 0x1171B47A186174C9, 0x8C6B6C0E7595CEC9, 0xBE3E7D4934E961BD
);
let k = _mm512_set_epi64(
0x672F6F105A94CEA7, 0x8298B8FFCA5F829C, 0xA3927047B3FB61D8, 0x978093862CDE7187,
0xB1927AB22F31D0EC, 0xA9A5DA619BE4D7AF, 0xCA2590F56884FDC6, 0x19BE9F660038BDB5
);
let mut a_decomp = [_mm_setzero_si128();4];
a_decomp[0] = _mm512_extracti32x4_epi32(a,0);
a_decomp[1] = _mm512_extracti32x4_epi32(a,1);
a_decomp[2] = _mm512_extracti32x4_epi32(a,2);
a_decomp[3] = _mm512_extracti32x4_epi32(a,3);
let mut k_decomp = [_mm_setzero_si128();4];
k_decomp[0] = _mm512_extracti32x4_epi32(k,0);
k_decomp[1] = _mm512_extracti32x4_epi32(k,1);
k_decomp[2] = _mm512_extracti32x4_epi32(k,2);
k_decomp[3] = _mm512_extracti32x4_epi32(k,3);
let r = vectorized(a,k);
let mut e_decomp = [_mm_setzero_si128();4];
for i in 0..4 {
e_decomp[i] = linear(a_decomp[i],k_decomp[i]);
}
assert_eq_m128i(_mm512_extracti32x4_epi32(r,0),e_decomp[0]);
assert_eq_m128i(_mm512_extracti32x4_epi32(r,1),e_decomp[1]);
assert_eq_m128i(_mm512_extracti32x4_epi32(r,2),e_decomp[2]);
assert_eq_m128i(_mm512_extracti32x4_epi32(r,3),e_decomp[3]);
}

#[simd_test(enable = "avx512vaes,avx512f")]
unsafe fn test_mm512_aesdec_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
let a = _mm512_set_epi64(
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff,
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff
);
let k = _mm512_set_epi64(
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee,
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee
);
let e = _mm512_set_epi64(
0x044e4f5176fec48f, 0xb57ecfa381da39ee,0x044e4f5176fec48f, 0xb57ecfa381da39ee,
0x044e4f5176fec48f, 0xb57ecfa381da39ee,0x044e4f5176fec48f, 0xb57ecfa381da39ee
);
let r = _mm512_aesdec_epi128(a, k);
assert_eq_m512i(r, e);

helper_for_512_avx512vaes(_mm_aesdec_si128,_mm512_aesdec_epi128);
}

#[simd_test(enable = "avx512vaes,avx512f")]
unsafe fn test_mm512_aesdeclast_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
let a = _mm512_set_epi64(
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff,
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff
);
let k = _mm512_set_epi64(
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee,
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee
);
let e = _mm512_set_epi64(
0x36cad57d9072bf9e, 0xf210dd981fa4a493,0x36cad57d9072bf9e, 0xf210dd981fa4a493,
0x36cad57d9072bf9e, 0xf210dd981fa4a493,0x36cad57d9072bf9e, 0xf210dd981fa4a493
);
let r = _mm512_aesdeclast_epi128(a, k);
assert_eq_m512i(r, e);

helper_for_512_avx512vaes(_mm_aesdeclast_si128,_mm512_aesdeclast_epi128);
}

#[simd_test(enable = "avx512vaes,avx512f")]
unsafe fn test_mm512_aesenc_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
// they are repeated appropriately
let a = _mm512_set_epi64(
0x0123456789abcdef, 0x8899aabbccddeeff, 0x0123456789abcdef, 0x8899aabbccddeeff,
0x0123456789abcdef, 0x8899aabbccddeeff, 0x0123456789abcdef, 0x8899aabbccddeeff
);
let k = _mm512_set_epi64(
0x1133557799bbddff, 0x0022446688aaccee, 0x1133557799bbddff, 0x0022446688aaccee,
0x1133557799bbddff, 0x0022446688aaccee, 0x1133557799bbddff, 0x0022446688aaccee,
);
let e = _mm512_set_epi64(
0x16ab0e57dfc442ed, 0x28e4ee1884504333, 0x16ab0e57dfc442ed, 0x28e4ee1884504333,
0x16ab0e57dfc442ed, 0x28e4ee1884504333, 0x16ab0e57dfc442ed, 0x28e4ee1884504333
);
let r = _mm512_aesenc_epi128(a, k);
assert_eq_m512i(r, e);

helper_for_512_avx512vaes(_mm_aesenc_si128,_mm512_aesenc_epi128);
}

#[simd_test(enable = "avx512vaes,avx512f")]
unsafe fn test_mm512_aesenclast_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
let a = _mm512_set_epi64(
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff,
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff,
);
let k = _mm512_set_epi64(
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee,
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee
);
let e = _mm512_set_epi64(
0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8,0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8,
0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8,0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8
);
let r = _mm512_aesenclast_epi128(a, k);
assert_eq_m512i(r, e);

helper_for_512_avx512vaes(_mm_aesenclast_si128,_mm512_aesenclast_epi128);
}
}
3 changes: 3 additions & 0 deletions crates/core_arch/src/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,9 @@ pub use self::avx512f::*;
mod avx512ifma;
pub use self::avx512ifma::*;

mod avx512vaes;
pub use self::avx512vaes::*;

mod bt;
pub use self::bt::*;

Expand Down

0 comments on commit 52ccd73

Please sign in to comment.