-
Notifications
You must be signed in to change notification settings - Fork 275
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This adds support for the x86-64 VAES (vectorized AES) intrinsics. It adds in both the full-width and the VL'ed 256-bit versions and ensures via tests that they match the same test vectors as the AES-NI versions and that they match (though sadly w/o quickcheck validation for now)
- Loading branch information
Showing
2 changed files
with
331 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,328 @@ | ||
//! Vectorized AES Instructions (VAES) | ||
//! | ||
//! The intrinsics here correspond to those in the `immintrin.h` C header. | ||
//! | ||
//! The reference is [Intel 64 and IA-32 Architectures Software Developer's | ||
//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. | ||
//! | ||
//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf | ||
use crate::core_arch::x86::__m512i; | ||
use crate::core_arch::x86::__m256i; | ||
|
||
#[cfg(test)] | ||
use stdarch_test::assert_instr; | ||
|
||
#[allow(improper_ctypes)] | ||
extern "C" { | ||
#[link_name = "llvm.x86.aesni.aesenc.256"] | ||
fn aesenc_256(a: __m256i, round_key: __m256i) -> __m256i; | ||
#[link_name = "llvm.x86.aesni.aesenclast.256"] | ||
fn aesenclast_256(a: __m256i, round_key: __m256i) -> __m256i; | ||
#[link_name = "llvm.x86.aesni.aesdec.256"] | ||
fn aesdec_256(a: __m256i, round_key: __m256i) -> __m256i; | ||
#[link_name = "llvm.x86.aesni.aesdeclast.256"] | ||
fn aesdeclast_256(a: __m256i, round_key: __m256i) -> __m256i; | ||
#[link_name = "llvm.x86.aesni.aesenc.512"] | ||
fn aesenc_512(a: __m512i, round_key: __m512i) -> __m512i; | ||
#[link_name = "llvm.x86.aesni.aesenclast.512"] | ||
fn aesenclast_512(a: __m512i, round_key: __m512i) -> __m512i; | ||
#[link_name = "llvm.x86.aesni.aesdec.512"] | ||
fn aesdec_512(a: __m512i, round_key: __m512i) -> __m512i; | ||
#[link_name = "llvm.x86.aesni.aesdeclast.512"] | ||
fn aesdeclast_512(a: __m512i, round_key: __m512i) -> __m512i; | ||
} | ||
|
||
/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using | ||
/// the corresponding 128-bit word (key) in `round_key`. | ||
/// | ||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenc_epi128) | ||
#[inline] | ||
#[target_feature(enable = "avx512vaes,avx512vl")] | ||
#[cfg_attr(test, assert_instr(vaesenc))] | ||
pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i { | ||
aesenc_256(a, round_key) | ||
} | ||
|
||
/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using | ||
/// the corresponding 128-bit word (key) in `round_key`. | ||
/// | ||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenclast_epi128) | ||
#[inline] | ||
#[target_feature(enable = "avx512vaes,avx512vl")] | ||
#[cfg_attr(test, assert_instr(vaesenclast))] | ||
pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256i { | ||
aesenclast_256(a, round_key) | ||
} | ||
|
||
/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using | ||
/// the corresponding 128-bit word (key) in `round_key`. | ||
/// | ||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdec_epi128) | ||
#[inline] | ||
#[target_feature(enable = "avx512vaes,avx512vl")] | ||
#[cfg_attr(test, assert_instr(vaesdec))] | ||
pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i { | ||
aesdec_256(a, round_key) | ||
} | ||
|
||
/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using | ||
/// the corresponding 128-bit word (key) in `round_key`. | ||
/// | ||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdeclast_epi128) | ||
#[inline] | ||
#[target_feature(enable = "avx512vaes,avx512vl")] | ||
#[cfg_attr(test, assert_instr(vaesdeclast))] | ||
pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256i { | ||
aesdeclast_256(a, round_key) | ||
} | ||
|
||
|
||
/// Performs one round of an AES encryption flow on each 128-bit word (state) in `a` using | ||
/// the corresponding 128-bit word (key) in `round_key`. | ||
/// | ||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenc_epi128) | ||
#[inline] | ||
#[target_feature(enable = "avx512vaes,avx512f")] | ||
#[cfg_attr(test, assert_instr(vaesenc))] | ||
pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i { | ||
aesenc_512(a, round_key) | ||
} | ||
|
||
/// Performs the last round of an AES encryption flow on each 128-bit word (state) in `a` using | ||
/// the corresponding 128-bit word (key) in `round_key`. | ||
/// | ||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenclast_epi128) | ||
#[inline] | ||
#[target_feature(enable = "avx512vaes,avx512f")] | ||
#[cfg_attr(test, assert_instr(vaesenclast))] | ||
pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512i { | ||
aesenclast_512(a, round_key) | ||
} | ||
|
||
/// Performs one round of an AES decryption flow on each 128-bit word (state) in `a` using | ||
/// the corresponding 128-bit word (key) in `round_key`. | ||
/// | ||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdec_epi128) | ||
#[inline] | ||
#[target_feature(enable = "avx512vaes,avx512f")] | ||
#[cfg_attr(test, assert_instr(vaesdec))] | ||
pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i { | ||
aesdec_512(a, round_key) | ||
} | ||
|
||
/// Performs the last round of an AES decryption flow on each 128-bit word (state) in `a` using | ||
/// the corresponding 128-bit word (key) in `round_key`. | ||
/// | ||
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdeclast_epi128) | ||
#[inline] | ||
#[target_feature(enable = "avx512vaes,avx512f")] | ||
#[cfg_attr(test, assert_instr(vaesdeclast))] | ||
pub unsafe fn _mm512_aesdeclast_epi128(a: __m512i, round_key: __m512i) -> __m512i { | ||
aesdeclast_512(a, round_key) | ||
} | ||
|
||
#[cfg(test)] | ||
mod tests { | ||
// The constants in the tests below are just bit patterns. They should not | ||
// be interpreted as integers; signedness does not make sense for them, but | ||
// __mXXXi happens to be defined in terms of signed integers. | ||
#![allow(overflowing_literals)] | ||
|
||
use stdarch_test::simd_test; | ||
|
||
use crate::core_arch::x86::*; | ||
|
||
// the first parts of these tests are straight ports from the AES-NI tests | ||
// the second parts directly compare the two, for inputs that are different across lanes | ||
// and "more random" than the standard test vectors | ||
// ideally we'd be using quickcheck here instead | ||
|
||
#[target_feature(enable = "avx512vaes,avx512vl")] | ||
unsafe fn helper_for_256_avx512vaes(linear : unsafe fn(__m128i,__m128i)->__m128i, vectorized : unsafe fn(__m256i,__m256i)->__m256i) { | ||
let a = _mm256_set_epi64x( | ||
0xDCB4DB3657BF0B7D, 0x18DB0601068EDD9F, 0xB76B908233200DC5, 0xE478235FA8E22D5E | ||
); | ||
let k = _mm256_set_epi64x( | ||
0x672F6F105A94CEA7, 0x8298B8FFCA5F829C, 0xA3927047B3FB61D8, 0x978093862CDE7187 | ||
); | ||
let mut a_decomp = [_mm_setzero_si128();2]; | ||
a_decomp[0] = _mm256_extracti128_si256(a,0); | ||
a_decomp[1] = _mm256_extracti128_si256(a,1); | ||
let mut k_decomp = [_mm_setzero_si128();2]; | ||
k_decomp[0] = _mm256_extracti128_si256(k,0); | ||
k_decomp[1] = _mm256_extracti128_si256(k,1); | ||
let r = vectorized(a,k); | ||
let mut e_decomp = [_mm_setzero_si128();2]; | ||
for i in 0..2 { | ||
e_decomp[i] = linear(a_decomp[i],k_decomp[i]); | ||
} | ||
assert_eq_m128i(_mm256_extracti128_si256(r,0),e_decomp[0]); | ||
assert_eq_m128i(_mm256_extracti128_si256(r,1),e_decomp[1]); | ||
} | ||
|
||
#[simd_test(enable = "avx512vaes,avx512vl")] | ||
unsafe fn test_mm256_aesdec_epi128() { | ||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx. | ||
let a = _mm256_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff); | ||
let k = _mm256_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee); | ||
let e = _mm256_set_epi64x(0x044e4f5176fec48f, 0xb57ecfa381da39ee,0x044e4f5176fec48f, 0xb57ecfa381da39ee); | ||
let r = _mm256_aesdec_epi128(a, k); | ||
assert_eq_m256i(r, e); | ||
|
||
helper_for_256_avx512vaes(_mm_aesdec_si128,_mm256_aesdec_epi128); | ||
} | ||
|
||
#[simd_test(enable = "avx512vaes,avx512vl")] | ||
unsafe fn test_mm256_aesdeclast_epi128() { | ||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx. | ||
let a = _mm256_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff); | ||
let k = _mm256_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee); | ||
let e = _mm256_set_epi64x(0x36cad57d9072bf9e, 0xf210dd981fa4a493,0x36cad57d9072bf9e, 0xf210dd981fa4a493); | ||
let r = _mm256_aesdeclast_epi128(a, k); | ||
assert_eq_m256i(r, e); | ||
|
||
helper_for_256_avx512vaes(_mm_aesdeclast_si128,_mm256_aesdeclast_epi128); | ||
} | ||
|
||
#[simd_test(enable = "avx512vaes,avx512vl")] | ||
unsafe fn test_mm256_aesenc_epi128() { | ||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx. | ||
// they are repeated appropriately | ||
let a = _mm256_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff, 0x0123456789abcdef, 0x8899aabbccddeeff); | ||
let k = _mm256_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee, 0x1133557799bbddff, 0x0022446688aaccee); | ||
let e = _mm256_set_epi64x(0x16ab0e57dfc442ed, 0x28e4ee1884504333, 0x16ab0e57dfc442ed, 0x28e4ee1884504333); | ||
let r = _mm256_aesenc_epi128(a, k); | ||
assert_eq_m256i(r, e); | ||
|
||
helper_for_256_avx512vaes(_mm_aesenc_si128,_mm256_aesenc_epi128); | ||
} | ||
|
||
#[simd_test(enable = "avx512vaes,avx512vl")] | ||
unsafe fn test_mm256_aesenclast_epi128() { | ||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx. | ||
let a = _mm256_set_epi64x(0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff); | ||
let k = _mm256_set_epi64x(0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee); | ||
let e = _mm256_set_epi64x(0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8,0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8); | ||
let r = _mm256_aesenclast_epi128(a, k); | ||
assert_eq_m256i(r, e); | ||
|
||
helper_for_256_avx512vaes(_mm_aesenclast_si128,_mm256_aesenclast_epi128); | ||
} | ||
|
||
#[target_feature(enable = "avx512vaes,avx512f")] | ||
unsafe fn helper_for_512_avx512vaes(linear : unsafe fn(__m128i,__m128i)->__m128i, vectorized : unsafe fn(__m512i,__m512i)->__m512i) { | ||
let a = _mm512_set_epi64( | ||
0xDCB4DB3657BF0B7D, 0x18DB0601068EDD9F, 0xB76B908233200DC5, 0xE478235FA8E22D5E, | ||
0xAB05CFFA2621154C, 0x1171B47A186174C9, 0x8C6B6C0E7595CEC9, 0xBE3E7D4934E961BD | ||
); | ||
let k = _mm512_set_epi64( | ||
0x672F6F105A94CEA7, 0x8298B8FFCA5F829C, 0xA3927047B3FB61D8, 0x978093862CDE7187, | ||
0xB1927AB22F31D0EC, 0xA9A5DA619BE4D7AF, 0xCA2590F56884FDC6, 0x19BE9F660038BDB5 | ||
); | ||
let mut a_decomp = [_mm_setzero_si128();4]; | ||
a_decomp[0] = _mm512_extracti32x4_epi32(a,0); | ||
a_decomp[1] = _mm512_extracti32x4_epi32(a,1); | ||
a_decomp[2] = _mm512_extracti32x4_epi32(a,2); | ||
a_decomp[3] = _mm512_extracti32x4_epi32(a,3); | ||
let mut k_decomp = [_mm_setzero_si128();4]; | ||
k_decomp[0] = _mm512_extracti32x4_epi32(k,0); | ||
k_decomp[1] = _mm512_extracti32x4_epi32(k,1); | ||
k_decomp[2] = _mm512_extracti32x4_epi32(k,2); | ||
k_decomp[3] = _mm512_extracti32x4_epi32(k,3); | ||
let r = vectorized(a,k); | ||
let mut e_decomp = [_mm_setzero_si128();4]; | ||
for i in 0..4 { | ||
e_decomp[i] = linear(a_decomp[i],k_decomp[i]); | ||
} | ||
assert_eq_m128i(_mm512_extracti32x4_epi32(r,0),e_decomp[0]); | ||
assert_eq_m128i(_mm512_extracti32x4_epi32(r,1),e_decomp[1]); | ||
assert_eq_m128i(_mm512_extracti32x4_epi32(r,2),e_decomp[2]); | ||
assert_eq_m128i(_mm512_extracti32x4_epi32(r,3),e_decomp[3]); | ||
} | ||
|
||
#[simd_test(enable = "avx512vaes,avx512f")] | ||
unsafe fn test_mm512_aesdec_epi128() { | ||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx. | ||
let a = _mm512_set_epi64( | ||
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff, | ||
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff | ||
); | ||
let k = _mm512_set_epi64( | ||
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee, | ||
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee | ||
); | ||
let e = _mm512_set_epi64( | ||
0x044e4f5176fec48f, 0xb57ecfa381da39ee,0x044e4f5176fec48f, 0xb57ecfa381da39ee, | ||
0x044e4f5176fec48f, 0xb57ecfa381da39ee,0x044e4f5176fec48f, 0xb57ecfa381da39ee | ||
); | ||
let r = _mm512_aesdec_epi128(a, k); | ||
assert_eq_m512i(r, e); | ||
|
||
helper_for_512_avx512vaes(_mm_aesdec_si128,_mm512_aesdec_epi128); | ||
} | ||
|
||
#[simd_test(enable = "avx512vaes,avx512f")] | ||
unsafe fn test_mm512_aesdeclast_epi128() { | ||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx. | ||
let a = _mm512_set_epi64( | ||
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff, | ||
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff | ||
); | ||
let k = _mm512_set_epi64( | ||
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee, | ||
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee | ||
); | ||
let e = _mm512_set_epi64( | ||
0x36cad57d9072bf9e, 0xf210dd981fa4a493,0x36cad57d9072bf9e, 0xf210dd981fa4a493, | ||
0x36cad57d9072bf9e, 0xf210dd981fa4a493,0x36cad57d9072bf9e, 0xf210dd981fa4a493 | ||
); | ||
let r = _mm512_aesdeclast_epi128(a, k); | ||
assert_eq_m512i(r, e); | ||
|
||
helper_for_512_avx512vaes(_mm_aesdeclast_si128,_mm512_aesdeclast_epi128); | ||
} | ||
|
||
#[simd_test(enable = "avx512vaes,avx512f")] | ||
unsafe fn test_mm512_aesenc_epi128() { | ||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx. | ||
// they are repeated appropriately | ||
let a = _mm512_set_epi64( | ||
0x0123456789abcdef, 0x8899aabbccddeeff, 0x0123456789abcdef, 0x8899aabbccddeeff, | ||
0x0123456789abcdef, 0x8899aabbccddeeff, 0x0123456789abcdef, 0x8899aabbccddeeff | ||
); | ||
let k = _mm512_set_epi64( | ||
0x1133557799bbddff, 0x0022446688aaccee, 0x1133557799bbddff, 0x0022446688aaccee, | ||
0x1133557799bbddff, 0x0022446688aaccee, 0x1133557799bbddff, 0x0022446688aaccee, | ||
); | ||
let e = _mm512_set_epi64( | ||
0x16ab0e57dfc442ed, 0x28e4ee1884504333, 0x16ab0e57dfc442ed, 0x28e4ee1884504333, | ||
0x16ab0e57dfc442ed, 0x28e4ee1884504333, 0x16ab0e57dfc442ed, 0x28e4ee1884504333 | ||
); | ||
let r = _mm512_aesenc_epi128(a, k); | ||
assert_eq_m512i(r, e); | ||
|
||
helper_for_512_avx512vaes(_mm_aesenc_si128,_mm512_aesenc_epi128); | ||
} | ||
|
||
#[simd_test(enable = "avx512vaes,avx512f")] | ||
unsafe fn test_mm512_aesenclast_epi128() { | ||
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx. | ||
let a = _mm512_set_epi64( | ||
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff, | ||
0x0123456789abcdef, 0x8899aabbccddeeff,0x0123456789abcdef, 0x8899aabbccddeeff, | ||
); | ||
let k = _mm512_set_epi64( | ||
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee, | ||
0x1133557799bbddff, 0x0022446688aaccee,0x1133557799bbddff, 0x0022446688aaccee | ||
); | ||
let e = _mm512_set_epi64( | ||
0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8,0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8, | ||
0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8,0xb6dd7df25d7ab320, 0x4b04f98cf4c860f8 | ||
); | ||
let r = _mm512_aesenclast_epi128(a, k); | ||
assert_eq_m512i(r, e); | ||
|
||
helper_for_512_avx512vaes(_mm_aesenclast_si128,_mm512_aesenclast_epi128); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters