Skip to content

Commit

Permalink
Work around a miscompilation issue in clang 12 (Xcode 13)
Browse files Browse the repository at this point in the history
  • Loading branch information
reneme committed Oct 22, 2021
1 parent cb4c714 commit 0043fe8
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 22 deletions.
46 changes: 35 additions & 11 deletions src/lib/hash/sha3/sha3.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,47 @@
#include <botan/exceptn.h>
#include <botan/cpuid.h>

#include <tuple>

namespace Botan {

namespace {

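// Workaround for a suspected bug in clang 12 (and Apple clang 13, as shipped
// with Xcode 13) that miscompiled the SHA-3 implementation at optimization
// level -O2 and higher. On the affected compilers the helper below is marked
// noinline; on every other compiler it is an ordinary inline function.
//
// For details, see: https://github.com/randombit/botan/issues/2802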
#if defined(__clang__) && \
   (( defined(__apple_build_version__) && __clang_major__ == 13) || \
    (!defined(__apple_build_version__) && __clang_major__ == 12))
   #define BOTAN_WORKAROUND_MAYBE_INLINE __attribute__((noinline))
#else
   #define BOTAN_WORKAROUND_MAYBE_INLINE inline
#endif

/**
* XOR together every fifth word of the 25-word state, once per starting
* offset 0..4.
*
* @param A the 25-word state
* @return a tuple whose i-th element is
*         A[i] ^ A[i+5] ^ A[i+10] ^ A[i+15] ^ A[i+20]
*/
BOTAN_WORKAROUND_MAYBE_INLINE std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>
xor_CNs(const uint64_t A[25])
   {
   const uint64_t col0 = A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20];
   const uint64_t col1 = A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21];
   const uint64_t col2 = A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22];
   const uint64_t col3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23];
   const uint64_t col4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24];

   return std::make_tuple(col0, col1, col2, col3, col4);
   }

// The macro is only meant for the helper above; do not leak it further.
#undef BOTAN_WORKAROUND_MAYBE_INLINE

inline void SHA3_round(uint64_t T[25], const uint64_t A[25], uint64_t RC)
{
const uint64_t C0 = A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20];
const uint64_t C1 = A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21];
const uint64_t C2 = A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22];
const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23];
const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24];

const uint64_t D0 = rotl<1>(C0) ^ C3;
const uint64_t D1 = rotl<1>(C1) ^ C4;
const uint64_t D2 = rotl<1>(C2) ^ C0;
const uint64_t D3 = rotl<1>(C3) ^ C1;
const uint64_t D4 = rotl<1>(C4) ^ C2;
const auto Cs = xor_CNs(A);

const uint64_t D0 = rotl<1>(std::get<0>(Cs)) ^ std::get<3>(Cs);
const uint64_t D1 = rotl<1>(std::get<1>(Cs)) ^ std::get<4>(Cs);
const uint64_t D2 = rotl<1>(std::get<2>(Cs)) ^ std::get<0>(Cs);
const uint64_t D3 = rotl<1>(std::get<3>(Cs)) ^ std::get<1>(Cs);
const uint64_t D4 = rotl<1>(std::get<4>(Cs)) ^ std::get<2>(Cs);

const uint64_t B00 = A[ 0] ^ D1;
const uint64_t B01 = rotl<44>(A[ 6] ^ D2);
Expand Down
46 changes: 35 additions & 11 deletions src/lib/hash/sha3/sha3_bmi2/sha3_bmi2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,47 @@
#include <botan/sha3.h>
#include <botan/rotate.h>

#include <tuple>

namespace Botan {

namespace {

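// Workaround for a suspected bug in clang 12 (and Apple clang 13, as shipped
// with Xcode 13) that miscompiled the SHA-3 implementation at optimization
// level -O2 and higher. On the affected compilers the helper below is marked
// noinline; on every other compiler it is an ordinary inline function.
//
// For details, see: https://github.com/randombit/botan/issues/2802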
#if defined(__clang__) && \
   (( defined(__apple_build_version__) && __clang_major__ == 13) || \
    (!defined(__apple_build_version__) && __clang_major__ == 12))
   #define BOTAN_WORKAROUND_MAYBE_INLINE __attribute__((noinline))
#else
   #define BOTAN_WORKAROUND_MAYBE_INLINE inline
#endif

/**
* XOR together every fifth word of the 25-word state, once per starting
* offset 0..4.
*
* The tuple is constructed explicitly instead of through a braced return
* list: `return {...}` needs std::tuple's converting constructor to be
* non-explicit, which older standard library implementations do not provide.
*
* @param A the 25-word state
* @return a tuple whose i-th element is
*         A[i] ^ A[i+5] ^ A[i+10] ^ A[i+15] ^ A[i+20]
*/
BOTAN_WORKAROUND_MAYBE_INLINE std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>
xor_CNs(const uint64_t A[25])
   {
   return std::tuple<uint64_t, uint64_t, uint64_t, uint64_t, uint64_t>(
      A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20],
      A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21],
      A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22],
      A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23],
      A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24]);
   }

// The macro is only meant for the helper above; do not leak it further.
#undef BOTAN_WORKAROUND_MAYBE_INLINE

inline void SHA3_BMI2_round(uint64_t T[25], const uint64_t A[25], uint64_t RC)
{
const uint64_t C0 = A[0] ^ A[5] ^ A[10] ^ A[15] ^ A[20];
const uint64_t C1 = A[1] ^ A[6] ^ A[11] ^ A[16] ^ A[21];
const uint64_t C2 = A[2] ^ A[7] ^ A[12] ^ A[17] ^ A[22];
const uint64_t C3 = A[3] ^ A[8] ^ A[13] ^ A[18] ^ A[23];
const uint64_t C4 = A[4] ^ A[9] ^ A[14] ^ A[19] ^ A[24];

const uint64_t D0 = rotl<1>(C0) ^ C3;
const uint64_t D1 = rotl<1>(C1) ^ C4;
const uint64_t D2 = rotl<1>(C2) ^ C0;
const uint64_t D3 = rotl<1>(C3) ^ C1;
const uint64_t D4 = rotl<1>(C4) ^ C2;
const auto Cs = xor_CNs(A);

const uint64_t D0 = rotl<1>(std::get<0>(Cs)) ^ std::get<3>(Cs);
const uint64_t D1 = rotl<1>(std::get<1>(Cs)) ^ std::get<4>(Cs);
const uint64_t D2 = rotl<1>(std::get<2>(Cs)) ^ std::get<0>(Cs);
const uint64_t D3 = rotl<1>(std::get<3>(Cs)) ^ std::get<1>(Cs);
const uint64_t D4 = rotl<1>(std::get<4>(Cs)) ^ std::get<2>(Cs);

const uint64_t B00 = A[ 0] ^ D1;
const uint64_t B01 = rotl<44>(A[ 6] ^ D2);
Expand Down

0 comments on commit 0043fe8

Please sign in to comment.