From a8b1aeffa4ea9c84a278a65892033db2a5fe149f Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Mon, 27 Jun 2022 15:49:08 -0700 Subject: [PATCH 01/17] Move Atomic_store (and atomic_load32) to xatomic.h --- stl/inc/xatomic.h | 165 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) diff --git a/stl/inc/xatomic.h b/stl/inc/xatomic.h index d0472d7ed4..40e0c5930c 100644 --- a/stl/inc/xatomic.h +++ b/stl/inc/xatomic.h @@ -50,6 +50,171 @@ _STL_DISABLE_CLANG_WARNINGS #define _MT_INCR(x) _INTRIN_RELAXED(_InterlockedIncrement)(reinterpret_cast(&x)) #define _MT_DECR(x) _INTRIN_ACQ_REL(_InterlockedDecrement)(reinterpret_cast(&x)) +#ifndef _INVALID_MEMORY_ORDER +#ifdef _DEBUG +#define _INVALID_MEMORY_ORDER _STL_REPORT_ERROR("Invalid memory order") +#else // ^^^ _DEBUG / !_DEBUG vvv +#define _INVALID_MEMORY_ORDER +#endif // _DEBUG +#endif // _INVALID_MEMORY_ORDER + +#define _Compiler_barrier() _STL_DISABLE_DEPRECATED_WARNING _ReadWriteBarrier() _STL_RESTORE_DEPRECATED_WARNING + +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) +#define _Memory_barrier() __dmb(0xB) // inner shared data memory barrier +#define _Compiler_or_memory_barrier() _Memory_barrier() +#elif defined(_M_IX86) || defined(_M_X64) +// x86/x64 hardware only emits memory barriers inside _Interlocked intrinsics +#define _Compiler_or_memory_barrier() _Compiler_barrier() +#else // ^^^ x86/x64 / unsupported hardware vvv +#error Unsupported hardware +#endif // hardware + +_EXTERN_C +enum { + _Atomic_memory_order_relaxed, + _Atomic_memory_order_consume, + _Atomic_memory_order_acquire, + _Atomic_memory_order_release, + _Atomic_memory_order_acq_rel, + _Atomic_memory_order_seq_cst, +}; + +inline void _Atomic_store8(volatile char* _Ptr, char _Desired, int _Order) { + switch (_Order) { + case _Atomic_memory_order_relaxed: + __iso_volatile_store8(_Ptr, _Desired); + return; + case _Atomic_memory_order_release: + _Compiler_or_memory_barrier(); + __iso_volatile_store8(_Ptr, _Desired); + return; + default: + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_acq_rel: + _INVALID_MEMORY_ORDER; + // [[fallthrough]]; + case _Atomic_memory_order_seq_cst: +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) + _Memory_barrier(); + __iso_volatile_store8(_Ptr, _Desired); + _Memory_barrier(); +#else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv + (void) _InterlockedExchange8(_Ptr, _Desired); +#endif // hardware + return; + } +} + +inline void _Atomic_store16(volatile short* _Ptr, short _Desired, int _Order) { + switch (_Order) { + case _Atomic_memory_order_relaxed: + __iso_volatile_store16(_Ptr, _Desired); + return; + case _Atomic_memory_order_release: + _Compiler_or_memory_barrier(); + __iso_volatile_store16(_Ptr, _Desired); + return; + default: + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_acq_rel: + _INVALID_MEMORY_ORDER; + // [[fallthrough]]; + case _Atomic_memory_order_seq_cst: +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) + _Memory_barrier(); + __iso_volatile_store16(_Ptr, _Desired); + _Memory_barrier(); +#else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv + (void) _InterlockedExchange16(_Ptr, _Desired); +#endif // hardware + return; + } +} + +inline void _Atomic_store32(volatile int* _Ptr, int _Desired, int _Order) { + switch (_Order) { + case _Atomic_memory_order_relaxed: + __iso_volatile_store32(_Ptr, _Desired); + return; + case 
_Atomic_memory_order_release: + _Compiler_or_memory_barrier(); + __iso_volatile_store32(_Ptr, _Desired); + return; + default: + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_acq_rel: + _INVALID_MEMORY_ORDER; + // [[fallthrough]]; + case _Atomic_memory_order_seq_cst: +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) + _Memory_barrier(); + __iso_volatile_store32(_Ptr, _Desired); + _Memory_barrier(); +#else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv + (void) _InterlockedExchange(reinterpret_cast(_Ptr), static_cast(_Desired)); +#endif // hardware + return; + } +} + +inline void _Atomic_store64(volatile long long* _Ptr, long long _Desired, int _Order) { + switch (_Order) { + case _Atomic_memory_order_relaxed: + __iso_volatile_store64(_Ptr, _Desired); + return; + case _Atomic_memory_order_release: + _Compiler_or_memory_barrier(); + __iso_volatile_store64(_Ptr, _Desired); + return; + default: + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_acq_rel: + _INVALID_MEMORY_ORDER; + // [[fallthrough]]; + case _Atomic_memory_order_seq_cst: +#if defined(_M_IX86) + _Compiler_barrier(); + __iso_volatile_store64(_Ptr, _Desired); + _STD atomic_thread_fence(memory_order_seq_cst); +#elif defined(_M_ARM64) || defined(_M_ARM64EC) + _Memory_barrier(); + __iso_volatile_store64(_Ptr, _Desired); + _Memory_barrier(); +#else // ^^^ _M_ARM64, _M_ARM64EC / ARM32, x64 vvv + (void) _InterlockedExchange64(_Ptr, _Desired); +#endif // ^^^ ARM32, x64 ^^^ + return; + } +} + +inline int _Atomic_load32(const volatile int* _Ptr, int _Order) { + int _As_bytes = __iso_volatile_load32(_Ptr); + switch (_Order) { + case _Atomic_memory_order_relaxed: + break; + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_seq_cst: + _Compiler_or_memory_barrier(); + // load barrier + break; + case _Atomic_memory_order_release: + case _Atomic_memory_order_acq_rel: + default: + _INVALID_MEMORY_ORDER; + break; + } + return _As_bytes; +} + + +_END_EXTERN_C + _STD_BEGIN #if _HAS_CXX20 From ba8a1cdcec0d794c7f03bdda0697d3acd39cd3ff Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Mon, 27 Jun 2022 17:46:31 -0700 Subject: [PATCH 02/17] Remove Invalid memory order and the compiler barrier machinery from (they are now in xatomic.h) --- stl/inc/atomic | 184 +++++++------------------------------------------ 1 file changed, 26 insertions(+), 158 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index f4761a13c1..aafe62dae3 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -29,26 +29,6 @@ _STL_DISABLE_CLANG_WARNINGS #pragma push_macro("new") #undef new -#define _Compiler_barrier() _STL_DISABLE_DEPRECATED_WARNING _ReadWriteBarrier() _STL_RESTORE_DEPRECATED_WARNING - -#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) -#define _Memory_barrier() __dmb(0xB) // inner shared data memory barrier -#define _Compiler_or_memory_barrier() _Memory_barrier() -#elif defined(_M_IX86) || defined(_M_X64) -// x86/x64 hardware only emits memory barriers inside _Interlocked intrinsics -#define _Compiler_or_memory_barrier() _Compiler_barrier() -#else // ^^^ x86/x64 / unsupported hardware vvv -#error Unsupported hardware -#endif // hardware - -#ifndef _INVALID_MEMORY_ORDER -#ifdef _DEBUG -#define _INVALID_MEMORY_ORDER _STL_REPORT_ERROR("Invalid memory order") -#else // ^^^ _DEBUG / !_DEBUG vvv -#define _INVALID_MEMORY_ORDER -#endif // _DEBUG -#endif // _INVALID_MEMORY_ORDER - 
#ifdef _WIN64 #if _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 #define _STD_COMPARE_EXCHANGE_128 _InterlockedCompareExchange128 @@ -302,23 +282,23 @@ struct _Atomic_padded { #else // ^^^ don't break ABI / break ABI vvv template struct _Atomic_storage_traits { // properties for how _Ty is stored in an atomic - static constexpr size_t _Storage_size = sizeof(_Ty) == 1 ? 1 - : sizeof(_Ty) == 2 ? 2 - : sizeof(_Ty) <= 4 ? 4 - : sizeof(_Ty) <= 8 ? 8 + static constexpr size_t _Storage_size = sizeof(_Ty) == 1 ? 1 + : sizeof(_Ty) == 2 ? 2 + : sizeof(_Ty) <= 4 ? 4 + : sizeof(_Ty) <= 8 ? 8 #if defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM64EC) : sizeof(_Ty) <= 16 ? 16 #endif // 64 bits : sizeof(_Ty); static constexpr size_t _Padding_size = _Storage_size - sizeof(_Ty); - static constexpr bool _Uses_padding = _Padding_size != 0; + static constexpr bool _Uses_padding = _Padding_size != 0; }; template struct _Atomic_storage_traits<_Ty&> { // properties for how _Ty is stored in an atomic_ref static constexpr size_t _Storage_size = sizeof(_Ty); - static constexpr bool _Uses_padding = false; + static constexpr bool _Uses_padding = false; }; template ::_Uses_padding> @@ -629,49 +609,24 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const char _As_bytes = _Atomic_reinterpret_as(_Value); -#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) - _Memory_barrier(); - __iso_volatile_store8(_Mem, _As_bytes); - _Memory_barrier(); -#else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv - (void) _InterlockedExchange8(_Mem, _As_bytes); -#endif // hardware + _Atomic_store8(_Mem, _As_bytes, _Atomic_memory_order_seq_cst); } void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const char _As_bytes = _Atomic_reinterpret_as(_Value); - switch (_Order) { - case memory_order_relaxed: - __iso_volatile_store8(_Mem, _As_bytes); - return; - case memory_order_release: - _Compiler_or_memory_barrier(); - __iso_volatile_store8(_Mem, _As_bytes); - return; - default: - case memory_order_consume: - case memory_order_acquire: - case memory_order_acq_rel: - _INVALID_MEMORY_ORDER; - // [[fallthrough]]; - case memory_order_seq_cst: - store(_Value); - return; - } + _Atomic_store8(_Mem, _As_bytes, static_cast(_Order)); } _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); - char _As_bytes = __iso_volatile_load8(_Mem); - _Compiler_or_memory_barrier(); + char _As_bytes = _Atomic_load8(_Mem, _Atomic_memory_order_seq_cst); return reinterpret_cast<_TVal&>(_As_bytes); } _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); - char _As_bytes = __iso_volatile_load8(_Mem); - _Load_barrier(_Order); + char _As_bytes = _Atomic_load8(_Mem, static_cast(_Order)); return reinterpret_cast<_TVal&>(_As_bytes); } @@ -750,49 +705,24 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const short _As_bytes = _Atomic_reinterpret_as(_Value); -#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) - _Memory_barrier(); - __iso_volatile_store16(_Mem, _As_bytes); - _Memory_barrier(); -#else 
// ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv - (void) _InterlockedExchange16(_Mem, _As_bytes); -#endif // hardware + _Atomic_store16(_Mem, _As_bytes, _Atomic_memory_order_seq_cst); } void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const short _As_bytes = _Atomic_reinterpret_as(_Value); - switch (_Order) { - case memory_order_relaxed: - __iso_volatile_store16(_Mem, _As_bytes); - return; - case memory_order_release: - _Compiler_or_memory_barrier(); - __iso_volatile_store16(_Mem, _As_bytes); - return; - default: - case memory_order_consume: - case memory_order_acquire: - case memory_order_acq_rel: - _INVALID_MEMORY_ORDER; - // [[fallthrough]]; - case memory_order_seq_cst: - store(_Value); - return; - } + _Atomic_store16(_Mem, _As_bytes, static_cast(_Order)); } _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); - short _As_bytes = __iso_volatile_load16(_Mem); - _Compiler_or_memory_barrier(); + short _As_bytes = _Atomic_load16(_Mem, _Atomic_memory_order_seq_cst); return reinterpret_cast<_TVal&>(_As_bytes); } _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); - short _As_bytes = __iso_volatile_load16(_Mem); - _Load_barrier(_Order); + short _As_bytes = _Atomic_load16(_Mem, static_cast(_Order)); return reinterpret_cast<_TVal&>(_As_bytes); } @@ -868,49 +798,26 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics } void store(const _TVal _Value) noexcept { // store with sequential consistency -#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) - _Memory_barrier(); - __iso_volatile_store32(_Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); - _Memory_barrier(); -#else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv - (void) _InterlockedExchange(_Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); -#endif // hardware + const auto _Mem = _Atomic_address_as(_Storage); + const int _As_bytes = _Atomic_reinterpret_as(_Value); + _Atomic_store32(_Mem, _As_bytes, _Atomic_memory_order_seq_cst); } void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const int _As_bytes = _Atomic_reinterpret_as(_Value); - switch (_Order) { - case memory_order_relaxed: - __iso_volatile_store32(_Mem, _As_bytes); - return; - case memory_order_release: - _Compiler_or_memory_barrier(); - __iso_volatile_store32(_Mem, _As_bytes); - return; - default: - case memory_order_consume: - case memory_order_acquire: - case memory_order_acq_rel: - _INVALID_MEMORY_ORDER; - // [[fallthrough]]; - case memory_order_seq_cst: - store(_Value); - return; - } + _Atomic_store32(_Mem, _As_bytes, static_cast(_Order)); } _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); - auto _As_bytes = __iso_volatile_load32(_Mem); - _Compiler_or_memory_barrier(); + int _As_bytes = _Atomic_load32(_Mem, _Atomic_memory_order_seq_cst); return reinterpret_cast<_TVal&>(_As_bytes); } _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); - auto _As_bytes = __iso_volatile_load32(_Mem); - _Load_barrier(_Order); + int _As_bytes = _Atomic_load32(_Mem, static_cast(_Order)); return 
reinterpret_cast<_TVal&>(_As_bytes); } @@ -988,63 +895,24 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const long long _As_bytes = _Atomic_reinterpret_as(_Value); -#if defined(_M_IX86) - _Compiler_barrier(); - __iso_volatile_store64(_Mem, _As_bytes); - _STD atomic_thread_fence(memory_order_seq_cst); -#elif defined(_M_ARM64) || defined(_M_ARM64EC) - _Memory_barrier(); - __iso_volatile_store64(_Mem, _As_bytes); - _Memory_barrier(); -#else // ^^^ _M_ARM64, _M_ARM64EC / ARM32, x64 vvv - (void) _InterlockedExchange64(_Mem, _As_bytes); -#endif // ^^^ ARM32, x64 ^^^ + _Atomic_store64(_Mem, _As_bytes, _Atomic_memory_order_seq_cst); } void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const long long _As_bytes = _Atomic_reinterpret_as(_Value); - switch (_Order) { - case memory_order_relaxed: - __iso_volatile_store64(_Mem, _As_bytes); - return; - case memory_order_release: - _Compiler_or_memory_barrier(); - __iso_volatile_store64(_Mem, _As_bytes); - return; - default: - case memory_order_consume: - case memory_order_acquire: - case memory_order_acq_rel: - _INVALID_MEMORY_ORDER; - // [[fallthrough]]; - case memory_order_seq_cst: - store(_Value); - return; - } + _Atomic_store64(_Mem, _As_bytes, static_cast(_Order)); } _NODISCARD _TVal load() const noexcept { // load with sequential consistency - const auto _Mem = _Atomic_address_as(_Storage); - long long _As_bytes; -#ifdef _M_ARM - _As_bytes = __ldrexd(_Mem); - _Memory_barrier(); -#else - _As_bytes = __iso_volatile_load64(_Mem); - _Compiler_or_memory_barrier(); -#endif + const auto _Mem = _Atomic_address_as(_Storage); + long long _As_bytes = _Atomic_load64(_Mem, _Atomic_memory_order_seq_cst); return reinterpret_cast<_TVal&>(_As_bytes); } _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order - const auto _Mem = _Atomic_address_as(_Storage); -#ifdef _M_ARM - long long _As_bytes = __ldrexd(_Mem); -#else - long long _As_bytes = __iso_volatile_load64(_Mem); -#endif - _Load_barrier(_Order); + const auto _Mem = _Atomic_address_as(_Storage); + long long _As_bytes = _Atomic_load64(_Mem, static_cast(_Order)); return reinterpret_cast<_TVal&>(_As_bytes); } From e10e730c1ddd3a01777bc42672673ca02b297bd5 Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Tue, 28 Jun 2022 12:26:42 -0700 Subject: [PATCH 03/17] add remainting atomic loads to xatomic.h --- stl/inc/xatomic.h | 61 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/stl/inc/xatomic.h b/stl/inc/xatomic.h index 40e0c5930c..f82aaa7b83 100644 --- a/stl/inc/xatomic.h +++ b/stl/inc/xatomic.h @@ -191,6 +191,43 @@ inline void _Atomic_store64(volatile long long* _Ptr, long long _Desired, int _O return; } } +inline char _Atomic_load8(const volatile char* _Ptr, int _Order) { + char _As_bytes = __iso_volatile_load8(_Ptr); + switch (_Order) { + case _Atomic_memory_order_relaxed: + break; + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_seq_cst: + _Compiler_or_memory_barrier(); + break; + case _Atomic_memory_order_release: + case _Atomic_memory_order_acq_rel: + default: + _INVALID_MEMORY_ORDER; + break; + } + return _As_bytes; +} + +inline short _Atomic_load16(const volatile short* _Ptr, int _Order) { + short _As_bytes 
= __iso_volatile_load16(_Ptr); + switch (_Order) { + case _Atomic_memory_order_relaxed: + break; + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_seq_cst: + _Compiler_or_memory_barrier(); + break; + case _Atomic_memory_order_release: + case _Atomic_memory_order_acq_rel: + default: + _INVALID_MEMORY_ORDER; + break; + } + return _As_bytes; +} inline int _Atomic_load32(const volatile int* _Ptr, int _Order) { int _As_bytes = __iso_volatile_load32(_Ptr); @@ -201,7 +238,29 @@ inline int _Atomic_load32(const volatile int* _Ptr, int _Order) { case _Atomic_memory_order_acquire: case _Atomic_memory_order_seq_cst: _Compiler_or_memory_barrier(); - // load barrier + break; + case _Atomic_memory_order_release: + case _Atomic_memory_order_acq_rel: + default: + _INVALID_MEMORY_ORDER; + break; + } + return _As_bytes; +} + +inline long long _Atomic_load64(const volatile long long* _Ptr, int _Order) { +#ifdef _M_ARM + long long _As_bytes = __ldrexd(_Ptr); +#else + long long _As_bytes = __iso_volatile_load64(_Ptr); +#endif + switch (_Order) { + case _Atomic_memory_order_relaxed: + break; + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_seq_cst: + _Compiler_or_memory_barrier(); break; case _Atomic_memory_order_release: case _Atomic_memory_order_acq_rel: From 73c44554c440802285cd872f50419ea781d3f8d9 Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Tue, 28 Jun 2022 17:00:01 -0700 Subject: [PATCH 04/17] Move _ATOMIC_CHOOSE_INTRINSIC to xatomic.h. --- stl/inc/atomic | 31 +------------------------------ stl/inc/xatomic.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index aafe62dae3..96f84e165e 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -53,33 +53,6 @@ extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; #define _ATOMIC_HAS_DCAS 0 #endif // _STD_ATOMIC_ALWAYS_USE_CMPXCHG16B == 1 || !defined(_M_X64) || defined(_M_ARM64EC) -#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) -#define _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _Intrinsic, ...) \ - _Check_memory_order(_Order); \ - _Result = _Intrinsic(__VA_ARGS__) -#elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) -#define _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _Intrinsic, ...) 
\ - switch (_Order) { \ - case memory_order_relaxed: \ - _Result = _INTRIN_RELAXED(_Intrinsic)(__VA_ARGS__); \ - break; \ - case memory_order_consume: \ - case memory_order_acquire: \ - _Result = _INTRIN_ACQUIRE(_Intrinsic)(__VA_ARGS__); \ - break; \ - case memory_order_release: \ - _Result = _INTRIN_RELEASE(_Intrinsic)(__VA_ARGS__); \ - break; \ - default: \ - _INVALID_MEMORY_ORDER; \ - /* [[fallthrough]]; */ \ - case memory_order_acq_rel: \ - case memory_order_seq_cst: \ - _Result = _Intrinsic(__VA_ARGS__); \ - break; \ - } -#endif // hardware - #define ATOMIC_BOOL_LOCK_FREE 2 #define ATOMIC_CHAR_LOCK_FREE 2 #ifdef __cpp_lib_char8_t @@ -182,9 +155,7 @@ _Ty kill_dependency(_Ty _Arg) noexcept { // "magic" template that kills dependen inline void _Check_memory_order(const memory_order _Order) noexcept { // check that _Order is a valid memory_order - if (static_cast(_Order) > static_cast(memory_order_seq_cst)) { - _INVALID_MEMORY_ORDER; - } + ::_Check_memory_order(static_cast(_Order)); } inline void _Check_store_memory_order(const memory_order _Order) noexcept { diff --git a/stl/inc/xatomic.h b/stl/inc/xatomic.h index f82aaa7b83..b13cff4487 100644 --- a/stl/inc/xatomic.h +++ b/stl/inc/xatomic.h @@ -70,6 +70,33 @@ _STL_DISABLE_CLANG_WARNINGS #error Unsupported hardware #endif // hardware +#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) +#define _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _Intrinsic, ...) \ + _Check_memory_order(_Order); \ + _Result = _Intrinsic(__VA_ARGS__) +#elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) +#define _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _Intrinsic, ...) \ + switch (_Order) { \ + case memory_order_relaxed: \ + _Result = _INTRIN_RELAXED(_Intrinsic)(__VA_ARGS__); \ + break; \ + case memory_order_consume: \ + case memory_order_acquire: \ + _Result = _INTRIN_ACQUIRE(_Intrinsic)(__VA_ARGS__); \ + break; \ + case memory_order_release: \ + _Result = _INTRIN_RELEASE(_Intrinsic)(__VA_ARGS__); \ + break; \ + default: \ + _INVALID_MEMORY_ORDER; \ + /* [[fallthrough]]; */ \ + case memory_order_acq_rel: \ + case memory_order_seq_cst: \ + _Result = _Intrinsic(__VA_ARGS__); \ + break; \ + } +#endif // hardware + _EXTERN_C enum { _Atomic_memory_order_relaxed, @@ -80,6 +107,12 @@ enum { _Atomic_memory_order_seq_cst, }; +inline void _Check_memory_order(const int _Order) { + if (_Order > _Atomic_memory_order_seq_cst) { + _INVALID_MEMORY_ORDER; + } +} + inline void _Atomic_store8(volatile char* _Ptr, char _Desired, int _Order) { switch (_Order) { case _Atomic_memory_order_relaxed: From e0f8df38c564bad64f9763fec1ef35a4fe74f437 Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Tue, 28 Jun 2022 17:00:22 -0700 Subject: [PATCH 05/17] add compare exchange strong functions for C --- stl/inc/xatomic.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/stl/inc/xatomic.h b/stl/inc/xatomic.h index b13cff4487..95dc6af63b 100644 --- a/stl/inc/xatomic.h +++ b/stl/inc/xatomic.h @@ -304,6 +304,51 @@ inline long long _Atomic_load64(const volatile long long* _Ptr, int _Order) { return _As_bytes; } +inline bool _Atomic_compare_exchange_strong8(volatile char* _Ptr, char* _Expected, char _Desired, int _Order) { + char _Prev_bytes; + char _Expected_bytes = *_Expected; + _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange8, _Ptr, _Desired, _Expected_bytes); + if (_Prev_bytes == _Expected_bytes) { + return true; + } + *_Expected = _Prev_bytes; + return false; +} + +inline bool 
_Atomic_compare_exchange_strong16(volatile short* _Ptr, short* _Expected, short _Desired, int _Order) { + short _Prev_bytes; + short _Expected_bytes = *_Expected; + _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange16, _Ptr, _Desired, _Expected_bytes); + if (_Prev_bytes == _Expected_bytes) { + return true; + } + *_Expected = _Prev_bytes; + return false; +} + +inline bool _Atomic_compare_exchange_strong32(volatile int* _Ptr, int* _Expected, int _Desired, int _Order) { + long _Prev_bytes; + long _Expected_bytes = *_Expected; + _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange, reinterpret_cast(_Ptr), + _Desired, _Expected_bytes); + if (_Prev_bytes == _Expected_bytes) { + return true; + } + *_Expected = _Prev_bytes; + return false; +} + +inline bool _Atomic_compare_exchange_strong64( + volatile long long* _Ptr, long long* _Expected, long long _Desired, int _Order) { + long long _Prev_bytes; + long long _Expected_bytes = *_Expected; + _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange64, _Ptr, _Desired, _Expected_bytes); + if (_Prev_bytes == _Expected_bytes) { + return true; + } + *_Expected = _Prev_bytes; + return false; +} _END_EXTERN_C From 39e2c1d7d5bd062afee3fd196e9e3544b333c975 Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Wed, 29 Jun 2022 16:50:31 -0700 Subject: [PATCH 06/17] move C atomic functions back to --- stl/inc/atomic | 319 +++++++++++++++++++++++++++++++++++++++++++++- stl/inc/xatomic.h | 302 ------------------------------------------- 2 files changed, 313 insertions(+), 308 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 96f84e165e..49a6163557 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -67,6 +67,313 @@ extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; #define ATOMIC_LLONG_LOCK_FREE 2 #define ATOMIC_POINTER_LOCK_FREE 2 +// The following code is SHARED with vcruntime and any updates +// should be mirrored. Also: if any macros are added they should be +// #undefed in vcruntime as well +#ifndef _INVALID_MEMORY_ORDER +#ifdef _DEBUG +#define _INVALID_MEMORY_ORDER _STL_REPORT_ERROR("Invalid memory order") +#else // ^^^ _DEBUG / !_DEBUG vvv +#define _INVALID_MEMORY_ORDER +#endif // _DEBUG +#endif // _INVALID_MEMORY_ORDER + +#define _Compiler_barrier() _STL_DISABLE_DEPRECATED_WARNING _ReadWriteBarrier() _STL_RESTORE_DEPRECATED_WARNING + +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) +#define _Memory_barrier() __dmb(0xB) // inner shared data memory barrier +#define _Compiler_or_memory_barrier() _Memory_barrier() +#elif defined(_M_IX86) || defined(_M_X64) +// x86/x64 hardware only emits memory barriers inside _Interlocked intrinsics +#define _Compiler_or_memory_barrier() _Compiler_barrier() +#else // ^^^ x86/x64 / unsupported hardware vvv +#error Unsupported hardware +#endif // hardware + +#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) +#define _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _Intrinsic, ...) \ + _Check_memory_order(_Order); \ + _Result = _Intrinsic(__VA_ARGS__) +#elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) +#define _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _Intrinsic, ...) 
\ + switch (_Order) { \ + case memory_order_relaxed: \ + _Result = _INTRIN_RELAXED(_Intrinsic)(__VA_ARGS__); \ + break; \ + case memory_order_consume: \ + case memory_order_acquire: \ + _Result = _INTRIN_ACQUIRE(_Intrinsic)(__VA_ARGS__); \ + break; \ + case memory_order_release: \ + _Result = _INTRIN_RELEASE(_Intrinsic)(__VA_ARGS__); \ + break; \ + default: \ + _INVALID_MEMORY_ORDER; \ + /* [[fallthrough]]; */ \ + case memory_order_acq_rel: \ + case memory_order_seq_cst: \ + _Result = _Intrinsic(__VA_ARGS__); \ + break; \ + } +#endif // hardware + +_EXTERN_C + +enum { + _Atomic_memory_order_relaxed, + _Atomic_memory_order_consume, + _Atomic_memory_order_acquire, + _Atomic_memory_order_release, + _Atomic_memory_order_acq_rel, + _Atomic_memory_order_seq_cst, +}; + +inline void _Check_memory_order(const int _Order) { + if (_Order > _Atomic_memory_order_seq_cst) { + _INVALID_MEMORY_ORDER; + } +} + +inline void _Atomic_store8(volatile char* _Ptr, char _Desired, int _Order) { + switch (_Order) { + case _Atomic_memory_order_relaxed: + __iso_volatile_store8(_Ptr, _Desired); + return; + case _Atomic_memory_order_release: + _Compiler_or_memory_barrier(); + __iso_volatile_store8(_Ptr, _Desired); + return; + default: + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_acq_rel: + _INVALID_MEMORY_ORDER; + // [[fallthrough]]; + case _Atomic_memory_order_seq_cst: +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) + _Memory_barrier(); + __iso_volatile_store8(_Ptr, _Desired); + _Memory_barrier(); +#else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv + (void) _InterlockedExchange8(_Ptr, _Desired); +#endif // hardware + return; + } +} + +inline void _Atomic_store16(volatile short* _Ptr, short _Desired, int _Order) { + switch (_Order) { + case _Atomic_memory_order_relaxed: + __iso_volatile_store16(_Ptr, _Desired); + return; + case _Atomic_memory_order_release: + _Compiler_or_memory_barrier(); + __iso_volatile_store16(_Ptr, _Desired); + return; + default: + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_acq_rel: + _INVALID_MEMORY_ORDER; + // [[fallthrough]]; + case _Atomic_memory_order_seq_cst: +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) + _Memory_barrier(); + __iso_volatile_store16(_Ptr, _Desired); + _Memory_barrier(); +#else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv + (void) _InterlockedExchange16(_Ptr, _Desired); +#endif // hardware + return; + } +} + +inline void _Atomic_store32(volatile int* _Ptr, int _Desired, int _Order) { + switch (_Order) { + case _Atomic_memory_order_relaxed: + __iso_volatile_store32(_Ptr, _Desired); + return; + case _Atomic_memory_order_release: + _Compiler_or_memory_barrier(); + __iso_volatile_store32(_Ptr, _Desired); + return; + default: + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_acq_rel: + _INVALID_MEMORY_ORDER; + // [[fallthrough]]; + case _Atomic_memory_order_seq_cst: +#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) + _Memory_barrier(); + __iso_volatile_store32(_Ptr, _Desired); + _Memory_barrier(); +#else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv + (void) _InterlockedExchange(reinterpret_cast(_Ptr), static_cast(_Desired)); +#endif // hardware + return; + } +} + +inline void _Atomic_store64(volatile long long* _Ptr, long long _Desired, int _Order) { + switch (_Order) { + case _Atomic_memory_order_relaxed: + 
__iso_volatile_store64(_Ptr, _Desired); + return; + case _Atomic_memory_order_release: + _Compiler_or_memory_barrier(); + __iso_volatile_store64(_Ptr, _Desired); + return; + default: + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_acq_rel: + _INVALID_MEMORY_ORDER; + // [[fallthrough]]; + case _Atomic_memory_order_seq_cst: +#if defined(_M_IX86) + _Compiler_barrier(); + __iso_volatile_store64(_Ptr, _Desired); + _STD atomic_thread_fence(memory_order_seq_cst); +#elif defined(_M_ARM64) || defined(_M_ARM64EC) + _Memory_barrier(); + __iso_volatile_store64(_Ptr, _Desired); + _Memory_barrier(); +#else // ^^^ _M_ARM64, _M_ARM64EC / ARM32, x64 vvv + (void) _InterlockedExchange64(_Ptr, _Desired); +#endif // ^^^ ARM32, x64 ^^^ + return; + } +} +inline char _Atomic_load8(const volatile char* _Ptr, int _Order) { + char _As_bytes = __iso_volatile_load8(_Ptr); + switch (_Order) { + case _Atomic_memory_order_relaxed: + break; + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_seq_cst: + _Compiler_or_memory_barrier(); + break; + case _Atomic_memory_order_release: + case _Atomic_memory_order_acq_rel: + default: + _INVALID_MEMORY_ORDER; + break; + } + return _As_bytes; +} + +inline short _Atomic_load16(const volatile short* _Ptr, int _Order) { + short _As_bytes = __iso_volatile_load16(_Ptr); + switch (_Order) { + case _Atomic_memory_order_relaxed: + break; + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_seq_cst: + _Compiler_or_memory_barrier(); + break; + case _Atomic_memory_order_release: + case _Atomic_memory_order_acq_rel: + default: + _INVALID_MEMORY_ORDER; + break; + } + return _As_bytes; +} + +inline int _Atomic_load32(const volatile int* _Ptr, int _Order) { + int _As_bytes = __iso_volatile_load32(_Ptr); + switch (_Order) { + case _Atomic_memory_order_relaxed: + break; + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_seq_cst: + _Compiler_or_memory_barrier(); + break; + case _Atomic_memory_order_release: + case _Atomic_memory_order_acq_rel: + default: + _INVALID_MEMORY_ORDER; + break; + } + return _As_bytes; +} + +inline long long _Atomic_load64(const volatile long long* _Ptr, int _Order) { +#ifdef _M_ARM + long long _As_bytes = __ldrexd(_Ptr); +#else + long long _As_bytes = __iso_volatile_load64(_Ptr); +#endif + switch (_Order) { + case _Atomic_memory_order_relaxed: + break; + case _Atomic_memory_order_consume: + case _Atomic_memory_order_acquire: + case _Atomic_memory_order_seq_cst: + _Compiler_or_memory_barrier(); + break; + case _Atomic_memory_order_release: + case _Atomic_memory_order_acq_rel: + default: + _INVALID_MEMORY_ORDER; + break; + } + return _As_bytes; +} + +inline bool _Atomic_compare_exchange_strong8(volatile char* _Ptr, char* _Expected, char _Desired, int _Order) { + char _Prev_bytes; + char _Expected_bytes = *_Expected; + _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange8, _Ptr, _Desired, _Expected_bytes); + if (_Prev_bytes == _Expected_bytes) { + return true; + } + *_Expected = _Prev_bytes; + return false; +} + +inline bool _Atomic_compare_exchange_strong16(volatile short* _Ptr, short* _Expected, short _Desired, int _Order) { + short _Prev_bytes; + short _Expected_bytes = *_Expected; + _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange16, _Ptr, _Desired, _Expected_bytes); + if (_Prev_bytes == _Expected_bytes) { + return true; + } + 
*_Expected = _Prev_bytes; + return false; +} + +inline bool _Atomic_compare_exchange_strong32(volatile int* _Ptr, int* _Expected, int _Desired, int _Order) { + long _Prev_bytes; + long _Expected_bytes = *_Expected; + _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange, reinterpret_cast(_Ptr), + _Desired, _Expected_bytes); + if (_Prev_bytes == _Expected_bytes) { + return true; + } + *_Expected = _Prev_bytes; + return false; +} + +inline bool _Atomic_compare_exchange_strong64( + volatile long long* _Ptr, long long* _Expected, long long _Desired, int _Order) { + long long _Prev_bytes; + long long _Expected_bytes = *_Expected; + _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange64, _Ptr, _Desired, _Expected_bytes); + if (_Prev_bytes == _Expected_bytes) { + return true; + } + *_Expected = _Prev_bytes; + return false; +} + +_END_EXTERN_C +// End of code shared with vcruntime + _EXTERN_C _Smtx_t* __stdcall __std_atomic_get_mutex(const void* _Key) noexcept; _END_EXTERN_C @@ -253,23 +560,23 @@ struct _Atomic_padded { #else // ^^^ don't break ABI / break ABI vvv template struct _Atomic_storage_traits { // properties for how _Ty is stored in an atomic - static constexpr size_t _Storage_size = sizeof(_Ty) == 1 ? 1 - : sizeof(_Ty) == 2 ? 2 - : sizeof(_Ty) <= 4 ? 4 - : sizeof(_Ty) <= 8 ? 8 + static constexpr size_t _Storage_size = sizeof(_Ty) == 1 ? 1 + : sizeof(_Ty) == 2 ? 2 + : sizeof(_Ty) <= 4 ? 4 + : sizeof(_Ty) <= 8 ? 8 #if defined(_M_X64) || defined(_M_ARM64) || defined(_M_ARM64EC) : sizeof(_Ty) <= 16 ? 16 #endif // 64 bits : sizeof(_Ty); static constexpr size_t _Padding_size = _Storage_size - sizeof(_Ty); - static constexpr bool _Uses_padding = _Padding_size != 0; + static constexpr bool _Uses_padding = _Padding_size != 0; }; template struct _Atomic_storage_traits<_Ty&> { // properties for how _Ty is stored in an atomic_ref static constexpr size_t _Storage_size = sizeof(_Ty); - static constexpr bool _Uses_padding = false; + static constexpr bool _Uses_padding = false; }; template ::_Uses_padding> diff --git a/stl/inc/xatomic.h b/stl/inc/xatomic.h index 95dc6af63b..d0472d7ed4 100644 --- a/stl/inc/xatomic.h +++ b/stl/inc/xatomic.h @@ -50,308 +50,6 @@ _STL_DISABLE_CLANG_WARNINGS #define _MT_INCR(x) _INTRIN_RELAXED(_InterlockedIncrement)(reinterpret_cast(&x)) #define _MT_DECR(x) _INTRIN_ACQ_REL(_InterlockedDecrement)(reinterpret_cast(&x)) -#ifndef _INVALID_MEMORY_ORDER -#ifdef _DEBUG -#define _INVALID_MEMORY_ORDER _STL_REPORT_ERROR("Invalid memory order") -#else // ^^^ _DEBUG / !_DEBUG vvv -#define _INVALID_MEMORY_ORDER -#endif // _DEBUG -#endif // _INVALID_MEMORY_ORDER - -#define _Compiler_barrier() _STL_DISABLE_DEPRECATED_WARNING _ReadWriteBarrier() _STL_RESTORE_DEPRECATED_WARNING - -#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) -#define _Memory_barrier() __dmb(0xB) // inner shared data memory barrier -#define _Compiler_or_memory_barrier() _Memory_barrier() -#elif defined(_M_IX86) || defined(_M_X64) -// x86/x64 hardware only emits memory barriers inside _Interlocked intrinsics -#define _Compiler_or_memory_barrier() _Compiler_barrier() -#else // ^^^ x86/x64 / unsupported hardware vvv -#error Unsupported hardware -#endif // hardware - -#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) -#define _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _Intrinsic, ...) 
\ - _Check_memory_order(_Order); \ - _Result = _Intrinsic(__VA_ARGS__) -#elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) -#define _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _Intrinsic, ...) \ - switch (_Order) { \ - case memory_order_relaxed: \ - _Result = _INTRIN_RELAXED(_Intrinsic)(__VA_ARGS__); \ - break; \ - case memory_order_consume: \ - case memory_order_acquire: \ - _Result = _INTRIN_ACQUIRE(_Intrinsic)(__VA_ARGS__); \ - break; \ - case memory_order_release: \ - _Result = _INTRIN_RELEASE(_Intrinsic)(__VA_ARGS__); \ - break; \ - default: \ - _INVALID_MEMORY_ORDER; \ - /* [[fallthrough]]; */ \ - case memory_order_acq_rel: \ - case memory_order_seq_cst: \ - _Result = _Intrinsic(__VA_ARGS__); \ - break; \ - } -#endif // hardware - -_EXTERN_C -enum { - _Atomic_memory_order_relaxed, - _Atomic_memory_order_consume, - _Atomic_memory_order_acquire, - _Atomic_memory_order_release, - _Atomic_memory_order_acq_rel, - _Atomic_memory_order_seq_cst, -}; - -inline void _Check_memory_order(const int _Order) { - if (_Order > _Atomic_memory_order_seq_cst) { - _INVALID_MEMORY_ORDER; - } -} - -inline void _Atomic_store8(volatile char* _Ptr, char _Desired, int _Order) { - switch (_Order) { - case _Atomic_memory_order_relaxed: - __iso_volatile_store8(_Ptr, _Desired); - return; - case _Atomic_memory_order_release: - _Compiler_or_memory_barrier(); - __iso_volatile_store8(_Ptr, _Desired); - return; - default: - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_acq_rel: - _INVALID_MEMORY_ORDER; - // [[fallthrough]]; - case _Atomic_memory_order_seq_cst: -#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) - _Memory_barrier(); - __iso_volatile_store8(_Ptr, _Desired); - _Memory_barrier(); -#else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv - (void) _InterlockedExchange8(_Ptr, _Desired); -#endif // hardware - return; - } -} - -inline void _Atomic_store16(volatile short* _Ptr, short _Desired, int _Order) { - switch (_Order) { - case _Atomic_memory_order_relaxed: - __iso_volatile_store16(_Ptr, _Desired); - return; - case _Atomic_memory_order_release: - _Compiler_or_memory_barrier(); - __iso_volatile_store16(_Ptr, _Desired); - return; - default: - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_acq_rel: - _INVALID_MEMORY_ORDER; - // [[fallthrough]]; - case _Atomic_memory_order_seq_cst: -#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) - _Memory_barrier(); - __iso_volatile_store16(_Ptr, _Desired); - _Memory_barrier(); -#else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv - (void) _InterlockedExchange16(_Ptr, _Desired); -#endif // hardware - return; - } -} - -inline void _Atomic_store32(volatile int* _Ptr, int _Desired, int _Order) { - switch (_Order) { - case _Atomic_memory_order_relaxed: - __iso_volatile_store32(_Ptr, _Desired); - return; - case _Atomic_memory_order_release: - _Compiler_or_memory_barrier(); - __iso_volatile_store32(_Ptr, _Desired); - return; - default: - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_acq_rel: - _INVALID_MEMORY_ORDER; - // [[fallthrough]]; - case _Atomic_memory_order_seq_cst: -#if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) - _Memory_barrier(); - __iso_volatile_store32(_Ptr, _Desired); - _Memory_barrier(); -#else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv - (void) _InterlockedExchange(reinterpret_cast(_Ptr), static_cast(_Desired)); 
-#endif // hardware - return; - } -} - -inline void _Atomic_store64(volatile long long* _Ptr, long long _Desired, int _Order) { - switch (_Order) { - case _Atomic_memory_order_relaxed: - __iso_volatile_store64(_Ptr, _Desired); - return; - case _Atomic_memory_order_release: - _Compiler_or_memory_barrier(); - __iso_volatile_store64(_Ptr, _Desired); - return; - default: - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_acq_rel: - _INVALID_MEMORY_ORDER; - // [[fallthrough]]; - case _Atomic_memory_order_seq_cst: -#if defined(_M_IX86) - _Compiler_barrier(); - __iso_volatile_store64(_Ptr, _Desired); - _STD atomic_thread_fence(memory_order_seq_cst); -#elif defined(_M_ARM64) || defined(_M_ARM64EC) - _Memory_barrier(); - __iso_volatile_store64(_Ptr, _Desired); - _Memory_barrier(); -#else // ^^^ _M_ARM64, _M_ARM64EC / ARM32, x64 vvv - (void) _InterlockedExchange64(_Ptr, _Desired); -#endif // ^^^ ARM32, x64 ^^^ - return; - } -} -inline char _Atomic_load8(const volatile char* _Ptr, int _Order) { - char _As_bytes = __iso_volatile_load8(_Ptr); - switch (_Order) { - case _Atomic_memory_order_relaxed: - break; - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_seq_cst: - _Compiler_or_memory_barrier(); - break; - case _Atomic_memory_order_release: - case _Atomic_memory_order_acq_rel: - default: - _INVALID_MEMORY_ORDER; - break; - } - return _As_bytes; -} - -inline short _Atomic_load16(const volatile short* _Ptr, int _Order) { - short _As_bytes = __iso_volatile_load16(_Ptr); - switch (_Order) { - case _Atomic_memory_order_relaxed: - break; - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_seq_cst: - _Compiler_or_memory_barrier(); - break; - case _Atomic_memory_order_release: - case _Atomic_memory_order_acq_rel: - default: - _INVALID_MEMORY_ORDER; - break; - } - return _As_bytes; -} - -inline int _Atomic_load32(const volatile int* _Ptr, int _Order) { - int _As_bytes = __iso_volatile_load32(_Ptr); - switch (_Order) { - case _Atomic_memory_order_relaxed: - break; - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_seq_cst: - _Compiler_or_memory_barrier(); - break; - case _Atomic_memory_order_release: - case _Atomic_memory_order_acq_rel: - default: - _INVALID_MEMORY_ORDER; - break; - } - return _As_bytes; -} - -inline long long _Atomic_load64(const volatile long long* _Ptr, int _Order) { -#ifdef _M_ARM - long long _As_bytes = __ldrexd(_Ptr); -#else - long long _As_bytes = __iso_volatile_load64(_Ptr); -#endif - switch (_Order) { - case _Atomic_memory_order_relaxed: - break; - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_seq_cst: - _Compiler_or_memory_barrier(); - break; - case _Atomic_memory_order_release: - case _Atomic_memory_order_acq_rel: - default: - _INVALID_MEMORY_ORDER; - break; - } - return _As_bytes; -} - -inline bool _Atomic_compare_exchange_strong8(volatile char* _Ptr, char* _Expected, char _Desired, int _Order) { - char _Prev_bytes; - char _Expected_bytes = *_Expected; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange8, _Ptr, _Desired, _Expected_bytes); - if (_Prev_bytes == _Expected_bytes) { - return true; - } - *_Expected = _Prev_bytes; - return false; -} - -inline bool _Atomic_compare_exchange_strong16(volatile short* _Ptr, short* _Expected, short _Desired, int _Order) { - short _Prev_bytes; - short _Expected_bytes = 
*_Expected; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange16, _Ptr, _Desired, _Expected_bytes); - if (_Prev_bytes == _Expected_bytes) { - return true; - } - *_Expected = _Prev_bytes; - return false; -} - -inline bool _Atomic_compare_exchange_strong32(volatile int* _Ptr, int* _Expected, int _Desired, int _Order) { - long _Prev_bytes; - long _Expected_bytes = *_Expected; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange, reinterpret_cast(_Ptr), - _Desired, _Expected_bytes); - if (_Prev_bytes == _Expected_bytes) { - return true; - } - *_Expected = _Prev_bytes; - return false; -} - -inline bool _Atomic_compare_exchange_strong64( - volatile long long* _Ptr, long long* _Expected, long long _Desired, int _Order) { - long long _Prev_bytes; - long long _Expected_bytes = *_Expected; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange64, _Ptr, _Desired, _Expected_bytes); - if (_Prev_bytes == _Expected_bytes) { - return true; - } - *_Expected = _Prev_bytes; - return false; -} - -_END_EXTERN_C - _STD_BEGIN #if _HAS_CXX20 From 4e059141c721529619123401fe6d904fb7c962c4 Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Wed, 29 Jun 2022 19:02:08 -0700 Subject: [PATCH 07/17] unify loads under _ATOMIC_LOAD_VERIFY_MEMORY_ORDER --- stl/inc/atomic | 78 ++++++++++++++------------------------------------ 1 file changed, 22 insertions(+), 56 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 49a6163557..dccc5b2f79 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -117,6 +117,24 @@ extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; } #endif // hardware +#define _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(_Order_var) \ + do { \ + switch (_Order_var) { \ + case _Atomic_memory_order_relaxed: \ + break; \ + case _Atomic_memory_order_consume: \ + case _Atomic_memory_order_acquire: \ + case _Atomic_memory_order_seq_cst: \ + _Compiler_or_memory_barrier(); \ + break; \ + case _Atomic_memory_order_release: \ + case _Atomic_memory_order_acq_rel: \ + default: \ + _INVALID_MEMORY_ORDER; \ + break; \ + } \ + } while (0) + _EXTERN_C enum { @@ -247,58 +265,19 @@ inline void _Atomic_store64(volatile long long* _Ptr, long long _Desired, int _O } inline char _Atomic_load8(const volatile char* _Ptr, int _Order) { char _As_bytes = __iso_volatile_load8(_Ptr); - switch (_Order) { - case _Atomic_memory_order_relaxed: - break; - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_seq_cst: - _Compiler_or_memory_barrier(); - break; - case _Atomic_memory_order_release: - case _Atomic_memory_order_acq_rel: - default: - _INVALID_MEMORY_ORDER; - break; - } + _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(_Order); return _As_bytes; } inline short _Atomic_load16(const volatile short* _Ptr, int _Order) { short _As_bytes = __iso_volatile_load16(_Ptr); - switch (_Order) { - case _Atomic_memory_order_relaxed: - break; - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_seq_cst: - _Compiler_or_memory_barrier(); - break; - case _Atomic_memory_order_release: - case _Atomic_memory_order_acq_rel: - default: - _INVALID_MEMORY_ORDER; - break; - } + _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(_Order); return _As_bytes; } inline int _Atomic_load32(const volatile int* _Ptr, int _Order) { int _As_bytes = __iso_volatile_load32(_Ptr); - switch (_Order) { - case _Atomic_memory_order_relaxed: - break; - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case 
_Atomic_memory_order_seq_cst: - _Compiler_or_memory_barrier(); - break; - case _Atomic_memory_order_release: - case _Atomic_memory_order_acq_rel: - default: - _INVALID_MEMORY_ORDER; - break; - } + _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(_Order); return _As_bytes; } @@ -308,20 +287,7 @@ inline long long _Atomic_load64(const volatile long long* _Ptr, int _Order) { #else long long _As_bytes = __iso_volatile_load64(_Ptr); #endif - switch (_Order) { - case _Atomic_memory_order_relaxed: - break; - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_seq_cst: - _Compiler_or_memory_barrier(); - break; - case _Atomic_memory_order_release: - case _Atomic_memory_order_acq_rel: - default: - _INVALID_MEMORY_ORDER; - break; - } + _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(_Order); return _As_bytes; } From e529c2628dab829c9f544f26f4fcc85eb5e755b8 Mon Sep 17 00:00:00 2001 From: Charlie Barto Date: Tue, 5 Jul 2022 22:47:08 -0700 Subject: [PATCH 08/17] use macros to factor out common code rather than functions --- stl/inc/atomic | 356 +++++++++++++++++-------------------------------- 1 file changed, 122 insertions(+), 234 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index dccc5b2f79..8b73afa53a 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -70,6 +70,16 @@ extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; // The following code is SHARED with vcruntime and any updates // should be mirrored. Also: if any macros are added they should be // #undefed in vcruntime as well + +enum { + _Atomic_memory_order_relaxed, + _Atomic_memory_order_consume, + _Atomic_memory_order_acquire, + _Atomic_memory_order_release, + _Atomic_memory_order_acq_rel, + _Atomic_memory_order_seq_cst, +}; + #ifndef _INVALID_MEMORY_ORDER #ifdef _DEBUG #define _INVALID_MEMORY_ORDER _STL_REPORT_ERROR("Invalid memory order") @@ -78,6 +88,12 @@ extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; #endif // _DEBUG #endif // _INVALID_MEMORY_ORDER +extern "C" inline void _Check_memory_order(const int _Order) { + if (_Order > _Atomic_memory_order_seq_cst) { + _INVALID_MEMORY_ORDER; + } +} + #define _Compiler_barrier() _STL_DISABLE_DEPRECATED_WARNING _ReadWriteBarrier() _STL_RESTORE_DEPRECATED_WARNING #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) @@ -117,227 +133,56 @@ extern "C" _NODISCARD char __stdcall __std_atomic_has_cmpxchg16b() noexcept; } #endif // hardware +// note: these macros are _not_ always safe to use with a trailing semicolon, +// we avoid wrapping them in do {} while(0) because MSVC generates code for such loops +// in debug mode. 
#define _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(_Order_var) \
    switch (_Order_var) { \
    case _Atomic_memory_order_relaxed: \
        break; \
    case _Atomic_memory_order_consume: \
    case _Atomic_memory_order_acquire: \
    case _Atomic_memory_order_seq_cst: \
        _Compiler_or_memory_barrier(); \
        break; \
    case _Atomic_memory_order_release: \
    case _Atomic_memory_order_acq_rel: \
    default: \
        _INVALID_MEMORY_ORDER; \
        break; \
    }
+
+#define _ATOMIC_STORE_PREFIX(_Width, _Ptr, _Desired) \
+    case _Atomic_memory_order_relaxed: \
+        __iso_volatile_store##_Width((_Ptr), (_Desired)); \
+        return; \
+    case _Atomic_memory_order_release: \
+        _Compiler_or_memory_barrier(); \
+        __iso_volatile_store##_Width((_Ptr), (_Desired)); \
+        return; \
+    default: \
+    case _Atomic_memory_order_consume: \
+    case _Atomic_memory_order_acquire: \
+    case _Atomic_memory_order_acq_rel: \
+        _INVALID_MEMORY_ORDER; \
+        /* [[fallthrough]]; */
+
+
+#define _ATOMIC_STORE_SEQ_CST_ARM(_Width, _Ptr, _Desired) \
+    _Memory_barrier(); \
+    __iso_volatile_store##_Width((_Ptr), (_Desired)); \
+    _Memory_barrier();
+#define _ATOMIC_STORE_SEQ_CST_INTEL(_Width, _Ptr, _Desired) (void) \
_InterlockedExchange##_Width((_Ptr), (_Desired)); +#define _ATOMIC_STORE_32_SEQ_CST_INTEL(_Ptr, _Desired) \ + (void) _InterlockedExchange(reinterpret_cast(_Ptr), static_cast(_Desired)); -inline void _Atomic_store32(volatile int* _Ptr, int _Desired, int _Order) { - switch (_Order) { - case _Atomic_memory_order_relaxed: - __iso_volatile_store32(_Ptr, _Desired); - return; - case _Atomic_memory_order_release: - _Compiler_or_memory_barrier(); - __iso_volatile_store32(_Ptr, _Desired); - return; - default: - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_acq_rel: - _INVALID_MEMORY_ORDER; - // [[fallthrough]]; - case _Atomic_memory_order_seq_cst: #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) - _Memory_barrier(); - __iso_volatile_store32(_Ptr, _Desired); - _Memory_barrier(); +#define _ATOMIC_STORE_SEQ_CST(_Width, _Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_ARM((_Width), (_Ptr), (_Desired)) +#define _ATOMIC_STORE_32_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_ARM(32, (_Ptr), (_Desired)) #else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv - (void) _InterlockedExchange(reinterpret_cast(_Ptr), static_cast(_Desired)); +#define _ATOMIC_STORE_SEQ_CST(_Width, _Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_INTEL(_Width, (_Ptr), (_Desired)) +#define _ATOMIC_STORE_32_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_32_SEQ_CST_INTEL((_Ptr), (_Desired)) #endif // hardware - return; - } -} - -inline void _Atomic_store64(volatile long long* _Ptr, long long _Desired, int _Order) { - switch (_Order) { - case _Atomic_memory_order_relaxed: - __iso_volatile_store64(_Ptr, _Desired); - return; - case _Atomic_memory_order_release: - _Compiler_or_memory_barrier(); - __iso_volatile_store64(_Ptr, _Desired); - return; - default: - case _Atomic_memory_order_consume: - case _Atomic_memory_order_acquire: - case _Atomic_memory_order_acq_rel: - _INVALID_MEMORY_ORDER; - // [[fallthrough]]; - case _Atomic_memory_order_seq_cst: -#if defined(_M_IX86) - _Compiler_barrier(); - __iso_volatile_store64(_Ptr, _Desired); - _STD atomic_thread_fence(memory_order_seq_cst); -#elif defined(_M_ARM64) || defined(_M_ARM64EC) - _Memory_barrier(); - __iso_volatile_store64(_Ptr, _Desired); - _Memory_barrier(); -#else // ^^^ _M_ARM64, _M_ARM64EC / ARM32, x64 vvv - (void) _InterlockedExchange64(_Ptr, _Desired); -#endif // ^^^ ARM32, x64 ^^^ - return; - } -} -inline char _Atomic_load8(const volatile char* _Ptr, int _Order) { - char _As_bytes = __iso_volatile_load8(_Ptr); - _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(_Order); - return _As_bytes; -} - -inline short _Atomic_load16(const volatile short* _Ptr, int _Order) { - short _As_bytes = __iso_volatile_load16(_Ptr); - _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(_Order); - return _As_bytes; -} - -inline int _Atomic_load32(const volatile int* _Ptr, int _Order) { - int _As_bytes = __iso_volatile_load32(_Ptr); - _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(_Order); - return _As_bytes; -} - -inline long long _Atomic_load64(const volatile long long* _Ptr, int _Order) { -#ifdef _M_ARM - long long _As_bytes = __ldrexd(_Ptr); -#else - long long _As_bytes = __iso_volatile_load64(_Ptr); -#endif - _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(_Order); - return _As_bytes; -} - -inline bool _Atomic_compare_exchange_strong8(volatile char* _Ptr, char* _Expected, char _Desired, int _Order) { - char _Prev_bytes; - char _Expected_bytes = *_Expected; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange8, _Ptr, _Desired, _Expected_bytes); - if (_Prev_bytes == _Expected_bytes) { - return 
true; - } - *_Expected = _Prev_bytes; - return false; -} - -inline bool _Atomic_compare_exchange_strong16(volatile short* _Ptr, short* _Expected, short _Desired, int _Order) { - short _Prev_bytes; - short _Expected_bytes = *_Expected; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange16, _Ptr, _Desired, _Expected_bytes); - if (_Prev_bytes == _Expected_bytes) { - return true; - } - *_Expected = _Prev_bytes; - return false; -} - -inline bool _Atomic_compare_exchange_strong32(volatile int* _Ptr, int* _Expected, int _Desired, int _Order) { - long _Prev_bytes; - long _Expected_bytes = *_Expected; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange, reinterpret_cast(_Ptr), - _Desired, _Expected_bytes); - if (_Prev_bytes == _Expected_bytes) { - return true; - } - *_Expected = _Prev_bytes; - return false; -} - -inline bool _Atomic_compare_exchange_strong64( - volatile long long* _Ptr, long long* _Expected, long long _Desired, int _Order) { - long long _Prev_bytes; - long long _Expected_bytes = *_Expected; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange64, _Ptr, _Desired, _Expected_bytes); - if (_Prev_bytes == _Expected_bytes) { - return true; - } - *_Expected = _Prev_bytes; - return false; -} - -_END_EXTERN_C // End of code shared with vcruntime _EXTERN_C @@ -853,24 +698,31 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const char _As_bytes = _Atomic_reinterpret_as(_Value); - _Atomic_store8(_Mem, _As_bytes, _Atomic_memory_order_seq_cst); + _ATOMIC_STORE_SEQ_CST(8, _Mem, _As_bytes) } void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const char _As_bytes = _Atomic_reinterpret_as(_Value); - _Atomic_store8(_Mem, _As_bytes, static_cast(_Order)); + switch (static_cast(_Order)) { + _ATOMIC_STORE_PREFIX(8, _Mem, _As_bytes) + case _Atomic_memory_order_seq_cst: + store(_Value); + return; + } } _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); - char _As_bytes = _Atomic_load8(_Mem, _Atomic_memory_order_seq_cst); + char _As_bytes = __iso_volatile_load8(_Mem); + _Compiler_or_memory_barrier(); return reinterpret_cast<_TVal&>(_As_bytes); } _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); - char _As_bytes = _Atomic_load8(_Mem, static_cast(_Order)); + char _As_bytes = __iso_volatile_load8(_Mem); + _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(static_cast(_Order)) return reinterpret_cast<_TVal&>(_As_bytes); } @@ -949,24 +801,31 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const short _As_bytes = _Atomic_reinterpret_as(_Value); - _Atomic_store16(_Mem, _As_bytes, _Atomic_memory_order_seq_cst); + _ATOMIC_STORE_SEQ_CST(16, _Mem, _As_bytes) } void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const short _As_bytes = _Atomic_reinterpret_as(_Value); - _Atomic_store16(_Mem, _As_bytes, static_cast(_Order)); + switch (static_cast(_Order)) { + _ATOMIC_STORE_PREFIX(16, _Mem, _As_bytes) + case 
_Atomic_memory_order_seq_cst: + store(_Value); + return; + } } _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); - short _As_bytes = _Atomic_load16(_Mem, _Atomic_memory_order_seq_cst); + short _As_bytes = __iso_volatile_load16(_Mem); + _Compiler_or_memory_barrier(); return reinterpret_cast<_TVal&>(_As_bytes); } _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); - short _As_bytes = _Atomic_load16(_Mem, static_cast(_Order)); + short _As_bytes = __iso_volatile_load16(_Mem); + _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(static_cast(_Order)) return reinterpret_cast<_TVal&>(_As_bytes); } @@ -1044,24 +903,31 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const int _As_bytes = _Atomic_reinterpret_as(_Value); - _Atomic_store32(_Mem, _As_bytes, _Atomic_memory_order_seq_cst); + _ATOMIC_STORE_32_SEQ_CST(_Mem, _As_bytes) } void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const int _As_bytes = _Atomic_reinterpret_as(_Value); - _Atomic_store32(_Mem, _As_bytes, static_cast(_Order)); + switch (static_cast(_Order)) { + _ATOMIC_STORE_PREFIX(32, _Mem, _As_bytes) + case _Atomic_memory_order_seq_cst: + store(_Value); + return; + } } _NODISCARD _TVal load() const noexcept { // load with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); - int _As_bytes = _Atomic_load32(_Mem, _Atomic_memory_order_seq_cst); + int _As_bytes = __iso_volatile_load32(_Mem); + _Compiler_or_memory_barrier(); return reinterpret_cast<_TVal&>(_As_bytes); } _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); - int _As_bytes = _Atomic_load32(_Mem, static_cast(_Order)); + int _As_bytes = __iso_volatile_load32(_Mem); + _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(static_cast(_Order)) return reinterpret_cast<_TVal&>(_As_bytes); } @@ -1139,24 +1005,39 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const long long _As_bytes = _Atomic_reinterpret_as(_Value); - _Atomic_store64(_Mem, _As_bytes, _Atomic_memory_order_seq_cst); + _ATOMIC_STORE_SEQ_CST(64, _Mem, _As_bytes); } void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const long long _As_bytes = _Atomic_reinterpret_as(_Value); - _Atomic_store64(_Mem, _As_bytes, static_cast(_Order)); + switch (static_cast(_Order)) { + _ATOMIC_STORE_PREFIX(64, _Mem, _As_bytes) + case _Atomic_memory_order_seq_cst: + store(_Value); + return; + } } _NODISCARD _TVal load() const noexcept { // load with sequential consistency - const auto _Mem = _Atomic_address_as(_Storage); - long long _As_bytes = _Atomic_load64(_Mem, _Atomic_memory_order_seq_cst); + const auto _Mem = _Atomic_address_as(_Storage); +#ifdef _M_ARM + long long _As_bytes = __ldrexd(_Mem); +#else + long long _As_bytes = __iso_volatile_load64(_Mem); +#endif + _Compiler_or_memory_barrier(); return reinterpret_cast<_TVal&>(_As_bytes); } _NODISCARD _TVal load(const memory_order _Order) const noexcept { // 
load with given memory order - const auto _Mem = _Atomic_address_as(_Storage); - long long _As_bytes = _Atomic_load64(_Mem, static_cast(_Order)); + const auto _Mem = _Atomic_address_as(_Storage); +#ifdef _M_ARM + long long _As_bytes = __ldrexd(_Mem); +#else + long long _As_bytes = __iso_volatile_load64(_Mem); +#endif + _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(static_cast(_Order)) return reinterpret_cast<_TVal&>(_As_bytes); } @@ -3078,6 +2959,13 @@ _STD_END #undef _CMPXCHG_MASK_OUT_PADDING_BITS #undef _ATOMIC_CHOOSE_INTRINSIC +#undef _ATOMIC_LOAD_VERIFY_MEMORY_ORDER +#undef _ATOMIC_STORE_PREFIX +#undef _ATOMIC_STORE_SEQ_CST_ARM +#undef _ATOMIC_STORE_SEQ_CST_INTEL +#undef _ATOMIC_STORE_32_SEQ_CST_INTEL +#undef _ATOMIC_STORE_SEQ_CST +#undef _ATOMIC_STORE_32_SEQ_CST #undef _ATOMIC_HAS_DCAS #undef _STD_COMPARE_EXCHANGE_128 From 34cb3b04e5a56bbfb5ef581e85088672a23725fe Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Wed, 6 Jul 2022 11:52:23 -0700 Subject: [PATCH 09/17] Use the _Atomic enumerators for _ATOMIC_CHOOSE_INTRINSIC, and eliminate the C++ version of _Check_memory_order. --- stl/inc/atomic | 159 ++++++++++++++++++++++++------------------------- 1 file changed, 77 insertions(+), 82 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 8b73afa53a..db2f30ea34 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -112,24 +112,24 @@ extern "C" inline void _Check_memory_order(const int _Order) { _Result = _Intrinsic(__VA_ARGS__) #elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) #define _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _Intrinsic, ...) \ - switch (_Order) { \ - case memory_order_relaxed: \ - _Result = _INTRIN_RELAXED(_Intrinsic)(__VA_ARGS__); \ - break; \ - case memory_order_consume: \ - case memory_order_acquire: \ - _Result = _INTRIN_ACQUIRE(_Intrinsic)(__VA_ARGS__); \ - break; \ - case memory_order_release: \ - _Result = _INTRIN_RELEASE(_Intrinsic)(__VA_ARGS__); \ - break; \ - default: \ - _INVALID_MEMORY_ORDER; \ - /* [[fallthrough]]; */ \ - case memory_order_acq_rel: \ - case memory_order_seq_cst: \ - _Result = _Intrinsic(__VA_ARGS__); \ - break; \ + switch (_Order) { \ + case _Atomic_memory_order_relaxed: \ + _Result = _INTRIN_RELAXED(_Intrinsic)(__VA_ARGS__); \ + break; \ + case _Atomic_memory_order_consume: \ + case _Atomic_memory_order_acquire: \ + _Result = _INTRIN_ACQUIRE(_Intrinsic)(__VA_ARGS__); \ + break; \ + case _Atomic_memory_order_release: \ + _Result = _INTRIN_RELEASE(_Intrinsic)(__VA_ARGS__); \ + break; \ + default: \ + _INVALID_MEMORY_ORDER; \ + /* [[fallthrough]]; */ \ + case _Atomic_memory_order_acq_rel: \ + case _Atomic_memory_order_seq_cst: \ + _Result = _Intrinsic(__VA_ARGS__); \ + break; \ } #endif // hardware @@ -271,11 +271,6 @@ _Ty kill_dependency(_Ty _Arg) noexcept { // "magic" template that kills dependen return _Arg; } -inline void _Check_memory_order(const memory_order _Order) noexcept { - // check that _Order is a valid memory_order - ::_Check_memory_order(static_cast(_Order)); -} - inline void _Check_store_memory_order(const memory_order _Order) noexcept { switch (_Order) { case memory_order_relaxed: @@ -729,8 +724,8 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order char _As_bytes; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _As_bytes, _InterlockedExchange8, _Atomic_address_as(_Storage), - _Atomic_reinterpret_as(_Value)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _As_bytes, 
_InterlockedExchange8, + _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); return reinterpret_cast<_TVal&>(_As_bytes); } @@ -745,7 +740,7 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics const char _Mask_val = _Atomic_reinterpret_as(_Mask._Ref()); for (;;) { - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange8, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange8, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes); if (_Prev_bytes == _Expected_bytes) { return true; @@ -759,8 +754,8 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics } } #endif // _CMPXCHG_MASK_OUT_PADDING_BITS - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange8, _Atomic_address_as(_Storage), - _Atomic_reinterpret_as(_Desired), _Expected_bytes); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange8, + _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes); if (_Prev_bytes == _Expected_bytes) { return true; } @@ -832,8 +827,8 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order short _As_bytes; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _As_bytes, _InterlockedExchange16, _Atomic_address_as(_Storage), - _Atomic_reinterpret_as(_Value)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _As_bytes, _InterlockedExchange16, + _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); return reinterpret_cast<_TVal&>(_As_bytes); } @@ -847,7 +842,7 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics const short _Mask_val = _Atomic_reinterpret_as(_Mask._Ref()); for (;;) { - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange16, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange16, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes); if (_Prev_bytes == _Expected_bytes) { return true; @@ -861,7 +856,7 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics } } #endif // _CMPXCHG_MASK_OUT_PADDING_BITS - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange16, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange16, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes); if (_Prev_bytes == _Expected_bytes) { return true; @@ -934,8 +929,8 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order long _As_bytes; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _As_bytes, _InterlockedExchange, _Atomic_address_as(_Storage), - _Atomic_reinterpret_as(_Value)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _As_bytes, _InterlockedExchange, + _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); return reinterpret_cast<_TVal&>(_As_bytes); } @@ -949,7 +944,7 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics const long _Mask_val = _Atomic_reinterpret_as(_Mask); for (;;) { - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), 
_Expected_bytes); if (_Prev_bytes == _Expected_bytes) { return true; @@ -963,8 +958,8 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics } } #endif // _CMPXCHG_MASK_OUT_PADDING_BITS - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange, _Atomic_address_as(_Storage), - _Atomic_reinterpret_as(_Desired), _Expected_bytes); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange, + _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes); if (_Prev_bytes == _Expected_bytes) { return true; } @@ -1054,8 +1049,8 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order long long _As_bytes; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _As_bytes, _InterlockedExchange64, _Atomic_address_as(_Storage), - _Atomic_reinterpret_as(_Value)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _As_bytes, _InterlockedExchange64, + _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); return reinterpret_cast<_TVal&>(_As_bytes); } #endif // _M_IX86 @@ -1071,7 +1066,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics const long long _Mask_val = _Atomic_reinterpret_as(_Mask); for (;;) { - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange64, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange64, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes); if (_Prev_bytes == _Expected_bytes) { @@ -1086,7 +1081,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics } } #endif // _CMPXCHG_MASK_OUT_PADDING_BITS - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Prev_bytes, _InterlockedCompareExchange64, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange64, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes); if (_Prev_bytes == _Expected_bytes) { return true; @@ -1202,7 +1197,7 @@ struct _Atomic_storage<_Ty&, 16> { // lock-free using 16-byte intrinsics _CSTD memcpy(&_Mask_val, _Mask._Ptr(), sizeof(_TVal)); for (;;) { #if defined(_M_ARM64) || defined(_M_ARM64EC) - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedCompareExchange128, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedCompareExchange128, _Atomic_address_as(_Storage), _Desired_bytes._High, _Desired_bytes._Low, &_Expected_temp._Low); #else // ^^^ _M_ARM64, _M_ARM64EC / _M_X64 vvv @@ -1228,7 +1223,7 @@ struct _Atomic_storage<_Ty&, 16> { // lock-free using 16-byte intrinsics } #endif // _CMPXCHG_MASK_OUT_PADDING_BITS #if defined(_M_ARM64) || defined(_M_ARM64EC) - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedCompareExchange128, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedCompareExchange128, _Atomic_address_as(_Storage), _Desired_bytes._High, _Desired_bytes._Low, &_Expected_temp._Low); #else // ^^^ _M_ARM64, _M_ARM64EC / _M_X64 vvv (void) _Order; @@ -1303,29 +1298,29 @@ struct _Atomic_integral<_Ty, 1> : _Atomic_storage<_Ty> { // atomic integral oper _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { char _Result; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd8, _Atomic_address_as(this->_Storage), - static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd8, + 
_Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { char _Result; - _ATOMIC_CHOOSE_INTRINSIC( - _Order, _Result, _InterlockedAnd8, _Atomic_address_as(this->_Storage), static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedAnd8, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { char _Result; - _ATOMIC_CHOOSE_INTRINSIC( - _Order, _Result, _InterlockedOr8, _Atomic_address_as(this->_Storage), static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedOr8, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { char _Result; - _ATOMIC_CHOOSE_INTRINSIC( - _Order, _Result, _InterlockedXor8, _Atomic_address_as(this->_Storage), static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedXor8, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } @@ -1361,29 +1356,29 @@ struct _Atomic_integral<_Ty, 2> : _Atomic_storage<_Ty> { // atomic integral oper _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { short _Result; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd16, _Atomic_address_as(this->_Storage), - static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd16, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { short _Result; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedAnd16, _Atomic_address_as(this->_Storage), - static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedAnd16, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { short _Result; - _ATOMIC_CHOOSE_INTRINSIC( - _Order, _Result, _InterlockedOr16, _Atomic_address_as(this->_Storage), static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedOr16, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { short _Result; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedXor16, _Atomic_address_as(this->_Storage), - static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedXor16, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } @@ -1419,29 +1414,29 @@ struct _Atomic_integral<_Ty, 4> : _Atomic_storage<_Ty> { // atomic integral oper _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long _Result; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd, _Atomic_address_as(this->_Storage), - static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd, + 
_Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long _Result; - _ATOMIC_CHOOSE_INTRINSIC( - _Order, _Result, _InterlockedAnd, _Atomic_address_as(this->_Storage), static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedAnd, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long _Result; - _ATOMIC_CHOOSE_INTRINSIC( - _Order, _Result, _InterlockedOr, _Atomic_address_as(this->_Storage), static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedOr, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long _Result; - _ATOMIC_CHOOSE_INTRINSIC( - _Order, _Result, _InterlockedXor, _Atomic_address_as(this->_Storage), static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedXor, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } @@ -1531,29 +1526,29 @@ struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral oper #else // ^^^ _M_IX86 / !_M_IX86 vvv _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedExchangeAdd64, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd64, _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedAnd64, _Atomic_address_as(this->_Storage), - static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedAnd64, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedOr64, _Atomic_address_as(this->_Storage), - static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedOr64, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept { long long _Result; - _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _InterlockedXor64, _Atomic_address_as(this->_Storage), - static_cast(_Operand)); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedXor64, + _Atomic_address_as(this->_Storage), static_cast(_Operand)); return static_cast<_TVal>(_Result); } @@ -1920,11 +1915,11 @@ struct _Atomic_pointer : _Atomic_storage<_Ty> { static_cast(static_cast(_Diff) * sizeof(remove_pointer_t<_Ty>)); ptrdiff_t _Result; #if defined(_M_IX86) || defined(_M_ARM) - _ATOMIC_CHOOSE_INTRINSIC( - _Order, _Result, _InterlockedExchangeAdd, _Atomic_address_as(this->_Storage), _Shift_bytes); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd, + _Atomic_address_as(this->_Storage), _Shift_bytes); #else 
// ^^^ 32 bits / 64 bits vvv - _ATOMIC_CHOOSE_INTRINSIC( - _Order, _Result, _InterlockedExchangeAdd64, _Atomic_address_as(this->_Storage), _Shift_bytes); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd64, + _Atomic_address_as(this->_Storage), _Shift_bytes); #endif // hardware return reinterpret_cast<_Ty>(_Result); } @@ -2017,11 +2012,11 @@ struct _Atomic_pointer<_Ty&> : _Atomic_storage<_Ty&> { static_cast(static_cast(_Diff) * sizeof(remove_pointer_t<_Ty>)); ptrdiff_t _Result; #if defined(_M_IX86) || defined(_M_ARM) - _ATOMIC_CHOOSE_INTRINSIC( - _Order, _Result, _InterlockedExchangeAdd, _Atomic_address_as(this->_Storage), _Shift_bytes); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd, + _Atomic_address_as(this->_Storage), _Shift_bytes); #else // ^^^ 32 bits / 64 bits vvv - _ATOMIC_CHOOSE_INTRINSIC( - _Order, _Result, _InterlockedExchangeAdd64, _Atomic_address_as(this->_Storage), _Shift_bytes); + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd64, + _Atomic_address_as(this->_Storage), _Shift_bytes); #endif // hardware return reinterpret_cast<_Ty>(_Result); } From 469d731420e076c211b236c115d7a7adc8caa50a Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Wed, 6 Jul 2022 13:46:01 -0700 Subject: [PATCH 10/17] make cast to an integer for _Check_memory_order --- stl/inc/atomic | 42 +++++++++++++++++++++--------------------- stl/inc/memory | 8 ++++---- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index db2f30ea34..d507b1f6bd 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -112,24 +112,24 @@ extern "C" inline void _Check_memory_order(const int _Order) { _Result = _Intrinsic(__VA_ARGS__) #elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) #define _ATOMIC_CHOOSE_INTRINSIC(_Order, _Result, _Intrinsic, ...) 
\ - switch (_Order) { \ - case _Atomic_memory_order_relaxed: \ - _Result = _INTRIN_RELAXED(_Intrinsic)(__VA_ARGS__); \ - break; \ - case _Atomic_memory_order_consume: \ - case _Atomic_memory_order_acquire: \ - _Result = _INTRIN_ACQUIRE(_Intrinsic)(__VA_ARGS__); \ - break; \ - case _Atomic_memory_order_release: \ - _Result = _INTRIN_RELEASE(_Intrinsic)(__VA_ARGS__); \ - break; \ - default: \ - _INVALID_MEMORY_ORDER; \ - /* [[fallthrough]]; */ \ - case _Atomic_memory_order_acq_rel: \ - case _Atomic_memory_order_seq_cst: \ - _Result = _Intrinsic(__VA_ARGS__); \ - break; \ + switch (_Order) { \ + case _Atomic_memory_order_relaxed: \ + _Result = _INTRIN_RELAXED(_Intrinsic)(__VA_ARGS__); \ + break; \ + case _Atomic_memory_order_consume: \ + case _Atomic_memory_order_acquire: \ + _Result = _INTRIN_ACQUIRE(_Intrinsic)(__VA_ARGS__); \ + break; \ + case _Atomic_memory_order_release: \ + _Result = _INTRIN_RELEASE(_Intrinsic)(__VA_ARGS__); \ + break; \ + default: \ + _INVALID_MEMORY_ORDER; \ + /* [[fallthrough]]; */ \ + case _Atomic_memory_order_acq_rel: \ + case _Atomic_memory_order_seq_cst: \ + _Result = _Intrinsic(__VA_ARGS__); \ + break; \ } #endif // hardware @@ -330,7 +330,7 @@ _NODISCARD inline memory_order _Combine_cas_memory_orders( {memory_order_seq_cst, memory_order_seq_cst, memory_order_seq_cst, memory_order_seq_cst, memory_order_seq_cst, memory_order_seq_cst}}; - _Check_memory_order(_Success); + _Check_memory_order(static_cast(_Success)); _Check_load_memory_order(_Failure); return _Combined_memory_orders[static_cast(_Success)][static_cast(_Failure)]; } @@ -574,7 +574,7 @@ struct _Atomic_storage { _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange _Value with _Storage with sequential consistency - _Check_memory_order(_Order); + _Check_memory_order(static_cast(_Order)); _Guard _Lock{_Spinlock}; _TVal _Result(_Storage); _Storage = _Value; @@ -583,7 +583,7 @@ struct _Atomic_storage { bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with sequential consistency, plain - _Check_memory_order(_Order); + _Check_memory_order(static_cast(_Order)); const auto _Storage_ptr = _STD addressof(_Storage); const auto _Expected_ptr = _STD addressof(_Expected); bool _Result; diff --git a/stl/inc/memory b/stl/inc/memory index 72140847c5..8b444de0a4 100644 --- a/stl/inc/memory +++ b/stl/inc/memory @@ -3917,7 +3917,7 @@ public: } shared_ptr<_Ty> exchange(shared_ptr<_Ty> _Value, const memory_order _Order = memory_order_seq_cst) noexcept { - _Check_memory_order(_Order); + _Check_memory_order(static_cast(_Order)); shared_ptr<_Ty> _Result; _Result._Rep = this->_Repptr._Lock_and_load(); _Result._Ptr = this->_Ptr.load(memory_order_relaxed); @@ -3945,7 +3945,7 @@ public: bool compare_exchange_strong(shared_ptr<_Ty>& _Expected, shared_ptr<_Ty> _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { - _Check_memory_order(_Order); + _Check_memory_order(static_cast(_Order)); auto _Rep = this->_Repptr._Lock_and_load(); if (this->_Ptr.load(memory_order_relaxed) == _Expected._Ptr && _Rep == _Expected._Rep) { remove_extent_t<_Ty>* const _Tmp = _Desired._Ptr; @@ -4036,7 +4036,7 @@ public: } weak_ptr<_Ty> exchange(weak_ptr<_Ty> _Value, const memory_order _Order = memory_order_seq_cst) noexcept { - _Check_memory_order(_Order); + _Check_memory_order(static_cast(_Order)); weak_ptr<_Ty> _Result; _Result._Rep = this->_Repptr._Lock_and_load(); _Result._Ptr = 
this->_Ptr.load(memory_order_relaxed); @@ -4064,7 +4064,7 @@ public: bool compare_exchange_strong( weak_ptr<_Ty>& _Expected, weak_ptr<_Ty> _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { - _Check_memory_order(_Order); + _Check_memory_order(static_cast(_Order)); auto _Rep = this->_Repptr._Lock_and_load(); if (this->_Ptr.load(memory_order_relaxed) == _Expected._Ptr && _Rep == _Expected._Rep) { remove_extent_t<_Ty>* const _Tmp = _Desired._Ptr; From f30c8bd973f6529c58898694bf060703cdf1e1ca Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Wed, 6 Jul 2022 13:48:30 -0700 Subject: [PATCH 11/17] Remove _Load_barrier --- stl/inc/atomic | 7 ------- 1 file changed, 7 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index d507b1f6bd..0d0005e903 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -350,13 +350,6 @@ _NODISCARD _Integral _Atomic_reinterpret_as(const _Ty& _Source) noexcept { } } -inline void _Load_barrier(const memory_order _Order) noexcept { // implement memory barrier for atomic load functions - _Check_load_memory_order(_Order); - if (_Order != memory_order_relaxed) { - _Compiler_or_memory_barrier(); - } -} - #if 1 // TRANSITION, ABI template struct _Atomic_padded { From 0d97b73a5aae756ad5ebb037ecc6cf10c3d5b5fa Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Wed, 6 Jul 2022 15:35:15 -0700 Subject: [PATCH 12/17] x86 needs to use a fence for 64-bit stores, this was accidentially broken --- stl/inc/atomic | 63 +++++++++++++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 24 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 0d0005e903..de09d37bd5 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -176,13 +176,50 @@ extern "C" inline void _Check_memory_order(const int _Order) { #define _ATOMIC_STORE_32_SEQ_CST_INTEL(_Ptr, _Desired) \ (void) _InterlockedExchange(reinterpret_cast(_Ptr), static_cast(_Desired)); +#define _ATOMIC_STORE_64_SEQ_CST_IX86(_Ptr, _Desired) \ + _Compiler_barrier(); \ + __iso_volatile_store64((_Ptr), (_Desired)); \ + _Atomic_thread_fence(_Atomic_memory_order_seq_cst); + #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) #define _ATOMIC_STORE_SEQ_CST(_Width, _Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_ARM((_Width), (_Ptr), (_Desired)) #define _ATOMIC_STORE_32_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_ARM(32, (_Ptr), (_Desired)) +#define _ATOMIC_STORE_64_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_ARM(64, (_Ptr), (_Desired)) #else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv #define _ATOMIC_STORE_SEQ_CST(_Width, _Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_INTEL(_Width, (_Ptr), (_Desired)) #define _ATOMIC_STORE_32_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_32_SEQ_CST_INTEL((_Ptr), (_Desired)) +#ifdef _M_IX86 +#define _ATOMIC_STORE_64_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_64_SEQ_CST_IX86((_Ptr), (_Desired)) +#else // ^^^ x86 / x64 vvv +#define _ATOMIC_STORE_64_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_INTEL(64, (_Ptr), (_Desired)) +#endif // x86/x64 #endif // hardware + +extern "C" inline void _Atomic_thread_fence(int _Order) { + if (_Order == _Atomic_memory_order_relaxed) { + return; + } + +#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) + _Compiler_barrier(); + if (_Order == _Atomic_memory_order_seq_cst) { + volatile long _Guard; // Not initialized to avoid an unnecessary operation; the value does not matter + + // _mm_mfence could have been used, but it is not supported on older x86 CPUs and is slower on some recent CPUs. 
+ // The memory fence provided by interlocked operations has some exceptions, but this is fine: + // std::atomic_thread_fence works with respect to other atomics only; it may not be a full fence for all ops. +#pragma warning(suppress : 6001) // "Using uninitialized memory '_Guard'" +#pragma warning(suppress : 28113) // "Accessing a local variable _Guard via an Interlocked function: This is an unusual + // usage which could be reconsidered." + (void) _InterlockedIncrement(&_Guard); + _Compiler_barrier(); + } +#elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) + _Memory_barrier(); +#else // ^^^ ARM32/ARM64/ARM64EC / unsupported hardware vvv +#error Unsupported hardware +#endif // unsupported hardware +} // End of code shared with vcruntime _EXTERN_C @@ -235,29 +272,7 @@ inline constexpr bool _Might_have_non_value_bits = #endif // _CMPXCHG_MASK_OUT_PADDING_BITS extern "C" inline void atomic_thread_fence(const memory_order _Order) noexcept { - if (_Order == memory_order_relaxed) { - return; - } - -#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) - _Compiler_barrier(); - if (_Order == memory_order_seq_cst) { - volatile long _Guard; // Not initialized to avoid an unnecessary operation; the value does not matter - - // _mm_mfence could have been used, but it is not supported on older x86 CPUs and is slower on some recent CPUs. - // The memory fence provided by interlocked operations has some exceptions, but this is fine: - // std::atomic_thread_fence works with respect to other atomics only; it may not be a full fence for all ops. -#pragma warning(suppress : 6001) // "Using uninitialized memory '_Guard'" -#pragma warning(suppress : 28113) // "Accessing a local variable _Guard via an Interlocked function: This is an unusual - // usage which could be reconsidered." 
- (void) _InterlockedIncrement(&_Guard); - _Compiler_barrier(); - } -#elif defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) - _Memory_barrier(); -#else // ^^^ ARM32/ARM64/ARM64EC / unsupported hardware vvv -#error Unsupported hardware -#endif // unsupported hardware + ::_Atomic_thread_fence(static_cast(_Order)); } extern "C" inline void atomic_signal_fence(const memory_order _Order) noexcept { @@ -993,7 +1008,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics void store(const _TVal _Value) noexcept { // store with sequential consistency const auto _Mem = _Atomic_address_as(_Storage); const long long _As_bytes = _Atomic_reinterpret_as(_Value); - _ATOMIC_STORE_SEQ_CST(64, _Mem, _As_bytes); + _ATOMIC_STORE_64_SEQ_CST(_Mem, _As_bytes); } void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order From 5e8bf07e28cae96731ac436e83acc3fa507b01c9 Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Wed, 6 Jul 2022 16:57:36 -0700 Subject: [PATCH 13/17] correctly forward macro parameters, and spell _Memory_barrier with fewer rs --- stl/inc/atomic | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index de09d37bd5..d6cd11e9ac 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -169,7 +169,7 @@ extern "C" inline void _Check_memory_order(const int _Order) { #define _ATOMIC_STORE_SEQ_CST_ARM(_Width, _Ptr, _Desired) \ - _Memory_barrrier(); \ + _Memory_barrier(); \ __iso_volatile_store##_Width((_Ptr), (_Desired)); \ _Memory_barrier(); #define _ATOMIC_STORE_SEQ_CST_INTEL(_Width, _Ptr, _Desired) (void) _InterlockedExchange##_Width((_Ptr), (_Desired)); @@ -182,7 +182,7 @@ extern "C" inline void _Check_memory_order(const int _Order) { _Atomic_thread_fence(_Atomic_memory_order_seq_cst); #if defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) -#define _ATOMIC_STORE_SEQ_CST(_Width, _Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_ARM((_Width), (_Ptr), (_Desired)) +#define _ATOMIC_STORE_SEQ_CST(_Width, _Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_ARM(_Width, (_Ptr), (_Desired)) #define _ATOMIC_STORE_32_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_ARM(32, (_Ptr), (_Desired)) #define _ATOMIC_STORE_64_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_ARM(64, (_Ptr), (_Desired)) #else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv From f547ee1cdcd067cc563e00b09e660de778b6da8a Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Tue, 12 Jul 2022 16:07:32 -0700 Subject: [PATCH 14/17] get rid of vendor specific terms. --- stl/inc/atomic | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index d6cd11e9ac..ed968f2417 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -134,7 +134,7 @@ extern "C" inline void _Check_memory_order(const int _Order) { #endif // hardware // note: these macros are _not_ always safe to use with a trailing semicolon, -// we avoid wrapping them in do {} while(0) because MSVC generates code for such loops +// we avoid wrapping them in do {} while (0) because MSVC generates code for such loops // in debug mode. 
#define _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(_Order_var) \ switch (_Order_var) { \ @@ -172,8 +172,8 @@ extern "C" inline void _Check_memory_order(const int _Order) { _Memory_barrier(); \ __iso_volatile_store##_Width((_Ptr), (_Desired)); \ _Memory_barrier(); -#define _ATOMIC_STORE_SEQ_CST_INTEL(_Width, _Ptr, _Desired) (void) _InterlockedExchange##_Width((_Ptr), (_Desired)); -#define _ATOMIC_STORE_32_SEQ_CST_INTEL(_Ptr, _Desired) \ +#define _ATOMIC_STORE_SEQ_CST_X86_X64(_Width, _Ptr, _Desired) (void) _InterlockedExchange##_Width((_Ptr), (_Desired)); +#define _ATOMIC_STORE_32_SEQ_CST_X86_X64(_Ptr, _Desired) \ (void) _InterlockedExchange(reinterpret_cast(_Ptr), static_cast(_Desired)); #define _ATOMIC_STORE_64_SEQ_CST_IX86(_Ptr, _Desired) \ @@ -186,12 +186,12 @@ extern "C" inline void _Check_memory_order(const int _Order) { #define _ATOMIC_STORE_32_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_ARM(32, (_Ptr), (_Desired)) #define _ATOMIC_STORE_64_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_ARM(64, (_Ptr), (_Desired)) #else // ^^^ ARM32/ARM64/ARM64EC hardware / x86/x64 hardware vvv -#define _ATOMIC_STORE_SEQ_CST(_Width, _Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_INTEL(_Width, (_Ptr), (_Desired)) -#define _ATOMIC_STORE_32_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_32_SEQ_CST_INTEL((_Ptr), (_Desired)) +#define _ATOMIC_STORE_SEQ_CST(_Width, _Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_X86_X64(_Width, (_Ptr), (_Desired)) +#define _ATOMIC_STORE_32_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_32_SEQ_CST_X86_X64((_Ptr), (_Desired)) #ifdef _M_IX86 #define _ATOMIC_STORE_64_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_64_SEQ_CST_IX86((_Ptr), (_Desired)) #else // ^^^ x86 / x64 vvv -#define _ATOMIC_STORE_64_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_INTEL(64, (_Ptr), (_Desired)) +#define _ATOMIC_STORE_64_SEQ_CST(_Ptr, _Desired) _ATOMIC_STORE_SEQ_CST_X86_X64(64, (_Ptr), (_Desired)) #endif // x86/x64 #endif // hardware @@ -245,8 +245,8 @@ struct _Storage_for { // uninitialized space to store a _Ty alignas(_Ty) unsigned char _Storage[sizeof(_Ty)]; - _Storage_for() = default; - _Storage_for(const _Storage_for&) = delete; + _Storage_for() = default; + _Storage_for(const _Storage_for&) = delete; _Storage_for& operator=(const _Storage_for&) = delete; #if _CMPXCHG_MASK_OUT_PADDING_BITS @@ -517,7 +517,7 @@ public: _Atomic_lock_release(_Spinlock); } - _Atomic_lock_guard(const _Atomic_lock_guard&) = delete; + _Atomic_lock_guard(const _Atomic_lock_guard&) = delete; _Atomic_lock_guard& operator=(const _Atomic_lock_guard&) = delete; private: @@ -2098,7 +2098,7 @@ public: constexpr atomic() noexcept(is_nothrow_default_constructible_v<_Ty>) : _Base() {} - atomic(const atomic&) = delete; + atomic(const atomic&) = delete; atomic& operator=(const atomic&) = delete; #if _HAS_CXX17 @@ -2906,7 +2906,7 @@ public: constexpr _Locked_pointer() noexcept : _Storage{} {} explicit _Locked_pointer(_Ty* const _Ptr) noexcept : _Storage{reinterpret_cast(_Ptr)} {} - _Locked_pointer(const _Locked_pointer&) = delete; + _Locked_pointer(const _Locked_pointer&) = delete; _Locked_pointer& operator=(const _Locked_pointer&) = delete; _NODISCARD _Ty* _Lock_and_load() noexcept { @@ -2965,8 +2965,8 @@ _STD_END #undef _ATOMIC_LOAD_VERIFY_MEMORY_ORDER #undef _ATOMIC_STORE_PREFIX #undef _ATOMIC_STORE_SEQ_CST_ARM -#undef _ATOMIC_STORE_SEQ_CST_INTEL -#undef _ATOMIC_STORE_32_SEQ_CST_INTEL +#undef _ATOMIC_STORE_SEQ_CST_X86_X64 +#undef _ATOMIC_STORE_32_SEQ_CST_X86_X64 #undef _ATOMIC_STORE_SEQ_CST #undef _ATOMIC_STORE_32_SEQ_CST #undef _ATOMIC_HAS_DCAS From 
560bfc4b51b9f3129a8728f9c76dde819dc11f1e Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Tue, 12 Jul 2022 16:47:22 -0700 Subject: [PATCH 15/17] undef the 64-bit atomic store macros --- stl/inc/atomic | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index ed968f2417..3d44073cee 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -88,7 +88,7 @@ enum { #endif // _DEBUG #endif // _INVALID_MEMORY_ORDER -extern "C" inline void _Check_memory_order(const int _Order) { +extern "C" inline void _Check_memory_order(const unsigned int _Order) { if (_Order > _Atomic_memory_order_seq_cst) { _INVALID_MEMORY_ORDER; } @@ -2969,6 +2969,8 @@ _STD_END #undef _ATOMIC_STORE_32_SEQ_CST_X86_X64 #undef _ATOMIC_STORE_SEQ_CST #undef _ATOMIC_STORE_32_SEQ_CST +#undef _ATOMIC_STORE_64_SEQ_CST +#undef _ATOMIC_STORE_64_SEQ_CST_IX86 #undef _ATOMIC_HAS_DCAS #undef _STD_COMPARE_EXCHANGE_128 From f85d16dd368dc88cb715e3dcf4e803ebcb88b1e7 Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Wed, 13 Jul 2022 15:46:15 -0700 Subject: [PATCH 16/17] revert clang format damage --- stl/inc/atomic | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index 3d44073cee..f08a81692d 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -245,8 +245,8 @@ struct _Storage_for { // uninitialized space to store a _Ty alignas(_Ty) unsigned char _Storage[sizeof(_Ty)]; - _Storage_for() = default; - _Storage_for(const _Storage_for&) = delete; + _Storage_for() = default; + _Storage_for(const _Storage_for&) = delete; _Storage_for& operator=(const _Storage_for&) = delete; #if _CMPXCHG_MASK_OUT_PADDING_BITS @@ -517,7 +517,7 @@ public: _Atomic_lock_release(_Spinlock); } - _Atomic_lock_guard(const _Atomic_lock_guard&) = delete; + _Atomic_lock_guard(const _Atomic_lock_guard&) = delete; _Atomic_lock_guard& operator=(const _Atomic_lock_guard&) = delete; private: @@ -2098,7 +2098,7 @@ public: constexpr atomic() noexcept(is_nothrow_default_constructible_v<_Ty>) : _Base() {} - atomic(const atomic&) = delete; + atomic(const atomic&) = delete; atomic& operator=(const atomic&) = delete; #if _HAS_CXX17 @@ -2906,7 +2906,7 @@ public: constexpr _Locked_pointer() noexcept : _Storage{} {} explicit _Locked_pointer(_Ty* const _Ptr) noexcept : _Storage{reinterpret_cast(_Ptr)} {} - _Locked_pointer(const _Locked_pointer&) = delete; + _Locked_pointer(const _Locked_pointer&) = delete; _Locked_pointer& operator=(const _Locked_pointer&) = delete; _NODISCARD _Ty* _Lock_and_load() noexcept { From 77be9c3b12cce3a6994004b9f3ce2f5517caddfe Mon Sep 17 00:00:00 2001 From: Charles Barto Date: Wed, 13 Jul 2022 18:40:23 -0700 Subject: [PATCH 17/17] add noexcept, fix casts from int to unsigned --- stl/inc/atomic | 96 +++++++++++++++++++++++++------------------------- stl/inc/memory | 8 ++--- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/stl/inc/atomic b/stl/inc/atomic index f08a81692d..d996e76935 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -88,7 +88,7 @@ enum { #endif // _DEBUG #endif // _INVALID_MEMORY_ORDER -extern "C" inline void _Check_memory_order(const unsigned int _Order) { +extern "C" inline void _Check_memory_order(const unsigned int _Order) noexcept { if (_Order > _Atomic_memory_order_seq_cst) { _INVALID_MEMORY_ORDER; } @@ -195,7 +195,7 @@ extern "C" inline void _Check_memory_order(const unsigned int _Order) { #endif // x86/x64 #endif // hardware -extern "C" inline void _Atomic_thread_fence(int _Order) { +extern "C" inline void 
_Atomic_thread_fence(const unsigned int _Order) noexcept { if (_Order == _Atomic_memory_order_relaxed) { return; } @@ -272,7 +272,7 @@ inline constexpr bool _Might_have_non_value_bits = #endif // _CMPXCHG_MASK_OUT_PADDING_BITS extern "C" inline void atomic_thread_fence(const memory_order _Order) noexcept { - ::_Atomic_thread_fence(static_cast(_Order)); + ::_Atomic_thread_fence(static_cast(_Order)); } extern "C" inline void atomic_signal_fence(const memory_order _Order) noexcept { @@ -345,7 +345,7 @@ _NODISCARD inline memory_order _Combine_cas_memory_orders( {memory_order_seq_cst, memory_order_seq_cst, memory_order_seq_cst, memory_order_seq_cst, memory_order_seq_cst, memory_order_seq_cst}}; - _Check_memory_order(static_cast(_Success)); + _Check_memory_order(static_cast(_Success)); _Check_load_memory_order(_Failure); return _Combined_memory_orders[static_cast(_Success)][static_cast(_Failure)]; } @@ -582,7 +582,7 @@ struct _Atomic_storage { _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange _Value with _Storage with sequential consistency - _Check_memory_order(static_cast(_Order)); + _Check_memory_order(static_cast(_Order)); _Guard _Lock{_Spinlock}; _TVal _Result(_Storage); _Storage = _Value; @@ -591,7 +591,7 @@ struct _Atomic_storage { bool compare_exchange_strong(_TVal& _Expected, const _TVal _Desired, const memory_order _Order = memory_order_seq_cst) noexcept { // CAS with sequential consistency, plain - _Check_memory_order(static_cast(_Order)); + _Check_memory_order(static_cast(_Order)); const auto _Storage_ptr = _STD addressof(_Storage); const auto _Expected_ptr = _STD addressof(_Expected); bool _Result; @@ -707,7 +707,7 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const char _As_bytes = _Atomic_reinterpret_as(_Value); - switch (static_cast(_Order)) { + switch (static_cast(_Order)) { _ATOMIC_STORE_PREFIX(8, _Mem, _As_bytes) case _Atomic_memory_order_seq_cst: store(_Value); @@ -725,14 +725,14 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); char _As_bytes = __iso_volatile_load8(_Mem); - _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(static_cast(_Order)) + _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(static_cast(_Order)) return reinterpret_cast<_TVal&>(_As_bytes); } _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order char _As_bytes; - _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _As_bytes, _InterlockedExchange8, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _As_bytes, _InterlockedExchange8, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); return reinterpret_cast<_TVal&>(_As_bytes); } @@ -748,7 +748,7 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 1-byte intrinsics const char _Mask_val = _Atomic_reinterpret_as(_Mask._Ref()); for (;;) { - _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange8, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange8, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes); if (_Prev_bytes == _Expected_bytes) { return true; @@ -762,7 +762,7 @@ struct _Atomic_storage<_Ty, 1> { // lock-free using 
1-byte intrinsics } } #endif // _CMPXCHG_MASK_OUT_PADDING_BITS - _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange8, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange8, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes); if (_Prev_bytes == _Expected_bytes) { return true; @@ -810,7 +810,7 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const short _As_bytes = _Atomic_reinterpret_as(_Value); - switch (static_cast(_Order)) { + switch (static_cast(_Order)) { _ATOMIC_STORE_PREFIX(16, _Mem, _As_bytes) case _Atomic_memory_order_seq_cst: store(_Value); @@ -828,14 +828,14 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); short _As_bytes = __iso_volatile_load16(_Mem); - _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(static_cast(_Order)) + _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(static_cast(_Order)) return reinterpret_cast<_TVal&>(_As_bytes); } _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order short _As_bytes; - _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _As_bytes, _InterlockedExchange16, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _As_bytes, _InterlockedExchange16, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value)); return reinterpret_cast<_TVal&>(_As_bytes); } @@ -850,7 +850,7 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics const short _Mask_val = _Atomic_reinterpret_as(_Mask._Ref()); for (;;) { - _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange16, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange16, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes); if (_Prev_bytes == _Expected_bytes) { return true; @@ -864,7 +864,7 @@ struct _Atomic_storage<_Ty, 2> { // lock-free using 2-byte intrinsics } } #endif // _CMPXCHG_MASK_OUT_PADDING_BITS - _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange16, + _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange16, _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes); if (_Prev_bytes == _Expected_bytes) { return true; @@ -912,7 +912,7 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order const auto _Mem = _Atomic_address_as(_Storage); const int _As_bytes = _Atomic_reinterpret_as(_Value); - switch (static_cast(_Order)) { + switch (static_cast(_Order)) { _ATOMIC_STORE_PREFIX(32, _Mem, _As_bytes) case _Atomic_memory_order_seq_cst: store(_Value); @@ -930,14 +930,14 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics _NODISCARD _TVal load(const memory_order _Order) const noexcept { // load with given memory order const auto _Mem = _Atomic_address_as(_Storage); int _As_bytes = __iso_volatile_load32(_Mem); - _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(static_cast(_Order)) + _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(static_cast(_Order)) return reinterpret_cast<_TVal&>(_As_bytes); } _TVal exchange(const _TVal 
 _Value, const memory_order _Order = memory_order_seq_cst) noexcept { // exchange with given memory order
         long _As_bytes;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _As_bytes, _InterlockedExchange,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _As_bytes, _InterlockedExchange,
             _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value));
         return reinterpret_cast<_TVal&>(_As_bytes);
     }
@@ -952,7 +952,7 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics
             const long _Mask_val = _Atomic_reinterpret_as(_Mask);
 
             for (;;) {
-                _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange,
+                _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange,
                     _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes);
                 if (_Prev_bytes == _Expected_bytes) {
                     return true;
@@ -966,7 +966,7 @@ struct _Atomic_storage<_Ty, 4> { // lock-free using 4-byte intrinsics
             }
         }
 #endif // _CMPXCHG_MASK_OUT_PADDING_BITS
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange,
             _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes);
         if (_Prev_bytes == _Expected_bytes) {
             return true;
@@ -1014,7 +1014,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics
     void store(const _TVal _Value, const memory_order _Order) noexcept { // store with given memory order
         const auto _Mem = _Atomic_address_as(_Storage);
         const long long _As_bytes = _Atomic_reinterpret_as(_Value);
-        switch (static_cast(_Order)) {
+        switch (static_cast(_Order)) {
             _ATOMIC_STORE_PREFIX(64, _Mem, _As_bytes)
         case _Atomic_memory_order_seq_cst:
             store(_Value);
@@ -1040,7 +1040,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics
 #else
         long long _As_bytes = __iso_volatile_load64(_Mem);
 #endif
-        _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(static_cast(_Order))
+        _ATOMIC_LOAD_VERIFY_MEMORY_ORDER(static_cast(_Order))
         return reinterpret_cast<_TVal&>(_As_bytes);
     }
 
@@ -1057,7 +1057,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics
     _TVal exchange(const _TVal _Value, const memory_order _Order = memory_order_seq_cst) noexcept {
         // exchange with given memory order
         long long _As_bytes;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _As_bytes, _InterlockedExchange64,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _As_bytes, _InterlockedExchange64,
             _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Value));
         return reinterpret_cast<_TVal&>(_As_bytes);
     }
@@ -1074,7 +1074,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics
             const long long _Mask_val = _Atomic_reinterpret_as(_Mask);
 
             for (;;) {
-                _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange64,
+                _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange64,
                     _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired),
                     _Expected_bytes);
                 if (_Prev_bytes == _Expected_bytes) {
@@ -1089,7 +1089,7 @@ struct _Atomic_storage<_Ty, 8> { // lock-free using 8-byte intrinsics
             }
         }
 #endif // _CMPXCHG_MASK_OUT_PADDING_BITS
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange64,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Prev_bytes, _InterlockedCompareExchange64,
             _Atomic_address_as(_Storage), _Atomic_reinterpret_as(_Desired), _Expected_bytes);
         if (_Prev_bytes == _Expected_bytes) {
             return true;
@@ -1205,7 +1205,7 @@ struct _Atomic_storage<_Ty&, 16> { // lock-free using 16-byte intrinsics
             _CSTD memcpy(&_Mask_val, _Mask._Ptr(), sizeof(_TVal));
             for (;;) {
 #if defined(_M_ARM64) || defined(_M_ARM64EC)
-                _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedCompareExchange128,
+                _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedCompareExchange128,
                     _Atomic_address_as(_Storage), _Desired_bytes._High, _Desired_bytes._Low,
                     &_Expected_temp._Low);
 #else // ^^^ _M_ARM64, _M_ARM64EC / _M_X64 vvv
@@ -1231,7 +1231,7 @@ struct _Atomic_storage<_Ty&, 16> { // lock-free using 16-byte intrinsics
         }
 #endif // _CMPXCHG_MASK_OUT_PADDING_BITS
 #if defined(_M_ARM64) || defined(_M_ARM64EC)
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedCompareExchange128,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedCompareExchange128,
             _Atomic_address_as(_Storage), _Desired_bytes._High, _Desired_bytes._Low, &_Expected_temp._Low);
 #else // ^^^ _M_ARM64, _M_ARM64EC / _M_X64 vvv
         (void) _Order;
@@ -1306,28 +1306,28 @@ struct _Atomic_integral<_Ty, 1> : _Atomic_storage<_Ty> { // atomic integral oper
 
     _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         char _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd8,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd8,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
 
     _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         char _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedAnd8,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedAnd8,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
 
     _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         char _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedOr8,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedOr8,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
 
     _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         char _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedXor8,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedXor8,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
@@ -1364,28 +1364,28 @@ struct _Atomic_integral<_Ty, 2> : _Atomic_storage<_Ty> { // atomic integral oper
 
     _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         short _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd16,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd16,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
 
     _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         short _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedAnd16,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedAnd16,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
 
     _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         short _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedOr16,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedOr16,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
 
     _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         short _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedXor16,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedXor16,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
@@ -1422,28 +1422,28 @@ struct _Atomic_integral<_Ty, 4> : _Atomic_storage<_Ty> { // atomic integral oper
 
     _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         long _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd,
            _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
 
     _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         long _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedAnd,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedAnd,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
    }
 
     _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         long _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedOr,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedOr,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
 
     _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         long _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedXor,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedXor,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
@@ -1534,28 +1534,28 @@ struct _Atomic_integral<_Ty, 8> : _Atomic_storage<_Ty> { // atomic integral oper
 #else // ^^^ _M_IX86 / !_M_IX86 vvv
     _TVal fetch_add(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         long long _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd64,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd64,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
 
     _TVal fetch_and(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         long long _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedAnd64,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedAnd64,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
 
     _TVal fetch_or(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         long long _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedOr64,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedOr64,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
    }
 
     _TVal fetch_xor(const _TVal _Operand, const memory_order _Order = memory_order_seq_cst) noexcept {
         long long _Result;
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedXor64,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedXor64,
             _Atomic_address_as(this->_Storage), static_cast(_Operand));
         return static_cast<_TVal>(_Result);
     }
@@ -1923,10 +1923,10 @@ struct _Atomic_pointer : _Atomic_storage<_Ty> {
             static_cast(static_cast(_Diff) * sizeof(remove_pointer_t<_Ty>));
         ptrdiff_t _Result;
 #if defined(_M_IX86) || defined(_M_ARM)
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd,
             _Atomic_address_as(this->_Storage), _Shift_bytes);
 #else // ^^^ 32 bits / 64 bits vvv
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd64,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd64,
             _Atomic_address_as(this->_Storage), _Shift_bytes);
 #endif // hardware
         return reinterpret_cast<_Ty>(_Result);
@@ -2020,10 +2020,10 @@ struct _Atomic_pointer<_Ty&> : _Atomic_storage<_Ty&> {
             static_cast(static_cast(_Diff) * sizeof(remove_pointer_t<_Ty>));
         ptrdiff_t _Result;
 #if defined(_M_IX86) || defined(_M_ARM)
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd,
             _Atomic_address_as(this->_Storage), _Shift_bytes);
 #else // ^^^ 32 bits / 64 bits vvv
-        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd64,
+        _ATOMIC_CHOOSE_INTRINSIC(static_cast(_Order), _Result, _InterlockedExchangeAdd64,
             _Atomic_address_as(this->_Storage), _Shift_bytes);
 #endif // hardware
         return reinterpret_cast<_Ty>(_Result);
diff --git a/stl/inc/memory b/stl/inc/memory
index 8b444de0a4..0e78721b9c 100644
--- a/stl/inc/memory
+++ b/stl/inc/memory
@@ -3917,7 +3917,7 @@ public:
     }
 
     shared_ptr<_Ty> exchange(shared_ptr<_Ty> _Value, const memory_order _Order = memory_order_seq_cst) noexcept {
-        _Check_memory_order(static_cast(_Order));
+        _Check_memory_order(static_cast(_Order));
         shared_ptr<_Ty> _Result;
         _Result._Rep = this->_Repptr._Lock_and_load();
         _Result._Ptr = this->_Ptr.load(memory_order_relaxed);
@@ -3945,7 +3945,7 @@ public:
 
     bool compare_exchange_strong(shared_ptr<_Ty>& _Expected, shared_ptr<_Ty> _Desired,
         const memory_order _Order = memory_order_seq_cst) noexcept {
-        _Check_memory_order(static_cast(_Order));
+        _Check_memory_order(static_cast(_Order));
         auto _Rep = this->_Repptr._Lock_and_load();
         if (this->_Ptr.load(memory_order_relaxed) == _Expected._Ptr && _Rep == _Expected._Rep) {
             remove_extent_t<_Ty>* const _Tmp = _Desired._Ptr;
@@ -4036,7 +4036,7 @@ public:
     }
 
     weak_ptr<_Ty> exchange(weak_ptr<_Ty> _Value, const memory_order _Order = memory_order_seq_cst) noexcept {
-        _Check_memory_order(static_cast(_Order));
+        _Check_memory_order(static_cast(_Order));
         weak_ptr<_Ty> _Result;
         _Result._Rep = this->_Repptr._Lock_and_load();
         _Result._Ptr = this->_Ptr.load(memory_order_relaxed);
@@ -4064,7 +4064,7 @@ public:
 
     bool compare_exchange_strong(
         weak_ptr<_Ty>& _Expected, weak_ptr<_Ty> _Desired, const memory_order _Order = memory_order_seq_cst) noexcept {
-        _Check_memory_order(static_cast(_Order));
+        _Check_memory_order(static_cast(_Order));
         auto _Rep = this->_Repptr._Lock_and_load();
         if (this->_Ptr.load(memory_order_relaxed) == _Expected._Ptr && _Rep == _Expected._Rep) {
             remove_extent_t<_Ty>* const _Tmp = _Desired._Ptr;
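
Note on the _CMPXCHG_MASK_OUT_PADDING_BITS hunks kept above: a raw bitwise compare-exchange can fail spuriously when _Ty has padding bits, because the hardware compares every byte of the storage while only the value bits are meaningful. After a failed CAS the loop therefore checks whether the value bits actually differ; if only padding differed, it folds the observed padding into the comparand and retries. A minimal standalone sketch of that idea, written against std::atomic rather than the _Interlocked intrinsics, follows; the Padded type, the value_mask constant, and the helper names are illustrative assumptions, not STL internals.

    // Sketch only: imitates the padding-masking CAS loop using std::atomic<uint64_t>
    // instead of the _Interlocked intrinsics. Not the STL's implementation.
    #include <atomic>
    #include <cstdint>
    #include <cstring>
    #include <iostream>

    struct Padded {
        std::uint8_t a;  // value byte 0
        std::uint32_t b; // value bytes 4..7; bytes 1..3 are assumed to be padding
    };
    static_assert(sizeof(Padded) == sizeof(std::uint64_t), "layout assumption for this sketch");

    // All-ones over the value bytes, zero over the assumed padding (little-endian layout assumed).
    constexpr std::uint64_t value_mask = 0xFFFF'FFFF'0000'00FFull;

    std::uint64_t as_bits(const Padded& p) { // type-pun through memcpy
        std::uint64_t bits = 0;
        std::memcpy(&bits, &p, sizeof(p));
        return bits;
    }

    // CAS that ignores padding: only report failure when the value bits differ;
    // if only padding differed, adopt the observed padding and retry.
    bool cas_ignoring_padding(std::atomic<std::uint64_t>& storage, Padded& expected, const Padded& desired) {
        std::uint64_t expected_bits = as_bits(expected);
        const std::uint64_t desired_bits = as_bits(desired);
        for (;;) {
            std::uint64_t prev = expected_bits;
            if (storage.compare_exchange_strong(prev, desired_bits)) {
                return true; // exact byte-wise match, exchange happened
            }
            if ((prev ^ expected_bits) & value_mask) {
                std::memcpy(&expected, &prev, sizeof(expected)); // genuine value mismatch
                return false;
            }
            // Only padding differed: merge it into the comparand and try again.
            expected_bits = (expected_bits & value_mask) | (prev & ~value_mask);
        }
    }

    int main() {
        std::atomic<std::uint64_t> storage{0};
        Padded expected{}; // zero-initialized, matches storage
        const Padded desired{1, 2};
        std::cout << std::boolalpha << cas_ignoring_padding(storage, expected, desired) << '\n'; // true
    }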
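
Note on the _Atomic_pointer::fetch_add hunks: both pointer specializations first scale the element count into _Shift_bytes with sizeof(remove_pointer_t<_Ty>) and then perform a plain integer exchange-add, picking _InterlockedExchangeAdd on 32-bit targets and _InterlockedExchangeAdd64 on 64-bit ones. A rough sketch of that scaling layered over std::atomic is below; fetch_add_elements and cursor are made-up names for illustration only.

    // Sketch only: shows why atomic pointer arithmetic is "element count * sizeof(element)"
    // on top of an integer fetch_add; not the STL's implementation.
    #include <atomic>
    #include <cstddef>
    #include <cstdint>
    #include <iostream>

    template <class T>
    T* fetch_add_elements(std::atomic<std::uintptr_t>& storage, std::ptrdiff_t count) {
        // Scale the element count to a byte offset, as the STL does before handing
        // _Shift_bytes to the interlocked intrinsic.
        const std::uintptr_t shift_bytes = static_cast<std::uintptr_t>(count) * sizeof(T);
        const std::uintptr_t previous = storage.fetch_add(shift_bytes, std::memory_order_seq_cst);
        return reinterpret_cast<T*>(previous); // fetch_add returns the pointer value before the add
    }

    int main() {
        int buffer[4]{10, 20, 30, 40};
        std::atomic<std::uintptr_t> cursor{reinterpret_cast<std::uintptr_t>(buffer)};
        int* first = fetch_add_elements<int>(cursor, 1); // cursor now points at buffer[1]
        std::cout << *first << ' ' << *reinterpret_cast<int*>(cursor.load()) << '\n'; // prints: 10 20
    }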