diff --git a/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp index 283048a5c5d05..e041c86e33b3f 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/atomic_intrin.hpp @@ -16,75 +16,118 @@ namespace __ESIMD_DNS { // This function implements atomic update of pre-existing variable in the // absense of C++ 20's atomic_ref. -template Ty atomic_load(Ty *ptr) { +// __atomic_* functions support only integral types. In order to +// support floating types for certain operations like min/max, +// 'cmpxchg' operation is applied for result values using +// 'bridging' variables in integral type. +template using CmpxchgTy = __ESIMD_DNS::uint_type_t; + +template inline Ty atomic_load(Ty *ptr) { #ifdef _WIN32 // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - return __atomic_load(ptr, __ATOMIC_SEQ_CST); + __ESIMD_UNSUPPORTED_ON_HOST; + // TODO : Enable with unit test + /* return sycl::bit_cast(__atomic_load_n((CmpxchgTy *)ptr, + __ATOMIC_SEQ_CST)); */ #endif } -template Ty atomic_store(Ty *ptr, Ty val) { +template inline Ty atomic_store(Ty *ptr, Ty val) { #ifdef _WIN32 // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - __atomic_store(ptr, val, __ATOMIC_SEQ_CST); + Ty ret = atomic_load((CmpxchgTy *)ptr); + __atomic_store_n((CmpxchgTy *)ptr, val, __ATOMIC_SEQ_CST); + return ret; #endif } -template Ty atomic_add_fetch(Ty *ptr, Ty val) { +template inline Ty atomic_add(Ty *ptr, Ty val) { #ifdef _WIN32 // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - return __atomic_add_fetch(ptr, val, __ATOMIC_SEQ_CST); + if constexpr (std::is_integral_v) { + return __atomic_fetch_add(ptr, val, __ATOMIC_SEQ_CST); + } else { + // For Floating type + Ty _old, _new; + CmpxchgTy _old_bits, _new_bits; + do { + _old = *ptr; + _new = _old + val; + _old_bits = *(CmpxchgTy *)&_old; + _new_bits = *(CmpxchgTy *)&_new; + } while (!__atomic_compare_exchange_n((CmpxchgTy *)ptr, &_old_bits, + _new_bits, false, __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST)); + return _old; + } #endif } -template Ty atomic_sub_fetch(Ty *ptr, Ty val) { +template inline Ty atomic_sub(Ty *ptr, Ty val) { #ifdef _WIN32 // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - return __atomic_sub_fetch(ptr, val, __ATOMIC_SEQ_CST); + if constexpr (std::is_integral_v) { + return __atomic_fetch_sub(ptr, val, __ATOMIC_SEQ_CST); + } else { + // For Floating type + Ty _old, _new; + CmpxchgTy _old_bits, _new_bits; + do { + _old = *ptr; + _new = _old - val; + _old_bits = *(CmpxchgTy *)&_old; + _new_bits = *(CmpxchgTy *)&_new; + } while (!__atomic_compare_exchange_n((CmpxchgTy *)ptr, &_old_bits, + _new_bits, false, __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST)); + return _old; + } #endif } -template Ty atomic_and_fetch(Ty *ptr, Ty val) { +template inline Ty atomic_and(Ty *ptr, Ty val) { #ifdef _WIN32 // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - return __atomic_and_fetch(ptr, val, __ATOMIC_SEQ_CST); + static_assert(std::is_integral::value); + return __atomic_fetch_and(ptr, val, __ATOMIC_SEQ_CST); #endif } -template Ty atomic_or_fetch(Ty *ptr, Ty val) { +template inline Ty atomic_or(Ty *ptr, Ty val) { #ifdef _WIN32 // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - return __atomic_or_fetch(ptr, val, __ATOMIC_SEQ_CST); + static_assert(std::is_integral::value); + return __atomic_fetch_or(ptr, val, __ATOMIC_SEQ_CST); #endif } -template Ty atomic_xor_fetch(Ty *ptr, Ty val) { +template inline Ty atomic_xor(Ty *ptr, Ty val) { #ifdef _WIN32 // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - return __atomic_xor_fetch(ptr, val, __ATOMIC_SEQ_CST); + static_assert(std::is_integral::value); + return __atomic_fetch_xor(ptr, val, __ATOMIC_SEQ_CST); #endif } -template Ty atomic_min(Ty *ptr, Ty val) { +template inline Ty atomic_min(Ty *ptr, Ty val) { #ifdef _WIN32 // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - // TODO FIXME: fix implementation for FP types. if constexpr (std::is_integral_v) { Ty _old, _new; do { @@ -92,19 +135,28 @@ template Ty atomic_min(Ty *ptr, Ty val) { _new = std::min(_old, val); } while (!__atomic_compare_exchange_n(ptr, &_old, _new, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); - return _new; + return _old; } else { - __ESIMD_UNSUPPORTED_ON_HOST; + Ty _old, _new; + CmpxchgTy _old_bits, _new_bits; + do { + _old = *ptr; + _new = std::min(_old, val); + _old_bits = *(CmpxchgTy *)&_old; + _new_bits = *(CmpxchgTy *)&_new; + } while (!__atomic_compare_exchange_n((CmpxchgTy *)ptr, &_old_bits, + _new_bits, false, __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST)); + return _old; } #endif } -template Ty atomic_max(Ty *ptr, Ty val) { +template inline Ty atomic_max(Ty *ptr, Ty val) { #ifdef _WIN32 // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - // TODO FIXME: fix implementation for FP types. if constexpr (std::is_integral_v) { Ty _old, _new; do { @@ -112,30 +164,57 @@ template Ty atomic_max(Ty *ptr, Ty val) { _new = std::max(_old, val); } while (!__atomic_compare_exchange_n(ptr, &_old, _new, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); - return _new; + return _old; } else { - __ESIMD_UNSUPPORTED_ON_HOST; + Ty _old, _new; + CmpxchgTy _old_bits, _new_bits; + do { + _old = *ptr; + _new = std::max(_old, val); + _old_bits = *(CmpxchgTy *)&_old; + _new_bits = *(CmpxchgTy *)&_new; + } while (!__atomic_compare_exchange_n((CmpxchgTy *)(CmpxchgTy *)ptr, + &_old_bits, _new_bits, false, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); + return _old; } #endif } -template Ty atomic_cmpxchg(Ty *ptr, Ty expected, Ty desired) { +template +inline Ty atomic_cmpxchg(Ty *ptr, Ty expected, Ty desired) { #ifdef _WIN32 // TODO: Windows will be supported soon __ESIMD_UNSUPPORTED_ON_HOST; #else - // TODO FIXME: fix implementation for FP types. if constexpr (std::is_integral_v) { - Ty _old = expected; - __atomic_compare_exchange_n(ptr, &_old, desired, false, __ATOMIC_SEQ_CST, + Ty local = expected; + __atomic_compare_exchange_n(ptr, &local, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); - return *ptr; + // if exchange occured, this means 'local=expected=*ptr'. So local + // is returned as old val + // if exchange did not occur, *ptr value compared against 'local' + // is stored in 'local'. So local is returned as old val + return local; } else { - __ESIMD_UNSUPPORTED_ON_HOST; + CmpxchgTy desired_bits = *(CmpxchgTy *)&desired; + CmpxchgTy local_bits = *(CmpxchgTy *)&expected; + __atomic_compare_exchange_n((CmpxchgTy *)ptr, &local_bits, desired_bits, + false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); + return *((Ty *)&local_bits); } #endif } +inline void atomic_fence() { +#ifdef _WIN32 + // TODO: Windows will be supported soon + __ESIMD_UNSUPPORTED_ON_HOST; +#else + __atomic_thread_fence(__ATOMIC_SEQ_CST); +#endif +} + } // namespace __ESIMD_DNS /// @endcond ESIMD_DETAIL diff --git a/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp index b21e848b289fe..0c00f0e63c17c 100644 --- a/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/esimd/detail/memory_intrin.hpp @@ -485,7 +485,25 @@ __esimd_svm_atomic0(__ESIMD_DNS::vector_type_t addrs, ; #else { - __ESIMD_UNSUPPORTED_ON_HOST; + __ESIMD_DNS::vector_type_t Oldval = 0; + + for (int AddrIdx = 0; AddrIdx < N; AddrIdx += 1) { + if (pred[AddrIdx] == 0) { + // Skip Oldval vector elements correpsonding to + // predicates whose value is zero + continue; + } + if constexpr (Op == __ESIMD_NS::atomic_op::load) { + Oldval[AddrIdx] = __ESIMD_DNS::atomic_load((Ty *)addrs[AddrIdx]); + } else if constexpr (Op == __ESIMD_NS::atomic_op::inc) { + Oldval[AddrIdx] = + __ESIMD_DNS::atomic_add((Ty *)addrs[AddrIdx], static_cast(1)); + } else if constexpr (Op == __ESIMD_NS::atomic_op::dec) { + Oldval[AddrIdx] = + __ESIMD_DNS::atomic_sub((Ty *)addrs[AddrIdx], static_cast(1)); + } + } + return Oldval; } #endif // __SYCL_DEVICE_ONLY__ @@ -498,23 +516,49 @@ __esimd_svm_atomic1(__ESIMD_DNS::vector_type_t addrs, ; #else { - __ESIMD_DNS::vector_type_t retv; + __ESIMD_DNS::vector_type_t Oldval; - for (int i = 0; i < N; i++) { - if (pred[i]) { - Ty *p = reinterpret_cast(addrs[i]); + for (int AddrIdx = 0; AddrIdx < N; AddrIdx++) { + if (pred[AddrIdx] == 0) { + // Skip Output vector elements correpsonding to + // predicates whose value is zero + continue; + } - switch (Op) { - case __ESIMD_NS::atomic_op::add: - retv[i] = __ESIMD_DNS::atomic_add_fetch(p, src0[i]); - break; - default: - __ESIMD_UNSUPPORTED_ON_HOST; - } + if constexpr (Op == __ESIMD_NS::atomic_op::store) { + Oldval[AddrIdx] = + __ESIMD_DNS::atomic_store((Ty *)addrs[AddrIdx], src0[AddrIdx]); + } else if constexpr ((Op == __ESIMD_NS::atomic_op::add) || + (Op == __ESIMD_NS::atomic_op::fadd)) { + Oldval[AddrIdx] = + __ESIMD_DNS::atomic_add((Ty *)addrs[AddrIdx], src0[AddrIdx]); + } else if constexpr ((Op == __ESIMD_NS::atomic_op::sub) || + (Op == __ESIMD_NS::atomic_op::fsub)) { + Oldval[AddrIdx] = + __ESIMD_DNS::atomic_sub((Ty *)addrs[AddrIdx], src0[AddrIdx]); + } else if constexpr ((Op == __ESIMD_NS::atomic_op::minsint) || + (Op == __ESIMD_NS::atomic_op::min) || + (Op == __ESIMD_NS::atomic_op::fmin)) { + Oldval[AddrIdx] = + __ESIMD_DNS::atomic_min((Ty *)addrs[AddrIdx], src0[AddrIdx]); + } else if constexpr ((Op == __ESIMD_NS::atomic_op::maxsint) || + (Op == __ESIMD_NS::atomic_op::max) || + (Op == __ESIMD_NS::atomic_op::fmax)) { + Oldval[AddrIdx] = + __ESIMD_DNS::atomic_max((Ty *)addrs[AddrIdx], src0[AddrIdx]); + } else if constexpr (Op == __ESIMD_NS::atomic_op::bit_and) { + Oldval[AddrIdx] = + __ESIMD_DNS::atomic_and((Ty *)addrs[AddrIdx], src0[AddrIdx]); + } else if constexpr (Op == __ESIMD_NS::atomic_op::bit_or) { + Oldval[AddrIdx] = + __ESIMD_DNS::atomic_or((Ty *)addrs[AddrIdx], src0[AddrIdx]); + } else if constexpr (Op == __ESIMD_NS::atomic_op::bit_xor) { + Oldval[AddrIdx] = + __ESIMD_DNS::atomic_xor((Ty *)addrs[AddrIdx], src0[AddrIdx]); } } - return retv; + return Oldval; } #endif // __SYCL_DEVICE_ONLY__ @@ -528,7 +572,20 @@ __esimd_svm_atomic2(__ESIMD_DNS::vector_type_t addrs, ; #else { - __ESIMD_UNSUPPORTED_ON_HOST; + __ESIMD_DNS::vector_type_t Oldval; + + for (int AddrIdx = 0; AddrIdx < N; AddrIdx++) { + if (pred[AddrIdx] == 0) { + // Skip Output vector elements correpsonding to + // predicates whose value is zero + continue; + } + static_assert((Op == __ESIMD_NS::atomic_op::cmpxchg) || + (Op == __ESIMD_NS::atomic_op::fcmpxchg)); + Oldval[AddrIdx] = __ESIMD_DNS::atomic_cmpxchg((Ty *)addrs[AddrIdx], + src0[AddrIdx], src1[AddrIdx]); + } + return Oldval; } #endif // __SYCL_DEVICE_ONLY__ @@ -557,7 +614,9 @@ __ESIMD_INTRIN void __esimd_fence(uint8_t cntl) ; #else { - sycl::detail::getESIMDDeviceInterface()->cm_fence_ptr(); + // CM_EMU's 'cm_fence' is NOP. Disabled. + // sycl::detail::getESIMDDeviceInterface()->cm_fence_ptr(); + __ESIMD_DNS::atomic_fence(); } #endif // __SYCL_DEVICE_ONLY__ @@ -849,7 +908,7 @@ __esimd_dword_atomic0(__ESIMD_DNS::simd_mask_storage_t pred, switch (Op) { case __ESIMD_NS::atomic_op::inc: - retv[i] = __ESIMD_DNS::atomic_add_fetch(p, 1); + retv[i] = __ESIMD_DNS::atomic_add(p, 1); break; default: __ESIMD_UNSUPPORTED_ON_HOST; diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index e75d2e8712db4..26f8f5aff3244 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -589,11 +589,11 @@ auto __esimd_emu_lsc_xatomic_offset_access_0( assert(BaseAddr != nullptr && "Invalid BaseAddr for lsc_xatomic_operation under emulation!!"); - __ESIMD_DNS::vector_type_t()> Output = 0; + __ESIMD_DNS::vector_type_t()> Oldval = 0; for (int OffsetIdx = 0; OffsetIdx < N; OffsetIdx += 1) { if (Pred[OffsetIdx] == 0) { - // Skip Output vector elements correpsonding to + // Skip Oldval vector elements correpsonding to // predicates whose value is zero continue; } @@ -609,18 +609,20 @@ auto __esimd_emu_lsc_xatomic_offset_access_0( VecIdx += vectorIndexIncrement()) { if ((ByteDistance >= 0) && (ByteDistance < BufByteWidth)) { - Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); - if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::inc) { - __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), - static_cast(1)); + if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::load) { + Oldval[VecIdx] = + __ESIMD_DNS::atomic_load((Ty *)(BaseAddr + ByteDistance)); + } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::inc) { + Oldval[VecIdx] = __ESIMD_DNS::atomic_add( + (Ty *)(BaseAddr + ByteDistance), static_cast(1)); } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::dec) { - __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), - static_cast(1)); + Oldval[VecIdx] = __ESIMD_DNS::atomic_sub( + (Ty *)(BaseAddr + ByteDistance), static_cast(1)); } } } } - return Output; + return Oldval; } /// Helper function for one-source LSC-atomic operation accessing BTI @@ -638,7 +640,7 @@ auto __esimd_emu_lsc_xatomic_offset_access_1( assert(BaseAddr != nullptr && "Invalid BaseAddr for lsc_xatomic_operation under emulation!!"); - __ESIMD_DNS::vector_type_t()> Output = 0; + __ESIMD_DNS::vector_type_t()> Oldval = 0; static_assert(AddressScale == 1); static_assert(ImmOffset == 0); @@ -662,74 +664,43 @@ auto __esimd_emu_lsc_xatomic_offset_access_1( VecIdx += vectorIndexIncrement()) { if ((ByteDistance >= 0) && (ByteDistance < BufByteWidth)) { - - // Keeping original values for return - Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); - if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::store) { - __ESIMD_DNS::atomic_store((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::add) { - __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::sub) { - __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::smin) { - __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::smax) { - __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::umin) { - if constexpr (!__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::umax) { - if constexpr (!__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fadd) { - if constexpr (__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fsub) { - if constexpr (__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fmin) { - if constexpr (__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fmax) { - if constexpr (__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } + Oldval[VecIdx] = __ESIMD_DNS::atomic_store( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr ((Op == __ESIMD_NS::native::lsc::atomic_op::add) || + (Op == __ESIMD_NS::native::lsc::atomic_op::fadd)) { + Oldval[VecIdx] = __ESIMD_DNS::atomic_add( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr ((Op == __ESIMD_NS::native::lsc::atomic_op::sub) || + (Op == __ESIMD_NS::native::lsc::atomic_op::fsub)) { + Oldval[VecIdx] = __ESIMD_DNS::atomic_sub( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr ((Op == __ESIMD_NS::native::lsc::atomic_op::smin) || + (Op == __ESIMD_NS::native::lsc::atomic_op::umin) || + (Op == __ESIMD_NS::native::lsc::atomic_op::fmin)) { + Oldval[VecIdx] = __ESIMD_DNS::atomic_min( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr ((Op == __ESIMD_NS::native::lsc::atomic_op::smax) || + (Op == __ESIMD_NS::native::lsc::atomic_op::umax) || + (Op == __ESIMD_NS::native::lsc::atomic_op::fmax)) { + Oldval[VecIdx] = __ESIMD_DNS::atomic_max( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::bit_and) { - // TODO : Type Check? Integral type only? - __ESIMD_DNS::atomic_and_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); + Oldval[VecIdx] = __ESIMD_DNS::atomic_and( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::bit_or) { - // TODO : Type Check? Integral type only? - __ESIMD_DNS::atomic_or_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); + Oldval[VecIdx] = __ESIMD_DNS::atomic_or( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::bit_xor) { - // TODO : Type Check? Integral type only? - __ESIMD_DNS::atomic_xor_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); + Oldval[VecIdx] = __ESIMD_DNS::atomic_xor( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); } } } } - return Output; + return Oldval; } /// Helper function for two-source LSC-atomic operation accessing BTI @@ -748,7 +719,7 @@ auto __esimd_emu_lsc_xatomic_offset_access_2( assert(BaseAddr != nullptr && "Invalid BaseAddr for lsc_xatomic_operation under emulation!!"); - __ESIMD_DNS::vector_type_t()> Output; + __ESIMD_DNS::vector_type_t()> Oldval; static_assert(AddressScale == 1); static_assert(ImmOffset == 0); @@ -772,24 +743,19 @@ auto __esimd_emu_lsc_xatomic_offset_access_2( VecIdx += vectorIndexIncrement()) { if ((ByteDistance >= 0) && (ByteDistance < BufByteWidth)) { - - // Keeping original values for return - Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); - if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::cmpxchg) { - __ESIMD_DNS::atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx], src1[VecIdx]); + Oldval[VecIdx] = __ESIMD_DNS::atomic_cmpxchg( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx], src1[VecIdx]); } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fcmpxchg) { - if constexpr (__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx], src1[VecIdx]); - } + static_assert(__ESIMD_DNS::is_fp_type::value); + Oldval[VecIdx] = __ESIMD_DNS::atomic_cmpxchg( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx], src1[VecIdx]); } } } } - return Output; + return Oldval; } // End : Shared utility/helper functions for LSC support under @@ -1627,11 +1593,11 @@ __esimd_lsc_xatomic_stateless_0(__ESIMD_DNS::simd_mask_storage_t pred, static_assert(ImmOffset == 0); static_assert(DS != __ESIMD_ENS::lsc_data_size::u16u32h); - __ESIMD_DNS::vector_type_t()> Output = 0; + __ESIMD_DNS::vector_type_t()> Oldval = 0; for (int AddrIdx = 0; AddrIdx < N; AddrIdx += 1) { if (pred[AddrIdx] == 0) { - // Skip Output vector elements correpsonding to + // Skip Oldval vector elements correpsonding to // predicates whose value is zero continue; } @@ -1648,19 +1614,20 @@ __esimd_lsc_xatomic_stateless_0(__ESIMD_DNS::simd_mask_storage_t pred, ChanelIdx += 1, ByteDistance += rawAddressIncrement(), VecIdx += vectorIndexIncrement()) { - // Keeping original values for return + 'load' - Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); - + if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::load) { + Oldval[VecIdx] = + __ESIMD_DNS::atomic_load((Ty *)(BaseAddr + ByteDistance)); + } if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::inc) { - __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), - static_cast(1)); + Oldval[VecIdx] = __ESIMD_DNS::atomic_add( + (Ty *)(BaseAddr + ByteDistance), static_cast(1)); } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::dec) { - __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), - static_cast(1)); + Oldval[VecIdx] = __ESIMD_DNS::atomic_sub( + (Ty *)(BaseAddr + ByteDistance), static_cast(1)); } } } - return Output; + return Oldval; } #endif // __SYCL_DEVICE_ONLY__ @@ -1700,11 +1667,11 @@ __esimd_lsc_xatomic_stateless_1( static_assert(ImmOffset == 0); static_assert(DS != __ESIMD_ENS::lsc_data_size::u16u32h); - __ESIMD_DNS::vector_type_t()> Output = 0; + __ESIMD_DNS::vector_type_t()> Oldval = 0; for (int AddrIdx = 0; AddrIdx < N; AddrIdx += 1) { if (pred[AddrIdx] == 0) { - // Skip Output vector elements correpsonding to + // Skip Oldval vector elements correpsonding to // predicates whose value is zero continue; } @@ -1721,70 +1688,40 @@ __esimd_lsc_xatomic_stateless_1( ChanelIdx += 1, ByteDistance += rawAddressIncrement(), VecIdx += vectorIndexIncrement()) { - // Keeping original values for return - Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); - if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::store) { - __ESIMD_DNS::atomic_store((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::add) { - __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::sub) { - __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::smin) { - __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::smax) { - __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::umin) { - if constexpr (!__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::umax) { - if constexpr (!__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fadd) { - if constexpr (__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_add_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fsub) { - if constexpr (__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_sub_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fmin) { - if constexpr (__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_min((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fmax) { - if constexpr (__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_max((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); - } + Oldval[VecIdx] = __ESIMD_DNS::atomic_store( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr ((Op == __ESIMD_NS::native::lsc::atomic_op::add) || + (Op == __ESIMD_NS::native::lsc::atomic_op::fadd)) { + Oldval[VecIdx] = __ESIMD_DNS::atomic_add( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr ((Op == __ESIMD_NS::native::lsc::atomic_op::sub) || + (Op == __ESIMD_NS::native::lsc::atomic_op::fsub)) { + Oldval[VecIdx] = __ESIMD_DNS::atomic_sub( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr ((Op == __ESIMD_NS::native::lsc::atomic_op::smin) || + (Op == __ESIMD_NS::native::lsc::atomic_op::umin) || + (Op == __ESIMD_NS::native::lsc::atomic_op::fmin)) { + Oldval[VecIdx] = __ESIMD_DNS::atomic_min( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); + } else if constexpr ((Op == __ESIMD_NS::native::lsc::atomic_op::smax) || + (Op == __ESIMD_NS::native::lsc::atomic_op::umax) || + (Op == __ESIMD_NS::native::lsc::atomic_op::fmax)) { + Oldval[VecIdx] = __ESIMD_DNS::atomic_max( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::bit_and) { - // TODO : Type Check? Integral type only? - __ESIMD_DNS::atomic_and_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); + Oldval[VecIdx] = __ESIMD_DNS::atomic_and( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::bit_or) { - // TODO : Type Check? Integral type only? - __ESIMD_DNS::atomic_or_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); + Oldval[VecIdx] = __ESIMD_DNS::atomic_or( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::bit_xor) { - // TODO : Type Check? Integral type only? - __ESIMD_DNS::atomic_xor_fetch((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx]); + Oldval[VecIdx] = __ESIMD_DNS::atomic_xor( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx]); } } } - return Output; + return Oldval; } #endif // __SYCL_DEVICE_ONLY__ @@ -1825,11 +1762,11 @@ __esimd_lsc_xatomic_stateless_2( static_assert(ImmOffset == 0); static_assert(DS != __ESIMD_ENS::lsc_data_size::u16u32h); - __ESIMD_DNS::vector_type_t()> Output = 0; + __ESIMD_DNS::vector_type_t()> Oldval = 0; for (int AddrIdx = 0; AddrIdx < N; AddrIdx += 1) { if (Pred[AddrIdx] == 0) { - // Skip Output vector elements correpsonding to + // Skip Oldval vector elements correpsonding to // predicates whose value is zero continue; } @@ -1846,21 +1783,13 @@ __esimd_lsc_xatomic_stateless_2( ChanelIdx += 1, ByteDistance += rawAddressIncrement(), VecIdx += vectorIndexIncrement()) { - // Keeping original values for return - Output[VecIdx] = *((Ty *)(BaseAddr + ByteDistance)); - - if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::cmpxchg) { - __ESIMD_DNS::atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx], src1[VecIdx]); - } else if constexpr (Op == __ESIMD_NS::native::lsc::atomic_op::fcmpxchg) { - if constexpr (__ESIMD_DNS::is_fp_type::value) { - __ESIMD_DNS::atomic_cmpxchg((Ty *)(BaseAddr + ByteDistance), - src0[VecIdx], src1[VecIdx]); - } - } + static_assert((Op == __ESIMD_NS::native::lsc::atomic_op::cmpxchg) || + (Op == __ESIMD_NS::native::lsc::atomic_op::fcmpxchg)); + Oldval[VecIdx] = __ESIMD_DNS::atomic_cmpxchg( + (Ty *)(BaseAddr + ByteDistance), src0[VecIdx], src1[VecIdx]); } } - return Output; + return Oldval; } #endif // __SYCL_DEVICE_ONLY__ @@ -1879,7 +1808,7 @@ __ESIMD_INTRIN void __esimd_lsc_fence(__ESIMD_DNS::simd_mask_storage_t pred) ; #else // __SYCL_DEVICE_ONLY__ { - __ESIMD_UNSUPPORTED_ON_HOST; + __ESIMD_DNS::atomic_fence(); } #endif // __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp index 1a48a65db6e54..880fa2ceb7e7a 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp @@ -1299,6 +1299,8 @@ __ESIMD_API void lsc_store2d(T *Ptr, unsigned SurfaceWidth, /// @param offsets is the zero-based offsets. /// @param pred is predicates. /// +/// @return A vector of the old values at the memory locations before the +/// update. template <__ESIMD_NS::atomic_op Op, typename T, int N, lsc_data_size DS = lsc_data_size::default_size> __ESIMD_API __ESIMD_NS::simd @@ -1336,6 +1338,8 @@ lsc_slm_atomic_update(__ESIMD_NS::simd offsets, /// @param src0 is the first atomic operand. /// @param pred is predicates. /// +/// @return A vector of the old values at the memory locations before the +/// update. template <__ESIMD_NS::atomic_op Op, typename T, int N, lsc_data_size DS = lsc_data_size::default_size> __ESIMD_API __ESIMD_NS::simd @@ -1376,6 +1380,8 @@ lsc_slm_atomic_update(__ESIMD_NS::simd offsets, /// @param src1 is the second atomic operand. /// @param pred is predicates. /// +/// @return A vector of the old values at the memory locations before the +/// update. template <__ESIMD_NS::atomic_op Op, typename T, int N, lsc_data_size DS = lsc_data_size::default_size> __ESIMD_API __ESIMD_NS::simd @@ -1551,6 +1557,8 @@ lsc_atomic_update(T *p, __ESIMD_NS::simd offsets, /// @param offsets is the zero-based offsets. /// @param pred is predicates. /// +/// @return A vector of the old values at the memory locations before the +/// update. template <__ESIMD_NS::atomic_op Op, typename T, int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, @@ -1602,6 +1610,8 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd offsets, /// @param src0 is the first atomic operand. /// @param pred is predicates. /// +/// @return A vector of the old values at the memory locations before the +/// update. template <__ESIMD_NS::atomic_op Op, typename T, int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none, @@ -1654,6 +1664,8 @@ lsc_atomic_update(AccessorTy acc, __ESIMD_NS::simd offsets, /// @param src1 is the second atomic operand. /// @param pred is predicates. /// +/// @return A vector of the old values at the memory locations before the +/// update. template <__ESIMD_NS::atomic_op Op, typename T, int N, lsc_data_size DS = lsc_data_size::default_size, cache_hint L1H = cache_hint::none, cache_hint L3H = cache_hint::none,