From 343d10ceded19849f9a2788f42070a56eee5e222 Mon Sep 17 00:00:00 2001 From: Taiki Endo Date: Thu, 19 Sep 2024 04:19:07 +0900 Subject: [PATCH] riscv32: Support 64-bit atomics (Zacas extension) --- .github/.cspell/project-dictionary.txt | 6 + .github/workflows/ci.yml | 13 +- src/cfgs.rs | 68 ++++- src/imp/atomic128/riscv64.rs | 4 +- src/imp/atomic64/README.md | 31 +++ src/imp/atomic64/macros.rs | 330 +++++++++++++++++++++++++ src/imp/atomic64/riscv32.rs | 273 ++++++++++++++++++++ src/imp/detect/common.rs | 4 +- src/imp/fallback/mod.rs | 16 +- src/imp/mod.rs | 123 ++++++++- src/tests/mod.rs | 23 +- src/utils.rs | 2 +- tools/build.sh | 4 +- 13 files changed, 859 insertions(+), 38 deletions(-) create mode 100644 src/imp/atomic64/macros.rs create mode 100644 src/imp/atomic64/riscv32.rs diff --git a/.github/.cspell/project-dictionary.txt b/.github/.cspell/project-dictionary.txt index a9fe5eab..26d2ae84 100644 --- a/.github/.cspell/project-dictionary.txt +++ b/.github/.cspell/project-dictionary.txt @@ -46,6 +46,8 @@ DWCAS elems espup exynos +fild +fistp getauxval getisax getpid @@ -70,6 +72,7 @@ ldclrpa ldclrpal ldclrpl ldiapp +ldrexd ldsetp ldsetpa ldsetpal @@ -101,6 +104,8 @@ minu mipsn miscompiles mmfr +movlps +movq mpidr mstatus mvfr @@ -150,6 +155,7 @@ stilp stlxp stpq stqcx +strexd stxp subarch subc diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dc8f8388..d83d1900 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -259,8 +259,6 @@ jobs: target: riscv64gc-unknown-linux-gnu - rust: '1.59' # LLVM 13 target: riscv64gc-unknown-linux-gnu - - rust: '1.73' # LLVM 17 (oldest version we can use experimental-zacas on this target) - target: riscv64gc-unknown-linux-gnu - rust: stable target: riscv64gc-unknown-linux-gnu - rust: nightly @@ -317,6 +315,9 @@ jobs: # TODO: LLVM bug: Undefined temporary symbol error when building std. - run: printf 'RELEASE=--release\n' >>"${GITHUB_ENV}" if: startsWith(matrix.target, 'mips-') || startsWith(matrix.target, 'mipsel-') + # for serde + - run: printf '%s\n' "RUSTFLAGS=${RUSTFLAGS} --cfg no_diagnostic_namespace" >>"${GITHUB_ENV}" + if: matrix.rust == 'nightly-2024-02-13' - run: tools/test.sh -vv ${TARGET:-} ${DOCTEST_XCOMPILE:-} ${BUILD_STD:-} ${RELEASE:-} # We test doctest only once with the default build conditions because doctest is slow. Both api-test @@ -388,21 +389,21 @@ jobs: RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-cpu=pwr8 RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-cpu=pwr8 if: startsWith(matrix.target, 'powerpc64-') - # riscv64 +zabha + # riscv +zabha - run: tools/test.sh -vv --tests ${TARGET:-} ${BUILD_STD:-} ${RELEASE:-} env: RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+zabha RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+zabha QEMU_CPU: max # TODO: cranelift doesn't support cfg(target_feature): https://github.com/rust-lang/rustc_codegen_cranelift/issues/1400 - if: startsWith(matrix.target, 'riscv64') && !contains(matrix.flags, 'codegen-backend=cranelift') - # riscv64 +experimental-zacas + if: startsWith(matrix.target, 'riscv') && !contains(matrix.flags, 'codegen-backend=cranelift') + # riscv +experimental-zacas - run: tools/test.sh -vv --tests ${TARGET:-} ${BUILD_STD:-} ${RELEASE:-} env: RUSTDOCFLAGS: ${{ env.RUSTDOCFLAGS }} -C target-feature=+experimental-zacas RUSTFLAGS: ${{ env.RUSTFLAGS }} -C target-feature=+experimental-zacas # TODO: cranelift doesn't support cfg(target_feature): https://github.com/rust-lang/rustc_codegen_cranelift/issues/1400 - if: startsWith(matrix.target, 'riscv64') && !contains(matrix.flags, 'codegen-backend=cranelift') + if: startsWith(matrix.target, 'riscv') && !contains(matrix.flags, 'codegen-backend=cranelift') # s390x z196 (arch9) - run: tools/test.sh -vv --tests ${TARGET:-} ${BUILD_STD:-} ${RELEASE:-} env: diff --git a/src/cfgs.rs b/src/cfgs.rs index b90ce262..67ae079a 100644 --- a/src/cfgs.rs +++ b/src/cfgs.rs @@ -154,6 +154,36 @@ mod atomic_32_macros { ), target_has_atomic = "64", not(any(target_pointer_width = "16", target_pointer_width = "32")), + all( + target_arch = "riscv32", + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), + any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas", + // TODO(riscv) + // all( + // feature = "fallback", + // not(portable_atomic_no_outline_atomics), + // any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default + // any( + // all( + // target_os = "linux", + // any( + // target_env = "gnu", + // all( + // any(target_env = "musl", target_env = "ohos"), + // not(target_feature = "crt-static"), + // ), + // portable_atomic_outline_atomics, + // ), + // ), + // target_os = "android", + // ), + // not(any(miri, portable_atomic_sanitize_thread)), + // ), + ), + ), )) )] #[macro_use] @@ -201,6 +231,36 @@ mod atomic_64_macros { ), target_has_atomic = "64", not(any(target_pointer_width = "16", target_pointer_width = "32")), + all( + target_arch = "riscv32", + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), + any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas", + // TODO(riscv) + // all( + // feature = "fallback", + // not(portable_atomic_no_outline_atomics), + // any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default + // any( + // all( + // target_os = "linux", + // any( + // target_env = "gnu", + // all( + // any(target_env = "musl", target_env = "ohos"), + // not(target_feature = "crt-static"), + // ), + // portable_atomic_outline_atomics, + // ), + // ), + // target_os = "android", + // ), + // not(any(miri, portable_atomic_sanitize_thread)), + // ), + ), + ), ))) )] #[macro_use] @@ -247,11 +307,11 @@ mod atomic_64_macros { any( target_feature = "experimental-zacas", portable_atomic_target_feature = "experimental-zacas", - // TODO(riscv64) + // TODO(riscv) // all( // feature = "fallback", // not(portable_atomic_no_outline_atomics), - // any(test, portable_atomic_outline_atomics), // TODO(riscv64): currently disabled by default + // any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default // any( // all( // target_os = "linux", @@ -366,11 +426,11 @@ mod atomic_128_macros { any( target_feature = "experimental-zacas", portable_atomic_target_feature = "experimental-zacas", - // TODO(riscv64) + // TODO(riscv) // all( // feature = "fallback", // not(portable_atomic_no_outline_atomics), - // any(test, portable_atomic_outline_atomics), // TODO(riscv64): currently disabled by default + // any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default // any( // all( // target_os = "linux", diff --git a/src/imp/atomic128/riscv64.rs b/src/imp/atomic128/riscv64.rs index f1203a03..dc485826 100644 --- a/src/imp/atomic128/riscv64.rs +++ b/src/imp/atomic128/riscv64.rs @@ -18,8 +18,6 @@ Generated asm: - riscv64 (+experimental-zacas) https://godbolt.org/z/5Kc17T1W8 */ -// TODO: 64-bit atomic using amocas.d for riscv32 - include!("macros.rs"); // TODO @@ -31,7 +29,7 @@ include!("macros.rs"); // See detect/auxv.rs for more. #[cfg(test)] // TODO #[cfg(not(portable_atomic_no_outline_atomics))] -#[cfg(any(test, portable_atomic_outline_atomics))] // TODO(riscv64): currently disabled by default +#[cfg(any(test, portable_atomic_outline_atomics))] // TODO(riscv): currently disabled by default #[cfg(any( test, not(any( diff --git a/src/imp/atomic64/README.md b/src/imp/atomic64/README.md index af19a80c..b4c87910 100644 --- a/src/imp/atomic64/README.md +++ b/src/imp/atomic64/README.md @@ -1,3 +1,34 @@ # Implementation of 64-bit atomics on 32-bit architectures (See the [`atomic128` module](../atomic128) for 128-bit atomics on 64-bit architectures.) + +## 64-bit atomics instructions + +Here is the table of targets that support 64-bit atomics and the instructions used: + +| target_arch | load | store | CAS | RMW | note | +| ----------- | ---- | ----- | --- | --- | ---- | +| x86 | cmpxchg8b or fild or movlps or movq | cmpxchg8b or fistp or movlps | cmpxchg8b | cmpxchg8b | provided by `core::sync::atomic` | +| arm | ldrexd | ldrexd/strexd | ldrexd/strexd | ldrexd/strexd | provided by `core::sync::atomic` for Armv6+, otherwise provided by us for Linux/Android using kuser_cmpxchg64 (see arm_linux.rs for more) | +| riscv32 | amocas.d | amocas.d | amocas.d | amocas.d | Experimental. Requires experimental-zacas target feature. Currently compile-time detection only due to LLVM marking it as experimental.
Requires 1.82+ (LLVM 19+) | + +If `core::sync::atomic` provides 64-bit atomics, we use them. +On compiler versions or platforms where these are not supported, the fallback implementation is used. + +## Run-time CPU feature detection + +[detect](../detect) module has run-time CPU feature detection implementations. + +Here is the table of targets that support run-time CPU feature detection and the instruction or API used: + +| target_arch | target_os/target_env | instruction/API | features | note | +| ----------- | -------------------- | --------------- | -------- | ---- | +| riscv32 | linux | riscv_hwprobe | all | Currently only used in tests due to LLVM marking zacas as experimental | + +Run-time detection is enabled by default on most targets and can be disabled with `--cfg portable_atomic_no_outline_atomics`. + +On some targets, run-time detection is disabled by default mainly for compatibility with older versions of operating systems or incomplete build environments, and can be enabled by `--cfg portable_atomic_outline_atomics`. (When both cfg are enabled, `*_no_*` cfg is preferred.) + +For targets not included in the above table, run-time detection is always disabled and works the same as when `--cfg portable_atomic_no_outline_atomics` is set. + +See also [docs on `portable_atomic_no_outline_atomics`](https://github.com/taiki-e/portable-atomic/blob/HEAD/README.md#optional-cfg-no-outline-atomics) in the top-level readme. diff --git a/src/imp/atomic64/macros.rs b/src/imp/atomic64/macros.rs new file mode 100644 index 00000000..678ac5dd --- /dev/null +++ b/src/imp/atomic64/macros.rs @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +macro_rules! atomic64 { + ($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => { + #[repr(C, align(8))] + pub(crate) struct $atomic_type { + v: core::cell::UnsafeCell<$int_type>, + } + + // Send is implicitly implemented. + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock. + unsafe impl Sync for $atomic_type {} + + impl_default_no_fetch_ops!($atomic_type, $int_type); + impl_default_bit_opts!($atomic_type, $int_type); + impl $atomic_type { + #[inline] + pub(crate) const fn new(v: $int_type) -> Self { + Self { v: core::cell::UnsafeCell::new(v) } + } + + #[inline] + pub(crate) fn is_lock_free() -> bool { + is_lock_free() + } + pub(crate) const IS_ALWAYS_LOCK_FREE: bool = IS_ALWAYS_LOCK_FREE; + + #[inline] + pub(crate) fn get_mut(&mut self) -> &mut $int_type { + // SAFETY: the mutable reference guarantees unique ownership. + // (UnsafeCell::get_mut requires Rust 1.50) + unsafe { &mut *self.v.get() } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn load(&self, order: Ordering) -> $int_type { + crate::utils::assert_load_ordering(order); + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + atomic_load(self.v.get().cast::(), order) as $int_type + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn store(&self, val: $int_type, order: Ordering) { + crate::utils::assert_store_ordering(order); + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + atomic_store(self.v.get().cast::(), val as u64, order) + } + } + + #[inline] + pub(crate) fn swap(&self, val: $int_type, order: Ordering) -> $int_type { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + atomic_swap(self.v.get().cast::(), val as u64, order) as $int_type + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + match atomic_compare_exchange( + self.v.get().cast::(), + current as u64, + new as u64, + success, + failure, + ) { + Ok(v) => Ok(v as $int_type), + Err(v) => Err(v as $int_type), + } + } + } + + #[inline] + #[cfg_attr(all(debug_assertions, not(portable_atomic_no_track_caller)), track_caller)] + pub(crate) fn compare_exchange_weak( + &self, + current: $int_type, + new: $int_type, + success: Ordering, + failure: Ordering, + ) -> Result<$int_type, $int_type> { + crate::utils::assert_compare_exchange_ordering(success, failure); + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + match atomic_compare_exchange_weak( + self.v.get().cast::(), + current as u64, + new as u64, + success, + failure, + ) { + Ok(v) => Ok(v as $int_type), + Err(v) => Err(v as $int_type), + } + } + } + + #[inline] + pub(crate) fn fetch_add(&self, val: $int_type, order: Ordering) -> $int_type { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + atomic_add(self.v.get().cast::(), val as u64, order) as $int_type + } + } + + #[inline] + pub(crate) fn fetch_sub(&self, val: $int_type, order: Ordering) -> $int_type { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + atomic_sub(self.v.get().cast::(), val as u64, order) as $int_type + } + } + + #[inline] + pub(crate) fn fetch_and(&self, val: $int_type, order: Ordering) -> $int_type { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + atomic_and(self.v.get().cast::(), val as u64, order) as $int_type + } + } + + #[inline] + pub(crate) fn fetch_nand(&self, val: $int_type, order: Ordering) -> $int_type { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + atomic_nand(self.v.get().cast::(), val as u64, order) as $int_type + } + } + + #[inline] + pub(crate) fn fetch_or(&self, val: $int_type, order: Ordering) -> $int_type { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + atomic_or(self.v.get().cast::(), val as u64, order) as $int_type + } + } + + #[inline] + pub(crate) fn fetch_xor(&self, val: $int_type, order: Ordering) -> $int_type { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + atomic_xor(self.v.get().cast::(), val as u64, order) as $int_type + } + } + + #[inline] + pub(crate) fn fetch_max(&self, val: $int_type, order: Ordering) -> $int_type { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + $atomic_max(self.v.get().cast::(), val as u64, order) as $int_type + } + } + + #[inline] + pub(crate) fn fetch_min(&self, val: $int_type, order: Ordering) -> $int_type { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + $atomic_min(self.v.get().cast::(), val as u64, order) as $int_type + } + } + + #[inline] + pub(crate) fn fetch_not(&self, order: Ordering) -> $int_type { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + atomic_not(self.v.get().cast::(), order) as $int_type + } + } + #[inline] + pub(crate) fn not(&self, order: Ordering) { + self.fetch_not(order); + } + + #[inline] + pub(crate) fn fetch_neg(&self, order: Ordering) -> $int_type { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: any data races are prevented by atomic intrinsics, the kernel user helper, or the lock + // and the raw pointer passed in is valid because we got it from a reference. + unsafe { + atomic_neg(self.v.get().cast::(), order) as $int_type + } + } + #[inline] + pub(crate) fn neg(&self, order: Ordering) { + self.fetch_neg(order); + } + + #[inline] + pub(crate) const fn as_ptr(&self) -> *mut $int_type { + self.v.get() + } + } + }; +} + +#[cfg(target_arch = "riscv32")] +macro_rules! atomic_rmw_by_atomic_update { + () => { + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_swap(dst: *mut u64, val: u64, order: Ordering) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |_| val) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_add(dst: *mut u64, val: u64, order: Ordering) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| x.wrapping_add(val)) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_sub(dst: *mut u64, val: u64, order: Ordering) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| x.wrapping_sub(val)) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_and(dst: *mut u64, val: u64, order: Ordering) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| x & val) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_nand(dst: *mut u64, val: u64, order: Ordering) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| !(x & val)) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_or(dst: *mut u64, val: u64, order: Ordering) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| x | val) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_xor(dst: *mut u64, val: u64, order: Ordering) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| x ^ val) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_not(dst: *mut u64, order: Ordering) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| !x) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_neg(dst: *mut u64, order: Ordering) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, u64::wrapping_neg) } + } + atomic_rmw_by_atomic_update!(cmp); + }; + (cmp) => { + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_max(dst: *mut u64, val: u64, order: Ordering) -> u64 { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_update(dst, order, |x| core::cmp::max(x as i64, val as i64) as u64) + } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_umax(dst: *mut u64, val: u64, order: Ordering) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| core::cmp::max(x, val)) } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_min(dst: *mut u64, val: u64, order: Ordering) -> u64 { + #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)] + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_update(dst, order, |x| core::cmp::min(x as i64, val as i64) as u64) + } + } + #[inline] + #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces + unsafe fn atomic_umin(dst: *mut u64, val: u64, order: Ordering) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { atomic_update(dst, order, |x| core::cmp::min(x, val)) } + } + }; +} diff --git a/src/imp/atomic64/riscv32.rs b/src/imp/atomic64/riscv32.rs new file mode 100644 index 00000000..e61f3728 --- /dev/null +++ b/src/imp/atomic64/riscv32.rs @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +/* +Atomic{I,U}64 implementation on riscv32 using amocas.d (DWCAS). + +Note: On Miri and ThreadSanitizer which do not support inline assembly, we don't use +this module and use fallback implementation instead. + +Refs: +- RISC-V Instruction Set Manual + https://github.com/riscv/riscv-isa-manual/tree/riscv-isa-release-8b9dc50-2024-08-30 + "Zacas" Extension for Atomic Compare-and-Swap (CAS) Instructions + https://github.com/riscv/riscv-isa-manual/blob/riscv-isa-release-8b9dc50-2024-08-30/src/zacas.adoc +- RISC-V Atomics ABI Specification + https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/draft-20240829-13bfa9f54634cb60d86b9b333e109f077805b4b3/riscv-atomic.adoc + +Generated asm: +- riscv32 (+experimental-zacas) https://godbolt.org/z/d3f6EsG3f +*/ + +// TODO: merge duplicated code with atomic128/riscv64.rs + +include!("macros.rs"); + +// TODO +// #[cfg(not(any(target_feature = "experimental-zacas", portable_atomic_target_feature = "experimental-zacas")))] +// #[path = "../fallback/outline_atomics.rs"] +// mod fallback; + +// On musl with static linking, it seems that libc is not always available. +// See detect/auxv.rs for more. +#[cfg(test)] // TODO +#[cfg(not(portable_atomic_no_outline_atomics))] +#[cfg(any(test, portable_atomic_outline_atomics))] // TODO(riscv): currently disabled by default +#[cfg(any( + test, + not(any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas" + )) +))] +#[cfg(any( + all( + target_os = "linux", + any( + target_env = "gnu", + all(any(target_env = "musl", target_env = "ohos"), not(target_feature = "crt-static")), + portable_atomic_outline_atomics, + ), + ), + target_os = "android", +))] +#[path = "../detect/riscv_linux.rs"] +mod detect; + +use core::{arch::asm, sync::atomic::Ordering}; + +use crate::utils::{Pair, U64}; + +// https://github.com/riscv-non-isa/riscv-asm-manual/blob/ad0de8c004e29c9a7ac33cfd054f4d4f9392f2fb/src/asm-manual.adoc#arch +#[cfg(any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas" +))] +macro_rules! start_zacas { + () => { + // zacas available, no-op + "" + }; +} +#[cfg(any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas" +))] +macro_rules! end_zacas { + () => { + // zacas available, no-op + "" + }; +} +#[cfg(not(any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas" +)))] +macro_rules! start_zacas { + () => { + ".option push\n.option arch, +experimental-zacas" + }; +} +#[cfg(not(any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas" +)))] +macro_rules! end_zacas { + () => { + ".option pop" + }; +} + +macro_rules! atomic_rmw_amocas_order { + ($op:ident, $order:ident) => { + atomic_rmw_amocas_order!($op, $order, failure = $order) + }; + ($op:ident, $order:ident, failure = $failure:ident) => { + match $order { + Ordering::Relaxed => $op!("", ""), + Ordering::Acquire => $op!("", ".aq"), + Ordering::Release => $op!("", ".rl"), + Ordering::AcqRel => $op!("", ".aqrl"), + Ordering::SeqCst if $failure == Ordering::SeqCst => $op!("fence rw,rw", ".aqrl"), + Ordering::SeqCst => $op!("", ".aqrl"), + _ => unreachable!(), + } + }; +} + +#[inline] +unsafe fn atomic_load(src: *mut u64, order: Ordering) -> u64 { + debug_assert!(src as usize % 8 == 0); + + // SAFETY: the caller must uphold the safety contract. + unsafe { + let (out_lo, out_hi); + macro_rules! load { + ($fence:tt, $asm_order:tt) => { + asm!( + start_zacas!(), + $fence, + concat!("amocas.d", $asm_order, " a2, a2, 0({src})"), + end_zacas!(), + src = in(reg) ptr_reg!(src), + inout("a2") 0_u32 => out_lo, + inout("a3") 0_u32 => out_hi, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw_amocas_order!(load, order); + U64 { pair: Pair { lo: out_lo, hi: out_hi } }.whole + } +} + +#[inline] +unsafe fn atomic_store(dst: *mut u64, val: u64, order: Ordering) { + // SAFETY: the caller must uphold the safety contract. + unsafe { + atomic_swap(dst, val, order); + } +} + +#[inline] +unsafe fn atomic_compare_exchange( + dst: *mut u64, + old: u64, + new: u64, + success: Ordering, + failure: Ordering, +) -> Result { + debug_assert!(dst as usize % 8 == 0); + let order = crate::utils::upgrade_success_ordering(success, failure); + + // SAFETY: the caller must uphold the safety contract. + let prev = unsafe { + let old = U64 { whole: old }; + let new = U64 { whole: new }; + let (prev_lo, prev_hi); + macro_rules! cmpxchg { + ($fence:tt, $asm_order:tt) => { + asm!( + start_zacas!(), + $fence, + concat!("amocas.d", $asm_order, " a4, a2, 0({dst})"), + end_zacas!(), + dst = in(reg) ptr_reg!(dst), + // must be allocated to even/odd register pair + inout("a4") old.pair.lo => prev_lo, + inout("a5") old.pair.hi => prev_hi, + // must be allocated to even/odd register pair + in("a2") new.pair.lo, + in("a3") new.pair.hi, + options(nostack, preserves_flags), + ) + }; + } + atomic_rmw_amocas_order!(cmpxchg, order, failure = failure); + U64 { pair: Pair { lo: prev_lo, hi: prev_hi } }.whole + }; + if prev == old { + Ok(prev) + } else { + Err(prev) + } +} + +// amocas is always strong. +use atomic_compare_exchange as atomic_compare_exchange_weak; + +// 64-bit atomic load by two 32-bit atomic loads. (see arm_linux.rs for more) +#[inline] +unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 { + // SAFETY: the caller must uphold the safety contract. + unsafe { + let (out_lo, out_hi); + asm!( + "lw {out_lo}, ({src})", + "lw {out_hi}, 4({src})", + src = in(reg) ptr_reg!(src), + out_lo = out(reg) out_lo, + out_hi = out(reg) out_hi, + options(pure, nostack, preserves_flags, readonly), + ); + U64 { pair: Pair { lo: out_lo, hi: out_hi } }.whole + } +} + +#[inline(always)] +unsafe fn atomic_update(dst: *mut u64, order: Ordering, mut f: F) -> u64 +where + F: FnMut(u64) -> u64, +{ + // SAFETY: the caller must uphold the safety contract. + unsafe { + let mut prev = byte_wise_atomic_load(dst); + loop { + let next = f(prev); + match atomic_compare_exchange_weak(dst, prev, next, order, Ordering::Relaxed) { + Ok(x) => return x, + Err(x) => prev = x, + } + } + } +} + +atomic_rmw_by_atomic_update!(); + +#[inline] +fn is_lock_free() -> bool { + #[cfg(any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas" + ))] + { + // zacas is available at compile-time. + true + } + #[cfg(not(any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas" + )))] + { + detect::detect().has_zacas() + } +} +const IS_ALWAYS_LOCK_FREE: bool = cfg!(any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas" +)); + +atomic64!(AtomicI64, i64, atomic_max, atomic_min); +atomic64!(AtomicU64, u64, atomic_umax, atomic_umin); + +#[allow(clippy::undocumented_unsafe_blocks, clippy::wildcard_imports)] +#[cfg(test)] +mod tests { + use super::*; + + test_atomic_int!(i64); + test_atomic_int!(u64); + + // load/store/swap implementation is not affected by signedness, so it is + // enough to test only unsigned types. + stress_test!(u64); +} diff --git a/src/imp/detect/common.rs b/src/imp/detect/common.rs index c94eb29c..9c9d2ebf 100644 --- a/src/imp/detect/common.rs +++ b/src/imp/detect/common.rs @@ -106,7 +106,7 @@ flags! { HAS_QUADWORD_ATOMICS(1, has_quadword_atomics, "quadword-atomics", any(target_feature = "quadword-atomics", portable_atomic_target_feature = "quadword-atomics")), } -#[cfg(target_arch = "riscv64")] +#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] flags! { // amocas.{w,d,q} HAS_ZACAS(1, has_zacas, "zacas", any(target_feature = "experimental-zacas", portable_atomic_target_feature = "experimental-zacas")), @@ -321,7 +321,7 @@ mod tests_common { } } } - #[cfg(target_arch = "riscv64")] + #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] #[test] #[cfg_attr(portable_atomic_test_outline_atomics_detect_false, ignore)] fn test_detect() { diff --git a/src/imp/fallback/mod.rs b/src/imp/fallback/mod.rs index c23985db..d4dd3a52 100644 --- a/src/imp/fallback/mod.rs +++ b/src/imp/fallback/mod.rs @@ -375,7 +375,21 @@ macro_rules! atomic { #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(any(test, portable_atomic_no_atomic_64)))] #[cfg_attr( not(portable_atomic_no_cfg_target_has_atomic), - cfg(any(test, not(target_has_atomic = "64"))) + cfg(any( + test, + not(any( + target_has_atomic = "64", + all( + target_arch = "riscv32", + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), + any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas", + ), + ), + )) + )) )] cfg_no_fast_atomic_64! { atomic!(AtomicI64, i64, 8); diff --git a/src/imp/mod.rs b/src/imp/mod.rs index bc135476..0a015b08 100644 --- a/src/imp/mod.rs +++ b/src/imp/mod.rs @@ -71,11 +71,11 @@ mod x86_64; any( target_feature = "experimental-zacas", portable_atomic_target_feature = "experimental-zacas", - // TODO(riscv64) + // TODO(riscv) // all( // feature = "fallback", // not(portable_atomic_no_outline_atomics), - // any(test, portable_atomic_outline_atomics), // TODO(riscv64): currently disabled by default + // any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default // any( // all( // target_os = "linux", @@ -164,6 +164,40 @@ mod s390x; #[path = "atomic64/arm_linux.rs"] mod arm_linux; +// riscv32 64-bit atomics +#[cfg(all( + target_arch = "riscv32", + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), + any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas", + // TODO(riscv) + // all( + // feature = "fallback", + // not(portable_atomic_no_outline_atomics), + // any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default + // any( + // all( + // target_os = "linux", + // any( + // target_env = "gnu", + // all( + // any(target_env = "musl", target_env = "ohos"), + // not(target_feature = "crt-static"), + // ), + // portable_atomic_outline_atomics, + // ), + // ), + // target_os = "android", + // ), + // not(any(miri, portable_atomic_sanitize_thread)), + // ), + ), +))] +#[path = "atomic64/riscv32.rs"] +mod riscv32; + // MSP430 atomics #[cfg(target_arch = "msp430")] pub(crate) mod msp430; @@ -366,13 +400,45 @@ items! { #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(not(portable_atomic_no_atomic_cas)))] #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(target_has_atomic = "ptr"))] items! { - #[cfg(not(all( - target_arch = "arm", - not(any(miri, portable_atomic_sanitize_thread)), - any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), - any(target_os = "linux", target_os = "android"), - not(any(target_feature = "v6", portable_atomic_target_feature = "v6")), - not(portable_atomic_no_outline_atomics), + #[cfg(not(any( + all( + target_arch = "arm", + not(any(miri, portable_atomic_sanitize_thread)), + any(not(portable_atomic_no_asm), portable_atomic_unstable_asm), + any(target_os = "linux", target_os = "android"), + not(any(target_feature = "v6", portable_atomic_target_feature = "v6")), + not(portable_atomic_no_outline_atomics), + ), + all( + target_arch = "riscv32", + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), + any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas", + // TODO(riscv) + // all( + // feature = "fallback", + // not(portable_atomic_no_outline_atomics), + // any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default + // any( + // all( + // target_os = "linux", + // any( + // target_env = "gnu", + // all( + // any(target_env = "musl", target_env = "ohos"), + // not(target_feature = "crt-static"), + // ), + // portable_atomic_outline_atomics, + // ), + // ), + // target_os = "android", + // ), + // not(any(miri, portable_atomic_sanitize_thread)), + // ), + ), + ), )))] #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))] #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))] @@ -405,11 +471,11 @@ items! { any( target_feature = "experimental-zacas", portable_atomic_target_feature = "experimental-zacas", - // TODO(riscv64) + // TODO(riscv) // all( // feature = "fallback", // not(portable_atomic_no_outline_atomics), - // any(test, portable_atomic_outline_atomics), // TODO(riscv64): currently disabled by default + // any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default // any( // all( // target_os = "linux", @@ -477,6 +543,37 @@ items! { #[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))] #[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))] pub(crate) use self::arm_linux::{AtomicI64, AtomicU64}; +#[cfg(all( + target_arch = "riscv32", + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), + any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas", + // TODO(riscv) + // all( + // feature = "fallback", + // not(portable_atomic_no_outline_atomics), + // any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default + // any( + // all( + // target_os = "linux", + // any( + // target_env = "gnu", + // all( + // any(target_env = "musl", target_env = "ohos"), + // not(target_feature = "crt-static"), + // ), + // portable_atomic_outline_atomics, + // ), + // ), + // target_os = "android", + // ), + // not(any(miri, portable_atomic_sanitize_thread)), + // ), + ), +))] +pub(crate) use self::riscv32::{AtomicI64, AtomicU64}; // 128-bit atomics (platform-specific) // AArch64 @@ -508,11 +605,11 @@ pub(crate) use self::x86_64::{AtomicI128, AtomicU128}; any( target_feature = "experimental-zacas", portable_atomic_target_feature = "experimental-zacas", - // TODO(riscv64) + // TODO(riscv) // all( // feature = "fallback", // not(portable_atomic_no_outline_atomics), - // any(test, portable_atomic_outline_atomics), // TODO(riscv64): currently disabled by default + // any(test, portable_atomic_outline_atomics), // TODO(riscv): currently disabled by default // any( // all( // target_os = "linux", diff --git a/src/tests/mod.rs b/src/tests/mod.rs index 0310d31c..124603c3 100644 --- a/src/tests/mod.rs +++ b/src/tests/mod.rs @@ -79,7 +79,23 @@ fn test_is_lock_free() { assert!(AtomicU32::is_lock_free()); #[cfg(not(portable_atomic_no_cfg_target_has_atomic))] { - if cfg!(all( + if cfg!(any( + target_has_atomic = "64", + all( + target_arch = "riscv32", + not(any(miri, portable_atomic_sanitize_thread)), + not(portable_atomic_no_asm), + any( + target_feature = "experimental-zacas", + portable_atomic_target_feature = "experimental-zacas", + ), + ), + )) { + assert!(AtomicI64::is_always_lock_free()); + assert!(AtomicI64::is_lock_free()); + assert!(AtomicU64::is_always_lock_free()); + assert!(AtomicU64::is_lock_free()); + } else if cfg!(all( feature = "fallback", target_arch = "arm", not(any(miri, portable_atomic_sanitize_thread)), @@ -94,11 +110,6 @@ fn test_is_lock_free() { assert!(AtomicI64::is_lock_free()); assert!(!AtomicU64::is_always_lock_free()); assert!(AtomicU64::is_lock_free()); - } else if cfg!(target_has_atomic = "64") { - assert!(AtomicI64::is_always_lock_free()); - assert!(AtomicI64::is_lock_free()); - assert!(AtomicU64::is_always_lock_free()); - assert!(AtomicU64::is_lock_free()); } else { assert!(!AtomicI64::is_always_lock_free()); assert!(!AtomicI64::is_lock_free()); diff --git a/src/utils.rs b/src/utils.rs index 7093e054..c530efa2 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -359,7 +359,7 @@ pub(crate) union U128 { pub(crate) pair: Pair, } #[allow(dead_code)] -#[cfg(target_arch = "arm")] +#[cfg(any(target_arch = "arm", target_arch = "riscv32"))] /// A 64-bit value represented as a pair of 32-bit values. /// /// This type is `#[repr(C)]`, both fields have the same in-memory representation diff --git a/tools/build.sh b/tools/build.sh index 3421344b..75d23524 100755 --- a/tools/build.sh +++ b/tools/build.sh @@ -587,9 +587,9 @@ build() { RUSTFLAGS="${target_rustflags} -C target-cpu=pwr7" \ x_cargo "${args[@]}" "$@" ;; - riscv64*) + riscv*) case "${target}" in - # TODO(riscv64): support CAS in riscv.rs when zacas enabled + # TODO(riscv): support CAS in riscv.rs when zacas enabled riscv??i-* | riscv??im-* | riscv??imc-*) ;; *) CARGO_TARGET_DIR="${target_dir}/zacas" \