From fed8bd154e7dd663e67307729b5e2bfe01d117e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Brammer?= Date: Mon, 28 Aug 2023 00:44:00 +0200 Subject: [PATCH 1/2] Return f32 and f64 in XMM0 instead of FP0 on i686 Rust calling convention i686 already uses SSE2 to do calculations with f32 and f64, but the C calling convention uses the x87 stack to return values. The Rust calling convention does not need to do this, and LLVM makes it easy to use XMM0 instead, which saves move instructions and fixes problems with NaN values. --- compiler/rustc_span/src/symbol.rs | 1 + compiler/rustc_ty_utils/src/abi.rs | 7 +++++++ library/std/src/f32/tests.rs | 4 ++-- library/std/src/f64/tests.rs | 4 ++-- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 382754be2ca83..f021243b11aee 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1489,6 +1489,7 @@ symbols! { sreg, sreg_low16, sse, + sse2, sse4a_target_feature, stable, staged_api, diff --git a/compiler/rustc_ty_utils/src/abi.rs b/compiler/rustc_ty_utils/src/abi.rs index 16183403d67aa..acab46244433b 100644 --- a/compiler/rustc_ty_utils/src/abi.rs +++ b/compiler/rustc_ty_utils/src/abi.rs @@ -7,6 +7,7 @@ use rustc_middle::ty::layout::{ use rustc_middle::ty::{self, InstanceDef, Ty, TyCtxt}; use rustc_session::config::OptLevel; use rustc_span::def_id::DefId; +use rustc_span::symbol::sym; use rustc_target::abi::call::{ ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, PassMode, Reg, RegKind, RiscvInterruptKind, @@ -371,6 +372,8 @@ fn fn_abi_new_uncached<'tcx>( let target = &cx.tcx.sess.target; let target_env_gnu_like = matches!(&target.env[..], "gnu" | "musl" | "uclibc"); let win_x64_gnu = target.os == "windows" && target.arch == "x86_64" && target.env == "gnu"; + let x86_sse2 = target.arch == "x86" + && cx.tcx.sess.parse_sess.config.contains(&(sym::target_feature, Some(sym::sse2))); let 
linux_s390x_gnu_like = target.os == "linux" && target.arch == "s390x" && target_env_gnu_like; let linux_sparc64_gnu_like = @@ -415,6 +418,10 @@ fn fn_abi_new_uncached<'tcx>( is_return, drop_target_pointee, ); + // Use XMM0 instead of FP0 to preserve NaN payloads + if x86_sse2 && rust_abi && is_return && matches!(scalar.primitive(), F32 | F64) { + attrs.set(ArgAttribute::InReg); + } attrs }); diff --git a/library/std/src/f32/tests.rs b/library/std/src/f32/tests.rs index 9ca4e8f2f45fe..c6fdf660f2fb3 100644 --- a/library/std/src/f32/tests.rs +++ b/library/std/src/f32/tests.rs @@ -328,7 +328,7 @@ macro_rules! assert_f32_biteq { // Ignore test on x87 floating point, these platforms do not guarantee NaN // payloads are preserved and flush denormals to zero, failing the tests. -#[cfg(not(target_arch = "x86"))] +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))] #[test] fn test_next_up() { let tiny = f32::from_bits(1); @@ -361,7 +361,7 @@ fn test_next_up() { // Ignore test on x87 floating point, these platforms do not guarantee NaN // payloads are preserved and flush denormals to zero, failing the tests. -#[cfg(not(target_arch = "x86"))] +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))] #[test] fn test_next_down() { let tiny = f32::from_bits(1); diff --git a/library/std/src/f64/tests.rs b/library/std/src/f64/tests.rs index f88d01593b5e4..3833bd71986be 100644 --- a/library/std/src/f64/tests.rs +++ b/library/std/src/f64/tests.rs @@ -318,7 +318,7 @@ macro_rules! assert_f64_biteq { // Ignore test on x87 floating point, these platforms do not guarantee NaN // payloads are preserved and flush denormals to zero, failing the tests. 
-#[cfg(not(target_arch = "x86"))] +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))] #[test] fn test_next_up() { let tiny = f64::from_bits(1); @@ -350,7 +350,7 @@ fn test_next_up() { // Ignore test on x87 floating point, these platforms do not guarantee NaN // payloads are preserved and flush denormals to zero, failing the tests. -#[cfg(not(target_arch = "x86"))] +#[cfg(not(all(target_arch = "x86", not(target_feature = "sse2"))))] #[test] fn test_next_down() { let tiny = f64::from_bits(1); From b7dfd48a0621f36357fb941e674c3c8b52a29016 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Brammer?= Date: Sun, 8 Oct 2023 02:16:04 +0200 Subject: [PATCH 2/2] Enable usage of XMM0 per-target Activating SSE2 with -C target-feature doesn't change the Rust ABI anymore, while deactivating it still does. --- compiler/rustc_target/src/spec/i686_apple_darwin.rs | 1 + compiler/rustc_target/src/spec/i686_linux_android.rs | 1 + compiler/rustc_target/src/spec/i686_pc_windows_gnu.rs | 1 + compiler/rustc_target/src/spec/i686_pc_windows_gnullvm.rs | 1 + compiler/rustc_target/src/spec/i686_pc_windows_msvc.rs | 1 + compiler/rustc_target/src/spec/i686_unknown_freebsd.rs | 1 + compiler/rustc_target/src/spec/i686_unknown_haiku.rs | 1 + compiler/rustc_target/src/spec/i686_unknown_linux_gnu.rs | 1 + compiler/rustc_target/src/spec/i686_unknown_linux_musl.rs | 1 + compiler/rustc_target/src/spec/i686_unknown_netbsd.rs | 1 + compiler/rustc_target/src/spec/i686_unknown_openbsd.rs | 1 + compiler/rustc_target/src/spec/i686_uwp_windows_gnu.rs | 1 + compiler/rustc_target/src/spec/i686_uwp_windows_msvc.rs | 1 + compiler/rustc_target/src/spec/i686_wrs_vxworks.rs | 1 + compiler/rustc_target/src/spec/mod.rs | 6 ++++++ compiler/rustc_ty_utils/src/abi.rs | 1 + 16 files changed, 21 insertions(+) diff --git a/compiler/rustc_target/src/spec/i686_apple_darwin.rs b/compiler/rustc_target/src/spec/i686_apple_darwin.rs index b5103d15db695..3bf2177fb5ad4 100644 --- 
a/compiler/rustc_target/src/spec/i686_apple_darwin.rs +++ b/compiler/rustc_target/src/spec/i686_apple_darwin.rs @@ -9,6 +9,7 @@ pub fn target() -> Target { base.add_pre_link_args(LinkerFlavor::Darwin(Cc::Yes, Lld::No), &["-m32"]); base.stack_probes = StackProbeType::X86; base.frame_pointer = FramePointer::Always; + base.x86_use_xmm0 = true; Target { // Clang automatically chooses a more specific target based on diff --git a/compiler/rustc_target/src/spec/i686_linux_android.rs b/compiler/rustc_target/src/spec/i686_linux_android.rs index c7c30c23901d3..65ea366fb6114 100644 --- a/compiler/rustc_target/src/spec/i686_linux_android.rs +++ b/compiler/rustc_target/src/spec/i686_linux_android.rs @@ -12,6 +12,7 @@ pub fn target() -> Target { base.cpu = "pentiumpro".into(); base.features = "+mmx,+sse,+sse2,+sse3,+ssse3".into(); base.stack_probes = StackProbeType::X86; + base.x86_use_xmm0 = true; Target { llvm_target: "i686-linux-android".into(), diff --git a/compiler/rustc_target/src/spec/i686_pc_windows_gnu.rs b/compiler/rustc_target/src/spec/i686_pc_windows_gnu.rs index 7a11138754fa8..bfbc1f58fab62 100644 --- a/compiler/rustc_target/src/spec/i686_pc_windows_gnu.rs +++ b/compiler/rustc_target/src/spec/i686_pc_windows_gnu.rs @@ -14,6 +14,7 @@ pub fn target() -> Target { &["-m", "i386pe", "--large-address-aware"], ); base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-Wl,--large-address-aware"]); + base.x86_use_xmm0 = true; Target { llvm_target: "i686-pc-windows-gnu".into(), diff --git a/compiler/rustc_target/src/spec/i686_pc_windows_gnullvm.rs b/compiler/rustc_target/src/spec/i686_pc_windows_gnullvm.rs index 3154b512a5202..f5cd9f76929f3 100644 --- a/compiler/rustc_target/src/spec/i686_pc_windows_gnullvm.rs +++ b/compiler/rustc_target/src/spec/i686_pc_windows_gnullvm.rs @@ -13,6 +13,7 @@ pub fn target() -> Target { LinkerFlavor::Gnu(Cc::No, Lld::No), &["-m", "i386pe", "--large-address-aware"], ); + base.x86_use_xmm0 = true; Target { llvm_target: 
"i686-pc-windows-gnu".into(), diff --git a/compiler/rustc_target/src/spec/i686_pc_windows_msvc.rs b/compiler/rustc_target/src/spec/i686_pc_windows_msvc.rs index db4c00dc697d7..c2aaac285dc07 100644 --- a/compiler/rustc_target/src/spec/i686_pc_windows_msvc.rs +++ b/compiler/rustc_target/src/spec/i686_pc_windows_msvc.rs @@ -19,6 +19,7 @@ pub fn target() -> Target { ); // Workaround for #95429 base.has_thread_local = false; + base.x86_use_xmm0 = true; Target { llvm_target: "i686-pc-windows-msvc".into(), diff --git a/compiler/rustc_target/src/spec/i686_unknown_freebsd.rs b/compiler/rustc_target/src/spec/i686_unknown_freebsd.rs index 35ca78034f170..edf0c62d44c21 100644 --- a/compiler/rustc_target/src/spec/i686_unknown_freebsd.rs +++ b/compiler/rustc_target/src/spec/i686_unknown_freebsd.rs @@ -6,6 +6,7 @@ pub fn target() -> Target { base.max_atomic_width = Some(64); base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m32", "-Wl,-znotext"]); base.stack_probes = StackProbeType::X86; + base.x86_use_xmm0 = true; Target { llvm_target: "i686-unknown-freebsd".into(), diff --git a/compiler/rustc_target/src/spec/i686_unknown_haiku.rs b/compiler/rustc_target/src/spec/i686_unknown_haiku.rs index e6b72336c5cf5..41c7ddef06aa5 100644 --- a/compiler/rustc_target/src/spec/i686_unknown_haiku.rs +++ b/compiler/rustc_target/src/spec/i686_unknown_haiku.rs @@ -6,6 +6,7 @@ pub fn target() -> Target { base.max_atomic_width = Some(64); base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m32"]); base.stack_probes = StackProbeType::X86; + base.x86_use_xmm0 = true; Target { llvm_target: "i686-unknown-haiku".into(), diff --git a/compiler/rustc_target/src/spec/i686_unknown_linux_gnu.rs b/compiler/rustc_target/src/spec/i686_unknown_linux_gnu.rs index 73e536a7e4d93..b579e70f63717 100644 --- a/compiler/rustc_target/src/spec/i686_unknown_linux_gnu.rs +++ b/compiler/rustc_target/src/spec/i686_unknown_linux_gnu.rs @@ -7,6 +7,7 @@ pub fn target() -> Target { base.supported_sanitizers 
= SanitizerSet::ADDRESS; base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m32"]); base.stack_probes = StackProbeType::X86; + base.x86_use_xmm0 = true; Target { llvm_target: "i686-unknown-linux-gnu".into(), diff --git a/compiler/rustc_target/src/spec/i686_unknown_linux_musl.rs b/compiler/rustc_target/src/spec/i686_unknown_linux_musl.rs index 3825082ba25e4..8c2b9e9f7014d 100644 --- a/compiler/rustc_target/src/spec/i686_unknown_linux_musl.rs +++ b/compiler/rustc_target/src/spec/i686_unknown_linux_musl.rs @@ -20,6 +20,7 @@ pub fn target() -> Target { // This may or may not be related to this bug: // https://llvm.org/bugs/show_bug.cgi?id=30879 base.frame_pointer = FramePointer::Always; + base.x86_use_xmm0 = true; Target { llvm_target: "i686-unknown-linux-musl".into(), diff --git a/compiler/rustc_target/src/spec/i686_unknown_netbsd.rs b/compiler/rustc_target/src/spec/i686_unknown_netbsd.rs index b191996c7de0d..205c6a2067a96 100644 --- a/compiler/rustc_target/src/spec/i686_unknown_netbsd.rs +++ b/compiler/rustc_target/src/spec/i686_unknown_netbsd.rs @@ -6,6 +6,7 @@ pub fn target() -> Target { base.max_atomic_width = Some(64); base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m32"]); base.stack_probes = StackProbeType::X86; + base.x86_use_xmm0 = true; Target { llvm_target: "i686-unknown-netbsdelf".into(), diff --git a/compiler/rustc_target/src/spec/i686_unknown_openbsd.rs b/compiler/rustc_target/src/spec/i686_unknown_openbsd.rs index 8babe55971280..34699b942990f 100644 --- a/compiler/rustc_target/src/spec/i686_unknown_openbsd.rs +++ b/compiler/rustc_target/src/spec/i686_unknown_openbsd.rs @@ -6,6 +6,7 @@ pub fn target() -> Target { base.max_atomic_width = Some(64); base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m32", "-fuse-ld=lld"]); base.stack_probes = StackProbeType::X86; + base.x86_use_xmm0 = true; Target { llvm_target: "i686-unknown-openbsd".into(), diff --git a/compiler/rustc_target/src/spec/i686_uwp_windows_gnu.rs 
b/compiler/rustc_target/src/spec/i686_uwp_windows_gnu.rs index a3e32569827fb..884f229f5ed7b 100644 --- a/compiler/rustc_target/src/spec/i686_uwp_windows_gnu.rs +++ b/compiler/rustc_target/src/spec/i686_uwp_windows_gnu.rs @@ -13,6 +13,7 @@ pub fn target() -> Target { &["-m", "i386pe", "--large-address-aware"], ); base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-Wl,--large-address-aware"]); + base.x86_use_xmm0 = true; Target { llvm_target: "i686-pc-windows-gnu".into(), diff --git a/compiler/rustc_target/src/spec/i686_uwp_windows_msvc.rs b/compiler/rustc_target/src/spec/i686_uwp_windows_msvc.rs index 4c657fe908ac4..0de04d23bf859 100644 --- a/compiler/rustc_target/src/spec/i686_uwp_windows_msvc.rs +++ b/compiler/rustc_target/src/spec/i686_uwp_windows_msvc.rs @@ -4,6 +4,7 @@ pub fn target() -> Target { let mut base = super::windows_uwp_msvc_base::opts(); base.cpu = "pentium4".into(); base.max_atomic_width = Some(64); + base.x86_use_xmm0 = true; Target { llvm_target: "i686-pc-windows-msvc".into(), diff --git a/compiler/rustc_target/src/spec/i686_wrs_vxworks.rs b/compiler/rustc_target/src/spec/i686_wrs_vxworks.rs index b5cfdfcebea90..0a20e5f4cef8f 100644 --- a/compiler/rustc_target/src/spec/i686_wrs_vxworks.rs +++ b/compiler/rustc_target/src/spec/i686_wrs_vxworks.rs @@ -6,6 +6,7 @@ pub fn target() -> Target { base.max_atomic_width = Some(64); base.add_pre_link_args(LinkerFlavor::Gnu(Cc::Yes, Lld::No), &["-m32"]); base.stack_probes = StackProbeType::X86; + base.x86_use_xmm0 = true; Target { llvm_target: "i686-unknown-linux-gnu".into(), diff --git a/compiler/rustc_target/src/spec/mod.rs b/compiler/rustc_target/src/spec/mod.rs index 8aa72797a0d25..6a0848b6c299b 100644 --- a/compiler/rustc_target/src/spec/mod.rs +++ b/compiler/rustc_target/src/spec/mod.rs @@ -1935,6 +1935,9 @@ pub struct TargetOptions { /// wasm32 where the whole program either has simd or not. 
pub simd_types_indirect: bool, + /// On x86, return `f32` and `f64` in XMM0 instead of FP0 for the Rust ABI (requires SSE2). + pub x86_use_xmm0: bool, + /// Pass a list of symbol which should be exported in the dylib to the linker. pub limit_rdylib_exports: bool, @@ -2213,6 +2216,7 @@ impl Default for TargetOptions { requires_uwtable: false, default_uwtable: false, simd_types_indirect: true, + x86_use_xmm0: false, limit_rdylib_exports: true, override_export_symbols: None, merge_functions: MergeFunctions::Aliases, @@ -2878,6 +2882,7 @@ impl Target { key!(requires_uwtable, bool); key!(default_uwtable, bool); key!(simd_types_indirect, bool); + key!(x86_use_xmm0, bool); key!(limit_rdylib_exports, bool); key!(override_export_symbols, opt_list); key!(merge_functions, MergeFunctions)?; @@ -3135,6 +3140,7 @@ impl ToJson for Target { target_option_val!(requires_uwtable); target_option_val!(default_uwtable); target_option_val!(simd_types_indirect); + target_option_val!(x86_use_xmm0); target_option_val!(limit_rdylib_exports); target_option_val!(override_export_symbols); target_option_val!(merge_functions); diff --git a/compiler/rustc_ty_utils/src/abi.rs b/compiler/rustc_ty_utils/src/abi.rs index acab46244433b..b3beac89acc73 100644 --- a/compiler/rustc_ty_utils/src/abi.rs +++ b/compiler/rustc_ty_utils/src/abi.rs @@ -373,6 +373,7 @@ fn fn_abi_new_uncached<'tcx>( let target_env_gnu_like = matches!(&target.env[..], "gnu" | "musl" | "uclibc"); let win_x64_gnu = target.os == "windows" && target.arch == "x86_64" && target.env == "gnu"; let x86_sse2 = target.arch == "x86" + && target.x86_use_xmm0 && cx.tcx.sess.parse_sess.config.contains(&(sym::target_feature, Some(sym::sse2))); let linux_s390x_gnu_like = target.os == "linux" && target.arch == "s390x" && target_env_gnu_like;