Skip to content

Commit

Permalink
Adjust -Ctarget-cpu=native handling in cg_llvm
Browse files Browse the repository at this point in the history
When cg_llvm encounters the `-Ctarget-cpu=native` it computes an
explciit set of features that applies to the target in order to
correctly compile code for the host CPU (because e.g. `skylake` alone is
not sufficient to tell if some of the instructions are available or
not).

However there were a couple of issues with how we did this. Firstly, the
order in which features were overriden wasn't quite right – conceptually
you'd expect `-Ctarget-cpu=native` option to override the features that
are implicitly set by the target definition. However due to how other
`-Ctarget-cpu` values are handled we must adopt the following order
of priority:

* Features from -Ctarget-cpu=*; are overriden by
* Features implied by --target; are overriden by
* Features from -Ctarget-feature; are overriden by
* function specific features.

Another problem was in that the function level `target-features`
attribute would overwrite the entire set of the globally enabled
features, rather than just the features the
`#[target_feature(enable/disable)]` specified. With something like
`-Ctarget-cpu=native` we'd end up in a situation wherein a function
without `#[target_feature(enable)]` annotation would have a broader
set of features compared to a function with one such attribute. This
turned out to be a cause of heavy run-time regressions in some code
using these function-level attributes in conjunction with
`-Ctarget-cpu=native`, for example.

With this PR rustc is more careful about specifying the entire set of
features for functions that use `#[target_feature(enable/disable)]` or
`#[instruction_set]` attributes.

Sadly testing the original reproducer for this behaviour is quite
impossible – we cannot rely on `-Ctarget-cpu=native` to be anything in
particular on developer or CI machines.
  • Loading branch information
nagisa committed Mar 16, 2021
1 parent 0517acd commit 72fb437
Show file tree
Hide file tree
Showing 7 changed files with 155 additions and 52 deletions.
30 changes: 10 additions & 20 deletions compiler/rustc_codegen_llvm/src/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,18 +152,6 @@ fn set_probestack(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
}
}

pub fn llvm_target_features(sess: &Session) -> impl Iterator<Item = &str> {
const RUSTC_SPECIFIC_FEATURES: &[&str] = &["crt-static"];

let cmdline = sess
.opts
.cg
.target_feature
.split(',')
.filter(|f| !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)));
sess.target.features.split(',').chain(cmdline).filter(|l| !l.is_empty())
}

pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
let target_cpu = SmallCStr::new(llvm_util::target_cpu(cx.tcx.sess));
llvm::AddFunctionAttrStringValue(
Expand Down Expand Up @@ -301,20 +289,22 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
// The target doesn't care; the subtarget reads our attribute.
apply_tune_cpu_attr(cx, llfn);

let features = llvm_target_features(cx.tcx.sess)
.map(|s| s.to_string())
.chain(codegen_fn_attrs.target_features.iter().map(|f| {
let function_features = codegen_fn_attrs
.target_features
.iter()
.map(|f| {
let feature = &f.as_str();
format!("+{}", llvm_util::to_llvm_feature(cx.tcx.sess, feature))
}))
})
.chain(codegen_fn_attrs.instruction_set.iter().map(|x| match x {
InstructionSetAttr::ArmA32 => "-thumb-mode".to_string(),
InstructionSetAttr::ArmT32 => "+thumb-mode".to_string(),
}))
.collect::<Vec<String>>()
.join(",");

if !features.is_empty() {
.collect::<Vec<String>>();
if !function_features.is_empty() {
let mut global_features = llvm_util::llvm_global_features(cx.tcx.sess);
global_features.extend(function_features.into_iter());
let features = global_features.join(",");
let val = CString::new(features).unwrap();
llvm::AddFunctionAttrStringValue(
llfn,
Expand Down
5 changes: 1 addition & 4 deletions compiler/rustc_codegen_llvm/src/back/write.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use crate::attributes;
use crate::back::lto::ThinBuffer;
use crate::back::profiling::{
selfprofile_after_pass_callback, selfprofile_before_pass_callback, LlvmSelfProfiler,
Expand Down Expand Up @@ -166,8 +165,6 @@ pub fn target_machine_factory(

let code_model = to_llvm_code_model(sess.code_model());

let mut features = llvm_util::handle_native_features(sess);
features.extend(attributes::llvm_target_features(sess).map(|s| s.to_owned()));
let mut singlethread = sess.target.singlethread;

// On the wasm target once the `atomics` feature is enabled that means that
Expand All @@ -182,7 +179,7 @@ pub fn target_machine_factory(

let triple = SmallCStr::new(&sess.target.llvm_target);
let cpu = SmallCStr::new(llvm_util::target_cpu(sess));
let features = features.join(",");
let features = llvm_util::llvm_global_features(sess).join(",");
let features = CString::new(features).unwrap();
let abi = SmallCStr::new(&sess.target.llvm_abiname);
let trap_unreachable =
Expand Down
66 changes: 56 additions & 10 deletions compiler/rustc_codegen_llvm/src/llvm_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,13 +218,39 @@ pub fn target_cpu(sess: &Session) -> &str {
handle_native(name)
}

pub fn handle_native_features(sess: &Session) -> Vec<String> {
/// The list of LLVM features computed from CLI flags (`-Ctarget-cpu`, `-Ctarget-feature`,
/// `--target` and similar).
// FIXME(nagisa): Cache the output of this somehow? Maybe make this a query? We're calling this
// for every function that has `#[target_feature]` on it. The global features won't change between
// the functions; only crates, maybe…
pub fn llvm_global_features(sess: &Session) -> Vec<String> {
// FIXME(nagisa): this should definitely be available more centrally and to other codegen backends.
/// These features control behaviour of rustc rather than llvm.
const RUSTC_SPECIFIC_FEATURES: &[&str] = &["crt-static"];

// Features that come earlier are overriden by conflicting features later in the string.
// Typically we'll want more explicit settings to override the implicit ones, so:
//
// * Features from -Ctarget-cpu=*; are overriden by [^1]
// * Features implied by --target; are overriden by
// * Features from -Ctarget-feature; are overriden by
// * function specific features.
//
// [^1]: target-cpu=native is handled here, other target-cpu values are handled implicitly
// through LLVM TargetMachine implementation.
//
// FIXME(nagisa): it isn't clear what's the best interaction between features implied by
// `-Ctarget-cpu` and `--target` are. On one hand, you'd expect CLI arguments to always
// override anything that's implicit, so e.g. when there's no `--target` flag, features implied
// the host target are overriden by `-Ctarget-cpu=*`. On the other hand, what about when both
// `--target` and `-Ctarget-cpu=*` are specified? Both then imply some target features and both
// flags are specified by the user on the CLI. It isn't as clear-cut which order of precedence
// should be taken in cases like these.
let mut features = vec![];

// -Ctarget-cpu=native
match sess.opts.cg.target_cpu {
Some(ref s) => {
if s != "native" {
return vec![];
}

Some(ref s) if s == "native" => {
let features_string = unsafe {
let ptr = llvm::LLVMGetHostCPUFeatures();
let features_string = if !ptr.is_null() {
Expand All @@ -242,11 +268,31 @@ pub fn handle_native_features(sess: &Session) -> Vec<String> {

features_string
};

features_string.split(",").map(|s| s.to_owned()).collect()
features.extend(features_string.split(",").map(String::from));
}
None => vec![],
}
Some(_) | None => {}
};

// Features implied by an implicit or explicit `--target`.
features.extend(
sess.target
.features
.split(',')
.filter(|f| !f.is_empty() && !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)))
.map(String::from),
);

// -Ctarget-features
features.extend(
sess.opts
.cg
.target_feature
.split(',')
.filter(|f| !f.is_empty() && !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)))
.map(String::from),
);

features
}

pub fn tune_cpu(sess: &Session) -> Option<&str> {
Expand Down
41 changes: 41 additions & 0 deletions src/test/assembly/target-feature-multiple.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// assembly-output: emit-asm
// needs-llvm-components: x86
// revisions: TWOFLAGS SINGLEFLAG
// compile-flags: --target=x86_64-unknown-linux-gnu
// [TWOFLAGS] compile-flags: -C target-feature=+rdrnd -C target-feature=+rdseed
// [SINGLEFLAG] compile-flags: -C target-feature=+rdrnd,+rdseed

// Target features set via flags aren't necessarily reflected in the IR, so the only way to test
// them is to build code that requires the features to be enabled to work.
//
// In this particular test if `rdrnd,rdseed` somehow didn't make it to LLVM, the instruction
// selection should crash.
//
// > LLVM ERROR: Cannot select: 0x7f00f400c010: i32,i32,ch = X86ISD::RDSEED 0x7f00f400bfa8:2
// > In function: foo
//
// See also src/test/codegen/target-feature-overrides.rs
#![feature(no_core, lang_items, link_llvm_intrinsics, abi_unadjusted)]
#![crate_type = "lib"]
#![no_core]

#[lang = "sized"]
trait Sized {}
#[lang = "copy"]
trait Copy {}

// Use of these requires target features to be enabled
extern "unadjusted" {
#[link_name = "llvm.x86.rdrand.32"]
fn x86_rdrand32_step() -> (u32, i32);
#[link_name = "llvm.x86.rdseed.32"]
fn x86_rdseed32_step() -> (u32, i32);
}

#[no_mangle]
pub unsafe fn foo() -> (u32, u32) {
// CHECK-LABEL: foo:
// CHECK: rdrand
// CHECK: rdseed
(x86_rdrand32_step().0, x86_rdseed32_step().0)
}
9 changes: 0 additions & 9 deletions src/test/codegen/target-feature-multiple.rs

This file was deleted.

9 changes: 0 additions & 9 deletions src/test/codegen/target-feature-on-functions.rs

This file was deleted.

47 changes: 47 additions & 0 deletions src/test/codegen/target-feature-overrides.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// revisions: COMPAT INCOMPAT
// needs-llvm-components: x86
// compile-flags: --target=x86_64-unknown-linux-gnu -Copt-level=3
// [COMPAT] compile-flags: -Ctarget-feature=+avx2,+avx
// [INCOMPAT] compile-flags: -Ctarget-feature=-avx2,-avx

// See also src/test/assembly/target-feature-multiple.rs
#![feature(no_core, lang_items)]
#![crate_type = "lib"]
#![no_core]


#[lang = "sized"]
trait Sized {}
#[lang = "copy"]
trait Copy {}

extern "C" {
fn peach() -> u32;
}

#[inline]
#[target_feature(enable = "avx")]
#[no_mangle]
pub unsafe fn apple() -> u32 {
// CHECK-LABEL: @apple()
// CHECK-SAME: [[APPLEATTRS:#[0-9]+]] {
// CHECK: {{.*}}call{{.*}}@peach
peach()
}

// target features same as global (not reflected or overriden in IR)
#[no_mangle]
pub unsafe fn banana() -> u32 {
// CHECK-LABEL: @banana()
// CHECK-SAME: [[BANANAATTRS:#[0-9]+]] {
// COMPAT: {{.*}}call{{.*}}@peach
// INCOMPAT: {{.*}}call{{.*}}@apple
apple() // Compatible for inline in COMPAT revision and can't be inlined in INCOMPAT
}

// CHECK: attributes [[APPLEATTRS]]
// COMPAT-SAME: "target-features"="+avx2,+avx,+avx"
// INCOMPAT-SAME: "target-features"="-avx2,-avx,+avx"
// CHECK: attributes [[BANANAATTRS]]
// CHECK-NOT: target-features
// CHECK-SAME: }

0 comments on commit 72fb437

Please sign in to comment.