Skip to content

Commit

Permalink
Force LLVM to use CMOV for binary search
Browse files Browse the repository at this point in the history
Since https://reviews.llvm.org/D118118, LLVM will no longer turn CMOVs
into branches if it comes from a `select` marked with an `unpredictable`
metadata attribute.

This PR introduces `core::intrinsics::select_unpredictable` which emits
such a `select` and uses it in the implementation of `binary_search_by`.
  • Loading branch information
Amanieu committed Jul 26, 2024
1 parent 355efac commit 472058a
Show file tree
Hide file tree
Showing 8 changed files with 67 additions and 7 deletions.
10 changes: 10 additions & 0 deletions compiler/rustc_codegen_llvm/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1353,6 +1353,16 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
}
}

pub fn set_unpredictable(&mut self, inst: &'ll Value) {
unsafe {
llvm::LLVMSetMetadata(
inst,
llvm::MD_unpredictable as c_uint,
llvm::LLVMMDNodeInContext(self.cx.llcx, ptr::null(), 0),
);
}
}

pub fn minnum(&mut self, lhs: &'ll Value, rhs: &'ll Value) -> &'ll Value {
unsafe { llvm::LLVMRustBuildMinNum(self.llbuilder, lhs, rhs) }
}
Expand Down
8 changes: 8 additions & 0 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,14 @@ impl<'ll, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'_, 'll, 'tcx> {
}
sym::unlikely => self
.call_intrinsic("llvm.expect.i1", &[args[0].immediate(), self.const_bool(false)]),
sym::select_unpredictable => {
let cond = args[0].immediate();
let true_val = args[1].immediate();
let false_val = args[2].immediate();
let result = self.select(cond, true_val, false_val);
self.set_unpredictable(&result);
result
}
sym::catch_unwind => {
catch_unwind_intrinsic(
self,
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_codegen_llvm/src/llvm/ffi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ pub enum MetadataType {
MD_nontemporal = 9,
MD_mem_parallel_loop_access = 10,
MD_nonnull = 11,
MD_unpredictable = 15,
MD_align = 17,
MD_type = 19,
MD_vcall_visibility = 28,
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_hir_analysis/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ pub fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -
| sym::type_id
| sym::likely
| sym::unlikely
| sym::select_unpredictable
| sym::ptr_guaranteed_cmp
| sym::minnumf16
| sym::minnumf32
Expand Down Expand Up @@ -487,6 +488,7 @@ pub fn check_intrinsic_type(
sym::assume => (0, 0, vec![tcx.types.bool], tcx.types.unit),
sym::likely => (0, 0, vec![tcx.types.bool], tcx.types.bool),
sym::unlikely => (0, 0, vec![tcx.types.bool], tcx.types.bool),
sym::select_unpredictable => (1, 0, vec![tcx.types.bool, param(0), param(0)], param(0)),

sym::read_via_copy => (1, 0, vec![Ty::new_imm_ptr(tcx, param(0))], param(0)),
sym::write_via_move => {
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1701,6 +1701,7 @@ symbols! {
saturating_add,
saturating_div,
saturating_sub,
select_unpredictable,
self_in_typedefs,
self_struct_ctor,
semitransparent,
Expand Down
28 changes: 28 additions & 0 deletions library/core/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1010,6 +1010,34 @@ pub const fn unlikely(b: bool) -> bool {
b
}

/// Returns either `true_val` or `false_val` depending on condition `b` with a
/// hint to the compiler that this condition is unlikely to be correctly
/// predicted by a CPU's branch predictor (e.g. a binary search).
///
/// This is otherwise functionally equivalent to `if b { true_val } else { false_val }`.
///
/// Note that, unlike most intrinsics, this is safe to call;
/// it does not require an `unsafe` block.
/// Therefore, implementations must not require the user to uphold
/// any safety invariants.
///
/// This intrinsic does not have a stable counterpart.
#[cfg(not(bootstrap))]
#[unstable(feature = "core_intrinsics", issue = "none")]
#[rustc_intrinsic]
#[rustc_nounwind]
#[miri::intrinsic_fallback_is_spec]
#[inline]
pub fn select_unpredictable<T>(b: bool, true_val: T, false_val: T) -> T {
if b { true_val } else { false_val }
}

#[cfg(bootstrap)]
#[inline]
pub fn select_unpredictable<T>(b: bool, true_val: T, false_val: T) -> T {
if b { true_val } else { false_val }
}

extern "rust-intrinsic" {
/// Executes a breakpoint trap, for inspection by a debugger.
///
Expand Down
13 changes: 6 additions & 7 deletions library/core/src/slice/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
use crate::cmp::Ordering::{self, Equal, Greater, Less};
use crate::fmt;
use crate::hint;
use crate::intrinsics::{exact_div, unchecked_sub};
use crate::intrinsics::{exact_div, select_unpredictable, unchecked_sub};
use crate::mem::{self, SizedTypeProperties};
use crate::num::NonZero;
use crate::ops::{Bound, OneSidedRange, Range, RangeBounds};
Expand Down Expand Up @@ -2803,12 +2803,11 @@ impl<T> [T] {
// we have `left + size/2 < self.len()`, and this is in-bounds.
let cmp = f(unsafe { self.get_unchecked(mid) });

// This control flow produces conditional moves, which results in
// fewer branches and instructions than if/else or matching on
// cmp::Ordering.
// This is x86 asm for u8: https://rust.godbolt.org/z/698eYffTx.
left = if cmp == Less { mid + 1 } else { left };
right = if cmp == Greater { mid } else { right };
// Binary search interacts poorly with branch prediction, so force
// the compiler to use conditional moves if supported by the target
// architecture.
left = select_unpredictable(cmp == Less, mid + 1, left);
right = select_unpredictable(cmp == Greater, mid, right);
if cmp == Equal {
// SAFETY: same as the `get_unchecked` above
unsafe { hint::assert_unchecked(mid < self.len()) };
Expand Down
11 changes: 11 additions & 0 deletions tests/codegen/intrinsics/select_unpredictable.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
//@ compile-flags: -O

#![feature(core_intrinsics)]
#![crate_type = "lib"]

#[no_mangle]
pub fn foo(p: bool, a: u64, b: u64) -> u64 {
// CHECK-LABEL: define{{.*}}i64 @foo
// CHECK: select i1 %p, i64 %a, i64 %b, !unpredictable
core::intrinsics::select_unpredictable(p, a, b)
}

0 comments on commit 472058a

Please sign in to comment.