Skip to content

Commit

Permalink
Use fmuladd for fma and document this behavior
Browse files Browse the repository at this point in the history
  • Loading branch information
GabrielMajeri committed Sep 1, 2018
1 parent 4e89670 commit 3d7712b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 8 deletions.
13 changes: 13 additions & 0 deletions src/api/math/float/fma.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@ macro_rules! impl_math_float_fma {
impl $id {
/// Fused multiply add: `self * y + z`
///
/// On some architectures, it is possible to combine a multiply
/// followed by an addition in a single instruction.
/// Besides performance, this may also offer better precision
/// than performing the operations individually.
///
/// Note that using this function does **not** guarantee that an FMA
/// instruction will be emitted;
/// the architecture may not support it, or the compiler may decide
/// it's more efficient in a specific case not to use it.
///
/// Use your architecture's intrinsic if you absolutely require
/// the fused (single-rounding) result in all circumstances.
///
/// Most architectures which have support for FMA
/// also have an equivalent version of this function,
/// fused multiply subtract (`self * y - z`).
Expand Down
18 changes: 10 additions & 8 deletions src/codegen/math/float/fma.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,25 @@ crate trait Fma {
#[cfg(not(target_arch = "s390x"))]
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.fma.v2f32"]
// We use the `fmuladd` intrinsic instead of `fma` to allow LLVM to decide
// on a per-case basis whether it's better to use FMA or not.
#[link_name = "llvm.fmuladd.v2f32"]
fn fma_v2f32(x: f32x2, y: f32x2, z: f32x2) -> f32x2;
#[link_name = "llvm.fma.v4f32"]
#[link_name = "llvm.fmuladd.v4f32"]
fn fma_v4f32(x: f32x4, y: f32x4, z: f32x4) -> f32x4;
#[link_name = "llvm.fma.v8f32"]
#[link_name = "llvm.fmuladd.v8f32"]
fn fma_v8f32(x: f32x8, y: f32x8, z: f32x8) -> f32x8;
#[link_name = "llvm.fma.v16f32"]
#[link_name = "llvm.fmuladd.v16f32"]
fn fma_v16f32(x: f32x16, y: f32x16, z: f32x16) -> f32x16;
/* FIXME 64-bit single elem vectors
#[link_name = "llvm.fma.v1f64"]
#[link_name = "llvm.fmuladd.v1f64"]
fn fma_v1f64(x: f64x1, y: f64x1, z: f64x1) -> f64x1;
*/
#[link_name = "llvm.fma.v2f64"]
#[link_name = "llvm.fmuladd.v2f64"]
fn fma_v2f64(x: f64x2, y: f64x2, z: f64x2) -> f64x2;
#[link_name = "llvm.fma.v4f64"]
#[link_name = "llvm.fmuladd.v4f64"]
fn fma_v4f64(x: f64x4, y: f64x4, z: f64x4) -> f64x4;
#[link_name = "llvm.fma.v8f64"]
#[link_name = "llvm.fmuladd.v8f64"]
fn fma_v8f64(x: f64x8, y: f64x8, z: f64x8) -> f64x8;
}

Expand Down

0 comments on commit 3d7712b

Please sign in to comment.