Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add f16 and f128 inline ASM support for aarch64 #129536

Merged
merged 2 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions compiler/rustc_codegen_llvm/src/asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -913,8 +913,10 @@ fn llvm_asm_scalar_type<'ll>(cx: &CodegenCx<'ll, '_>, scalar: Scalar) -> &'ll Ty
Primitive::Int(Integer::I16, _) => cx.type_i16(),
Primitive::Int(Integer::I32, _) => cx.type_i32(),
Primitive::Int(Integer::I64, _) => cx.type_i64(),
Primitive::Float(Float::F16) => cx.type_f16(),
Primitive::Float(Float::F32) => cx.type_f32(),
Primitive::Float(Float::F64) => cx.type_f64(),
Primitive::Float(Float::F128) => cx.type_f128(),
// FIXME(erikdesjardins): handle non-default addrspace ptr sizes
Primitive::Pointer(_) => cx.type_from_integer(dl.ptr_sized_integer()),
_ => unreachable!(),
Expand Down Expand Up @@ -948,7 +950,9 @@ fn llvm_fixup_input<'ll, 'tcx>(
value
}
}
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
if s.primitive() != Primitive::Float(Float::F128) =>
{
let elem_ty = llvm_asm_scalar_type(bx.cx, s);
let count = 16 / layout.size.bytes();
let vec_ty = bx.cx.type_vector(elem_ty, count);
Expand Down Expand Up @@ -1090,7 +1094,9 @@ fn llvm_fixup_output<'ll, 'tcx>(
value
}
}
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
if s.primitive() != Primitive::Float(Float::F128) =>
{
value = bx.extract_element(value, bx.const_i32(0));
if let Primitive::Pointer(_) = s.primitive() {
value = bx.inttoptr(value, layout.llvm_type(bx.cx));
Expand Down Expand Up @@ -1222,7 +1228,9 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
layout.llvm_type(cx)
}
}
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
if s.primitive() != Primitive::Float(Float::F128) =>
{
let elem_ty = llvm_asm_scalar_type(cx, s);
let count = 16 / layout.size.bytes();
cx.type_vector(elem_ty, count)
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_target/src/asm/aarch64.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,11 @@ impl AArch64InlineAsmRegClass {
_arch: InlineAsmArch,
) -> &'static [(InlineAsmType, Option<Symbol>)] {
match self {
Self::reg => types! { _: I8, I16, I32, I64, F32, F64; },
Self::reg => types! { _: I8, I16, I32, I64, F16, F32, F64; },
Self::vreg | Self::vreg_low16 => types! {
neon: I8, I16, I32, I64, F32, F64,
VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2), VecF64(1),
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2);
neon: I8, I16, I32, I64, F16, F32, F64, F128,
VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2), VecF64(1),
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2);
},
Self::preg => &[],
}
Expand Down
116 changes: 111 additions & 5 deletions tests/assembly/asm/aarch64-types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
//@ [arm64ec] compile-flags: --target arm64ec-pc-windows-msvc
//@ [arm64ec] needs-llvm-components: aarch64

#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch)]
#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch, f16, f128)]
#![crate_type = "rlib"]
#![no_core]
#![allow(asm_sub_register, non_camel_case_types)]
// FIXME(f16_f128): Only needed for FIXME in check! and check_reg!
#![feature(auto_traits)]

#[rustc_builtin_macro]
macro_rules! asm {
Expand Down Expand Up @@ -39,6 +41,8 @@ pub struct i32x2(i32, i32);
#[repr(simd)]
pub struct i64x1(i64);
#[repr(simd)]
pub struct f16x4(f16, f16, f16, f16);
#[repr(simd)]
pub struct f32x2(f32, f32);
#[repr(simd)]
pub struct f64x1(f64);
Expand All @@ -51,30 +55,42 @@ pub struct i32x4(i32, i32, i32, i32);
#[repr(simd)]
pub struct i64x2(i64, i64);
#[repr(simd)]
pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
#[repr(simd)]
pub struct f32x4(f32, f32, f32, f32);
#[repr(simd)]
pub struct f64x2(f64, f64);

impl Copy for i8 {}
impl Copy for i16 {}
impl Copy for f16 {}
impl Copy for i32 {}
impl Copy for f32 {}
impl Copy for i64 {}
impl Copy for f64 {}
impl Copy for f128 {}
impl Copy for ptr {}
impl Copy for i8x8 {}
impl Copy for i16x4 {}
impl Copy for i32x2 {}
impl Copy for i64x1 {}
impl Copy for f16x4 {}
impl Copy for f32x2 {}
impl Copy for f64x1 {}
impl Copy for i8x16 {}
impl Copy for i16x8 {}
impl Copy for i32x4 {}
impl Copy for i64x2 {}
impl Copy for f16x8 {}
impl Copy for f32x4 {}
impl Copy for f64x2 {}

// FIXME(f16_f128): Only needed for FIXME in check! and check_reg!
#[lang = "freeze"]
unsafe auto trait Freeze {}
#[lang = "unpin"]
auto trait Unpin {}

extern "C" {
fn extern_func();
static extern_static: u8;
Expand Down Expand Up @@ -111,38 +127,44 @@ pub unsafe fn issue_75761() {

macro_rules! check {
($func:ident $ty:ident $class:ident $mov:literal $modifier:literal) => {
// FIXME(f16_f128): Change back to `$func(x: $ty) -> $ty` once arm64ec can pass and return
// `f16` and `f128` without LLVM erroring.
// LLVM issue: <https://github.com/llvm/llvm-project/issues/94434>
#[no_mangle]
pub unsafe fn $func(x: $ty) -> $ty {
pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
// Hack to avoid function merging
extern "Rust" {
fn dont_merge(s: &str);
}
dont_merge(stringify!($func));

let x = *inp;
let y;
asm!(
concat!($mov, " {:", $modifier, "}, {:", $modifier, "}"),
out($class) y,
in($class) x
);
y
*out = y;
}
};
}

macro_rules! check_reg {
($func:ident $ty:ident $reg:tt $mov:literal) => {
// FIXME(f16_f128): See FIXME in `check!`
#[no_mangle]
pub unsafe fn $func(x: $ty) -> $ty {
pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
// Hack to avoid function merging
extern "Rust" {
fn dont_merge(s: &str);
}
dont_merge(stringify!($func));

let x = *inp;
let y;
asm!(concat!($mov, " ", $reg, ", ", $reg), lateout($reg) y, in($reg) x);
y
*out = y;
}
};
}
Expand All @@ -159,6 +181,12 @@ check!(reg_i8 i8 reg "mov" "");
// CHECK: //NO_APP
check!(reg_i16 i16 reg "mov" "");

// CHECK-LABEL: {{("#)?}}reg_f16{{"?}}
// CHECK: //APP
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
// CHECK: //NO_APP
check!(reg_f16 f16 reg "mov" "");

// CHECK-LABEL: {{("#)?}}reg_i32{{"?}}
// CHECK: //APP
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
Expand Down Expand Up @@ -201,6 +229,12 @@ check!(vreg_i8 i8 vreg "fmov" "s");
// CHECK: //NO_APP
check!(vreg_i16 i16 vreg "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_f16{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
// CHECK: //NO_APP
check!(vreg_f16 f16 vreg "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_i32{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
Expand All @@ -225,6 +259,12 @@ check!(vreg_i64 i64 vreg "fmov" "s");
// CHECK: //NO_APP
check!(vreg_f64 f64 vreg "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_f128{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
// CHECK: //NO_APP
check!(vreg_f128 f128 vreg "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_ptr{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
Expand Down Expand Up @@ -255,6 +295,12 @@ check!(vreg_i32x2 i32x2 vreg "fmov" "s");
// CHECK: //NO_APP
check!(vreg_i64x1 i64x1 vreg "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_f16x4{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
// CHECK: //NO_APP
check!(vreg_f16x4 f16x4 vreg "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_f32x2{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
Expand Down Expand Up @@ -291,6 +337,12 @@ check!(vreg_i32x4 i32x4 vreg "fmov" "s");
// CHECK: //NO_APP
check!(vreg_i64x2 i64x2 vreg "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_f16x8{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
// CHECK: //NO_APP
check!(vreg_f16x8 f16x8 vreg "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_f32x4{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
Expand All @@ -315,6 +367,12 @@ check!(vreg_low16_i8 i8 vreg_low16 "fmov" "s");
// CHECK: //NO_APP
check!(vreg_low16_i16 i16 vreg_low16 "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_low16_f16{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
// CHECK: //NO_APP
check!(vreg_low16_f16 f16 vreg_low16 "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_low16_f32{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
Expand All @@ -333,6 +391,12 @@ check!(vreg_low16_i64 i64 vreg_low16 "fmov" "s");
// CHECK: //NO_APP
check!(vreg_low16_f64 f64 vreg_low16 "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_low16_f128{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
// CHECK: //NO_APP
check!(vreg_low16_f128 f128 vreg_low16 "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_low16_ptr{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
Expand Down Expand Up @@ -363,6 +427,12 @@ check!(vreg_low16_i32x2 i32x2 vreg_low16 "fmov" "s");
// CHECK: //NO_APP
check!(vreg_low16_i64x1 i64x1 vreg_low16 "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_low16_f16x4{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
// CHECK: //NO_APP
check!(vreg_low16_f16x4 f16x4 vreg_low16 "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_low16_f32x2{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
Expand Down Expand Up @@ -399,6 +469,12 @@ check!(vreg_low16_i32x4 i32x4 vreg_low16 "fmov" "s");
// CHECK: //NO_APP
check!(vreg_low16_i64x2 i64x2 vreg_low16 "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_low16_f16x8{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
// CHECK: //NO_APP
check!(vreg_low16_f16x8 f16x8 vreg_low16 "fmov" "s");

// CHECK-LABEL: {{("#)?}}vreg_low16_f32x4{{"?}}
// CHECK: //APP
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
Expand All @@ -423,6 +499,12 @@ check_reg!(x0_i8 i8 "x0" "mov");
// CHECK: //NO_APP
check_reg!(x0_i16 i16 "x0" "mov");

// CHECK-LABEL: {{("#)?}}x0_f16{{"?}}
// CHECK: //APP
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
// CHECK: //NO_APP
check_reg!(x0_f16 f16 "x0" "mov");

// CHECK-LABEL: {{("#)?}}x0_i32{{"?}}
// CHECK: //APP
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
Expand Down Expand Up @@ -465,6 +547,12 @@ check_reg!(v0_i8 i8 "s0" "fmov");
// CHECK: //NO_APP
check_reg!(v0_i16 i16 "s0" "fmov");

// CHECK-LABEL: {{("#)?}}v0_f16{{"?}}
// CHECK: //APP
// CHECK: fmov s0, s0
// CHECK: //NO_APP
check_reg!(v0_f16 f16 "s0" "fmov");

// CHECK-LABEL: {{("#)?}}v0_i32{{"?}}
// CHECK: //APP
// CHECK: fmov s0, s0
Expand All @@ -489,6 +577,12 @@ check_reg!(v0_i64 i64 "s0" "fmov");
// CHECK: //NO_APP
check_reg!(v0_f64 f64 "s0" "fmov");

// CHECK-LABEL: {{("#)?}}v0_f128{{"?}}
// CHECK: //APP
// CHECK: fmov s0, s0
// CHECK: //NO_APP
check_reg!(v0_f128 f128 "s0" "fmov");

// CHECK-LABEL: {{("#)?}}v0_ptr{{"?}}
// CHECK: //APP
// CHECK: fmov s0, s0
Expand Down Expand Up @@ -519,6 +613,12 @@ check_reg!(v0_i32x2 i32x2 "s0" "fmov");
// CHECK: //NO_APP
check_reg!(v0_i64x1 i64x1 "s0" "fmov");

// CHECK-LABEL: {{("#)?}}v0_f16x4{{"?}}
// CHECK: //APP
// CHECK: fmov s0, s0
// CHECK: //NO_APP
check_reg!(v0_f16x4 f16x4 "s0" "fmov");

// CHECK-LABEL: {{("#)?}}v0_f32x2{{"?}}
// CHECK: //APP
// CHECK: fmov s0, s0
Expand Down Expand Up @@ -555,6 +655,12 @@ check_reg!(v0_i32x4 i32x4 "s0" "fmov");
// CHECK: //NO_APP
check_reg!(v0_i64x2 i64x2 "s0" "fmov");

// CHECK-LABEL: {{("#)?}}v0_f16x8{{"?}}
// CHECK: //APP
// CHECK: fmov s0, s0
// CHECK: //NO_APP
check_reg!(v0_f16x8 f16x8 "s0" "fmov");

// CHECK-LABEL: {{("#)?}}v0_f32x4{{"?}}
// CHECK: //APP
// CHECK: fmov s0, s0
Expand Down
6 changes: 3 additions & 3 deletions tests/ui/asm/aarch64/type-check-3.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -95,23 +95,23 @@ error: type `i128` cannot be used with this register class
LL | asm!("{}", in(reg) 0i128);
| ^^^^^
|
= note: register class `reg` supports these types: i8, i16, i32, i64, f32, f64
= note: register class `reg` supports these types: i8, i16, i32, i64, f16, f32, f64

error: type `float64x2_t` cannot be used with this register class
--> $DIR/type-check-3.rs:75:28
|
LL | asm!("{}", in(reg) f64x2);
| ^^^^^
|
= note: register class `reg` supports these types: i8, i16, i32, i64, f32, f64
= note: register class `reg` supports these types: i8, i16, i32, i64, f16, f32, f64

error: type `Simd256bit` cannot be used with this register class
--> $DIR/type-check-3.rs:77:29
|
LL | asm!("{}", in(vreg) f64x4);
| ^^^^^
|
= note: register class `vreg` supports these types: i8, i16, i32, i64, f32, f64, i8x8, i16x4, i32x2, i64x1, f32x2, f64x1, i8x16, i16x8, i32x4, i64x2, f32x4, f64x2
= note: register class `vreg` supports these types: i8, i16, i32, i64, f16, f32, f64, f128, i8x8, i16x4, i32x2, i64x1, f16x4, f32x2, f64x1, i8x16, i16x8, i32x4, i64x2, f16x8, f32x4, f64x2

error: incompatible types for asm inout argument
--> $DIR/type-check-3.rs:88:33
Expand Down
Loading
Loading