-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Add SME FP8 FVDOT, FVDOTB and FVDOTT intrinsics #119922
Conversation
Add support for the following SME 8-bit floating-point dot-product intrinsics:

```
// Only if __ARM_FEATURE_SME_F8F16 != 0
void svvdot_lane_za16[_mf8]_vg1x2_fpm(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, uint64_t imm_idx, fpm_t fpm) __arm_streaming __arm_inout("za");

// Only if __ARM_FEATURE_SME_F8F32 != 0
void svvdott_lane_za32[_mf8]_vg1x4_fpm(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, uint64_t imm_idx, fpm_t fpm) __arm_streaming __arm_inout("za");
void svvdotb_lane_za32[_mf8]_vg1x4_fpm(uint32_t slice, svmfloat8x2_t zn, svmfloat8_t zm, uint64_t imm_idx, fpm_t fpm) __arm_streaming __arm_inout("za");
```

Co-authored-by: Momchil Velikov <momchil.velikov@arm.com>
Co-authored-by: Marian Lukac <marian.lukac@arm.com>
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-clang Author: Jonathan Thackray (jthackray) Changes: Add support for the following SME 8-bit floating-point dot-product intrinsics:
Full diff: https://github.com/llvm/llvm-project/pull/119922.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td
index 0fae70866cd55e..8623491384b2a5 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -748,11 +748,16 @@ let SMETargetGuard = "sme2" in {
let SMETargetGuard = "sme-f8f32" in {
def SVDOT_LANE_FP8_ZA32_VG1x2 : Inst<"svdot_lane_za32[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za32_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>;
def SVDOT_LANE_FP8_ZA32_VG1x4 : Inst<"svdot_lane_za32[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za32_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>;
+
+ def SVVDOTB_LANE_FP8_ZA32_VG1x4 : Inst<"svvdotb_lane_za32[_f8]_vg1x4", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fvdotb_lane_za32_vg1x4", [IsOverloadNone, IsStreaming, IsInOutZA, SetsFPMR], [ImmCheck<3, ImmCheck0_3>]>;
+ def SVVDOTT_LANE_FP8_ZA32_VG1x4 : Inst<"svvdott_lane_za32[_f8]_vg1x4", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fvdott_lane_za32_vg1x4", [IsOverloadNone, IsStreaming, IsInOutZA, SetsFPMR], [ImmCheck<3, ImmCheck0_3>]>;
}
let SMETargetGuard = "sme-f8f16" in {
def SVDOT_LANE_FP8_ZA16_VG1x2 : Inst<"svdot_lane_za16[_mf8]_vg1x2_fpm", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x2", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
def SVDOT_LANE_FP8_ZA16_VG1x4 : Inst<"svdot_lane_za16[_mf8]_vg1x4_fpm", "vm4di>", "m", MergeNone, "aarch64_sme_fp8_fdot_lane_za16_vg1x4", [IsStreaming, IsInOutZA, SetsFPMR, IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+
+ def SVVDOT_LANE_FP8_ZA16_VG1x2 : Inst<"svvdot_lane_za16[_f8]_vg1x2", "vm2di>", "m", MergeNone, "aarch64_sme_fp8_fvdot_lane_za16_vg1x2", [IsOverloadNone, IsStreaming, IsInOutZA, SetsFPMR], [ImmCheck<3, ImmCheck0_7>]>;
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fvdot.c b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fvdot.c
new file mode 100644
index 00000000000000..a968355540ed5d
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/sme2-intrinsics/acle_sme2_fp8_fvdot.c
@@ -0,0 +1,80 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +sme-f8f32 -target-feature -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sme.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: define dso_local void @test_svvdot_lane_za16_f8_vg1x2(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fvdot.lane.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 7)
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: define dso_local void @_Z30test_svvdot_lane_za16_f8_vg1x2j13svmfloat8x2_tu13__SVMfloat8_tm(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0:[0-9]+]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fvdot.lane.za16.vg1x2(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 7)
+// CPP-CHECK-NEXT: ret void
+//
+void test_svvdot_lane_za16_f8_vg1x2(uint32_t slice, svmfloat8x2_t zn,
+ svmfloat8_t zm,
+ fpm_t fpmr) __arm_streaming
+ __arm_inout("za") {
+ SVE_ACLE_FUNC(svvdot_lane_za16, _f8, _vg1x2)(slice, zn, zm, 7, fpmr);
+}
+
+// CHECK-LABEL: define dso_local void @test_svvdotb_lane_za32_f8_vg1x4(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fvdotb.lane.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3)
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: define dso_local void @_Z31test_svvdotb_lane_za32_f8_vg1x4j13svmfloat8x2_tu13__SVMfloat8_tm(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fvdotb.lane.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3)
+// CPP-CHECK-NEXT: ret void
+//
+void test_svvdotb_lane_za32_f8_vg1x4(uint32_t slice, svmfloat8x2_t zn,
+ svmfloat8_t zm,
+ fpm_t fpmr) __arm_streaming
+ __arm_inout("za") {
+ SVE_ACLE_FUNC(svvdotb_lane_za32, _f8, _vg1x4)(slice, zn, zm, 3, fpmr);
+}
+
+// CHECK-LABEL: define dso_local void @test_svvdott_lane_za32_f8_vg1x4(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fvdott.lane.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3)
+// CHECK-NEXT: ret void
+//
+// CPP-CHECK-LABEL: define dso_local void @_Z31test_svvdott_lane_za32_f8_vg1x4j13svmfloat8x2_tu13__SVMfloat8_tm(
+// CPP-CHECK-SAME: i32 noundef [[SLICE:%.*]], <vscale x 16 x i8> [[ZN_COERCE0:%.*]], <vscale x 16 x i8> [[ZN_COERCE1:%.*]], <vscale x 16 x i8> [[ZM:%.*]], i64 noundef [[FPMR:%.*]]) #[[ATTR0]] {
+// CPP-CHECK-NEXT: entry:
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR]])
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fp8.fvdott.lane.za32.vg1x4(i32 [[SLICE]], <vscale x 16 x i8> [[ZN_COERCE0]], <vscale x 16 x i8> [[ZN_COERCE1]], <vscale x 16 x i8> [[ZM]], i32 3)
+// CPP-CHECK-NEXT: ret void
+//
+void test_svvdott_lane_za32_f8_vg1x4(uint32_t slice, svmfloat8x2_t zn,
+ svmfloat8_t zm,
+ fpm_t fpmr) __arm_streaming
+ __arm_inout("za") {
+ SVE_ACLE_FUNC(svvdott_lane_za32, _f8, _vg1x4)(slice, zn, zm, 3, fpmr);
+}
diff --git a/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fp8_fvdot.c b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fp8_fvdot.c
new file mode 100644
index 00000000000000..5a8f37620c57d5
--- /dev/null
+++ b/clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_fp8_fvdot.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -verify -emit-llvm -o - %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_sme.h>
+
+void test_features(uint32_t slice, fpm_t fpmr, svmfloat8x2_t zn,
+ svmfloat8_t zm) __arm_streaming __arm_inout("za") {
+// expected-error@+1 {{'svvdot_lane_za16_f8_vg1x2' needs target feature sme,sme-f8f16}}
+ svvdot_lane_za16_f8_vg1x2(slice, zn, zm, 7, fpmr);
+// expected-error@+1 {{'svvdotb_lane_za32_f8_vg1x4' needs target feature sme,sme-f8f32}}
+ svvdotb_lane_za32_f8_vg1x4(slice, zn, zm, 3, fpmr);
+// expected-error@+1 {{'svvdott_lane_za32_f8_vg1x4' needs target feature sme,sme-f8f32}}
+ svvdott_lane_za32_f8_vg1x4(slice, zn, zm, 3, fpmr);
+}
+
+void test_imm(uint32_t slice, fpm_t fpmr, svmfloat8x2_t zn,
+ svmfloat8_t zm) __arm_streaming __arm_inout("za") {
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}}
+ svvdot_lane_za16_f8_vg1x2(slice, zn, zm, -1, fpmr);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svvdotb_lane_za32_f8_vg1x4(slice, zn, zm, -1, fpmr);
+// expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}}
+ svvdott_lane_za32_f8_vg1x4(slice, zn, zm, -1, fpmr);
+
+// expected-error@+1{{argument value 8 is outside the valid range [0, 7]}}
+ svvdot_lane_za16_f8_vg1x2(slice, zn, zm, 8, fpmr);
+// expected-error@+1{{argument value 4 is outside the valid range [0, 3]}}
+ svvdotb_lane_za32_f8_vg1x4(slice, zn, zm, 4, fpmr);
+// expected-error@+1{{argument value 4 is outside the valid range [0, 3]}}
+ svvdott_lane_za32_f8_vg1x4(slice, zn, zm, 4, fpmr);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 654bc64a30bd89..f7d05c4aee9869 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3879,6 +3879,11 @@ class SME2_FP8_FDOT_LANE_VG1x4 :
def int_aarch64_sme_fp8_fdot_lane_za32_vg1x2 : SME2_FP8_FDOT_LANE_VG1x2;
def int_aarch64_sme_fp8_fdot_lane_za32_vg1x4 : SME2_FP8_FDOT_LANE_VG1x4;
+
+ def int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2 : SME2_FP8_FDOT_LANE_VG1x2;
+
+ def int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4 : SME2_FP8_FDOT_LANE_VG1x2;
+ def int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4 : SME2_FP8_FDOT_LANE_VG1x2;
}
//
diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
index fa577cf92e99d1..b6ffbac7351d4a 100644
--- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td
@@ -985,8 +985,8 @@ def LUTI4_S_4ZZT2Z : sme2_luti4_vector_vg4_strided<0b00, 0b00, "luti4">;
} //[HasSME2p1, HasSME_LUTv2]
let Predicates = [HasSMEF8F16] in {
-defm FVDOT_VG2_M2ZZI_BtoH : sme2p1_multi_vec_array_vg2_index_f8f16<"fvdot", 0b11, 0b110, ZZ_b_mul_r, ZPR4b8>;
-defm FDOT_VG2_M2ZZI_BtoH : sme2_fp8_fdot_index_za16_vg1x2<"fdot", int_aarch64_sme_fp8_fdot_lane_za16_vg1x2>;
+defm FVDOT_VG2_M2ZZI_BtoH : sme2_fp8_fdot_index_za16_vg1x2<"fvdot", 0b110, int_aarch64_sme_fp8_fvdot_lane_za16_vg1x2>;
+defm FDOT_VG2_M2ZZI_BtoH : sme2_fp8_fdot_index_za16_vg1x2<"fdot", 0b010, int_aarch64_sme_fp8_fdot_lane_za16_vg1x2>;
defm FDOT_VG4_M4ZZI_BtoH : sme2_fp8_fdot_index_za16_vg1x4<"fdot", int_aarch64_sme_fp8_fdot_lane_za16_vg1x4>;
defm FDOT_VG2_M2ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0010001, MatrixOp16, ZZ_b, ZPR4b8>;
defm FDOT_VG4_M4ZZ_BtoH : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110001, MatrixOp16, ZZZZ_b, ZPR4b8>;
@@ -1017,8 +1017,8 @@ defm FDOT_VG4_M4ZZ_BtoS : sme2_dot_mla_add_sub_array_vg24_single<"fdot", 0b0110
defm FDOT_VG2_M2Z2Z_BtoS : sme2_dot_mla_add_sub_array_vg2_multi<"fdot", 0b0100110, MatrixOp32, ZZ_b_mul_r, nxv16i8, null_frag>;
defm FDOT_VG4_M4Z4Z_BtoS : sme2_dot_mla_add_sub_array_vg4_multi<"fdot", 0b0100110, MatrixOp32, ZZZZ_b_mul_r, nxv16i8, null_frag>;
-def FVDOTB_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdotb", 0b0>;
-def FVDOTT_VG4_M2ZZI_BtoS : sme2_fp8_multi_vec_array_vg4_index<"fvdott", 0b1>;
+defm FVDOTB_VG4_M2ZZI_BtoS : sme2_fp8_fdotv_index_za32_vg1x4<"fvdotb", 0b0, int_aarch64_sme_fp8_fvdotb_lane_za32_vg1x4>;
+defm FVDOTT_VG4_M2ZZI_BtoS : sme2_fp8_fdotv_index_za32_vg1x4<"fvdott", 0b1, int_aarch64_sme_fp8_fvdott_lane_za32_vg1x4>;
defm FMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"fmlall", 0b01, 0b000, null_frag>;
defm FMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"fmlall", 0b10, 0b100, null_frag>;
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 9f25749c83db83..6119072b09b68a 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -5797,9 +5797,9 @@ multiclass sme2_fmop4a_fp8_fp16_2way<string mnemonic> {
// FP8 SME FDOT instructions
-multiclass sme2_fp8_fdot_index_za16_vg1x2<string mnemonic,
+multiclass sme2_fp8_fdot_index_za16_vg1x2<string mnemonic, bits<3> op,
SDPatternOperator intrinsic> {
- def NAME : sme2_multi_vec_array_vg2_index<0b11, {0b0,?,?,0b10,?}, MatrixOp16,
+ def NAME : sme2_multi_vec_array_vg2_index<0b11, {op{2},?,?,op{1-0},?}, MatrixOp16,
ZZ_b_mul_r, ZPR4b8,
VectorIndexH32b_timm, mnemonic>,
SMEPseudo2Instr<NAME, 1>{
@@ -5882,3 +5882,14 @@ multiclass sme2_fp8_fdot_index_za32_vg1x4<string mnemonic,
def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, ZPR4b8, nxv16i8, VectorIndexS32b_timm, tileslice16>;
}
+
+multiclass sme2_fp8_fdotv_index_za32_vg1x4<string mnemonic, bit T, SDPatternOperator intrinsic> {
+ def NAME : sme2_fp8_multi_vec_array_vg4_index<mnemonic, T>,
+ SMEPseudo2Instr<NAME, 1> {
+ let Uses=[FPMR, FPCR];
+ }
+
+ def _PSEUDO : sme2_za_array_2op_multi_index_pseudo<NAME, sme_elm_idx0_7, ZZ_b_mul_r, ZPR4b8, VectorIndexS32b_timm, SMEMatrixArray>;
+
+ def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat<NAME, intrinsic, sme_elm_idx0_7, ZPR4b8, nxv16i8, VectorIndexH32b_timm, tileslice16>;
+}
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fvdot.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fvdot.ll
new file mode 100644
index 00000000000000..ed02dea441d286
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp8-fvdot.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "// kill:" --version 4
+; RUN: llc -force-streaming < %s | FileCheck %s
+target triple = "aarch64-linux"
+
+define void @test_fvdot16_1x2_indexed(i32 %slice.0,
+; CHECK-LABEL: test_fvdot16_1x2_indexed:
+; CHECK: // %bb.0:
+; CHECK: mov w8, w0
+; CHECK: fvdot za.h[w8, 7, vgx2], { z0.b, z1.b }, z2.b[3]
+; CHECK: ret
+ <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
+ <vscale x 16 x i8> %zm) #0 {
+ %slice = add i32 %slice.0, 7
+ call void @llvm.aarch64.sme.fp8.fvdot.lane.za16.vg1x2(i32 %slice,
+ <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
+ <vscale x 16 x i8> %zm, i32 3)
+ ret void
+}
+
+define void @test_fvdot32_bottom_1x4_indexed(i32 %slice.0,
+; CHECK-LABEL: test_fvdot32_bottom_1x4_indexed:
+; CHECK: // %bb.0:
+; CHECK: mov w8, w0
+; CHECK: fvdotb za.s[w8, 7, vgx4], { z0.b, z1.b }, z2.b[3]
+; CHECK: ret
+ <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
+ <vscale x 16 x i8> %zm) #0 {
+ %slice = add i32 %slice.0, 7
+ call void @llvm.aarch64.sme.fp8.fvdotb.lane.za32.vg1x4(i32 %slice,
+ <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
+ <vscale x 16 x i8> %zm, i32 3)
+ ret void
+}
+
+define void @test_fvdot32_top_1x4_indexed(i32 %slice.0,
+; CHECK-LABEL: test_fvdot32_top_1x4_indexed:
+; CHECK: // %bb.0:
+; CHECK: mov w8, w0
+; CHECK: fvdott za.s[w8, 7, vgx4], { z0.b, z1.b }, z2.b[3]
+; CHECK: ret
+ <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
+ <vscale x 16 x i8> %zm) #0 {
+ %slice = add i32 %slice.0, 7
+ call void @llvm.aarch64.sme.fp8.fvdott.lane.za32.vg1x4(i32 %slice,
+ <vscale x 16 x i8> %zn1, <vscale x 16 x i8> %zn2,
+ <vscale x 16 x i8> %zm, i32 3)
+ ret void
+}
+
+declare void @llvm.aarch64.sme.fp8.fvdot.lane.za16.vg1x2(i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare void @llvm.aarch64.sme.fp8.fvdotb.lane.za32.vg1x4(i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare void @llvm.aarch64.sme.fp8.fvdott.lane.za32.vg1x4(i32, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+
+attributes #0 = { "target-features" = "+sme-f8f32,+sme-f8f16" }
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you Jonathan for the patch!
LGTM!
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/51/builds/7753
Here is the relevant piece of the build log for reference:
|
Add support for the following SME 8-bit floating-point dot-product intrinsics: