Skip to content

Commit

Permalink
[AArch64] Implement intrinsics for F1CVTL/F2CVTL and BF1CVTL/BF2CVTL (#…
Browse files Browse the repository at this point in the history
…116959)

This patch implements the following intrinsics:

8-bit floating-point convert to deinterleaved half-precision or
BFloat16.
``` c
  // Variant is also available for: _bf16[_mf8]_x2
  svfloat16x2_t svcvtl1_f16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) __arm_streaming;
  svfloat16x2_t svcvtl2_f16[_mf8]_x2_fpm(svmfloat8_t zn, fpm_t fpm) __arm_streaming;
```

Defined in ARM-software/acle#323

Co-authored-by: Caroline Concatto caroline.concatto@arm.com
Co-authored-by: Marian Lukac marian.lukac@arm.com
  • Loading branch information
SpencerAbson authored Nov 28, 2024
1 parent 12ccb62 commit e4ee970
Show file tree
Hide file tree
Showing 11 changed files with 213 additions and 7 deletions.
1 change: 1 addition & 0 deletions clang/include/clang/Basic/TargetBuiltins.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,7 @@ namespace clang {
bool isTupleSet() const { return Flags & IsTupleSet; }
bool isReadZA() const { return Flags & IsReadZA; }
bool isWriteZA() const { return Flags & IsWriteZA; }
bool setsFPMR() const { return Flags & SetsFPMR; }
bool isReductionQV() const { return Flags & IsReductionQV; }
uint64_t getBits() const { return Flags; }
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
Expand Down
10 changes: 6 additions & 4 deletions clang/include/clang/Basic/arm_sve.td
Original file line number Diff line number Diff line change
Expand Up @@ -2422,14 +2422,16 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in {
def SVUUNPK_X4 : SInst<"svunpk_{d}[_{3}_x4]", "42.h", "UsUiUl", MergeNone, "aarch64_sve_uunpk_x4", [IsStreaming], []>;
}

//
// Multi-vector scaling
//
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in {
let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in {
// Multi-vector scaling
def FSCALE_SINGLE_X2 : Inst<"svscale[_single_{d}_x2]", "22x", "fhd", MergeNone, "aarch64_sme_fp8_scale_single_x2", [IsStreaming],[]>;
def FSCALE_SINGLE_X4 : Inst<"svscale[_single_{d}_x4]", "44x", "fhd", MergeNone, "aarch64_sme_fp8_scale_single_x4", [IsStreaming],[]>;
def FSCALE_X2 : Inst<"svscale[_{d}_x2]", "222.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x2", [IsStreaming],[]>;
def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x4", [IsStreaming],[]>;

// Convert from FP8 to deinterleaved half-precision/BFloat16 multi-vector
def SVF1CVTL : Inst<"svcvtl1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl1_x2", [IsStreaming, SetsFPMR], []>;
def SVF2CVTL : Inst<"svcvtl2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl2_x2", [IsStreaming, SetsFPMR], []>;
}

let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/arm_sve_sme_incl.td
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ include "arm_immcheck_incl.td"
// l: int64_t
// m: uint32_t
// n: uint64_t
// >: fpm_t

// [: svuint8_t
// t: svint32_t
Expand All @@ -103,6 +104,7 @@ include "arm_immcheck_incl.td"
// M: svfloat32_t
// N: svfloat64_t
// $: svbfloat16_t
// ~: svmfloat8_t

// J: Prefetch type (sv_prfop)

Expand Down Expand Up @@ -235,6 +237,7 @@ def IsInOutZA : FlagType<0x200000000000>;
def IsInZT0 : FlagType<0x400000000000>;
def IsOutZT0 : FlagType<0x800000000000>;
def IsInOutZT0 : FlagType<0x1000000000000>;
def SetsFPMR : FlagType<0x2000000000000>;

defvar InvalidMode = "";

Expand Down
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10859,6 +10859,10 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
else if (TypeFlags.isUndef())
return UndefValue::get(Ty);
else if (Builtin->LLVMIntrinsic != 0) {
// Emit set FPMR for intrinsics that require it
if (TypeFlags.setsFPMR())
Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
Ops.pop_back_val());
if (TypeFlags.getMergeType() == SVETypeFlags::MergeZeroExp)
InsertExplicitZeroOperand(Builder, Ty, Ops);

Expand Down
81 changes: 81 additions & 0 deletions clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_cvt.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py

// REQUIRES: aarch64-registered-target

// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s

#include <arm_sve.h>

#ifdef SVE_OVERLOADED_FORMS
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
#else
#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
#endif

// CHECK-LABEL: @test_cvtl1_f16_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8f16(<vscale x 16 x i8> [[ZN:%.*]])
// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_cvtl1_f16_x2u13__SVMfloat8_tm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8f16(<vscale x 16 x i8> [[ZN:%.*]])
// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
//
svfloat16x2_t test_cvtl1_f16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
return SVE_ACLE_FUNC(svcvtl1_f16,_mf8,_x2_fpm)(zn, fpmr);
}

// CHECK-LABEL: @test_cvtl2_f16_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8f16(<vscale x 16 x i8> [[ZN:%.*]])
// CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z17test_cvtl2_f16_x2u13__SVMfloat8_tm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8f16(<vscale x 16 x i8> [[ZN:%.*]])
// CPP-CHECK-NEXT: ret { <vscale x 8 x half>, <vscale x 8 x half> } [[TMP0]]
//
svfloat16x2_t test_cvtl2_f16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
return SVE_ACLE_FUNC(svcvtl2_f16,_mf8,_x2_fpm)(zn, fpmr);
}

// CHECK-LABEL: @test_cvtl1_bf16_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8bf16(<vscale x 16 x i8> [[ZN:%.*]])
// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_cvtl1_bf16_x2u13__SVMfloat8_tm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8bf16(<vscale x 16 x i8> [[ZN:%.*]])
// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
//
svbfloat16x2_t test_cvtl1_bf16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
return SVE_ACLE_FUNC(svcvtl1_bf16,_mf8,_x2_fpm)(zn, fpmr);
}

// CHECK-LABEL: @test_cvtl2_bf16_x2(
// CHECK-NEXT: entry:
// CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
// CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8bf16(<vscale x 16 x i8> [[ZN:%.*]])
// CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
//
// CPP-CHECK-LABEL: @_Z18test_cvtl2_bf16_x2u13__SVMfloat8_tm(
// CPP-CHECK-NEXT: entry:
// CPP-CHECK-NEXT: tail call void @llvm.aarch64.set.fpmr(i64 [[FPMR:%.*]])
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8bf16(<vscale x 16 x i8> [[ZN:%.*]])
// CPP-CHECK-NEXT: ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } [[TMP0]]
//
svbfloat16x2_t test_cvtl2_bf16_x2(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
return SVE_ACLE_FUNC(svcvtl2_bf16,_mf8,_x2_fpm)(zn, fpmr);
}
17 changes: 17 additions & 0 deletions clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_cvt.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -verify -emit-llvm-only %s

// REQUIRES: aarch64-registered-target

#include <arm_sve.h>


void test_features_sme2_fp8(svmfloat8_t zn, fpm_t fpmr) __arm_streaming {
// expected-error@+1 {{'svcvtl1_f16_mf8_x2_fpm' needs target feature sme,sme2,fp8}}
svcvtl1_f16_mf8_x2_fpm(zn, fpmr);
// expected-error@+1 {{'svcvtl2_f16_mf8_x2_fpm' needs target feature sme,sme2,fp8}}
svcvtl2_f16_mf8_x2_fpm(zn, fpmr);
// expected-error@+1 {{'svcvtl1_bf16_mf8_x2_fpm' needs target feature sme,sme2,fp8}}
svcvtl1_bf16_mf8_x2_fpm(zn, fpmr);
// expected-error@+1 {{'svcvtl2_bf16_mf8_x2_fpm' needs target feature sme,sme2,fp8}}
svcvtl2_bf16_mf8_x2_fpm(zn, fpmr);
}
17 changes: 15 additions & 2 deletions clang/utils/TableGen/SveEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ namespace {
class SVEType {
bool Float, Signed, Immediate, Void, Constant, Pointer, BFloat, MFloat;
bool DefaultType, IsScalable, Predicate, PredicatePattern, PrefetchOp,
Svcount;
Svcount, Fpm;
unsigned Bitwidth, ElementBitwidth, NumVectors;

public:
Expand All @@ -62,7 +62,7 @@ class SVEType {
: Float(false), Signed(true), Immediate(false), Void(false),
Constant(false), Pointer(false), BFloat(false), MFloat(false),
DefaultType(false), IsScalable(true), Predicate(false),
PredicatePattern(false), PrefetchOp(false), Svcount(false),
PredicatePattern(false), PrefetchOp(false), Svcount(false), Fpm(false),
Bitwidth(128), ElementBitwidth(~0U), NumVectors(NumVectors) {
if (!TS.empty())
applyTypespec(TS);
Expand Down Expand Up @@ -101,6 +101,7 @@ class SVEType {
bool isPrefetchOp() const { return PrefetchOp; }
bool isSvcount() const { return Svcount; }
bool isConstant() const { return Constant; }
bool isFpm() const { return Fpm; }
unsigned getElementSizeInBits() const { return ElementBitwidth; }
unsigned getNumVectors() const { return NumVectors; }

Expand Down Expand Up @@ -497,6 +498,9 @@ std::string SVEType::str() const {
if (isPrefetchOp())
return "enum svprfop";

if (isFpm())
return "fpm_t";

std::string S;
if (Void)
S += "void";
Expand Down Expand Up @@ -752,6 +756,9 @@ void SVEType::applyModifier(char Mod) {
ElementBitwidth = Bitwidth = 32;
NumVectors = 0;
break;
case '>':
Fpm = true;
[[fallthrough]];
case 'n':
Predicate = false;
Svcount = false;
Expand Down Expand Up @@ -926,6 +933,12 @@ void SVEType::applyModifier(char Mod) {
Float = false;
BFloat = false;
break;
case '~':
Float = false;
BFloat = false;
MFloat = true;
ElementBitwidth = 8;
break;
case '.':
llvm_unreachable(". is never a type in itself");
break;
Expand Down
9 changes: 9 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsAArch64.td
Original file line number Diff line number Diff line change
Expand Up @@ -3813,6 +3813,15 @@ let TargetPrefix = "aarch64" in {
LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>],
[IntrNoMem]>;

class SME2_FP8_CVT_X2_Single_Intrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
[llvm_nxv16i8_ty],
[IntrReadMem, IntrInaccessibleMemOnly]>;
//
// CVT from FP8 to deinterleaved half-precision/BFloat16 multi-vector
//
def int_aarch64_sve_fp8_cvtl1_x2 : SME2_FP8_CVT_X2_Single_Intrinsic;
def int_aarch64_sve_fp8_cvtl2_x2 : SME2_FP8_CVT_X2_Single_Intrinsic;
}

// SVE2.1 - ZIPQ1, ZIPQ2, UZPQ1, UZPQ2
Expand Down
34 changes: 34 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
void SelectPExtPair(SDNode *N, unsigned Opc);
void SelectWhilePair(SDNode *N, unsigned Opc);
void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
void SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs, unsigned Opcode);
void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
bool IsTupleInput, unsigned Opc);
Expand Down Expand Up @@ -1866,6 +1867,27 @@ void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectCVTIntrinsicFP8(SDNode *N, unsigned NumVecs,
unsigned Opcode) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SmallVector<SDValue, 4> Ops(N->op_begin() + 2, N->op_end());
Ops.push_back(/*Chain*/ N->getOperand(0));

SDNode *Instruction =
CurDAG->getMachineNode(Opcode, DL, {MVT::Untyped, MVT::Other}, Ops);
SDValue SuperReg = SDValue(Instruction, 0);

for (unsigned i = 0; i < NumVecs; ++i)
ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
AArch64::zsub0 + i, DL, VT, SuperReg));

// Copy chain
unsigned ChainIdx = NumVecs;
ReplaceUses(SDValue(N, ChainIdx), SDValue(Instruction, 1));
CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
unsigned NumVecs,
bool IsZmMulti,
Expand Down Expand Up @@ -5547,6 +5569,18 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
SelectMultiVectorLuti(Node, 4, AArch64::LUTI4_4ZZT2Z);
return;
}
case Intrinsic::aarch64_sve_fp8_cvtl1_x2:
if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
Node->getValueType(0),
{AArch64::BF1CVTL_2ZZ_BtoH, AArch64::F1CVTL_2ZZ_BtoH}))
SelectCVTIntrinsicFP8(Node, 2, Opc);
return;
case Intrinsic::aarch64_sve_fp8_cvtl2_x2:
if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::FP>(
Node->getValueType(0),
{AArch64::BF2CVTL_2ZZ_BtoH, AArch64::F2CVTL_2ZZ_BtoH}))
SelectCVTIntrinsicFP8(Node, 2, Opc);
return;
}
} break;
case ISD::INTRINSIC_WO_CHAIN: {
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AArch64/SMEInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -2412,7 +2412,7 @@ multiclass sme2p1_fp_cvt_vector_vg2_single<string mnemonic, bit l> {

// SME2 multi-vec FP8 up convert two registers
multiclass sme2p1_fp8_cvt_vector_vg2_single<string mnemonic, bits<2> opc, bit L> {
def _NAME : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>{
def NAME : sme2_cvt_unpk_vector_vg2<opc, 0b110, L, ZZ_h_mul_r, ZPR8, mnemonic>{
let Uses = [FPMR, FPCR];
}
}
Expand Down
42 changes: 42 additions & 0 deletions llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-cvt.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+fp8 -verify-machineinstrs -force-streaming < %s | FileCheck %s

; F1CVTL / F2CVTL

define { <vscale x 8 x half>, <vscale x 8 x half> } @f1cvtl(<vscale x 16 x i8> %zm) {
; CHECK-LABEL: f1cvtl:
; CHECK: // %bb.0:
; CHECK-NEXT: f1cvtl { z0.h, z1.h }, z0.b
; CHECK-NEXT: ret
%res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8f16(<vscale x 16 x i8> %zm)
ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

define { <vscale x 8 x half>, <vscale x 8 x half> } @f2cvtl(<vscale x 16 x i8> %zm) {
; CHECK-LABEL: f2cvtl:
; CHECK: // %bb.0:
; CHECK-NEXT: f2cvtl { z0.h, z1.h }, z0.b
; CHECK-NEXT: ret
%res = call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxvbf16(<vscale x 16 x i8> %zm)
ret { <vscale x 8 x half>, <vscale x 8 x half> } %res
}

; BF1CVTL / BF2CVTL

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @bf1cvtl(<vscale x 16 x i8> %zm) {
; CHECK-LABEL: bf1cvtl:
; CHECK: // %bb.0:
; CHECK-NEXT: bf1cvtl { z0.h, z1.h }, z0.b
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl1.x2.nxv8bf16(<vscale x 16 x i8> %zm)
ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

define { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @bf2cvtl( <vscale x 16 x i8> %zm) {
; CHECK-LABEL: bf2cvtl:
; CHECK: // %bb.0:
; CHECK-NEXT: bf2cvtl { z0.h, z1.h }, z0.b
; CHECK-NEXT: ret
%res = call { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } @llvm.aarch64.sve.fp8.cvtl2.x2.nxv8bf16(<vscale x 16 x i8> %zm)
ret { <vscale x 8 x bfloat>, <vscale x 8 x bfloat> } %res
}

0 comments on commit e4ee970

Please sign in to comment.