-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[aarch64] atan2 intrinsic lowering (p5) #112611
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-globalisel Author: Tex Riddell (tex3d) ChangesThis change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294
Part 5 for Implement the atan2 HLSL Function #70096. Patch is 35.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112611.diff 14 Files Affected:
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 532a3ca334b1ae..c4586894e3e490 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -92,7 +92,9 @@ TLI_DEFINE_VECFUNC("llvm.atan.f64", "_simd_atan_d2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atanf", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atan2", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("atan2f", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("cos", "_simd_cos_d2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.cos.f64", "_simd_cos_d2", FIXED(2), "_ZGV_LLVM_N2v")
@@ -531,6 +533,7 @@ TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v")
@@ -635,6 +638,7 @@ TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -748,6 +752,8 @@ TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGV
TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -933,6 +939,11 @@ TLI_DEFINE_VECFUNC("atan2f", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N
TLI_DEFINE_VECFUNC("atan2", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("atan2f", "armpl_svatan2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "armpl_vatan2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "armpl_svatan2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv")
+
TLI_DEFINE_VECFUNC("atanh", "armpl_vatanhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atanhf", "armpl_vatanhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atanh", "armpl_svatanh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index d9121cf166e5aa..83bf3c335cac89 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -154,6 +154,7 @@ def : GINodeEquiv<G_FTAN, ftan>;
def : GINodeEquiv<G_FACOS, facos>;
def : GINodeEquiv<G_FASIN, fasin>;
def : GINodeEquiv<G_FATAN, fatan>;
+def : GINodeEquiv<G_FATAN2, fatan2>;
def : GINodeEquiv<G_FCOSH, fcosh>;
def : GINodeEquiv<G_FSINH, fsinh>;
def : GINodeEquiv<G_FTANH, ftanh>;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3b2fd95076c465..af6240dcde2f50 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -457,6 +457,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(ACOS_F);
case TargetOpcode::G_FATAN:
RTLIBCASE(ATAN_F);
+ case TargetOpcode::G_FATAN2:
+ RTLIBCASE(ATAN2_F);
case TargetOpcode::G_FSINH:
RTLIBCASE(SINH_F);
case TargetOpcode::G_FCOSH:
@@ -1202,6 +1204,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -3122,6 +3125,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -5041,6 +5045,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FACOS:
case G_FASIN:
case G_FATAN:
+ case G_FATAN2:
case G_FCOSH:
case G_FSINH:
case G_FTANH:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 9574464207d99f..722ceea29c951c 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -828,6 +828,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -1715,6 +1716,7 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 60150c3328aaa7..1b81ed463735c3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -733,18 +733,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote);
}
- for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
- ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
- ISD::FACOS, ISD::FASIN, ISD::FATAN,
- ISD::FCOSH, ISD::FSINH, ISD::FTANH,
- ISD::FTAN, ISD::FEXP, ISD::FEXP2,
- ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
- ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW,
- ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN,
- ISD::STRICT_FACOS, ISD::STRICT_FASIN, ISD::STRICT_FATAN,
- ISD::STRICT_FCOSH, ISD::STRICT_FSINH, ISD::STRICT_FTANH,
- ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG,
- ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
+ for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
+ ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
+ ISD::FACOS, ISD::FASIN, ISD::FATAN,
+ ISD::FATAN2, ISD::FCOSH, ISD::FSINH,
+ ISD::FTANH, ISD::FTAN, ISD::FEXP,
+ ISD::FEXP2, ISD::FEXP10, ISD::FLOG,
+ ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
+ ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
+ ISD::STRICT_FSIN, ISD::STRICT_FACOS, ISD::STRICT_FASIN,
+ ISD::STRICT_FATAN, ISD::STRICT_FATAN2, ISD::STRICT_FCOSH,
+ ISD::STRICT_FSINH, ISD::STRICT_FTANH, ISD::STRICT_FEXP,
+ ISD::STRICT_FEXP2, ISD::STRICT_FLOG, ISD::STRICT_FLOG2,
+ ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
setOperationAction(Op, MVT::f16, Promote);
setOperationAction(Op, MVT::v4f16, Expand);
setOperationAction(Op, MVT::v8f16, Expand);
@@ -1188,7 +1189,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// silliness like this:
// clang-format off
for (auto Op :
- {ISD::SELECT, ISD::SELECT_CC,
+ {ISD::SELECT, ISD::SELECT_CC, ISD::FATAN2,
ISD::BR_CC, ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FDIV, ISD::FMA,
ISD::FNEG, ISD::FABS, ISD::FCEIL,
@@ -1647,6 +1648,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FACOS, VT, Expand);
setOperationAction(ISD::FASIN, VT, Expand);
setOperationAction(ISD::FATAN, VT, Expand);
+ setOperationAction(ISD::FATAN2, VT, Expand);
setOperationAction(ISD::FCOSH, VT, Expand);
setOperationAction(ISD::FSINH, VT, Expand);
setOperationAction(ISD::FTANH, VT, Expand);
@@ -1902,6 +1904,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
setOperationAction(ISD::FASIN, VT, Expand);
setOperationAction(ISD::FACOS, VT, Expand);
setOperationAction(ISD::FATAN, VT, Expand);
+ setOperationAction(ISD::FATAN2, VT, Expand);
setOperationAction(ISD::FSINH, VT, Expand);
setOperationAction(ISD::FCOSH, VT, Expand);
setOperationAction(ISD::FTANH, VT, Expand);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e9d01602c298af..e22d9e49a09d4e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -269,9 +269,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.libcallFor({{s64, s128}})
.minScalarOrElt(1, MinFPScalar);
- getActionDefinitionsBuilder(
- {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FTAN, G_FEXP,
- G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH, G_FTANH})
+ getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
+ G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
+ G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
+ G_FSINH, G_FTANH})
// We need a call for these, so we always need to scalarize.
.scalarize(0)
// Regardless of FP16 support, widen 16-bit elements to 32-bits.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index 314c5458e30909..ed7bcff5160f81 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -2345,6 +2345,14 @@ define float @test_atan_f32(float %x) {
ret float %y
}
+declare float @llvm.atan2.f32(float, float)
+define float @test_atan2_f32(float %x, float %y) {
+ ; CHECK-LABEL: name: test_atan2_f32
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FATAN2 %{{[0-9]+}}
+ %z = call float @llvm.atan2.f32(float %x, float %y)
+ ret float %z
+}
+
declare float @llvm.cosh.f32(float)
define float @test_cosh_f32(float %x) {
; CHECK-LABEL: name: test_cosh_f32
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atan2.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atan2.mir
new file mode 100644
index 00000000000000..ac4f4358e98790
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atan2.mir
@@ -0,0 +1,255 @@
+# RUN: llc -verify-machineinstrs -mtriple aarch64--- \
+# RUN: -run-pass=legalizer -mattr=+fullfp16 -global-isel %s -o - \
+# RUN: | FileCheck %s
+...
+---
+name: test_v4f16.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_v4f16.atan2
+ ; CHECK: [[V1:%[0-9]+]]:_(s16), [[V2:%[0-9]+]]:_(s16), [[V3:%[0-9]+]]:_(s16), [[V4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>)
+ ; CHECK: [[V5:%[0-9]+]]:_(s16), [[V6:%[0-9]+]]:_(s16), [[V7:%[0-9]+]]:_(s16), [[V8:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>)
+
+ ; CHECK-DAG: [[V1_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V1]](s16)
+ ; CHECK-DAG: [[V5_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V5]](s16)
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-NEXT: $s0 = COPY [[V1_S32]](s32)
+ ; CHECK-NEXT: $s1 = COPY [[V5_S32]](s32)
+ ; CHECK-NEXT: BL &atan2f
+ ; CHECK-NEXT: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT1_S32:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT1_S32]](s32)
+
+ ; CHECK-DAG: [[V2_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V2]](s16)
+ ; CHECK-DAG: [[V6_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V6]](s16)
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-NEXT: $s0 = COPY [[V2_S32]](s32)
+ ; CHECK-NEXT: $s1 = COPY [[V6_S32]](s32)
+ ; CHECK-NEXT: BL &atan2f
+ ; CHECK-NEXT: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT2_S32:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT2_S32]](s32)
+
+ ; CHECK-DAG: [[V3_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V3]](s16)
+ ; CHECK-DAG: [[V7_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V7]](s16)
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-NEXT: $s0 = COPY [[V3_S32]](s32)
+ ; CHECK-NEXT: $s1 = COPY [[V7_S32]](s32)
+ ; CHECK-NEXT: BL &atan2f
+ ; CHECK-NEXT: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT3_S32:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT3_S32]](s32)
+
+ ; CHECK-DAG: [[V4_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V4]](s16)
+ ; CHECK-DAG: [[V8_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V8]](s16)
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-NEXT: $s0 = COPY [[V4_S32]](s32)
+ ; CHECK-NEXT: $s1 = COPY [[V8_S32]](s32)
+ ; CHECK-NEXT: BL &atan2f
+ ; CHECK-NEXT: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT4_S32:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT4_S32]](s32)
+
+ ; CHECK-DAG: %{{[0-9]+}}:_(<4 x s16>) = G_BUILD_VECTOR [[ELT1]](s16), [[ELT2]](s16), [[ELT3]](s16), [[ELT4]](s16)
+
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(<4 x s16>) = COPY $d1
+ %2:_(<4 x s16>) = G_FATAN2 %0, %1
+ $d0 = COPY %2(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: test_v8f16.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: test_v8f16.atan2
+
+ ; This is big, so let's just check for the 8 calls to atan2f, the the
+ ; G_UNMERGE_VALUES, and the G_BUILD_VECTOR. The other instructions ought
+ ; to be covered by the other tests.
+
+ ; CHECK: G_UNMERGE_VALUES
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: G_BUILD_VECTOR
+
+ %0:_(<8 x s16>) = COPY $q0
+ %1:_(<8 x s16>) = COPY $q1
+ %2:_(<8 x s16>) = G_FATAN2 %0, %1
+ $q0 = COPY %2(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v2f32.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: test_v2f32.atan2
+ ; CHECK: [[V1:%[0-9]+]]:_(s32), [[V2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s32>)
+ ; CHECK: [[V3:%[0-9]+]]:_(s32), [[V4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s32>)
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $s0 = COPY [[V1]](s32)
+ ; CHECK-DAG: $s1 = COPY [[V3]](s32)
+ ; CHECK: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK: [[ELT1:%[0-9]+]]:_(s32) = COPY $s0
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $s0 = COPY [[V2]](s32)
+ ; CHECK-DAG: $s1 = COPY [[V4]](s32)
+ ; CHECK: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK: [[ELT2:%[0-9]+]]:_(s32) = COPY $s0
+
+ ; CHECK: %2:_(<2 x s32>) = G_BUILD_VECTOR [[ELT1]](s32), [[ELT2]](s32)
+
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<2 x s32>) = G_FATAN2 %0, %1
+ $d0 = COPY %2(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: test_v4f32.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: test_v4f32.atan2
+ ; CHECK: [[V1:%[0-9]+]]:_(s32), [[V2:%[0-9]+]]:_(s32), [[V3:%[0-9]+]]:_(s32), [[V4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s32>)
+ ; CHECK: [[V5:%[0-9]+]]:_(s32), [[V6:%[0-9]+]]:_(s32), [[V7:%[0-9]+]]:_(s32), [[V8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s32>)
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $s0 = COPY [[V1]](s32)
+ ; CHECK-DAG: $s1 = COPY [[V5]](s32)
+ ; CHECK: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s32) = COPY $s0
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $s0 = COPY [[V2]](s32)
+ ; CHECK-DAG: $s1 = COPY [[V6]](s32)
+ ; CHECK: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s32) = COPY $s0
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $s0 = COPY [[V3]](s32)
+ ; CHECK-DAG: $s1 = COPY [[V7]](s32)
+ ; CHECK: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s32) = COPY $s0
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $s0 = COPY [[V4]](s32)
+ ; CHECK-DAG: $s1 = COPY [[V8]](s32)
+ ; CHECK: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s32) = COPY $s0
+
+ ; CHECK: %2:_(<4 x s32>) = G_BUILD_VECTOR [[ELT1]](s32), [[ELT2]](s32), [[ELT3]](s32), [[ELT4]](s32)
+
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(<4 x s32>) = G_FATAN2 %0, %1
+ $q0 = COPY %2(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v2f64.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: test_v2f64.atan2
+ ; CHECK: [[V1:%[0-9]+]]:_(s64), [[V2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s64>)
+ ; CHECK: [[V3:%[0-9]+]]:_(s64), [[V4:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s64>)
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $d0 = COPY [[V1]](s64)
+ ; CHECK-DAG: $d1 = COPY [[V3]](s64)
+ ; CHECK: BL &atan2
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s64) = COPY $d0
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $d0 = COPY [[V2]](s64)
+ ; CHECK-DAG: $d1 = COPY [[V4]](s64)
+ ; CHECK: BL &atan2
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s64) = COPY $d0
+
+ ; CHECK: %2:_(<2 x s64>) = G_BUILD_VECTOR [[ELT1]](s64), [[ELT2]](s64)
+
+ %0:_(<2 x s64>) = COPY $q0
+ %1:_(<2 x s64>) = COPY $q1
+ %2:_(<2 x s64>) = G_FATAN2 %0, %1
+ $q0 = COPY %2(<2 x s64>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_atan2_half
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: $h0, $h1
+ ; CHECK-LABEL: name: test_atan2_half
+ ; CHECK: [[REG1:%[0-9]+]]:_(s32) = G_FPEXT %0(s16)
+ ; CHECK: [[REG2:%[0-9]+]]:_(s32) = G_FPEXT %1(s16)
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-NEXT: $s0 = COPY [[REG1]](s32)
+ ; CHECK-NEXT: $s1 = COPY [[REG2]](s32)
+ ; CHECK-NEXT: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[REG2:%[0-9]+]]:_(s32) = C...
[truncated]
|
@llvm/pr-subscribers-llvm-analysis Author: Tex Riddell (tex3d) ChangesThis change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294
Part 5 for Implement the atan2 HLSL Function #70096. Patch is 35.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112611.diff 14 Files Affected:
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 532a3ca334b1ae..c4586894e3e490 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -92,7 +92,9 @@ TLI_DEFINE_VECFUNC("llvm.atan.f64", "_simd_atan_d2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atanf", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_simd_atan_f4", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atan2", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_simd_atan2_d2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("atan2f", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_simd_atan2_f4", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("cos", "_simd_cos_d2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.cos.f64", "_simd_cos_d2", FIXED(2), "_ZGV_LLVM_N2v")
@@ -531,6 +533,7 @@ TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("llvm.atan.f64", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v")
@@ -635,6 +638,7 @@ TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -748,6 +752,8 @@ TLI_DEFINE_VECFUNC("llvm.atan.f32", "_ZGVsMxv_atanf", SCALABLE(4), MASKED, "_ZGV
TLI_DEFINE_VECFUNC("atan2", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("atan2f", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "_ZGVsMxvv_atan2", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "_ZGVsMxvv_atan2f", SCALABLE(4), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("atanh", "_ZGVsMxv_atanh", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("atanhf", "_ZGVsMxv_atanhf", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -933,6 +939,11 @@ TLI_DEFINE_VECFUNC("atan2f", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N
TLI_DEFINE_VECFUNC("atan2", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("atan2f", "armpl_svatan2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "armpl_vatan2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "armpl_vatan2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f64", "armpl_svatan2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
+TLI_DEFINE_VECFUNC("llvm.atan2.f32", "armpl_svatan2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvv")
+
TLI_DEFINE_VECFUNC("atanh", "armpl_vatanhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("atanhf", "armpl_vatanhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("atanh", "armpl_svatanh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index d9121cf166e5aa..83bf3c335cac89 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -154,6 +154,7 @@ def : GINodeEquiv<G_FTAN, ftan>;
def : GINodeEquiv<G_FACOS, facos>;
def : GINodeEquiv<G_FASIN, fasin>;
def : GINodeEquiv<G_FATAN, fatan>;
+def : GINodeEquiv<G_FATAN2, fatan2>;
def : GINodeEquiv<G_FCOSH, fcosh>;
def : GINodeEquiv<G_FSINH, fsinh>;
def : GINodeEquiv<G_FTANH, ftanh>;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 3b2fd95076c465..af6240dcde2f50 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -457,6 +457,8 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
RTLIBCASE(ACOS_F);
case TargetOpcode::G_FATAN:
RTLIBCASE(ATAN_F);
+ case TargetOpcode::G_FATAN2:
+ RTLIBCASE(ATAN2_F);
case TargetOpcode::G_FSINH:
RTLIBCASE(SINH_F);
case TargetOpcode::G_FCOSH:
@@ -1202,6 +1204,7 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -3122,6 +3125,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -5041,6 +5045,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FACOS:
case G_FASIN:
case G_FATAN:
+ case G_FATAN2:
case G_FCOSH:
case G_FSINH:
case G_FTANH:
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 9574464207d99f..722ceea29c951c 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -828,6 +828,7 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
@@ -1715,6 +1716,7 @@ bool llvm::isPreISelGenericFloatingPointOpcode(unsigned Opc) {
case TargetOpcode::G_FACOS:
case TargetOpcode::G_FASIN:
case TargetOpcode::G_FATAN:
+ case TargetOpcode::G_FATAN2:
case TargetOpcode::G_FCOSH:
case TargetOpcode::G_FSINH:
case TargetOpcode::G_FTANH:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 60150c3328aaa7..1b81ed463735c3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -733,18 +733,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, MVT::bf16, Promote);
}
- for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
- ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
- ISD::FACOS, ISD::FASIN, ISD::FATAN,
- ISD::FCOSH, ISD::FSINH, ISD::FTANH,
- ISD::FTAN, ISD::FEXP, ISD::FEXP2,
- ISD::FEXP10, ISD::FLOG, ISD::FLOG2,
- ISD::FLOG10, ISD::STRICT_FREM, ISD::STRICT_FPOW,
- ISD::STRICT_FPOWI, ISD::STRICT_FCOS, ISD::STRICT_FSIN,
- ISD::STRICT_FACOS, ISD::STRICT_FASIN, ISD::STRICT_FATAN,
- ISD::STRICT_FCOSH, ISD::STRICT_FSINH, ISD::STRICT_FTANH,
- ISD::STRICT_FEXP, ISD::STRICT_FEXP2, ISD::STRICT_FLOG,
- ISD::STRICT_FLOG2, ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
+ for (auto Op : {ISD::FREM, ISD::FPOW, ISD::FPOWI,
+ ISD::FCOS, ISD::FSIN, ISD::FSINCOS,
+ ISD::FACOS, ISD::FASIN, ISD::FATAN,
+ ISD::FATAN2, ISD::FCOSH, ISD::FSINH,
+ ISD::FTANH, ISD::FTAN, ISD::FEXP,
+ ISD::FEXP2, ISD::FEXP10, ISD::FLOG,
+ ISD::FLOG2, ISD::FLOG10, ISD::STRICT_FREM,
+ ISD::STRICT_FPOW, ISD::STRICT_FPOWI, ISD::STRICT_FCOS,
+ ISD::STRICT_FSIN, ISD::STRICT_FACOS, ISD::STRICT_FASIN,
+ ISD::STRICT_FATAN, ISD::STRICT_FATAN2, ISD::STRICT_FCOSH,
+ ISD::STRICT_FSINH, ISD::STRICT_FTANH, ISD::STRICT_FEXP,
+ ISD::STRICT_FEXP2, ISD::STRICT_FLOG, ISD::STRICT_FLOG2,
+ ISD::STRICT_FLOG10, ISD::STRICT_FTAN}) {
setOperationAction(Op, MVT::f16, Promote);
setOperationAction(Op, MVT::v4f16, Expand);
setOperationAction(Op, MVT::v8f16, Expand);
@@ -1188,7 +1189,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// silliness like this:
// clang-format off
for (auto Op :
- {ISD::SELECT, ISD::SELECT_CC,
+ {ISD::SELECT, ISD::SELECT_CC, ISD::FATAN2,
ISD::BR_CC, ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FDIV, ISD::FMA,
ISD::FNEG, ISD::FABS, ISD::FCEIL,
@@ -1647,6 +1648,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FACOS, VT, Expand);
setOperationAction(ISD::FASIN, VT, Expand);
setOperationAction(ISD::FATAN, VT, Expand);
+ setOperationAction(ISD::FATAN2, VT, Expand);
setOperationAction(ISD::FCOSH, VT, Expand);
setOperationAction(ISD::FSINH, VT, Expand);
setOperationAction(ISD::FTANH, VT, Expand);
@@ -1902,6 +1904,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
setOperationAction(ISD::FASIN, VT, Expand);
setOperationAction(ISD::FACOS, VT, Expand);
setOperationAction(ISD::FATAN, VT, Expand);
+ setOperationAction(ISD::FATAN2, VT, Expand);
setOperationAction(ISD::FSINH, VT, Expand);
setOperationAction(ISD::FCOSH, VT, Expand);
setOperationAction(ISD::FTANH, VT, Expand);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e9d01602c298af..e22d9e49a09d4e 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -269,9 +269,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.libcallFor({{s64, s128}})
.minScalarOrElt(1, MinFPScalar);
- getActionDefinitionsBuilder(
- {G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2, G_FLOG10, G_FTAN, G_FEXP,
- G_FEXP2, G_FEXP10, G_FACOS, G_FASIN, G_FATAN, G_FCOSH, G_FSINH, G_FTANH})
+ getActionDefinitionsBuilder({G_FCOS, G_FSIN, G_FPOW, G_FLOG, G_FLOG2,
+ G_FLOG10, G_FTAN, G_FEXP, G_FEXP2, G_FEXP10,
+ G_FACOS, G_FASIN, G_FATAN, G_FATAN2, G_FCOSH,
+ G_FSINH, G_FTANH})
// We need a call for these, so we always need to scalarize.
.scalarize(0)
// Regardless of FP16 support, widen 16-bit elements to 32-bits.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
index 314c5458e30909..ed7bcff5160f81 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll
@@ -2345,6 +2345,14 @@ define float @test_atan_f32(float %x) {
ret float %y
}
+declare float @llvm.atan2.f32(float, float)
+define float @test_atan2_f32(float %x, float %y) {
+ ; CHECK-LABEL: name: test_atan2_f32
+ ; CHECK: %{{[0-9]+}}:_(s32) = G_FATAN2 %{{[0-9]+}}
+ %z = call float @llvm.atan2.f32(float %x, float %y)
+ ret float %z
+}
+
declare float @llvm.cosh.f32(float)
define float @test_cosh_f32(float %x) {
; CHECK-LABEL: name: test_cosh_f32
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atan2.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atan2.mir
new file mode 100644
index 00000000000000..ac4f4358e98790
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-atan2.mir
@@ -0,0 +1,255 @@
+# RUN: llc -verify-machineinstrs -mtriple aarch64--- \
+# RUN: -run-pass=legalizer -mattr=+fullfp16 -global-isel %s -o - \
+# RUN: | FileCheck %s
+...
+---
+name: test_v4f16.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_v4f16.atan2
+ ; CHECK: [[V1:%[0-9]+]]:_(s16), [[V2:%[0-9]+]]:_(s16), [[V3:%[0-9]+]]:_(s16), [[V4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>)
+ ; CHECK: [[V5:%[0-9]+]]:_(s16), [[V6:%[0-9]+]]:_(s16), [[V7:%[0-9]+]]:_(s16), [[V8:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s16>)
+
+ ; CHECK-DAG: [[V1_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V1]](s16)
+ ; CHECK-DAG: [[V5_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V5]](s16)
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-NEXT: $s0 = COPY [[V1_S32]](s32)
+ ; CHECK-NEXT: $s1 = COPY [[V5_S32]](s32)
+ ; CHECK-NEXT: BL &atan2f
+ ; CHECK-NEXT: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT1_S32:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT1_S32]](s32)
+
+ ; CHECK-DAG: [[V2_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V2]](s16)
+ ; CHECK-DAG: [[V6_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V6]](s16)
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-NEXT: $s0 = COPY [[V2_S32]](s32)
+ ; CHECK-NEXT: $s1 = COPY [[V6_S32]](s32)
+ ; CHECK-NEXT: BL &atan2f
+ ; CHECK-NEXT: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT2_S32:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT2_S32]](s32)
+
+ ; CHECK-DAG: [[V3_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V3]](s16)
+ ; CHECK-DAG: [[V7_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V7]](s16)
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-NEXT: $s0 = COPY [[V3_S32]](s32)
+ ; CHECK-NEXT: $s1 = COPY [[V7_S32]](s32)
+ ; CHECK-NEXT: BL &atan2f
+ ; CHECK-NEXT: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT3_S32:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT3_S32]](s32)
+
+ ; CHECK-DAG: [[V4_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V4]](s16)
+ ; CHECK-DAG: [[V8_S32:%[0-9]+]]:_(s32) = G_FPEXT [[V8]](s16)
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-NEXT: $s0 = COPY [[V4_S32]](s32)
+ ; CHECK-NEXT: $s1 = COPY [[V8_S32]](s32)
+ ; CHECK-NEXT: BL &atan2f
+ ; CHECK-NEXT: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT4_S32:%[0-9]+]]:_(s32) = COPY $s0
+ ; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s16) = G_FPTRUNC [[ELT4_S32]](s32)
+
+ ; CHECK-DAG: %{{[0-9]+}}:_(<4 x s16>) = G_BUILD_VECTOR [[ELT1]](s16), [[ELT2]](s16), [[ELT3]](s16), [[ELT4]](s16)
+
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(<4 x s16>) = COPY $d1
+ %2:_(<4 x s16>) = G_FATAN2 %0, %1
+ $d0 = COPY %2(<4 x s16>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: test_v8f16.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: test_v8f16.atan2
+
+ ; This is big, so let's just check for the 8 calls to atan2f, the the
+ ; G_UNMERGE_VALUES, and the G_BUILD_VECTOR. The other instructions ought
+ ; to be covered by the other tests.
+
+ ; CHECK: G_UNMERGE_VALUES
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: BL &atan2f
+ ; CHECK: G_BUILD_VECTOR
+
+ %0:_(<8 x s16>) = COPY $q0
+ %1:_(<8 x s16>) = COPY $q1
+ %2:_(<8 x s16>) = G_FATAN2 %0, %1
+ $q0 = COPY %2(<8 x s16>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v2f32.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: test_v2f32.atan2
+ ; CHECK: [[V1:%[0-9]+]]:_(s32), [[V2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s32>)
+ ; CHECK: [[V3:%[0-9]+]]:_(s32), [[V4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s32>)
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $s0 = COPY [[V1]](s32)
+ ; CHECK-DAG: $s1 = COPY [[V3]](s32)
+ ; CHECK: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK: [[ELT1:%[0-9]+]]:_(s32) = COPY $s0
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $s0 = COPY [[V2]](s32)
+ ; CHECK-DAG: $s1 = COPY [[V4]](s32)
+ ; CHECK: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK: [[ELT2:%[0-9]+]]:_(s32) = COPY $s0
+
+ ; CHECK: %2:_(<2 x s32>) = G_BUILD_VECTOR [[ELT1]](s32), [[ELT2]](s32)
+
+ %0:_(<2 x s32>) = COPY $d0
+ %1:_(<2 x s32>) = COPY $d1
+ %2:_(<2 x s32>) = G_FATAN2 %0, %1
+ $d0 = COPY %2(<2 x s32>)
+ RET_ReallyLR implicit $d0
+
+...
+---
+name: test_v4f32.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: $q0, $q1
+ ; CHECK-LABEL: name: test_v4f32.atan2
+ ; CHECK: [[V1:%[0-9]+]]:_(s32), [[V2:%[0-9]+]]:_(s32), [[V3:%[0-9]+]]:_(s32), [[V4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s32>)
+ ; CHECK: [[V5:%[0-9]+]]:_(s32), [[V6:%[0-9]+]]:_(s32), [[V7:%[0-9]+]]:_(s32), [[V8:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %{{[0-9]+}}(<4 x s32>)
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $s0 = COPY [[V1]](s32)
+ ; CHECK-DAG: $s1 = COPY [[V5]](s32)
+ ; CHECK: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s32) = COPY $s0
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $s0 = COPY [[V2]](s32)
+ ; CHECK-DAG: $s1 = COPY [[V6]](s32)
+ ; CHECK: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s32) = COPY $s0
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $s0 = COPY [[V3]](s32)
+ ; CHECK-DAG: $s1 = COPY [[V7]](s32)
+ ; CHECK: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT3:%[0-9]+]]:_(s32) = COPY $s0
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $s0 = COPY [[V4]](s32)
+ ; CHECK-DAG: $s1 = COPY [[V8]](s32)
+ ; CHECK: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT4:%[0-9]+]]:_(s32) = COPY $s0
+
+ ; CHECK: %2:_(<4 x s32>) = G_BUILD_VECTOR [[ELT1]](s32), [[ELT2]](s32), [[ELT3]](s32), [[ELT4]](s32)
+
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %2:_(<4 x s32>) = G_FATAN2 %0, %1
+ $q0 = COPY %2(<4 x s32>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_v2f64.atan2
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: $q0, $q1
+
+ ; CHECK-LABEL: name: test_v2f64.atan2
+ ; CHECK: [[V1:%[0-9]+]]:_(s64), [[V2:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s64>)
+ ; CHECK: [[V3:%[0-9]+]]:_(s64), [[V4:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES %{{[0-9]+}}(<2 x s64>)
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $d0 = COPY [[V1]](s64)
+ ; CHECK-DAG: $d1 = COPY [[V3]](s64)
+ ; CHECK: BL &atan2
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT1:%[0-9]+]]:_(s64) = COPY $d0
+
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-DAG: $d0 = COPY [[V2]](s64)
+ ; CHECK-DAG: $d1 = COPY [[V4]](s64)
+ ; CHECK: BL &atan2
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[ELT2:%[0-9]+]]:_(s64) = COPY $d0
+
+ ; CHECK: %2:_(<2 x s64>) = G_BUILD_VECTOR [[ELT1]](s64), [[ELT2]](s64)
+
+ %0:_(<2 x s64>) = COPY $q0
+ %1:_(<2 x s64>) = COPY $q1
+ %2:_(<2 x s64>) = G_FATAN2 %0, %1
+ $q0 = COPY %2(<2 x s64>)
+ RET_ReallyLR implicit $q0
+
+...
+---
+name: test_atan2_half
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: _ }
+ - { id: 1, class: _ }
+body: |
+ bb.0:
+ liveins: $h0, $h1
+ ; CHECK-LABEL: name: test_atan2_half
+ ; CHECK: [[REG1:%[0-9]+]]:_(s32) = G_FPEXT %0(s16)
+ ; CHECK: [[REG2:%[0-9]+]]:_(s32) = G_FPEXT %1(s16)
+ ; CHECK-NEXT: ADJCALLSTACKDOWN
+ ; CHECK-NEXT: $s0 = COPY [[REG1]](s32)
+ ; CHECK-NEXT: $s1 = COPY [[REG2]](s32)
+ ; CHECK-NEXT: BL &atan2f
+ ; CHECK: ADJCALLSTACKUP
+ ; CHECK-NEXT: [[REG2:%[0-9]+]]:_(s32) = C...
[truncated]
|
Failures in buildkite are unrelated. I might update the branch just to kick it off again, hopefully with that issue fixed. |
Known Windows issue: |
This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 - `VecFuncs.def`: define intrinsic to sleef/armpl mapping - `LegalizerHelper.cpp`: add missing fewerElementsVector handling for the new atan2 intrinsic - `AArch64ISelLowering.cpp`: Add arch64 specializations for lowering like neon instructions - `AArch64LegalizerInfo.cpp`: Legalize atan2. Part 5 for Implement the atan2 HLSL Function llvm#70096.
Before, this test was manually adapted from the pattern used for some of the other arc/hyperbolic intrinsics (such as legalize-atan.mir). This changes the test to the result of using update_mir_test_checks.py instead.
This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 - `VecFuncs.def`: define intrinsic to sleef/armpl mapping - `LegalizerHelper.cpp`: add missing fewerElementsVector handling for the new atan2 intrinsic - `AArch64ISelLowering.cpp`: Add arch64 specializations for lowering like neon instructions - `AArch64LegalizerInfo.cpp`: Legalize atan2. Part 5 for Implement the atan2 HLSL Function llvm#70096.
This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294
VecFuncs.def
: define intrinsic to sleef/armpl mappingLegalizerHelper.cpp
: add missing fewerElementsVector handling for the new atan2 intrinsicAArch64ISelLowering.cpp
: Add arch64 specializations for lowering like neon instructionsAArch64LegalizerInfo.cpp
: Legalize atan2.Part 5 for Implement the atan2 HLSL Function #70096.