-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Instruction selection support for x87 ld/st #97016
Conversation
Other ld/st also have c++ selection.
@llvm/pr-subscribers-backend-x86 Author: Malay Sanghi (MalaySanghi) ChangesOther ld/st also have c++ selection. Full diff: https://github.com/llvm/llvm-project/pull/97016.diff 3 Files Affected:
diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 303783ea3fd22..64f977e9c9d66 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -195,6 +195,14 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
return &X86::VR512RegClass;
}
+ if (RB.getID() == X86::PSRRegBankID) {
+ if (Ty.getSizeInBits() == 80)
+ return &X86::RFP80RegClass;
+ if (Ty.getSizeInBits() == 64)
+ return &X86::RFP64RegClass;
+ return &X86::RFP32RegClass;
+ }
+
llvm_unreachable("Unknown RegBank!");
}
@@ -462,6 +470,8 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
: (HasAVX512 ? X86::VMOVSSZmr :
HasAVX ? X86::VMOVSSmr :
X86::MOVSSmr);
+ if (X86::PSRRegBankID == RB.getID())
+ return Isload ? X86::LD_Fp32m : X86::ST_Fp32m;
} else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
if (X86::GPRRegBankID == RB.getID())
return Isload ? X86::MOV64rm : X86::MOV64mr;
@@ -472,6 +482,10 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
: (HasAVX512 ? X86::VMOVSDZmr :
HasAVX ? X86::VMOVSDmr :
X86::MOVSDmr);
+ if (X86::PSRRegBankID == RB.getID())
+ return Isload ? X86::LD_Fp64m : X86::ST_Fp64m;
+ } else if (Ty == LLT::scalar(80) || Ty == LLT::pointer(0, 80)) {
+ return Isload ? X86::LD_Fp80m : X86::ST_FpP80m;
} else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
if (Alignment >= Align(16))
return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
@@ -611,7 +625,10 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
I.removeOperand(0);
addFullAddress(MIB, AM).addUse(DefReg);
}
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ if (Constrained)
+ I.addImplicitDefUseOperands(MF);
+ return Constrained;
}
static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
@@ -1503,14 +1520,15 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
const Register DstReg = I.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
- Align Alignment = Align(DstTy.getSizeInBytes());
+ // Create the load from the constant pool.
+ const ConstantFP *CFP = I.getOperand(1).getFPImm();
+ const auto &DataLayout = MF.getDataLayout();
+ Align Alignment = DataLayout.getPrefTypeAlign(CFP->getType());
const DebugLoc &DbgLoc = I.getDebugLoc();
unsigned Opc =
getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);
- // Create the load from the constant pool.
- const ConstantFP *CFP = I.getOperand(1).getFPImm();
unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
MachineInstr *LoadInst = nullptr;
unsigned char OpFlag = STI.classifyLocalReference(nullptr);
@@ -1525,7 +1543,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
- LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment);
+ LLT::pointer(0, DataLayout.getPointerSizeInBits()), Alignment);
LoadInst =
addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
index 39302734dde78..bb0f0ae14f304 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
@@ -7,15 +7,6 @@
; When we cannot produce a test case anymore, that means we can remove
; the fallback path.
-; Check that we fallback on invoke translation failures.
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %1:psr(s80), %0:gpr(p0) :: (store (s80) into %ir.ptr, align 16) (in function: test_x86_fp80_dump)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_x86_fp80_dump
-; FALLBACK-WITH-REPORT-OUT-LABEL: test_x86_fp80_dump:
-define void @test_x86_fp80_dump(ptr %ptr){
- store x86_fp80 0xK4002A000000000000000, ptr %ptr, align 16
- ret void
-}
-
; Check that we fallback on byVal argument
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void @ScaleObjectOverwrite_3(ptr %index, ptr byval(%struct.PointListStruct) %index)' (in function: ScaleObjectOverwrite_2)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ScaleObjectOverwrite_2
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x87.ll b/llvm/test/CodeGen/X86/GlobalISel/x87.ll
new file mode 100644
index 0000000000000..ebec84b03ba20
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/x87.ll
@@ -0,0 +1,221 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64
+; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64
+
+define x86_fp80 @f0(x86_fp80 noundef %a) nounwind {
+; GISEL_X86-LABEL: f0:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: pushl %ebp
+; GISEL_X86-NEXT: movl %esp, %ebp
+; GISEL_X86-NEXT: andl $-16, %esp
+; GISEL_X86-NEXT: subl $48, %esp
+; GISEL_X86-NEXT: fldt 8(%ebp)
+; GISEL_X86-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}
+; GISEL_X86-NEXT: fxch %st(1)
+; GISEL_X86-NEXT: fstpt {{[0-9]+}}(%esp)
+; GISEL_X86-NEXT: fstpt (%esp)
+; GISEL_X86-NEXT: fldt {{[0-9]+}}(%esp)
+; GISEL_X86-NEXT: fldt (%esp)
+; GISEL_X86-NEXT: faddp %st, %st(1)
+; GISEL_X86-NEXT: movl %ebp, %esp
+; GISEL_X86-NEXT: popl %ebp
+; GISEL_X86-NEXT: retl
+;
+; GISEL_X64-LABEL: f0:
+; GISEL_X64: # %bb.0:
+; GISEL_X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; GISEL_X64-NEXT: fxch %st(1)
+; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: faddp %st, %st(1)
+; GISEL_X64-NEXT: retq
+;
+; SDAG_X86-LABEL: f0:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: pushl %ebp
+; SDAG_X86-NEXT: movl %esp, %ebp
+; SDAG_X86-NEXT: andl $-16, %esp
+; SDAG_X86-NEXT: subl $48, %esp
+; SDAG_X86-NEXT: fldt 8(%ebp)
+; SDAG_X86-NEXT: fld %st(0)
+; SDAG_X86-NEXT: fstpt {{[0-9]+}}(%esp)
+; SDAG_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; SDAG_X86-NEXT: fld %st(0)
+; SDAG_X86-NEXT: fstpt (%esp)
+; SDAG_X86-NEXT: faddp %st, %st(1)
+; SDAG_X86-NEXT: movl %ebp, %esp
+; SDAG_X86-NEXT: popl %ebp
+; SDAG_X86-NEXT: retl
+;
+; SDAG_X64-LABEL: f0:
+; SDAG_X64: # %bb.0:
+; SDAG_X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; SDAG_X64-NEXT: fld %st(0)
+; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
+; SDAG_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SDAG_X64-NEXT: fld %st(0)
+; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
+; SDAG_X64-NEXT: faddp %st, %st(1)
+; SDAG_X64-NEXT: retq
+ %a.addr = alloca x86_fp80, align 16
+ %x = alloca x86_fp80, align 16
+ store x86_fp80 %a, ptr %a.addr, align 16
+ store x86_fp80 0xK400A8000000000000000, ptr %x, align 16
+ %load1 = load x86_fp80, ptr %a.addr, align 16
+ %load2 = load x86_fp80, ptr %x, align 16
+ %add = fadd x86_fp80 %load1, %load2
+ ret x86_fp80 %add
+}
+
+
+define void @f1(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f1:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT: fldt (%eax)
+; GISEL_X86-NEXT: fldt (%ecx)
+; GISEL_X86-NEXT: fsubrp %st, %st(1)
+; GISEL_X86-NEXT: fstpt (%eax)
+; GISEL_X86-NEXT: retl
+;
+; CHECK-64-LABEL: f1:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: fldt (%rdi)
+; CHECK-64-NEXT: fldt (%rsi)
+; CHECK-64-NEXT: fsubrp %st, %st(1)
+; CHECK-64-NEXT: fstpt (%rdi)
+; CHECK-64-NEXT: retq
+;
+; SDAG_X86-LABEL: f1:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT: fldt (%ecx)
+; SDAG_X86-NEXT: fldt (%eax)
+; SDAG_X86-NEXT: fsubrp %st, %st(1)
+; SDAG_X86-NEXT: fstpt (%ecx)
+; SDAG_X86-NEXT: retl
+ %load1 = load x86_fp80, ptr %a, align 4
+ %load2 = load x86_fp80, ptr %b, align 4
+ %sub = fsub x86_fp80 %load1, %load2
+ store x86_fp80 %sub, ptr %a, align 4
+ ret void
+}
+
+define void @f2(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f2:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT: fldt (%eax)
+; GISEL_X86-NEXT: fldt (%ecx)
+; GISEL_X86-NEXT: fmulp %st, %st(1)
+; GISEL_X86-NEXT: fstpt (%eax)
+; GISEL_X86-NEXT: retl
+;
+; CHECK-64-LABEL: f2:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: fldt (%rdi)
+; CHECK-64-NEXT: fldt (%rsi)
+; CHECK-64-NEXT: fmulp %st, %st(1)
+; CHECK-64-NEXT: fstpt (%rdi)
+; CHECK-64-NEXT: retq
+;
+; SDAG_X86-LABEL: f2:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT: fldt (%ecx)
+; SDAG_X86-NEXT: fldt (%eax)
+; SDAG_X86-NEXT: fmulp %st, %st(1)
+; SDAG_X86-NEXT: fstpt (%ecx)
+; SDAG_X86-NEXT: retl
+ %load1 = load x86_fp80, ptr %a, align 16
+ %load2 = load x86_fp80, ptr %b, align 16
+ %mul = fmul x86_fp80 %load1, %load2
+ store x86_fp80 %mul, ptr %a, align 16
+ ret void
+}
+
+define void @f3(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f3:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT: fldt (%eax)
+; GISEL_X86-NEXT: fldt (%ecx)
+; GISEL_X86-NEXT: fdivrp %st, %st(1)
+; GISEL_X86-NEXT: fstpt (%eax)
+; GISEL_X86-NEXT: retl
+;
+; CHECK-64-LABEL: f3:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: fldt (%rdi)
+; CHECK-64-NEXT: fldt (%rsi)
+; CHECK-64-NEXT: fdivrp %st, %st(1)
+; CHECK-64-NEXT: fstpt (%rdi)
+; CHECK-64-NEXT: retq
+;
+; SDAG_X86-LABEL: f3:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT: fldt (%ecx)
+; SDAG_X86-NEXT: fldt (%eax)
+; SDAG_X86-NEXT: fdivrp %st, %st(1)
+; SDAG_X86-NEXT: fstpt (%ecx)
+; SDAG_X86-NEXT: retl
+ %load1 = load x86_fp80, ptr %a, align 4
+ %load2 = load x86_fp80, ptr %b, align 4
+ %div = fdiv x86_fp80 %load1, %load2
+ store x86_fp80 %div, ptr %a, align 4
+ ret void
+}
+
+define void @f6(ptr %0, ptr %1) nounwind {
+; GISEL_X86-LABEL: f6:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; GISEL_X86-NEXT: flds (%eax)
+; GISEL_X86-NEXT: faddp %st, %st(1)
+; GISEL_X86-NEXT: fstps (%ecx)
+; GISEL_X86-NEXT: retl
+;
+; GISEL_X64-LABEL: f6:
+; GISEL_X64: # %bb.0:
+; GISEL_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; GISEL_X64-NEXT: flds (%rdi)
+; GISEL_X64-NEXT: faddp %st, %st(1)
+; GISEL_X64-NEXT: fstps (%rsi)
+; GISEL_X64-NEXT: retq
+;
+; SDAG_X86-LABEL: f6:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT: flds (%ecx)
+; SDAG_X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}
+; SDAG_X86-NEXT: fstps (%eax)
+; SDAG_X86-NEXT: retl
+;
+; SDAG_X64-LABEL: f6:
+; SDAG_X64: # %bb.0:
+; SDAG_X64-NEXT: flds (%rdi)
+; SDAG_X64-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SDAG_X64-NEXT: fstps (%rsi)
+; SDAG_X64-NEXT: retq
+ %load1 = load float, ptr %0
+ %add = fadd float %load1, 20.0
+ store float %add, ptr %1
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-32: {{.*}}
|
@llvm/pr-subscribers-llvm-globalisel Author: Malay Sanghi (MalaySanghi) ChangesOther ld/st also have c++ selection. Full diff: https://github.com/llvm/llvm-project/pull/97016.diff 3 Files Affected:
diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
index 303783ea3fd22..64f977e9c9d66 100644
--- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
+++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
@@ -195,6 +195,14 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
return &X86::VR512RegClass;
}
+ if (RB.getID() == X86::PSRRegBankID) {
+ if (Ty.getSizeInBits() == 80)
+ return &X86::RFP80RegClass;
+ if (Ty.getSizeInBits() == 64)
+ return &X86::RFP64RegClass;
+ return &X86::RFP32RegClass;
+ }
+
llvm_unreachable("Unknown RegBank!");
}
@@ -462,6 +470,8 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
: (HasAVX512 ? X86::VMOVSSZmr :
HasAVX ? X86::VMOVSSmr :
X86::MOVSSmr);
+ if (X86::PSRRegBankID == RB.getID())
+ return Isload ? X86::LD_Fp32m : X86::ST_Fp32m;
} else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
if (X86::GPRRegBankID == RB.getID())
return Isload ? X86::MOV64rm : X86::MOV64mr;
@@ -472,6 +482,10 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
: (HasAVX512 ? X86::VMOVSDZmr :
HasAVX ? X86::VMOVSDmr :
X86::MOVSDmr);
+ if (X86::PSRRegBankID == RB.getID())
+ return Isload ? X86::LD_Fp64m : X86::ST_Fp64m;
+ } else if (Ty == LLT::scalar(80) || Ty == LLT::pointer(0, 80)) {
+ return Isload ? X86::LD_Fp80m : X86::ST_FpP80m;
} else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
if (Alignment >= Align(16))
return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
@@ -611,7 +625,10 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
I.removeOperand(0);
addFullAddress(MIB, AM).addUse(DefReg);
}
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ if (Constrained)
+ I.addImplicitDefUseOperands(MF);
+ return Constrained;
}
static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
@@ -1503,14 +1520,15 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
const Register DstReg = I.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
- Align Alignment = Align(DstTy.getSizeInBytes());
+ // Create the load from the constant pool.
+ const ConstantFP *CFP = I.getOperand(1).getFPImm();
+ const auto &DataLayout = MF.getDataLayout();
+ Align Alignment = DataLayout.getPrefTypeAlign(CFP->getType());
const DebugLoc &DbgLoc = I.getDebugLoc();
unsigned Opc =
getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);
- // Create the load from the constant pool.
- const ConstantFP *CFP = I.getOperand(1).getFPImm();
unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
MachineInstr *LoadInst = nullptr;
unsigned char OpFlag = STI.classifyLocalReference(nullptr);
@@ -1525,7 +1543,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
- LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment);
+ LLT::pointer(0, DataLayout.getPointerSizeInBits()), Alignment);
LoadInst =
addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
index 39302734dde78..bb0f0ae14f304 100644
--- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
@@ -7,15 +7,6 @@
; When we cannot produce a test case anymore, that means we can remove
; the fallback path.
-; Check that we fallback on invoke translation failures.
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %1:psr(s80), %0:gpr(p0) :: (store (s80) into %ir.ptr, align 16) (in function: test_x86_fp80_dump)
-; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_x86_fp80_dump
-; FALLBACK-WITH-REPORT-OUT-LABEL: test_x86_fp80_dump:
-define void @test_x86_fp80_dump(ptr %ptr){
- store x86_fp80 0xK4002A000000000000000, ptr %ptr, align 16
- ret void
-}
-
; Check that we fallback on byVal argument
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void @ScaleObjectOverwrite_3(ptr %index, ptr byval(%struct.PointListStruct) %index)' (in function: ScaleObjectOverwrite_2)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ScaleObjectOverwrite_2
diff --git a/llvm/test/CodeGen/X86/GlobalISel/x87.ll b/llvm/test/CodeGen/X86/GlobalISel/x87.ll
new file mode 100644
index 0000000000000..ebec84b03ba20
--- /dev/null
+++ b/llvm/test/CodeGen/X86/GlobalISel/x87.ll
@@ -0,0 +1,221 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64
+; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64
+
+define x86_fp80 @f0(x86_fp80 noundef %a) nounwind {
+; GISEL_X86-LABEL: f0:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: pushl %ebp
+; GISEL_X86-NEXT: movl %esp, %ebp
+; GISEL_X86-NEXT: andl $-16, %esp
+; GISEL_X86-NEXT: subl $48, %esp
+; GISEL_X86-NEXT: fldt 8(%ebp)
+; GISEL_X86-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}
+; GISEL_X86-NEXT: fxch %st(1)
+; GISEL_X86-NEXT: fstpt {{[0-9]+}}(%esp)
+; GISEL_X86-NEXT: fstpt (%esp)
+; GISEL_X86-NEXT: fldt {{[0-9]+}}(%esp)
+; GISEL_X86-NEXT: fldt (%esp)
+; GISEL_X86-NEXT: faddp %st, %st(1)
+; GISEL_X86-NEXT: movl %ebp, %esp
+; GISEL_X86-NEXT: popl %ebp
+; GISEL_X86-NEXT: retl
+;
+; GISEL_X64-LABEL: f0:
+; GISEL_X64: # %bb.0:
+; GISEL_X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; GISEL_X64-NEXT: fxch %st(1)
+; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp)
+; GISEL_X64-NEXT: faddp %st, %st(1)
+; GISEL_X64-NEXT: retq
+;
+; SDAG_X86-LABEL: f0:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: pushl %ebp
+; SDAG_X86-NEXT: movl %esp, %ebp
+; SDAG_X86-NEXT: andl $-16, %esp
+; SDAG_X86-NEXT: subl $48, %esp
+; SDAG_X86-NEXT: fldt 8(%ebp)
+; SDAG_X86-NEXT: fld %st(0)
+; SDAG_X86-NEXT: fstpt {{[0-9]+}}(%esp)
+; SDAG_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; SDAG_X86-NEXT: fld %st(0)
+; SDAG_X86-NEXT: fstpt (%esp)
+; SDAG_X86-NEXT: faddp %st, %st(1)
+; SDAG_X86-NEXT: movl %ebp, %esp
+; SDAG_X86-NEXT: popl %ebp
+; SDAG_X86-NEXT: retl
+;
+; SDAG_X64-LABEL: f0:
+; SDAG_X64: # %bb.0:
+; SDAG_X64-NEXT: fldt {{[0-9]+}}(%rsp)
+; SDAG_X64-NEXT: fld %st(0)
+; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
+; SDAG_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SDAG_X64-NEXT: fld %st(0)
+; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
+; SDAG_X64-NEXT: faddp %st, %st(1)
+; SDAG_X64-NEXT: retq
+ %a.addr = alloca x86_fp80, align 16
+ %x = alloca x86_fp80, align 16
+ store x86_fp80 %a, ptr %a.addr, align 16
+ store x86_fp80 0xK400A8000000000000000, ptr %x, align 16
+ %load1 = load x86_fp80, ptr %a.addr, align 16
+ %load2 = load x86_fp80, ptr %x, align 16
+ %add = fadd x86_fp80 %load1, %load2
+ ret x86_fp80 %add
+}
+
+
+define void @f1(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f1:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT: fldt (%eax)
+; GISEL_X86-NEXT: fldt (%ecx)
+; GISEL_X86-NEXT: fsubrp %st, %st(1)
+; GISEL_X86-NEXT: fstpt (%eax)
+; GISEL_X86-NEXT: retl
+;
+; CHECK-64-LABEL: f1:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: fldt (%rdi)
+; CHECK-64-NEXT: fldt (%rsi)
+; CHECK-64-NEXT: fsubrp %st, %st(1)
+; CHECK-64-NEXT: fstpt (%rdi)
+; CHECK-64-NEXT: retq
+;
+; SDAG_X86-LABEL: f1:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT: fldt (%ecx)
+; SDAG_X86-NEXT: fldt (%eax)
+; SDAG_X86-NEXT: fsubrp %st, %st(1)
+; SDAG_X86-NEXT: fstpt (%ecx)
+; SDAG_X86-NEXT: retl
+ %load1 = load x86_fp80, ptr %a, align 4
+ %load2 = load x86_fp80, ptr %b, align 4
+ %sub = fsub x86_fp80 %load1, %load2
+ store x86_fp80 %sub, ptr %a, align 4
+ ret void
+}
+
+define void @f2(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f2:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT: fldt (%eax)
+; GISEL_X86-NEXT: fldt (%ecx)
+; GISEL_X86-NEXT: fmulp %st, %st(1)
+; GISEL_X86-NEXT: fstpt (%eax)
+; GISEL_X86-NEXT: retl
+;
+; CHECK-64-LABEL: f2:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: fldt (%rdi)
+; CHECK-64-NEXT: fldt (%rsi)
+; CHECK-64-NEXT: fmulp %st, %st(1)
+; CHECK-64-NEXT: fstpt (%rdi)
+; CHECK-64-NEXT: retq
+;
+; SDAG_X86-LABEL: f2:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT: fldt (%ecx)
+; SDAG_X86-NEXT: fldt (%eax)
+; SDAG_X86-NEXT: fmulp %st, %st(1)
+; SDAG_X86-NEXT: fstpt (%ecx)
+; SDAG_X86-NEXT: retl
+ %load1 = load x86_fp80, ptr %a, align 16
+ %load2 = load x86_fp80, ptr %b, align 16
+ %mul = fmul x86_fp80 %load1, %load2
+ store x86_fp80 %mul, ptr %a, align 16
+ ret void
+}
+
+define void @f3(ptr %a, ptr %b) nounwind {
+; GISEL_X86-LABEL: f3:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT: fldt (%eax)
+; GISEL_X86-NEXT: fldt (%ecx)
+; GISEL_X86-NEXT: fdivrp %st, %st(1)
+; GISEL_X86-NEXT: fstpt (%eax)
+; GISEL_X86-NEXT: retl
+;
+; CHECK-64-LABEL: f3:
+; CHECK-64: # %bb.0:
+; CHECK-64-NEXT: fldt (%rdi)
+; CHECK-64-NEXT: fldt (%rsi)
+; CHECK-64-NEXT: fdivrp %st, %st(1)
+; CHECK-64-NEXT: fstpt (%rdi)
+; CHECK-64-NEXT: retq
+;
+; SDAG_X86-LABEL: f3:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT: fldt (%ecx)
+; SDAG_X86-NEXT: fldt (%eax)
+; SDAG_X86-NEXT: fdivrp %st, %st(1)
+; SDAG_X86-NEXT: fstpt (%ecx)
+; SDAG_X86-NEXT: retl
+ %load1 = load x86_fp80, ptr %a, align 4
+ %load2 = load x86_fp80, ptr %b, align 4
+ %div = fdiv x86_fp80 %load1, %load2
+ store x86_fp80 %div, ptr %a, align 4
+ ret void
+}
+
+define void @f6(ptr %0, ptr %1) nounwind {
+; GISEL_X86-LABEL: f6:
+; GISEL_X86: # %bb.0:
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; GISEL_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
+; GISEL_X86-NEXT: flds (%eax)
+; GISEL_X86-NEXT: faddp %st, %st(1)
+; GISEL_X86-NEXT: fstps (%ecx)
+; GISEL_X86-NEXT: retl
+;
+; GISEL_X64-LABEL: f6:
+; GISEL_X64: # %bb.0:
+; GISEL_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; GISEL_X64-NEXT: flds (%rdi)
+; GISEL_X64-NEXT: faddp %st, %st(1)
+; GISEL_X64-NEXT: fstps (%rsi)
+; GISEL_X64-NEXT: retq
+;
+; SDAG_X86-LABEL: f6:
+; SDAG_X86: # %bb.0:
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; SDAG_X86-NEXT: flds (%ecx)
+; SDAG_X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}
+; SDAG_X86-NEXT: fstps (%eax)
+; SDAG_X86-NEXT: retl
+;
+; SDAG_X64-LABEL: f6:
+; SDAG_X64: # %bb.0:
+; SDAG_X64-NEXT: flds (%rdi)
+; SDAG_X64-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
+; SDAG_X64-NEXT: fstps (%rsi)
+; SDAG_X64-NEXT: retq
+ %load1 = load float, ptr %0
+ %add = fadd float %load1, 20.0
+ store float %add, ptr %1
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-32: {{.*}}
|
++ @e-kud @arsenm @RKSimon @phoebewang for review |
There is no higher-priority feature than x87 loads and stores? |
; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86 | ||
; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64 | ||
; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86 | ||
; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
please can you add fast-isel tests and move this file up to llvm\test\CodeGen\X86\isel-x87.ll - that's where we're putting test multi-isel test files
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've added a run line for fast-isel, but it falls back to SelectionDAG for all the functions. It seems add/store of fp80 are not supported.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure it belongs in the top-level directory. Presumably this would be covered by an existing test there, in which case that test would gain global-isel run lines. I don't think this should bother testing the fast-isel case (or the DAG case — though I can see why the DAG case would be more useful, as a reference whenever the output matches).
One of the criteria we use is the Plum Hall C/C++ validation test suite; there is a bunch of tests for x87 in it. I agree that this may not be a high priority, but this is some unknown area. It revealed the lack of float/integer differentiation, and the complexities we have while trying to match addresses through SelectionDAG patterns. |
@@ -0,0 +1,225 @@ | |||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 | |||
; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Move under the GlobalISel test subdirectory? I also wouldn't add in the fast-isel lines
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was originally in the globalisel subdir. A review comment on the previous commit asked to add fast-isel and move it here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please no, x86 is trying to doing isel comparison tests - we don't want them dumped inside the GlobalISel specific subdir
@RKSimon @arsenm Edit: I figured out why they weren't posted. I've posted them now; however, this comment summarizes things |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
…vm#97016) Add x87 G_LOAD/G_STORE selection support to existing C++ lowering.
Other ld/st also have c++ selection.