Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Instruction selection support for x87 ld/st #97016

Merged
merged 4 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 23 additions & 5 deletions llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,15 @@ X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
return &X86::VR512RegClass;
}

if (RB.getID() == X86::PSRRegBankID) {
if (Ty.getSizeInBits() == 80)
return &X86::RFP80RegClass;
if (Ty.getSizeInBits() == 64)
return &X86::RFP64RegClass;
if (Ty.getSizeInBits() == 32)
return &X86::RFP32RegClass;
}

MalaySanghi marked this conversation as resolved.
Show resolved Hide resolved
llvm_unreachable("Unknown RegBank!");
}

Expand Down Expand Up @@ -462,6 +471,8 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
: (HasAVX512 ? X86::VMOVSSZmr :
HasAVX ? X86::VMOVSSmr :
X86::MOVSSmr);
if (X86::PSRRegBankID == RB.getID())
return Isload ? X86::LD_Fp32m : X86::ST_Fp32m;
} else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
if (X86::GPRRegBankID == RB.getID())
return Isload ? X86::MOV64rm : X86::MOV64mr;
Expand All @@ -472,6 +483,10 @@ unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
: (HasAVX512 ? X86::VMOVSDZmr :
HasAVX ? X86::VMOVSDmr :
X86::MOVSDmr);
if (X86::PSRRegBankID == RB.getID())
return Isload ? X86::LD_Fp64m : X86::ST_Fp64m;
} else if (Ty == LLT::scalar(80)) {
return Isload ? X86::LD_Fp80m : X86::ST_FpP80m;
} else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
if (Alignment >= Align(16))
return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
Expand Down Expand Up @@ -611,7 +626,9 @@ bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
I.removeOperand(0);
addFullAddress(MIB, AM).addUse(DefReg);
}
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI);
I.addImplicitDefUseOperands(MF);
return Constrained;
}

static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
Expand Down Expand Up @@ -1503,14 +1520,15 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,
const Register DstReg = I.getOperand(0).getReg();
const LLT DstTy = MRI.getType(DstReg);
const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
Align Alignment = Align(DstTy.getSizeInBytes());
// Create the load from the constant pool.
const ConstantFP *CFP = I.getOperand(1).getFPImm();
const auto &DL = MF.getDataLayout();
Align Alignment = DL.getPrefTypeAlign(CFP->getType());
const DebugLoc &DbgLoc = I.getDebugLoc();

unsigned Opc =
getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);

// Create the load from the constant pool.
const ConstantFP *CFP = I.getOperand(1).getFPImm();
unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
MachineInstr *LoadInst = nullptr;
unsigned char OpFlag = STI.classifyLocalReference(nullptr);
Expand All @@ -1525,7 +1543,7 @@ bool X86InstructionSelector::materializeFP(MachineInstr &I,

MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment);
LLT::pointer(0, DL.getPointerSizeInBits()), Alignment);

LoadInst =
addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
Expand Down
9 changes: 0 additions & 9 deletions llvm/test/CodeGen/X86/GlobalISel/x86_64-fallback.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,6 @@
; When we cannot produce a test case anymore, that means we can remove
; the fallback path.

; Check that we fallback on invoke translation failures.
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: G_STORE %1:psr(s80), %0:gpr(p0) :: (store (s80) into %ir.ptr, align 16) (in function: test_x86_fp80_dump)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for test_x86_fp80_dump
; FALLBACK-WITH-REPORT-OUT-LABEL: test_x86_fp80_dump:
define void @test_x86_fp80_dump(ptr %ptr){
store x86_fp80 0xK4002A000000000000000, ptr %ptr, align 16
ret void
}

; Check that we fall back on a byval argument
; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: unable to translate instruction: call: ' call void @ScaleObjectOverwrite_3(ptr %index, ptr byval(%struct.PointListStruct) %index)' (in function: ScaleObjectOverwrite_2)
; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for ScaleObjectOverwrite_2
Expand Down
225 changes: 225 additions & 0 deletions llvm/test/CodeGen/X86/isel-x87.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-32,GISEL_X86
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Move under the GlobalISel test subdirectory? I also wouldn't add in the fast-isel lines

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was originally in globalisel subdir. The previous commit has a review comment to add fast-isel and move here.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please no, x86 is trying to do isel comparison tests - we don't want them dumped inside the GlobalISel-specific subdir

; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86
; RUN: llc < %s -mtriple=i686-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-32,SDAG_X86,FAST_X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -global-isel | FileCheck %s --check-prefixes=CHECK-64,GISEL_X64
; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64
; RUN: llc < %s -mtriple=x86_64-- -mattr=+x87,-sse,-sse2 -fast-isel=true | FileCheck %s --check-prefixes=CHECK-64,SDAG_X64,FAST_X64

; Round-trips x86_fp80 values through the stack: the argument %a and an
; x86_fp80 constant are each stored to a 16-byte-aligned alloca, loaded back,
; added together, and the sum is returned. This covers 80-bit x87 loads and
; stores plus materializing an x86_fp80 constant from the constant pool.
define x86_fp80 @f0(x86_fp80 noundef %a) nounwind {
; GISEL_X86-LABEL: f0:
; GISEL_X86: # %bb.0:
; GISEL_X86-NEXT: pushl %ebp
; GISEL_X86-NEXT: movl %esp, %ebp
; GISEL_X86-NEXT: andl $-16, %esp
; GISEL_X86-NEXT: subl $48, %esp
; GISEL_X86-NEXT: fldt 8(%ebp)
; GISEL_X86-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}
; GISEL_X86-NEXT: fxch %st(1)
; GISEL_X86-NEXT: fstpt {{[0-9]+}}(%esp)
; GISEL_X86-NEXT: fstpt (%esp)
; GISEL_X86-NEXT: fldt {{[0-9]+}}(%esp)
; GISEL_X86-NEXT: fldt (%esp)
; GISEL_X86-NEXT: faddp %st, %st(1)
; GISEL_X86-NEXT: movl %ebp, %esp
; GISEL_X86-NEXT: popl %ebp
; GISEL_X86-NEXT: retl
;
; SDAG_X86-LABEL: f0:
; SDAG_X86: # %bb.0:
; SDAG_X86-NEXT: pushl %ebp
; SDAG_X86-NEXT: movl %esp, %ebp
; SDAG_X86-NEXT: andl $-16, %esp
; SDAG_X86-NEXT: subl $48, %esp
; SDAG_X86-NEXT: fldt 8(%ebp)
; SDAG_X86-NEXT: fld %st(0)
; SDAG_X86-NEXT: fstpt {{[0-9]+}}(%esp)
; SDAG_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; SDAG_X86-NEXT: fld %st(0)
; SDAG_X86-NEXT: fstpt (%esp)
; SDAG_X86-NEXT: faddp %st, %st(1)
; SDAG_X86-NEXT: movl %ebp, %esp
; SDAG_X86-NEXT: popl %ebp
; SDAG_X86-NEXT: retl
;
; GISEL_X64-LABEL: f0:
; GISEL_X64: # %bb.0:
; GISEL_X64-NEXT: fldt {{[0-9]+}}(%rsp)
; GISEL_X64-NEXT: fldt {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; GISEL_X64-NEXT: fxch %st(1)
; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
; GISEL_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp)
; GISEL_X64-NEXT: fldt -{{[0-9]+}}(%rsp)
; GISEL_X64-NEXT: faddp %st, %st(1)
; GISEL_X64-NEXT: retq
;
; SDAG_X64-LABEL: f0:
; SDAG_X64: # %bb.0:
; SDAG_X64-NEXT: fldt {{[0-9]+}}(%rsp)
; SDAG_X64-NEXT: fld %st(0)
; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
; SDAG_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; SDAG_X64-NEXT: fld %st(0)
; SDAG_X64-NEXT: fstpt -{{[0-9]+}}(%rsp)
; SDAG_X64-NEXT: faddp %st, %st(1)
; SDAG_X64-NEXT: retq
%a.addr = alloca x86_fp80, align 16
%x = alloca x86_fp80, align 16
store x86_fp80 %a, ptr %a.addr, align 16
store x86_fp80 0xK400A8000000000000000, ptr %x, align 16
%load1 = load x86_fp80, ptr %a.addr, align 16
%load2 = load x86_fp80, ptr %x, align 16
%add = fadd x86_fp80 %load1, %load2
ret x86_fp80 %add
}


; x86_fp80 subtraction through memory with 4-byte-aligned accesses: loads an
; 80-bit value from each of %a and %b, computes (%a value) - (%b value), and
; stores the difference back through %a.
define void @f1(ptr %a, ptr %b) nounwind {
; GISEL_X86-LABEL: f1:
; GISEL_X86: # %bb.0:
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; GISEL_X86-NEXT: fldt (%eax)
; GISEL_X86-NEXT: fldt (%ecx)
; GISEL_X86-NEXT: fsubrp %st, %st(1)
; GISEL_X86-NEXT: fstpt (%eax)
; GISEL_X86-NEXT: retl
;
; SDAG_X86-LABEL: f1:
; SDAG_X86: # %bb.0:
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SDAG_X86-NEXT: fldt (%ecx)
; SDAG_X86-NEXT: fldt (%eax)
; SDAG_X86-NEXT: fsubrp %st, %st(1)
; SDAG_X86-NEXT: fstpt (%ecx)
; SDAG_X86-NEXT: retl
;
; CHECK-64-LABEL: f1:
; CHECK-64: # %bb.0:
; CHECK-64-NEXT: fldt (%rdi)
; CHECK-64-NEXT: fldt (%rsi)
; CHECK-64-NEXT: fsubrp %st, %st(1)
; CHECK-64-NEXT: fstpt (%rdi)
; CHECK-64-NEXT: retq
%load1 = load x86_fp80, ptr %a, align 4
%load2 = load x86_fp80, ptr %b, align 4
%sub = fsub x86_fp80 %load1, %load2
store x86_fp80 %sub, ptr %a, align 4
ret void
}

; x86_fp80 multiplication through memory with 16-byte-aligned accesses: loads
; an 80-bit value from each of %a and %b, multiplies them, and stores the
; product back through %a.
define void @f2(ptr %a, ptr %b) nounwind {
; GISEL_X86-LABEL: f2:
; GISEL_X86: # %bb.0:
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; GISEL_X86-NEXT: fldt (%eax)
; GISEL_X86-NEXT: fldt (%ecx)
; GISEL_X86-NEXT: fmulp %st, %st(1)
; GISEL_X86-NEXT: fstpt (%eax)
; GISEL_X86-NEXT: retl
;
; SDAG_X86-LABEL: f2:
; SDAG_X86: # %bb.0:
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SDAG_X86-NEXT: fldt (%ecx)
; SDAG_X86-NEXT: fldt (%eax)
; SDAG_X86-NEXT: fmulp %st, %st(1)
; SDAG_X86-NEXT: fstpt (%ecx)
; SDAG_X86-NEXT: retl
;
; CHECK-64-LABEL: f2:
; CHECK-64: # %bb.0:
; CHECK-64-NEXT: fldt (%rdi)
; CHECK-64-NEXT: fldt (%rsi)
; CHECK-64-NEXT: fmulp %st, %st(1)
; CHECK-64-NEXT: fstpt (%rdi)
; CHECK-64-NEXT: retq
%load1 = load x86_fp80, ptr %a, align 16
%load2 = load x86_fp80, ptr %b, align 16
%mul = fmul x86_fp80 %load1, %load2
store x86_fp80 %mul, ptr %a, align 16
ret void
}

; x86_fp80 division through memory with 4-byte-aligned accesses: loads an
; 80-bit value from each of %a and %b, computes (%a value) / (%b value), and
; stores the quotient back through %a.
define void @f3(ptr %a, ptr %b) nounwind {
; GISEL_X86-LABEL: f3:
; GISEL_X86: # %bb.0:
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; GISEL_X86-NEXT: fldt (%eax)
; GISEL_X86-NEXT: fldt (%ecx)
; GISEL_X86-NEXT: fdivrp %st, %st(1)
; GISEL_X86-NEXT: fstpt (%eax)
; GISEL_X86-NEXT: retl
;
; SDAG_X86-LABEL: f3:
; SDAG_X86: # %bb.0:
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SDAG_X86-NEXT: fldt (%ecx)
; SDAG_X86-NEXT: fldt (%eax)
; SDAG_X86-NEXT: fdivrp %st, %st(1)
; SDAG_X86-NEXT: fstpt (%ecx)
; SDAG_X86-NEXT: retl
;
; CHECK-64-LABEL: f3:
; CHECK-64: # %bb.0:
; CHECK-64-NEXT: fldt (%rdi)
; CHECK-64-NEXT: fldt (%rsi)
; CHECK-64-NEXT: fdivrp %st, %st(1)
; CHECK-64-NEXT: fstpt (%rdi)
; CHECK-64-NEXT: retq
%load1 = load x86_fp80, ptr %a, align 4
%load2 = load x86_fp80, ptr %b, align 4
%div = fdiv x86_fp80 %load1, %load2
store x86_fp80 %div, ptr %a, align 4
ret void
}

; Single-precision case: loads a float from %a, adds the constant 20.0, and
; stores the sum through %b. With SSE disabled by the RUN lines this covers
; 32-bit x87 loads and stores and a float constant taken from the constant
; pool.
define void @f6(ptr %a, ptr %b) nounwind {
; GISEL_X86-LABEL: f6:
; GISEL_X86: # %bb.0:
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; GISEL_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; GISEL_X86-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}
; GISEL_X86-NEXT: flds (%eax)
; GISEL_X86-NEXT: faddp %st, %st(1)
; GISEL_X86-NEXT: fstps (%ecx)
; GISEL_X86-NEXT: retl
;
; SDAG_X86-LABEL: f6:
; SDAG_X86: # %bb.0:
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; SDAG_X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; SDAG_X86-NEXT: flds (%ecx)
; SDAG_X86-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}
; SDAG_X86-NEXT: fstps (%eax)
; SDAG_X86-NEXT: retl
;
; GISEL_X64-LABEL: f6:
; GISEL_X64: # %bb.0:
; GISEL_X64-NEXT: flds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; GISEL_X64-NEXT: flds (%rdi)
; GISEL_X64-NEXT: faddp %st, %st(1)
; GISEL_X64-NEXT: fstps (%rsi)
; GISEL_X64-NEXT: retq
;
; SDAG_X64-LABEL: f6:
; SDAG_X64: # %bb.0:
; SDAG_X64-NEXT: flds (%rdi)
; SDAG_X64-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(%rip)
; SDAG_X64-NEXT: fstps (%rsi)
; SDAG_X64-NEXT: retq
%load1 = load float, ptr %a
%add = fadd float %load1, 20.0
store float %add, ptr %b
ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-32: {{.*}}
; FAST_X64: {{.*}}
; FAST_X86: {{.*}}
Loading