Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SOL] Fix ALU32 instructions with explicit sign extension #116

Merged
merged 1 commit into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 35 additions & 10 deletions llvm/lib/Target/SBF/SBFISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,11 @@ SBFTargetLowering::SBFTargetLowering(const TargetMachine &TM,

if (STI.getHasAlu32()) {
setOperationAction(ISD::BSWAP, MVT::i32, Promote);
setOperationAction(ISD::BR_CC, MVT::i32, Promote);
setOperationAction(ISD::BR_CC, MVT::i32, Custom);
setOperationAction(ISD::CTTZ, MVT::i32, Expand);
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
}

setOperationAction(ISD::CTTZ, MVT::i64, Expand);
Expand Down Expand Up @@ -763,6 +767,30 @@ SDValue SBFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
if (!getHasJmpExt())
NegateCC(LHS, RHS, CC);

bool IsSignedCmp = (CC == ISD::SETGT ||
CC == ISD::SETGE ||
CC == ISD::SETLT ||
CC == ISD::SETLE);
bool Is32Num = LHS.getValueType() == MVT::i32 ||
RHS.getValueType() == MVT::i32;

if (getHasAlu32() && Is32Num) {
if (isIntOrFPConstant(RHS) || isIntOrFPConstant(LHS)) {
// Immediate values are sign extended in SBF, so we sign extend the
// registers for a correct comparison.
LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, LHS);
RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, RHS);
} else if (IsSignedCmp) {
// If the comparison is signed, we sign extend registers
LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, LHS);
RHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, RHS);
} else {
// If the comparison is unsigned, we zero extend registers
LHS = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, LHS);
RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RHS);
}
}

return DAG.getNode(SBFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
DAG.getConstant(CC, DL, MVT::i64), Dest);
}
Expand Down Expand Up @@ -941,7 +969,7 @@ SBFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
if (!isSigned) {
unsigned MovOp =
Subtarget->getHasExplicitSignExt()
? SBF::MOV_rr : SBF::MOV_32_64;
? SBF::MOV_32_64_no_sext : SBF::MOV_32_64;
Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
BuildMI(BB, DL, TII.get(MovOp), PromotedReg0).addReg(Reg);
return PromotedReg0;
Expand Down Expand Up @@ -1087,15 +1115,12 @@ SBFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
CC == ISD::SETLT ||
CC == ISD::SETLE);

// eBPF at the moment only has 64-bit comparison. Any 32-bit comparison need
// to be promoted, however if the 32-bit comparison operands are destination
// registers then they are implicitly zero-extended already, there is no
// need of explicit zero-extend sequence for them.
//
// We simply do extension for all situations in this method, but we will
// try to remove those unnecessary in SBFMIPeephole pass.
// SBF at the moment only has 64-bit comparison. Any 32-bit comparison needs
// to be promoted. If we are comparing against an immediate value, we must
// sign extend the registers. Likewise for signed comparisons. Unsigned
// comparisons will zero extend registers.
if (is32BitCmp)
LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);
LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp || !isSelectRROp);

if (isSelectRROp) {
Register RHS = MI.getOperand(2).getReg();
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/SBF/SBFInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,12 @@ let isCodeGenOnly = 1 in {
def MOV_32_64_addr : MATH_RI<SBF_ALU, SBF_MOV,
(outs GPR:$dst), (ins u64imm:$imm),
"mov32 $dst, $imm", []>, Requires<[SBFNoLddw]>;

// MOV_32_64_no_sext: register-to-register move record built from MATH_RR with
// the SBF_ALU64 class and the SBF_MOV opcode. It reads a 32-bit register
// operand (GPR32:$src), writes a 64-bit register (GPR:$dst), and is printed as
// "mov64 $dst, $src". The pattern list is empty, so it is not matched by
// instruction selection patterns and has to be emitted explicitly.
// NOTE(review): the name suggests a 32->64 widening move that performs no sign
// extension; confirm against the mov64 semantics of the targeted SBF version.
def MOV_32_64_no_sext : MATH_RR<SBF_ALU64, SBF_MOV,
(outs GPR:$dst),
(ins GPR32:$src),
"mov64 $dst, $src",
[]>;
}

let DecoderNamespace = "SBFv2", Predicates = [SBFNoLddw] in {
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/SBF/SBFSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ SBFSubtarget &SBFSubtarget::initializeSubtargetDependencies(const Triple &TT,
void SBFSubtarget::initializeEnvironment(const Triple &TT) {
assert(TT.getArch() == Triple::sbf && "expected Triple::sbf");
HasJmpExt = false;
HasAlu32 = false;
UseDwarfRIS = false;

// SBFv2 features
Expand All @@ -49,6 +48,8 @@ void SBFSubtarget::initializeEnvironment(const Triple &TT) {
HasPqrClass = false;
NewCallConvention = false;
HasStoreImm = false;
HasAlu32 = false;
HasExplicitSignExt = false;
}

void SBFSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
Expand Down
8 changes: 5 additions & 3 deletions llvm/lib/Target/SBF/SBFTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,12 @@ void SBFPassConfig::addMachineSSAOptimization() {
// Peephole ran at last.
TargetPassConfig::addMachineSSAOptimization();

const SBFSubtarget *Subtarget = getSBFTargetMachine().getSubtargetImpl();
// const SBFSubtarget *Subtarget = getSBFTargetMachine().getSubtargetImpl();
if (!DisableMIPeephole) {
if (Subtarget->getHasAlu32())
addPass(createSBFMIPeepholePass());
// TODO: The peephole doesn't work with explicit sign extension. A future PR
// will revamp the implementation.
// if (Subtarget->getHasAlu32())
// addPass(createSBFMIPeepholePass());
addPass(createSBFMIPeepholeTruncElimPass());
}
}
Expand Down
16 changes: 12 additions & 4 deletions llvm/test/CodeGen/SBF/32-bit-subreg-cond-select.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
; RUN: llc -O2 -march=sbf -mattr=+alu32 < %s | FileCheck %s
; RUN: llc -O2 -march=sbf -mattr=+alu32,+explicit-sext -verify-machineinstrs < %s | FileCheck %s
;
; unsigned int select_cc_32 (unsigned a, unsigned b, int c, int d)
; {
Expand Down Expand Up @@ -56,19 +56,23 @@ entry:
ret i32 %c.d
}
; CHECK-LABEL: select_cc_32
; CHECK: mov32 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: mov64 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: mov64 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: jgt r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NOT: lsh64 r{{[0-9]+}}, 32
; CHECK-NOT: rsh64 r{{[0-9]+}}, 32

; Function Attrs: norecurse nounwind readnone
define dso_local i64 @select_cc_32_64(i32 %a, i32 %b, i64 %c, i64 %d) local_unnamed_addr #0 {
entry:
%cmp = icmp ugt i32 %a, %b
%cmp = icmp sgt i32 %a, %b
%c.d = select i1 %cmp, i64 %c, i64 %d
ret i64 %c.d
}
; CHECK-LABEL: select_cc_32_64
; CHECK: mov32 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: mov32 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: jsgt r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NOT: lsh64 r{{[0-9]+}}, 32
; CHECK-NOT: rsh64 r{{[0-9]+}}, 32

Expand All @@ -80,6 +84,7 @@ entry:
ret i32 %c.d
}
; CHECK-LABEL: select_cc_64_32
; CHECK: jsgt r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NOT: lsh64 r{{[0-9]+}}, 32

; Function Attrs: norecurse nounwind readnone
Expand All @@ -91,18 +96,20 @@ entry:
}
; CHECK-LABEL: selecti_cc_32
; CHECK: mov32 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: jgt r{{[0-9]+}}, 10
; CHECK-NOT: lsh64 r{{[0-9]+}}, 32
; CHECK-NOT: rsh64 r{{[0-9]+}}, 32

; Function Attrs: norecurse nounwind readnone
define dso_local i64 @selecti_cc_32_64(i32 %a, i64 %c, i64 %d) local_unnamed_addr #0 {
entry:
%cmp = icmp ugt i32 %a, 11
%cmp = icmp sgt i32 %a, 11
%c.d = select i1 %cmp, i64 %c, i64 %d
ret i64 %c.d
}
; CHECK-LABEL: selecti_cc_32_64
; CHECK: mov32 r{{[0-9]+}}, w{{[0-9]+}}
; CHECK: jsgt r{{[0-9]+}}, 11,
; CHECK-NOT: lsh64 r{{[0-9]+}}, 32
; CHECK-NOT: rsh64 r{{[0-9]+}}, 32

Expand All @@ -114,4 +121,5 @@ entry:
ret i32 %c.d
}
; CHECK-LABEL: selecti_cc_64_32
; CHECK: jsgt r{{[0-9]+}}, 12
; CHECK-NOT: lsh64 r{{[0-9]+}}, 32
76 changes: 37 additions & 39 deletions llvm/test/CodeGen/SBF/atomics_sbf.ll
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=sbf -mcpu=v3 -verify-machineinstrs | tee -i /tmp/log | FileCheck %s
; RUN: llc < %s -march=sbf -mattr=+alu32,+explicit-sext -verify-machineinstrs | tee -i /tmp/log | FileCheck %s
;
; CHECK-LABEL: test_load_add_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: add32 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_load_add_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
Expand All @@ -25,7 +25,7 @@ entry:

; CHECK-LABEL: test_load_sub_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: sub32 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_load_sub_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
Expand Down Expand Up @@ -67,8 +67,10 @@ entry:

; CHECK-LABEL: test_cas_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: jeq r0, r2,
; CHECK: mov32 w3, w0
; CHECK: mov64 r4, w0
; CHECK: mov64 r2, w2
; CHECK: jeq r4, r2,
; CHECK: mov64 w3, w0
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_cas_32(i32* nocapture %p, i32 %old, i32 %new) local_unnamed_addr {
entry:
Expand All @@ -91,7 +93,7 @@ entry:

; CHECK-LABEL: test_load_and_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: and32 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_load_and_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
Expand All @@ -113,7 +115,7 @@ entry:

; CHECK-LABEL: test_load_nand_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: and32 w3, w2
; CHECK: xor32 w3, -1
; CHECK: stxw [r1 + 0], w3
Expand All @@ -137,7 +139,7 @@ entry:

; CHECK-LABEL: test_load_or_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: or32 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_load_or_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
Expand All @@ -159,7 +161,7 @@ entry:

; CHECK-LABEL: test_load_xor_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: xor32 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_load_xor_32(i32* nocapture %p, i32 %v) local_unnamed_addr {
Expand All @@ -181,15 +183,11 @@ entry:

; CHECK-LABEL: test_min_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov64 r4, r0
; CHECK: lsh64 r4, 32
; CHECK: arsh64 r4, 32
; CHECK: mov32 r5, w2
; CHECK: lsh64 r5, 32
; CHECK: arsh64 r5, 32
; CHECK: mov32 w3, w0
; CHECK: jslt r4, r5, LBB16_2
; CHECK: mov32 w3, w2
; CHECK: mov32 r4, w2
; CHECK: mov32 r5, w0
; CHECK: mov64 w3, w0
; CHECK: jsgt r4, r5, LBB16_2
; CHECK: mov64 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_min_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 {
entry:
Expand All @@ -200,7 +198,7 @@ entry:
; CHECK-LABEL: test_min_64
; CHECK: ldxdw r0, [r1 + 0]
; CHECK: mov64 r3, r0
; CHECK: jslt r0, r2,
; CHECK: jsgt r2, r0,
; CHECK: mov64 r3, r2
; CHECK: stxdw [r1 + 0], r3
define dso_local i64 @test_min_64(i64* nocapture %ptr, i64 %v) local_unnamed_addr #0 {
Expand All @@ -211,15 +209,11 @@ entry:

; CHECK-LABEL: test_max_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov64 r4, r0
; CHECK: lsh64 r4, 32
; CHECK: arsh64 r4, 32
; CHECK: mov32 r4, w0
; CHECK: mov32 r5, w2
; CHECK: lsh64 r5, 32
; CHECK: arsh64 r5, 32
; CHECK: mov32 w3, w0
; CHECK: mov64 w3, w0
; CHECK: jsgt r4, r5, LBB18_2
; CHECK: mov32 w3, w2
; CHECK: mov64 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_max_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 {
entry:
Expand All @@ -241,10 +235,11 @@ entry:

; CHECK-LABEL: test_umin_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 r4, w2
; CHECK: mov32 w3, w0
; CHECK: jlt r0, r4,
; CHECK: mov32 w3, w2
; CHECK: mov64 r4, w2
; CHECK: mov64 r5, w0
; CHECK: mov64 w3, w0
; CHECK: jgt r4, r5,
; CHECK: mov64 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_umin_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 {
entry:
Expand All @@ -255,7 +250,7 @@ entry:
; CHECK-LABEL: test_umin_64
; CHECK: ldxdw r0, [r1 + 0]
; CHECK: mov64 r3, r0
; CHECK: jlt r0, r2,
; CHECK: jgt r2, r0,
; CHECK: mov64 r3, r2
; CHECK: stxdw [r1 + 0], r3
define dso_local i64 @test_umin_64(i64* nocapture %ptr, i64 %v) local_unnamed_addr #0 {
Expand All @@ -266,10 +261,11 @@ entry:

; CHECK-LABEL: test_umax_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 r4, w2
; CHECK: mov32 w3, w0
; CHECK: jgt r0, r4,
; CHECK: mov32 w3, w2
; CHECK: mov64 r4, w0
; CHECK: mov64 r5, w2
; CHECK: mov64 w3, w0
; CHECK: jgt r4, r5
; CHECK: mov64 w3, w2
; CHECK: stxw [r1 + 0], w3
define dso_local i32 @test_umax_32(i32* nocapture %ptr, i32 %v) local_unnamed_addr #0 {
entry:
Expand Down Expand Up @@ -305,8 +301,9 @@ entry:
; CHECK-LABEL: test_load_32
; CHECK: ldxw w0, [r1 + 0]
; CHECK: mov32 w2, 0
; CHECK: jeq r0, 0, LBB25_2
; CHECK: mov32 w2, w0
; CHECK: mov32 r3, w0
; CHECK: jeq r3, 0, LBB25_2
; CHECK: mov64 w2, w0
; CHECK: LBB25_2:
; CHECK: stxw [r1 + 0], w2
define dso_local i32 @test_load_32(ptr nocapture %p) local_unnamed_addr {
Expand All @@ -333,8 +330,9 @@ entry:

; CHECK-LABEL: test_weak_cas_32
; CHECK: ldxw w4, [r1 + 0]
; CHECK: mov32 r2, w2
; CHECK: jeq r4, r2,
; CHECK: mov64 r5, w4
; CHECK: mov64 r2, w2
; CHECK: jeq r5, r2,
; CHECK: stxw [r1 + 0], w3
define dso_local void @test_weak_cas_32(i32* nocapture %p, i32 %old, i32 %new) local_unnamed_addr {
entry:
Expand Down
Loading
Loading