Skip to content

Commit

Permalink
[ARM] Add NEON support for ISD::ABDS/ABDU nodes.
Browse files Browse the repository at this point in the history
As noted on #94466, NEON has ABDS/ABDU instructions, but the ARM backend only handles them via intrinsics, plus some custom VABDL patterns.

Fixes #94466
  • Loading branch information
RKSimon committed Jun 6, 2024
1 parent a53ed21 commit 2f9a635
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 43 deletions.
29 changes: 12 additions & 17 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,9 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);

if (!VT.isFloatingPoint() &&
VT != MVT::v2i64 && VT != MVT::v1i64)
for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
if (!VT.isFloatingPoint())
for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
Expand Down Expand Up @@ -4174,7 +4174,15 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
}
case Intrinsic::arm_neon_vabs:
return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
Op.getOperand(1));
Op.getOperand(1));
case Intrinsic::arm_neon_vabds:
if (Op.getValueType().isInteger())
return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
return SDValue();
case Intrinsic::arm_neon_vabdu:
return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu: {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
Expand Down Expand Up @@ -13496,18 +13504,6 @@ static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
}

/// DAG combine hook for an ABS-style node \p N.
///
/// If the target already reports N's opcode as legal for N's first result
/// type, nothing is done and an empty SDValue is returned so the node is left
/// untouched. Otherwise the node is rewritten through the generic
/// TargetLowering::expandABS helper and that result is returned.
///
/// \param N          The node being combined.
/// \param DCI        Combiner state; only its SelectionDAG is used here.
/// \param Subtarget  Not consulted by this combine.
/// \returns An empty SDValue when the operation is legal, otherwise whatever
///          expandABS produces.
static SDValue PerformABSCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *Subtarget) {
  SelectionDAG &CurDAG = DCI.DAG;
  const TargetLowering &Lowering = CurDAG.getTargetLoweringInfo();

  // Legal operations are selected directly; only illegal ones get expanded.
  const bool AlreadyLegal =
      Lowering.isOperationLegal(N->getOpcode(), N->getValueType(0));
  return AlreadyLegal ? SDValue() : Lowering.expandABS(N, CurDAG);
}

/// PerformADDECombine - Target-specific dag combine transform from
/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
Expand Down Expand Up @@ -18871,7 +18867,6 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
Expand Down
36 changes: 11 additions & 25 deletions llvm/lib/Target/ARM/ARMInstrNEON.td
Original file line number Diff line number Diff line change
Expand Up @@ -5640,10 +5640,10 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vabd", "s", int_arm_neon_vabds, 1>;
"vabd", "s", abds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vabd", "u", int_arm_neon_vabdu, 1>;
"vabd", "u", abdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
Expand All @@ -5657,44 +5657,30 @@ def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
"vabdl", "s", int_arm_neon_vabds, zext, 1>;
"vabdl", "s", abds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
"vabdl", "u", int_arm_neon_vabdu, zext, 1>;
"vabdl", "u", abdu, zext, 1>;

let Predicates = [HasNEON] in {
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
def : Pat<(v8i16 (zext (abdu (v8i8 DPR:$opA), (v8i8 DPR:$opB)))),
(VABDLuv8i16 DPR:$opA, DPR:$opB)>;
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
def : Pat<(v4i32 (zext (abdu (v4i16 DPR:$opA), (v4i16 DPR:$opB)))),
(VABDLuv4i32 DPR:$opA, DPR:$opB)>;
}

// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
// shift/xor pattern for ABS.

def abd_shr :
PatFrag<(ops node:$in1, node:$in2, node:$shift),
(ARMvshrsImm (sub (zext node:$in1),
(zext node:$in2)), (i32 $shift))>;

let Predicates = [HasNEON] in {
def : Pat<(xor (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)),
(v2i64 (add (sub (zext (v2i32 DPR:$opA)),
(zext (v2i32 DPR:$opB))),
(abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)))),
def : Pat<(v2i64 (zext (abdu (v2i32 DPR:$opA), (v2i32 DPR:$opB)))),
(VABDLuv2i64 DPR:$opA, DPR:$opB)>;
}

// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
"vaba", "s", int_arm_neon_vabds, add>;
"vaba", "s", abds, add>;
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
"vaba", "u", int_arm_neon_vabdu, add>;
"vaba", "u", abdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
"vabal", "s", int_arm_neon_vabds, zext, add>;
"vabal", "s", abds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
"vabal", "u", int_arm_neon_vabdu, zext, add>;
"vabal", "u", abdu, zext, add>;

// Vector Maximum and Minimum.

Expand Down
5 changes: 4 additions & 1 deletion llvm/test/CodeGen/ARM/neon_vabs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,10 @@ define <2 x i64> @test13(<2 x i32> %a, <2 x i32> %b) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov d16, r2, r3
; CHECK-NEXT: vmov d17, r0, r1
; CHECK-NEXT: vabdl.u32 q8, d17, d16
; CHECK-NEXT: vsubl.u32 q8, d17, d16
; CHECK-NEXT: vshr.s64 q9, q8, #63
; CHECK-NEXT: vsra.s64 q8, q8, #63
; CHECK-NEXT: veor q8, q9, q8
; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: vmov r2, r3, d17
; CHECK-NEXT: mov pc, lr
Expand Down

0 comments on commit 2f9a635

Please sign in to comment.