# Patch summary (free-text preamble placed before the first "diff --git" header).
#
# 1. llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
#    performVecReduceAddCombine() gains an early exit: when
#    ST->isNeonAvailable() is false it returns an empty SDValue() before
#    reaching the existing hasDotProd() check, so neither the
#    performVecReduceAddCombineWithUADDLP() path nor the code that follows
#    that check is reached in that configuration.
#
# 2. llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
#    New llc/FileCheck test with four RUN lines:
#      -mattr=+sve                                      -> prefix CHECK
#      -mattr=+dotprod,+sve                             -> prefix DOT
#      -mattr=+dotprod,+sve -force-streaming-compatible -> prefix STREAMING-SVE
#      -mattr=+dotprod,+sme -force-streaming            -> prefix STREAMING-SVE
#    Each of the two functions extends <32 x i8> to <32 x i32> (zext in the
#    first, sext in the second) and feeds it to llvm.vector.reduce.add.v32i32.
#    The DOT expectations use udot/sdot; the STREAMING-SVE expectations
#    contain only z-register instructions (uunpklo/sunpklo, ext, add, uaddv).
#    The assertions are autogenerated (see the NOTE line in the test); they
#    should be regenerated with utils/update_llc_test_checks.py, not hand-edited.
#
# NOTE(review): the test functions are named *_v16i8 although their operand
# type is <32 x i8> -- confirm whether the naming is intentional.
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 01079a95b47466..2e869f11b84314 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17866,6 +17866,9 @@ static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
 // and generate vecreduce.add(concat_vector(DOT, DOT2, ..)).
 static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
                                           const AArch64Subtarget *ST) {
+  if (!ST->isNeonAvailable())
+    return SDValue();
+
   if (!ST->hasDotProd())
     return performVecReduceAddCombineWithUADDLP(N, DAG);
 
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
new file mode 100644
index 00000000000000..00a15f4bcd6394
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
@@ -0,0 +1,143 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mattr=+dotprod,+sve < %s | FileCheck %s -check-prefix=DOT
+; RUN: llc -mattr=+dotprod,+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=STREAMING-SVE
+; RUN: llc -mattr=+dotprod,+sme -force-streaming < %s | FileCheck %s --check-prefix=STREAMING-SVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define i32 @reduce_uaddv_v16i8(<32 x i8> %a) {
+; CHECK-LABEL: reduce_uaddv_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ushll2 v2.8h, v1.16b, #0
+; CHECK-NEXT: ushll2 v3.8h, v0.16b, #0
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: uaddl2 v4.4s, v3.8h, v2.8h
+; CHECK-NEXT: uaddl v2.4s, v3.4h, v2.4h
+; CHECK-NEXT: uaddl2 v5.4s, v0.8h, v1.8h
+; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: add v1.4s, v5.4s, v4.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; DOT-LABEL: reduce_uaddv_v16i8:
+; DOT: // %bb.0:
+; DOT-NEXT: movi v2.16b, #1
+; DOT-NEXT: movi v3.2d, #0000000000000000
+; DOT-NEXT: udot v3.4s, v1.16b, v2.16b
+; DOT-NEXT: udot v3.4s, v0.16b, v2.16b
+; DOT-NEXT: addv s0, v3.4s
+; DOT-NEXT: fmov w0, s0
+; DOT-NEXT: ret
+;
+; STREAMING-SVE-LABEL: reduce_uaddv_v16i8:
+; STREAMING-SVE: // %bb.0:
+; STREAMING-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; STREAMING-SVE-NEXT: uunpklo z2.h, z1.b
+; STREAMING-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; STREAMING-SVE-NEXT: uunpklo z3.h, z0.b
+; STREAMING-SVE-NEXT: ptrue p0.s, vl4
+; STREAMING-SVE-NEXT: ext z1.b, z1.b, z1.b, #8
+; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; STREAMING-SVE-NEXT: uunpklo z1.h, z1.b
+; STREAMING-SVE-NEXT: uunpklo z0.h, z0.b
+; STREAMING-SVE-NEXT: uunpklo z4.s, z2.h
+; STREAMING-SVE-NEXT: ext z2.b, z2.b, z2.b, #8
+; STREAMING-SVE-NEXT: uunpklo z6.s, z3.h
+; STREAMING-SVE-NEXT: ext z3.b, z3.b, z3.b, #8
+; STREAMING-SVE-NEXT: mov z5.d, z1.d
+; STREAMING-SVE-NEXT: uunpklo z7.s, z0.h
+; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; STREAMING-SVE-NEXT: uunpklo z2.s, z2.h
+; STREAMING-SVE-NEXT: uunpklo z3.s, z3.h
+; STREAMING-SVE-NEXT: add z4.s, z6.s, z4.s
+; STREAMING-SVE-NEXT: ext z5.b, z5.b, z1.b, #8
+; STREAMING-SVE-NEXT: uunpklo z1.s, z1.h
+; STREAMING-SVE-NEXT: uunpklo z0.s, z0.h
+; STREAMING-SVE-NEXT: add z2.s, z3.s, z2.s
+; STREAMING-SVE-NEXT: uunpklo z5.s, z5.h
+; STREAMING-SVE-NEXT: add z1.s, z7.s, z1.s
+; STREAMING-SVE-NEXT: add z0.s, z0.s, z5.s
+; STREAMING-SVE-NEXT: add z1.s, z4.s, z1.s
+; STREAMING-SVE-NEXT: add z0.s, z2.s, z0.s
+; STREAMING-SVE-NEXT: add z0.s, z1.s, z0.s
+; STREAMING-SVE-NEXT: uaddv d0, p0, z0.s
+; STREAMING-SVE-NEXT: fmov x0, d0
+; STREAMING-SVE-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-SVE-NEXT: ret
+  %1 = zext <32 x i8> %a to <32 x i32>
+  %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)
+  ret i32 %2
+}
+
+define i32 @reduce_saddv_v16i8(<32 x i8> %a) {
+; CHECK-LABEL: reduce_saddv_v16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sshll2 v2.8h, v1.16b, #0
+; CHECK-NEXT: sshll2 v3.8h, v0.16b, #0
+; CHECK-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: saddl2 v4.4s, v3.8h, v2.8h
+; CHECK-NEXT: saddl v2.4s, v3.4h, v2.4h
+; CHECK-NEXT: saddl2 v5.4s, v0.8h, v1.8h
+; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: add v1.4s, v5.4s, v4.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: addv s0, v0.4s
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+;
+; DOT-LABEL: reduce_saddv_v16i8:
+; DOT: // %bb.0:
+; DOT-NEXT: movi v2.16b, #1
+; DOT-NEXT: movi v3.2d, #0000000000000000
+; DOT-NEXT: sdot v3.4s, v1.16b, v2.16b
+; DOT-NEXT: sdot v3.4s, v0.16b, v2.16b
+; DOT-NEXT: addv s0, v3.4s
+; DOT-NEXT: fmov w0, s0
+; DOT-NEXT: ret
+;
+; STREAMING-SVE-LABEL: reduce_saddv_v16i8:
+; STREAMING-SVE: // %bb.0:
+; STREAMING-SVE-NEXT: // kill: def $q1 killed $q1 def $z1
+; STREAMING-SVE-NEXT: sunpklo z2.h, z1.b
+; STREAMING-SVE-NEXT: // kill: def $q0 killed $q0 def $z0
+; STREAMING-SVE-NEXT: sunpklo z3.h, z0.b
+; STREAMING-SVE-NEXT: ptrue p0.s, vl4
+; STREAMING-SVE-NEXT: ext z1.b, z1.b, z1.b, #8
+; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; STREAMING-SVE-NEXT: sunpklo z1.h, z1.b
+; STREAMING-SVE-NEXT: sunpklo z0.h, z0.b
+; STREAMING-SVE-NEXT: sunpklo z4.s, z2.h
+; STREAMING-SVE-NEXT: ext z2.b, z2.b, z2.b, #8
+; STREAMING-SVE-NEXT: sunpklo z6.s, z3.h
+; STREAMING-SVE-NEXT: ext z3.b, z3.b, z3.b, #8
+; STREAMING-SVE-NEXT: mov z5.d, z1.d
+; STREAMING-SVE-NEXT: sunpklo z7.s, z0.h
+; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8
+; STREAMING-SVE-NEXT: sunpklo z2.s, z2.h
+; STREAMING-SVE-NEXT: sunpklo z3.s, z3.h
+; STREAMING-SVE-NEXT: add z4.s, z6.s, z4.s
+; STREAMING-SVE-NEXT: ext z5.b, z5.b, z1.b, #8
+; STREAMING-SVE-NEXT: sunpklo z1.s, z1.h
+; STREAMING-SVE-NEXT: sunpklo z0.s, z0.h
+; STREAMING-SVE-NEXT: add z2.s, z3.s, z2.s
+; STREAMING-SVE-NEXT: sunpklo z5.s, z5.h
+; STREAMING-SVE-NEXT: add z1.s, z7.s, z1.s
+; STREAMING-SVE-NEXT: add z0.s, z0.s, z5.s
+; STREAMING-SVE-NEXT: add z1.s, z4.s, z1.s
+; STREAMING-SVE-NEXT: add z0.s, z2.s, z0.s
+; STREAMING-SVE-NEXT: add z0.s, z1.s, z0.s
+; STREAMING-SVE-NEXT: uaddv d0, p0, z0.s
+; STREAMING-SVE-NEXT: fmov x0, d0
+; STREAMING-SVE-NEXT: // kill: def $w0 killed $w0 killed $x0
+; STREAMING-SVE-NEXT: ret
+  %1 = sext <32 x i8> %a to <32 x i32>
+  %2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1)
+  ret i32 %2
+}