-
Notifications
You must be signed in to change notification settings - Fork 12.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AArch64] Avoid NEON dot product in streaming[-compatible] functions (#101677)
The NEON dot product is not valid in streaming mode. A follow-up patch will improve codegen for these operations.
- Loading branch information
1 parent
8bfa089
commit 12937b1
Showing
2 changed files
with
146 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
143 changes: 143 additions & 0 deletions
143
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reductions.ll
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -mattr=+sve < %s | FileCheck %s | ||
; RUN: llc -mattr=+dotprod,+sve < %s | FileCheck %s -check-prefix=DOT | ||
; RUN: llc -mattr=+dotprod,+sve -force-streaming-compatible < %s | FileCheck %s --check-prefix=STREAMING-SVE | ||
; RUN: llc -mattr=+dotprod,+sme -force-streaming < %s | FileCheck %s --check-prefix=STREAMING-SVE | ||
;
; Purpose: checks the code generated for an i32 add-reduction of a <32 x i8>
; value that has been zero- or sign-extended to <32 x i32>.
;
; Configurations exercised by the four llc invocations above:
;   1. +sve only                                  -> default prefix
;   2. +dotprod,+sve                              -> DOT prefix
;   3. +dotprod,+sve, -force-streaming-compatible -> STREAMING-SVE prefix
;   4. +dotprod,+sme, -force-streaming            -> STREAMING-SVE prefix
; Configurations 3 and 4 share one prefix, so both must produce identical
; output. Their expected sequences contain no udot/sdot even though +dotprod
; is enabled, whereas the DOT expectations do use udot/sdot.
|
||
target triple = "aarch64-unknown-linux-gnu" | ||
|
||
; Unsigned case: zero-extends the <32 x i8> argument to <32 x i32> and
; add-reduces it to a single i32 via llvm.vector.reduce.add.
; Expected lowering for each check prefix:
;   default prefix - NEON widening (ushll/ushll2, uaddl/uaddl2), vector adds,
;                    then addv.
;   DOT prefix     - two udot accumulations against a vector of byte 1s
;                    (movi #1) into a zeroed accumulator, then addv.
;   STREAMING-SVE  - SVE uunpklo/ext/add sequence ending in a predicated
;                    uaddv; no udot appears.
; NOTE(review): the function name says v16i8 but the operand type is
; <32 x i8> -- confirm whether the name is intentional.
define i32 @reduce_uaddv_v16i8(<32 x i8> %a) { | ||
; CHECK-LABEL: reduce_uaddv_v16i8: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: ushll2 v2.8h, v1.16b, #0 | ||
; CHECK-NEXT: ushll2 v3.8h, v0.16b, #0 | ||
; CHECK-NEXT: ushll v1.8h, v1.8b, #0 | ||
; CHECK-NEXT: ushll v0.8h, v0.8b, #0 | ||
; CHECK-NEXT: uaddl2 v4.4s, v3.8h, v2.8h | ||
; CHECK-NEXT: uaddl v2.4s, v3.4h, v2.4h | ||
; CHECK-NEXT: uaddl2 v5.4s, v0.8h, v1.8h | ||
; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h | ||
; CHECK-NEXT: add v1.4s, v5.4s, v4.4s | ||
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s | ||
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s | ||
; CHECK-NEXT: addv s0, v0.4s | ||
; CHECK-NEXT: fmov w0, s0 | ||
; CHECK-NEXT: ret | ||
; | ||
; DOT-LABEL: reduce_uaddv_v16i8: | ||
; DOT: // %bb.0: | ||
; DOT-NEXT: movi v2.16b, #1 | ||
; DOT-NEXT: movi v3.2d, #0000000000000000 | ||
; DOT-NEXT: udot v3.4s, v1.16b, v2.16b | ||
; DOT-NEXT: udot v3.4s, v0.16b, v2.16b | ||
; DOT-NEXT: addv s0, v3.4s | ||
; DOT-NEXT: fmov w0, s0 | ||
; DOT-NEXT: ret | ||
; | ||
; STREAMING-SVE-LABEL: reduce_uaddv_v16i8: | ||
; STREAMING-SVE: // %bb.0: | ||
; STREAMING-SVE-NEXT: // kill: def $q1 killed $q1 def $z1 | ||
; STREAMING-SVE-NEXT: uunpklo z2.h, z1.b | ||
; STREAMING-SVE-NEXT: // kill: def $q0 killed $q0 def $z0 | ||
; STREAMING-SVE-NEXT: uunpklo z3.h, z0.b | ||
; STREAMING-SVE-NEXT: ptrue p0.s, vl4 | ||
; STREAMING-SVE-NEXT: ext z1.b, z1.b, z1.b, #8 | ||
; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8 | ||
; STREAMING-SVE-NEXT: uunpklo z1.h, z1.b | ||
; STREAMING-SVE-NEXT: uunpklo z0.h, z0.b | ||
; STREAMING-SVE-NEXT: uunpklo z4.s, z2.h | ||
; STREAMING-SVE-NEXT: ext z2.b, z2.b, z2.b, #8 | ||
; STREAMING-SVE-NEXT: uunpklo z6.s, z3.h | ||
; STREAMING-SVE-NEXT: ext z3.b, z3.b, z3.b, #8 | ||
; STREAMING-SVE-NEXT: mov z5.d, z1.d | ||
; STREAMING-SVE-NEXT: uunpklo z7.s, z0.h | ||
; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8 | ||
; STREAMING-SVE-NEXT: uunpklo z2.s, z2.h | ||
; STREAMING-SVE-NEXT: uunpklo z3.s, z3.h | ||
; STREAMING-SVE-NEXT: add z4.s, z6.s, z4.s | ||
; STREAMING-SVE-NEXT: ext z5.b, z5.b, z1.b, #8 | ||
; STREAMING-SVE-NEXT: uunpklo z1.s, z1.h | ||
; STREAMING-SVE-NEXT: uunpklo z0.s, z0.h | ||
; STREAMING-SVE-NEXT: add z2.s, z3.s, z2.s | ||
; STREAMING-SVE-NEXT: uunpklo z5.s, z5.h | ||
; STREAMING-SVE-NEXT: add z1.s, z7.s, z1.s | ||
; STREAMING-SVE-NEXT: add z0.s, z0.s, z5.s | ||
; STREAMING-SVE-NEXT: add z1.s, z4.s, z1.s | ||
; STREAMING-SVE-NEXT: add z0.s, z2.s, z0.s | ||
; STREAMING-SVE-NEXT: add z0.s, z1.s, z0.s | ||
; STREAMING-SVE-NEXT: uaddv d0, p0, z0.s | ||
; STREAMING-SVE-NEXT: fmov x0, d0 | ||
; STREAMING-SVE-NEXT: // kill: def $w0 killed $w0 killed $x0 | ||
; STREAMING-SVE-NEXT: ret | ||
%1 = zext <32 x i8> %a to <32 x i32> | ||
%2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1) | ||
ret i32 %2 | ||
} | ||
|
||
; Signed case: sign-extends the <32 x i8> argument to <32 x i32> and
; add-reduces it to a single i32 via llvm.vector.reduce.add.
; Expected lowering for each check prefix:
;   default prefix - NEON widening (sshll/sshll2, saddl/saddl2), vector adds,
;                    then addv.
;   DOT prefix     - two sdot accumulations against a vector of byte 1s
;                    (movi #1) into a zeroed accumulator, then addv.
;   STREAMING-SVE  - SVE sunpklo/ext/add sequence ending in a predicated
;                    reduction; no sdot appears.
; NOTE(review): the function name says v16i8 but the operand type is
; <32 x i8> -- confirm whether the name is intentional.
; NOTE(review): the final STREAMING-SVE reduction is uaddv, as in the unsigned
; test; presumably fine because the lanes were already sign-extended by
; sunpklo and only w0 is used afterwards -- confirm.
define i32 @reduce_saddv_v16i8(<32 x i8> %a) { | ||
; CHECK-LABEL: reduce_saddv_v16i8: | ||
; CHECK: // %bb.0: | ||
; CHECK-NEXT: sshll2 v2.8h, v1.16b, #0 | ||
; CHECK-NEXT: sshll2 v3.8h, v0.16b, #0 | ||
; CHECK-NEXT: sshll v1.8h, v1.8b, #0 | ||
; CHECK-NEXT: sshll v0.8h, v0.8b, #0 | ||
; CHECK-NEXT: saddl2 v4.4s, v3.8h, v2.8h | ||
; CHECK-NEXT: saddl v2.4s, v3.4h, v2.4h | ||
; CHECK-NEXT: saddl2 v5.4s, v0.8h, v1.8h | ||
; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h | ||
; CHECK-NEXT: add v1.4s, v5.4s, v4.4s | ||
; CHECK-NEXT: add v0.4s, v0.4s, v2.4s | ||
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s | ||
; CHECK-NEXT: addv s0, v0.4s | ||
; CHECK-NEXT: fmov w0, s0 | ||
; CHECK-NEXT: ret | ||
; | ||
; DOT-LABEL: reduce_saddv_v16i8: | ||
; DOT: // %bb.0: | ||
; DOT-NEXT: movi v2.16b, #1 | ||
; DOT-NEXT: movi v3.2d, #0000000000000000 | ||
; DOT-NEXT: sdot v3.4s, v1.16b, v2.16b | ||
; DOT-NEXT: sdot v3.4s, v0.16b, v2.16b | ||
; DOT-NEXT: addv s0, v3.4s | ||
; DOT-NEXT: fmov w0, s0 | ||
; DOT-NEXT: ret | ||
; | ||
; STREAMING-SVE-LABEL: reduce_saddv_v16i8: | ||
; STREAMING-SVE: // %bb.0: | ||
; STREAMING-SVE-NEXT: // kill: def $q1 killed $q1 def $z1 | ||
; STREAMING-SVE-NEXT: sunpklo z2.h, z1.b | ||
; STREAMING-SVE-NEXT: // kill: def $q0 killed $q0 def $z0 | ||
; STREAMING-SVE-NEXT: sunpklo z3.h, z0.b | ||
; STREAMING-SVE-NEXT: ptrue p0.s, vl4 | ||
; STREAMING-SVE-NEXT: ext z1.b, z1.b, z1.b, #8 | ||
; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8 | ||
; STREAMING-SVE-NEXT: sunpklo z1.h, z1.b | ||
; STREAMING-SVE-NEXT: sunpklo z0.h, z0.b | ||
; STREAMING-SVE-NEXT: sunpklo z4.s, z2.h | ||
; STREAMING-SVE-NEXT: ext z2.b, z2.b, z2.b, #8 | ||
; STREAMING-SVE-NEXT: sunpklo z6.s, z3.h | ||
; STREAMING-SVE-NEXT: ext z3.b, z3.b, z3.b, #8 | ||
; STREAMING-SVE-NEXT: mov z5.d, z1.d | ||
; STREAMING-SVE-NEXT: sunpklo z7.s, z0.h | ||
; STREAMING-SVE-NEXT: ext z0.b, z0.b, z0.b, #8 | ||
; STREAMING-SVE-NEXT: sunpklo z2.s, z2.h | ||
; STREAMING-SVE-NEXT: sunpklo z3.s, z3.h | ||
; STREAMING-SVE-NEXT: add z4.s, z6.s, z4.s | ||
; STREAMING-SVE-NEXT: ext z5.b, z5.b, z1.b, #8 | ||
; STREAMING-SVE-NEXT: sunpklo z1.s, z1.h | ||
; STREAMING-SVE-NEXT: sunpklo z0.s, z0.h | ||
; STREAMING-SVE-NEXT: add z2.s, z3.s, z2.s | ||
; STREAMING-SVE-NEXT: sunpklo z5.s, z5.h | ||
; STREAMING-SVE-NEXT: add z1.s, z7.s, z1.s | ||
; STREAMING-SVE-NEXT: add z0.s, z0.s, z5.s | ||
; STREAMING-SVE-NEXT: add z1.s, z4.s, z1.s | ||
; STREAMING-SVE-NEXT: add z0.s, z2.s, z0.s | ||
; STREAMING-SVE-NEXT: add z0.s, z1.s, z0.s | ||
; STREAMING-SVE-NEXT: uaddv d0, p0, z0.s | ||
; STREAMING-SVE-NEXT: fmov x0, d0 | ||
; STREAMING-SVE-NEXT: // kill: def $w0 killed $w0 killed $x0 | ||
; STREAMING-SVE-NEXT: ret | ||
%1 = sext <32 x i8> %a to <32 x i32> | ||
%2 = call i32 @llvm.vector.reduce.add.v32i32(<32 x i32> %1) | ||
ret i32 %2 | ||
} |