Skip to content

Commit

Permalink
[X86][AVX512] lower1BitShuffle - fold broadcast(setcc(x,y)) -> setcc(broadcast(x),broadcast(y)) (PR52500)
Browse files Browse the repository at this point in the history

AVX512 has excellent broadcast ops for everything but vXi1 bool vectors - so if we're broadcasting a comparison result, see if we can broadcast the comparison operands instead.
  • Loading branch information
RKSimon committed Mar 21, 2022
1 parent 5cfb110 commit 5fd9451
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 16 deletions.
13 changes: 12 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18942,7 +18942,18 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
Offset += NumElts; // Increment for next iteration.
}


// If we're broadcasting a SETCC result, try to broadcast the ops instead.
// TODO: What other unary shuffles would benefit from this?
if (isBroadcastShuffleMask(Mask) && V1.getOpcode() == ISD::SETCC &&
V1->hasOneUse()) {
SDValue Op0 = V1.getOperand(0);
SDValue Op1 = V1.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(V1.getOperand(2))->get();
EVT OpVT = Op0.getValueType();
return DAG.getSetCC(
DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask),
DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC);
}

MVT ExtVT;
switch (VT.SimpleTy) {
Expand Down
23 changes: 8 additions & 15 deletions llvm/test/CodeGen/X86/vector-shuffle-v1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -919,10 +919,8 @@ define <16 x i1> @PR52500(<16 x i1> %msk, i32 %in) {
; AVX512F-NEXT: movl $789, %eax # imm = 0x315
; AVX512F-NEXT: vmovd %eax, %xmm1
; AVX512F-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k2
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512F-NEXT: vpbroadcastd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1 {%k1}
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
Expand All @@ -937,10 +935,8 @@ define <16 x i1> @PR52500(<16 x i1> %msk, i32 %in) {
; AVX512VL-NEXT: movl $789, %eax # imm = 0x315
; AVX512VL-NEXT: vmovd %eax, %xmm1
; AVX512VL-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; AVX512VL-NEXT: vptestnmd %zmm0, %zmm0, %k2
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
; AVX512VL-NEXT: vpbroadcastd %xmm0, %zmm0
; AVX512VL-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; AVX512VL-NEXT: vptestnmd %zmm0, %zmm0, %k1 {%k1}
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vzeroupper
Expand All @@ -949,16 +945,13 @@ define <16 x i1> @PR52500(<16 x i1> %msk, i32 %in) {
; VL_BW_DQ-LABEL: PR52500:
; VL_BW_DQ: # %bb.0:
; VL_BW_DQ-NEXT: vpsllw $7, %xmm0, %xmm0
; VL_BW_DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VL_BW_DQ-NEXT: vmovd %edi, %xmm2
; VL_BW_DQ-NEXT: vpmovb2m %xmm0, %k1
; VL_BW_DQ-NEXT: vmovd %edi, %xmm0
; VL_BW_DQ-NEXT: movl $789, %eax # imm = 0x315
; VL_BW_DQ-NEXT: vmovd %eax, %xmm3
; VL_BW_DQ-NEXT: vpmulld %xmm3, %xmm2, %xmm2
; VL_BW_DQ-NEXT: vptestnmd %zmm2, %zmm2, %k0
; VL_BW_DQ-NEXT: vpmovm2d %k0, %zmm2
; VL_BW_DQ-NEXT: vpbroadcastd %xmm2, %zmm2
; VL_BW_DQ-NEXT: vpmovd2m %zmm2, %k1
; VL_BW_DQ-NEXT: vpcmpgtb %xmm0, %xmm1, %k0 {%k1}
; VL_BW_DQ-NEXT: vmovd %eax, %xmm1
; VL_BW_DQ-NEXT: vpmulld %xmm1, %xmm0, %xmm0
; VL_BW_DQ-NEXT: vpbroadcastd %xmm0, %zmm0
; VL_BW_DQ-NEXT: vptestnmd %zmm0, %zmm0, %k0 {%k1}
; VL_BW_DQ-NEXT: vpmovm2b %k0, %xmm0
; VL_BW_DQ-NEXT: vzeroupper
; VL_BW_DQ-NEXT: retq
Expand Down

0 comments on commit 5fd9451

Please sign in to comment.