From e1d22512906e69846c8f6a2d29b30832b7c12b46 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 26 Aug 2024 11:30:13 -0700 Subject: [PATCH] [SLP]Fix minbitwidth analysis for gather nodes with icmp users. If the node is not in the MinBWs container and the user node is an icmp node, the compiler should not check the type size of the user instruction: it is always 1 and is not useful for the actual bitwidth analysis. Fixes https://github.com/llvm/llvm-project/issues/105988 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 4 ++++ .../Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll | 8 +++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index def73e8d0c0db7..ed47ed661ab946 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15975,6 +15975,10 @@ void BoUpSLP::computeMinimumValueSizes() { auto It = MinBWs.find(TE); if (It != MinBWs.end() && It->second.first > UserTESz) return true; + // The size of icmp is always 1 and should not be + // considered. 
+ if (TE->getOpcode() == Instruction::ICmp) + return true; return DL->getTypeSizeInBits(U->getType()) > UserTESz; })); })) { diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll index 10ca5a2700ebee..7e75970de34929 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/gather-with-cmp-user.ll @@ -7,15 +7,13 @@ define i1 @test(i32 %g, i16 %d) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = and i16 [[D]], 1 ; CHECK-NEXT: [[XOR_I_I:%.*]] = xor i32 [[G]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[G]] to i8 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i8> poison, i8 [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[XOR_I_I]] to i8 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i8> [[TMP2]], i8 [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[G]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[XOR_I_I]], i32 1 +; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i32> [[TMP9]] to <2 x i8> ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> poison, <4 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i8> [[TMP5]], ; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt <4 x i8> [[TMP6]], ; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i1> [[TMP7]] to <4 x i8> -; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i8> [[TMP4]] to <2 x i32> ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> ; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[TMP8]] to <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = icmp sgt <4 x i32> [[TMP10]], [[TMP11]]