-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
EarlyCSE: refactor getOrCreateResult (NFC) #113339
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-powerpc Author: Ramkumar Ramachandra (artagnon) Changes: getOrCreateResult suffers from the deficiency that it doesn't attempt to create casts when types mismatch. Fix this deficiency, making EarlyCSE more powerful. -- 8< -- Patch is 314.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113339.diff 8 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index a1dbb4e1d5e75f..9714611cda8b0f 100644
--- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -31,6 +31,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -964,32 +965,45 @@ class EarlyCSE {
bool overridingStores(const ParseMemoryInst &Earlier,
const ParseMemoryInst &Later);
- Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
- // TODO: We could insert relevant casts on type mismatch here.
- if (auto *LI = dyn_cast<LoadInst>(Inst))
- return LI->getType() == ExpectedType ? LI : nullptr;
- if (auto *SI = dyn_cast<StoreInst>(Inst)) {
- Value *V = SI->getValueOperand();
- return V->getType() == ExpectedType ? V : nullptr;
+ Value *getOrCreateResult(Instruction *Inst, Type *ExpectedType) const {
+ if (!isa<IntrinsicInst, LoadInst, StoreInst>(Inst))
+ llvm_unreachable("Instruction not supported");
+
+ // The load or the store's first operand.
+ Value *V;
+ if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
+ if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::masked_load:
+ V = II;
+ break;
+ case Intrinsic::masked_store:
+ V = II->getOperand(0);
+ break;
+ default:
+ return nullptr;
+ }
+ else
+ return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
+ } else {
+ V = isa<LoadInst>(Inst) ? Inst : cast<StoreInst>(Inst)->getValueOperand();
}
- assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
- auto *II = cast<IntrinsicInst>(Inst);
- if (isHandledNonTargetIntrinsic(II->getIntrinsicID()))
- return getOrCreateResultNonTargetMemIntrinsic(II, ExpectedType);
- return TTI.getOrCreateResultFromMemIntrinsic(II, ExpectedType);
- }
- Value *getOrCreateResultNonTargetMemIntrinsic(IntrinsicInst *II,
- Type *ExpectedType) const {
- // TODO: We could insert relevant casts on type mismatch here.
- switch (II->getIntrinsicID()) {
- case Intrinsic::masked_load:
- return II->getType() == ExpectedType ? II : nullptr;
- case Intrinsic::masked_store: {
- Value *V = II->getOperand(0);
- return V->getType() == ExpectedType ? V : nullptr;
- }
- }
+ Type *ActualType = V->getType();
+ BasicBlock *TheBB = Inst->getParent();
+
+ // First handle the case when no cast is required.
+ if (ActualType == ExpectedType)
+ return V;
+
+ // Try to create BitCast, SExt, or Trunc.
+ IRBuilder<> Builder(TheBB, std::next(Inst->getIterator()));
+ if (CastInst::castIsValid(Instruction::BitCast, V, ExpectedType))
+ return Builder.CreateBitCast(V, ExpectedType);
+ if (CastInst::castIsValid(Instruction::SExt, V, ExpectedType))
+ return Builder.CreateSExt(V, ExpectedType);
+ if (CastInst::castIsValid(Instruction::Trunc, V, ExpectedType))
+ return Builder.CreateTrunc(V, ExpectedType);
return nullptr;
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index c3694158e7b971..6fe26286b74c22 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -3683,7 +3683,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[DEF]](p4) :: (invariant load (p1) from `ptr addrspace(4) undef`, addrspace 4)
; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[LOAD]](p1) :: ("amdgpu-noclobber" load (<32 x s32>) from %ir.ptr0, addrspace 1)
; CHECK-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s8) from `ptr addrspace(1) undef`, addrspace 1)
- ; CHECK-NEXT: [[LOAD3:%[0-9]+]]:_(s16) = G_LOAD [[DEF1]](p1) :: ("amdgpu-noclobber" load (s16) from `ptr addrspace(1) undef`, addrspace 1)
+ ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s16) = G_SEXT [[LOAD2]](s8)
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc
; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_v32i32_i8_i8_i16
; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
@@ -3720,7 +3720,7 @@ define amdgpu_kernel void @test_call_external_void_func_v32i32_i8_i8_i16() #0 {
; CHECK-NEXT: G_STORE [[COPY18]](s16), [[PTR_ADD3]](p5) :: (store (s16) into stack + 8, align 8, addrspace 5)
; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
; CHECK-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32)
- ; CHECK-NEXT: G_STORE [[LOAD3]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
+ ; CHECK-NEXT: G_STORE [[SEXT]](s16), [[PTR_ADD4]](p5) :: (store (s16) into stack + 12, align 4, addrspace 5)
; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
index 86254329923971..4a81bec15411fb 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/branch-folding-implicit-def-subreg.ll
@@ -4,7 +4,7 @@
define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64 %arg2, i1 %arg3, i1 %arg4, i1 %arg5, i1 %arg6, ptr addrspace(3) %arg7, ptr addrspace(3) %arg8, ptr addrspace(3) %arg9, ptr addrspace(3) %arg10) {
; GFX90A-LABEL: name: f1
; GFX90A: bb.0.bb:
- ; GFX90A-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GFX90A-NEXT: successors: %bb.56(0x40000000), %bb.1(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr15, $sgpr10_sgpr11
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $sgpr32 = S_MOV_B32 0
@@ -30,44 +30,25 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr2 = DS_READ_B32_gfx9 renamable $vgpr3, 0, 0, implicit $exec :: (load (s32) from `ptr addrspace(3) null`, align 8, addrspace 3)
; GFX90A-NEXT: renamable $sgpr42_sgpr43 = S_MOV_B64 0
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr24_sgpr25, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.2, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.56, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.1.bb103:
- ; GFX90A-NEXT: successors: %bb.59(0x40000000), %bb.2(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x00000000000000FF, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $sgpr30_sgpr31 = S_MOV_B64 0
- ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
- ; GFX90A-NEXT: $vgpr22 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr10 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr24 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr18 = IMPLICIT_DEF
- ; GFX90A-NEXT: $vgpr20 = IMPLICIT_DEF
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.59, implicit $vcc
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.2:
- ; GFX90A-NEXT: successors: %bb.3(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr22, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6, $sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54, $sgpr55, $sgpr16_sgpr17_sgpr18, $sgpr18_sgpr19, $sgpr20_sgpr21_sgpr22, $vgpr2, $vgpr3, $vgpr10, $vgpr24, $vgpr18, $vgpr20
+ ; GFX90A-NEXT: bb.1:
+ ; GFX90A-NEXT: successors: %bb.60(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $sgpr23 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr19 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr21 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr25 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr23 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $sgpr28_sgpr29 = S_MOV_B64 0
+ ; GFX90A-NEXT: S_BRANCH %bb.60
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.3.Flow17:
- ; GFX90A-NEXT: successors: %bb.4(0x40000000), %bb.58(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr23, $sgpr33, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr30_sgpr31, $sgpr42_sgpr43, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: renamable $vgpr30 = V_AND_B32_e32 1023, $vgpr31, implicit $exec
- ; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr30_sgpr31, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCZ %bb.58, implicit $vcc
- ; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.4.bb15:
- ; GFX90A-NEXT: successors: %bb.35(0x40000000), %bb.5(0x40000000)
+ ; GFX90A-NEXT: bb.2.bb15:
+ ; GFX90A-NEXT: successors: %bb.33(0x40000000), %bb.3(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr33, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr54_sgpr55:0x000000000000000F, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000FF, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003F, $vgpr2_vgpr3:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr42_sgpr43
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 2, $vgpr2_vgpr3, implicit $exec
@@ -78,10 +59,10 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr40, renamable $vcc = V_ADD_CO_U32_e64 $vgpr46, killed $vgpr0, 0, implicit $exec
; GFX90A-NEXT: renamable $vgpr41, dead renamable $vcc = V_ADDC_U32_e64 0, $vgpr47, killed $vcc, 0, implicit $exec
; GFX90A-NEXT: renamable $vcc = S_AND_B64 $exec, renamable $sgpr26_sgpr27, implicit-def dead $scc
- ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.35, implicit $vcc
+ ; GFX90A-NEXT: S_CBRANCH_VCCNZ %bb.33, implicit $vcc
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.5:
- ; GFX90A-NEXT: successors: %bb.6(0x80000000)
+ ; GFX90A-NEXT: bb.3:
+ ; GFX90A-NEXT: successors: %bb.4(0x80000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr30, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr2_vgpr3:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr42_sgpr43
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr34_sgpr35 = S_MOV_B64 -1
@@ -108,96 +89,96 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $vgpr17 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr14 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr52 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr50 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr16 = IMPLICIT_DEF
- ; GFX90A-NEXT: renamable $vgpr53 = IMPLICIT_DEF
+ ; GFX90A-NEXT: renamable $vgpr51 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr13 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $vgpr11 = IMPLICIT_DEF
; GFX90A-NEXT: renamable $sgpr15 = IMPLICIT_DEF
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.6.Flow20:
- ; GFX90A-NEXT: successors: %bb.7(0x80000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.4.Flow20:
+ ; GFX90A-NEXT: successors: %bb.5(0x80000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $sgpr15, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $vgpr19 = COPY renamable $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr18 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr21 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr20 = COPY $sgpr15, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr52 = COPY $sgpr15, implicit $exec
+ ; GFX90A-NEXT: renamable $vgpr25 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr23 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: renamable $vgpr22 = COPY $sgpr15, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr25 = COPY $sgpr15, implicit $exec
- ; GFX90A-NEXT: renamable $vgpr24 = COPY $sgpr15, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.7.Flow19:
- ; GFX90A-NEXT: successors: %bb.63(0x40000000), %bb.8(0x40000000)
- ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr30, $vgpr31, $vgpr52, $vgpr53, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr24_vgpr25:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
+ ; GFX90A-NEXT: bb.5.Flow19:
+ ; GFX90A-NEXT: successors: %bb.65(0x40000000), %bb.6(0x40000000)
+ ; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr15, $vgpr17, $vgpr25, $vgpr30, $vgpr31, $vgpr50, $vgpr51, $vgpr52, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr24_sgpr25, $sgpr26_sgpr27, $sgpr28_sgpr29, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr56_sgpr57, $sgpr16_sgpr17_sgpr18_sgpr19:0x00000000000000F0, $sgpr20_sgpr21_sgpr22_sgpr23:0x000000000000003C, $vgpr0_vgpr1:0x000000000000000F, $vgpr2_vgpr3:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr10_vgpr11:0x000000000000000F, $vgpr12_vgpr13:0x000000000000000F, $vgpr14_vgpr15:0x0000000000000003, $vgpr16_vgpr17:0x0000000000000003, $vgpr18_vgpr19:0x000000000000000F, $vgpr20_vgpr21:0x000000000000000F, $vgpr22_vgpr23:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_MOV_B64 0
; GFX90A-NEXT: $sgpr30_sgpr31 = S_AND_SAVEEXEC_B64 $sgpr28_sgpr29, implicit-def $exec, implicit-def $scc, implicit $exec
- ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.63, implicit $exec
+ ; GFX90A-NEXT: S_CBRANCH_EXECNZ %bb.65, implicit $exec
; GFX90A-NEXT: {{ $}}
- ; GFX90A-NEXT: bb.8.Flow32:
- ; GFX90A-NEXT: successors: %bb.9(0x40000000), %bb.10(0x40000000)
+ ; GFX90A-NEXT: bb.6.Flow32:
+ ; GFX90A-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000)
; GFX90A-NEXT: liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7:0x000000000000000F, $sgpr10_sgpr11, $sgpr16_sgpr17, $sgpr30_sgpr31, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr52_sgpr53, $sgpr54_sgpr55, $sgpr56_sgpr57, $vgpr0_vgpr1:0x000000000000000F, $vgpr4_vgpr5:0x000000000000000F, $vgpr6_vgpr7:0x000000000000000F, $vgpr8_vgpr9:0x000000000000000F, $vgpr40_vgpr41:0x000000000000000F, $vgpr42_vgpr43:0x000000000000000F, $vgpr44_vgpr45:0x000000000000000F, $vgpr46_vgpr47:0x000000000000000F, $vgpr56_vgpr57:0x000000000000000F, $vgpr58_vgpr59:0x000000000000000F, $vgpr60_vgpr61:0x000000000000000F, $vgpr62_vgpr63:0x000000000000000F, $sgpr0_sgpr1_sgpr2_sgpr3
; GFX90A-NEXT: {{ $}}
; GFX90A-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr30_sgpr3...
[truncated]
|
f8862d4
to
a4d759c
Compare
a4d759c
to
fcb5184
Compare
fcb5184
to
c047193
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The idea here is reasonable, but I don't think you can create a bitcast in getOrCreateResult(). This function may be called in cases where we will not actually use the result, such as the comparison
`InVal.DefInst == getMatchingValue(InVal, MemInst, CurrentGeneration)`.
getOrCreateResult suffers from the deficiency that it doesn't attempt to create casts when types mismatch. Fix this deficiency, making EarlyCSE more powerful.
c047193
to
1bf8fe9
Compare
Makes sense. I do think that the refactoring of getOrCreateResult is a good change, so I've reduced this patch to an NFC that does only that refactoring.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG, but I think the TODO should be preserved. The general idea of supporting bitcasts makes sense, it's just not that straightforward to implement.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/73/builds/8067 Here is the relevant piece of the build log for reference:
|
No description provided.