From d1b385591bd6b5e4f751ee183b2f64f025a7b0c7 Mon Sep 17 00:00:00 2001 From: Kevin McAfee Date: Fri, 5 Jul 2024 15:43:24 -0700 Subject: [PATCH 1/6] Check for side effects when lowering target intrinsics, update NVVM ldu/ldg intrinsics with IntrWillReturn and test for DCE --- llvm/include/llvm/IR/IntrinsicsNVVM.td | 12 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 2 +- llvm/test/CodeGen/NVPTX/ldu-ldg.ll | 126 ++++++++++++++++++ 3 files changed, 133 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 854eb2f8dd6dff..1e7fdb53059e20 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -1529,30 +1529,30 @@ def int_nvvm_mbarrier_pending_count : // pointer's alignment. def int_nvvm_ldu_global_i : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>], "llvm.nvvm.ldu.global.i">; def int_nvvm_ldu_global_f : Intrinsic<[llvm_anyfloat_ty], [llvm_anyptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>], "llvm.nvvm.ldu.global.f">; def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>], "llvm.nvvm.ldu.global.p">; // Generated within nvvm. Use for ldg on sm_35 or later. Second arg is the // pointer's alignment. 
def int_nvvm_ldg_global_i : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>], "llvm.nvvm.ldg.global.i">; def int_nvvm_ldg_global_f : Intrinsic<[llvm_anyfloat_ty], [llvm_anyptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>], "llvm.nvvm.ldg.global.f">; def int_nvvm_ldg_global_p : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, llvm_i32_ty], - [IntrReadMem, IntrArgMemOnly, IntrNoCallback, NoCapture<ArgIndex<0>>], + [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillReturn, NoCapture<ArgIndex<0>>], "llvm.nvvm.ldg.global.p">; // Use for generic pointers diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index b0746014daf5ac..2fd12c7c0b1bd0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // definition. const Function *F = I.getCalledFunction(); bool HasChain = !F->doesNotAccessMemory(); - bool OnlyLoad = HasChain && F->onlyReadsMemory(); + bool OnlyLoad = HasChain && F->onlyReadsMemory() && !I.mayHaveSideEffects(); // Build the operand list. 
SmallVector<SDValue, 8> Ops; diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll index a449a1b1f713c5..9c266c78f5ac33 100644 --- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll +++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll @@ -148,3 +148,129 @@ define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) { %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) ret <2 x half> %val } + +; CHECK-LABEL: test_ldu_i8_dead +define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ldu.global.u8 + %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +; CHECK-LABEL: test_ldu_i16_dead +define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ldu.global.u16 + %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) + ret void +} + +; CHECK-LABEL: test_ldu_i32_dead +define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ldu.global.u32 + %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +; CHECK-LABEL: test_ldu_i64_dead +define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ldu.global.u64 + %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) + ret void +} + +; CHECK-LABEL: test_ldu_p_dead +define void @test_ldu_p_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ldu.global.u64 + %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8) + ret void +} + +; CHECK-LABEL: test_ldu_f32_dead +define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ldu.global.f32 + %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +; CHECK-LABEL: test_ldu_f64_dead +define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ldu.global.f64 + %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) + ret void +} + +; CHECK-LABEL: test_ldu_f16_dead 
+define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ldu.global.u16 + %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) + ret void +} + +; CHECK-LABEL: test_ldu_v2f16_dead +define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ldu.global.u32 + %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +; CHECK-LABEL: test_ldg_i8_dead +define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ld.global.nc.u8 + %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +; CHECK-LABEL: test_ldg_i16_dead +define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ld.global.nc.u16 + %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) + ret void +} + +; CHECK-LABEL: test_ldg_i32_dead +define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ld.global.nc.u32 + %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +; CHECK-LABEL: test_ldg_i64_dead +define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ld.global.nc.u64 + %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) + ret void +} + +; CHECK-LABEL: test_ldg_p_dead +define void @test_ldg_p_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ld.global.nc.u64 + %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8) + ret void +} + +; CHECK-LABEL: test_ldg_f32_dead +define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ld.global.nc.f32 + %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +; CHECK-LABEL: test_ldg_f64_dead +define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ld.global.nc.f64 + %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) + ret void +} + +; CHECK-LABEL: 
test_ldg_f16_dead +define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ld.global.nc.u16 + %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) + ret void +} + +; CHECK-LABEL: test_ldg_v2f16_dead +define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) { + ; CHECK-NOT: ld.global.nc.u32 + %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} From 5666ebd40633986fc6125ca234b32689d4407340 Mon Sep 17 00:00:00 2001 From: Kevin McAfee Date: Fri, 19 Jul 2024 10:51:16 -0700 Subject: [PATCH 2/6] Use function attributes rather than instruction --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2fd12c7c0b1bd0..923ef3928b341e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // definition. const Function *F = I.getCalledFunction(); bool HasChain = !F->doesNotAccessMemory(); - bool OnlyLoad = HasChain && F->onlyReadsMemory() && !I.mayHaveSideEffects(); + bool OnlyLoad = HasChain && F->onlyReadsMemory() && F->willReturn(); // Build the operand list. 
SmallVector<SDValue, 8> Ops; From 84c32954f392c91b3ed09264c77ed888113a2a60 Mon Sep 17 00:00:00 2001 From: Kevin McAfee Date: Mon, 22 Jul 2024 14:52:26 -0700 Subject: [PATCH 3/6] No SDAG change --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 923ef3928b341e..b0746014daf5ac 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -5227,7 +5227,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // definition. const Function *F = I.getCalledFunction(); bool HasChain = !F->doesNotAccessMemory(); - bool OnlyLoad = HasChain && F->onlyReadsMemory() && F->willReturn(); + bool OnlyLoad = HasChain && F->onlyReadsMemory(); // Build the operand list. SmallVector<SDValue, 8> Ops; From a252b0d4022c303d19dc638d2dedd713615b7baf Mon Sep 17 00:00:00 2001 From: Kevin McAfee Date: Mon, 22 Jul 2024 15:11:38 -0700 Subject: [PATCH 4/6] Revert ldu-ldg.ll --- llvm/test/CodeGen/NVPTX/ldu-ldg.ll | 126 ----------------------------- 1 file changed, 126 deletions(-) diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll index 9c266c78f5ac33..a449a1b1f713c5 100644 --- a/llvm/test/CodeGen/NVPTX/ldu-ldg.ll +++ b/llvm/test/CodeGen/NVPTX/ldu-ldg.ll @@ -148,129 +148,3 @@ define <2 x half> @test_ldg_v2f16(ptr addrspace(1) %ptr) { %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) ret <2 x half> %val } - -; CHECK-LABEL: test_ldu_i8_dead -define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ldu.global.u8 - %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) - ret void -} - -; CHECK-LABEL: test_ldu_i16_dead -define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ldu.global.u16 - %val = tail call i16 
@llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) - ret void -} - -; CHECK-LABEL: test_ldu_i32_dead -define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ldu.global.u32 - %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) - ret void -} - -; CHECK-LABEL: test_ldu_i64_dead -define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ldu.global.u64 - %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) - ret void -} - -; CHECK-LABEL: test_ldu_p_dead -define void @test_ldu_p_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ldu.global.u64 - %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8) - ret void -} - -; CHECK-LABEL: test_ldu_f32_dead -define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ldu.global.f32 - %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) - ret void -} - -; CHECK-LABEL: test_ldu_f64_dead -define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ldu.global.f64 - %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) - ret void -} - -; CHECK-LABEL: test_ldu_f16_dead -define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ldu.global.u16 - %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) - ret void -} - -; CHECK-LABEL: test_ldu_v2f16_dead -define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ldu.global.u32 - %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) - ret void -} - -; CHECK-LABEL: test_ldg_i8_dead -define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ld.global.nc.u8 - %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) - ret void -} - -; CHECK-LABEL: test_ldg_i16_dead -define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ld.global.nc.u16 - %val = tail call i16 
@llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) - ret void -} - -; CHECK-LABEL: test_ldg_i32_dead -define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ld.global.nc.u32 - %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) - ret void -} - -; CHECK-LABEL: test_ldg_i64_dead -define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ld.global.nc.u64 - %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) - ret void -} - -; CHECK-LABEL: test_ldg_p_dead -define void @test_ldg_p_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ld.global.nc.u64 - %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8) - ret void -} - -; CHECK-LABEL: test_ldg_f32_dead -define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ld.global.nc.f32 - %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) - ret void -} - -; CHECK-LABEL: test_ldg_f64_dead -define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ld.global.nc.f64 - %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) - ret void -} - -; CHECK-LABEL: test_ldg_f16_dead -define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ld.global.nc.u16 - %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) - ret void -} - -; CHECK-LABEL: test_ldg_v2f16_dead -define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) { - ; CHECK-NOT: ld.global.nc.u32 - %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) - ret void -} From 7f8f4f37a4de247a7eb61626b8904ee045df3179 Mon Sep 17 00:00:00 2001 From: Kevin McAfee Date: Mon, 22 Jul 2024 15:13:26 -0700 Subject: [PATCH 5/6] Add new test to check that nvvm ldu/ldg intrinsics are DCE'd --- llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll | 187 ++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 
llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll b/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll new file mode 100644 index 00000000000000..bc20b325d3dbb0 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll @@ -0,0 +1,187 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -O3 -S | FileCheck %s + +; ldu/ldg intrinsics were erroneously not marked IntrWillReturn, preventing +; them from being eliminated at IR level when dead. + +declare i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) +declare i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) +declare i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align) +declare i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align) +declare ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 %align) +declare float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align) +declare double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align) +declare half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align) +declare <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 %align) + +declare i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 %align) +declare i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 %align) +declare i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 %align) +declare i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 %align) +declare ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 %align) +declare float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 %align) +declare double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 %align) +declare half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 %align) +declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 
%align) + +define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldu_i8_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: ret void +; + %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldu_i16_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) + ret void +} + +define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldu_i32_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldu_i64_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) + ret void +} + +define void @test_ldu_p_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldu_p_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8) + ret void +} + +define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldu_f32_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +define void 
@test_ldu_f64_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldu_f64_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) + ret void +} + +define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldu_f16_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) + ret void +} + +define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldu_v2f16_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldg_i8_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldg_i16_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) + ret void +} + +define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldg_i32_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +define void 
@test_ldg_i64_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldg_i64_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) + ret void +} + +define void @test_ldg_p_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldg_p_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr addrspace(1) %ptr, i32 8) + ret void +} + +define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldg_f32_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} + +define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldg_f64_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) + ret void +} + +define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldg_f16_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) + ret void +} + +define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define void @test_ldg_v2f16_dead( +; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-NEXT: ret void +; + %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) + ret void +} From 
f66ce52e71d4f1087f1fb7759343bcf9363d480e Mon Sep 17 00:00:00 2001 From: Kevin McAfee Date: Tue, 23 Jul 2024 11:07:10 -0700 Subject: [PATCH 6/6] Update test to run only DCE pass --- .../DCE/nvvm-ldu-ldg-willreturn.ll} | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) rename llvm/test/{CodeGen/NVPTX/ldu-ldg-willreturn.ll => Transforms/DCE/nvvm-ldu-ldg-willreturn.ll} (76%) diff --git a/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll b/llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll similarity index 76% rename from llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll rename to llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll index bc20b325d3dbb0..64a023ef451376 100644 --- a/llvm/test/CodeGen/NVPTX/ldu-ldg-willreturn.ll +++ b/llvm/test/Transforms/DCE/nvvm-ldu-ldg-willreturn.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt < %s -O3 -S | FileCheck %s +; RUN: opt -S < %s -passes=dce | FileCheck %s ; ldu/ldg intrinsics were erroneously not marked IntrWillReturn, preventing ; them from being eliminated at IR level when dead. 
@@ -26,7 +26,7 @@ declare <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 % define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldu_i8_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call i8 @llvm.nvvm.ldu.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) @@ -35,7 +35,7 @@ define void @test_ldu_i8_dead(ptr addrspace(1) %ptr) { define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldu_i16_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call i16 @llvm.nvvm.ldu.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) @@ -44,7 +44,7 @@ define void @test_ldu_i16_dead(ptr addrspace(1) %ptr) { define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldu_i32_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call i32 @llvm.nvvm.ldu.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) @@ -53,7 +53,7 @@ define void @test_ldu_i32_dead(ptr addrspace(1) %ptr) { define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldu_i64_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call i64 @llvm.nvvm.ldu.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) @@ -62,7 +62,7 @@ define void @test_ldu_i64_dead(ptr addrspace(1) %ptr) { define void @test_ldu_p_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldu_p_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr 
#[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call ptr @llvm.nvvm.ldu.global.p.p1(ptr addrspace(1) %ptr, i32 8) @@ -71,7 +71,7 @@ define void @test_ldu_p_dead(ptr addrspace(1) %ptr) { define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldu_f32_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call float @llvm.nvvm.ldu.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) @@ -80,7 +80,7 @@ define void @test_ldu_f32_dead(ptr addrspace(1) %ptr) { define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldu_f64_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call double @llvm.nvvm.ldu.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) @@ -89,7 +89,7 @@ define void @test_ldu_f64_dead(ptr addrspace(1) %ptr) { define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldu_f16_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call half @llvm.nvvm.ldu.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) @@ -98,7 +98,7 @@ define void @test_ldu_f16_dead(ptr addrspace(1) %ptr) { define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldu_v2f16_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call <2 x half> @llvm.nvvm.ldu.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4) @@ -107,7 +107,7 @@ define void @test_ldu_v2f16_dead(ptr addrspace(1) %ptr) { define void @test_ldg_i8_dead(ptr 
addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldg_i8_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call i8 @llvm.nvvm.ldg.global.i.i8.p1(ptr addrspace(1) %ptr, i32 4) @@ -116,7 +116,7 @@ define void @test_ldg_i8_dead(ptr addrspace(1) %ptr) { define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldg_i16_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call i16 @llvm.nvvm.ldg.global.i.i16.p1(ptr addrspace(1) %ptr, i32 2) @@ -125,7 +125,7 @@ define void @test_ldg_i16_dead(ptr addrspace(1) %ptr) { define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldg_i32_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call i32 @llvm.nvvm.ldg.global.i.i32.p1(ptr addrspace(1) %ptr, i32 4) @@ -134,7 +134,7 @@ define void @test_ldg_i32_dead(ptr addrspace(1) %ptr) { define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldg_i64_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call i64 @llvm.nvvm.ldg.global.i.i64.p1(ptr addrspace(1) %ptr, i32 8) @@ -143,7 +143,7 @@ define void @test_ldg_i64_dead(ptr addrspace(1) %ptr) { define void @test_ldg_p_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldg_p_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call ptr @llvm.nvvm.ldg.global.p.p1(ptr 
addrspace(1) %ptr, i32 8) @@ -152,7 +152,7 @@ define void @test_ldg_p_dead(ptr addrspace(1) %ptr) { define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldg_f32_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call float @llvm.nvvm.ldg.global.f.f32.p1(ptr addrspace(1) %ptr, i32 4) @@ -161,7 +161,7 @@ define void @test_ldg_f32_dead(ptr addrspace(1) %ptr) { define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldg_f64_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call double @llvm.nvvm.ldg.global.f.f64.p1(ptr addrspace(1) %ptr, i32 8) @@ -170,7 +170,7 @@ define void @test_ldg_f64_dead(ptr addrspace(1) %ptr) { define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldg_f16_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call half @llvm.nvvm.ldg.global.f.f16.p1(ptr addrspace(1) %ptr, i32 2) @@ -179,7 +179,7 @@ define void @test_ldg_f16_dead(ptr addrspace(1) %ptr) { define void @test_ldg_v2f16_dead(ptr addrspace(1) %ptr) { ; CHECK-LABEL: define void @test_ldg_v2f16_dead( -; CHECK-SAME: ptr addrspace(1) nocapture readnone [[PTR:%.*]]) local_unnamed_addr #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) { ; CHECK-NEXT: ret void ; %val = tail call <2 x half> @llvm.nvvm.ldg.global.f.v2f16.p1(ptr addrspace(1) %ptr, i32 4)